Example #1
0
    def _clear_db(self, db_name: str):
        """Dangerous, make sure you are deleting the right DB"""

        # Guard: never touch a database other than the one currently connected
        if get_db().name != db_name:
            return

        logging.info('Clearing database: {}'.format(db_name))

        # Drop every document collection (same order as before), then the DB itself
        for document_cls in (Result, Molecule, Options, Collection,
                             TaskQueue, Procedure, User):
            document_cls.drop_collection()

        self.client.drop_database(db_name)
Example #2
0
    def queue_get_by_id(self, ids: List[str], limit: int=100, as_json: bool=True):
        """Get tasks by their IDs

        Parameters
        ----------
        ids : list of str
            List of the task Ids in the DB
        limit : int (optional)
            max number of returned tasks. If limit > max_limit, max_limit
            will be returned instead (safe query)
        as_json : bool
            Return tasks as JSON

        Returns
        -------
        list of the found tasks
        """

        # Clamp the caller-supplied limit to the configured maximum (safe query).
        # A falsy limit (0/None) also falls back to the maximum.
        if limit and limit < self._max_limit:
            q_limit = limit
        else:
            q_limit = self._max_limit

        tasks = TaskQueue.objects(id__in=ids).limit(q_limit)

        if as_json:
            return [self._doc_to_json(task, with_ids=True) for task in tasks]

        return tasks
Example #3
0
    def queue_get_next(self, limit=100, tag=None, as_json=True):
        """Claim up to `limit` WAITING tasks (newest first) and mark them RUNNING.

        Parameters
        ----------
        limit : int (optional)
            max number of tasks to claim. Clamped to self._max_limit
            (safe query), consistent with queue_get_by_id.
        tag : str (optional)
            If given, only tasks with this tag are considered.
        as_json : bool
            Return tasks as JSON

        Returns
        -------
        list of the claimed tasks
        """

        # Bug fix: the caller-supplied limit was previously used unclamped,
        # bypassing the _max_limit "safe query" cap used elsewhere in this class.
        q_limit = limit if limit and limit < self._max_limit else self._max_limit

        # Figure out query, tagless has no requirements
        query = {"status": "WAITING"}
        if tag is not None:
            query["tag"] = tag

        found = TaskQueue.objects(**query).limit(q_limit).order_by('-created_on')

        query = {"_id": {"$in": [x.id for x in found]}}

        # update_many using pymongo in one DB access
        upd = TaskQueue._collection.update_many(
            query, {"$set": {
                "status": "RUNNING",
                "modified_on": datetime.datetime.utcnow()
            }})

        if as_json:
            found = [self._doc_to_json(task, with_ids=True) for task in found]

        # NOTE(review): another worker may have claimed some of these tasks
        # between the find and the update — this warning flags that race.
        if upd.modified_count != len(found):
            self.logger.warning("QUEUE: Number of found projects does not match the number of updated projects.")

        return found
Example #4
0
def test_queue():
    """Fetch up to 1000 WAITING tasks, newest first, eagerly dereferencing references."""
    tasks = TaskQueue.objects(status='WAITING')\
                .limit(1000)\
                .order_by('-created_on')\
                .select_related()   # no lazy loading of ReferenceFields, fetch them now (max_depth=1)
    # Other query modifiers that could be chained here:
    # .only(projections_list)
    # .fields(..)
    # .exclude(..)
    # .no_dereference()  # don't fetch any of the ReferenceFields (keep raw ids, turns off dereferencing)
    assert len(tasks) == 3
Example #5
0
    def queue_mark_complete(self, task_ids: List[str]) -> int:
        """Mark the given tasks as COMPLETE.

        Each task already points to its result location, so only the
        status field needs updating.

        Parameters
        ----------
        task_ids : list
            IDs of the tasks to mark as COMPLETE

        Returns
        -------
        int
            Number of updated tasks
        """

        # Single bulk update; mongoengine returns the updated-document count
        return TaskQueue.objects(id__in=task_ids).update(status='COMPLETE')
Example #6
0
def test_add_task_queue():
    """
        Simple test of adding a task using the ME classes
        in QCFractal, tasks should be added using storage_socket
    """

    # Bug fix: clean up BEFORE asserting emptiness. The original asserted
    # count() == 0 first, so a dirty queue failed the test before the
    # cleanup delete() could ever run.
    TaskQueue.objects().delete()
    assert TaskQueue.objects().count() == 0

    # add a task that references a Result
    result = Result.objects().first()

    task = TaskQueue(base_result=result)
    task.save()
    assert TaskQueue.objects().count() == 1

    # add a task that references an Optimization Procedure
    opt = OptimizationProcedure.objects().first()

    task = TaskQueue(base_result=opt)
    task.save()
    assert TaskQueue.objects().count() == 2

    # add a task that references a Torsiondrive Procedure
    tor = TorsiondriveProcedure.objects().first()

    task = TaskQueue(base_result=tor)
    task.save()
    assert TaskQueue.objects().count() == 3
Example #7
0
    def queue_reset_status(self, task_ids):
        """Reset the given tasks back to the WAITING status.

        TODO: needs tests

        Returns
        -------
        int
            Number of updated tasks
        """
        updated_count = TaskQueue.objects(id__in=task_ids).update(status='WAITING')
        return updated_count
Example #8
0
    def queue_submit(self, data: List[Dict]):
        """Submit a list of tasks to the queue.
        Tasks are unique by their base_result, which should be inserted into
        the DB first before submitting its corresponding task to the queue
        (with result.status='INCOMPLETE' as the default)
        The default task.status is 'WAITING'

        Duplicate tasks should be a rare case.
        Hooks are merged if the task already exists

        Parameters
        ----------
        data : list of tasks (dict)
            A task is a dict, with the following fields:
            - hash_index: idx, not used anymore
            - spec: dynamic field (dict-like), can have any structure
            - hooks: list of any objects representing listeners (for now)
            - tag: str
            - base_results: tuple (required), first value is the class type
             of the result, ('results' or 'procedure'). The second value is
             the ID of the result in the DB. Example:
             "base_result": ('results', result_id)

        Returns
        -------
        dict (data and meta)
            'data' is a list of the IDs of the tasks IN ORDER, including
            duplicates. An errored task has 'None' in its ID
            meta['duplicates'] has the duplicate tasks
        """

        meta = storage_utils.add_metadata()

        results = []
        for d in data:
            try:
                if not isinstance(d['base_result'], tuple):
                    raise Exception("base_result must be a tuple not {}."
                                    .format(type(d['base_result'])))

                # Map the declared base_result type to its document class;
                # reject anything else before touching the DB.
                result_obj = None
                if d['base_result'][0] == 'results':
                    result_obj = Result(id=d['base_result'][1])
                elif d['base_result'][0] == 'procedure':
                    result_obj = Procedure(id=d['base_result'][1])
                else:
                    raise TypeError("Base_result type must be 'results' or 'procedure',"
                                    " {} is given.".format(d['base_result'][0]))
                task = TaskQueue(**d)
                task.base_result = result_obj
                task.save()
                results.append(str(task.id))
                meta['n_inserted'] += 1
            except mongoengine.errors.NotUniqueError as err:  # rare case
                # NotUniqueError can only come from task.save(), so result_obj
                # is always bound here; base_result is stored as a
                # Result/Procedure reference, so query with the object itself.
                task = TaskQueue.objects(base_result=result_obj).first()
                # Bug fix: the original message had no placeholder, so the
                # duplicate document was silently dropped from the log output.
                self.logger.warning('queue_submit got a duplicate task: %s', task.to_mongo())
                if d.get('hooks'):  # merge hooks (tolerate a missing 'hooks' key)
                    task.hooks.extend(d['hooks'])
                    task.save()
                results.append(str(task.id))
                meta['duplicates'].append(self._doc_to_tuples(task, with_ids=False))  # TODO
            except Exception as err:
                meta["success"] = False
                meta["errors"].append(str(err))
                results.append(None)

        # Bug fix: success was previously forced to True unconditionally,
        # clobbering the False set when any task errored above.
        if not meta["errors"]:
            meta["success"] = True

        ret = {"data": results, "meta": meta}
        return ret