def _clear_db(self, db_name: str):
    """Dangerous, make sure you are deleting the right DB"""

    # make sure it's the right DB
    if get_db().name == db_name:
        logging.info('Clearing database: {}'.format(db_name))
        Result.drop_collection()
        Molecule.drop_collection()
        Options.drop_collection()
        Collection.drop_collection()
        TaskQueue.drop_collection()
        Procedure.drop_collection()
        User.drop_collection()

        self.client.drop_database(db_name)
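# A possible extra guard before calling _clear_db from test fixtures (a sketch
# only; the "test_" naming convention is an assumption, not enforced anywhere
# in this module):
#
#     if not db_name.startswith('test_'):
#         raise RuntimeError('Refusing to clear non-test database: {}'.format(db_name))
#     storage._clear_db(db_name)  # `storage` is a hypothetical socket instance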
def queue_get_by_id(self, ids: List[str], limit: int=100, as_json: bool=True):
    """Get tasks by their IDs

    Parameters
    ----------
    ids : list of str
        List of the task IDs in the DB
    limit : int (optional)
        Max number of returned tasks. If limit > max_limit, max_limit
        will be used instead (safe query)
    as_json : bool
        Return tasks as JSON

    Returns
    -------
    list of the found tasks
    """

    q_limit = limit if limit and limit < self._max_limit else self._max_limit
    found = TaskQueue.objects(id__in=ids).limit(q_limit)

    if as_json:
        found = [self._doc_to_json(task, with_ids=True) for task in found]

    return found
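# Example (a sketch; `storage` is an instance of this socket class and the IDs
# are placeholder ObjectId strings):
#
#     tasks = storage.queue_get_by_id(['5b3f...', '5b40...'], limit=10)
#     for t in tasks:
#         print(t['id'], t['status'])  # assumes _doc_to_json keeps these fields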
def queue_get_next(self, limit=100, tag=None, as_json=True):

    # Figure out the query; tagless has no extra requirements
    query = {"status": "WAITING"}
    if tag is not None:
        query["tag"] = tag

    found = TaskQueue.objects(**query).limit(limit).order_by('-created_on')

    query = {"_id": {"$in": [x.id for x in found]}}

    # update_many using pymongo in one DB access
    upd = TaskQueue._collection.update_many(
        query, {"$set": {
            "status": "RUNNING",
            "modified_on": datetime.datetime.utcnow()
        }})

    if as_json:
        found = [self._doc_to_json(task, with_ids=True) for task in found]

    if upd.modified_count != len(found):
        self.logger.warning("QUEUE: Number of found tasks does not match the number of updated tasks.")

    return found
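# Note: the fetch-then-update above is not atomic, so two managers polling
# concurrently could claim overlapping sets of WAITING tasks. A sketch of an
# atomic claim loop using pymongo's find_one_and_update (one round trip per
# task, trading throughput for correctness; `queue_claim_next` is a
# hypothetical name, not part of this class):

def queue_claim_next(self, limit=100, tag=None):
    from pymongo import ReturnDocument

    query = {"status": "WAITING"}
    if tag is not None:
        query["tag"] = tag

    claimed = []
    for _ in range(limit):
        # Atomically flip one WAITING task to RUNNING and return the new doc
        doc = TaskQueue._collection.find_one_and_update(
            query,
            {"$set": {"status": "RUNNING",
                      "modified_on": datetime.datetime.utcnow()}},
            sort=[("created_on", -1)],
            return_document=ReturnDocument.AFTER)
        if doc is None:  # queue drained
            break
        claimed.append(doc)

    return claimed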
def test_queue():
    tasks = TaskQueue.objects(status='WAITING')\
                .limit(1000)\
                .order_by('-created_on')\
                .select_related()  # *** no lazy load of ReferenceFields, fetch them now (turns on dereferencing, max_depth=1)
    # .only(projections_list)
    # .fields(..)
    # .exclude(..)
    # .no_dereference()  # don't fetch any of the ReferenceFields, keep ids only (turning off dereferencing)

    assert len(tasks) == 3
def queue_mark_complete(self, task_ids: List[str]) -> int:
    """Update the given tasks as complete
    Note that each task is already pointing to its result location

    Parameters
    ----------
    task_ids : list
        IDs of the tasks to mark as COMPLETE

    Returns
    -------
    int
        Updated count
    """

    found = TaskQueue.objects(id__in=task_ids).update(status='COMPLETE')

    return found
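# Example (a sketch; the IDs are placeholders for tasks already in the queue):
#
#     n = storage.queue_mark_complete([task_id1, task_id2])
#     assert n == 2  # both tasks were found and flipped to COMPLETE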
def test_add_task_queue():
    """
    Simple test of adding a task using the ME classes
    in QCFractal, tasks should be added using storage_socket
    """

    TaskQueue.objects().delete()  # clean up any leftover tasks first
    assert TaskQueue.objects.count() == 0

    # add a task that references a Result
    result = Result.objects().first()

    task = TaskQueue(base_result=result)
    task.save()
    assert TaskQueue.objects().count() == 1

    # add a task that references an Optimization Procedure
    opt = OptimizationProcedure.objects().first()

    task = TaskQueue(base_result=opt)
    task.save()
    assert TaskQueue.objects().count() == 2

    # add a task that references a Torsiondrive Procedure
    tor = TorsiondriveProcedure.objects().first()

    task = TaskQueue(base_result=tor)
    task.save()
    assert TaskQueue.objects().count() == 3
def queue_reset_status(self, task_ids):
    """TODO: needs tests"""

    found = TaskQueue.objects(id__in=task_ids).update(status='WAITING')

    return found
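# A possible test for the TODO above (a sketch in the style of
# test_add_task_queue; `storage` is the hypothetical socket under test, and a
# Result is assumed to already exist in the test DB):

def test_queue_reset_status():
    TaskQueue.objects().delete()  # clean state, as in test_add_task_queue

    result = Result.objects().first()
    task = TaskQueue(base_result=result, status='RUNNING')
    task.save()

    n = storage.queue_reset_status([str(task.id)])

    assert n == 1
    task.reload()  # refresh from the DB
    assert task.status == 'WAITING'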
def queue_submit(self, data: List[Dict]):
    """Submit a list of tasks to the queue.
    Tasks are unique by their base_result, which should be inserted into
    the DB first before submitting its corresponding task to the queue
    (with result.status='INCOMPLETE' as the default).
    The default task.status is 'WAITING'.

    Duplicate tasks should be a rare case.
    Hooks are merged if the task already exists.

    Parameters
    ----------
    data : list of tasks (dict)
        A task is a dict, with the following fields:
        - hash_index: idx, not used anymore
        - spec: dynamic field (dict-like), can have any structure
        - hooks: list of any objects representing listeners (for now)
        - tag: str
        - base_result: tuple (required), the first value is the class type
          of the result ('results' or 'procedure'), the second value is
          the ID of the result in the DB.
          Example: "base_result": ('results', result_id)

    Returns
    -------
    dict (data and meta)
        'data' is a list of the IDs of the tasks IN ORDER, including
        duplicates. An errored task has 'None' in its ID
        meta['duplicates'] has the duplicate tasks
    """

    meta = storage_utils.add_metadata()

    results = []
    for d in data:
        try:
            if not isinstance(d['base_result'], tuple):
                raise Exception("base_result must be a tuple not {}."
                                .format(type(d['base_result'])))

            # If saved as DBRef, then use raw query to retrieve (avoid this)
            # if d['base_result'][0] in ('results', 'procedure'):
            #     base_result = DBRef(d['base_result'][0], d['base_result'][1])

            result_obj = None
            if d['base_result'][0] == 'results':
                result_obj = Result(id=d['base_result'][1])
            elif d['base_result'][0] == 'procedure':
                result_obj = Procedure(id=d['base_result'][1])
            else:
                raise TypeError("Base_result type must be 'results' or 'procedure',"
                                " {} is given.".format(d['base_result'][0]))
            task = TaskQueue(**d)
            task.base_result = result_obj
            task.save()
            results.append(str(task.id))
            meta['n_inserted'] += 1
        except mongoengine.errors.NotUniqueError as err:  # rare case
            # If base_result is stored as DBRef, get it with:
            # task = TaskQueue.objects(__raw__={'base_result': base_result}).first()  # avoid
            # If base_result is stored as a Result or Procedure class, get it with:
            task = TaskQueue.objects(base_result=result_obj).first()
            self.logger.warning('queue_submit got a duplicate task: {}'.format(task.to_mongo()))
            if d.get('hooks'):  # merge hooks
                task.hooks.extend(d['hooks'])
                task.save()
            results.append(str(task.id))
            meta['duplicates'].append(self._doc_to_tuples(task, with_ids=False))  # TODO
        except Exception as err:
            meta["success"] = False
            meta["errors"].append(str(err))
            results.append(None)

    if not meta["errors"]:
        meta["success"] = True

    ret = {"data": results, "meta": meta}
    return ret
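# Example payload for queue_submit (a sketch; assumes `storage` is a socket
# instance, the referenced result already exists in the DB, and the spec
# contents are placeholders since `spec` is a free-form dynamic field):
#
#     result_id = str(Result.objects().first().id)
#     ret = storage.queue_submit([{
#         'spec': {'function': 'some.compute.function', 'args': [], 'kwargs': {}},
#         'hooks': [],
#         'tag': None,
#         'base_result': ('results', result_id),
#     }])
#     task_ids = ret['data']                  # IDs in submission order; None for errored tasks
#     duplicates = ret['meta']['duplicates']  # tasks that already existed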