def query_results(n_query, mol):
    """Issue one Result lookup per generated method name (query benchmark helper)."""
    # All fields except "method" are constant across iterations.
    base_query = {
        "molecule": mol,
        "basis": "B1",
        "keywords": None,
        "program": "P1",
        "driver": "energy",
    }
    for idx in range(n_query):
        Result.objects(method=str(idx), **base_query)  # second DB access
def test_add_task_queue():
    """
    Simple test of adding a task using the ME classes in QCFractal;
    tasks should be added using storage_socket.
    """
    assert TaskQueue.objects.count() == 0
    TaskQueue.objects().delete()

    # Add one task referencing each kind of base result:
    # a Result, an Optimization procedure, and a Torsiondrive procedure.
    expected_count = 0
    for doc_class in (Result, OptimizationProcedure, TorsiondriveProcedure):
        base = doc_class.objects().first()
        TaskQueue(base_result=base).save()
        expected_count += 1
        assert TaskQueue.objects().count() == expected_count
def add_results(self, data: List[dict], update_existing: bool=False, return_json=True):
    """
    Add results from a given dict. The dict should have all the required
    keys of a result.

    Parameters
    ----------
    data : list of dict
        Each dict must have:
        program, driver, method, basis, options, molecule
        Where molecule is the molecule id in the DB.
        In addition, it should have the other attributes that it needs
        to store.
        NOTE: the dicts are mutated in place (index fields are lowercased
        and 'molecule' may be converted to an ObjectId).
    update_existing : bool (default False)
        Update existing results
    return_json : bool (default True)
        Currently unused; kept for interface compatibility.

    Returns
    -------
    Dict with keys: data, meta
        Data is the ids of the inserted/updated/existing docs
    """
    # Normalize the indexed fields to lowercase so lookups are
    # case-insensitive; None values are left untouched.
    for d in data:
        for field in self._lower_results_index:
            if d[field] is not None:
                d[field] = d[field].lower()

    meta = storage_utils.add_metadata()

    results = []
    for d in data:
        # Search by index keywords, not by all keys — much faster.
        # NOTE(review): the query maps d['driver'] onto the 'name' field;
        # looks like it should be driver=d['driver'] — confirm against the
        # Result schema before changing.
        doc = Result.objects(
            program=d['program'],
            name=d['driver'],
            method=d['method'],
            basis=d['basis'],
            options=d['options'],
            molecule=d['molecule'])

        if doc.count() == 0 or update_existing:
            if not isinstance(d['molecule'], ObjectId):
                d['molecule'] = ObjectId(d['molecule'])
            doc = doc.upsert_one(**d)
            results.append(str(doc.id))
            meta['n_inserted'] += 1
        else:
            # Fetch the existing doc once (the original fetched it twice,
            # costing an extra DB round-trip per duplicate).
            existing = doc.first()
            meta['duplicates'].append(self._doc_to_tuples(existing, with_ids=False))  # TODO
            # If new or duplicate, add the id to the return list.
            results.append(str(existing.id))
    meta["success"] = True

    return {"data": results, "meta": meta}
def get_results_by_ids(self, ids: List[str]=None, projection=None, return_json=True, with_ids=True):
    """
    Get a list of Results using the given list of ids.

    Parameters
    ----------
    ids : List of str
        Ids of the results in the DB
    projection : list/set/tuple of keys, default is None
        The fields to return; default is to return all fields
    return_json : bool, default is True
        Return the results as a list of json instead of objects
    with_ids : bool, default is True
        Include the ids in the returned objects/dicts

    Returns
    -------
    Dict with keys: data, meta
        Data is the objects found
    """
    meta = storage_utils.get_metadata()

    # Build the query incrementally: filter by id, optionally restrict
    # the returned fields, then cap at the global limit.
    query = Result.objects(id__in=ids)
    if projection:
        query = query.only(*projection)
    data = query.limit(self._max_limit)

    meta["n_found"] = data.count()
    meta["success"] = True

    if not return_json:
        return {"data": data, "meta": meta}
    return {"data": [self._doc_to_json(d, with_ids) for d in data], "meta": meta}
def del_results(self, ids: List[str]):
    """
    Remove results from the database using their ids.
    (Be cautious! Other collections may be referencing these results.)

    Parameters
    ----------
    ids : list of str
        The ids of the results to be deleted

    Returns
    -------
    int
        Number of results deleted
    """
    return Result.objects(id__in=[ObjectId(doc_id) for doc_id in ids]).delete()
def duplicate_results(n_results, mol):
    """Insert results where half the documents are duplicates of existing ones.

    Parameters
    ----------
    n_results : int
        Number of candidate documents to generate; the "method" field is
        offset by n_results/2 so half collide with previously saved docs.
    mol : Molecule
        The molecule document to reference from every result.
    """
    tosave_results = []
    for i in range(n_results):
        data = {
            "molecule": mol,
            "method": str(i + int(n_results / 2)),
            "basis": "Bulk",
            "keywords": None,
            "program": "P1",
            "driver": "energy",
            "other_data": 5,
        }
        # Only queue documents that do not already exist (one lookup each).
        if Result.objects(**data).first() is None:
            tosave_results.append(Result(**data))

    # Guard the bulk insert: mongoengine raises on an empty insert list.
    if tosave_results:
        Result.objects.insert(tosave_results)
    # Fixed typo in the output string ("Duplciates" -> "Duplicates").
    print("Duplicates: ", len(tosave_results))
def test_results(storage_socket):
    """Handling results through the ME classes."""
    assert Result.objects().count() == 0
    assert Options.objects().count() == 0

    molecules = Molecule.objects(molecular_formula='H4O2')
    assert molecules.count() == 2

    # Two result payloads that differ only in molecule and other_data.
    common = {
        "method": "M1",
        "basis": "B1",
        "options": None,
        "program": "P1",
        "driver": "energy",
    }
    page1 = dict(common, molecule=molecules[0], other_data=5)
    page2 = dict(common, molecule=molecules[1], other_data=10)  # currently unused below

    result = Result(**page1)
    result.save()
    # print('Result After save: ', result.to_json())
    assert result.molecule.molecular_formula == 'H4O2'
def get_results(self, program: str=None, method: str=None, basis: str=None,
                molecule: str=None, driver: str=None, options: str=None,
                status: str='COMPLETE', projection=None, limit: int=None,
                skip: int=None, return_json=True, with_ids=True):
    """Query results by any combination of the indexed fields.

    Parameters
    ----------
    program : str
    method : str
    basis : str
    molecule : str
        Molecule id in the DB
    driver : str
    options : str
        The id of the option in the DB
    status : str, default is 'COMPLETE'
        The status of the result: 'COMPLETE', 'INCOMPLETE', or 'ERROR'
    projection : list/set/tuple of keys, default is None
        The fields to return; default is to return all fields
    limit : int, default is None
        Maximum number of results to return.
        If 'limit' is greater than the global setting self._max_limit,
        self._max_limit is used instead (to avoid overloading the server).
    skip : int, default is None
        TODO: skip the first 'skip' results. Used to paginate.
    return_json : bool, default is True
        Return the results as a list of json instead of objects
    with_ids : bool, default is True
        Include the ids in the returned objects/dicts

    Returns
    -------
    Dict with keys: data, meta
        Data is the objects found
    """
    meta = storage_utils.get_metadata()

    # Collect only the filters the caller actually supplied.
    criteria = {}
    if program:
        criteria['program'] = program
    if method:
        criteria['method'] = method
    if basis:
        criteria['basis'] = basis
    if molecule:
        criteria['molecule'], _ = _str_to_indices_with_errors(molecule)
    if driver:
        criteria['driver'] = driver
    if options:
        criteria['options'] = options
    if status:
        criteria['status'] = status

    # Translate into mongoengine keyword arguments: molecule ids become an
    # __in filter, status is passed through verbatim, and everything else
    # is lowercased (scalars directly, sequences element-wise via __in).
    parsed_query = {}
    for field, value in criteria.items():
        if field == "molecule":
            parsed_query["molecule__in"] = value
        elif field == "status":
            parsed_query["status"] = value
        elif isinstance(value, (list, tuple)):
            parsed_query[field + "__in"] = [item.lower() for item in value]
        else:
            parsed_query[field] = value.lower()

    # Clamp the caller's limit at the server-wide maximum.
    q_limit = self._max_limit
    if limit and limit < self._max_limit:
        q_limit = limit

    data = []
    try:
        cursor = Result.objects(**parsed_query)
        if projection:
            cursor = cursor.only(*projection)
        data = cursor.limit(q_limit)

        meta["n_found"] = data.count()
        meta["success"] = True
    except Exception as err:
        meta['error_description'] = str(err)

    if not return_json:
        return {"data": data, "meta": meta}

    rdata = []
    for d in data:
        d = self._doc_to_json(d, with_ids)
        # Unwrap the extended-JSON ObjectId into a plain string id.
        if "molecule" in d:
            d["molecule"] = d["molecule"]["$oid"]
        rdata.append(d)
    return {"data": rdata, "meta": meta}