def test_get_mongolike(self):
    """Check get_mongolike lookups with dotted keys on a nested document.

    The cases cover a top-level key, a nested dict path, and paths that
    step through a list by index.
    """
    nested = {"a": [{"b": 1}, {"c": {"d": 2}}], "e": {"f": {"g": 3}}, "g": 4}
    # (dotted key, value expected at that key)
    cases = [
        ("g", 4),
        ("e.f.g", 3),
        ("a.0.b", 1),
        ("a.1.c.d", 2),
    ]
    for dotted_key, expected in cases:
        self.assertEqual(get_mongolike(nested, dotted_key), expected)
def test_get_mongolike(self):
    """Check get_mongolike lookups with dotted keys on a nested document.

    The cases cover a top-level key, a nested dict path, paths that step
    through a list by index, and a negative list index.
    """
    nested = {"a": [{"b": 1}, {"c": {"d": 2}}], "e": {"f": {"g": 3}}, "g": 4, "h": [5, 6]}
    # (dotted key, value expected at that key)
    cases = [
        ("g", 4),
        ("e.f.g", 3),
        ("a.0.b", 1),
        ("a.1.c.d", 2),
        ("h.-1", 6),
    ]
    for dotted_key, expected in cases:
        self.assertEqual(get_mongolike(nested, dotted_key), expected)
def _update_material(self, m_id, taskdoc):
    """
    Update a material document based on a new task

    For each property the task may supply (its task label has a quality
    score in that property's settings), the property value and its
    metadata (label, task id, energy) are written to the material when the
    task is better than the data currently stored. The task id is then
    appended to ``_tasksbuilder.all_task_ids``.

    Args:
        m_id (int): material_id for material document to update
        taskdoc (dict): a JSON-like task document
    """
    # get list of labels for each existing property in material
    # this is used to decide if the taskdoc has higher quality data
    prop_tlabels = self._materials.find_one(
        {"material_id": m_id},
        {"_tasksbuilder.prop_metadata.labels": 1}
    )["_tasksbuilder"]["prop_metadata"]["labels"]

    task_label = taskdoc["task_label"]  # task label of current doc

    # figure out what properties need to be updated
    for x in self.property_settings:
        quality_scores = x["quality_scores"]

        # check if this is a valid task for getting these properties;
        # the answer does not depend on the individual property
        if task_label not in quality_scores:
            continue
        t_quality = quality_scores[task_label]

        for p in x["properties"]:
            # quality of the data currently stored for this property, or
            # None when the material has no (scored) data for it yet
            m_quality = quality_scores.get(prop_tlabels.get(p))

            # check if this task's quality is better than existing data
            # 3 possibilities:
            # i) materials property data not present, so this is best
            # ii) task quality higher based on task label
            # iii) task quality equal to materials; use lowest energy task
            if m_quality is None:
                # explicit None test: a legitimate score of 0 must not be
                # mistaken for "no data present"
                use_task = True
            elif t_quality != m_quality:
                use_task = t_quality > m_quality
            else:
                # tie on quality: re-read the stored energy and keep the
                # lower-energy calculation
                use_task = taskdoc["output"]["energy_per_atom"] < \
                    self._materials.find_one(
                        {"material_id": m_id},
                        {"_tasksbuilder": 1}
                    )["_tasksbuilder"]["prop_metadata"]["energies"][p]

            if not use_task:
                continue

            # where the property lives in the materials doc and task doc
            materials_key = "{}.{}".format(x["materials_key"], p) \
                if x.get("materials_key") else p
            tasks_key = "{}.{}".format(x["tasks_key"], p) \
                if x.get("tasks_key") else p

            # insert task's properties into material together with
            # metadata about this task
            self._materials.update_one(
                {"material_id": m_id},
                {"$set": {
                    materials_key: get_mongolike(taskdoc, tasks_key),
                    "_tasksbuilder.prop_metadata.labels.{}".format(p): task_label,
                    "_tasksbuilder.prop_metadata.task_ids.{}".format(p):
                        self.tid_str(taskdoc["task_id"]),
                    "_tasksbuilder.prop_metadata.energies.{}".format(p):
                        taskdoc["output"]["energy_per_atom"],
                    "_tasksbuilder.updated_at": datetime.utcnow()}})

            # copy property to document root if in properties_root
            if p in self.properties_root:
                self._materials.update_one(
                    {"material_id": m_id},
                    {"$set": {p: get_mongolike(taskdoc, tasks_key)}})

    # record that this task has been processed for the material
    self._materials.update_one(
        {"material_id": m_id},
        {"$push": {"_tasksbuilder.all_task_ids": self.tid_str(taskdoc["task_id"])}})
def group_by_parent_lattice(docs, tol=1e-6):
    """
    Groups a set of documents by parent lattice equivalence

    Args:
        docs ([{}]): list of documents e. g. dictionaries or cursor
        tol (float): absolute tolerance (``atol``) passed to np.allclose
            when comparing lattices, default 1e-6. np.allclose's own
            default relative tolerance still applies.

    Returns:
        list of ``[parent_lattice, [doc, ...]]`` pairs, one per distinct
        parent lattice, in order of first appearance
    """
    groups = []
    for doc in docs:
        simulated = get_mongolike(doc, "output.structure.lattice.matrix")

        if "deformation" not in doc['task_label']:
            parent_lattice = np.array(simulated)
        else:
            # Note that this assumes only one transformation,
            # deformstructuretransformation: undo it by applying the
            # transposed inverse of the deformation to the lattice
            deformation = doc['transmuter']['transformation_params'][0]['deformation']
            inverse_t = np.transpose(np.linalg.inv(deformation))
            parent_lattice = np.dot(simulated, inverse_t)

        # attach to the first group whose lattice matches, else open a new one
        for known_lattice, members in groups:
            if np.allclose(known_lattice, parent_lattice, atol=tol):
                members.append(doc)
                break
        else:
            groups.append([parent_lattice, [doc]])
    return groups
def convert_mpworks_to_atomate(mpworks_doc, update_mpworks=True):
    """
    Function to convert an mpworks document into an atomate document,
    using the key mapping in settings['task_conversion_keys'] plus a few
    custom cases (task label, reversed calculations, anonymous formula,
    deformation matrix)

    Args:
        mpworks_doc (dict): mpworks task document
        update_mpworks (bool): flag to indicate that mpworks schema
            should be updated to final MPWorks version

    Returns:
        dict: the newly built atomate document
    """
    if update_mpworks:
        # return value is ignored, so presumably this updates mpworks_doc
        # in place -- confirm against update_mpworks_schema
        update_mpworks_schema(mpworks_doc)

    atomate_doc = {}

    # straight key-to-key copies defined by the conversion table
    for key_mpworks, key_atomate in settings['task_conversion_keys'].items():
        set_mongolike(atomate_doc, key_atomate,
                      get_mongolike(mpworks_doc, key_mpworks))

    # Task type
    label_table = settings['task_label_conversions']
    atomate_doc["task_label"] = label_table.get(mpworks_doc["task_type"])

    # calculations, stored newest-first
    calculations = mpworks_doc["calculations"]
    atomate_doc["calcs_reversed"] = calculations[::-1]

    # anonymous formula
    composition = Composition(atomate_doc['composition_reduced'])
    atomate_doc["formula_anonymous"] = composition.anonymized_formula

    # deformation matrix
    if "deformation_matrix" not in mpworks_doc:
        return atomate_doc

    # Transpose this b/c of old bug, should verify in doc processing
    deformation = mpworks_doc["deformation_matrix"]
    if isinstance(deformation, str):
        deformation = convert_string_deformation_to_list(deformation)
    deformation = np.transpose(deformation).tolist()

    set_mongolike(atomate_doc, "transmuter.transformations",
                  ["DeformStructureTransformation"])
    set_mongolike(atomate_doc, "transmuter.transformation_params",
                  [{"deformation": deformation}])
    return atomate_doc
def _update_material(self, m_id, taskdoc):
    """
    Update a material document based on a new task and using complex logic

    For each property the task may supply (its task label has a quality
    score in that property's settings), the property value and its
    metadata (label, task id, energy) are written to the material when the
    task is better than the data currently stored. The task id is then
    appended to ``_tasksbuilder.all_task_ids``.

    Args:
        m_id (int): material_id for material document to update
        taskdoc (dict): a JSON-like task document
    """
    # For each materials property, figure out what kind of task the data is
    # currently based on as defined by the task label. This is used to decide
    # if the new taskdoc is a type of calculation that provides higher quality
    # data for that property
    prop_tlabels = self._materials.find_one(
        {"material_id": m_id},
        {"_tasksbuilder.prop_metadata.labels": 1}
    )["_tasksbuilder"]["prop_metadata"]["labels"]

    # task label of new doc that updates this material
    task_label = taskdoc["task_label"]

    # figure out what materials properties need to be updated based on new task
    for x in self.property_settings:
        quality_scores = x["quality_scores"]

        # check if this is a valid task for getting these properties;
        # the answer does not depend on the individual property
        if task_label not in quality_scores:
            continue
        t_quality = quality_scores[task_label]

        for p in x["properties"]:
            # assert: this is a valid task for the property
            # but is it the "best" task for that property (highest quality
            # score)? m_quality is None when the material has no (scored)
            # data for the property yet
            m_quality = quality_scores.get(prop_tlabels.get(p))

            # check if this task's quality is better than existing data
            # 3 possibilities:
            # i) materials property data not present, so this is best
            # ii) task quality higher based on task label
            # iii) task quality equal to materials; use lowest energy task
            if m_quality is None:
                # explicit None test: a legitimate score of 0 must not be
                # mistaken for "no data present"
                use_task = True
            elif t_quality != m_quality:
                use_task = t_quality > m_quality
            else:
                # tie on quality: re-read the stored energy and keep the
                # lower-energy calculation
                use_task = taskdoc["output"]["energy_per_atom"] < \
                    self._materials.find_one(
                        {"material_id": m_id},
                        {"_tasksbuilder": 1}
                    )["_tasksbuilder"]["prop_metadata"]["energies"][p]

            if not use_task:
                continue

            # this task has better quality data
            # figure out where the property data lives in the materials doc
            # and in the task doc
            materials_key = "{}.{}".format(x["materials_key"], p) \
                if x.get("materials_key") else p
            tasks_key = "{}.{}".format(x["tasks_key"], p) \
                if x.get("tasks_key") else p

            # insert property data AND metadata about this task
            self._materials.update_one(
                {"material_id": m_id},
                {"$set": {
                    materials_key: get_mongolike(taskdoc, tasks_key),
                    "_tasksbuilder.prop_metadata.labels.{}".format(p): task_label,
                    "_tasksbuilder.prop_metadata.task_ids.{}".format(p):
                        dbid_to_str(self._t_prefix, taskdoc["task_id"]),
                    "_tasksbuilder.prop_metadata.energies.{}".format(p):
                        taskdoc["output"]["energy_per_atom"],
                    "_tasksbuilder.updated_at": datetime.utcnow()}})

            # copy property to document root if in properties_root
            # i.e., intentionally duplicate some data to the root level
            if p in self.properties_root:
                self._materials.update_one(
                    {"material_id": m_id},
                    {"$set": {p: get_mongolike(taskdoc, tasks_key)}})

    # update the database to reflect that this task_id was already processed
    self._materials.update_one(
        {"material_id": m_id},
        {"$push": {
            "_tasksbuilder.all_task_ids":
                dbid_to_str(self._t_prefix, taskdoc["task_id"])}})
def _update_material(self, m_id, taskdoc):
    """
    Update a material document based on a new task and using complex logic

    For each property the task may supply (its task label has a quality
    score in that property's settings), the property value and its
    metadata (label, task id, energy) are written to the material when the
    task is better than the data currently stored. The task id is then
    appended to ``_tasksbuilder.all_task_ids``.

    Args:
        m_id (int): material_id for material document to update
        taskdoc (dict): a JSON-like task document
    """
    # For each materials property, figure out what kind of task the data is
    # currently based on as defined by the task label. This is used to decide
    # if the new taskdoc is a type of calculation that provides higher quality
    # data for that property
    material = self._materials.find_one(
        {"material_id": m_id}, {"_tasksbuilder.prop_metadata.labels": 1})
    prop_tlabels = material["_tasksbuilder"]["prop_metadata"]["labels"]

    # task label of new doc that updates this material
    task_label = taskdoc["task_label"]

    # figure out what materials properties need to be updated based on new task
    for x in self.property_settings:
        quality_scores = x["quality_scores"]

        # check if this is a valid task for getting these properties;
        # the answer does not depend on the individual property
        if task_label not in quality_scores:
            continue
        t_quality = quality_scores[task_label]

        for p in x["properties"]:
            # assert: this is a valid task for the property
            # but is it the "best" task for that property (highest quality
            # score)? m_quality is None when the material has no (scored)
            # data for the property yet
            m_quality = quality_scores.get(prop_tlabels.get(p))

            # check if this task's quality is better than existing data
            # 3 possibilities:
            # i) materials property data not present, so this is best
            # ii) task quality higher based on task label
            # iii) task quality equal to materials; use lowest energy task
            if m_quality is None:
                # explicit None test: a legitimate score of 0 must not be
                # mistaken for "no data present"
                use_task = True
            elif t_quality != m_quality:
                use_task = t_quality > m_quality
            else:
                # tie on quality: re-read the stored energy and keep the
                # lower-energy calculation
                use_task = taskdoc["output"]["energy_per_atom"] < \
                    self._materials.find_one(
                        {"material_id": m_id},
                        {"_tasksbuilder": 1}
                    )["_tasksbuilder"]["prop_metadata"]["energies"][p]

            if not use_task:
                continue

            # this task has better quality data
            # figure out where the property data lives in the materials doc
            # and in the task doc
            materials_key = "{}.{}".format(x["materials_key"], p) \
                if x.get("materials_key") else p
            tasks_key = "{}.{}".format(x["tasks_key"], p) \
                if x.get("tasks_key") else p

            # insert property data AND metadata about this task
            self._materials.update_one(
                {"material_id": m_id},
                {"$set": {
                    materials_key: get_mongolike(taskdoc, tasks_key),
                    "_tasksbuilder.prop_metadata.labels.{}".format(p): task_label,
                    "_tasksbuilder.prop_metadata.task_ids.{}".format(p):
                        dbid_to_str(self._t_prefix, taskdoc["task_id"]),
                    "_tasksbuilder.prop_metadata.energies.{}".format(p):
                        taskdoc["output"]["energy_per_atom"],
                    "_tasksbuilder.updated_at": datetime.utcnow()}})

            # copy property to document root if in properties_root
            # i.e., intentionally duplicate some data to the root level
            if p in self.properties_root:
                self._materials.update_one(
                    {"material_id": m_id},
                    {"$set": {p: get_mongolike(taskdoc, tasks_key)}})

    # update the database to reflect that this task_id was already processed
    self._materials.update_one(
        {"material_id": m_id},
        {"$push": {
            "_tasksbuilder.all_task_ids":
                dbid_to_str(self._t_prefix, taskdoc["task_id"])}})