def test_get_mongolike(self):
        """
        Check get_mongolike lookups on a nested document using dotted paths.

        The cases cover a root-level key, a nested dict path, and paths whose
        integer segments are expected to index into a list.
        """
        d = {"a": [{"b": 1}, {"c": {"d": 2}}], "e": {"f": {"g": 3}}, "g": 4}

        # (dotted path, value expected at that path)
        cases = (
            ("g", 4),
            ("e.f.g", 3),
            ("a.0.b", 1),
            ("a.1.c.d", 2),
        )
        for path, expected in cases:
            self.assertEqual(get_mongolike(d, path), expected)
Example #2
0
    def test_get_mongolike(self):
        """
        Check get_mongolike lookups on a nested document using dotted paths.

        Besides root keys, nested dicts and positive list indices, the last
        case expects a negative integer segment ("h.-1") to yield the final
        element of a list.
        """
        d = {"a": [{"b": 1}, {"c": {"d": 2}}], "e": {"f": {"g": 3}}, "g": 4, "h":[5, 6]}

        # dotted path -> value expected at that path
        expected_by_path = {
            "g": 4,
            "e.f.g": 3,
            "a.0.b": 1,
            "a.1.c.d": 2,
            "h.-1": 6,
        }
        for path, expected in expected_by_path.items():
            self.assertEqual(get_mongolike(d, path), expected)
    def _update_material(self, m_id, taskdoc):
        """
        Fold the data of a new task into an existing material document.

        For each property listed in self.property_settings whose
        quality_scores contain the task's label, the task's value replaces
        the material's value when one of the following holds:
          i) the label currently recorded for the property has no (or a
             falsy) quality score,
          ii) the task's quality score is higher than the recorded one,
          iii) the scores tie and the task has the lower energy_per_atom.
        Regardless of the outcome, the task id is appended to
        _tasksbuilder.all_task_ids of the material.

        Args:
            m_id (int): material_id for material document to update
            taskdoc (dict): a JSON-like task document
        """
        query = {"material_id": m_id}

        # task label that each existing material property is currently based on
        label_doc = self._materials.find_one(
            query, {"_tasksbuilder.prop_metadata.labels": 1})
        current_labels = label_doc["_tasksbuilder"]["prop_metadata"]["labels"]

        new_label = taskdoc["task_label"]  # task label of the incoming doc

        for setting in self.property_settings:
            for prop in setting["properties"]:
                scores = setting["quality_scores"]
                if new_label not in scores:
                    # this kind of task cannot provide the property
                    continue

                new_score = scores[new_label]
                old_score = scores.get(current_labels.get(prop, None), None)

                if not old_score or new_score > old_score:
                    # nothing usable stored yet, or the task is of higher quality
                    replace = True
                elif new_score == old_score:
                    # tie on quality: the lower energy wins; the stored energy
                    # is only fetched from the database in this case
                    new_energy = taskdoc["output"]["energy_per_atom"]
                    stored = self._materials.find_one(query, {"_tasksbuilder": 1})
                    replace = new_energy < \
                        stored["_tasksbuilder"]["prop_metadata"]["energies"][prop]
                else:
                    replace = False
                if not replace:
                    continue

                # locate the property in the material doc and in the task doc
                materials_prefix = setting.get("materials_key")
                if materials_prefix:
                    materials_key = "{}.{}".format(setting["materials_key"], prop)
                else:
                    materials_key = prop
                tasks_prefix = setting.get("tasks_key")
                if tasks_prefix:
                    tasks_key = "{}.{}".format(setting["tasks_key"], prop)
                else:
                    tasks_key = prop

                # store the property value together with metadata about its source task
                changes = {
                    materials_key: get_mongolike(taskdoc, tasks_key),
                    "_tasksbuilder.prop_metadata.labels.{}".format(prop): new_label,
                    "_tasksbuilder.prop_metadata.task_ids.{}".format(prop): self.tid_str(taskdoc["task_id"]),
                    "_tasksbuilder.prop_metadata.energies.{}".format(prop): taskdoc["output"]["energy_per_atom"],
                    "_tasksbuilder.updated_at": datetime.utcnow(),
                }
                self._materials.update_one(query, {"$set": changes})

                # copy property to document root if in properties_root
                if prop in self.properties_root:
                    root_copy = {prop: get_mongolike(taskdoc, tasks_key)}
                    self._materials.update_one(query, {"$set": root_copy})

        # record the task id on the material whether or not any property changed
        processed_id = self.tid_str(taskdoc["task_id"])
        self._materials.update_one(
            query, {"$push": {"_tasksbuilder.all_task_ids": processed_id}})
Example #4
0
def group_by_parent_lattice(docs, tol=1e-6):
    """
    Groups a set of documents by parent lattice equivalence

    For documents whose task_label contains "deformation", the parent
    lattice is obtained by multiplying the simulated lattice with the
    transposed inverse of the stored deformation; for all other documents
    the simulated lattice itself is used.

    Args:
        docs ([{}]): list of documents e. g. dictionaries or cursor
        tol (float): absolute tolerance (atol) handed to np.allclose when
            comparing lattices, default 1e-6

    Returns:
        list of [lattice, [docs]] pairs, one per distinct parent lattice,
        in order of first appearance
    """
    groups = []
    for doc in docs:
        simulated = get_mongolike(doc, "output.structure.lattice.matrix")

        if "deformation" in doc['task_label']:
            # Note that this assumes only one transformation, deformstructuretransformation
            deformation = doc['transmuter']['transformation_params'][0]['deformation']
            inverse_transposed = np.transpose(np.linalg.inv(deformation))
            parent = np.dot(simulated, inverse_transposed)
        else:
            parent = np.array(simulated)

        # attach the doc to the first group whose lattice matches ...
        for known_lattice, members in groups:
            if np.allclose(known_lattice, parent, atol=tol):
                members.append(doc)
                break
        else:
            # ... or start a new group when none does
            groups.append([parent, [doc]])
    return groups
Example #5
0
def convert_mpworks_to_atomate(mpworks_doc, update_mpworks=True):
    """
    Build an atomate-style task document from an mpworks task document.

    Values are copied according to the key mapping in
    settings['task_conversion_keys']; the task label, reversed calculation
    list, anonymous formula and (when present) deformation are handled as
    custom cases.

    Args:
        mpworks_doc (dict): mpworks task document
        update_mpworks (bool): if True, update_mpworks_schema is first
            called on mpworks_doc (presumably modifying it in place to the
            final MPWorks schema -- confirm against that helper)

    Returns:
        dict: the converted atomate document
    """
    if update_mpworks:
        update_mpworks_schema(mpworks_doc)

    # straight key-to-key copies driven by the conversion table
    converted = {}
    for source_key, target_key in settings['task_conversion_keys'].items():
        set_mongolike(converted, target_key,
                      get_mongolike(mpworks_doc, source_key))

    # task label translated from the mpworks task type (None if unmapped)
    label_table = settings['task_label_conversions']
    converted["task_label"] = label_table.get(mpworks_doc["task_type"])

    # calculations are stored most-recent-first
    converted["calcs_reversed"] = mpworks_doc["calculations"][::-1]

    # anonymous formula derived from the reduced composition
    converted["formula_anonymous"] = Composition(
        converted['composition_reduced']).anonymized_formula

    if "deformation_matrix" not in mpworks_doc:
        return converted

    # Transpose this b/c of old bug, should verify in doc processing
    raw_deformation = mpworks_doc["deformation_matrix"]
    if isinstance(raw_deformation, str):
        raw_deformation = convert_string_deformation_to_list(raw_deformation)
    deformation = np.transpose(raw_deformation).tolist()

    set_mongolike(converted, "transmuter.transformations",
                  ["DeformStructureTransformation"])
    set_mongolike(converted, "transmuter.transformation_params",
                  [{"deformation": deformation}])
    return converted
Example #6
0
    def _update_material(self, m_id, taskdoc):
        """
        Update a material document with the data of a new task.

        A property of the material is overwritten by the task's value when
        the task's label has a quality score for that property and either
        no (or a falsy) score is recorded for the material's current data,
        the task scores higher, or the scores tie and the task has the lower
        energy_per_atom. The task id is pushed onto
        _tasksbuilder.all_task_ids in every case.

        Args:
            m_id (int): material_id for material document to update
            taskdoc (dict): a JSON-like task document
        """
        materials = self._materials

        def _stored_energy(prop):
            # energy_per_atom recorded for the task currently backing `prop`;
            # read fresh from the database each time it is needed
            current = materials.find_one({"material_id": m_id},
                                         {"_tasksbuilder": 1})
            return current["_tasksbuilder"]["prop_metadata"]["energies"][prop]

        # For each materials property, the label of the task its data is
        # currently based on; used to judge whether the new task is better
        labels_doc = materials.find_one(
            {"material_id": m_id},
            {"_tasksbuilder.prop_metadata.labels": 1})
        existing_labels = labels_doc["_tasksbuilder"]["prop_metadata"]["labels"]

        # task label of new doc that updates this material
        incoming_label = taskdoc["task_label"]

        for setting in self.property_settings:
            for prop in setting["properties"]:
                # only tasks with a quality score can supply this property
                if incoming_label in setting["quality_scores"]:
                    quality_of = setting["quality_scores"]
                    task_quality = quality_of[incoming_label]
                    material_quality = quality_of.get(
                        existing_labels.get(prop, None), None)

                    # the task wins if:
                    # i) no usable quality is recorded for the material data
                    # ii) the task's quality score is higher
                    # iii) qualities tie and the task has the lower energy
                    task_wins = (
                        not material_quality
                        or task_quality > material_quality
                        or (task_quality == material_quality
                            and taskdoc["output"]["energy_per_atom"]
                            < _stored_energy(prop))
                    )
                    if task_wins:
                        # where the property lives in the materials doc and
                        # in the task doc
                        if setting.get("materials_key"):
                            materials_key = "{}.{}".format(
                                setting["materials_key"], prop)
                        else:
                            materials_key = prop
                        if setting.get("tasks_key"):
                            tasks_key = "{}.{}".format(
                                setting["tasks_key"], prop)
                        else:
                            tasks_key = prop

                        # insert property data AND metadata about this task
                        changes = {
                            materials_key: get_mongolike(taskdoc, tasks_key),
                            "_tasksbuilder.prop_metadata.labels.{}".format(prop): incoming_label,
                            "_tasksbuilder.prop_metadata.task_ids.{}".format(prop): dbid_to_str(
                                self._t_prefix, taskdoc["task_id"]),
                            "_tasksbuilder.prop_metadata.energies.{}".format(prop): taskdoc["output"]["energy_per_atom"],
                            "_tasksbuilder.updated_at": datetime.utcnow(),
                        }
                        materials.update_one({"material_id": m_id},
                                             {"$set": changes})

                        # intentionally duplicate some data to the root
                        # level when the property is in properties_root
                        if prop in self.properties_root:
                            materials.update_one(
                                {"material_id": m_id},
                                {"$set": {prop: get_mongolike(taskdoc, tasks_key)}})

        # update the database to reflect that this task_id was already processed
        processed_id = dbid_to_str(self._t_prefix, taskdoc["task_id"])
        materials.update_one(
            {"material_id": m_id},
            {"$push": {"_tasksbuilder.all_task_ids": processed_id}})
Example #7
0
    def _update_material(self, m_id, taskdoc):
        """
        Apply a new task document to the material it belongs to.

        Walks every (property settings, property) pair and, when the task's
        label carries a quality score for the property, decides whether the
        task's data should replace what the material holds: it does when no
        (or a falsy) score is recorded for the current data, when the task
        scores higher, or when scores tie and the task's energy_per_atom is
        lower. Afterwards the task id is pushed onto
        _tasksbuilder.all_task_ids.

        Args:
            m_id (int): material_id for material document to update
            taskdoc (dict): a JSON-like task document
        """
        selector = {"material_id": m_id}

        # label of the task each materials property is currently based on
        found = self._materials.find_one(
            selector, {"_tasksbuilder.prop_metadata.labels": 1})
        backing_labels = found["_tasksbuilder"]["prop_metadata"]["labels"]

        label = taskdoc["task_label"]  # task label of new doc that updates this material

        # lazily walk all (settings entry, property name) combinations
        candidates = ((entry, name)
                      for entry in self.property_settings
                      for name in entry["properties"])

        for entry, name in candidates:
            # skip properties this kind of task is not valid for
            if label not in entry["quality_scores"]:
                continue

            t_score = entry["quality_scores"][label]
            m_score = entry["quality_scores"].get(
                backing_labels.get(name, None), None)

            # is this the "best" task for the property?
            if not m_score:
                # i) no usable quality recorded for the material's data
                accept = True
            elif t_score > m_score:
                # ii) task quality higher based on task label
                accept = True
            elif t_score == m_score:
                # iii) equal quality; use the lowest energy task
                task_energy = taskdoc["output"]["energy_per_atom"]
                snapshot = self._materials.find_one(selector, {"_tasksbuilder": 1})
                accept = task_energy < \
                    snapshot["_tasksbuilder"]["prop_metadata"]["energies"][name]
            else:
                accept = False

            if accept:
                # key of the property in the materials doc and in the task doc
                materials_key = name
                if entry.get("materials_key"):
                    materials_key = "{}.{}".format(entry["materials_key"], name)
                tasks_key = name
                if entry.get("tasks_key"):
                    tasks_key = "{}.{}".format(entry["tasks_key"], name)

                # insert property data AND metadata about this task
                self._materials.update_one(selector, {"$set": {
                    materials_key: get_mongolike(taskdoc, tasks_key),
                    "_tasksbuilder.prop_metadata.labels.{}".format(name): label,
                    "_tasksbuilder.prop_metadata.task_ids.{}".format(name): dbid_to_str(
                        self._t_prefix, taskdoc["task_id"]),
                    "_tasksbuilder.prop_metadata.energies.{}".format(name): taskdoc["output"]["energy_per_atom"],
                    "_tasksbuilder.updated_at": datetime.utcnow(),
                }})

                # copy property to document root if in properties_root,
                # i.e., intentionally duplicate some data to the root level
                if name in self.properties_root:
                    self._materials.update_one(
                        selector,
                        {"$set": {name: get_mongolike(taskdoc, tasks_key)}})

        # update the database to reflect that this task_id was already processed
        self._materials.update_one(selector, {
            "$push": {
                "_tasksbuilder.all_task_ids": dbid_to_str(
                    self._t_prefix, taskdoc["task_id"]),
            },
        })