Example #1
0
 def process_vasprun(self, dir_name, taskname, filename):
     """
     Parse a single vasprun.xml file into a dict.

     Args:
         dir_name: Directory holding the file.
         taskname: Name of the run; compared with self.runs[-1] whenever
             parse_projected_eigen or parse_dos is set to 'final'.
         filename: Name of the vasprun file inside dir_name.

     Returns:
         The dict from Vasprun.as_dict() with "dir_name", "completed_at",
         "cif", "density", "task" and "oxide_type" added, plus "dos" when
         DOS parsing is requested and succeeds.
     """
     xml_path = os.path.join(dir_name, filename)

     # 'final' restricts projected-eigenvalue parsing to the last run.
     want_projected = bool(
         self.parse_projected_eigen
         and (self.parse_projected_eigen != 'final'
              or taskname == self.runs[-1]))
     run = Vasprun(xml_path, parse_projected_eigen=want_projected)

     doc = run.as_dict()
     doc["dir_name"] = os.path.abspath(dir_name)
     # The file's modification time is recorded as the completion time.
     modified = os.path.getmtime(xml_path)
     doc["completed_at"] = str(datetime.datetime.fromtimestamp(modified))
     doc["cif"] = str(CifWriter(run.final_structure))
     doc["density"] = run.final_structure.density

     # Same 'final' rule, applied to the density of states.
     want_dos = self.parse_dos and (
         self.parse_dos != 'final' or taskname == self.runs[-1])
     if want_dos:
         try:
             doc["dos"] = run.complete_dos.as_dict()
         except Exception:
             # Best effort: the document is still returned without "dos".
             logger.warning(
                 "No valid dos data exist in {}.\n Skipping dos".format(
                     dir_name))

     # relax1 / relax2 runs are both filed under the "aflow" task type.
     task_type = "aflow" if taskname in ("relax1", "relax2") else taskname
     doc["task"] = {"type": task_type, "name": taskname}
     doc["oxide_type"] = oxide_type(run.final_structure)
     return doc
Example #2
0
def old_style_mat(new_style_mat):
    """
    Build the base document in the old MP mapidoc style from a document in
    the new structure.

    Args:
        new_style_mat: New-style material document. It is read through
            has/get with the paths in mp_conversion_dict, and must provide
            "calc_settings.potcar_spec" and "task_ids".

    Returns:
        dict: The old-style document.
    """
    old_doc = {}
    # Copy over every value whose source path exists in the new document.
    for old_path, new_path in mp_conversion_dict.items():
        if not has(new_style_mat, new_path):
            continue
        set_(old_doc, old_path, get(new_style_mat, new_path))

    old_doc["is_ordered"] = True
    old_doc["is_compatible"] = True

    # "structure" has to have been copied by the conversion loop above.
    structure = Structure.from_dict(old_doc["structure"])
    old_doc["oxide_type"] = oxide_type(structure)
    composition = structure.composition
    old_doc["reduced_cell_formula"] = composition.reduced_composition.as_dict()
    old_doc["unit_cell_formula"] = composition.as_dict()
    old_doc["full_formula"] = "".join(structure.formula.split())

    # Anonymous formula: sorted reduced amounts labelled A, B, C, ...
    amounts = sorted(old_doc["reduced_cell_formula"].values())
    old_doc["anonymous_formula"] = {
        string.ascii_uppercase[idx]: float(amount)
        for idx, amount in enumerate(amounts)
    }
    old_doc["initial_structure"] = new_style_mat.get("initial_structure")
    old_doc["nsites"] = structure.get_primitive_structure().num_sites

    set_(old_doc, "pseudo_potential.functional", "PBE")

    # The label is the second whitespace-separated token of each "titel".
    potcar_labels = []
    for spec in get(new_style_mat, "calc_settings.potcar_spec"):
        potcar_labels.append(spec["titel"].split()[1])
    set_(old_doc, "pseudo_potential.labels", potcar_labels)

    old_doc["ntask_ids"] = len(get(new_style_mat, "task_ids"))
    set_(old_doc, "pseudo_potential.pot_type", "paw")
    add_blessed_tasks(old_doc, new_style_mat)
    add_cifs(old_doc)
    check_relaxation(old_doc, new_style_mat)

    return old_doc
Example #3
0
 def process_vasprun(self, dir_name, taskname, filename):
     """
     Read a vasprun.xml file and return its contents as a dict.

     Args:
         dir_name: Directory that contains the file.
         taskname: Name of the run; it is matched against self.runs[-1]
             when an option is set to 'final'.
         filename: Name of the vasprun file within dir_name.

     Returns:
         Vasprun.as_dict() output extended with "dir_name", "completed_at",
         "cif", "density", "task" and "oxide_type", and with "dos" when it
         is requested and can be read.
     """

     def enabled(option):
         # An option equal to 'final' only applies to the last run.
         return bool(option and (option != 'final'
                                 or taskname == self.runs[-1]))

     path = os.path.join(dir_name, filename)
     parsed = Vasprun(
         path, parse_projected_eigen=enabled(self.parse_projected_eigen))

     result = parsed.as_dict()
     result["dir_name"] = os.path.abspath(dir_name)
     # Completion time is taken from the file's modification time.
     stamp = datetime.datetime.fromtimestamp(os.path.getmtime(path))
     result["completed_at"] = str(stamp)
     result["cif"] = str(CifWriter(parsed.final_structure))
     result["density"] = parsed.final_structure.density

     if enabled(self.parse_dos):
         try:
             result["dos"] = parsed.complete_dos.as_dict()
         except Exception:
             # Missing DOS data is logged and otherwise ignored.
             message = "No valid dos data exist in {}.\n Skipping dos"
             logger.warning(message.format(dir_name))

     # Both relaxation steps share the "aflow" task type.
     if taskname in ("relax1", "relax2"):
         result["task"] = {"type": "aflow", "name": taskname}
     else:
         result["task"] = {"type": taskname, "name": taskname}
     result["oxide_type"] = oxide_type(parsed.final_structure)
     return result
Example #4
0
    def get_correction(self, entry):
        """
        Compute the sulfide and oxide-family correction for an entry.

        Args:
            entry: Object exposing `composition` and `data`, and optionally
                `structure`.

        Returns:
            0 for a single-element composition; otherwise the sum of the
            sulfide and oxide/peroxide/superoxide/ozonide corrections that
            apply.
        """
        comp = entry.composition
        if len(comp) == 1:  # elemental entries are left untouched
            return 0

        total = 0

        # Sulfur: use the stored type, else classify the structure, else
        # fall back to plain "sulfide".
        if Element("S") in comp:
            stored_sulfide = entry.data.get("sulfide_type")
            if stored_sulfide:
                sf_type = stored_sulfide
            elif hasattr(entry, "structure"):
                sf_type = sulfide_type(entry.structure)
            else:
                sf_type = "sulfide"
            if sf_type in self.sulfide_correction:
                total += self.sulfide_correction[sf_type] * comp["S"]

        if Element("O") not in comp:
            return total

        # Without peroxide handling every oxygen gets the oxide correction.
        if not self.correct_peroxide:
            total += self.oxide_correction['oxide'] * comp["O"]
            return total

        # 1) Trust an oxide type stored on the entry.
        stored_oxide = entry.data.get("oxide_type")
        if stored_oxide:
            if stored_oxide in self.oxide_correction:
                total += self.oxide_correction[stored_oxide] * comp["O"]
            if stored_oxide == "hydroxide":
                total += self.oxide_correction["oxide"] * comp["O"]
            return total

        # 2) Otherwise classify the structure; the correction is scaled by
        #    the bond count returned alongside the type.
        if hasattr(entry, "structure"):
            ox_type, nbonds = oxide_type(entry.structure, 1.05,
                                         return_nbonds=True)
            if ox_type in self.oxide_correction:
                total += self.oxide_correction[ox_type] * nbonds
            elif ox_type == "hydroxide":
                total += self.oxide_correction["oxide"] * comp["O"]
            return total

        # 3) Last resort: recognise the compound by its reduced formula.
        rform = entry.composition.reduced_formula
        if rform in UCorrection.common_peroxides:
            key = "peroxide"
        elif rform in UCorrection.common_superoxides:
            key = "superoxide"
        elif rform in UCorrection.ozonides:
            key = "ozonide"
        elif Element("O") in comp.elements and len(comp.elements) > 1:
            key = "oxide"
        else:
            key = None
        if key is not None:
            total += self.oxide_correction[key] * comp["O"]

        return total
Example #5
0
    def get_correction(self, entry):
        """
        Return the summed sulfide and oxide-type correction for an entry.

        Args:
            entry: Object with `composition` and `data` attributes; a
                `structure` attribute is used when present.

        Returns:
            0 when the composition has a single element, otherwise the
            accumulated correction.
        """
        composition = entry.composition
        if len(composition) == 1:  # nothing to correct for an element
            return 0

        result = 0

        # Sulfide corrections
        if Element("S") in composition:
            kind = "sulfide"
            recorded = entry.data.get("sulfide_type")
            if recorded:
                kind = recorded
            elif hasattr(entry, "structure"):
                kind = sulfide_type(entry.structure)
            if kind in self.sulfide_correction:
                result += self.sulfide_correction[kind] * composition["S"]

        # Oxide, peroxide, superoxide and ozonide corrections
        if Element("O") in composition:
            if not self.correct_peroxide:
                # All oxygen is treated as plain oxide.
                result += self.oxide_correction['oxide'] * composition["O"]
            elif entry.data.get("oxide_type"):
                recorded = entry.data["oxide_type"]
                if recorded in self.oxide_correction:
                    per_atom = self.oxide_correction[recorded]
                    result += per_atom * composition["O"]
                if recorded == "hydroxide":
                    per_atom = self.oxide_correction["oxide"]
                    result += per_atom * composition["O"]
            elif hasattr(entry, "structure"):
                # The structure-based type is scaled by the bond count
                # returned with it, not by the oxygen amount.
                found, nbonds = oxide_type(entry.structure, 1.05,
                                           return_nbonds=True)
                if found in self.oxide_correction:
                    result += self.oxide_correction[found] * nbonds
                elif found == "hydroxide":
                    result += self.oxide_correction["oxide"] * \
                        composition["O"]
            else:
                # Neither a stored type nor a structure: go by formula.
                rform = entry.composition.reduced_formula
                label = None
                if rform in UCorrection.common_peroxides:
                    label = "peroxide"
                elif rform in UCorrection.common_superoxides:
                    label = "superoxide"
                elif rform in UCorrection.ozonides:
                    label = "ozonide"
                elif Element("O") in composition.elements and \
                        len(composition.elements) > 1:
                    label = "oxide"
                if label is not None:
                    result += self.oxide_correction[label] * \
                        composition["O"]

        return result
Example #6
0
def old_style_mat(new_style_mat):
    """
    Creates the base document for the old MP mapidoc style from the new document structure

    Args:
        new_style_mat (dict): material document in the new structure. It
            must provide "calc_settings.potcar_spec" and "origins" (an
            iterable of dicts with "task_type" and "task_id");
            "initial_structure" and "deprecated_tasks" are optional.

    Returns:
        dict: the old-style document.

    Raises:
        KeyError: if the conversion dict did not populate "structure" or
            "task_ids", which are read back from the converted document.
    """

    mat = {}
    mp_conversion_dict = _settings["conversion_dict"]

    # Uses the conversion dict to copy over values which handles the bulk of the work.
    for mp, new_key in mp_conversion_dict.items():
        if has(new_style_mat, new_key):
            set_(mat, mp, get(new_style_mat, new_key))

    # Anything coming through DFT is always ordered
    mat["is_ordered"] = True
    mat["is_compatible"] = True

    struc = Structure.from_dict(mat["structure"])
    mat["oxide_type"] = oxide_type(struc)
    mat["reduced_cell_formula"] = struc.composition.reduced_composition.as_dict()
    mat["unit_cell_formula"] = struc.composition.as_dict()
    mat["full_formula"] = "".join(struc.formula.split())

    # Anonymous formula: sorted reduced amounts labelled A, B, C, ...
    vals = sorted(mat["reduced_cell_formula"].values())
    mat["anonymous_formula"] = {
        string.ascii_uppercase[i]: float(val)
        for i, val in enumerate(vals)
    }
    mat["initial_structure"] = new_style_mat.get("initial_structure", None)
    mat["nsites"] = struc.get_primitive_structure().num_sites

    set_(mat, "pseudo_potential.functional", "PBE")

    # The label is the second whitespace-separated token of each "titel".
    set_(
        mat,
        "pseudo_potential.labels",
        [
            p["titel"].split()[1]
            for p in get(new_style_mat, "calc_settings.potcar_spec")
        ],
    )
    set_(mat, "pseudo_potential.pot_type", "paw")

    # One task id per task type, taken from the document's origins.
    mat["blessed_tasks"] = {
        d["task_type"]: d["task_id"]
        for d in new_style_mat["origins"]
    }
    mat["deprecated_tasks"] = new_style_mat.get("deprecated_tasks", [])
    mat["ntask_ids"] = len(mat["task_ids"])

    return mat
Example #7
0
    def get_entries(self, chemsys):
        """
        Get all entries in a chemsys from materials

        Args:
            chemsys(str): a chemical system represented by string elements separated by a dash (-)

        Returns:
            list(ComputedEntry): the entries built for this system
        """

        self.logger.info("Getting entries for: {}".format(chemsys))

        # Extend the base query so it matches any permutation of the chemsys.
        criteria = dict(self.query)
        criteria["chemsys"] = {"$in": list(chemsys_permutations(chemsys))}
        wanted_fields = [
            "structure",
            self.materials.key,
            "thermo.energy",
            "unit_cell_formula",
            "calc_settings.is_hubbard",
            "calc_settings.hubbards",
            "calc_settings.potcar_spec",
            "calc_settings.run_type",
        ]
        docs = list(self.materials.query(wanted_fields, criteria))

        setting_names = ("is_hubbard", "hubbards", "potcar_spec", "run_type")

        def build_entry(doc):
            # Only these calc settings are passed on as entry parameters.
            params = {
                name: doc["calc_settings"][name] for name in setting_names
            }
            return ComputedEntry(
                Composition(doc["unit_cell_formula"]),
                doc["thermo"]["energy"],
                0.0,
                parameters=params,
                entry_id=doc[self.materials.key],
                data={
                    "oxide_type":
                    oxide_type(Structure.from_dict(doc["structure"]))
                })

        all_entries = [build_entry(doc) for doc in docs]

        self.logger.info("Total entries in {} : {}".format(
            chemsys, len(all_entries)))

        return all_entries
Example #8
0
    def get_correction(self, entry):
        """
        Return the correction for an entry.

        When the reduced formula is listed in self.cpd_energies, the result
        is that value times the number of atoms minus the entry's
        uncorrected energy. Otherwise an oxide-type correction is computed.

        Args:
            entry: Object exposing `composition`, `uncorrected_energy` and
                `data`, and optionally `structure`.

        Returns:
            The correction value.
        """
        comp = entry.composition
        rform = entry.composition.reduced_formula

        if rform in self.cpd_energies:
            target = self.cpd_energies[rform] * comp.num_atoms
            return target - entry.uncorrected_energy

        total = 0

        # Without peroxide handling all oxygen is treated as plain oxide.
        if not self.correct_peroxide:
            total += self.oxide_correction['oxide'] * comp["O"]
            return total

        # Only multi-element compositions containing oxygen are corrected.
        if len(comp) < 2 or Element("O") not in comp:
            return total

        if "oxide_type" in entry.data:
            stored = entry.data["oxide_type"]
            if stored in self.oxide_correction:
                total += self.oxide_correction[stored] * comp["O"]
            if stored == "hydroxide":
                total += self.oxide_correction["oxide"] * comp["O"]
        elif hasattr(entry, "structure"):
            # The structure-derived type is scaled by the returned bond
            # count, not by the oxygen amount.
            ox_type, nbonds = oxide_type(entry.structure, 1.05,
                                         return_nbonds=True)
            if ox_type in self.oxide_correction:
                total += self.oxide_correction[ox_type] * nbonds
            elif ox_type == "hydroxide":
                total += self.oxide_correction["oxide"] * comp["O"]
        else:
            # No stored type and no structure: recognise by formula.
            key = None
            if rform in UCorrection.common_peroxides:
                key = "peroxide"
            elif rform in UCorrection.common_superoxides:
                key = "superoxide"
            elif rform in UCorrection.ozonides:
                key = "ozonide"
            elif Element("O") in comp.elements and len(comp.elements) > 1:
                key = "oxide"
            if key is not None:
                total += self.oxide_correction[key] * comp["O"]

        return total
Example #9
0
    def entry(self):
        """Build a ComputedEntry from this task document's fields."""
        doc = {"correction": 0.0}
        doc["entry_id"] = self.task_id
        doc["composition"] = self.output.structure.composition
        doc["energy"] = self.output.energy
        doc["parameters"] = {
            "potcar_spec": self.input.potcar_spec,
            # run_type is stringified so the ComputedEntry stays
            # compatible with MontyEncoder.
            "run_type": str(self.run_type),
        }
        doc["data"] = {
            "oxide_type": oxide_type(self.output.structure),
            "last_updated": self.last_updated,
        }
        return ComputedEntry.from_dict(doc)
Example #10
0
    def get_entries(self, chemsys):
        """
        Get all entries in a chemsys from materials

        Args:
            chemsys(str): a chemical system represented by string elements separated by a dash (-)

        Returns:
            list(ComputedEntry): the entries built for this system
        """

        self.logger.info("Getting entries for: {}".format(chemsys))

        # Match any permutation of the chemsys on top of the base query.
        criteria = dict(self.query)
        criteria["chemsys"] = {"$in": list(chemsys_permutations(chemsys))}
        wanted_fields = [
            "structure",
            self.materials.key,
            "thermo.energy_per_atom",
            "composition",
            "calc_settings",
        ]
        docs = list(self.materials.query(wanted_fields, criteria))

        def build_entry(doc):
            composition = Composition(doc["composition"])
            # Stored energies are per atom; the entry takes the total.
            total_energy = doc["thermo"]["energy_per_atom"] * \
                composition.num_atoms
            return ComputedEntry(
                composition,
                total_energy,
                0.0,
                parameters=doc["calc_settings"],
                entry_id=doc[self.materials.key],
                data={
                    "oxide_type":
                    oxide_type(Structure.from_dict(doc["structure"]))
                })

        all_entries = [build_entry(doc) for doc in docs]

        self.logger.info("Total entries in {} : {}".format(
            chemsys, len(all_entries)))

        return all_entries
Example #11
0
def old_style_mat(new_mat):
    """
    Build an old-style material document from a new-style one.

    Values are first copied according to mp_conversion_dict; derived
    formula fields, task bookkeeping, inputs and pseudopotential details
    are then filled in.

    Args:
        new_mat: New-style material document, read through has/get with
            dotted paths.

    Returns:
        dict: The old-style document.

    Raises:
        KeyError: If the conversion step did not populate "structure".
    """

    mat = {}
    for mp, new_key in mp_conversion_dict.items():
        if has(new_mat, new_key):
            set_(mat, mp, get(new_mat, new_key))

    # Fixed key spelling: this used to be written as "is_orderd".
    mat["is_ordered"] = True
    mat["is_compatible"] = True

    struc = Structure.from_dict(mat["structure"])
    mat["oxide_type"] = oxide_type(struc)
    # The reduced cell formula must come from the reduced composition;
    # the anonymous formula below is derived from it.
    mat["reduced_cell_formula"] = \
        struc.composition.reduced_composition.as_dict()
    mat["full_formula"] = "".join(struc.formula.split())
    vals = sorted(mat["reduced_cell_formula"].values())
    mat["anonymous_formula"] = {
        string.ascii_uppercase[i]: float(val)
        for i, val in enumerate(vals)
    }

    set_(mat, "original_task_id", get(new_mat, "material_id"))
    set_(mat, "ntask_ids", len(get(new_mat, "task_ids")))

    set_(mat, "input.incar",
         get(new_mat, "inputs.structure_optimization.incar"))
    set_(mat, "input.kpoints",
         get(new_mat, "inputs.structure_optimization.kpoints"))
    set_(mat, "encut",
         get(new_mat, "inputs.structure_optimization.incar.ENCUT"))
    # NOTE(review): the potcar paths below start with "input." while the
    # incar/kpoints paths above start with "inputs." - confirm which prefix
    # the new-style schema actually uses.
    mat["pseudo_potential"] = {
        "pot_type": "paw",
        "labels":
        get(new_mat, "input.structure_optimization.potcar.symbols"),
        "functional":
        get(new_mat, "input.structure_optimization.potcar.functional"),
    }

    return mat
    def test_oxide_type(self):
        """
        Check the label oxide_type returns (called with 1.1 as its second
        argument) for six hand-built structures: superoxide, ozonide,
        peroxide, hydroxide and two cases expected to give "None".
        """
        li = Element("Li")
        o = Element("O")
        h = Element("H")
        n = Element("N")

        def check(lattice, species, frac_coords, expected):
            # Build the structure and compare the reported type.
            struct = Structure(lattice, species, frac_coords)
            self.assertEqual(oxide_type(struct, 1.1), expected)

        # Superoxide
        check(
            Lattice([[3.985034, 0.0, 0.0],
                     [0.0, 4.881506, 0.0],
                     [0.0, 0.0, 2.959824]]),
            [li, li, o, o, o, o],
            [[0.500000, 0.500000, 0.500000],
             [0.0, 0.0, 0.0],
             [0.632568, 0.085090, 0.500000],
             [0.367432, 0.914910, 0.500000],
             [0.132568, 0.414910, 0.000000],
             [0.867432, 0.585090, 0.000000]],
            "superoxide")

        # Ozonide
        check(
            Lattice.from_parameters(3.999911, 3.999911, 3.999911,
                                    133.847504, 102.228244, 95.477342),
            [li, o, o, o],
            [[0.513004, 0.513004, 1.000000],
             [0.017616, 0.017616, 0.000000],
             [0.649993, 0.874790, 0.775203],
             [0.099587, 0.874790, 0.224797]],
            "ozonide")

        # Peroxide
        check(
            Lattice.from_parameters(3.159597, 3.159572, 7.685205,
                                    89.999884, 89.999674, 60.000510),
            [li, li, li, li, o, o, o, o],
            [[0.666656, 0.666705, 0.750001],
             [0.333342, 0.333378, 0.250001],
             [0.000001, 0.000041, 0.500001],
             [0.000001, 0.000021, 0.000001],
             [0.333347, 0.333332, 0.649191],
             [0.333322, 0.333353, 0.850803],
             [0.666666, 0.666686, 0.350813],
             [0.666665, 0.666684, 0.149189]],
            "peroxide")

        # The hydroxide case and its nitrogen analogue share one cell and
        # one set of coordinates; only the anion species differs.
        shared_cell = (3.565276, 3.565276, 4.384277,
                       90.000000, 90.000000, 90.000000)
        shared_coords = [[0.000000, 0.500000, 0.413969],
                         [0.500000, 0.000000, 0.586031],
                         [0.000000, 0.000000, 0.000000],
                         [0.500000, 0.500000, 0.000000],
                         [0.000000, 0.500000, 0.192672],
                         [0.500000, 0.000000, 0.807328]]
        check(Lattice.from_parameters(*shared_cell),
              [h, h, li, li, o, o], shared_coords, "hydroxide")
        check(Lattice.from_parameters(*shared_cell),
              [h, h, li, li, n, n], shared_coords, "None")

        # Pure oxygen
        check(
            Lattice.from_parameters(4.389828, 5.369789, 5.369789,
                                    70.786622, 69.244828, 69.244828),
            [o, o, o, o, o, o, o, o],
            [[0.844609, 0.273459, 0.786089],
             [0.155391, 0.213911, 0.726541],
             [0.155391, 0.726541, 0.213911],
             [0.844609, 0.786089, 0.273459],
             [0.821680, 0.207748, 0.207748],
             [0.178320, 0.792252, 0.792252],
             [0.132641, 0.148222, 0.148222],
             [0.867359, 0.851778, 0.851778]],
            "None")
Example #13
0
    def get_correction(self, entry):
        """
        Compute the sulfide and oxide-family correction for an entry.

        Args:
            entry: Object exposing `composition` and `data`, and optionally
                `structure`.

        Returns:
            0 for a single-element composition; otherwise the sum of the
            sulfide and oxide/peroxide/superoxide/ozonide corrections that
            apply.

        Warns:
            UserWarning: when peroxide correction is enabled for an
                oxygen-containing entry that has neither a stored
                "oxide_type" nor a structure, so that only a formula lookup
                is possible.
        """
        comp = entry.composition
        if len(comp) == 1:  # Skip element entry
            return 0

        correction = 0
        # Check for sulfide corrections
        if Element("S") in comp:
            sf_type = "sulfide"
            if entry.data.get("sulfide_type"):
                sf_type = entry.data["sulfide_type"]
            elif hasattr(entry, "structure"):
                sf_type = sulfide_type(entry.structure)
            if sf_type in self.sulfide_correction:
                correction += self.sulfide_correction[sf_type] * comp["S"]

        # Check for oxide, peroxide, superoxide, and ozonide corrections.
        if Element("O") in comp:
            if self.correct_peroxide:
                if entry.data.get("oxide_type"):
                    # A stored oxide type takes precedence.
                    if entry.data["oxide_type"] in self.oxide_correction:
                        ox_corr = self.oxide_correction[
                            entry.data["oxide_type"]]
                        correction += ox_corr * comp["O"]
                    if entry.data["oxide_type"] == "hydroxide":
                        ox_corr = self.oxide_correction["oxide"]
                        correction += ox_corr * comp["O"]

                elif hasattr(entry, "structure"):
                    # Classify from the structure; the correction is scaled
                    # by the returned bond count.
                    ox_type, nbonds = oxide_type(entry.structure,
                                                 1.05,
                                                 return_nbonds=True)
                    if ox_type in self.oxide_correction:
                        correction += self.oxide_correction[ox_type] * \
                                      nbonds
                    elif ox_type == "hydroxide":
                        correction += self.oxide_correction["oxide"] * \
                                      comp["O"]
                else:
                    # Each fragment ends with a space so the adjacent string
                    # literals join into readable text ("special formulas",
                    # not "specialformulas").
                    warnings.warn(
                        "No structure or oxide_type parameter present. Note "
                        "that peroxide/superoxide corrections are not as "
                        "reliable and relies only on detection of special "
                        "formulas, e.g., Li2O2.")
                    rform = entry.composition.reduced_formula
                    if rform in UCorrection.common_peroxides:
                        correction += self.oxide_correction["peroxide"] * \
                                      comp["O"]
                    elif rform in UCorrection.common_superoxides:
                        correction += self.oxide_correction["superoxide"] * \
                                      comp["O"]
                    elif rform in UCorrection.ozonides:
                        correction += self.oxide_correction["ozonide"] * \
                                      comp["O"]
                    elif Element("O") in comp.elements and len(comp.elements) \
                            > 1:
                        correction += self.oxide_correction['oxide'] * \
                                      comp["O"]
            else:
                # Peroxide handling disabled: treat all oxygen as oxide.
                correction += self.oxide_correction['oxide'] * comp["O"]

        return correction
Example #14
0
    def assimilate(self, path, launches_coll=None):
        """
        Parses vasp runs. Then insert the result into the db. and return the
        task_id or doc of the insertion.

        Returns:
            If in simulate_mode, the entire doc is returned for debugging
            purposes. Else, only the task_id of the inserted doc is returned.
        """

        d = self.get_task_doc(path)
        if self.additional_fields:
            d.update(self.additional_fields)  # always add additional fields, even for failed jobs

        try:
            d["dir_name_full"] = d["dir_name"].split(":")[1]
            d["dir_name"] = get_block_part(d["dir_name_full"])
            d["stored_data"] = {}
        except:
            print 'COULD NOT GET DIR NAME'
            pprint.pprint(d)
            print traceback.format_exc()
            raise ValueError('IMPROPER PARSING OF {}'.format(path))

        if not self.simulate:
            # Perform actual insertion into db. Because db connections cannot
            # be pickled, every insertion needs to create a new connection
            # to the db.
            conn = MongoClient(self.host, self.port)
            db = conn[self.database]
            if self.user:
                db.authenticate(self.user, self.password)
            coll = db[self.collection]

            # Insert dos data into gridfs and then remove it from the dict.
            # DOS data tends to be above the 4Mb limit for mongo docs. A ref
            # to the dos file is in the dos_fs_id.
            result = coll.find_one({"dir_name": d["dir_name"]})

            if result is None or self.update_duplicates:
                if self.parse_dos and "calculations" in d:
                    for calc in d["calculations"]:
                        if "dos" in calc:
                            dos = json.dumps(calc["dos"], cls=MontyEncoder)
                            fs = gridfs.GridFS(db, "dos_fs")
                            dosid = fs.put(dos)
                            calc["dos_fs_id"] = dosid
                            del calc["dos"]

                d["last_updated"] = datetime.datetime.today()
                if result is None:
                    if ("task_id" not in d) or (not d["task_id"]):
                        d["task_id"] = "mp-{}".format(
                            db.counter.find_one_and_update(
                                {"_id": "taskid"}, {"$inc": {"c": 1}}
			    )["c"])
                    logger.info("Inserting {} with taskid = {}"
                    .format(d["dir_name"], d["task_id"]))
                elif self.update_duplicates:
                    d["task_id"] = result["task_id"]
                    logger.info("Updating {} with taskid = {}"
                    .format(d["dir_name"], d["task_id"]))

                #Fireworks processing

                self.process_fw(path, d)

                try:
                    #Add oxide_type
                    struct=Structure.from_dict(d["output"]["crystal"])
                    d["oxide_type"]=oxide_type(struct)
                except:
                    logger.error("can't get oxide_type for {}".format(d["task_id"]))
                    d["oxide_type"] = None

                #Override incorrect outcar subdocs for two step relaxations
                if "optimize structure" in d['task_type'] and \
                    os.path.exists(os.path.join(path, "relax2")):
                    try:
                        run_stats = {}
                        for i in [1,2]:
                            o_path = os.path.join(path,"relax"+str(i),"OUTCAR")
                            o_path = o_path if os.path.exists(o_path) else o_path+".gz"
                            outcar = Outcar(o_path)
                            d["calculations"][i-1]["output"]["outcar"] = outcar.as_dict()
                            run_stats["relax"+str(i)] = outcar.run_stats
                    except:
                        logger.error("Bad OUTCAR for {}.".format(path))

                    try:
                        overall_run_stats = {}
                        for key in ["Total CPU time used (sec)", "User time (sec)",
                                    "System time (sec)", "Elapsed time (sec)"]:
                            overall_run_stats[key] = sum([v[key]
                                              for v in run_stats.values()])
                        run_stats["overall"] = overall_run_stats
                    except:
                        logger.error("Bad run stats for {}.".format(path))

                    d["run_stats"] = run_stats

                # add is_compatible
                mpc = MaterialsProjectCompatibility("Advanced")

                try:
                    func = d["pseudo_potential"]["functional"]
                    labels = d["pseudo_potential"]["labels"]
                    symbols = ["{} {}".format(func, label) for label in labels]
                    parameters = {"run_type": d["run_type"],
                              "is_hubbard": d["is_hubbard"],
                              "hubbards": d["hubbards"],
                              "potcar_symbols": symbols}
                    entry = ComputedEntry(Composition(d["unit_cell_formula"]),
                                          0.0, 0.0, parameters=parameters,
                                          entry_id=d["task_id"])

                    d['is_compatible'] = bool(mpc.process_entry(entry))
                except:
                    traceback.print_exc()
                    print 'ERROR in getting compatibility'
                    d['is_compatible'] = None


                #task_type dependent processing
                if 'static' in d['task_type']:
                    launch_doc = launches_coll.find_one({"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}}, {"action.stored_data": 1})
                    for i in ["conventional_standard_structure", "symmetry_operations",
                              "symmetry_dataset", "refined_structure"]:
                        try:
                            d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                        except:
                            pass

                #parse band structure if necessary
                if ('band structure' in d['task_type'] or "Uniform" in d['task_type'])\
                    and d['state'] == 'successful':
                    launch_doc = launches_coll.find_one({"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}},
                                                        {"action.stored_data": 1})
                    vasp_run = Vasprun(zpath(os.path.join(path, "vasprun.xml")), parse_projected_eigen=False)

                    if 'band structure' in d['task_type']:
                        def string_to_numlist(stringlist):
                            g=re.search('([0-9\-\.eE]+)\s+([0-9\-\.eE]+)\s+([0-9\-\.eE]+)', stringlist)
                            return [float(g.group(i)) for i in range(1,4)]

                        for i in ["kpath_name", "kpath"]:
                            d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                        kpoints_doc = d['stored_data']['kpath']['kpoints']
                        for i in kpoints_doc:
                            kpoints_doc[i]=string_to_numlist(kpoints_doc[i])
                        bs=vasp_run.get_band_structure(efermi=d['calculations'][0]['output']['outcar']['efermi'],
                                                       line_mode=True)
                    else:
                        bs=vasp_run.get_band_structure(efermi=d['calculations'][0]['output']['outcar']['efermi'],
                                                       line_mode=False)
                    bs_json = json.dumps(bs.as_dict(), cls=MontyEncoder)
                    fs = gridfs.GridFS(db, "band_structure_fs")
                    bs_id = fs.put(bs_json)
                    d['calculations'][0]["band_structure_fs_id"] = bs_id

                    # also override band gap in task doc
                    gap = bs.get_band_gap()
                    vbm = bs.get_vbm()
                    cbm = bs.get_cbm()
                    update_doc = {'bandgap': gap['energy'], 'vbm': vbm['energy'], 'cbm': cbm['energy'], 'is_gap_direct': gap['direct']}
                    d['analysis'].update(update_doc)
                    d['calculations'][0]['output'].update(update_doc)

		coll.update_one({"dir_name": d["dir_name"]}, {'$set': d}, upsert=True)

                return d["task_id"], d
            else:
                logger.info("Skipping duplicate {}".format(d["dir_name"]))
                return result["task_id"], result

        else:
            d["task_id"] = 0
            logger.info("Simulated insert into database for {} with task_id {}"
            .format(d["dir_name"], d["task_id"]))
            return 0, d
Example #15
0
    def get_entries(self, chemsys):
        """
        Gather the entries for a chemical system, combining cached entries
        with entries freshly built from the materials store.

        The systems produced by ``chemsys_permutations(chemsys)`` that are
        already keys of ``self.entries_cache`` are read from the cache; the
        others are queried from ``self.materials``. Every entry built from a
        queried document is also appended to the cache under the dash-joined,
        sorted element symbols of its composition.

        Args:
            chemsys(str): a chemical system given as element symbols
                separated by dashes (-)

        Returns:
            list(ComputedEntry): cached entries followed by the newly built
            ones
        """
        self.logger.info("Getting entries for: {}".format(chemsys))

        # Work out which systems can be served from the cache
        wanted = chemsys_permutations(chemsys)
        in_cache = wanted & set(self.entries_cache.keys())
        to_query = wanted - in_cache

        self.logger.debug("Getting {} entries from cache for {}".format(
            len(in_cache), chemsys))

        # Build the criteria for everything the cache could not provide
        criteria = dict(self.query)
        criteria.update({
            "chemsys": {"$in": list(to_query)},
            "deprecated": False,
        })

        wanted_fields = [
            "structure",
            self.materials.key,
            "thermo.energy_per_atom",
            "composition",
            "calc_settings",
            "_sbxn",
        ]
        docs = list(
            self.materials.query(properties=wanted_fields, criteria=criteria))

        # Seed the result with what the cache already holds
        all_entries = []
        for cached_key in in_cache:
            all_entries.extend(self.entries_cache[cached_key])

        for doc in docs:
            comp = Composition(doc["composition"])
            # The store holds a per-atom energy; scale it by the atom count
            total_energy = doc["thermo"]["energy_per_atom"] * comp.num_atoms
            calc_settings = doc["calc_settings"]
            material_id = doc[self.materials.key]
            extra = {
                "oxide_type": oxide_type(Structure.from_dict(doc["structure"])),
                "_sbxn": doc.get("_sbxn", []),
            }
            entry = ComputedEntry(comp, total_energy, 0.0,
                                  parameters=calc_settings,
                                  entry_id=material_id,
                                  data=extra)

            # Remember the entry under its own chemical system
            cache_key = "-".join(sorted({el.symbol for el in comp.elements}))
            self.entries_cache[cache_key].append(entry)

            all_entries.append(entry)

        self.logger.info("Total entries in {} : {}".format(
            chemsys, len(all_entries)))

        return all_entries
Example #16
0
    def get_correction(self, entry):
        """
        Sum the sulfide and oxygen-related corrections that apply to an entry.

        Args:
            entry: object exposing ``composition`` and ``data`` and,
                optionally, ``structure``. ``data`` may carry precomputed
                "sulfide_type" / "oxide_type" values, which take precedence
                over analysing the structure.

        Returns:
            0 for a single-element composition; otherwise the accumulated
            correction (which stays 0 when no correction applies).
        """
        comp = entry.composition
        if len(comp) == 1:  # Skip element entry
            return 0

        correction = 0
        # Check for sulfide corrections
        if Element("S") in comp:
            # Default to "sulfide" unless the entry data or its structure
            # says otherwise.
            sf_type = "sulfide"
            if entry.data.get("sulfide_type"):
                sf_type = entry.data["sulfide_type"]
            elif hasattr(entry, "structure"):
                sf_type = sulfide_type(entry.structure)
            if sf_type in self.sulfide_correction:
                correction += self.sulfide_correction[sf_type] * comp["S"]

        # Check for oxide, peroxide, superoxide, and ozonide corrections.
        if Element("O") in comp:
            if self.correct_peroxide:
                if entry.data.get("oxide_type"):
                    if entry.data["oxide_type"] in self.oxide_correction:
                        ox_corr = self.oxide_correction[
                            entry.data["oxide_type"]]
                        correction += ox_corr * comp["O"]
                    # Hydroxides receive the plain oxide correction
                    if entry.data["oxide_type"] == "hydroxide":
                        ox_corr = self.oxide_correction["oxide"]
                        correction += ox_corr * comp["O"]

                elif hasattr(entry, "structure"):
                    ox_type, nbonds = oxide_type(entry.structure, 1.05,
                                                 return_nbonds=True)
                    if ox_type in self.oxide_correction:
                        # Scaled by the bond count returned by oxide_type
                        # rather than by the amount of O.
                        correction += self.oxide_correction[ox_type] * \
                            nbonds
                    elif ox_type == "hydroxide":
                        correction += self.oxide_correction["oxide"] * \
                                      comp["O"]
                else:
                    # Adjacent literals are concatenated verbatim, so each
                    # fragment must carry its own separating space.
                    warnings.warn(
                        "No structure or oxide_type parameter present. Note "
                        "that peroxide/superoxide corrections are not as "
                        "reliable and relies only on detection of special "
                        "formulas, e.g., Li2O2.")
                    # Fall back to recognising well-known reduced formulas
                    rform = entry.composition.reduced_formula
                    if rform in UCorrection.common_peroxides:
                        correction += self.oxide_correction["peroxide"] * \
                            comp["O"]
                    elif rform in UCorrection.common_superoxides:
                        correction += self.oxide_correction["superoxide"] * \
                            comp["O"]
                    elif rform in UCorrection.ozonides:
                        correction += self.oxide_correction["ozonide"] * \
                            comp["O"]
                    elif Element("O") in comp.elements and len(comp.elements)\
                            > 1:
                        correction += self.oxide_correction['oxide'] * \
                                      comp["O"]
            else:
                # Peroxide detection disabled: treat all oxygen as oxide
                correction += self.oxide_correction['oxide'] * comp["O"]

        return correction
Example #17
0
def list_oxide_type(list_struc):
    """
    Apply ``oxide_type`` to each structure, in order.

    Each call uses ``relative_cutoff=1.2`` and ``return_nbonds=True``; the
    results are collected into a list with one item per input structure.
    """
    results = []
    for struc in list_struc:
        outcome = oxide_type(struc, relative_cutoff=1.2, return_nbonds=True)
        results.append(outcome)
    return results
Example #18
0
    def test_oxide_type(self):
        """
        Run ``oxide_type`` (second argument 1.1) on six hand-built cells and
        compare against the expected labels: superoxide, ozonide, peroxide,
        hydroxide, and the string "None" for an O-free cell and for a cell
        that contains only O.
        """
        li = Element("Li")
        o = Element("O")
        h = Element("H")
        n = Element("N")

        # Cell and fractional coordinates shared by the hydroxide case and
        # its N-substituted counterpart.
        tetragonal = (3.565276, 3.565276, 4.384277, 90.000000,
                      90.000000, 90.000000)
        tetragonal_coords = [[0.000000, 0.500000, 0.413969],
                             [0.500000, 0.000000, 0.586031],
                             [0.000000, 0.000000, 0.000000],
                             [0.500000, 0.500000, 0.000000],
                             [0.000000, 0.500000, 0.192672],
                             [0.500000, 0.000000, 0.807328]]

        # Each case: (lattice, species, fractional coords, expected label)
        cases = [
            (Lattice([[3.985034, 0.0, 0.0],
                      [0.0, 4.881506, 0.0],
                      [0.0, 0.0, 2.959824]]),
             [li] * 2 + [o] * 4,
             [[0.500000, 0.500000, 0.500000],
              [0.0, 0.0, 0.0],
              [0.632568, 0.085090, 0.500000],
              [0.367432, 0.914910, 0.500000],
              [0.132568, 0.414910, 0.000000],
              [0.867432, 0.585090, 0.000000]],
             "superoxide"),
            (Lattice.from_parameters(3.999911, 3.999911, 3.999911,
                                     133.847504, 102.228244, 95.477342),
             [li] + [o] * 3,
             [[0.513004, 0.513004, 1.000000],
              [0.017616, 0.017616, 0.000000],
              [0.649993, 0.874790, 0.775203],
              [0.099587, 0.874790, 0.224797]],
             "ozonide"),
            (Lattice.from_parameters(3.159597, 3.159572, 7.685205,
                                     89.999884, 89.999674, 60.000510),
             [li] * 4 + [o] * 4,
             [[0.666656, 0.666705, 0.750001],
              [0.333342, 0.333378, 0.250001],
              [0.000001, 0.000041, 0.500001],
              [0.000001, 0.000021, 0.000001],
              [0.333347, 0.333332, 0.649191],
              [0.333322, 0.333353, 0.850803],
              [0.666666, 0.666686, 0.350813],
              [0.666665, 0.666684, 0.149189]],
             "peroxide"),
            (Lattice.from_parameters(*tetragonal),
             [h, h, li, li, o, o],
             tetragonal_coords,
             "hydroxide"),
            (Lattice.from_parameters(*tetragonal),
             [h, h, li, li, n, n],
             tetragonal_coords,
             "None"),
            (Lattice.from_parameters(4.389828, 5.369789, 5.369789,
                                     70.786622, 69.244828, 69.244828),
             [o] * 8,
             [[0.844609, 0.273459, 0.786089],
              [0.155391, 0.213911, 0.726541],
              [0.155391, 0.726541, 0.213911],
              [0.844609, 0.786089, 0.273459],
              [0.821680, 0.207748, 0.207748],
              [0.178320, 0.792252, 0.792252],
              [0.132641, 0.148222, 0.148222],
              [0.867359, 0.851778, 0.851778]],
             "None"),
        ]

        for latt, elts, coords, expected in cases:
            struct = Structure(latt, elts, coords)
            self.assertEqual(oxide_type(struct, 1.1), expected)
Example #19
0
    def assimilate(self, path, launches_coll=None):
        """
        Parse the VASP run found in ``path`` and store the resulting task
        document in the database.

        Args:
            path: directory of the run; passed to ``self.get_task_doc`` and
                used to locate the ``relax1``/``relax2`` OUTCARs and
                ``vasprun.xml``.
            launches_coll: optional collection of launch documents, queried
                by ``fw_id`` and ``launch_dir`` for ``action.stored_data``.
                When None, static and "Uniform" tasks are processed without
                launch data; "band structure" tasks read their k-path from
                the launch document and fail without it.

        Returns:
            A ``(task_id, doc)`` tuple. In simulate mode the task_id is 0 and
            no database is contacted; for a skipped duplicate the document
            already stored in the collection is returned.

        Raises:
            ValueError: if the ``dir_name`` of the task doc cannot be
                processed.
        """

        d = self.get_task_doc(path)
        if self.additional_fields:
            # always add additional fields, even for failed jobs
            d.update(self.additional_fields)

        try:
            d["dir_name_full"] = d["dir_name"].split(":")[1]
            d["dir_name"] = get_block_part(d["dir_name_full"])
            d["stored_data"] = {}
        except Exception:
            # Single-argument print(...) behaves identically on Python 2
            # and Python 3.
            print('COULD NOT GET DIR NAME')
            pprint.pprint(d)
            print(traceback.format_exc())
            raise ValueError('IMPROPER PARSING OF {}'.format(path))

        if self.simulate:
            d["task_id"] = 0
            logger.info(
                "Simulated insert into database for {} with task_id {}".format(
                    d["dir_name"], d["task_id"]))
            return 0, d

        def find_launch_doc():
            # launches_coll is optional; without it there is no launch data.
            if launches_coll is None:
                return None
            return launches_coll.find_one(
                {
                    "fw_id": d['fw_id'],
                    "launch_dir": {
                        "$regex": d["dir_name"]
                    }
                }, {"action.stored_data": 1})

        # Perform actual insertion into db. Because db connections cannot
        # be pickled, every insertion needs to create a new connection
        # to the db. The connection is closed again before returning.
        conn = MongoClient(self.host, self.port)
        try:
            db = conn[self.database]
            if self.user:
                db.authenticate(self.user, self.password)
            coll = db[self.collection]

            result = coll.find_one({"dir_name": d["dir_name"]})
            if result is not None and not self.update_duplicates:
                logger.info("Skipping duplicate {}".format(d["dir_name"]))
                return result["task_id"], result

            # Insert dos data into gridfs and then remove it from the dict,
            # since DOS data tends to be too large for a mongo doc. A ref
            # to the dos file is kept in dos_fs_id.
            if self.parse_dos and "calculations" in d:
                for calc in d["calculations"]:
                    if "dos" in calc:
                        dos = json.dumps(calc["dos"], cls=MontyEncoder)
                        fs = gridfs.GridFS(db, "dos_fs")
                        dosid = fs.put(dos)
                        calc["dos_fs_id"] = dosid
                        del calc["dos"]

            d["last_updated"] = datetime.datetime.today()
            if result is None:
                if ("task_id" not in d) or (not d["task_id"]):
                    # Draw a fresh id from the shared counter document
                    d["task_id"] = "mp-{}".format(
                        db.counter.find_one_and_update(
                            {"_id": "taskid"}, {"$inc": {"c": 1}})["c"])
                logger.info("Inserting {} with taskid = {}".format(
                    d["dir_name"], d["task_id"]))
            else:
                # Only reachable with update_duplicates set: keep the id of
                # the document that is being replaced.
                d["task_id"] = result["task_id"]
                logger.info("Updating {} with taskid = {}".format(
                    d["dir_name"], d["task_id"]))

            # Fireworks processing
            self.process_fw(path, d)

            try:
                # Add oxide_type
                struct = Structure.from_dict(d["output"]["crystal"])
                d["oxide_type"] = oxide_type(struct)
            except Exception:
                logger.error("can't get oxide_type for {}".format(
                    d["task_id"]))
                d["oxide_type"] = None

            # Override incorrect outcar subdocs for two step relaxations
            if "optimize structure" in d['task_type'] and \
                    os.path.exists(os.path.join(path, "relax2")):
                run_stats = {}
                try:
                    for i in [1, 2]:
                        o_path = os.path.join(path, "relax" + str(i),
                                              "OUTCAR")
                        # Fall back to the gzipped file if the plain one
                        # is absent
                        if not os.path.exists(o_path):
                            o_path = o_path + ".gz"
                        outcar = Outcar(o_path)
                        d["calculations"][
                            i - 1]["output"]["outcar"] = outcar.as_dict()
                        run_stats["relax" + str(i)] = outcar.run_stats
                except Exception:
                    logger.error("Bad OUTCAR for {}.".format(path))

                try:
                    overall_run_stats = {}
                    for key in [
                            "Total CPU time used (sec)", "User time (sec)",
                            "System time (sec)", "Elapsed time (sec)"
                    ]:
                        overall_run_stats[key] = sum(
                            v[key] for v in run_stats.values())
                    run_stats["overall"] = overall_run_stats
                except Exception:
                    logger.error("Bad run stats for {}.".format(path))

                d["run_stats"] = run_stats

            # add is_compatible
            mpc = MaterialsProjectCompatibility("Advanced")

            try:
                func = d["pseudo_potential"]["functional"]
                labels = d["pseudo_potential"]["labels"]
                symbols = ["{} {}".format(func, label) for label in labels]
                parameters = {
                    "run_type": d["run_type"],
                    "is_hubbard": d["is_hubbard"],
                    "hubbards": d["hubbards"],
                    "potcar_symbols": symbols
                }
                entry = ComputedEntry(Composition(d["unit_cell_formula"]),
                                      0.0,
                                      0.0,
                                      parameters=parameters,
                                      entry_id=d["task_id"])

                d['is_compatible'] = bool(mpc.process_entry(entry))
            except Exception:
                traceback.print_exc()
                print('ERROR in getting compatibility')
                d['is_compatible'] = None

            # task_type dependent processing
            if 'static' in d['task_type']:
                launch_doc = find_launch_doc()
                for i in [
                        "conventional_standard_structure",
                        "symmetry_operations", "symmetry_dataset",
                        "refined_structure"
                ]:
                    try:
                        d['stored_data'][i] = launch_doc['action'][
                            'stored_data'][i]
                    except (KeyError, TypeError):
                        # Best effort: the launch doc or this key may be
                        # missing.
                        pass

            # parse band structure if necessary
            if ('band structure' in d['task_type'] or
                    "Uniform" in d['task_type']) \
                    and d['state'] == 'successful':
                launch_doc = find_launch_doc()
                vasp_run = Vasprun(zpath(os.path.join(path, "vasprun.xml")),
                                   parse_projected_eigen=True)

                line_mode = 'band structure' in d['task_type']
                if line_mode:

                    def string_to_numlist(stringlist):
                        # Pull three whitespace-separated numbers out of
                        # the string
                        g = re.search(
                            r'([0-9\-\.eE]+)\s+([0-9\-\.eE]+)\s+([0-9\-\.eE]+)',
                            stringlist)
                        return [float(g.group(i)) for i in range(1, 4)]

                    for i in ["kpath_name", "kpath"]:
                        d['stored_data'][i] = launch_doc['action'][
                            'stored_data'][i]
                    kpoints_doc = d['stored_data']['kpath']['kpoints']
                    for i in kpoints_doc:
                        if isinstance(kpoints_doc[i], six.string_types):
                            kpoints_doc[i] = string_to_numlist(
                                kpoints_doc[i])

                bs = vasp_run.get_band_structure(
                    efermi=d['calculations'][0]['output']['outcar']
                    ['efermi'],
                    line_mode=line_mode)
                bs_json = json.dumps(bs.as_dict(), cls=MontyEncoder)
                fs = gridfs.GridFS(db, "band_structure_fs")
                bs_id = fs.put(bs_json)
                d['calculations'][0]["band_structure_fs_id"] = bs_id

                # also override band gap in task doc
                gap = bs.get_band_gap()
                vbm = bs.get_vbm()
                cbm = bs.get_cbm()
                update_doc = {
                    'bandgap': gap['energy'],
                    'vbm': vbm['energy'],
                    'cbm': cbm['energy'],
                    'is_gap_direct': gap['direct']
                }
                d['analysis'].update(update_doc)
                d['calculations'][0]['output'].update(update_doc)

            coll.update_one({"dir_name": d["dir_name"]}, {'$set': d},
                            upsert=True)

            return d["task_id"], d
        finally:
            # Release the per-call connection on every exit path
            conn.close()