예제 #1
0
    def _parse_criteria(self, criteria):
        """
        Translate user-supplied criteria into a mongo query dict.

        Keys are mapped through ``self.aliases`` and formula-style keys are
        sanitized with ``Composition``:

        * "normalized_formula" / "reduced_cell_formula" become a
          "pretty_formula" match on the reduced formula.
        * "unit_cell_formula" is expanded into one "<alias>.<element>" entry
          per element, plus "nelements" and "pretty_formula".
        * "$or" / "$and" clauses are parsed recursively, clause by clause.
        * Any other key is passed through under its alias (or unchanged when
          it has no alias).

        Entries of ``self.default_criteria`` whose keys are absent from
        'criteria' are added (aliased) first, so explicit criteria written
        afterwards take precedence on a key clash.

        If 'criteria' is None, an empty dict is returned without applying the
        defaults. Keeping that logic here simplifies callers and gives
        subclasses a single place to hook in.
        """
        if criteria is None:
            return {}

        # Seed the query with every default the caller did not override.
        query = {
            self.aliases.get(name, name): default
            for name, default in self.default_criteria.items()
            if name not in criteria
        }

        for key, value in criteria.items():
            if key in ("normalized_formula", "reduced_cell_formula"):
                query["pretty_formula"] = Composition(value).reduced_formula
            elif key == "unit_cell_formula":
                composition = Composition(value)
                amounts = composition.as_dict()
                for element, amount in amounts.items():
                    query["{}.{}".format(self.aliases[key], element)] = amount
                query["nelements"] = len(amounts)
                query["pretty_formula"] = composition.reduced_formula
            elif key in ("$or", "$and"):
                query[key] = [self._parse_criteria(clause) for clause in value]
            else:
                query[self.aliases.get(key, key)] = value
        return query
예제 #2
0
    def _parse_criteria(self, criteria):
        """
        Map criteria onto a proper mongo query using the configured aliases.

        Besides renaming keys through ``self.aliases``, a few keys receive
        special handling:

        * "$or" / "$and": every sub-criteria dict is parsed recursively.
        * "normalized_formula" / "reduced_cell_formula": replaced by a
          "pretty_formula" entry holding the reduced formula.
        * "unit_cell_formula": expanded into per-element
          "<alias>.<element>" amounts, together with "nelements" and
          "pretty_formula".

        Values from ``self.defaults`` are inserted (under their aliases) for
        every default key that 'criteria' does not mention; they are written
        before the explicit criteria.

        If 'criteria' is None, returns an empty dict (defaults are not
        applied in that case). Putting this logic here simplifies callers.
        """
        if criteria is None:
            return {}

        result = {}
        for default_key, default_value in self.defaults.items():
            if default_key in criteria:
                continue
            result[self.aliases.get(default_key, default_key)] = default_value

        for field, spec in criteria.items():
            if field in {"$or", "$and"}:
                result[field] = [self._parse_criteria(sub) for sub in spec]
                continue

            if field in {"normalized_formula", "reduced_cell_formula"}:
                result["pretty_formula"] = Composition(spec).reduced_formula
                continue

            if field != "unit_cell_formula":
                # Plain key: only alias translation is needed.
                result[self.aliases.get(field, field)] = spec
                continue

            formula = Composition(spec)
            element_amounts = formula.as_dict()
            result.update({
                "{}.{}".format(self.aliases[field], symbol): quantity
                for symbol, quantity in element_amounts.items()
            })
            result["nelements"] = len(element_amounts)
            result['pretty_formula'] = formula.reduced_formula
        return result
예제 #3
0
def id_mobile_ion(formula):
    """Identify the mobile (working) ion present in a formula, if any.

    :param formula: Formula to fetch working ion from.
    :return: The first element symbol of the parsed composition that is
        listed in ``mobile_ion``, or ``None`` when the formula contains none
        of them.
    """
    composition = Composition(formula)
    # ``next`` with a default stops at the first match and returns None when
    # nothing matches, instead of raising IndexError on an empty match list.
    return next(
        (el for el in composition.as_dict() if el in mobile_ion),
        None,
    )
예제 #4
0
def get_composition_from_string(comp_str):
    """Validate `comp_str` and return it as an integer-formula string.

    The string is parsed into a ``Composition``, each of its elements is
    passed through ``Element`` as a validity check, and the composition is
    rebuilt from its integer formula. The result is the concatenation of
    ``<symbol><amount>`` for every element, where amounts not greater than 1
    are omitted (only the symbol is emitted).

    Any exception raised by ``Composition`` or ``Element`` for invalid input
    propagates to the caller.
    """
    # Import from the defining submodules: the root-level shortcut
    # ``from pymatgen import Composition, Element`` is no longer available in
    # recent pymatgen releases, while these paths work in old and new ones.
    from pymatgen.core.composition import Composition
    from pymatgen.core.periodic_table import Element

    comp = Composition(comp_str)
    for element in comp.elements:
        # Presumably raises for species that are not real elements.
        Element(element)
    integer_formula = comp.get_integer_formula_and_factor()[0]
    amounts = Composition(integer_formula).as_dict()
    return ''.join(
        '{}{}'.format(symbol, int(amount) if amount > 1 else '')
        for symbol, amount in amounts.items()
    )
예제 #5
0
def get_composition_from_string(comp_str):
    """Validate and return the composition described by `comp_str`.

    Steps:
      1. Parse `comp_str` with ``Composition``.
      2. Run every element of the composition through ``Element`` as a
         validity check.
      3. Re-parse the integer formula of the composition.
      4. Join ``<symbol><amount>`` for each element; an amount that is not
         greater than 1 is left out so only the symbol appears.

    Exceptions raised by ``Composition`` / ``Element`` on bad input are not
    caught here.
    """
    # The root-level ``from pymatgen import ...`` shortcut was dropped from
    # newer pymatgen versions; the submodule paths below are stable.
    from pymatgen.core.composition import Composition
    from pymatgen.core.periodic_table import Element

    parsed = Composition(comp_str)
    for element in parsed.elements:
        # Result is discarded; the call is only a check (presumably raising
        # for anything that is not a real element).
        Element(element)

    normalized = Composition(parsed.get_integer_formula_and_factor()[0])
    pieces = []
    for key, value in normalized.as_dict().items():
        suffix = int(value) if value > 1 else ''
        pieces.append('{}{}'.format(key, suffix))
    return ''.join(pieces)
예제 #6
0
def get_qmpy_formation_energy(total_e, formula, n_atoms):
    """
    Helper function to compute a qmpy-compatible formation
    energy using reference energies extracted from OQMD.

    The per-atom energy ``total_e / n_atoms`` is reduced by the reference
    energy of every element weighted by its atomic fraction. When the
    composition contains oxygen, elements listed in
    ``QMPY_REFERENCES_HUBBARD`` receive an additional weighted subtraction.

    Args:
        total_e (float): total energy
        formula (str): chemical formula
        n_atoms (int): number of atoms

    Returns:
        (float): qmpy-compatible formation energy

    """
    fractions = Composition(formula).fractional_composition
    # The oxygen test does not depend on the loop variable; evaluate it once.
    contains_oxygen = "O" in fractions
    formation_e = total_e / n_atoms
    for symbol, fraction in fractions.as_dict().items():
        formation_e -= QMPY_REFERENCES[symbol] * fraction
        if contains_oxygen and symbol in QMPY_REFERENCES_HUBBARD:
            formation_e -= QMPY_REFERENCES_HUBBARD[symbol] * fraction
    return formation_e
예제 #7
0
    def compute_corrections(self, exp_entries: list,
                            calc_entries: dict) -> dict:
        """
        Computes the corrections and fills in correction, corrections_std_error, and corrections_dict.

        For every experimental compound that passes the filters below, the method records the
        per-atom difference between the experimental energy and the calculated reaction energy
        (elements -> compound), together with a row of per-atom coefficients (one per entry of
        self.species). These rows are then fitted with curve_fit, one parameter per species.

        Filters applied to each experimental compound:
            - skipped (with a warning) when its reduced formula is not a key of calc_entries;
            - excluded when |uncertainty / exp energy| exceeds self.max_error;
            - excluded when its formula contains any string from self.exclude_polyanions;
            - excluded when self.allow_unstable is a float and the entry's "e_above_hull"
              exceeds it.

        Side effects: the `correction` attribute of every entry in calc_entries is reset to 0,
        and compound names are appended to self.oxides / self.peroxides / self.superoxides /
        self.sulfides as they are classified.

        Args:
            exp_entries: list of dictionary objects with the following keys/values:
                    {"formula": chemical formula, "exp energy": formation energy in eV/formula unit,
                    "uncertainty": uncertainty in formation energy}
            calc_entries: dictionary of computed entries, of the form {chemical formula: ComputedEntry}

        Returns:
            self.corrections_dict, mapping each species to a tuple of
            (correction rounded to 3 decimals, standard error rounded to 4 decimals).

        Raises:
            ValueError: an entry lacks "e_above_hull" data while self.allow_unstable is a float,
                calc_compounds is missing an elemental entry needed for an included compound,
                or a species cannot be looked up in a compound's composition.
        """

        self.exp_compounds = exp_entries
        self.calc_compounds = calc_entries

        # Per-compound fit inputs; all four lists stay index-aligned.
        self.names: List[str] = []
        self.diffs: List[float] = []
        self.coeff_mat: List[List[float]] = []
        self.exp_uncer: List[float] = []

        # remove any corrections in calc_compounds
        for entry in self.calc_compounds.values():
            entry.correction = 0

        for cmpd_info in self.exp_compounds:

            # to get consistent element ordering in formula
            name = Composition(cmpd_info["formula"]).reduced_formula

            # Cleared by any of the filters below; the compound is only used in the fit if it
            # is still True afterwards.
            allow = True

            compound = self.calc_compounds.get(name, None)
            if not compound:
                warnings.warn(
                    "Compound {} is not found in provided computed entries and is excluded from the fit"
                    .format(name))
                continue

            # filter out compounds with large uncertainties
            # NOTE(review): relative_uncertainty is a plain ratio, although the warning text
            # labels it with "%".
            relative_uncertainty = abs(cmpd_info["uncertainty"] /
                                       cmpd_info["exp energy"])
            if relative_uncertainty > self.max_error:
                allow = False
                warnings.warn(
                    "Compound {} is excluded from the fit due to high experimental uncertainty ({}%)"
                    .format(name, relative_uncertainty))

            # filter out compounds containing certain polyanions
            # (plain substring match against both the reduced and the original formula)
            for anion in self.exclude_polyanions:
                if anion in name or anion in cmpd_info["formula"]:
                    allow = False
                    warnings.warn(
                        "Compound {} contains the polyanion {} and is excluded from the fit"
                        .format(name, anion))
                    break

            # filter out compounds that are unstable
            # Only applied when allow_unstable is a float; any other type (e.g. bool or int)
            # skips this check. It runs even if the compound was already excluded above, so a
            # missing "e_above_hull" raises regardless.
            if isinstance(self.allow_unstable, float):
                try:
                    eah = compound.data["e_above_hull"]
                except KeyError:
                    raise ValueError("Missing e above hull data")
                if eah > self.allow_unstable:
                    allow = False
                    warnings.warn(
                        "Compound {} is unstable and excluded from the fit (e_above_hull = {})"
                        .format(name, eah))

            if allow:
                comp = Composition(name)
                elems = list(comp.as_dict())

                # Collect the computed entry of each constituent element as a reactant.
                reactants = []
                for elem in elems:
                    try:
                        # The reduced formula of the bare element is the lookup key in
                        # calc_compounds.
                        elem_name = Composition(elem).reduced_formula
                        reactants.append(self.calc_compounds[elem_name])
                    except KeyError:
                        raise ValueError("Computed entries missing " + elem)

                # Reaction elements -> compound, normalized to one formula unit of comp.
                rxn = ComputedReaction(reactants, [compound])
                rxn.normalize_to(comp)
                energy = rxn.calculated_reaction_energy

                # One coefficient per species, in the order of self.species.
                coeff = []
                for specie in self.species:
                    # The three oxide-like species use the oxygen amount, but only when the
                    # entry's "oxide_type" matches that species; otherwise the coefficient is 0.
                    if specie == "oxide":
                        if compound.data["oxide_type"] == "oxide":
                            coeff.append(comp["O"])
                            self.oxides.append(name)
                        else:
                            coeff.append(0)
                    elif specie == "peroxide":
                        if compound.data["oxide_type"] == "peroxide":
                            coeff.append(comp["O"])
                            self.peroxides.append(name)
                        else:
                            coeff.append(0)
                    elif specie == "superoxide":
                        if compound.data["oxide_type"] == "superoxide":
                            coeff.append(comp["O"])
                            self.superoxides.append(name)
                        else:
                            coeff.append(0)
                    elif specie == "S":
                        if Element("S") in comp:
                            # Defaults to "sulfide"; overridden by the entry's "sulfide_type"
                            # data if present, else derived from the structure when available.
                            sf_type = "sulfide"
                            if compound.data.get("sulfide_type"):
                                sf_type = compound.data["sulfide_type"]
                            elif hasattr(compound, "structure"):
                                sf_type = sulfide_type(compound.structure)
                            if sf_type == "sulfide":
                                coeff.append(comp["S"])
                                self.sulfides.append(name)
                            else:
                                coeff.append(0)
                        else:
                            coeff.append(0)
                    else:
                        # Any other species is looked up directly in the composition.
                        try:
                            coeff.append(comp[specie])
                        except ValueError:
                            raise ValueError(
                                "We can't detect this specie: {}".format(
                                    specie))

                # Everything stored for the fit is normalized per atom.
                self.names.append(name)
                self.diffs.append(
                    (cmpd_info["exp energy"] - energy) / comp.num_atoms)
                self.coeff_mat.append([i / comp.num_atoms for i in coeff])
                self.exp_uncer.append(
                    (cmpd_info["uncertainty"]) / comp.num_atoms)

        # for any exp entries with no uncertainty value, assign average uncertainty value
        # (an uncertainty of exactly 0 is treated as "no value")
        sigma = np.array(self.exp_uncer)
        sigma[sigma == 0] = np.nan

        with warnings.catch_warnings():
            warnings.simplefilter(
                "ignore", category=RuntimeWarning
            )  # numpy raises warning if the entire array is nan values
            mean_uncer = np.nanmean(sigma)

        sigma = np.where(np.isnan(sigma), mean_uncer, sigma)

        # _func is the model function being fitted; it is defined outside this method.
        if np.isnan(mean_uncer):
            # no uncertainty values for any compounds, don't try to weight
            popt, self.pcov = curve_fit(_func,
                                        self.coeff_mat,
                                        self.diffs,
                                        p0=np.ones(len(self.species)))
        else:
            popt, self.pcov = curve_fit(
                _func,
                self.coeff_mat,
                self.diffs,
                p0=np.ones(len(self.species)),
                sigma=sigma,
                absolute_sigma=True,
            )
        self.corrections = popt.tolist()
        # Standard errors are the square roots of the covariance matrix diagonal.
        self.corrections_std_error = np.sqrt(np.diag(self.pcov)).tolist()
        for i in range(len(self.species)):
            self.corrections_dict[self.species[i]] = (
                round(self.corrections[i], 3),
                round(self.corrections_std_error[i], 4),
            )
        return self.corrections_dict
    # NOTE(review): this is the tail of a routine whose beginning is not visible here;
    # `out`, `df_avial`, `pd` and `Composition` must be defined before this point.
    #bandgap_str = formula+','+str(bandgap[0])
    out = list(out)

    #print(out)
    # Store the values in `out` as the "Eg" column and save the table.
    df_avial["Eg"]=out
    df_avial.to_csv("roost_screening_result_exp1_208_m9.csv", index=False, header=True)

    # NOTE(review): the file read here ("exp2_127_m211") is not the one written just
    # above ("exp1_208_m9") - confirm this is intended.
    df_bandgap = pd.read_csv('./roost_screening_result_exp2_127_m211.csv')
    """
    3. discard item whose band gap value < 6
    """
    df_filtered = df_bandgap[df_bandgap["Eg"] >= 6]


    # For every remaining composition, record its total atom count and its number of
    # distinct elements.
    num_atom_l = []
    num_elements_l = []
    for c in df_filtered["composition"]:
        c = Composition(c)
        num_elements = len(c.as_dict())
        num_atom = c.num_atoms
        num_atom_l.append(num_atom)
        num_elements_l.append(num_elements)
    # NOTE(review): df_filtered is a boolean-mask selection of df_bandgap; assigning new
    # columns to it may trigger pandas' SettingWithCopyWarning - verify.
    df_filtered["num_atom"] = num_atom_l
    df_filtered["# of elements"] = num_elements_l

    # Keep only candidates with fewer than 8 atoms and fewer than 4 distinct elements.
    df_filtered = df_filtered[df_filtered["num_atom"] < 8]
    df_filtered = df_filtered[df_filtered["# of elements"] < 4]

    df_filtered.to_csv("roost_simple_candidate_exp1_M9.csv", index=False, header=True)
    print('success!')
예제 #9
0
def get_qmpy_formation_energy(total_e, formula, n_atoms):
    """Compute a formation energy per atom from tabulated elemental references.

    The per-atom energy ``total_e / n_atoms`` is reduced by the reference
    value of every element in ``formula`` weighted by its atomic fraction.
    If the composition contains oxygen, elements listed in ``hubbard_mus``
    receive an additional weighted subtraction.

    NOTE(review): the tables below are presumably the qmpy/OQMD elemental
    reference energies - confirm their source and units.

    Args:
        total_e (float): total energy of the cell
        formula (str): chemical formula
        n_atoms (int): number of atoms that ``total_e`` refers to

    Returns:
        (float): total energy per atom minus the weighted references

    Raises:
        KeyError: if the formula contains an element missing from ``mus``
        ZeroDivisionError: if ``n_atoms`` is 0
    """
    # Elemental reference values, keyed by element symbol.
    mus = {
        u'Ac': -4.1060035325,
        u'Ag': -2.8217729525,
        u'Al': -3.74573946,
        u'Ar': -0.00636995,
        u'As': -4.651918435,
        u'Au': -3.26680174,
        u'B': -6.67796758,
        u'Ba': -1.92352708,
        u'Be': -3.75520865,
        u'Bi': -4.038931855,
        u'Br': -1.31759562258416,
        u'C': -9.2170759925,
        u'Ca': -1.977817,
        u'Cd': -0.90043514,
        u'Ce': -4.7771708225,
        u'Cl': -1.47561368438088,
        u'Co': -7.089565,
        u'Cr': -9.50844998,
        u'Cs': -0.85462775,
        u'Cu': -3.7159594,
        u'Dy': -4.60150328333333,
        u'Er': -4.56334055,
        u'Eu': -1.8875732,
        u'F': -1.45692429086889,
        u'Fe': -8.3078978,
        u'Ga': -3.031846515,
        u'Gd': -4.6550712925,
        u'Ge': -4.623692585,
        u'H': -3.38063384781582,
        u'He': -0.004303435,
        u'Hf': -9.955368785,
        u'Hg': -0.358963825033731,
        u'Ho': -4.57679364666667,
        u'I': -1.35196205757168,
        u'In': -2.71993876,
        u'Ir': -8.8549203,
        u'K': -1.096699335,
        u'Kr': -0.004058825,
        u'La': -4.93543556,
        u'Li': -1.89660627,
        u'Lu': -4.524181525,
        u'Mg': -1.54251595083333,
        u'Mn': -9.0269032462069,
        u'Mo': -10.8480839,
        u'N': -8.11974103465649,
        u'Na': -1.19920373914835,
        u'Nb': -10.09391206,
        u'Nd': -4.762916335,
        u'Ne': -0.02931791,
        u'Ni': -5.56661952,
        u'Np': -12.94027372125,
        u'O': -4.52329546412125,
        u'Os': -11.22597601,
        u'P': -5.15856496104006,
        u'Pa': -9.49577589,
        u'Pb': -3.70396484,
        u'Pd': -5.17671826,
        u'Pm': -4.7452352875,
        u'Pr': -4.7748066125,
        u'Pt': -6.05575959,
        u'Pu': -14.29838348,
        u'Rb': -0.9630733,
        u'Re': -12.422818875,
        u'Rh': -7.26940476,
        u'Ru': -9.2019888,
        u'S': -3.83888286598664,
        u'Sb': -4.117563025,
        u'Sc': -6.328367185,
        u'Se': -3.48117276,
        u'Si': -5.424892535,
        u'Sm': -4.7147675825,
        u'Sn': -3.9140929231488,
        u'Sr': -1.6829138,
        u'Ta': -11.85252937,
        u'Tb': -5.28775675533333,
        u'Tc': -10.360747885,
        u'Te': -3.14184237666667,
        u'Th': -7.41301719,
        u'Ti': -7.69805778621374,
        u'Tl': -2.359420025,
        u'Tm': -4.47502416,
        u'U': -11.292348705,
        u'V': -8.94097896,
        u'W': -12.96020695,
        u'Xe': 0.00306349,
        u'Y': -6.464420635,
        u'Yb': -1.51277545,
        u'Zn': -1.2660268,
        u'Zr': -8.54717235
    }
    # Extra per-element terms, applied only when the composition contains O.
    hubbard_mus = {
        u'Co': 2.0736240219357,
        u'Cr': 2.79591214925926,
        u'Cu': 1.457571831687,
        u'Fe': 2.24490453841424,
        u'Mn': 2.08652912841877,
        u'Ni': 2.56766185643768,
        u'Np': 2.77764768949249,
        u'Pu': 2.2108747749433,
        u'Th': 1.06653674624248,
        u'U': 2.57513786752409,
        u'V': 2.67812162528461
    }

    # Import from the defining submodule: the root-level shortcut
    # ``from pymatgen import Composition`` is no longer available in recent
    # pymatgen releases, while this path works in old and new ones.
    from pymatgen.core.composition import Composition

    fractions = Composition(formula).fractional_composition
    # The oxygen test does not depend on the loop variable; evaluate it once.
    has_oxygen = 'O' in fractions
    energy = total_e / n_atoms
    for symbol, fraction in fractions.as_dict().items():
        energy -= mus[symbol] * fraction
        if has_oxygen and symbol in hubbard_mus:
            energy -= hubbard_mus[symbol] * fraction
    return energy