def _parse_criteria(self, criteria):
    """
    Translate user-supplied criteria into a mongo query dict.

    Keys are mapped through ``self.aliases`` and formula-style keys are
    sanitized via ``Composition``:

    * ``normalized_formula`` / ``reduced_cell_formula`` become a
      ``pretty_formula`` entry holding the reduced formula.
    * ``unit_cell_formula`` is expanded into one ``<alias>.<element>``
      entry per element, plus ``nelements`` and ``pretty_formula``.
    * ``$or`` / ``$and`` lists are parsed recursively, clause by clause.
    * Anything else is copied under its alias (or its own name).

    Entries of ``self.default_criteria`` whose key is not present in
    ``criteria`` are added first, so explicit criteria win on collisions.

    If ``criteria`` is None an empty dict is returned (no defaults are
    applied). Keeping that rule here simplifies callers and lets
    subclasses insert something even when there are no criteria.
    """
    if criteria is None:
        return {}

    # Seed the query with every default the caller did not override.
    query = {
        self.aliases.get(name, name): value
        for name, value in self.default_criteria.items()
        if name not in criteria
    }

    for key, crit in criteria.items():
        if key in ("normalized_formula", "reduced_cell_formula"):
            query["pretty_formula"] = Composition(crit).reduced_formula
        elif key == "unit_cell_formula":
            comp = Composition(crit)
            amounts = comp.as_dict()
            for el, amt in amounts.items():
                query["{}.{}".format(self.aliases[key], el)] = amt
            query["nelements"] = len(amounts)
            query["pretty_formula"] = comp.reduced_formula
        elif key in ("$or", "$and"):
            # Logical operators wrap a list of sub-criteria dicts.
            query[key] = [self._parse_criteria(sub) for sub in crit]
        else:
            query[self.aliases.get(key, key)] = crit
    return query
def _parse_criteria(self, criteria):
    """
    Build a mongo query dict from ``criteria``, applying aliases and
    formula sanitization.

    Defaults from ``self.defaults`` are inserted first for every key the
    caller did not supply. Each supplied key is then handled as follows:

    * ``normalized_formula`` / ``reduced_cell_formula``: stored as
      ``pretty_formula`` using the reduced formula of the value.
    * ``unit_cell_formula``: expanded to one ``<alias>.<element>`` entry
      per element, together with ``nelements`` and ``pretty_formula``.
    * ``$or`` / ``$and``: every member of the list is parsed recursively.
    * any other key: stored under ``self.aliases.get(key, key)``.

    If ``criteria`` is None, returns an empty dict without applying
    defaults. Putting this logic here simplifies callers and allows
    subclasses to insert something even when there are no criteria.
    """
    if criteria is None:
        return {}

    result = {}
    for default_key, default_value in self.defaults.items():
        if default_key in criteria:
            continue
        result[self.aliases.get(default_key, default_key)] = default_value

    formula_keys = ("normalized_formula", "reduced_cell_formula")
    logical_ops = ("$or", "$and")

    for key, value in criteria.items():
        if key in formula_keys:
            result["pretty_formula"] = Composition(value).reduced_formula
            continue

        if key == "unit_cell_formula":
            comp = Composition(value)
            per_element = comp.as_dict()
            for el, amt in per_element.items():
                result["{}.{}".format(self.aliases[key], el)] = amt
            result["nelements"] = len(per_element)
            result["pretty_formula"] = comp.reduced_formula
            continue

        if key in logical_ops:
            result[key] = [self._parse_criteria(clause) for clause in value]
            continue

        result[self.aliases.get(key, key)] = value

    return result
def id_mobile_ion(formula):
    """id_mobile_ion

    Given a formula, identifies the mobile ion present, if any.

    :param formula: Formula to fetch working ion from.
    :return: The first element symbol of the parsed composition (in
        ``Composition.as_dict()`` iteration order) that is a member of the
        module-level ``mobile_ion`` collection, or ``None`` when the
        formula contains no such element.
    """
    symbols = Composition(formula).as_dict()
    # Use a default instead of indexing a possibly-empty list, so a formula
    # without a mobile ion yields None rather than a bare IndexError.
    return next((el for el in symbols if el in mobile_ion), None)
def get_composition_from_string(comp_str):
    """Validate `comp_str` and return it as an integer-formula string.

    The string is parsed into a ``Composition``; every entry of
    ``comp.elements`` is passed through ``Element(...)`` as a validity
    check (presumably raising for anything that is not a plain element --
    confirm against the pymatgen version in use). The composition is then
    converted to its integer formula and rendered as concatenated
    ``<symbol><amount>`` pairs, where the amount is omitted unless it is
    greater than 1.

    Args:
        comp_str: chemical formula string.

    Returns:
        str: the normalized formula, in ``Composition.as_dict()`` order.
    """
    # Root-level re-exports (`from pymatgen import Composition`) were removed
    # in pymatgen 2022.0.0; the submodule paths work on old and new releases.
    from pymatgen.core.composition import Composition
    from pymatgen.core.periodic_table import Element

    comp = Composition(comp_str)
    for element in comp.elements:
        Element(element)

    formula = comp.get_integer_formula_and_factor()[0]
    integer_comp = Composition(formula)
    return ''.join(
        '{}{}'.format(symbol, int(amount) if amount > 1 else '')
        for symbol, amount in integer_comp.as_dict().items()
    )
def get_qmpy_formation_energy(total_e, formula, n_atoms):
    """
    Compute a qmpy-compatible formation energy from a total energy, using
    the reference energies extracted from OQMD.

    The per-atom energy ``total_e / n_atoms`` is reduced by
    ``QMPY_REFERENCES[element] * fraction`` for every element of the
    fractional composition. When the composition contains oxygen, elements
    listed in ``QMPY_REFERENCES_HUBBARD`` get their Hubbard reference
    subtracted as well.

    Args:
        total_e (float): total energy
        formula (str): chemical formula
        n_atoms (int): number of atoms

    Returns:
        (float): qmpy-compatible formation energy
    """
    fractions = Composition(formula).fractional_composition
    formation_e = total_e / n_atoms

    for symbol, fraction in fractions.as_dict().items():
        formation_e -= QMPY_REFERENCES[symbol] * fraction
        # The Hubbard term only applies to listed elements in O-containing
        # compositions; skip everything else.
        if symbol not in QMPY_REFERENCES_HUBBARD or "O" not in fractions:
            continue
        formation_e -= QMPY_REFERENCES_HUBBARD[symbol] * fraction

    return formation_e
def compute_corrections(self, exp_entries: list, calc_entries: dict) -> dict:
    """
    Computes the corrections and fills in correction, corrections_std_error, and corrections_dict.

    Experimental compounds are filtered (missing computed entry, high
    relative uncertainty, excluded polyanions, instability). For each one
    kept, the difference between experimental and computed reaction energy
    per atom is recorded with a per-atom coefficient row over
    ``self.species``. The corrections are then obtained with ``curve_fit``.

    Args:
        exp_entries: list of dictionary objects with the following keys/values:
            {"formula": chemical formula, "exp energy": formation energy in eV/formula unit,
            "uncertainty": uncertainty in formation energy}
        calc_entries: dictionary of computed entries, of the form {chemical formula: ComputedEntry}

    Returns:
        ``self.corrections_dict``: for each entry of ``self.species``, a tuple
        ``(correction rounded to 3 decimals, std. error rounded to 4 decimals)``.

    Raises:
        ValueError: calc_compounds is missing an elemental entry, a compound
            lacks "e_above_hull" data while ``self.allow_unstable`` is a float,
            or a species cannot be looked up in the composition.
    """
    self.exp_compounds = exp_entries
    self.calc_compounds = calc_entries

    self.names: List[str] = []
    self.diffs: List[float] = []
    self.coeff_mat: List[List[float]] = []
    self.exp_uncer: List[float] = []
    # NOTE(review): self.oxides / self.peroxides / self.superoxides /
    # self.sulfides are appended to below but not reset here, unlike the
    # lists above -- confirm repeated calls are not expected to start clean.

    # remove any corrections in calc_compounds
    for entry in self.calc_compounds.values():
        entry.correction = 0

    for cmpd_info in self.exp_compounds:
        # to get consistent element ordering in formula
        name = Composition(cmpd_info["formula"]).reduced_formula

        compound = self.calc_compounds.get(name, None)
        if not compound:
            warnings.warn(
                "Compound {} is not found in provided computed entries and is excluded from the fit"
                .format(name))
            continue

        # Every filter below is evaluated (and may warn) even if an earlier
        # one already rejected the compound.
        allow = True

        # filter out compounds with large uncertainties
        # NOTE(review): a plain-Python 0 for "exp energy" raises
        # ZeroDivisionError here.
        relative_uncertainty = abs(cmpd_info["uncertainty"] / cmpd_info["exp energy"])
        if relative_uncertainty > self.max_error:
            allow = False
            # relative_uncertainty is a ratio; format it as a percentage so the
            # "%" in the message is truthful.
            warnings.warn(
                "Compound {} is excluded from the fit due to high experimental uncertainty ({:.1%})"
                .format(name, relative_uncertainty))

        # filter out compounds containing certain polyanions
        for anion in self.exclude_polyanions:
            if anion in name or anion in cmpd_info["formula"]:
                allow = False
                warnings.warn(
                    "Compound {} contains the polyanion {} and is excluded from the fit"
                    .format(name, anion))
                break

        # filter out compounds that are unstable (only when a float threshold
        # is configured; any non-float value skips this check)
        if isinstance(self.allow_unstable, float):
            try:
                eah = compound.data["e_above_hull"]
            except KeyError as err:
                raise ValueError("Missing e above hull data") from err
            if eah > self.allow_unstable:
                allow = False
                warnings.warn(
                    "Compound {} is unstable and excluded from the fit (e_above_hull = {})"
                    .format(name, eah))

        if not allow:
            continue

        comp = Composition(name)
        elems = list(comp.as_dict())

        # Elemental reference entries form the reactant side of the reaction.
        reactants = []
        for elem in elems:
            try:
                elem_name = Composition(elem).reduced_formula
                reactants.append(self.calc_compounds[elem_name])
            except KeyError as err:
                raise ValueError("Computed entries missing " + elem) from err

        rxn = ComputedReaction(reactants, [compound])
        rxn.normalize_to(comp)
        energy = rxn.calculated_reaction_energy

        coeff = []
        for specie in self.species:
            if specie in ("oxide", "peroxide", "superoxide"):
                # The O amount counts only toward the species matching the
                # compound's oxide_type; the compound name is recorded in
                # self.oxides / self.peroxides / self.superoxides.
                if compound.data["oxide_type"] == specie:
                    coeff.append(comp["O"])
                    getattr(self, specie + "s").append(name)
                else:
                    coeff.append(0)
            elif specie == "S":
                if Element("S") in comp:
                    # Default to "sulfide" unless the entry data or its
                    # structure says otherwise.
                    sf_type = "sulfide"
                    if compound.data.get("sulfide_type"):
                        sf_type = compound.data["sulfide_type"]
                    elif hasattr(compound, "structure"):
                        sf_type = sulfide_type(compound.structure)
                    if sf_type == "sulfide":
                        coeff.append(comp["S"])
                        self.sulfides.append(name)
                    else:
                        coeff.append(0)
                else:
                    coeff.append(0)
            else:
                try:
                    coeff.append(comp[specie])
                except ValueError as err:
                    raise ValueError(
                        "We can't detect this specie: {}".format(specie)) from err

        # Everything is stored per atom.
        self.names.append(name)
        self.diffs.append((cmpd_info["exp energy"] - energy) / comp.num_atoms)
        self.coeff_mat.append([i / comp.num_atoms for i in coeff])
        self.exp_uncer.append((cmpd_info["uncertainty"]) / comp.num_atoms)

    # for any exp entries with no uncertainty value, assign average uncertainty value
    sigma = np.array(self.exp_uncer)
    sigma[sigma == 0] = np.nan

    with warnings.catch_warnings():
        # numpy raises warning if the entire array is nan values
        warnings.simplefilter("ignore", category=RuntimeWarning)
        mean_uncer = np.nanmean(sigma)

    sigma = np.where(np.isnan(sigma), mean_uncer, sigma)

    if np.isnan(mean_uncer):
        # no uncertainty values for any compounds, don't try to weight
        popt, self.pcov = curve_fit(_func, self.coeff_mat, self.diffs,
                                    p0=np.ones(len(self.species)))
    else:
        popt, self.pcov = curve_fit(
            _func,
            self.coeff_mat,
            self.diffs,
            p0=np.ones(len(self.species)),
            sigma=sigma,
            absolute_sigma=True,
        )

    self.corrections = popt.tolist()
    self.corrections_std_error = np.sqrt(np.diag(self.pcov)).tolist()
    for specie, correction, std_error in zip(
            self.species, self.corrections, self.corrections_std_error):
        self.corrections_dict[specie] = (
            round(correction, 3),
            round(std_error, 4),
        )
    return self.corrections_dict
#bandgap_str = formula+','+str(bandgap[0])
out = list(out)
df_avial["Eg"] = out
df_avial.to_csv("roost_screening_result_exp1_208_m9.csv", index=False, header=True)

# NOTE(review): the file read below (exp2_127_m211) is not the one written
# just above (exp1_208_m9) -- confirm this is intentional.
df_bandgap = pd.read_csv('./roost_screening_result_exp2_127_m211.csv')

# 3. discard item whose band gap value < 6
# .copy() gives an independent frame, so adding columns below does not write
# into a slice of df_bandgap (avoids pandas' SettingWithCopyWarning).
df_filtered = df_bandgap[df_bandgap["Eg"] >= 6].copy()

# Parse each composition once and derive both per-row counts from it.
parsed_compositions = [Composition(c) for c in df_filtered["composition"]]
num_atom_l = [comp.num_atoms for comp in parsed_compositions]
num_elements_l = [len(comp.as_dict()) for comp in parsed_compositions]
df_filtered["num_atom"] = num_atom_l
df_filtered["# of elements"] = num_elements_l

# Keep only candidates with fewer than 8 atoms and fewer than 4 elements.
df_filtered = df_filtered[df_filtered["num_atom"] < 8]
df_filtered = df_filtered[df_filtered["# of elements"] < 4]
df_filtered.to_csv("roost_simple_candidate_exp1_M9.csv", index=False, header=True)
print('success!')
def get_qmpy_formation_energy(total_e, formula, n_atoms):
    """Return a formation energy per atom relative to tabulated references.

    Starting from ``total_e / n_atoms``, subtracts ``mus[el] * fraction``
    for every element of the fractional composition of ``formula``. If the
    composition contains oxygen, elements listed in ``hubbard_mus`` have
    ``hubbard_mus[el] * fraction`` subtracted as well.

    The tables are presumably the qmpy/OQMD elemental reference energies
    and their Hubbard-U adjustments (inferred from the function name --
    confirm the source of the values).

    Args:
        total_e (float): total energy of the cell.
        formula (str): chemical formula.
        n_atoms (int): number of atoms ``total_e`` refers to.

    Returns:
        float: the resulting per-atom formation energy.

    Raises:
        KeyError: if the formula contains an element missing from ``mus``.
    """
    mus = {
        'Ac': -4.1060035325, 'Ag': -2.8217729525, 'Al': -3.74573946,
        'Ar': -0.00636995, 'As': -4.651918435, 'Au': -3.26680174,
        'B': -6.67796758, 'Ba': -1.92352708, 'Be': -3.75520865,
        'Bi': -4.038931855, 'Br': -1.31759562258416, 'C': -9.2170759925,
        'Ca': -1.977817, 'Cd': -0.90043514, 'Ce': -4.7771708225,
        'Cl': -1.47561368438088, 'Co': -7.089565, 'Cr': -9.50844998,
        'Cs': -0.85462775, 'Cu': -3.7159594, 'Dy': -4.60150328333333,
        'Er': -4.56334055, 'Eu': -1.8875732, 'F': -1.45692429086889,
        'Fe': -8.3078978, 'Ga': -3.031846515, 'Gd': -4.6550712925,
        'Ge': -4.623692585, 'H': -3.38063384781582, 'He': -0.004303435,
        'Hf': -9.955368785, 'Hg': -0.358963825033731,
        'Ho': -4.57679364666667, 'I': -1.35196205757168, 'In': -2.71993876,
        'Ir': -8.8549203, 'K': -1.096699335, 'Kr': -0.004058825,
        'La': -4.93543556, 'Li': -1.89660627, 'Lu': -4.524181525,
        'Mg': -1.54251595083333, 'Mn': -9.0269032462069, 'Mo': -10.8480839,
        'N': -8.11974103465649, 'Na': -1.19920373914835, 'Nb': -10.09391206,
        'Nd': -4.762916335, 'Ne': -0.02931791, 'Ni': -5.56661952,
        'Np': -12.94027372125, 'O': -4.52329546412125, 'Os': -11.22597601,
        'P': -5.15856496104006, 'Pa': -9.49577589, 'Pb': -3.70396484,
        'Pd': -5.17671826, 'Pm': -4.7452352875, 'Pr': -4.7748066125,
        'Pt': -6.05575959, 'Pu': -14.29838348, 'Rb': -0.9630733,
        'Re': -12.422818875, 'Rh': -7.26940476, 'Ru': -9.2019888,
        'S': -3.83888286598664, 'Sb': -4.117563025, 'Sc': -6.328367185,
        'Se': -3.48117276, 'Si': -5.424892535, 'Sm': -4.7147675825,
        'Sn': -3.9140929231488, 'Sr': -1.6829138, 'Ta': -11.85252937,
        'Tb': -5.28775675533333, 'Tc': -10.360747885,
        'Te': -3.14184237666667, 'Th': -7.41301719, 'Ti': -7.69805778621374,
        'Tl': -2.359420025, 'Tm': -4.47502416, 'U': -11.292348705,
        'V': -8.94097896, 'W': -12.96020695, 'Xe': 0.00306349,
        'Y': -6.464420635, 'Yb': -1.51277545, 'Zn': -1.2660268,
        'Zr': -8.54717235,
    }
    hubbard_mus = {
        'Co': 2.0736240219357, 'Cr': 2.79591214925926,
        'Cu': 1.457571831687, 'Fe': 2.24490453841424,
        'Mn': 2.08652912841877, 'Ni': 2.56766185643768,
        'Np': 2.77764768949249, 'Pu': 2.2108747749433,
        'Th': 1.06653674624248, 'U': 2.57513786752409,
        'V': 2.67812162528461,
    }

    # Root-level `from pymatgen import Composition` was removed in pymatgen
    # 2022.0.0; the submodule path works on old and new releases alike.
    from pymatgen.core.composition import Composition

    c = Composition(formula).fractional_composition
    e = total_e / n_atoms
    for k, v in c.as_dict().items():
        e -= mus[k] * v
        # Hubbard adjustment only for listed elements in O-containing compounds.
        if (k in hubbard_mus) and ('O' in c):
            e -= hubbard_mus[k] * v
    return e