def test_find_structure(self):
    # nosetests pymatgen/matproj/tests/test_matproj.py:MPResterTest.test_find_structure
    #
    # Exercises MPRester.find_structure with the two input forms used here:
    # a file name (string) and a Structure parsed from the same CIF file.
    rester = MPRester()
    cif_path = self.TEST_FILES_DIR / 'Fe3O4.cif'

    # Query by file name; the path object is converted to a plain string.
    matches = rester.find_structure(str(cif_path))
    self.assertTrue(len(matches) > 1)

    # Query by the first structure parsed from the CIF file.
    parsed_structure = CifParser(cif_path).get_structures()[0]
    matches = rester.find_structure(parsed_structure)
    self.assertTrue(len(matches) > 1)
def test_find_structure(self):
    # nosetests pymatgen/matproj/tests/test_matproj.py:MPResterTest.test_find_structure
    #
    # Exercises MPRester.find_structure with the two input forms used here:
    # a file name (string) and a Structure parsed from the same CIF file.
    rester = MPRester()
    cif_filename = os.path.join(test_dir, 'Fe3O4.cif')

    # Query by file name.
    found = rester.find_structure(cif_filename)
    self.assertTrue(len(found) > 1)

    # Query by the first structure parsed from the CIF file.
    first_structure = CifParser(cif_filename).get_structures()[0]
    found = rester.find_structure(first_structure)
    self.assertTrue(len(found) > 1)
def test(self, structure):
    """Return True when ``structure`` passes every enabled check.

    Each check is switched on by a truthy attribute on ``self``:

    * ``is_valid``: ``structure.is_valid()`` must be truthy.
    * ``potcar_exists``: no element of the composition may appear in
      ``self.NO_POTCARS``.
    * ``max_natoms``: ``structure.num_sites`` must not exceed it.
    * ``is_ordered``: ``structure.is_ordered`` must be truthy.
    * ``not_in_MP``: ``MPRester.find_structure`` must return no match.  When
      ``require_bandstructure`` is also set, a match only counts if a band
      structure can be retrieved for it.

    Args:
        structure: object exposing ``is_valid()``, ``composition.elements``,
            ``num_sites`` and ``is_ordered`` (presumably a pymatgen
            Structure; confirm against callers).

    Returns:
        bool: True if no check failed, False otherwise.
    """
    failures = []

    if self.is_valid and not structure.is_valid():
        failures.append("IS_VALID=False")

    if self.potcar_exists:
        elements = structure.composition.elements
        if set(elements).intersection(self.NO_POTCARS):
            failures.append("POTCAR_EXISTS=False")

    if self.max_natoms and structure.num_sites > self.max_natoms:
        failures.append("MAX_NATOMS=Exceeded")

    if self.is_ordered and not structure.is_ordered:
        failures.append("IS_ORDERED=False")

    if self.not_in_MP:
        mpr = MPRester(self.MAPI_KEY)
        mpids = mpr.find_structure(structure)
        if mpids:
            if self.require_bandstructure:
                for mpid in mpids:
                    try:
                        bs = mpr.get_bandstructure_by_material_id(mpid)
                    except Exception:
                        # Best effort: a failed band-structure lookup is
                        # treated like "no band structure available".
                        # Unlike a bare ``except``, this lets
                        # KeyboardInterrupt / SystemExit propagate.
                        continue
                    if bs:
                        failures.append(f"NOT_IN_MP=False ({mpid})")
            else:
                failures.append(f"NOT_IN_MP=False ({mpids[0]})")

    return not failures
class MaterialsEhullBuilder(AbstractBuilder):
    """Builder that annotates material documents with Materials Project data.

    For each selected document it stores the ``stability`` returned by
    ``MPRester.get_stability``, a ``thermo.formation_energy_per_atom`` value
    and the list of matching Materials Project ids (``mpids``).
    """

    def __init__(self, materials_write, mapi_key=None, update_all=False):
        """
        Starting with an existing materials collection, adds stability
        information and The Materials Project ID.

        Args:
            materials_write: mongodb collection for materials (write access
                needed)
            mapi_key: (str) Materials API key (if MAPI_KEY env. var. not set)
            update_all: (bool) - if true, updates all docs. If false, only
                updates docs w/o a stability key
        """
        self._materials = materials_write
        self.mpr = MPRester(api_key=mapi_key)
        self.update_all = update_all

    def run(self):
        """Process every selected material document.

        Documents are selected when they have a ``thermo.energy`` field and,
        unless ``update_all`` is set, no ``stability`` key yet.  A failure
        while processing one material is logged and does not stop the
        remaining materials from being processed.
        """
        logger.info("MaterialsEhullBuilder starting...")
        self._build_indexes()

        q = {"thermo.energy": {"$exists": True}}
        if not self.update_all:
            q["stability"] = {"$exists": False}

        projection = {
            "calc_settings": 1,
            "structure": 1,
            "thermo.energy": 1,
            "material_id": 1,
        }
        mats = list(self._materials.find(q, projection))

        # Lowest energy per atom among the MP entries of each element,
        # keyed by element symbol.  Cached for the duration of this run so
        # the same elemental query is not re-sent for every material.
        elemental_energies = {}

        pbar = tqdm(mats)
        for m in pbar:
            pbar.set_description("Processing materials_id: {}".format(
                m['material_id']))
            try:
                params = {
                    x: m["calc_settings"][x]
                    for x in ["is_hubbard", "hubbards", "potcar_spec"]
                }

                structure = Structure.from_dict(m["structure"])
                energy = m["thermo"]["energy"]
                my_entry = ComputedEntry(structure.composition, energy,
                                         parameters=params)

                # TODO: @computron This only calculates Ehull with respect to
                # Materials Project. It should also account for the current
                # database's results. -computron
                self._materials.update_one(
                    {"material_id": m["material_id"]},
                    {"$set": {
                        "stability": self.mpr.get_stability([my_entry])[0]
                    }})

                # TODO: @computron: also add additional properties like
                # inverse hull energy?

                # TODO: @computron it's better to use PD tool or reaction
                # energy calculator. Otherwise the compatibility schemes
                # might have issues...one strategy might be use MP only to
                # retrieve entries but compute the PD locally -computron
                for el, elx in my_entry.composition.items():
                    symbol = el.symbol
                    if symbol not in elemental_energies:
                        entries = self.mpr.get_entries(
                            symbol, compatible_only=True)
                        elemental_energies[symbol] = min(
                            entries,
                            key=lambda x: x.energy_per_atom).energy_per_atom
                    # Subtract the elemental reference contribution.
                    energy -= elx * elemental_energies[symbol]

                self._materials.update_one(
                    {"material_id": m["material_id"]},
                    {"$set": {
                        "thermo.formation_energy_per_atom":
                            energy / structure.num_sites
                    }})

                mpids = self.mpr.find_structure(structure)
                self._materials.update_one(
                    {"material_id": m["material_id"]},
                    {"$set": {"mpids": mpids}})

            except Exception:
                # Best effort per material: log and move on.  Catching
                # Exception (not a bare except) keeps Ctrl-C working.
                # logger.exception already appends the traceback, so it is
                # emitted exactly once between the two markers.
                logger.error("<---")
                logger.exception(
                    "There was an error processing material_id: {}".format(
                        m["material_id"]))
                logger.error("--->")

        logger.info("MaterialsEhullBuilder finished processing.")

    def reset(self):
        """Remove the ``stability`` key from every document and rebuild indexes."""
        logger.info("Resetting MaterialsEhullBuilder")
        self._materials.update_many({}, {"$unset": {"stability": 1}})
        self._build_indexes()
        logger.info("Finished resetting MaterialsEhullBuilder")

    def _build_indexes(self):
        """Create the index on ``stability.e_above_hull``."""
        self._materials.create_index("stability.e_above_hull")

    @classmethod
    def from_file(cls, db_file, m="materials", **kwargs):
        """
        Get a MaterialsEhullBuilder using only a db file

        Args:
            db_file: (str) path to db file
            m: (str) name of "materials" collection
            **kwargs: other parameters to feed into the builder, e.g. mapi_key
        """
        db_write = get_database(db_file, admin=True)
        return cls(db_write[m], **kwargs)
data_dict_list = []
for i_cnt, row_i in df_dft.iterrows():
    # Row label and stoichiometry identify the entry in the output table.
    name_i = row_i.name
    stoich_i = row_i.stoich

    # Convert the row's atoms object to a structure and look it up via MPR.
    atoms_i = row_i.atoms
    struct_i = AseAtomsAdaptor.get_structure(atoms_i)
    duplicates_tmp = MPR.find_structure(struct_i)
    tmp_list.append(duplicates_tmp)

    data_dict_i = {
        "id": name_i,
        "stoich": stoich_i,
        "mp_duplicates": duplicates_tmp,
    }
    data_dict_list.append(data_dict_i)

# +
df = pd.DataFrame(data_dict_list)

# Keep only the rows for which at least one match was returned.
dupl_mask = [len(dupl_i) != 0 for dupl_i in df.mp_duplicates.tolist()]
df_mp_dupl = df[dupl_mask].set_index("id")

df_mp_dupl
def add_structure(self, source, name=None, identifier=None, fmt=None):
    """Add a structure to the mpfile.

    Args:
        source: a pymatgen ``Structure``, a dict accepted by
            ``Structure.from_dict``, the path of an existing file, or a
            string holding the structure in the format given by ``fmt``.
        name: optional key under which the structure is stored; must be a
            string without dots. Defaults to the structure's formula.
        identifier: optional top-level identifier to file the structure
            under. If omitted, the first matching MP id is used, or the
            formula when MP returns no match.
        fmt: format passed to ``Structure.from_str``; required when
            ``source`` is a raw string rather than a path.

    Returns:
        The identifier the structure was stored under.

    Raises:
        ValueError: if ``source`` is of an unsupported type, ``fmt`` is
            missing for a string source, ``name`` is invalid, no API key is
            configured, or ``identifier`` is not among the MP ids matching
            the structure.
    """
    from pymatgen.core import Structure
    from pymatgen.ext.matproj import MPRester

    if isinstance(source, Structure):
        structure = source
    elif isinstance(source, dict):
        structure = Structure.from_dict(source)
    else:
        # os.path.exists raises TypeError for objects that are not
        # path-like; treat those as "not a file" so that unsupported
        # sources reach the ValueError below instead of crashing here.
        try:
            is_existing_path = os.path.exists(source)
        except TypeError:
            is_existing_path = False

        if is_existing_path:
            structure = Structure.from_file(source, sort=True)
        elif isinstance(source, six.string_types):
            if fmt is None:
                raise ValueError("Need fmt to get structure from string!")
            structure = Structure.from_str(source, fmt, sort=True)
        else:
            raise ValueError("{!r} not supported!".format(source))

    if name is not None:
        if not isinstance(name, six.string_types):
            raise ValueError("structure name needs to be a string")
        if "." in name:
            raise ValueError("structure name cannot contain dots (.)")

    mpr = MPRester()
    if not mpr.api_key:
        raise ValueError(
            "API key not set. Run `pmg config --add PMG_MAPI_KEY <USER_API_KEY>`."
        )

    matched_mpids = mpr.find_structure(structure)
    formula = get_composition_from_string(structure.composition.formula)

    if not matched_mpids:
        if identifier is None:
            identifier = formula
            print(
                "Structure not found in MP! Please submit via MPComplete to "
                "obtain mp-id or manually choose an anchor mp-id! Continuing "
                "with {} as identifier!".format(identifier))
        else:
            print("Structure not found in MP! Forcing {} as identifier!".
                  format(identifier))
    elif identifier is None:
        identifier = matched_mpids[0]
        if len(matched_mpids) > 1:
            print("Multiple matching structures found in MP. Using",
                  identifier)
    elif identifier not in matched_mpids:
        msg = "Structure does not match {} but instead {}!".format(
            identifier, matched_mpids)
        raise ValueError(msg)

    # Section of the document that holds structures for an identifier
    # (presumably the "structures" title; confirm in mp_level01_titles).
    section = mp_level01_titles[3]
    existing = self.document.get(identifier, {}).get(section, {})

    # Disambiguate a repeated key by appending the current entry count.
    idx = len(existing)
    sub_key = formula if name is None else name
    if sub_key in existing:
        sub_key += "_{}".format(idx)

    self.document.rec_update(
        nest_dict(structure.as_dict(), [identifier, section, sub_key]))
    return identifier