def fetch_materials_data(out_file):
    # properties of interest
    properties = [
        "material_id", "icsd_ids", "unit_cell_formula", "pretty_formula",
        "spacegroup", "cif", "volume", "nsites", "elements", "band_gap"
    ]
    # MaterialsProject API settings
    my_API_key = "gxTAyXSm2GvCdWer"
    m = MPRester(api_key=my_API_key)
    # query data with calculated band structures
    mp_data = m.query(
        criteria={
            # set filters
        },
        properties=properties)
    data_origin = []
    for entry in mp_data:
        plist = []
        for _, val in entry.items():
            plist.append(val)
        data_origin.append(plist)
    data_origin = pd.DataFrame(data_origin)
    data_origin.to_csv(out_file, sep=';', index=False,
                       header=properties, mode='w')

def get_structures(apiKey, elements):
    """Gets structures from Materials Project database.

    Uses a Materials Project API key to query the database with a list of
    elements. Gets Cu K-alpha diffraction pattern, material id, and
    spacegroup info for each model.

    Parameters
    ----------
    `apiKey` : str
        This is your API key from the MaterialsProject.
    `elements` : list of str
        Include a list of elements in the sample here.

    Returns
    -------
    `models` : list of dicts
        Contains X-ray diffraction patterns, material ids, and space group
        information about the elements queried.
    """
    mpr = MPRester(apiKey)
    numElements = len(elements)
    models = mpr.query(criteria={"elements": {"$all": [*elements]},
                                 "nelements": numElements},
                       properties=['xrd.Cu', 'material_id', 'spacegroup'])
    return models

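# A minimal usage sketch for get_structures, assuming the legacy
# `from pymatgen import MPRester` import used by these snippets and a valid
# Materials Project API key; the key string and element list are placeholders.
models = get_structures("YOUR_MP_API_KEY", ["Ga", "As"])
for model in models:
    print(model["material_id"], model["spacegroup"]["symbol"])
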
def import_materials(mp_ids, api_key=None):
    """
    Given a list of material ids, returns a list of Material objects with all
    available properties from the Materials Project.

    Args:
        mp_ids (list<str>): list of material ids whose information will be
            retrieved.
        api_key (str): api key to be used to conduct the query.

    Returns:
        (list<Material>): list of material objects with associated data.
    """
    mpr = MPRester(api_key)
    to_return = []
    query = mpr.query(criteria={"task_id": {'$in': mp_ids}},
                      properties=AVAILABLE_MP_PROPERTIES)
    for data in query:
        # properties of one mp-id
        mat = Material()
        tag_string = data['task_id']
        mat.add_property(Symbol('structure', data['structure'], [tag_string]))
        mat.add_property(
            Symbol('lattice_unit_cell', data['structure'].lattice.matrix,
                   [tag_string]))
        for key in data:
            if data[key] is not None and key in PROPNET_FROM_MP_NAME_MAPPING:
                prop_type = DEFAULT_SYMBOL_TYPES[PROPNET_FROM_MP_NAME_MAPPING[key]]
                p = Symbol(prop_type, data[key], [tag_string])
                mat.add_property(p)
        to_return.append(mat)
    return to_return

def download(key):
    """
    download(key)

    Downloads the dataset matching the optical property of interest.

    Inputs:
        key - The API key for downloading data from the Materials Project
              database.
    Outputs:
        1 - List of requested material properties to be trained on.
    """
    api = key[0]
    m = MPRester(api)
    criteria = {"elements": {"$all": ["O"]}}
    for props in key[1:]:
        properties = ["structure", "%s" % props]
        print("\nFetching %s data ..." % props)
        result = m.query(criteria, properties)
        logging.info("Convert to dataframe ...")
        props_data = pd.DataFrame(result)
        logging.info("Pickle :)")
        props_data.to_pickle("%s_data.pkl" % props)
    return key[1:]

def get_ids(api_key="Di2IZMunaeR8vr9w", name_list=None):
    """
    supported properties: ['energy', 'energy_per_atom', 'volume',
    'formation_energy_per_atom', 'nsites', 'unit_cell_formula',
    'pretty_formula', 'is_hubbard', 'elements', 'nelements', 'e_above_hull',
    'hubbards', 'is_compatible', 'spacegroup', 'task_ids', 'band_gap',
    'density', 'icsd_id', 'icsd_ids', 'cif', 'total_magnetization',
    'material_id', 'oxide_type', 'tags', 'elasticity']
    """
    """
    query operators: $gt >, $gte >=, $lt <, $lte <=, $ne !=, $in,
    $nin (not in), $or, $and, $not, $nor, $all
    """
    m = MPRester(api_key)
    ids = m.query(
        criteria={
            # 'pretty_formula': {"$in": name_list},
            'nelements': {"$lt": 3},
            # 'spacegroup.number': {"$in": [225]},
            # 'nsites': {"$lt": 5},
            # 'formation_energy_per_atom': {"$lt": 0},
            # "elements": {"$in": ["Al", "Co", "Cr", "Cu", "Fe", 'Ni'], "$all": "O"},
            # "elements": {"$in": list(combinations(["Al", "Co", "Cr", "Cu", "Fe", 'Ni'], 5))}
        },
        properties=["material_id"])
    print("number %s" % len(ids))
    return ids

def mp_filter(criteria, return_elements=False):
    """Get a list of Materials Project mp-ids that match a set of given criteria.

    The criteria should be in the format used for an MP query.

    Args:
        criteria (dict): Criteria that can be used in an MPRester query
        return_elements (bool): Also return the elements for that MP entry
    """
    m = MPRester(os.environ.get("MP_API_KEY"))
    if return_elements:
        properties = ['task_id', 'elements']
        struc_filter = m.query(criteria, properties)
        return struc_filter
    else:
        properties = ['task_id']
        struc_filter = m.query(criteria, properties)
        id_list = [i['task_id'] for i in struc_filter]
        return id_list

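# A hedged usage sketch for mp_filter: it assumes the MP_API_KEY environment
# variable holds a valid legacy API key, and the criteria below (binary oxides
# within 50 meV/atom of the hull) are illustrative, not from the original code.
criteria = {"elements": {"$all": ["O"]}, "nelements": 2,
            "e_above_hull": {"$lt": 0.05}}
mpids = mp_filter(criteria)
print(len(mpids), "matching task ids")
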
class MPDataRetrieval(BaseDataRetrieval):
    """
    Retrieves data from the Materials Project database.
    """

    def __init__(self, api_key=None):
        """
        Args:
            api_key: (str) Your Materials Project API key, or None if you've
                set up your pymatgen config.
        """
        self.mprester = MPRester(api_key=api_key)

    def api_link(self):
        return "https://materialsproject.org/wiki/index.php/The_Materials_API"

    def get_dataframe(self, criteria, properties, index_mpid=True, **kwargs):
        """
        Gets data from MP in a dataframe format. See api_link for more details.

        Args:
            all arguments including criteria, properties and index_mpid are
            the same as in get_data

        Returns (pandas.Dataframe):
        """
        data = self.get_data(criteria=criteria, properties=properties,
                             index_mpid=index_mpid, **kwargs)
        df = pd.DataFrame(data, columns=properties)
        if index_mpid:
            df = df.set_index("material_id")
        return df

    def get_data(self, criteria, properties, mp_decode=False, index_mpid=True):
        """
        Args:
            criteria: (str/dict) see MPRester.query() for a description of this
                parameter. String examples: "mp-1234", "Fe2O3", "Li-Fe-O",
                "\\*2O3". Dict example: {"band_gap": {"$gt": 1}}
            properties: (list) see MPRester.query() for a description of this
                parameter. Example: ["formula", "formation_energy_per_atom"]
            mp_decode: (bool) see MPRester.query() for a description of this
                parameter. Whether to decode to a Pymatgen object where
                possible.
            index_mpid: (bool) Whether to set the materials_id as the dataframe
                index.

        Returns ([dict]): a list of jsons that match the criteria and contain
            properties
        """
        if index_mpid and "material_id" not in properties:
            properties.append("material_id")
        data = self.mprester.query(criteria, properties, mp_decode)
        return data

def from_mp(mp_id, api_key, crystal=None, lattice_type=None, outfile=None):
    """
    This function retrieves the elastic tensor provided by the Materials
    Project database. Please provide an API key generated from
    https://materialsproject.org/dashboard

    Parameters
    ----------
    mp_id : str
        material id in the Materials Project database, e.g. ``mp_id = "mp-13"``
    api_key : str
        API key generated by the Materials Project database.
        https://materialsproject.org/dashboard
    crystal : str, optional
        Crystal type passed to the elastic-properties calculation.
        The default is None.
    lattice_type : str, optional
        Lattice type. The default is None.
    outfile : str, optional
        Output file name. The default is None.

    Returns
    -------
    None.
    """
    try:
        from pymatgen import MPRester
    except ImportError:
        print("To use this functionality please install pymatgen")

    # welcome message
    printer.print_mechelastic()

    crystal_type = crystal
    mpr = MPRester(api_key)
    data = mpr.query(
        criteria={"material_id": mp_id},
        properties=["formula", "elasticity", "structure"],
    )[0]
    elastic_tensor = array(data["elasticity"]["elastic_tensor_original"])
    mp_structure = data["structure"]
    symbols = [x for x in mp_structure.symbol_set]
    positions = mp_structure.frac_coords
    lattice = array(mp_structure.lattice.matrix)
    structure = Structure(symbols, positions, lattice)

    # elastic constants calculation for 3D materials
    elastic_properties = ElasticProperties(elastic_tensor, structure, crystal_type)
    elastic_properties.print_properties()

    # other
    # else: We don't need this
    #     elastic_bulk.elastic_const_bulk(
    #         cnew, snew, crystal, cell, density, natoms, totalmass
    #     )

    print("\nThanks! See you later. ")
    return

def getCompounds(apiKey):
    mpr = MPRester(apiKey)
    data = mpr.query({'elasticity': {'$exists': True}},
                     ['material_id', 'full_formula', 'elasticity',
                      'formation_energy_per_atom', 'band_gap', 'spacegroup',
                      'energy_per_atom'])
    return data

class MPDataRetrieval:
    """
    MPDataRetrieval is used to retrieve data from the Materials Project
    database, print the results, and convert them into an indexed Pandas
    dataframe.
    """

    def __init__(self, api_key=None):
        """
        Args:
            api_key: (str) Your Materials Project API key, or None if you've
                set up your pymatgen config.
        """
        self.mprester = MPRester(api_key=api_key)

    def get_dataframe(self, criteria, properties, mp_decode=False,
                      index_mpid=True):
        """
        Gets data from MP in a dataframe format. See API docs at
        https://materialsproject.org/wiki/index.php/The_Materials_API for
        more details.

        Args:
            criteria: (str/dict) see MPRester.query() for a description of this
                parameter. String examples: "mp-1234", "Fe2O3", "Li-Fe-O",
                "\\*2O3". Dict example: {"band_gap": {"$gt": 1}}
            properties: (list) see MPRester.query() for a description of this
                parameter. Example: ["formula", "formation_energy_per_atom"]
            mp_decode: (bool) see MPRester.query() for a description of this
                parameter. Whether to decode to a Pymatgen object where
                possible.
            index_mpid: (bool) Whether to set the materials_id as the dataframe
                index.

        Returns:
            A pandas Dataframe object
        """
        if index_mpid and "material_id" not in properties:
            properties.append("material_id")
        data = self.mprester.query(criteria, properties, mp_decode)
        df = pd.DataFrame(data, columns=properties)
        if index_mpid:
            df = df.set_index("material_id")
        return df

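# A minimal usage sketch for the MPDataRetrieval class above, assuming the
# pandas and legacy pymatgen MPRester imports it relies on; the API key,
# criteria, and property names are placeholders for illustration.
retriever = MPDataRetrieval(api_key="YOUR_MP_API_KEY")
df = retriever.get_dataframe(
    criteria={"band_gap": {"$gt": 3.0}, "nelements": 2},
    properties=["pretty_formula", "band_gap"])
print(df.head())  # DataFrame indexed by material_id
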
def load_compounds(self, api_key, properties, filename=None):
    mpr = MPRester(api_key)
    print("Loading Compounds....")
    all_compounds_data = mpr.query({}, properties=properties)
    self.all_compounds = self.clean_and_check_data(all_compounds_data)
    if filename is not None:
        with open(filename, 'wb') as file:
            pickle.dump(self.all_compounds, file)

def fetch_materials_data(root_dir):
    # properties of interest
    properties = [
        "material_id", "icsd_ids", "cif", "unit_cell_formula",
        "pretty_formula", "spacegroup", "crystal_system", "volume", "nsites",
        "elements", "nelements", "energy", "energy_per_atom",
        "formation_energy_per_atom", "e_above_hull", "band_gap", "elasticity",
        "density", "total_magnetization", "warnings", "tags"
    ]
    # MaterialsProject API settings
    my_API_key = "YOUR_MP_API_KEY"
    m = MPRester(api_key=my_API_key)
    # query all materials data
    query_all = m.query(criteria={}, properties=properties)
    MPdata_all = pd.DataFrame(entry.values() for entry in query_all)
    MPdata_all.columns = properties
    # write cif to file
    for _, irow in MPdata_all[["material_id", "cif"]].iterrows():
        cif_file = os.path.join(root_dir, irow["material_id"] + ".cif")
        with open(cif_file, 'w') as f:
            f.write(irow["cif"])
    MPdata_all = MPdata_all.drop(columns=["cif"])
    # materials with calculated band structures
    query_band = m.query(criteria={"has": "bandstructure"},
                         properties=["material_id"])
    band_filenames = [list(entry.values())[0] for entry in query_band]
    MPdata_all['has_band_structure'] = MPdata_all["material_id"].isin(band_filenames)
    # write properties to file
    out_file = os.path.join(root_dir, "MPdata_all.csv")
    MPdata_all.to_csv(out_file, sep=';', index=False,
                      header=MPdata_all.columns, mode='w')

def fetch_materials_data(out_file):
    # properties of interest
    properties = [
        "material_id", "icsd_ids", "unit_cell_formula", "pretty_formula",
        "spacegroup", "cif", "volume", "nsites", "elements", "nelements",
        "energy", "energy_per_atom", "formation_energy_per_atom",
        "e_above_hull", "band_gap", "density", "total_magnetization",
        "elasticity", "is_hubbard", "hubbards", "warnings", "tags",
    ]
    # MaterialsProject API settings
    my_API_key = "gxTAyXSm2GvCdWer"
    m = MPRester(api_key=my_API_key)
    # query data with calculated band structures
    mp_data = m.query(criteria={
        "has": "bandstructure",
    }, properties=properties)
    data_origin = []
    for entry in mp_data:
        plist = []
        for _, val in entry.items():
            plist.append(val)
        data_origin.append(plist)
    data_origin = pd.DataFrame(data_origin)
    data_origin.to_csv(out_file, sep=';', index=False,
                       header=properties, mode='w')

def materials_from_formula(formula, api_key=None):
    """
    Given a material chemical formula, returns all Material objects with a
    matching formula with all their available properties from the Materials
    Project.

    Args:
        formula (str): material's formula
        api_key (str): api key to be used to conduct the query.

    Returns:
        (list<Material>): all materials with matching formula
    """
    mpr = MPRester(api_key)
    query_results = mpr.query(criteria={'pretty_formula': formula},
                              properties=['task_id'])
    mpids = [entry['task_id'] for entry in query_results]
    return import_materials(mpids, api_key)

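# A hedged usage sketch: materials_from_formula wraps import_materials above,
# so it also depends on the Material/Symbol helpers and AVAILABLE_MP_PROPERTIES
# defined elsewhere in the originating project; the formula and key are
# placeholders.
materials = materials_from_formula("Fe2O3", api_key="YOUR_MP_API_KEY")
print(len(materials), "Materials Project entries match Fe2O3")
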
def mp_filter(criteria, api_key=None):
    """Get a list of Materials Project task ids that match a set of given
    criteria. The criteria should be in the format used for an MP query.

    TODO DWD: Move to more appropriate place. This is not oxidation state
    specific.

    Args:
        criteria (dict): Criteria that can be used in an MPRester query
        api_key (str): Materials Project API key (from your MP dashboard)
    """
    if not api_key:
        print('You need to supply an api key.')
    else:
        m = MPRester(api_key)
        properties = ['task_id']
        struc_filter = m.query(criteria, properties)
        id_list = [i['task_id'] for i in struc_filter]
        return id_list

def download_mp_data(form, spc_num, id, pbe, mbj, gw, path):
    mpr = MPRester('z9dAjdwO95SWgsUZ')
    data = mpr.query({'pretty_formula': form},
                     ['material_id', 'pretty_formula', 'spacegroup',
                      'band_gap', 'cif'])
    t = None
    for c in data:
        if c['pretty_formula'] == form and c['spacegroup']['number'] == spc_num:
            t = (c['material_id'], c['pretty_formula'],
                 c['spacegroup']['number'], c['band_gap'],
                 pbe, mbj, gw, str(id))
            # write the matching structure to a cif file and close the handle
            with open(path + '/' + c['material_id'] + '.cif', 'w') as cif_file:
                cif_file.write(c['cif'])
    if t is None:
        print(id, form, spc_num)
        return None
    else:
        return t

def RetrieveData(elements, properties, private_key):
    # Import the MPRester API library
    from pymatgen import MPRester

    # Specify private key for MP database
    mpr = MPRester(private_key)

    # Query MP-DB for all materials ids of binary alloys of transition metals.
    # Returns a list of dictionaries.
    entries = mpr.query({
        "elements": {"$in": elements},
        "nelements": 2
    }, properties)
    return entries

def do_query(args):
    m = MPRester()
    try:
        criteria = json.loads(args.criteria)
    except json.decoder.JSONDecodeError:
        criteria = args.criteria
    if args.structure:
        count = 0
        for d in m.query(criteria, properties=["structure", "task_id"]):
            s = d["structure"]
            formula = re.sub(r"\s+", "", s.formula)
            if args.structure == "poscar":
                fname = "POSCAR.%s_%s" % (d["task_id"], formula)
            else:
                fname = "%s-%s.%s" % (d["task_id"], formula, args.structure)
            s.to(filename=fname)
            count += 1
        print("%d structures written!" % count)
    elif args.entries:
        entries = m.get_entries(criteria)
        dumpfn(entries, args.entries)
        print("%d entries written to %s!" % (len(entries), args.entries))
    else:
        props = ["e_above_hull", "spacegroup"]
        props += args.data
        entries = m.get_entries(criteria, property_data=props)
        t = []
        headers = ["mp-id", "Formula", "Spacegroup", "E/atom (eV)",
                   "E above hull (eV)"] + args.data
        for e in entries:
            row = [e.entry_id, e.composition.reduced_formula,
                   e.data["spacegroup"]["symbol"], e.energy_per_atom,
                   e.data["e_above_hull"]]
            row += [e.data[s] for s in args.data]
            t.append(row)
        t = sorted(t, key=lambda x: x[headers.index("E above hull (eV)")])
        print(tabulate(t, headers=headers, tablefmt="pipe", floatfmt=".3f"))

def get_materials_list():
    """Fetch data (from local cache if available)."""
    try:
        _log.info('Trying data cache for materials')
        with open('materials_list.pickle', 'rb') as f:
            return pickle.load(f)
    except IOError:
        _log.info('Fetching remote data')
        m = MPRester()
        materials_list = m.query(
            criteria={"elasticity": {"$exists": True}},
            properties=['pretty_formula', 'reduced_cell_formula', 'task_id',
                        "elasticity.K_VRH", 'volume', 'density',
                        'formation_energy_per_atom', 'nsites'])
        # Save for later
        with open('materials_list.pickle', 'wb') as f:
            pickle.dump(materials_list, f)
        _log.info('Data loaded')
        return materials_list

def run(mpfile, **kwargs):
    from pymatgen import MPRester, Composition
    import pandas as pd

    input_file = mpfile.document['_hdata'].pop('input_file')
    file_path = os.path.join(os.environ['HOME'], 'work', input_file)
    if not os.path.exists(file_path):
        return 'Please upload', file_path
    df_dct = pd.read_excel(file_path)
    columns_units = [
        ('A-Site', ''), ('B-Site', ''), ('a', 'Å'),
        ('Eᶠ|ABO₃', 'eV'), ('Eᶠ|Yᴮ', 'eV'), ('Eᶠ|Vᴼ', 'eV'),
        ('Eᶠ|Hᵢ', 'eV'), ('ΔEᵢ|Yᴮ-Hᵢ', 'eV')
    ]
    columns = df_dct.columns
    mpr = MPRester(endpoint="http://materialsproject.org:8080/rest/v2")

    for row_idx, row in df_dct.iterrows():
        formula = '{}{}O3'.format(row[columns[0]], row[columns[1]])
        comp = Composition(formula)
        crit = {"reduced_cell_formula": comp.to_reduced_dict, "nsites": 5}
        docs = mpr.query(criteria=crit, properties=["task_id", "volume"])
        if len(docs) > 1:
            volume = row[columns[2]]**3
            volumes = pd.np.array([r['volume'] for r in docs])
            idx = pd.np.abs(volumes - volume).argmin()
            identifier = docs[idx]['task_id']
            continue
        elif not docs:
            print formula, 'not found on MP'
            continue
        else:
            identifier = docs[0]['task_id']
            print formula, '->', identifier

        d = RecursiveDict()
        for col, (key, unit) in zip(columns, columns_units):
            d[key] = clean_value(row[col], unit)
        mpfile.add_hierarchical_data(nest_dict(d, ['data']), identifier=identifier)

    'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'La', 'Ce', 'Pr', 'Nd', 'Pm',
    'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Ac', 'Th',
    'Pa', 'U', 'Np', 'Pu'
]

# search_key in Materials Project, including elasticity, piezo
search_key = 'elasticity'
data = mpr.query(criteria={
    'elements': {'$in': element_list},
    'has_bandstructure': True,
    search_key: {'$exists': True},
}, properties=[
    'material_id', 'pretty_formula', 'nelements', 'nsites', 'is_hubbard',
    'is_compatible', 'volume', 'density', 'energy_per_atom',
    'formation_energy_per_atom', 'structure', search_key
])

if search_key == 'elasticity':
    new_file = open('./training/' + search_key + '/' + search_key + '.csv',
                    'w', encoding='utf-8')
    csv_writer = csv.writer(new_file)
    new_file_warnings = open(
        './training/elasticity/elasticity_warnings.csv',

def run(mpfile, hosts=None, download=False):
    mpr = MPRester()
    fpath = f"{project}.xlsx"

    if download or not os.path.exists(fpath):
        figshare_id = 1546772
        url = "https://api.figshare.com/v2/articles/{}".format(figshare_id)
        print("get figshare article {}".format(figshare_id))
        r = requests.get(url)
        figshare = json.loads(r.content)
        print("version =", figshare["version"])  # TODO set manually in "other"?

        print("read excel from figshare into DataFrame")
        df_dct = None
        for d in figshare["files"]:
            if "xlsx" in d["name"]:
                # Dict of DataFrames is returned, with keys representing sheets
                df_dct = read_excel(d["download_url"], sheet_name=None)
                break
        if df_dct is None:
            print("no excel sheet found on figshare")
            return

        print("save excel to disk")
        writer = ExcelWriter(fpath)
        for sheet, df in df_dct.items():
            df.to_excel(writer, sheet)
        writer.save()
    else:
        df_dct = read_excel(fpath, sheet_name=None)

    print(len(df_dct), "sheets loaded.")

    print("looping hosts ...")
    host_info = df_dct["Host Information"]
    host_info.set_index(host_info.columns[0], inplace=True)
    host_info.dropna(inplace=True)

    for idx, host in enumerate(host_info):
        if hosts is not None:
            if isinstance(hosts, int) and idx + 1 > hosts:
                break
            elif isinstance(hosts, list) and not host in hosts:
                continue

        print("get mp-id for {}".format(host))
        mpid = None
        for doc in mpr.query(criteria={"pretty_formula": host},
                             properties={"task_id": 1}):
            if "decomposes_to" not in doc["sbxd"][0]:
                mpid = doc["task_id"]
                break
        if mpid is None:
            print("mp-id for {} not found".format(host))
            continue

        print("add host info for {}".format(mpid))
        hdata = host_info[host].to_dict(into=RecursiveDict)
        for k in list(hdata.keys()):
            v = hdata.pop(k)
            ks = k.split()
            if ks[0] not in hdata:
                hdata[ks[0]] = RecursiveDict()
            unit = ks[-1][1:-1] if ks[-1].startswith("[") else ""
            subkey = "_".join(ks[1:-1] if unit else ks[1:]).split(",")[0]
            if subkey == "lattice_constant":
                unit = "Å"
            try:
                hdata[ks[0]][subkey] = clean_value(
                    v, unit.replace("angstrom", "Å"))
            except ValueError:
                hdata[ks[0]][subkey] = v
        hdata["formula"] = host
        df = df_dct["{}-X".format(host)]
        rows = list(isnull(df).any(1).nonzero()[0])
        if rows:
            cells = df.iloc[rows].dropna(how="all").dropna(
                axis=1)[df.columns[0]]
            note = cells.iloc[0].replace("following", cells.iloc[1])[:-1]
            hdata["note"] = note
            df.drop(rows, inplace=True)
        mpfile.add_hierarchical_data(nest_dict(hdata, ["data"]),
                                     identifier=mpid)

        print("add table for D₀/Q data for {}".format(mpid))
        df.set_index(df["Solute element number"], inplace=True)
        df.drop("Solute element number", axis=1, inplace=True)
        df.columns = df.iloc[0]
        df.index.name = "index"
        df.drop("Solute element name", inplace=True)
        df = df.T.reset_index()
        if str(host) == "Fe":
            df_D0_Q = df[[
                "Solute element name",
                "Solute D0, paramagnetic [cm^2/s]",
                "Solute Q, paramagnetic [eV]",
            ]]
        elif hdata["Host"]["crystal_structure"] == "HCP":
            df_D0_Q = df[[
                "Solute element name",
                "Solute D0 basal [cm^2/s]",
                "Solute Q basal [eV]",
            ]]
        else:
            df_D0_Q = df[[
                "Solute element name", "Solute D0 [cm^2/s]", "Solute Q [eV]"
            ]]
        df_D0_Q.columns = ["Solute", "D₀ [cm²/s]", "Q [eV]"]
        anums = [z[el] for el in df_D0_Q["Solute"]]
        df_D0_Q.insert(0, "Z", Series(anums, index=df_D0_Q.index))
        df_D0_Q.sort_values("Z", inplace=True)
        df_D0_Q.reset_index(drop=True, inplace=True)
        mpfile.add_data_table(mpid, df_D0_Q, "D₀_Q")

        if hdata["Host"]["crystal_structure"] == "BCC":
            print("add table for hop activation barriers for {} (BCC)".format(mpid))
            columns_E = (
                ["Hop activation barrier, E_{} [eV]".format(i) for i in range(2, 5)]
                + ["Hop activation barrier, E'_{} [eV]".format(i) for i in range(3, 5)]
                + ["Hop activation barrier, E''_{} [eV]".format(i) for i in range(3, 5)]
                + ["Hop activation barrier, E_{} [eV]".format(i) for i in range(5, 7)]
            )
            df_E = df[["Solute element name"] + columns_E]
            df_E.columns = (["Solute"]
                            + ["E{} [eV]".format(i) for i in ["₂", "₃", "₄"]]
                            + ["E`{} [eV]".format(i) for i in ["₃", "₄"]]
                            + ["E``{} [eV]".format(i) for i in ["₃", "₄"]]
                            + ["E{} [eV]".format(i) for i in ["₅", "₆"]])
            mpfile.add_data_table(mpid, df_E, "hop_activation_barriers")

            print("add table for hop attempt frequencies for {} (BCC)".format(mpid))
            columns_v = (
                ["Hop attempt frequency, v_{} [THz]".format(i) for i in range(2, 5)]
                + ["Hop attempt frequency, v'_{} [THz]".format(i) for i in range(3, 5)]
                + ["Hop attempt frequency, v''_{} [THz]".format(i) for i in range(3, 5)]
                + ["Hop attempt frequency, v_{} [THz]".format(i) for i in range(5, 7)]
            )
            df_v = df[["Solute element name"] + columns_v]
            df_v.columns = (["Solute"]
                            + ["v{} [THz]".format(i) for i in ["₂", "₃", "₄"]]
                            + ["v`{} [THz]".format(i) for i in ["₃", "₄"]]
                            + ["v``{} [THz]".format(i) for i in ["₃", "₄"]]
                            + ["v{} [THz]".format(i) for i in ["₅", "₆"]])
            mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies")

        elif hdata["Host"]["crystal_structure"] == "FCC":
            print("add table for hop activation barriers for {} (FCC)".format(mpid))
            columns_E = ["Hop activation barrier, E_{} [eV]".format(i)
                         for i in range(5)]
            df_E = df[["Solute element name"] + columns_E]
            df_E.columns = ["Solute"] + [
                "E{} [eV]".format(i) for i in ["₀", "₁", "₂", "₃", "₄"]
            ]
            mpfile.add_data_table(mpid, df_E, "hop_activation_barriers")

            print("add table for hop attempt frequencies for {} (FCC)".format(mpid))
            columns_v = ["Hop attempt frequency, v_{} [THz]".format(i)
                         for i in range(5)]
            df_v = df[["Solute element name"] + columns_v]
            df_v.columns = ["Solute"] + [
                "v{} [THz]".format(i) for i in ["₀", "₁", "₂", "₃", "₄"]
            ]
            mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies")

        elif hdata["Host"]["crystal_structure"] == "HCP":
            print("add table for hop activation barriers for {} (HCP)".format(mpid))
            columns_E = [
                "Hop activation barrier, E_X [eV]",
                "Hop activation barrier, E'_X [eV]",
                "Hop activation barrier, E_a [eV]",
                "Hop activation barrier, E'_a [eV]",
                "Hop activation barrier, E_b [eV]",
                "Hop activation barrier, E'_b [eV]",
                "Hop activation barrier, E_c [eV]",
                "Hop activation barrier, E'_c [eV]",
            ]
            df_E = df[["Solute element name"] + columns_E]
            df_E.columns = ["Solute"] + [
                "Eₓ [eV]", "E`ₓ [eV]", "Eₐ [eV]", "E`ₐ [eV]",
                "E_b [eV]", "E`_b [eV]", "Eꪱ [eV]", "E`ꪱ [eV]",
            ]
            mpfile.add_data_table(mpid, df_E, "hop_activation_barriers")

            print("add table for hop attempt frequencies for {} (HCP)".format(mpid))
            columns_v = (["Hop attempt frequency, v_a [THz]"]
                         + ["Hop attempt frequency, v_X [THz]"])
            df_v = df[["Solute element name"] + columns_v]
            df_v.columns = ["Solute"] + ["vₐ [THz]"] + ["vₓ [THz]"]
            mpfile.add_data_table(mpid, df_v, "hop_attempt_frequencies")

    print("DONE")

def cif_lib_build(self, crystal_system, size_limit=None):
    '''function to build cif and pdf library based on space group symbol

    Parameters
    ----------
    crystal_system : str
        name of crystal system. It is capitalized, like CUBIC.
        space group symbol will be generated by get_symbol_list method
    size_limit : int
        optional. Upper limit of data pulled out per symbol
    '''
    self.crystal_system = crystal_system
    space_group_symbol = self.get_symbol_list(crystal_system)
    if isinstance(space_group_symbol, list):
        space_group_symbol_set = space_group_symbol
    else:
        space_group_symbol_set = list(space_group_symbol)
    ## changing dir
    data_dir = os.path.join(self.working_dir, crystal_system)
    self.data_dir = data_dir
    self._makedirs(data_dir)
    os.chdir(data_dir)
    if os.getcwd() == data_dir:
        print('Library will be built at %s' % data_dir)
    else:
        e = 'Weird, return'
        raise RuntimeError(e)
    # summary lists
    missed_list = []  # reference
    m_id_list = []  # reference for searches that have been done in the past
    time_info = time.strftime('%Y-%m-%d')
    # create dirs, cif and calculated dir
    cif_dir = os.path.join(data_dir, 'cif_data')
    self._makedirs(cif_dir)
    self.cif_dir = cif_dir
    # looping
    for space_group_symbol in space_group_symbol_set:
        print('Building library with space_group symbol: {}'.format(space_group_symbol))
        ## search query
        m = MPRester(self.API_key)
        search = m.query(criteria={"spacegroup.symbol": space_group_symbol},
                         properties=["material_id"])
        if search:
            ## crazy looping
            if size_limit:
                dim = 400  # 400 data sets per symbol
            else:
                dim = len(search)
            print('Pull out %s data sets' % dim)
            print('Now, starts to save cif and compute pdf...')
            for i in range(dim):
                # part 1: grab cif files from data base
                m_id = search[i]['material_id']
                m_id_list.append(m_id)
                m_struc = m.get_structure_by_material_id(m_id)
                m_formula = m_struc.formula
                m_name = m_formula.replace(' ', '')  # material name
                cif_w = CifWriter(m_struc)
                cif_name = '{}_{}.cif'.format(space_group_symbol, m_name)
                cif_w_name = os.path.join(cif_dir, cif_name)
                if os.path.isfile(cif_w_name):
                    print('already have {}, skip'.format(cif_name))
                    pass  # skip files that already exist
                else:
                    cif_w.write_file(cif_w_name)
                    print('{} has been saved'.format(cif_name))
        else:
            print('Hmm, no result. Something wrong')
            missed_list.append(space_group_symbol)
            pass
    m_id_list_name = '{}_{}_material_id.txt'.format(crystal_system, time_info)
    m_id_list_w_name = os.path.join(data_dir, m_id_list_name)
    np.savetxt(m_id_list_w_name, m_id_list, fmt='%s')
    print('''SUMMARY: for {} crystal system,
    Symbols {} can't be found in the database'''.format(crystal_system, missed_list))
    return cif_dir

from pymatgen import MPRester

mpr = MPRester("78OAi0lR9kdkyiAi")
mpids = ['mp-1021516', 'mp-9580']  # replace with your list of missing material ids
results = mpr.query({"task_ids": {"$in": mpids}}, ["task_id"])
assert len(results) == len(mpids)

    elif not job['is_ordered']:
        print 'REJECTED WORKFLOW FOR {} - invalid structure (disordered)'.format(
            snl.structure.formula)
    else:
        return True
    return False


if __name__ == '__main__':
    create_mincoll()
    mp_unique_comps = set()
    pf_unique_comps = set()
    coll = db['pauling_file_mpmin']
    new_coll = db['pf_to_mp']
    new_coll.drop()
    mp_comps = mpr.query(criteria={},
                         properties=["snl_final.reduced_cell_formula_abc"])
    print 'Total number of MP comps = {}'.format(len(mp_comps))
    for mp_comp in mp_comps:
        mp_unique_comps.add(mp_comp["snl_final.reduced_cell_formula_abc"])
    print 'Number of unique MP comps = {}'.format(len(mp_unique_comps))
    x = 0
    for doc in coll.find().batch_size(75):
        x += 1
        if x % 1000 == 0:
            print x
        # if x > 1:
        #     break
        pf_unique_comps.add(doc['metadata']['_structure']['reduced_cell_formula_abc'])
        if doc['metadata']['_structure']['reduced_cell_formula_abc'] not in mp_unique_comps:
            new_coll.insert(doc_to_snl(doc).as_dict())
    # with open('PaulingFile_example.json', 'w') as outfile:

from pymatgen import MPRester
from pprint import pprint
import os

JINNYAPI = os.environ['materials_project']
m = MPRester(JINNYAPI)
data = m.query({'piezo': {'$exists': True}},
               ['material_id', 'pretty_formula', 'piezo'])
pprint(data)

data = m.query(criteria={
    "spacegroup.number": {
        "$in": [
            75, 76, 77, 78, 79, 80, 81, 82, 89, 90, 91, 92, 93, 94, 95, 96,
            97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
            111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 168,
            169, 170, 171, 172, 173, 174, 177, 178, 179, 180, 181, 182, 183,
            184, 185, 186, 187, 188, 189, 190, 195, 196, 197, 198, 207, 208,
            209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220
        ]
    },
    "band_gap": 0,
    "magnetic_type": {"$in": ["FM", "AFM", "FiM"]},
    "nelements": {"$lte": 3},
    # include SPT1 elements and exclude Not_SPT1 under a single key, since a
    # repeated "elements" key would silently drop the first filter
    "elements": {"$in": SPT1, "$nin": Not_SPT1}
}, properties=[
    "pretty_formula", "material_id", "formation_energy_per_atom",
    "unit_cell_formula", "spacegroup", "structure", "magnetic_type",
    "total_magnetization"
], chunk_size=0)

def test():
    from pymatgen import MPRester, Composition
    m = MPRester('DhmFQPuibZo8JtXn')
    results = m.query("**O", ['density', 'task_id', 'structure'])
    print(len(results))

from pymatgen.io.zeoone import get_voronoi_nodes, get_void_volume_surfarea, \
    get_high_accuracy_voronoi_nodes

try:
    from zeo.netstorage import AtomNetwork, VoronoiNetwork
    from zeo.area_volume import volume, surface_area
    from zeo.cluster import get_nearest_largest_diameter_highaccuracy_vornode, \
        generate_simplified_highaccuracy_voronoi_network, \
        prune_voronoi_network_close_node
    zeo_found = True
except ImportError:
    zeo_found = False

m = MPRester(key)

# def get_POSCAR(elements, interstitial, supercell_size, ):
results = m.query("Fe", ['structure'])
print(type(results[2]['structure']))
# Mg_cell = mg.Lattice.hexagonal(3.184, 5.249)
# print(Mg_cell.lengths_and_angles)
# Mg_Lattice = mg.Structure(Mg_cell, ["Mg", "Mg"],
#                           [[.333333333, .66666666666, .25],
#                            [.66666666666, .33333333333333, .75]])
print(results[2]['structure'])
Mg_Lattice = results[2]['structure']
# Mg_Lattice = results[0]
Mg_Interstitial = point_defects.Interstitial(
    Mg_Lattice, {u"Fe": 0}, {u"Fe": 1.26}, 'voronoi_vertex',
    accuracy=u'Normal', symmetry_flag=True, oxi_state=False)
print(Mg_Interstitial.enumerate_defectsites())
print(Mg_Interstitial.defectsite_count())

def get_avg_Z_num(material, elem_list):
    Z_list = []
    for element in elem_list:
        Z_list += [log(Element(element).Z)]
    return np.average(Z_list)


def get_sd_X(material, elem_list):
    X_list = []
    for element in elem_list:
        X_list += [log(Element(element).X)]
    return np.std(X_list)


key = os.environ['MAPI_KEY']
m = MPRester(key)
materials_list = m.query(
    criteria={"elasticity": {"$exists": True}},
    properties=['pretty_formula', 'reduced_cell_formula', "elasticity.K_VRH",
                'volume', 'density', 'formation_energy_per_atom', 'nsites'])


def vectorize_and_catalog(materials):
    vector_list = []
    catalog = {}
    for material in materials:
        element_list = element_lister(material)
        vector = [get_ln_volume(material, element_list),
                  get_c_energy_per_atom(material, element_list),
                  get_avg_Z_num(material, element_list),
                  get_sd_X(material, element_list)]
        vector_list += [vector]
        catalog[tuple(vector)] = material
    return vector_list, catalog


def vector_to_material(vector):
    return catalog[tuple(vector)]

from __future__ import division
import numpy as np
import matplotlib.pyplot as plt
from pymatgen import MPRester

# import/truncate raw data (volume, density, energies, etc)
m = MPRester("ID")
data = m.query(criteria={"elasticity": {"$exists": True}},
               properties=["pretty_formula", "volume", "K_VRH"])
# for now, import parameters and also bulk modulus (known); in future we will
# use parameters to select bulk moduli to be calculated

# subfunction for formatting

# define desired objectives and bounds/tolerances (GPa)
tolcost = 1e-5
kdes = 205

# define tuning/mating parameters
theta = 1/2
w1 = 1

# create objective function
cost = lambda k: w1*((kdes-k)/kdes)**2

# randomly sample data and calculate costs

class MPDataRetrieval(BaseDataRetrieval):
    """
    Retrieves data from the Materials Project database.

    If you use this data retrieval class, please additionally cite:

    Ong, S.P., Cholia, S., Jain, A., Brafman, M., Gunter, D., Ceder, G.,
    Persson, K.A., 2015. The Materials Application Programming Interface
    (API): A simple, flexible and efficient API for materials data based on
    REpresentational State Transfer (REST) principles. Computational
    Materials Science 97, 209–215.
    https://doi.org/10.1016/j.commatsci.2014.10.037
    """

    def __init__(self, api_key=None):
        """
        Args:
            api_key: (str) Your Materials Project API key, or None if you've
                set up your pymatgen config.
        """
        self.mprester = MPRester(api_key=api_key)

    def api_link(self):
        return "https://materialsproject.org/wiki/index.php/The_Materials_API"

    def get_dataframe(self, criteria, properties, index_mpid=True, **kwargs):
        """
        Gets data from MP in a dataframe format. See api_link for more details.

        Args:
            criteria (dict): the same as in get_data
            properties ([str]): the same properties supported as in get_data
                plus: "structure", "initial_structure", "final_structure",
                "bandstructure" (line mode), "bandstructure_uniform",
                "phonon_bandstructure", "phonon_ddb", "phonon_bandstructure",
                "phonon_dos". Note that for a long list of compounds, it may
                take a long time to retrieve some of these objects.
            index_mpid (bool): the same as in get_data
            kwargs (dict): the same keyword arguments as in get_data

        Returns (pandas.Dataframe):
        """
        data = self.get_data(criteria=criteria, properties=properties,
                             index_mpid=index_mpid, **kwargs)
        df = pd.DataFrame(data, columns=properties)
        for prop in ["dos", "phonon_dos", "phonon_bandstructure", "phonon_ddb"]:
            if prop in properties:
                df[prop] = self.try_get_prop_by_material_id(
                    prop=prop, material_id_list=df["material_id"].values)
        if "bandstructure" in properties:
            df["bandstructure"] = self.try_get_prop_by_material_id(
                prop="bandstructure",
                material_id_list=df["material_id"].values,
                line_mode=True)
        if "bandstructure_uniform" in properties:
            df["bandstructure_uniform"] = self.try_get_prop_by_material_id(
                prop="bandstructure",
                material_id_list=df["material_id"].values,
                line_mode=False)
        if index_mpid:
            df = df.set_index("material_id")
        return df

    def get_data(self, criteria, properties, mp_decode=False, index_mpid=True):
        """
        Args:
            criteria: (str/dict) see MPRester.query() for a description of this
                parameter. String examples: "mp-1234", "Fe2O3", "Li-Fe-O",
                "\\*2O3". Dict example: {"band_gap": {"$gt": 1}}
            properties: (list) see MPRester.query() for a description of this
                parameter. Example: ["formula", "formation_energy_per_atom"]
            mp_decode: (bool) see MPRester.query() for a description of this
                parameter. Whether to decode to a Pymatgen object where
                possible.
            index_mpid: (bool) Whether to set the materials_id as the dataframe
                index.

        Returns ([dict]): a list of jsons that match the criteria and contain
            properties
        """
        if index_mpid and "material_id" not in properties:
            properties.append("material_id")
        data = self.mprester.query(criteria, properties, mp_decode)
        return data

    def try_get_prop_by_material_id(self, prop, material_id_list, **kwargs):
        """
        Call the relevant get_prop_by_material_id. "prop" is a property such as
        bandstructure that is not readily available in supported properties of
        the get_data function but via the get_bandstructure_by_material_id
        method for example.

        Args:
            prop (str): the name of the property. Options are:
                "bandstructure", "dos", "phonon_dos", "phonon_bandstructure",
                "phonon_ddb"
            material_id_list ([str]): list of material_id of compounds
            kwargs (dict): other keyword arguments that get_*_by_material_id
                may have; e.g. line_mode in get_bandstructure_by_material_id

        Returns ([target prop object or NaN]): If the target property is not
            available for a certain material_id, NaN is returned.
        """
        method = getattr(self.mprester, "get_{}_by_material_id".format(prop))
        props = []
        for material_id in material_id_list:
            try:
                props.append(method(material_id=material_id, **kwargs))
            except MPRestError:
                props.append(float('NaN'))
        return props

def run(mpfile, hosts=None, download=False, **kwargs):
    # mpfile.unique_mp_cat_ids = False
    from pymatgen import MPRester
    mpr = MPRester()

    fpath = os.path.join(os.environ['HOME'], 'work',
                         'dilute_solute_diffusion.xlsx')

    if download or not os.path.exists(fpath):
        figshare_id = mpfile.hdata.general['info']['figshare_id']
        url = 'https://api.figshare.com/v2/articles/{}'.format(figshare_id)
        print 'get figshare article {}'.format(figshare_id)
        r = requests.get(url)
        figshare = json.loads(r.content)
        mpfile.document['_hdata']['version'] = figshare['version']

        print 'read excel from figshare into DataFrame'
        df_dct = None
        for d in figshare['files']:
            if 'xlsx' in d['name']:
                # Dict of DataFrames is returned, with keys representing sheets
                df_dct = read_excel(d['download_url'], sheet_name=None)
                break
        if df_dct is None:
            print 'no excel sheet found on figshare'
            return

        print 'save excel to disk'
        writer = ExcelWriter(fpath)
        for sheet, df in df_dct.items():
            df.to_excel(writer, sheet)
        writer.save()
    else:
        df_dct = read_excel(fpath, sheet_name=None)

    print len(df_dct), 'sheets loaded.'

    print 'looping hosts ...'
    host_info = df_dct['Host Information']
    host_info.set_index(host_info.columns[0], inplace=True)
    host_info.dropna(inplace=True)

    for idx, host in enumerate(host_info):
        if hosts is not None:
            if isinstance(hosts, int) and idx+1 > hosts:
                break
            elif isinstance(hosts, list) and not host in hosts:
                continue

        print 'get mp-id for {}'.format(host)
        mpid = None
        for doc in mpr.query(criteria={'pretty_formula': host},
                             properties={'task_id': 1}):
            if doc['sbxd'][0]['decomposes_to'] is None:
                mpid = doc['task_id']
                break
        if mpid is None:
            print 'mp-id for {} not found'.format(host)
            continue

        print 'add host info for {}'.format(mpid)
        hdata = host_info[host].to_dict(into=RecursiveDict)
        for k in hdata.keys():
            v = hdata.pop(k)
            ks = k.split()
            if ks[0] not in hdata:
                hdata[ks[0]] = RecursiveDict()
            unit = ks[-1][1:-1] if ks[-1].startswith('[') else ''
            subkey = '_'.join(ks[1:-1] if unit else ks[1:]).split(',')[0]
            if subkey == "lattice_constant":
                unit = u'Å'
            try:
                hdata[ks[0]][subkey] = clean_value(v, unit.replace('angstrom', u'Å'))
            except ValueError:
                hdata[ks[0]][subkey] = v
        hdata['formula'] = host
        df = df_dct['{}-X'.format(host)]
        rows = list(isnull(df).any(1).nonzero()[0])
        if rows:
            cells = df.ix[rows].dropna(how='all').dropna(axis=1)[df.columns[0]]
            note = cells.iloc[0].replace('following', cells.iloc[1])[:-1]
            hdata['note'] = note
            df.drop(rows, inplace=True)
        mpfile.add_hierarchical_data(nest_dict(hdata, ['data']), identifier=mpid)

        print 'add table for D₀/Q data for {}'.format(mpid)
        df.set_index(df['Solute element number'], inplace=True)
        df.drop('Solute element number', axis=1, inplace=True)
        df.columns = df.ix[0]
        df.index.name = 'index'
        df.drop('Solute element name', inplace=True)
        df = df.T.reset_index()
        if str(host) == 'Fe':
            df_D0_Q = df[['Solute element name',
                          'Solute D0, paramagnetic [cm^2/s]',
                          'Solute Q, paramagnetic [eV]']]
        elif hdata['Host']['crystal_structure'] == 'HCP':
            df_D0_Q = df[['Solute element name', 'Solute D0 basal [cm^2/s]',
                          'Solute Q basal [eV]']]
        else:
            df_D0_Q = df[['Solute element name', 'Solute D0 [cm^2/s]',
                          'Solute Q [eV]']]
        df_D0_Q.columns = ['El.', 'D₀ [cm²/s]', 'Q [eV]']
        mpfile.add_data_table(mpid, df_D0_Q, 'D₀_Q')

        if hdata['Host']['crystal_structure'] == 'BCC':
            print 'add table for hop activation barriers for {} (BCC)'.format(mpid)
            columns_E = [
                'Hop activation barrier, E_{} [eV]'.format(i) for i in range(2, 5)
            ] + [
                "Hop activation barrier, E'_{} [eV]".format(i) for i in range(3, 5)
            ] + [
                "Hop activation barrier, E''_{} [eV]".format(i) for i in range(3, 5)
            ] + [
                'Hop activation barrier, E_{} [eV]'.format(i) for i in range(5, 7)
            ]
            df_E = df[['Solute element name'] + columns_E]
            df_E.columns = ['El.'] + [
                'E{} [eV]'.format(i) for i in ['₂', '₃', '₄']
            ] + [
                'E`{} [eV]'.format(i) for i in ['₃', '₄']
            ] + [
                'E``{} [eV]'.format(i) for i in ['₃', '₄']
            ] + [
                'E{} [eV]'.format(i) for i in ['₅', '₆']
            ]
            mpfile.add_data_table(mpid, df_E, 'hop_activation_barriers')

            print 'add table for hop attempt frequencies for {} (BCC)'.format(mpid)
            columns_v = [
                'Hop attempt frequency, v_{} [THz]'.format(i) for i in range(2, 5)
            ] + [
                "Hop attempt frequency, v'_{} [THz]".format(i) for i in range(3, 5)
            ] + [
                "Hop attempt frequency, v''_{} [THz]".format(i) for i in range(3, 5)
            ] + [
                'Hop attempt frequency, v_{} [THz]'.format(i) for i in range(5, 7)
            ]
            df_v = df[['Solute element name'] + columns_v]
            df_v.columns = ['El.'] + [
                'v{} [THz]'.format(i) for i in ['₂', '₃', '₄']
            ] + [
                'v`{} [THz]'.format(i) for i in ['₃', '₄']
            ] + [
                'v``{} [THz]'.format(i) for i in ['₃', '₄']
            ] + [
                'v{} [THz]'.format(i) for i in ['₅', '₆']
            ]
            mpfile.add_data_table(mpid, df_v, 'hop_attempt_frequencies')

        elif hdata['Host']['crystal_structure'] == 'FCC':
            print 'add table for hop activation barriers for {} (FCC)'.format(mpid)
            columns_E = ['Hop activation barrier, E_{} [eV]'.format(i)
                         for i in range(5)]
            df_E = df[['Solute element name'] + columns_E]
            df_E.columns = ['El.'] + ['E{} [eV]'.format(i)
                                      for i in ['₀', '₁', '₂', '₃', '₄']]
            mpfile.add_data_table(mpid, df_E, 'hop_activation_barriers')

            print 'add table for hop attempt frequencies for {} (FCC)'.format(mpid)
            columns_v = ['Hop attempt frequency, v_{} [THz]'.format(i)
                         for i in range(5)]
            df_v = df[['Solute element name'] + columns_v]
            df_v.columns = ['El.'] + ['v{} [THz]'.format(i)
                                      for i in ['₀', '₁', '₂', '₃', '₄']]
            mpfile.add_data_table(mpid, df_v, 'hop_attempt_frequencies')

        elif hdata['Host']['crystal_structure'] == 'HCP':
            print 'add table for hop activation barriers for {} (HCP)'.format(mpid)
            columns_E = [
                "Hop activation barrier, E_X [eV]",
                "Hop activation barrier, E'_X [eV]",
                "Hop activation barrier, E_a [eV]",
                "Hop activation barrier, E'_a [eV]",
                "Hop activation barrier, E_b [eV]",
                "Hop activation barrier, E'_b [eV]",
                "Hop activation barrier, E_c [eV]",
                "Hop activation barrier, E'_c [eV]"
            ]
            df_E = df[['Solute element name'] + columns_E]
            df_E.columns = ['El.'] + [
                'Eₓ [eV]', 'E`ₓ [eV]', 'Eₐ [eV]', 'E`ₐ [eV]',
                'E_b [eV]', 'E`_b [eV]', 'Eꪱ [eV]', 'E`ꪱ [eV]'
            ]
            mpfile.add_data_table(mpid, df_E, 'hop_activation_barriers')

            print 'add table for hop attempt frequencies for {} (HCP)'.format(mpid)
            columns_v = (['Hop attempt frequency, v_a [THz]']
                         + ['Hop attempt frequency, v_X [THz]'])
            df_v = df[['Solute element name'] + columns_v]
            df_v.columns = ['El.'] + ['vₐ [THz]'] + ['vₓ [THz]']
            mpfile.add_data_table(mpid, df_v, 'hop_attempt_frequencies')

    print mpfile
    print 'DONE'

"3m", "6", "-6", "622", "6mm", "-6m2", "23", "432", "-43m" ] #find centro and non-centro materials based on pt grps cen_data = [] for item in centrosymm: cen_data.append( mpr.query(criteria={ "has": { "$all": ["diel", "elasticity"] }, "band_gap": { "$gt": 0.5 }, "spacegroup.point_group": { "$all": [item] }, "e_above_hull": { "$lt": 0.1 }, "nelements": { "$gt": 1 } }, properties=["diel.e_total"])) i = 0 print() print() print("centrosymmetric structures:") for item in cen_data: print("point group %s, len: %i" % (centrosymm[i], len(item)))
for comp in comps:
    print(comp)
    try:
        entries = mpr.get_entries_in_chemsys(comp)
    except:
        continue
    if len(entries) == 0:
        continue
    # pd = PhaseDiagram(entries)
    # data = collections.defaultdict(list)
    for e in entries:
        com = e.entry_id
        prop = mpr.query(criteria={"task_id": com},
                         properties=["formation_energy_per_atom",
                                     "energy_per_atom",
                                     "spacegroup",
                                     "pretty_formula",
                                     "cif"])
        form_energy = prop[0]['formation_energy_per_atom']
        energy = prop[0]['energy_per_atom']
        sp = prop[0]['spacegroup']['symbol']
        name = prop[0]['pretty_formula']
        struct = prop[0]['cif']
        cif_name = './structure/' + com + '_' + name + '.cif'
        Comps.add_component(name, energy, sp, struct, cif_name)
        fcif = open(cif_name, 'w')
        fcif.write(struct)
        fcif.close()

Comps.get_formation_energy()

MATERIAL_API_KEY = os.getenv('MATERIAL_API_KEY')
mpr = MPRester(MATERIAL_API_KEY)

mpid_list = []
with open('mpid_list.csv', 'r') as f:
    reader = csv.reader(f, delimiter=',')
    for line in reader:
        mpid = line[0]
        mpid_list.append(mpid)

len(mpid_list)  # 243

###############################################################################
entries_from_list = mpr.query(criteria={"material_id": {"$in": mpid_list}},
                              properties=["pretty_formula", "e_above_hull"])
len(entries_from_list)  # 243

entries = sorted(entries_from_list, key=lambda e: e['e_above_hull'])

###############################################################################
createFolder('models')

# insert the number of index for modeling here
num_ini = 30
num_fin = 35

with open('models/adsorbate_modeling_%03d_%03d.log' % (num_ini + 1, num_fin), 'w') as f:

# This script performs a blanket search for all systems with elasticity data.
from pymatgen import MPRester
import numpy as np
import os

key = os.environ['MAPI_KEY']
# Assuming you've done `export MAPI_KEY="USER_API_KEY"` at the command line
# See materialsproject.org/dashboard to get your API key
m = MPRester(key)

data = m.query(criteria={"elasticity": {"$exists": True}},
               properties=["pretty_formula", "elasticity.elastic_tensor"])

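# A short post-processing sketch for the blanket elasticity query above: each
# returned 6x6 elastic tensor (Voigt notation, GPa) is converted to a numpy
# array and reduced to a Voigt-average bulk modulus. The averaging step is a
# standard textbook formula added here for illustration; it is not part of the
# original script.
for entry in data:
    C = np.array(entry["elasticity.elastic_tensor"])
    k_voigt = C[:3, :3].sum() / 9.0  # Voigt bulk modulus from the 3x3 stiffness block
    print(entry["pretty_formula"], round(k_voigt, 1), "GPa")
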
import pandas as pd
import matplotlib.pyplot as plt
import csv
import multiprocessing as mp
import pickle
import tqdm
import time

from pymatgen import MPRester  # needed for the query below

mat_api_key = 'JWV6Fi4f6VfxROtHO2uP'
mpr = MPRester(mat_api_key)

print("Loading Compounds....")
all_compounds = mpr.query({}, properties=[
    "task_id", "pretty_formula", 'e_above_hull', 'elements', 'volume',
    'formation_energy_per_atom', 'band_gap'
])

criteria = float(input("Enter Stable Phase Criteria in meV: "))


def find_stable_phases(compound):
    '''
    find all compounds with e_above_hull within given range of zero
    '''
    if abs(compound['e_above_hull']) < criteria / 1000:
        return compound


print('Finding Stable Phases....')

# Assuming you've done `export MAPI_KEY="USER_API_KEY"` at the command line
# See materialsproject.org/dashboard to get your API key
from pymatgen import MPRester
import numpy as np
import os

key = os.environ['MAPI_KEY']
m = MPRester(key)

e_cutoff = m.query("mp-46", ["elasticity.calculations.energy_cutoff"]
                   )[0]['elasticity.calculations.energy_cutoff']  # units (eV)

file = open("INCAR", "w")
file.write("LWAVE = .FALSE.\n")
file.write("LCHARG= .FALSE.\n")
file.write("PREC = Accurate\n")
file.write("LREAL = AUTO\n")
file.write("ADDGRID = .TRUE.\n")
file.write("POTIM = 0.1\n")
file.write("SIGMA = 0.05\n")
file.write("IBRION = 2\n")
file.write("NSW = 100\n")
file.write("ENCUT ={0:3d}\n".format(int(e_cutoff)))
file.write("EDIFF = 1e-8\n")
file.write("EDIFFG = -1e-3\n")
file.write("ISIF = 3\n")
file.close()

MP_KEY = 'cRelC34nhXp1wf5H'
mp_dr = MPRester(MP_KEY)

# query
criteria = {
    "nelements": {"$gte": 1},
    "band_gap": {"$gt": 0},
    "e_above_hull": {"$lt": 0.000001},
}

# property
properties = ['material_id', 'band_gap', 'cif']

data = mp_dr.query(criteria=criteria, properties=properties)

list_id_and_band_gap = [[value['material_id'].strip("mp-" "vc-"),
                         value['band_gap']] for value in data]

with open("data.csv", "w") as f:
    writer = csv.writer(f, lineterminator='\n')
    writer.writerows(list_id_and_band_gap)

for value in data:
    with open("{}.cif".format(value['material_id'].strip("mp-" "vc-")),
              mode='w') as f:
        f.write(value['cif'])

class MaterialsProjectImporter(DbImporter):
    """
    Database importer for the Materials Project.
    """

    _properties = 'structure'
    _supported_keywords = None

    def __init__(self, **kwargs):
        """
        Instantiate the MaterialsProjectImporter by setting up the
        Materials API (MAPI) connection details.
        """
        self.setup_db(**kwargs)

    def setup_db(self, **kwargs):
        """
        Setup the required parameters to the REST API
        """
        try:
            api_key = kwargs['api_key']
        except KeyError:
            try:
                api_key = os.environ['PMG_MAPI_KEY']
            except KeyError as exc:
                raise_from(
                    KeyError(
                        'API key not supplied and PMG_MAPI_KEY environment '
                        'variable not set. Either pass it when initializing the class, '
                        'or set the environment variable PMG_MAPI_KEY to your API key.'
                    ), exc)
        self._api_key = api_key
        self._verify_api_key()
        self._mpr = MPRester(self._api_key)

    def _verify_api_key(self):
        """
        Verify the supplied API key by issuing a request to Materials Project.
        """
        response = requests.get(
            'https://www.materialsproject.org/rest/v1/api_check',
            headers={'X-API-KEY': self._api_key})
        response_content = response.json()  # a dict
        if 'error' in response_content:
            raise RuntimeError(response_content['error'])
        if not response_content['valid_response']:
            raise RuntimeError(
                'Materials Project did not give a valid response for the API key check.')
        if not response_content['api_key_valid']:
            raise RuntimeError(
                'Your API key for Materials Project is not valid.')

    @property
    def api_key(self):
        """
        Return the API key configured for the importer
        """
        return self._api_key

    @property
    def properties(self):
        """
        Return the properties that will be queried
        """
        return self._properties

    @property
    def get_supported_keywords(self):
        """
        Returns the list of all supported query keywords

        :return: list of strings
        """
        return self._supported_keywords

    def query(self, **kwargs):
        """
        Query the database with a given dictionary of query parameters for
        a given properties

        :param query: a dictionary with the query parameters
        :param properties: the properties to query
        """
        try:
            query = kwargs['query']
        except KeyError:
            raise KeyError(
                'Make sure the supplied dictionary has `query` as a key. This '
                'should contain a dictionary with the right query needed.')
        try:
            properties = kwargs['properties']
        except KeyError:
            raise KeyError(
                'Make sure the supplied dictionary has `properties` as a key.')
        if not isinstance(query, dict):
            raise TypeError('The query argument should be a dictionary')
        if properties is None:
            properties = self._properties
        if properties != 'structure':
            raise ValueError('Unsupported properties: {}'.format(properties))
        results = []
        properties_list = ['material_id', 'cif']
        for entry in self._find(query, properties_list):
            results.append(entry)
        search_results = MaterialsProjectSearchResults(
            results, return_class=MaterialsProjectCifEntry)
        return search_results

    def _find(self, query, properties):
        """
        Query the database with a given dictionary of query parameters

        :param query: a dictionary with the query parameters
        """
        for entry in self._mpr.query(criteria=query, properties=properties):
            yield entry

def get_tensor(name):
    m = MPRester(key)
    tensor = m.query(criteria={"elasticity": {"$exists": True},
                               "pretty_formula": name},
                     properties=["elasticity.elastic_tensor"])
    return tensor

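# A hedged usage sketch for get_tensor: it assumes `key` holds a valid legacy
# MP API key and that MPRester is imported at module level, as in the other
# snippets; "Si" is an illustrative formula.
tensors = get_tensor("Si")
if tensors:
    c11 = tensors[0]["elasticity.elastic_tensor"][0][0]
    print("C11 for Si:", c11, "GPa")
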