def test_mpds():
    """
    Query MPDS for the MgO / 225 / binary criteria and check that all
    the returned rows share a single value of the requested `phase` field.

    The API key is read from the MPDS_KEY environment variable.
    """
    from mpds_client import MPDSDataRetrieval

    client = MPDSDataRetrieval(api_key=os.getenv('MPDS_KEY'))

    # the props criterion is always set to the atomic structure here;
    # might check if it's already set to something
    query_dict = {
        'formulae': "MgO",
        'sgs': 225,
        'classes': "binary",
        'props': 'atomic structure',
    }

    answer = client.get_data(query_dict, fields={'S': ['phase']})

    # only the first (and the only requested) field of each row is compared
    distinct_phases = {row[0] for row in answer}
    assert len(distinct_phases) == 1
def get_cell_v_for_t(elements, t0=250, t1=350):
    """
    Extracts the cell volumes
    within the certain temperature
    Args:
        elements: (list) chemical elements to retrieve, the first is metal
        t0, t1: (numeric) temperature boundaries, K
    Returns: dict mapping a phase id to the list of the cell volumes,
        each divided by the number of the metal atoms in the cell
    """
    phases_volumes = {}

    # Both entry types are requested with the aligned leading columns:
    # [0] entry type, [1] phase id, [3] condition name, [4] condition units,
    # [5] condition value(s), [6] entry id; the S-rows are followed
    # by the crystal structure fields
    fields = {
        'P': [
            lambda: 'P',
            'sample.material.phase_id',
            lambda: None,
            'sample.measurement[0].condition[0].name',
            'sample.measurement[0].condition[0].units',
            'sample.measurement[0].condition[0].scalar',
            'sample.material.entry'
        ],
        'S': [
            lambda: 'S',
            'phase_id',
            'v',
            lambda: 'Temperature',
            lambda: 'K',
            'condition',  # four values
            'entry',
            'occs_noneq',
            'cell_abc',
            'sg_n',
            'basis_noneq',
            'els_noneq'
        ]
    }
    query = dict(elements='-'.join(elements), classes=supported_arities[len(elements)])

    for item in mpds_api.get_data(query, fields=fields):
        if not item or not item[1] or item[3] != 'Temperature' or item[4] != 'K':
            # Other entry type, or no phase assigned, or irrelevant condition given
            continue

        if item[0] == 'P':
            # P-entry, TODO: consider temperature
            # (currently only reported, it contributes no volume)
            if item[5] and (item[5] < t0 or item[5] > t1):
                print('Phase %s, P: OUT OF BOUNDS TEMPERATURE: %s K (%s)' % (item[1], item[5], item[6]))
            continue

        # S-entry
        if item[5] and item[5][0] and (item[5][0] < t0 or item[5][0] > t1):
            print('Phase %s, S: OUT OF BOUNDS TEMPERATURE: %s K (%s)' % (item[1], item[5][0], item[6]))
            continue

        ase_obj = MPDSDataRetrieval.compile_crystal(item, 'ase')
        if not ase_obj:
            continue

        n_metal_atoms = sum(1 for p in ase_obj if p.symbol == elements[0])
        if not n_metal_atoms:
            # no metal atoms in the cell: the volume per metal atom is undefined,
            # so the entry is skipped instead of failing with the ZeroDivisionError
            continue

        # the determinant is negative for a left-handed cell,
        # whereas the volume is its absolute value
        phases_volumes.setdefault(item[1], []).append(abs(det(ase_obj.cell)) / n_metal_atoms)

    return phases_volumes
def get_geometry(self):
    """
    Getting geometry from MPDS database

    If the MPDS query given in the inputs is empty, the directly supplied
    `self.inputs.struct_in` is returned as is. Otherwise the crystal
    structures matching the query are retrieved, and among those with the
    minimal number of atoms the one, whose cell is the closest to the
    element-wise median cell, is returned as a 'structure' data object.

    The API key is read from the MPDS_KEY environment variable.
    On the API error code 429 the method sleeps for a randomly chosen
    2, 4, 8, 16 or 32 seconds and retries.

    Raises:
        APIError: if no crystal structure could be compiled from the answer,
            or on any API error other than the code 429
    """
    key = os.getenv('MPDS_KEY')
    client = MPDSDataRetrieval(api_key=key, verbose=False)
    query_dict = self.inputs.mpds_query.get_dict()

    # Add direct structures submitting support: FIXME
    assert query_dict or self.inputs.struct_in
    if not query_dict:
        return self.inputs.struct_in

    # insert props: atomic structure to query. Might check if it's already set to smth
    query_dict['props'] = 'atomic structure'

    try:
        answer = client.get_data(
            query_dict,
            fields={'S': [
                'cell_abc',
                'sg_n',
                'basis_noneq',
                'els_noneq'
            ]}
        )
    except APIError as ex:
        if ex.code != 429:
            raise
        self.logger.warning("Too many parallel MPDS requests, chilling")
        time.sleep(random.choice([2 * 2**m for m in range(5)]))
        return self.get_geometry()

    # the rows which cannot be compiled into a structure give a falsy result
    structs = [client.compile_crystal(line, flavor='ase') for line in answer]
    structs = [s for s in structs if s]
    if not structs:
        raise APIError('No crystal structures returned')

    # get structures with minimal number of atoms and find the one with median cell vectors
    minimal_struct = min(len(s) for s in structs)
    smallest_structs = [s for s in structs if len(s) == minimal_struct]

    cells = np.array([s.get_cell().reshape(9) for s in smallest_structs])
    median_cell = np.median(cells, axis=0)
    median_idx = int(np.argmin(np.sum((cells - median_cell) ** 2, axis=1) ** 0.5))

    # median_idx is a position within the filtered list (the rows of `cells`),
    # so it must index that same list and not the full list of the structures
    return get_data_class('structure')(ase=smallest_structs[median_idx])
def get_phases():
    """
    Generate the phase descriptions for all the formulae from get_formulae().

    Each formula query is extended in place with the atomic structure
    criterion and sent to MPDS; the rows without a phase are dropped, the
    duplicated rows are merged, and a dict with the keys `phase`,
    `formulae` and `sgs` is yielded per each remaining row.

    The API key is read from the MPDS_KEY environment variable.

    Raises:
        EnvironmentError: if MPDS_KEY is not set (raised on the first
            iteration, as this is a generator)
    """
    api_key = os.getenv('MPDS_KEY', None)
    if api_key is None:
        raise EnvironmentError(
            'Environment variable MPDS_KEY not set, aborting')

    columns = ['phase', 'chemical_formula', 'sg_n']
    retrieval = MPDSDataRetrieval(api_key=api_key)

    for query in get_formulae():
        # NB the query dict obtained from get_formulae() is modified in place
        query.update({'props': 'atomic structure'})
        rows = retrieval.get_data(query, fields={'S': columns})

        frame = pd.DataFrame(data=rows, columns=columns)
        frame = frame.dropna(axis=0, how="all", subset=["phase"])
        unique_rows = frame.drop_duplicates()

        for _, record in unique_rows.iterrows():
            found = {
                'phase': record['phase'],
                'formulae': record['chemical_formula'],
                'sgs': int(record['sg_n'])
            }
            yield found
import io import requests from mpds_client import MPDSDataRetrieval, MPDSDataTypes from etransport_raw import analyze_raw # this is given in the supplied file "etransport_raw.py" # the raw data on the MPDS are in 7z format # so we need the latest dev version of pylzma # pip install git+https://github.com/fancycode/pylzma # then py7zlib is available from py7zlib import Archive7z mpds_api = MPDSDataRetrieval(dtype=MPDSDataTypes.AB_INITIO) for entry in mpds_api.get_data({'props': 'electrical conductivity'}, fields={}): archive_url = entry['sample']['measurement'][0]['raw_data'] # this is the raw data archive location p = requests.get(archive_url) if p.status_code != 200: logging.critical('ARCHIVE %s IS UNAVAILABLE' % archive_url) continue print('Analyzing the raw data for %s' % entry['sample']['material']['entry']) archive = Archive7z(io.BytesIO(p.content)) for virtual_path in archive.files:
continue formers.add(fingerprint) break # Here we have no single phases: complete insolubility case, e.g. La-Mn else: maybe_nonformers.add(fingerprint) # different pd's may give different impression, so we compare globally true_nonformers |= (maybe_nonformers - formers) return true_nonformers if __name__ == "__main__": OUTPUT = "mpds_bin_nonformers.json" assert not os.path.exists(OUTPUT), "%s exists!" % OUTPUT starttime = time.time() nonformers = get_nonformers(MPDSDataRetrieval()) print("Binary nonformers:", len(nonformers)) f = open(OUTPUT, "w") f.write(json.dumps(sorted(list(nonformers)), indent=4)) f.close() print("Done in %1.2f sc" % (time.time() - starttime))
def calculate_lengths(ase_obj, elA, elB, limit=4): assert elA != elB lengths = [] for n, atom in enumerate(ase_obj): if atom.symbol == elA: for m, neighbor in enumerate(ase_obj): if neighbor.symbol == elB: dist = round(ase_obj.get_distance(n, m), 2) # NB occurrence <-> rounding if dist < limit: lengths.append(dist) return lengths client = MPDSDataRetrieval() answer = client.get_data({ "elements": "U-O", "props": "atomic structure" }, fields={ 'S': [ 'phase_id', 'entry', 'chemical_formula', 'cell_abc', 'sg_n', 'basis_noneq', 'els_noneq' ] }) lengths = [] for item in answer:
from aiida_crystal_dft.io.f34 import Fort34 from yascheduler import CONFIG_FILE from yascheduler.scheduler import Yascheduler from mpds_client import MPDSDataRetrieval from mpds_aiida.common import get_template, get_basis_sets, get_mpds_structures, get_input ela = ['Li', 'Na', 'K', 'Rb', 'Be', 'Mg', 'Ca', 'Sr'] elb = ['F', 'Cl', 'Br', 'I', 'O', 'S', 'Se', 'Te'] config = ConfigParser() config.read(CONFIG_FILE) yac = Yascheduler(config) client = MPDSDataRetrieval() calc_setup = get_template() bs_repo = get_basis_sets(calc_setup['basis_family']) try: how_many = int(sys.argv[1]) except (IndexError, ValueError): how_many = False counter = 0 random.shuffle(ela) random.shuffle(elb) for elem_pair in product(ela, elb): if how_many and counter >= how_many: raise SystemExit
""" #This code is used to extract superconducting Tc and Bulk Moduli data from #the MPDS database using the API information. The code extracts the info #for all the compounds from a given list and extracts the necessary info #That is then added to an excel sheet and saved. The code uses a sleep #function to give the code 1 second before feeding in the next compound #into the database so as to not overwork the system. #import all necessary python packages from mpds_client import MPDSDataRetrieval import pandas as pd import numpy as np from pandas import ExcelWriter import time # feed in personalized API KEY client = MPDSDataRetrieval(" API KEY") sc_datafile = pd.read_excel( r'Bulk Moduli.xlsx' ) #This reads the excel file containing name of compounds and their spacegroup as obtained from MP # The following lines convert panda dataframe type to numpy array name = pd.DataFrame(sc_datafile, columns=['Compound']) name = name.values.tolist() name = np.asarray(name) name = np.concatenate(name, axis=0) spacegroup = pd.DataFrame(sc_datafile, columns=['Space group']) spacegroup = spacegroup.values.tolist() spacegroup = np.asarray(spacegroup) spacegroup = np.concatenate(spacegroup, axis=0) answer = [] Tc = [] Tc_actual = []
import time from urllib import urlencode import httplib2 import ujson as json import numpy as np from mpds_client import MPDSDataRetrieval, APIError from prediction import prop_models from struct_utils import detect_format, poscar_to_ase, refine, get_formula, sgn_to_crsystem from cif_utils import cif_to_ase from common import API_KEY, API_ENDPOINT req = httplib2.Http() client = MPDSDataRetrieval(api_key=API_KEY, endpoint=API_ENDPOINT) ARITY = {1: 'unary', 2: 'binary', 3: 'ternary', 4: 'quaternary', 5: 'quinary'} def make_request(address, data={}, httpverb='POST', headers={}): address += '?' + urlencode(data) if httpverb == 'GET': response, content = req.request(address, httpverb, headers=headers) else: headers.update({'Content-type': 'application/x-www-form-urlencoded'}) response, content = req.request(address, httpverb, headers=headers,
#!/usr/bin/env python """ MPDS API usage example: clustering the band gaps of binary compounds https://developer.mpds.io/#Clustering """ from ase.data import chemical_symbols from mpds_client import MPDSDataRetrieval, MPDSExport from kmeans import Point, kmeans, k_from_n from element_groups import get_element_group client = MPDSDataRetrieval() dfrm = client.get_dataframe( { "classes": "binary", "props": "band gap" }, fields={ 'P': [ 'sample.material.chemical_formula', 'sample.material.chemical_elements', 'sample.material.condition[0].scalar[0].value', 'sample.measurement[0].property.units', 'sample.measurement[0].property.scalar' ] },
# The first command-line argument is mandatory: either a property id
# (a key of prop_models), or a data file name
try:
    arg = sys.argv[1]
except IndexError:
    sys.exit(
        "What to do?\n"
        "Please, provide either a *prop_id* letter (%s) for a property data to be downloaded and fitted,\n"
        "or a data *filename* for tuning the model." % ", ".join(prop_models.keys()))

# The second argument is optional: an integer passed on to mpds_get_data();
# only a missing or a non-integer value falls back to None
# (a bare except here would also swallow SystemExit and KeyboardInterrupt)
try:
    descriptor_kappa = int(sys.argv[2])
except (IndexError, ValueError):
    descriptor_kappa = None

if arg in prop_models:
    api_client = MPDSDataRetrieval(api_key=API_KEY, endpoint=API_ENDPOINT)

    struct_props = mpds_get_data(api_client, arg, descriptor_kappa)

    # each descriptor is two-dimensional, so the three-dimensional array
    # is flattened into one row per sample
    X = np.array(struct_props['Descriptor'].tolist())
    n_samples, n_x, n_y = X.shape
    X = X.reshape(n_samples, n_x * n_y)
    y = struct_props['Avgvalue'].tolist()

    avg_mae, avg_r2 = estimate_regr_quality(get_regr(), X, y)

    print("Avg. MAE: %.2f" % avg_mae)
    print("Avg. R2 score: %.2f" % avg_r2)

    tune_model(struct_props.export_file)
from mpds_client import MPDSDataRetrieval from miner_nonformers import pd_svg_to_points MARGIN_EDGES_COMP = 0.1 MARGIN_EDGES_TEMP = 5 if __name__ == "__main__": try: ela, elb = list(set([sys.argv[1], sys.argv[2]])) except IndexError: raise RuntimeError('Chemical element symbols should be given.') elements = sorted([ela, elb]) print("Elements: %s" % elements) api_client = MPDSDataRetrieval() plt.xlabel('Composition') plt.ylabel('Temperature') plt.annotate(ela, xy=(-0.05, -0.1), xycoords='axes fraction') plt.annotate(elb, xy=(1.05, -0.1), xycoords='axes fraction') ymin, ymax = 500, 700 for pd in api_client.get_data( { "props": "phase diagram", "classes": "binary", "elements": "-".join(elements) }, fields={}): # fields={} means all fields # Consider only full-composition diagrams
'z': 'bulk_modulus', 'x': 'heat_capacity', 'w': 'direct_band_gap' # 'enthalpy of formation' # 'linear thermal expansion coefficient' } CACHE_FILE = os.path.dirname(os.path.realpath( os.path.abspath(__file__))) + os.sep + 'example_aiida_cache.json' if not os.path.exists(CACHE_FILE): with open(CACHE_FILE, 'w') as f: f.write('{}') LABS_SERVER_ADDR = 'https://labs.mpds.io/predict' # http://127.0.0.1:5000/predict raw_req = httplib2.Http() mpds_api = MPDSDataRetrieval() def get_structures(elements): """ Given some arbitrary chemical elements, get their possible crystalline structures """ assert sorted(list(set(elements))) == sorted(elements) and \ len(elements) <= len(supported_arities) structures = [] for item in mpds_api.get_data( { "props": "atomic structure", "elements": '-'.join(elements),
#!/usr/bin/env python """ MPDS API usage example: Display thermal expansion coefficient (alpha^E5) for all the phases which have T_melt > 1800 C reported Warning: ML data should be considered with a grain of salt """ import numpy as np from mpds_client import MPDSDataRetrieval, MPDSDataTypes mpds_api = MPDSDataRetrieval( dtype=MPDSDataTypes.MACHINE_LEARNING) # NB MPDSDataTypes.ALL phase_for_formula = {} phase_for_val_a, phase_for_val_b = {}, {} for deck in mpds_api.get_data( { 'props': 'temperature for congruent melting', 'classes': 'oxide' }, fields={ 'P': [ 'sample.material.phase_id', 'sample.material.chemical_formula', 'sample.measurement[0].property.scalar' ] }): if deck[2] > (1800 + 273):
for atom in ase_obj: volume += 4 / 3 * np.pi * covalent_radii[chemical_symbols.index( atom.symbol)]**3 return volume / abs(np.linalg.det(ase_obj.cell)) def get_Wiener(ase_obj): """ Example crystal structure descriptor: https://en.wikipedia.org/wiki/Wiener_index defined per a unit cell """ return np.sum(ase_obj.get_all_distances()) * 0.5 client = MPDSDataRetrieval() dfrm = client.get_dataframe({ "classes": "transitional, oxide", "props": "isothermal bulk modulus" }) dfrm = dfrm[np.isfinite(dfrm['Phase'])] dfrm = dfrm[dfrm['Units'] == 'GPa'] dfrm = dfrm[dfrm['Value'] > 0] phases = set(dfrm['Phase'].tolist()) answer = client.get_data({"props": "atomic structure"}, phases=phases, fields={ 'S': [ 'phase_id', 'entry', 'chemical_formula',