Exemple #1
0
def test_mpds():
    """Check that the MgO query (space group 225, binary) yields a single phase.

    The API key is read from the ``MPDS_KEY`` environment variable and the
    request goes to the live MPDS API, so the test needs network access.
    """
    from mpds_client import MPDSDataRetrieval

    retriever = MPDSDataRetrieval(api_key=os.getenv('MPDS_KEY'))
    # 'props' is forced to atomic structure because the 'S' fields are requested
    search = {
        'formulae': "MgO",
        'sgs': 225,
        'classes': "binary",
        'props': 'atomic structure',
    }
    rows = retriever.get_data(search, fields={'S': ['phase']})
    # every returned row must refer to the same phase
    distinct_phases = {row[0] for row in rows}
    assert len(distinct_phases) == 1
Exemple #2
0
def get_cell_v_for_t(elements, t0=250, t1=350):
    """
    Extracts the cell volumes within the certain temperature range

    Only S-entries (crystal structures) contribute volumes; P-entries are
    merely inspected and reported when their temperature is out of bounds.
    Entries without a phase assigned, or whose condition is not a temperature
    in K, are ignored. Structures that cannot be compiled, or that contain no
    atoms of the first element, are skipped.

    Args:
        elements: (list) chemical elements to retrieve, the first is metal
        t0, t1: (numeric) temperature boundaries, K

    Returns: dict of volumes per phase, i.e. phase id -> list of cell volumes
        divided by the number of metal atoms in the cell
    """
    phases_volumes = {}

    query = dict(elements='-'.join(elements), classes=supported_arities[len(elements)])

    # Both entry types are mapped onto the same leading layout:
    # [type tag, phase id, (unused | v), condition name, units, condition value, entry, ...]
    # The lambdas inject constants where an entry type has no matching field.
    fields = {
        'P': [
            lambda: 'P',
            'sample.material.phase_id',
            lambda: None,
            'sample.measurement[0].condition[0].name',
            'sample.measurement[0].condition[0].units',
            'sample.measurement[0].condition[0].scalar',
            'sample.material.entry'
        ],
        'S': [
            lambda: 'S',
            'phase_id',
            'v',
            lambda: 'Temperature',
            lambda: 'K',
            'condition', # four values
            'entry',
            'occs_noneq',
            'cell_abc',
            'sg_n',
            'basis_noneq',
            'els_noneq'
        ]
    }

    for item in mpds_api.get_data(query, fields=fields):
        if not item or not item[1] or item[3] != 'Temperature' or item[4] != 'K':
            # Other entry type, or no phase assigned, or irrelevant condition given
            continue

        if item[0] == 'P':
            # P-entry, TODO: consider temperature
            if item[5] and (item[5] < t0 or item[5] > t1):
                print('Phase %s, P: OUT OF BOUNDS TEMPERATURE: %s K (%s)' % (item[1], item[5], item[6]))

        else:
            # S-entry: the first of the condition values is compared as the temperature
            if item[5] and item[5][0] and (item[5][0] < t0 or item[5][0] > t1):
                print('Phase %s, S: OUT OF BOUNDS TEMPERATURE: %s K (%s)' % (item[1], item[5][0], item[6]))
                continue

            ase_obj = MPDSDataRetrieval.compile_crystal(item, 'ase')
            if not ase_obj:
                continue

            n_metal_atoms = sum(1 for atom in ase_obj if atom.symbol == elements[0])
            if not n_metal_atoms:
                # Volume per metal atom is undefined without metal atoms;
                # skip instead of failing with ZeroDivisionError
                continue

            phases_volumes.setdefault(item[1], []).append(det(ase_obj.cell) / n_metal_atoms)

    return phases_volumes
Exemple #3
0
    def get_geometry(self):
        """ Getting geometry from MPDS database

        Queries the MPDS API with ``self.inputs.mpds_query`` (the API key is
        taken from the ``MPDS_KEY`` environment variable), compiles the
        returned entries into ASE structures and picks a representative one:
        among the structures with the fewest atoms, the one whose cell
        vectors are closest to the component-wise median cell.

        Returns:
            ``self.inputs.struct_in`` when the query is empty, otherwise a
            ``get_data_class('structure')`` instance built from the chosen
            ASE structure.

        Raises:
            AssertionError: neither a query nor an input structure is given.
            APIError: the API request fails (except for code 429, which is
                retried after a pause), or no structure could be compiled.
        """
        key = os.getenv('MPDS_KEY')
        client = MPDSDataRetrieval(api_key=key, verbose=False)
        query_dict = self.inputs.mpds_query.get_dict()

        # Add direct structures submitting support: FIXME
        assert query_dict or self.inputs.struct_in
        if not query_dict:
            return self.inputs.struct_in

        # The structure fields are requested below, so 'props' is forced to
        # atomic structure; any value already present in the query is overwritten
        query_dict['props'] = 'atomic structure'
        try:
            answer = client.get_data(
                query_dict,
                fields={'S': [
                    'cell_abc',
                    'sg_n',
                    'basis_noneq',
                    'els_noneq'
                ]}
            )
        except APIError as ex:
            if ex.code == 429:
                self.logger.warning("Too many parallel MPDS requests, chilling")
                # wait a randomly chosen 2, 4, 8, 16 or 32 s, then retry
                time.sleep(random.choice([2 * 2**m for m in range(5)]))
                return self.get_geometry()
            else: raise

        structs = [client.compile_crystal(line, flavor='ase') for line in answer]
        structs = list(filter(None, structs))
        if not structs:
            raise APIError('No crystal structures returned')
        minimal_struct = min(len(s) for s in structs)

        # get structures with minimal number of atoms and find the one with median cell vectors
        # NB the index found below refers to this filtered list, not to structs
        candidates = [s for s in structs if len(s) == minimal_struct]
        cells = np.array([s.get_cell().reshape(9) for s in candidates])
        median_cell = np.median(cells, axis=0)
        median_idx = int(np.argmin(np.sum((cells - median_cell) ** 2, axis=1) ** 0.5))
        return get_data_class('structure')(ase=candidates[median_idx])
Exemple #4
0
def get_phases():
    """Yield one record per distinct phase found for every formula query.

    Each query produced by ``get_formulae()`` is extended with
    ``props='atomic structure'`` and sent to MPDS. Rows without a phase are
    dropped and duplicates are removed before yielding.

    Yields:
        dict with the keys ``phase``, ``formulae`` and ``sgs`` (space group
        number converted to ``int``).

    Raises:
        EnvironmentError: the ``MPDS_KEY`` environment variable is not set.
    """
    api_key = os.environ.get('MPDS_KEY')
    if api_key is None:
        raise EnvironmentError(
            'Environment variable MPDS_KEY not set, aborting')

    columns = ['phase', 'chemical_formula', 'sg_n']
    retriever = MPDSDataRetrieval(api_key=api_key)

    for query in get_formulae():
        query.update(props='atomic structure')
        rows = retriever.get_data(query, fields={'S': columns})

        table = pd.DataFrame(data=rows, columns=columns)
        # rows with no phase assigned are of no use here
        table = table.dropna(axis=0, how="all", subset=["phase"])
        unique_rows = table.drop_duplicates()

        for _, row in unique_rows.iterrows():
            yield {
                'phase': row['phase'],
                'formulae': row['chemical_formula'],
                'sgs': int(row['sg_n'])
            }
Exemple #5
0
import io

import requests
from mpds_client import MPDSDataRetrieval, MPDSDataTypes

from etransport_raw import analyze_raw # this is given in the supplied file "etransport_raw.py"

# the raw data on the MPDS are in 7z format
# so we need the latest dev version of pylzma
# pip install git+https://github.com/fancycode/pylzma
# then py7zlib is available

from py7zlib import Archive7z


mpds_api = MPDSDataRetrieval(dtype=MPDSDataTypes.AB_INITIO)

for entry in mpds_api.get_data({'props': 'electrical conductivity'}, fields={}):

    archive_url = entry['sample']['measurement'][0]['raw_data'] # this is the raw data archive location

    p = requests.get(archive_url)
    if p.status_code != 200:
        logging.critical('ARCHIVE %s IS UNAVAILABLE' % archive_url)
        continue

    print('Analyzing the raw data for %s' % entry['sample']['material']['entry'])

    archive = Archive7z(io.BytesIO(p.content))
    for virtual_path in archive.files:
Exemple #6
0
                    continue

                formers.add(fingerprint)
                break

            # Here we have no single phases: complete insolubility case, e.g. La-Mn

        else:
            maybe_nonformers.add(fingerprint)

    # different pd's may give different impression, so we compare globally
    true_nonformers |= (maybe_nonformers - formers)
    return true_nonformers


if __name__ == "__main__":
    # Script entry point: collect the binary nonformers via the MPDS API
    # and dump them, sorted, into a JSON file in the working directory.

    OUTPUT = "mpds_bin_nonformers.json"
    # refuse to overwrite the results of a previous run
    assert not os.path.exists(OUTPUT), "%s exists!" % OUTPUT

    starttime = time.time()

    nonformers = get_nonformers(MPDSDataRetrieval())

    print("Binary nonformers:", len(nonformers))

    # Serialize before opening the file, so that a serialization failure
    # does not leave an empty output file blocking the next run
    payload = json.dumps(sorted(nonformers), indent=4)
    # the context manager closes the file even if the write fails
    with open(OUTPUT, "w") as f:
        f.write(payload)

    print("Done in %1.2f sc" % (time.time() - starttime))
Exemple #7
0
def calculate_lengths(ase_obj, elA, elB, limit=4):
    """Collect the rounded elA-elB distances that are shorter than *limit*.

    Args:
        ase_obj: iterable of atoms exposing ``symbol``, which also provides
            ``get_distance(i, j)`` for atom indices
        elA, elB: chemical element symbols, must differ
        limit: exclusive upper bound for the (rounded) distance

    Returns: list of distances rounded to 2 decimals, ordered by the index
        of the elA atom and then by the index of the elB atom
    """
    assert elA != elB
    # NB occurrence <-> rounding: the rounding is applied before the cutoff
    rounded = (
        round(ase_obj.get_distance(i, j), 2)
        for i, first in enumerate(ase_obj) if first.symbol == elA
        for j, second in enumerate(ase_obj) if second.symbol == elB
    )
    return [length for length in rounded if length < limit]


# No API key is passed explicitly
# NOTE(review): presumably the client then reads it from the environment — confirm
client = MPDSDataRetrieval()

# Request the crystal structure entries ('S') for the U-O chemical system;
# each returned row follows the order of the fields listed below
answer = client.get_data({
    "elements": "U-O",
    "props": "atomic structure"
},
                         fields={
                             'S': [
                                 'phase_id', 'entry', 'chemical_formula',
                                 'cell_abc', 'sg_n', 'basis_noneq', 'els_noneq'
                             ]
                         })

# accumulator for the distances found in the loop that follows
lengths = []

for item in answer:
from aiida_crystal_dft.io.f34 import Fort34

from yascheduler import CONFIG_FILE
from yascheduler.scheduler import Yascheduler

from mpds_client import MPDSDataRetrieval
from mpds_aiida.common import get_template, get_basis_sets, get_mpds_structures, get_input

# Candidate elements: alkali and alkaline-earth metals ...
ela = ['Li', 'Na', 'K', 'Rb', 'Be', 'Mg', 'Ca', 'Sr']
# ... and halogens and chalcogens
elb = ['F', 'Cl', 'Br', 'I', 'O', 'S', 'Se', 'Te']

# The scheduler is set up from the yascheduler configuration file
config = ConfigParser()
config.read(CONFIG_FILE)
yac = Yascheduler(config)

client = MPDSDataRetrieval()

# Calculation template and the basis sets of the family it names
calc_setup = get_template()
bs_repo = get_basis_sets(calc_setup['basis_family'])

# Optional first command-line argument: an integer limit.
# A missing or non-integer argument gives False, i.e. no limit.
try:
    how_many = int(sys.argv[1])
except (IndexError, ValueError):
    how_many = False

counter = 0
# both lists are shuffled in place
# NOTE(review): presumably so that each run visits the pairs in a new order — confirm
random.shuffle(ela)
random.shuffle(elb)

for elem_pair in product(ela, elb):
    if how_many and counter >= how_many: raise SystemExit
Exemple #9
0
"""
#This code extracts superconducting Tc and bulk moduli data from
#the MPDS database via its API. For every compound in a given list
#it retrieves the necessary info, which is then added to an Excel
#sheet and saved. The code sleeps for 1 second before submitting the
#next compound to the database so as not to overload the system.

#import all necessary python packages
from mpds_client import MPDSDataRetrieval
import pandas as pd
import numpy as np
from pandas import ExcelWriter
import time
# feed in personalized API KEY
client = MPDSDataRetrieval(" API KEY")
sc_datafile = pd.read_excel(
    r'Bulk Moduli.xlsx'
)  #This reads the excel file containing name of compounds and their spacegroup as obtained from MP
# The following lines convert panda dataframe type to numpy array
name = pd.DataFrame(sc_datafile, columns=['Compound'])
name = name.values.tolist()
name = np.asarray(name)
name = np.concatenate(name, axis=0)
spacegroup = pd.DataFrame(sc_datafile, columns=['Space group'])
spacegroup = spacegroup.values.tolist()
spacegroup = np.asarray(spacegroup)
spacegroup = np.concatenate(spacegroup, axis=0)
answer = []
Tc = []
Tc_actual = []
Exemple #10
0
import time
from urllib import urlencode

import httplib2
import ujson as json
import numpy as np

from mpds_client import MPDSDataRetrieval, APIError

from prediction import prop_models
from struct_utils import detect_format, poscar_to_ase, refine, get_formula, sgn_to_crsystem
from cif_utils import cif_to_ase
from common import API_KEY, API_ENDPOINT

# HTTP handle shared by the raw requests issued in make_request()
req = httplib2.Http()
# MPDS API client configured with the project's key and endpoint (both imported from common)
client = MPDSDataRetrieval(api_key=API_KEY, endpoint=API_ENDPOINT)
# Class name by number
# NOTE(review): presumably the number of distinct chemical elements in a compound — confirm
ARITY = {1: 'unary', 2: 'binary', 3: 'ternary', 4: 'quaternary', 5: 'quinary'}


def make_request(address, data={}, httpverb='POST', headers={}):

    address += '?' + urlencode(data)

    if httpverb == 'GET':
        response, content = req.request(address, httpverb, headers=headers)

    else:
        headers.update({'Content-type': 'application/x-www-form-urlencoded'})
        response, content = req.request(address,
                                        httpverb,
                                        headers=headers,
Exemple #11
0
#!/usr/bin/env python
"""
MPDS API usage example:
clustering the band gaps of binary compounds

https://developer.mpds.io/#Clustering
"""

from ase.data import chemical_symbols

from mpds_client import MPDSDataRetrieval, MPDSExport

from kmeans import Point, kmeans, k_from_n
from element_groups import get_element_group

client = MPDSDataRetrieval()

dfrm = client.get_dataframe(
    {
        "classes": "binary",
        "props": "band gap"
    },
    fields={
        'P': [
            'sample.material.chemical_formula',
            'sample.material.chemical_elements',
            'sample.material.condition[0].scalar[0].value',
            'sample.measurement[0].property.units',
            'sample.measurement[0].property.scalar'
        ]
    },
    # First command-line argument is mandatory: either a prop_id letter
    # (a key of prop_models) or a data filename; exit with usage help otherwise
    try:
        arg = sys.argv[1]
    except IndexError:
        sys.exit(
            "What to do?\n"
            "Please, provide either a *prop_id* letter (%s) for a property data to be downloaded and fitted,\n"
            "or a data *filename* for tuning the model." %
            ", ".join(prop_models.keys()))
    # Second argument is an optional integer; when it is absent or not an
    # integer the value stays None. Only these two errors are expected here,
    # so KeyboardInterrupt / SystemExit are deliberately not swallowed.
    try:
        descriptor_kappa = int(sys.argv[2])
    except (IndexError, ValueError):
        descriptor_kappa = None

    if arg in prop_models.keys():

        api_client = MPDSDataRetrieval(api_key=API_KEY, endpoint=API_ENDPOINT)

        struct_props = mpds_get_data(api_client, arg, descriptor_kappa)

        X = np.array(struct_props['Descriptor'].tolist())
        n_samples, n_x, n_y = X.shape
        X = X.reshape(n_samples, n_x * n_y)
        y = struct_props['Avgvalue'].tolist()

        avg_mae, avg_r2 = estimate_regr_quality(get_regr(), X, y)

        print("Avg. MAE: %.2f" % avg_mae)
        print("Avg. R2 score: %.2f" % avg_r2)

        tune_model(struct_props.export_file)
Exemple #13
0
from mpds_client import MPDSDataRetrieval

from miner_nonformers import pd_svg_to_points

MARGIN_EDGES_COMP = 0.1
MARGIN_EDGES_TEMP = 5

if __name__ == "__main__":
    try:
        ela, elb = list(set([sys.argv[1], sys.argv[2]]))
    except IndexError:
        raise RuntimeError('Chemical element symbols should be given.')
    elements = sorted([ela, elb])
    print("Elements: %s" % elements)

    api_client = MPDSDataRetrieval()

    plt.xlabel('Composition')
    plt.ylabel('Temperature')
    plt.annotate(ela, xy=(-0.05, -0.1), xycoords='axes fraction')
    plt.annotate(elb, xy=(1.05, -0.1), xycoords='axes fraction')
    ymin, ymax = 500, 700

    for pd in api_client.get_data(
        {
            "props": "phase diagram",
            "classes": "binary",
            "elements": "-".join(elements)
        },
            fields={}):  # fields={} means all fields
        # Consider only full-composition diagrams
Exemple #14
0
    'z': 'bulk_modulus',
    'x': 'heat_capacity',
    'w': 'direct_band_gap'
    # 'enthalpy of formation'
    # 'linear thermal expansion coefficient'
}

# The cache is a JSON file located next to this script (symlinks resolved)
CACHE_FILE = os.path.dirname(os.path.realpath(
    os.path.abspath(__file__))) + os.sep + 'example_aiida_cache.json'
# On the first run, create the cache file holding an empty JSON object
if not os.path.exists(CACHE_FILE):
    with open(CACHE_FILE, 'w') as f:
        f.write('{}')

# Prediction endpoint; the trailing comment keeps an alternative local address
LABS_SERVER_ADDR = 'https://labs.mpds.io/predict'  # http://127.0.0.1:5000/predict
# Module-level HTTP handle and MPDS API client
raw_req = httplib2.Http()
mpds_api = MPDSDataRetrieval()


def get_structures(elements):
    """
    Given some arbitrary chemical elements,
    get their possible crystalline structures
    """
    assert sorted(list(set(elements))) == sorted(elements) and \
    len(elements) <= len(supported_arities)

    structures = []
    for item in mpds_api.get_data(
        {
            "props": "atomic structure",
            "elements": '-'.join(elements),
Exemple #15
0
#!/usr/bin/env python
"""
MPDS API usage example:

Display thermal expansion coefficient (alpha^E5)
for all the phases which have T_melt > 1800 C reported

Warning: ML data should be considered with a grain of salt
"""

import numpy as np
from mpds_client import MPDSDataRetrieval, MPDSDataTypes

# Client created with the machine-learning data type selected
mpds_api = MPDSDataRetrieval(
    dtype=MPDSDataTypes.MACHINE_LEARNING)  # NB MPDSDataTypes.ALL

# Accumulators filled while processing the API results below
# NOTE(review): presumably keyed by phase id — confirm against the loops that fill them
phase_for_formula = {}
phase_for_val_a, phase_for_val_b = {}, {}

for deck in mpds_api.get_data(
    {
        'props': 'temperature for congruent melting',
        'classes': 'oxide'
    },
        fields={
            'P': [
                'sample.material.phase_id', 'sample.material.chemical_formula',
                'sample.measurement[0].property.scalar'
            ]
        }):
    if deck[2] > (1800 + 273):
Exemple #16
0
    for atom in ase_obj:
        volume += 4 / 3 * np.pi * covalent_radii[chemical_symbols.index(
            atom.symbol)]**3
    return volume / abs(np.linalg.det(ase_obj.cell))


def get_Wiener(ase_obj):
    """
    Example crystal structure descriptor defined per a unit cell,
    modelled on the Wiener index:
    https://en.wikipedia.org/wiki/Wiener_index

    Returns half of the sum of everything
    ase_obj.get_all_distances() reports
    """
    distance_matrix = ase_obj.get_all_distances()
    return 0.5 * np.sum(distance_matrix)


client = MPDSDataRetrieval()

# Fetch the isothermal bulk modulus data for the given classes as a dataframe
dfrm = client.get_dataframe({
    "classes": "transitional, oxide",
    "props": "isothermal bulk modulus"
})
# Keep only the rows with a finite phase id (drops NaN, i.e. no phase assigned),
# with values reported in GPa, and with positive values
dfrm = dfrm[np.isfinite(dfrm['Phase'])]
dfrm = dfrm[dfrm['Units'] == 'GPa']
dfrm = dfrm[dfrm['Value'] > 0]

# Distinct phase ids remaining after the filtering
phases = set(dfrm['Phase'].tolist())
answer = client.get_data({"props": "atomic structure"},
                         phases=phases,
                         fields={
                             'S': [
                                 'phase_id', 'entry', 'chemical_formula',