Exemple #1
0
class CitrineDataRetrievalTest(unittest.TestCase):
    """Row-count and column-name checks for ``CitrineDataRetrieval``.

    NOTE(review): the retrieval object is built from ``citrine_key``, so these
    tests presumably contact the remote Citrine service -- confirm that network
    access and a valid key are available wherever they run.
    """

    def setUp(self):
        """Build the retrieval object used by each test method."""
        self.cdr = CitrineDataRetrieval(citrine_key)

    def test_get_data(self):
        """Ten results are requested: expect ten raw records and ten rows."""
        raw_records = self.cdr.get_api_data(
            formula="W", data_type='EXPERIMENTAL', max_results=10)
        self.assertEqual(len(raw_records), 10)

        query = {'formula': 'W', 'data_type': 'EXPERIMENTAL', 'max_results': 10}
        frame = self.cdr.get_dataframe(criteria=query,
                                       print_properties_options=False)
        self.assertEqual(frame.shape[0], 10)

    def test_multiple_items_in_list(self):
        """Data set 114192 must give 102 rows and the three listed columns."""
        query = {'data_set_id': 114192, 'max_results': 102}
        frame = self.cdr.get_dataframe(criteria=query,
                                       print_properties_options=False)
        self.assertEqual(frame.shape[0], 102)

        expected_columns = {
            "Thermal conductivity_5-conditions",
            "Condition_1",
            "Thermal conductivity_10",
        }
        # Strict-subset comparison: the frame must contain every expected
        # column plus at least one other column.
        self.assertTrue(expected_columns < set(frame.columns))
Exemple #2
0
def plot_expt_compt_band_gaps(citrine_api_key, limit=0):
    """
    Plot experimental band gaps (Citrine) against computed ones (MP).

    Experimental band gaps are pulled from Citrine (w/o dataset limitations)
        and compared with the DFT band gaps from materialsproject.org. For
        every reduced formula, the Materials Project entry with the lowest
        energy above hull (the most stable structure) supplies the computed
        value. Two xy plots are drawn: computed vs. experimental gap, and the
        residual (computed - experimental) vs. experimental gap.
    Args:
        citrine_api_key (str): Your Citrine API key for getting data. Don't have
            a Citrine account? Visit https://citrine.io/
        limit (int): limit the number of entries (0 means no limit)
    Returns:
        None. The plots are drawn through PlotlyFig; per the original notes
            they pop up in plotly "offline" mode in the default browser.
    """

    # --- experimental band gaps from Citrine ---
    citrine = CitrineDataRetrieval(api_key=citrine_api_key)
    shown_columns = ['chemicalFormula', 'Band gap']
    expt_raw = citrine.get_dataframe(prop='band gap',
                                     data_type='experimental',
                                     show_columns=shown_columns,
                                     max_results=limit)
    expt = expt_raw.rename(columns={'chemicalFormula': 'Formula',
                                    'Band gap': 'Expt. gap'})
    # "p1" is not recognized by Composition, so this entry is excluded
    expt = expt[expt['Formula'] != 'In1p1']
    # null band gaps cause problems when plotting residuals
    expt = expt.dropna()
    expt['Formula'] = expt['Formula'].transform(
        lambda formula: Composition(formula).get_reduced_formula_and_factor()[0])

    # --- computed band gaps from the Materials Project ---
    mp_criteria = {'pretty_formula': {'$in': list(expt['Formula'].values)}}
    mp_properties = ['pretty_formula', 'material_id', 'band_gap',
                     'e_above_hull']
    computed = MPDataRetrieval().get_dataframe(criteria=mp_criteria,
                                               properties=mp_properties,
                                               index_mpid=False)
    computed = computed.rename(columns={'pretty_formula': 'Formula',
                                        'band_gap': 'MP computed gap',
                                        'material_id': 'mpid'})

    # keep, per formula, the row with the smallest energy above hull
    most_stable_rows = computed.groupby("Formula")["e_above_hull"].idxmin()
    most_stable = computed.loc[most_stable_rows]

    merged = expt.merge(most_stable, on='Formula')
    merged = merged.drop('e_above_hull', axis=1)
    merged = merged.set_index('mpid')

    figure = PlotlyFig(merged,
                       x_title='Experimental band gap (eV)',
                       y_title='Computed Band Gap (eV)',
                       filename='band_gaps')

    # computed vs. experimental band gap, plus a dashed black line
    # running from (0, 0) to (12, 12)
    scatter_and_line = [
        ('Expt. gap', 'MP computed gap'),
        ([0, 12], [0, 12]),
    ]
    figure.xy(scatter_and_line,
              lines=[{}, {'color': 'black', 'dash': 'dash'}],
              labels=merged.index,
              modes=['markers', 'lines'],
              names=['Computed vs. expt.', 'Expt. gap'])

    # residual (computed - experimental) vs. experimental band gap
    residuals = merged['MP computed gap'] - merged['Expt. gap'].astype(float)
    figure.set_arguments(x_title='Experimental band gap (eV)',
                         y_title='Residual (Computed - Expt.) Band Gap (eV)',
                         filename='band_gap_residuals')
    figure.xy(('Expt. gap', residuals), labels=merged.index)
class CitrineDataRetrievalTest(unittest.TestCase):
    """Smoke test for ``CitrineDataRetrieval.get_dataframe``."""

    def setUp(self):
        """Create the retrieval object from ``citrine_key``."""
        self.cdr = CitrineDataRetrieval(citrine_key)

    def test_get_data(self):
        """Run one query; nothing is asserted, so only an exception fails it."""
        query = dict(formula="W", data_type='EXPERIMENTAL', max_results=10)
        self.cdr.get_dataframe(**query)
    def _apply_query(self, sorted: Optional[bool]) -> pd.DataFrame:
        """Fetch experimental band-gap records and pickle them.

        The query result is written to ``self.raw_data_path`` with
        ``DataFrame.to_pickle`` before it is returned.

        Args:
            sorted: Not read anywhere in this method.
                NOTE(review): the name shadows the built-in ``sorted``; it is
                left as-is because renaming would change the signature.

        Returns:
            The dataframe produced by ``CitrineDataRetrieval.get_dataframe``.
        """
        # NOTE(review): "uid" is listed twice -- confirm whether the duplicate
        # is intentional before removing it.
        wanted_fields = [
            "uid", "chemicalFormula", "references", "Crystallinity",
            "Structure", "Crystal structure", "uid"
        ]

        retriever = CitrineDataRetrieval(api_key=self.API_KEY)
        frame = retriever.get_dataframe(
            criteria={"data_type": "EXPERIMENTAL"},
            properties=['Band gap'],
            common_fields=wanted_fields,
        )

        LOG.info("Writing to raw data...")
        frame.to_pickle(self.raw_data_path)
        return frame
Exemple #5
0
def plot_thermoelectrics(citrine_api_key, limit=0):
    """
    Scatter plot of Seebeck coefficient vs. electrical resistivity.

    The properties of thermoelectric materials come from the data available in
        http://www.mrl.ucsb.edu:8080/datamine/thermoelectric.jsp
        The data is extracted via Citrine data retrieval tools. The dataset
        id on Citrine is 150557. Rows are kept only when the resistivity lies
        strictly between 5e-4 and 0.1 and the absolute Seebeck coefficient is
        below 500. Marker size follows zT and marker colour follows thermal
        conductivity (colour range 0 to 5).
    Args:
        citrine_api_key (str): Your Citrine API key for getting data. Don't have
            a Citrine account? Visit https://citrine.io/
        limit (int): limit the number of entries (0 means no limit)
    Returns:
        None. The figure is drawn through PlotlyFig; per the original notes it
            pops up in plotly "offline" mode in the default browser.
    """
    numeric_columns = [
        'Electrical resistivity',
        'Seebeck coefficient',
        'Thermal conductivity',
        'Thermoelectric figure of merit (zT)',
    ]
    query = {
        'data_type': 'experimental',
        'data_set_id': 150557,
        'max_results': limit,
    }

    retriever = CitrineDataRetrieval(api_key=citrine_api_key)
    data = retriever.get_dataframe(criteria=query,
                                   properties=['Seebeck coefficient'],
                                   secondary_fields=True)
    # cast to float so the numeric comparisons below work
    data[numeric_columns] = data[numeric_columns].astype(float)

    # first filter: resistivity window
    resistivity = data['Electrical resistivity']
    data = data[(resistivity > 5e-4) & (resistivity < 0.1)]

    # second filter: bounded Seebeck coefficient; then shorten the zT label
    seebeck_in_range = abs(data['Seebeck coefficient']) < 500
    data = data[seebeck_in_range]
    data = data.rename(columns={'Thermoelectric figure of merit (zT)': 'zT'})

    print(data.head())

    figure = PlotlyFig(data,
                       x_scale='log',
                       fontfamily='Times New Roman',
                       hovercolor='white',
                       x_title='Electrical Resistivity (cm/S)',
                       y_title='Seebeck Coefficient (uV/K)',
                       colorbar_title='Thermal Conductivity (W/m.K)',
                       filename='thermoelectrics.html')
    figure.xy(('Electrical resistivity', 'Seebeck coefficient'),
              labels=['chemicalFormula', 'Preparation method', 'Crystallinity'],
              sizes='zT',
              colors='Thermal conductivity',
              color_range=[0, 5])
Exemple #6
0
class CitrineDataRetrievalTest(unittest.TestCase):
    """Smoke test: a tungsten query through ``get_dataframe`` must not raise."""

    def setUp(self):
        """Store a ``CitrineDataRetrieval`` built from ``citrine_key``."""
        self.cdr = CitrineDataRetrieval(citrine_key)

    def test_get_data(self):
        """Issue the query; the returned value is not checked."""
        request = {
            'formula': "W",
            'data_type': 'EXPERIMENTAL',
            'max_results': 10,
        }
        self.cdr.get_dataframe(**request)
Exemple #7
0
def plot_thermoelectrics(citrine_api_key, limit=0):
    """
    Plot thermoelectric material properties as a single scatter figure.

    The data is the set available in
        http://www.mrl.ucsb.edu:8080/datamine/thermoelectric.jsp
        and is extracted via Citrine data retrieval tools. The dataset
        id on Citrine is 150557. Seebeck coefficient is plotted against
        electrical resistivity (log x axis), sized by zT and coloured by
        thermal conductivity.
    Args:
        citrine_api_key (str): Your Citrine API key for getting data. Don't have
            a Citrine account? Visit https://citrine.io/
        limit (int): limit the number of entries (0 means no limit)
    Returns:
        None. The figure is drawn through PlotlyFig; per the original notes it
            pops up in plotly "offline" mode in the default browser.
    """
    float_columns = ['Electrical resistivity', 'Seebeck coefficient',
                     'Thermal conductivity',
                     'Thermoelectric figure of merit (zT)']

    client = CitrineDataRetrieval(api_key=citrine_api_key)
    table = client.get_dataframe(
        criteria={'data_type': 'experimental',
                  'data_set_id': 150557,
                  'max_results': limit},
        properties=['Seebeck coefficient'],
        secondary_fields=True,
    )
    table[float_columns] = table[float_columns].astype(float)

    # Keep rows whose resistivity lies strictly inside (5e-4, 0.1) and whose
    # absolute Seebeck coefficient is below 500.
    rho = table['Electrical resistivity']
    seebeck = table['Seebeck coefficient']
    keep = (rho > 5e-4) & (rho < 0.1) & (abs(seebeck) < 500)
    table = table[keep].rename(
        columns={'Thermoelectric figure of merit (zT)': 'zT'})

    print(table.head())

    figure_options = {
        'x_scale': 'log',
        'fontfamily': 'Times New Roman',
        'hovercolor': 'white',
        'x_title': 'Electrical Resistivity (cm/S)',
        'y_title': 'Seebeck Coefficient (uV/K)',
        'colorbar_title': 'Thermal Conductivity (W/m.K)',
        'filename': 'thermoelectrics.html',
    }
    chart = PlotlyFig(table, **figure_options)
    chart.xy(('Electrical resistivity', 'Seebeck coefficient'),
             labels=['chemicalFormula', 'Preparation method', 'Crystallinity'],
             sizes='zT',
             colors='Thermal conductivity',
             color_range=[0, 5])
Exemple #8
0
class CitrineDataRetrievalTest(unittest.TestCase):
    """Checks that raw Citrine records convert into the expected dataframe.

    The checks use ``unittest`` assertion methods rather than bare ``assert``
    statements, so they still run under ``python -O`` and report the compared
    values on failure.
    """

    def setUp(self):
        """Build the retrieval object used by each test method."""
        self.cdr = CitrineDataRetrieval(citrine_key)

    def test_get_data(self):
        """Ten raw records are requested; the dataframe must have ten rows."""
        pifs_lst = self.cdr.get_api_data(formula="W",
                                         data_type='EXPERIMENTAL',
                                         max_results=10)
        df = self.cdr.get_dataframe(pifs_lst)
        self.assertEqual(df.shape[0], 10)

    def test_mutiple_items_in_list(self):
        """Data set 114192 must give 102 rows and the three listed columns."""
        pifs_lst = self.cdr.get_api_data(data_set_id=114192, max_results=102)
        df = self.cdr.get_dataframe(pifs_lst)
        self.assertEqual(df.shape[0], 102)
        expected_columns = (
            "Thermal conductivity_5-conditions",
            "Condition_1",
            "Thermal conductivity_10",
        )
        for col in expected_columns:
            self.assertIn(col, df.columns)
class CitrineDataRetrievalTest(unittest.TestCase):
    """Row-count and column-name checks for ``CitrineDataRetrieval``."""

    def setUp(self):
        """Build the retrieval object used by each test method."""
        self.cdr = CitrineDataRetrieval(citrine_key)

    def test_get_data(self):
        """``get_data`` and ``get_dataframe`` must both honour max_results=10."""
        raw_records = self.cdr.get_data(
            formula="W",
            data_type='EXPERIMENTAL',
            max_results=10,
        )
        self.assertEqual(len(raw_records), 10)

        tungsten_query = {
            'formula': 'W',
            'data_type': 'EXPERIMENTAL',
            'max_results': 10,
        }
        table = self.cdr.get_dataframe(criteria=tungsten_query,
                                       print_properties_options=False)
        self.assertEqual(table.shape[0], 10)

    def test_multiple_items_in_list(self):
        """Data set 114192 must give 102 rows and the three listed columns."""
        dataset_query = {'data_set_id': 114192, 'max_results': 102}
        table = self.cdr.get_dataframe(criteria=dataset_query,
                                       print_properties_options=False)
        self.assertEqual(table.shape[0], 102)

        # ``<`` on sets is a strict-subset test, so the frame must also hold
        # at least one column beyond these three.
        required = {
            "Thermal conductivity_5-conditions",
            "Condition_1",
            "Thermal conductivity_10",
        }
        self.assertTrue(required < set(table.columns))
Exemple #10
0
# This is an experimental matminer project for comparing data from different databases.

from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
from matminer.data_retrieval.retrieve_Citrine import CitrineDataRetrieval
import pandas as pd
import numpy as np

# First configure the pandas display options so that everything is shown.
pd.set_option('display.width', 1000)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# NOTE(review): the API key is hard-coded in the source -- consider loading it
# from the environment and rotating this key.
c = CitrineDataRetrieval(api_key='QgqmY9PPathNDgu6gJjnTQtt')
df = c.get_dataframe(
    criteria={'data_type': 'EXPERIMENTAL', 'max_results': 100},
    properties=['Band gap', 'Temperature'],
    common_fields=['chemicalFormula'],
)
# The CSV is written before the rename, so it keeps the 'Band gap' header.
df.to_csv('duibi.csv')
df = df.rename(columns={'Band gap': 'Experimnetal band gap'})
# Bare expression: the result is only displayed in an interactive session.
df.head()

# Next, for each composition, take the band gaps computed in the MP database
# and find the value that belongs to the most stable structure.
from pymatgen import MPRester, Composition
mpr = MPRester()


def get_mp_bandgap(formula):
    #这个函数的作用是给定一定的化学组成,返回稳定状态的带隙
    #而mo数据库需要用到interger的化学式
Exemple #11
0
from matminer.featurizers.composition import ElementProperty

# In[2]:

# Retrieve NIST SCD dataset from Citrine using matminer.
# The data will be stored in the df DataFrame.

first_retrieve = False  #change it to indicate first time retrieve dataset or not

from matminer.data_retrieval.retrieve_Citrine import CitrineDataRetrieval
from os import environ

if not first_retrieve:
    # Reuse the pickle written by an earlier run of the branch below.
    # NOTE(review): only unpickle files you created yourself.
    df = pd.read_pickle('NIST_CeramicDataSet.pkl')
else:
    # First run: download data set 151803 from Citrine.
    api_key = environ['CITRINATION_API_KEY']  # insert your api key here
    c = CitrineDataRetrieval(api_key=api_key)
    dataset_query = {'data_set_id': '151803'}
    df = c.get_dataframe(criteria=dataset_query)

    # Keep a CSV copy and a pickle copy of the downloaded dataset
    df.to_csv('NIST_CeramicDataSet.csv')
    df.to_pickle('NIST_CeramicDataSet.pkl')

# In[3]:

# Get the number of samples and number of features of the dataset
df.shape

# In[4]:
Exemple #12
0
            df_sym_col[cmp['symbol']] = cmp['df_name']

    # Solve each row of the dataframe
    for idx, row in df.iterrows():

        eqns_tosolve = eqns[:]

        # add equation of symbol and its values from provided df
        for col in df_sym_col:
            eqns_tosolve.append(sp.Eq(col, row[df_sym_col[col]]))

        soln = sp.solve(eqns_tosolve)

        if soln:
            print idx, eqns_tosolve, soln
            df.loc[idx, "Calculated Poisson's ratio"] = round(
                soln[0][sp.S('nu')], 2)

    return df


if __name__ == '__main__':
    # Script entry point: fetch a data set, aggregate it per formula, and
    # print it before and after decorate_dataframe().
    pd.set_option('display.width', 1000)
    # df1 = pd.read_pickle('39135_BMG.pkl')
    df = CitrineDataRetrieval().get_dataframe(data_set_id=150628,
                                              max_results=50)
    # One row per chemical formula; the remaining columns are summed.
    df = df.groupby(['chemicalFormula'], as_index=False).sum()
    # print() with a single argument is valid on Python 3 and prints the same
    # output on Python 2; the former print statements were Python-2-only.
    print(df)
    new_df = decorate_dataframe(df)
    print(new_df)
Exemple #13
0
 def setUp(self):
     """Create a CitrineDataRetrieval from ``citrine_key`` and store it on ``self.cdr``."""
     self.cdr = CitrineDataRetrieval(citrine_key)
 def setUp(self):
     """Create a CitrineDataRetrieval from ``citrine_key`` and store it on ``self.cdr``."""
     self.cdr = CitrineDataRetrieval(citrine_key)
Exemple #15
0
        if cmp['catalog_name'] in mech_props:
            eqns.append(sp.Eq(mech_props[cmp['catalog_name']]().equation()))
            df_sym_col[cmp['symbol']] = cmp['df_name']

    # Solve each row of the dataframe
    for idx, row in df.iterrows():

        eqns_tosolve = eqns[:]

        # add equation of symbol and its values from provided df
        for col in df_sym_col:
            eqns_tosolve.append(sp.Eq(col, row[df_sym_col[col]]))

        soln = sp.solve(eqns_tosolve)

        if soln:
            print(idx, eqns_tosolve, soln)
            df.loc[idx, "Calculated Poisson's ratio"] = round(soln[0][sp.S('nu')], 2)

    return df


if __name__ == '__main__':
    # Script entry point: fetch a data set, aggregate it per formula, and
    # print it before and after decorate_dataframe().
    pd.set_option('display.width', 1000)
    # df1 = pd.read_pickle('39135_BMG.pkl')
    df = CitrineDataRetrieval().get_dataframe(
        data_set_id=150628,
        max_results=50,
    )
    # One row per chemical formula; the remaining columns are summed.
    per_formula = df.groupby(['chemicalFormula'], as_index=False)
    df = per_formula.sum()
    print(df)

    new_df = decorate_dataframe(df)
    print(new_df)
Exemple #16
0
def plot_expt_compt_band_gaps(citrine_api_key, limit=0):
    """
    Plot experimental band gaps (Citrine) against computed ones (MP).

    Experimental band gaps are pulled from Citrine (w/o dataset limitations)
        and compared with the DFT band gaps from materialsproject.org. For
        every reduced formula, the Materials Project entry with the lowest
        energy above hull (the most stable structure) supplies the computed
        value. Two xy plots are drawn: computed vs. experimental gap, and the
        residual (computed - experimental) vs. experimental gap.
    Args:
        citrine_api_key (str): Your Citrine API key for getting data. Don't have
            a Citrine account? Visit https://citrine.io/
        limit (int): limit the number of entries (0 means no limit)
    Returns:
        None. The plots are drawn through PlotlyFig; per the original notes
            they pop up in plotly "offline" mode in the default browser.
    """

    # --- experimental band gaps from Citrine ---
    citrine = CitrineDataRetrieval(api_key=citrine_api_key)
    kept_columns = ['chemicalFormula', 'Band gap']
    citrine_query = {'data_type': 'experimental', 'max_results': limit}
    expt = citrine.get_dataframe(criteria=citrine_query,
                                 secondary_fields=True,
                                 properties=['Band gap'])
    expt = expt[kept_columns]
    expt = expt.rename(columns={'chemicalFormula': 'Formula',
                                'Band gap': 'Expt. gap'})
    # "p1" is not recognized by Composition, so this entry is excluded
    expt = expt[expt['Formula'] != 'In1p1']
    # null band gaps cause problems when plotting residuals
    expt = expt.dropna()
    expt['Formula'] = expt['Formula'].transform(
        lambda formula: Composition(formula).get_reduced_formula_and_factor()[0])

    # --- computed band gaps from the Materials Project ---
    mp_criteria = {'pretty_formula': {'$in': list(expt['Formula'].values)}}
    mp_properties = ['pretty_formula', 'material_id', 'band_gap',
                     'e_above_hull']
    computed = MPDataRetrieval().get_dataframe(criteria=mp_criteria,
                                               properties=mp_properties,
                                               index_mpid=False)
    computed = computed.rename(columns={'pretty_formula': 'Formula',
                                        'band_gap': 'MP computed gap',
                                        'material_id': 'mpid'})

    # keep, per formula, the row with the smallest energy above hull
    most_stable_rows = computed.groupby("Formula")["e_above_hull"].idxmin()
    most_stable = computed.loc[most_stable_rows]

    merged = expt.merge(most_stable, on='Formula')
    merged = merged.drop('e_above_hull', axis=1)
    merged = merged.set_index('mpid')

    figure = PlotlyFig(merged,
                       x_title='Experimental band gap (eV)',
                       y_title='Computed Band Gap (eV)',
                       filename='band_gaps')

    # computed vs. experimental band gap, plus a dashed black line
    # running from (0, 0) to (12, 12)
    scatter_and_line = [
        ('Expt. gap', 'MP computed gap'),
        ([0, 12], [0, 12]),
    ]
    figure.xy(scatter_and_line,
              lines=[{}, {'color': 'black', 'dash': 'dash'}],
              labels=['Formula', merged.index],
              modes=['markers', 'lines'],
              names=['Computed vs. expt.', 'Expt. gap'])

    # residual (computed - experimental) vs. experimental band gap
    residuals = merged['MP computed gap'] - merged['Expt. gap'].astype(float)
    figure.set_arguments(x_title='Experimental band gap (eV)',
                         y_title='Residual (Computed - Expt.) Band Gap (eV)',
                         filename='band_gap_residuals')
    # a fresh labels list is built for this call, as in the first plot
    figure.xy(('Expt. gap', residuals),
              labels=['Formula', merged.index])
Exemple #17
0
        }
    },  # to limit the number of hits for the sake of time
    properties=[
        "elasticity.K_VRH", "elasticity.G_VRH", "pretty_formula",
        "e_above_hull", "bandstructure", "dos"
    ])
# Show the first rows of the dataframe built by the query above.
print("Pb,Te(K_VRH,G_VRH,pretty_formula,e_above_hull,bandstructure,dos):")
print(df.head())

# Plot band structure and DOS for one entry and save the figure to disk.
mpid = 'mp-20740'
# First index label equal to mpid; raises IndexError if there is no match.
idx = df.index[df.index == mpid][0]
# NOTE(review): ``plt`` here is whatever get_plot() returns, and it rebinds any
# module-level ``plt`` that may already exist -- confirm this is intended.
plt = BSDOSPlotter().get_plot(bs=df.loc[idx, 'bandstructure'],
                              dos=df.loc[idx, 'dos'])
plt.savefig('mp-20740.png')

# No api_key is passed here, unlike the keyed constructor calls elsewhere.
cdr = CitrineDataRetrieval()

# Two queries with empty criteria, one per adsorption-energy property.
df_OH = cdr.get_dataframe(criteria={},
                          properties=['adsorption energy of OH'],
                          secondary_fields=True)
df_O = cdr.get_dataframe(criteria={},
                         properties=['adsorption energy of O'],
                         secondary_fields=True)
print('adsorption energy of OH\n')
print(df_OH.head())
print('adsorption energy of O\n')
print(df_O.head())

# Retrieval object for the MDF query that follows.
mdf_dr = MDFDataRetrieval(anonymous=True)

df = mdf_dr.get_dataframe(criteria={