class CitrineDataRetrievalTest(unittest.TestCase):
    """Checks on ``CitrineDataRetrieval.get_api_data`` and ``get_dataframe``."""

    def setUp(self):
        """Build the retrieval client used by every test."""
        self.cdr = CitrineDataRetrieval(citrine_key)

    def test_get_data(self):
        """Both the raw record list and the dataframe must hold 10 entries."""
        records = self.cdr.get_api_data(
            formula="W", data_type='EXPERIMENTAL', max_results=10)
        self.assertEqual(len(records), 10)

        query = {
            'formula': 'W',
            'data_type': 'EXPERIMENTAL',
            'max_results': 10,
        }
        frame = self.cdr.get_dataframe(
            criteria=query, print_properties_options=False)
        self.assertEqual(frame.shape[0], 10)

    def test_multiple_items_in_list(self):
        """Data set 114192 must give 102 rows and the expected columns."""
        query = {'data_set_id': 114192, 'max_results': 102}
        frame = self.cdr.get_dataframe(
            criteria=query, print_properties_options=False)
        self.assertEqual(frame.shape[0], 102)

        expected_columns = {
            "Thermal conductivity_5-conditions",
            "Condition_1",
            "Thermal conductivity_10",
        }
        # Strict-subset comparison: the frame must contain these columns
        # plus at least one other.
        self.assertTrue(expected_columns < set(frame.columns))
def plot_expt_compt_band_gaps(citrine_api_key, limit=0):
    """
    Compare experimental band gaps from Citrine with DFT band gaps from the
    Materials Project (materialsproject.org) in xy scatter plots.

    For every chemical formula the computed band gap is taken from the
    entry with the lowest energy above hull (the most stable structure),
    so that like is compared with like.

    Args:
        citrine_api_key (str): Your Citrine API key for getting data. Don't
            have a Citrine account? Visit https://citrine.io/
        limit (int): limit the number of entries (0 means no limit)

    Returns:
        None. Two figures are drawn through ``PlotlyFig.xy``: computed vs.
        experimental gap, and the residual vs. experimental gap.
    """
    # Experimental band gaps from Citrine.
    citrine = CitrineDataRetrieval(api_key=citrine_api_key)
    wanted_cols = ['chemicalFormula', 'Band gap']
    expt = citrine.get_dataframe(prop='band gap',
                                 data_type='experimental',
                                 show_columns=wanted_cols,
                                 max_results=limit)
    expt = expt.rename(columns={'chemicalFormula': 'Formula',
                                'Band gap': 'Expt. gap'})
    # 'p1' is not recognized by Composition, so this entry is dropped.
    expt = expt[expt['Formula'] != 'In1p1']
    # Null band gaps cause problems when plotting residuals.
    expt = expt.dropna()
    expt['Formula'] = expt['Formula'].transform(
        lambda formula: Composition(formula).get_reduced_formula_and_factor()[0])

    # Computed band gaps from the Materials Project for the same formulas.
    formulas = list(expt['Formula'].values)
    computed = MPDataRetrieval().get_dataframe(
        criteria={'pretty_formula': {'$in': formulas}},
        properties=['pretty_formula', 'material_id', 'band_gap',
                    'e_above_hull'],
        index_mpid=False)
    computed = computed.rename(columns={'pretty_formula': 'Formula',
                                        'band_gap': 'MP computed gap',
                                        'material_id': 'mpid'})

    # Keep only the most stable structure for each formula.
    most_stable_idx = computed.groupby("Formula")["e_above_hull"].idxmin()
    most_stable = computed.loc[most_stable_idx]

    merged = expt.merge(most_stable, on='Formula')
    merged = merged.drop('e_above_hull', axis=1).set_index('mpid')

    pf = PlotlyFig(merged,
                   x_title='Experimental band gap (eV)',
                   y_title='Computed Band Gap (eV)',
                   filename='band_gaps')

    # Computed vs. experimental band gap, with a dashed y = x guide line.
    traces = [('Expt. gap', 'MP computed gap'), ([0, 12], [0, 12])]
    line_styles = [{}, {'color': 'black', 'dash': 'dash'}]
    pf.xy(traces,
          lines=line_styles,
          labels=merged.index,
          modes=['markers', 'lines'],
          names=['Computed vs. expt.', 'Expt. gap'])

    # Residual (computed minus experimental) vs. experimental band gap.
    residuals = merged['MP computed gap'] - merged['Expt. gap'].astype(float)
    pf.set_arguments(x_title='Experimental band gap (eV)',
                     y_title='Residual (Computed - Expt.) Band Gap (eV)',
                     filename='band_gap_residuals')
    pf.xy(('Expt. gap', residuals), labels=merged.index)
class CitrineDataRetrievalTest(unittest.TestCase):
    """Check of ``CitrineDataRetrieval.get_dataframe``."""

    def setUp(self):
        """Build the retrieval client used by the test."""
        self.cdr = CitrineDataRetrieval(citrine_key)

    def test_get_data(self):
        """Requesting ``max_results=10`` must yield a ten-row dataframe."""
        df = self.cdr.get_dataframe(formula="W", data_type='EXPERIMENTAL',
                                    max_results=10)
        # Without an assertion this test could never fail on wrong data.
        self.assertEqual(df.shape[0], 10)
def _apply_query(self, sorted: Optional[bool]) -> pd.DataFrame:
    """Fetch experimental band-gap records from Citrine and pickle them.

    Args:
        sorted: Not used by this method. The name shadows the builtin but
            is kept unchanged so existing callers keep working.

    Returns:
        The dataframe returned by ``CitrineDataRetrieval.get_dataframe``;
        the same frame is also pickled to ``self.raw_data_path``.
    """
    cdr = CitrineDataRetrieval(api_key=self.API_KEY)
    criteria = {"data_type": "EXPERIMENTAL"}
    properties = ['Band gap']
    # "uid" used to be listed twice; each field is now requested once.
    common_fields = [
        "uid",
        "chemicalFormula",
        "references",
        "Crystallinity",
        "Structure",
        "Crystal structure",
    ]

    df = cdr.get_dataframe(criteria=criteria,
                           properties=properties,
                           common_fields=common_fields)

    LOG.info("Writing to raw data...")
    df.to_pickle(self.raw_data_path)
    return df
def plot_thermoelectrics(citrine_api_key, limit=0):
    """
    Scatter plot of thermoelectric material properties, using the data
    available at http://www.mrl.ucsb.edu:8080/datamine/thermoelectric.jsp
    as retrieved through the Citrine data retrieval tools (Citrine data
    set id 150557).

    Args:
        citrine_api_key (str): Your Citrine API key for getting data. Don't
            have a Citrine account? Visit https://citrine.io/
        limit (int): limit the number of entries (0 means no limit)

    Returns:
        None. The first rows of the filtered data are printed and the
        figure is drawn through ``PlotlyFig.xy``.
    """
    numeric_cols = [
        'Electrical resistivity',
        'Seebeck coefficient',
        'Thermal conductivity',
        'Thermoelectric figure of merit (zT)',
    ]
    query = {
        'data_type': 'experimental',
        'data_set_id': 150557,
        'max_results': limit,
    }

    retriever = CitrineDataRetrieval(api_key=citrine_api_key)
    df_te = retriever.get_dataframe(
        criteria=query,
        properties=['Seebeck coefficient'],
        secondary_fields=True,
    )
    df_te[numeric_cols] = df_te[numeric_cols].astype(float)

    # Keep resistivities strictly between 5e-4 and 0.1 ...
    resistivity = df_te['Electrical resistivity']
    df_te = df_te[(resistivity > 5e-4) & (resistivity < 0.1)]
    # ... and Seebeck coefficients with magnitude below 500.
    seebeck_in_range = abs(df_te['Seebeck coefficient']) < 500
    df_te = df_te[seebeck_in_range]
    df_te = df_te.rename(
        columns={'Thermoelectric figure of merit (zT)': 'zT'})

    print(df_te.head())

    pf = PlotlyFig(df_te,
                   x_scale='log',
                   fontfamily='Times New Roman',
                   hovercolor='white',
                   x_title='Electrical Resistivity (cm/S)',
                   y_title='Seebeck Coefficient (uV/K)',
                   colorbar_title='Thermal Conductivity (W/m.K)',
                   filename='thermoelectrics.html')
    hover_labels = ['chemicalFormula', 'Preparation method', 'Crystallinity']
    pf.xy(('Electrical resistivity', 'Seebeck coefficient'),
          labels=hover_labels,
          sizes='zT',
          colors='Thermal conductivity',
          color_range=[0, 5])
def plot_thermoelectrics(citrine_api_key, limit=0):
    """
    Draw a scatter plot of thermoelectric properties.

    The data comes from
    http://www.mrl.ucsb.edu:8080/datamine/thermoelectric.jsp and is fetched
    with the Citrine data retrieval tools; its Citrine data set id is
    150557.

    Args:
        citrine_api_key (str): Your Citrine API key for getting data. Don't
            have a Citrine account? Visit https://citrine.io/
        limit (int): limit the number of entries (0 means no limit)

    Returns:
        None. A preview of the filtered frame is printed, then the plot is
        produced by ``PlotlyFig.xy``.
    """
    zt_col = 'Thermoelectric figure of merit (zT)'
    res_col = 'Electrical resistivity'
    seebeck_col = 'Seebeck coefficient'
    float_cols = [res_col, seebeck_col, 'Thermal conductivity', zt_col]

    client = CitrineDataRetrieval(api_key=citrine_api_key)
    df_te = client.get_dataframe(
        criteria={'data_type': 'experimental',
                  'data_set_id': 150557,
                  'max_results': limit},
        properties=['Seebeck coefficient'],
        secondary_fields=True)
    df_te[float_cols] = df_te[float_cols].astype(float)

    # First filter: resistivity strictly inside (5e-4, 0.1).
    above_floor = df_te[res_col] > 5e-4
    below_ceiling = df_te[res_col] < 0.1
    df_te = df_te[above_floor & below_ceiling]

    # Second filter: |Seebeck coefficient| below 500; then shorten the zT name.
    df_te = df_te[abs(df_te[seebeck_col]) < 500].rename(
        columns={zt_col: 'zT'})
    print(df_te.head())

    figure_options = dict(
        x_scale='log',
        fontfamily='Times New Roman',
        hovercolor='white',
        x_title='Electrical Resistivity (cm/S)',
        y_title='Seebeck Coefficient (uV/K)',
        colorbar_title='Thermal Conductivity (W/m.K)',
        filename='thermoelectrics.html',
    )
    pf = PlotlyFig(df_te, **figure_options)
    pf.xy((res_col, seebeck_col),
          labels=['chemicalFormula', 'Preparation method', 'Crystallinity'],
          sizes='zT',
          colors='Thermal conductivity',
          color_range=[0, 5])
class CitrineDataRetrievalTest(unittest.TestCase):
    """Checks on ``CitrineDataRetrieval.get_api_data`` and ``get_dataframe``.

    Uses the ``unittest`` assertion methods rather than bare ``assert``
    statements, so the checks still run under ``python -O`` and failures
    report the compared values.
    """

    def setUp(self):
        """Build the retrieval client used by every test."""
        self.cdr = CitrineDataRetrieval(citrine_key)

    def test_get_data(self):
        """Ten requested records must turn into a ten-row dataframe."""
        pifs_lst = self.cdr.get_api_data(formula="W",
                                         data_type='EXPERIMENTAL',
                                         max_results=10)
        df = self.cdr.get_dataframe(pifs_lst)
        self.assertEqual(df.shape[0], 10)

    # The method name (including its spelling) is kept as-is so that test
    # selection by name keeps working.
    def test_mutiple_items_in_list(self):
        """Data set 114192 must give 102 rows and the expected columns."""
        pifs_lst = self.cdr.get_api_data(data_set_id=114192, max_results=102)
        df = self.cdr.get_dataframe(pifs_lst)
        self.assertEqual(df.shape[0], 102)
        for col in ("Thermal conductivity_5-conditions",
                    "Condition_1",
                    "Thermal conductivity_10"):
            self.assertIn(col, df.columns)
class CitrineDataRetrievalTest(unittest.TestCase):
    """Checks on ``CitrineDataRetrieval.get_data`` and ``get_dataframe``."""

    def setUp(self):
        """Build the retrieval client used by every test."""
        self.cdr = CitrineDataRetrieval(citrine_key)

    def test_get_data(self):
        """Both the raw record list and the dataframe must hold 10 entries."""
        raw_records = self.cdr.get_data(
            formula="W", data_type='EXPERIMENTAL', max_results=10)
        self.assertEqual(len(raw_records), 10)

        tungsten_query = {
            'formula': 'W',
            'data_type': 'EXPERIMENTAL',
            'max_results': 10,
        }
        table = self.cdr.get_dataframe(criteria=tungsten_query,
                                       print_properties_options=False)
        self.assertEqual(table.shape[0], 10)

    def test_multiple_items_in_list(self):
        """Data set 114192 must give 102 rows and the expected columns."""
        table = self.cdr.get_dataframe(
            criteria={'data_set_id': 114192, 'max_results': 102},
            print_properties_options=False)
        self.assertEqual(table.shape[0], 102)

        required = {
            "Thermal conductivity_5-conditions",
            "Condition_1",
            "Thermal conductivity_10",
        }
        # Strict-subset comparison: the table must have these columns and
        # at least one more.
        self.assertTrue(required < set(table.columns))
#这是一个matminer的测试性项目,用于比较不同数据库数据 from matminer.data_retrieval.retrieve_MP import MPDataRetrieval from matminer.data_retrieval.retrieve_Citrine import CitrineDataRetrieval import pandas as pd import numpy as np #首先设置pandas的显示设置,保证都能显示出来 pd.set_option('display.width', 1000) pd.set_option('display.max_columns', None) pd.set_option('display.max_rows', None) c = CitrineDataRetrieval(api_key='QgqmY9PPathNDgu6gJjnTQtt') df = c.get_dataframe(criteria={ 'data_type': 'EXPERIMENTAL', 'max_results': 100 }, properties=['Band gap', 'Temperature'], common_fields=['chemicalFormula']) df.to_csv('duibi.csv') df.rename(columns={'Band gap': 'Experimnetal band gap'}, inplace=True) df.head() #然后针对每种组成,从mp数据库中计算的带隙,找到对应的最稳定结构的值 from pymatgen import MPRester, Composition mpr = MPRester() def get_mp_bandgap(formula): #这个函数的作用是给定一定的化学组成,返回稳定状态的带隙 #而mo数据库需要用到interger的化学式
from matminer.featurizers.composition import ElementProperty

# In[2]:

# Load the NIST SCD dataset into the ``df`` DataFrame: either download it
# from Citrine with matminer, or read the copy pickled by an earlier run.
first_retrieve = False  # set to True the first time, to download the dataset

from matminer.data_retrieval.retrieve_Citrine import CitrineDataRetrieval
from os import environ

if not first_retrieve:
    # Reuse the local copy saved by a previous download.
    df = pd.read_pickle('NIST_CeramicDataSet.pkl')
else:
    # The API key is read from the CITRINATION_API_KEY environment variable.
    api_key = environ['CITRINATION_API_KEY']
    c = CitrineDataRetrieval(api_key=api_key)
    df = c.get_dataframe(criteria={'data_set_id': '151803'})

    # Keep both a CSV and a pickle of the downloaded dataset.
    df.to_csv('NIST_CeramicDataSet.csv')
    df.to_pickle('NIST_CeramicDataSet.pkl')

# In[3]:

# Number of samples and number of features in the dataset
df.shape

# In[4]:
df_sym_col[cmp['symbol']] = cmp['df_name'] # Solve each row of the dataframe for idx, row in df.iterrows(): eqns_tosolve = eqns[:] # add equation of symbol and its values from provided df for col in df_sym_col: eqns_tosolve.append(sp.Eq(col, row[df_sym_col[col]])) soln = sp.solve(eqns_tosolve) if soln: print idx, eqns_tosolve, soln df.loc[idx, "Calculated Poisson's ratio"] = round( soln[0][sp.S('nu')], 2) return df if __name__ == '__main__': pd.set_option('display.width', 1000) # df1 = pd.read_pickle('39135_BMG.pkl') df = CitrineDataRetrieval().get_dataframe(data_set_id=150628, max_results=50) df = df.groupby(['chemicalFormula'], as_index=False).sum() print df new_df = decorate_dataframe(df) print new_df
def setUp(self):
    """Create the ``CitrineDataRetrieval`` client and store it on ``self.cdr``."""
    client = CitrineDataRetrieval(citrine_key)
    self.cdr = client
if cmp['catalog_name'] in mech_props: eqns.append(sp.Eq(mech_props[cmp['catalog_name']]().equation())) df_sym_col[cmp['symbol']] = cmp['df_name'] # Solve each row of the dataframe for idx, row in df.iterrows(): eqns_tosolve = eqns[:] # add equation of symbol and its values from provided df for col in df_sym_col: eqns_tosolve.append(sp.Eq(col, row[df_sym_col[col]])) soln = sp.solve(eqns_tosolve) if soln: print(idx, eqns_tosolve, soln) df.loc[idx, "Calculated Poisson's ratio"] = round(soln[0][sp.S('nu')], 2) return df if __name__ == '__main__': pd.set_option('display.width', 1000) # df1 = pd.read_pickle('39135_BMG.pkl') df = CitrineDataRetrieval().get_dataframe(data_set_id=150628, max_results=50) df = df.groupby(['chemicalFormula'], as_index=False).sum() print(df) new_df = decorate_dataframe(df) print(new_df)
def plot_expt_compt_band_gaps(citrine_api_key, limit=0):
    """
    Compare experimental band gaps from Citrine with DFT band gaps from the
    Materials Project (materialsproject.org) in xy scatter plots.

    For every chemical formula the computed band gap is taken from the
    entry with the lowest energy above hull (the most stable structure),
    so that like is compared with like.

    Args:
        citrine_api_key (str): Your Citrine API key for getting data. Don't
            have a Citrine account? Visit https://citrine.io/
        limit (int): limit the number of entries (0 means no limit)

    Returns:
        None. Two figures are drawn through ``PlotlyFig.xy``: computed vs.
        experimental gap, and the residual vs. experimental gap.
    """
    # Experimental band gaps from Citrine.
    citrine = CitrineDataRetrieval(api_key=citrine_api_key)
    query = {'data_type': 'experimental', 'max_results': limit}
    fetched = citrine.get_dataframe(criteria=query,
                                    secondary_fields=True,
                                    properties=['Band gap'])
    wanted_cols = ['chemicalFormula', 'Band gap']
    expt = fetched[wanted_cols].rename(
        columns={'chemicalFormula': 'Formula', 'Band gap': 'Expt. gap'})
    # 'p1' is not recognized by Composition, so this entry is dropped.
    expt = expt[expt['Formula'] != 'In1p1']
    # Null band gaps cause problems when plotting residuals.
    expt = expt.dropna()
    expt['Formula'] = expt['Formula'].transform(
        lambda formula: Composition(formula).get_reduced_formula_and_factor()[0])

    # Computed band gaps from the Materials Project for the same formulas.
    formulas = list(expt['Formula'].values)
    computed = MPDataRetrieval().get_dataframe(
        criteria={'pretty_formula': {'$in': formulas}},
        properties=['pretty_formula', 'material_id', 'band_gap',
                    'e_above_hull'],
        index_mpid=False)
    computed = computed.rename(columns={'pretty_formula': 'Formula',
                                        'band_gap': 'MP computed gap',
                                        'material_id': 'mpid'})

    # Keep only the most stable structure for each formula.
    most_stable_idx = computed.groupby("Formula")["e_above_hull"].idxmin()
    most_stable = computed.loc[most_stable_idx]

    merged = expt.merge(most_stable, on='Formula')
    merged = merged.drop('e_above_hull', axis=1).set_index('mpid')

    pf = PlotlyFig(merged,
                   x_title='Experimental band gap (eV)',
                   y_title='Computed Band Gap (eV)',
                   filename='band_gaps')

    # Computed vs. experimental band gap, with a dashed y = x guide line.
    traces = [('Expt. gap', 'MP computed gap'), ([0, 12], [0, 12])]
    line_styles = [{}, {'color': 'black', 'dash': 'dash'}]
    pf.xy(traces,
          lines=line_styles,
          labels=['Formula', merged.index],
          modes=['markers', 'lines'],
          names=['Computed vs. expt.', 'Expt. gap'])

    # Residual (computed minus experimental) vs. experimental band gap.
    residuals = merged['MP computed gap'] - merged['Expt. gap'].astype(float)
    pf.set_arguments(x_title='Experimental band gap (eV)',
                     y_title='Residual (Computed - Expt.) Band Gap (eV)',
                     filename='band_gap_residuals')
    pf.xy(('Expt. gap', residuals), labels=['Formula', merged.index])
} }, # to limit the number of hits for the sake of time properties=[ "elasticity.K_VRH", "elasticity.G_VRH", "pretty_formula", "e_above_hull", "bandstructure", "dos" ]) print("Pb,Te(K_VRH,G_VRH,pretty_formula,e_above_hull,bandstructure,dos):") print(df.head()) mpid = 'mp-20740' idx = df.index[df.index == mpid][0] plt = BSDOSPlotter().get_plot(bs=df.loc[idx, 'bandstructure'], dos=df.loc[idx, 'dos']) plt.savefig('mp-20740.png') cdr = CitrineDataRetrieval() df_OH = cdr.get_dataframe(criteria={}, properties=['adsorption energy of OH'], secondary_fields=True) df_O = cdr.get_dataframe(criteria={}, properties=['adsorption energy of O'], secondary_fields=True) print('adsorption energy of OH\n') print(df_OH.head()) print('adsorption energy of O\n') print(df_O.head()) mdf_dr = MDFDataRetrieval(anonymous=True) df = mdf_dr.get_dataframe(criteria={