def get_features(self, formula):
    '''
    Build the composition-based feature vector for one chemical formula.

    Parameters
    ----------
    formula: string
        put a valid chemical formula as a string. Example( 'NaCl')

    Return
    ----------
    features: np.array()
        1-D array of length 4*N, where N is the number of columns of
        self.element_df, laid out as [fraction-weighted average,
        max - min, max, min] of the element properties.
        If an element of the formula cannot be looked up in
        self.element_df, an np.array of NaN with that length is returned.
        On any other failure a plain list of NaN with that length is
        returned.
    '''
    n_properties = len(self.element_df.iloc[0])
    try:
        fractions = mg.Composition(formula).fractional_composition.as_dict()
        symbols = list(fractions)
        if not symbols:
            # Nothing to aggregate; routed to the general failure path below.
            raise ValueError('formula contains no elements')

        avg_feature = np.zeros(n_properties)
        for symbol in symbols:
            # Look elements up one at a time so the message names the
            # element that is actually missing.
            try:
                avg_feature += self.element_df.loc[symbol].values * fractions[symbol]
            except Exception:
                print('The element:', symbol, 'from formula', formula,
                      'is not currently supported in our database')
                return np.array([np.nan] * n_properties * 4)

        # Every symbol is known to be present here, so the range statistics
        # only need to be computed once.
        rows = self.element_df.loc[symbols]
        max_feature = rows.max()
        min_feature = rows.min()
        diff_feature = max_feature - min_feature
        return np.concatenate([
            avg_feature,
            np.array(diff_feature),
            np.array(max_feature),
            np.array(min_feature),
        ])
    except Exception:
        # str() keeps the handler itself from failing on non-string input.
        print('There was an error with the Formula: ' + str(formula) +
              ', this is a general exception with an unkown error')
        return [np.nan] * n_properties * 4
def get_ABE(self, formula, A_site, B_site, verbose=False):
    """
    Estimate average metal-oxygen bond energy for complex perovskite oxide
    from simple oxide thermo data.
    Formula from Sammells et al. (1992), Solid State Ionics 52, 111-123.

    Parameters:
    -----------
    formula: oxide formula
    A_site: list of A-site elements
    B_site: list of B-site elements
    verbose: if True, print info about which simple oxides used in calculation

    Returns the summed bond-energy estimate. Raises KeyError when a metal of
    the formula is listed in neither A_site nor B_site.
    """
    # validated on compounds in Sammells 1992 - all but CaTi0.7Al0.3O3 agree
    # validated on (La,Sr)(Cr,Co,Fe)O3 compounds in
    # https://pubs.acs.org/doi/suppl/10.1021/acs.jpcc.6b10571/suppl_file/jp6b10571_si_001.pdf
    # works if Co3O4 specified in oxide_dict
    amounts = mg.Composition(formula).get_el_amt_dict()
    metals = [symbol for symbol in amounts.keys() if symbol != 'O']
    show_oxides = verbose == True  # equality test, so 1 also enables output
    if show_oxides:
        print('Oxides used for ABE calculation:')

    total = 0
    for metal in metals:
        amt = amounts[metal]
        met_mg = mg.Element(metal)
        try:
            # oxide_dict specifies which oxide to use
            oxide = self.oxide_dict[metal]
            oxide_mg = mg.Composition(oxide)
            m = oxide_mg.get(metal)
            n = oxide_mg.get('O')
            obe = self.oxide_obe(oxide)
        except KeyError:
            # No oxide indicated in oxide_dict. Placeholder: walk the common
            # oxidation states in order and take the first one whose oxide
            # has bond-energy data. Running off the end of the tuple raises
            # IndexError.
            state_index = 0
            while True:
                ox = met_mg.common_oxidation_states[state_index]
                oxide, m, n = self.oxide_formula(metal, ox, return_mn=True)
                try:
                    obe = self.oxide_obe(oxide)
                except LookupError:
                    state_index += 1  # try the next oxidation state
                else:
                    break

        if show_oxides:
            print(oxide)

        # NOTE(review): 12 and 6 are presumably the A- and B-site
        # coordination numbers of the perovskite - confirm against the paper.
        if metal in A_site:
            total += amt * obe / (12 * m)
        elif metal in B_site:
            total += amt * obe / (6 * m)
        else:
            raise KeyError(
                '{} is not assigned to A or B site'.format(metal))
    return total
def get_features(self, formula):
    """
    Build the composition-based feature vector for one chemical formula.

    The result is a 1-D np.array of length 5*N, where N is the number of
    columns of self.element_df, laid out as [fraction-weighted average,
    max - min, max, min, population standard deviation] of the element
    properties.

    If an element of the formula cannot be looked up in self.element_df, an
    np.array of NaN with that length is returned. On any other failure a
    plain list of NaN with that length is returned.
    """
    n_properties = len(self.element_df.iloc[0])
    try:
        # Normalised composition: element symbol -> atomic fraction.
        fractions = mg.Composition(formula).fractional_composition.as_dict()
        symbols = list(fractions)
        if not symbols:
            # Nothing to aggregate; routed to the general failure path below.
            raise ValueError('formula contains no elements')

        avg_feature = np.zeros(n_properties)
        for symbol in symbols:
            # Each element's property row is weighted by its atomic fraction.
            # Elements are looked up one at a time so the message names the
            # element that is actually missing.
            try:
                avg_feature += self.element_df.loc[symbol].values * fractions[symbol]
            except Exception:
                print('The element:', symbol, 'from formula', formula,
                      'is not currently supported in our database')
                return np.array([np.nan] * n_properties * 5)

        # Per-property statistics over the elements present in the formula.
        rows = self.element_df.loc[symbols]
        max_feature = rows.max()
        min_feature = rows.min()
        diff_feature = max_feature - min_feature
        std_feature = rows.std(ddof=0)

        # Concatenate the five statistic blocks into one flat vector.
        return np.concatenate([
            avg_feature,
            np.array(diff_feature),
            np.array(max_feature),
            np.array(min_feature),
            np.array(std_feature),
        ])
    except Exception:
        # str() keeps the handler itself from failing on non-string input.
        print('There was an error with the Formula: ' + str(formula) +
              ', this is a general exception with an unkown error')
        return [np.nan] * n_properties * 5
def run(mpfile, **kwargs):
    """Read the Google sheet referenced by *mpfile* and add its rows to it.

    The sheet URL is popped from the first-level section of the MPFile
    document, every worksheet is loaded, compositions are rounded to 100 %,
    and each Fe/V/Co composition is added as hierarchical data keyed by its
    formula. The "Kondorsky" sheet is attached as a data table to the
    composition found in the "formula" sheet.
    """
    import pymatgen
    import pandas as pd
    from mpcontribs.users.swf.rest.rester import SwfRester

    # load data from google sheet
    section = mpfile.document[mp_level01_titles[0]]
    sheet_url = section.pop("google_sheet") + "/export?format=xlsx"
    df_dct = pd.read_excel(sheet_url, sheet_name=None)

    # rename sheet columns
    elements = ["Fe", "V", "Co"]
    column_names = {
        "IP Energy Product": ["IP_Energy_product"] + elements,
        "total": elements,
        "MOKE": elements + ["thickness", "MOKE_IP_Hc"],
        "VSM": elements + ["thickness", "VSM_IP_Hc"],
        "formula": elements,
        "Kondorsky": ["angle", "Kondorsky_Model", "Experiment"],
    }
    for sheet_name, names in column_names.items():
        df_dct[sheet_name].columns = names

    def formula_of(amounts):
        # Composition -> formula string without blanks -> project identifier.
        compact = pymatgen.Composition(amounts).formula.replace(" ", "")
        return get_composition_from_string(compact)

    def percentages(series):
        return dict(
            (k, clean_value(v, "%")) for k, v in series.to_dict().items())

    # round all compositions to 100%
    for sheet, df in df_dct.items():
        if sheet == "Kondorsky":
            continue
        for idx, row in df.iterrows():
            df.loc[idx:idx, elements] = round_to_100_percent(row[elements])

    # the single reference composition and its angular-dependence table
    row5 = df_dct["formula"].iloc[0]
    formula5 = formula_of(10 * row5)
    mpfile.add_hierarchical_data(
        {"data": percentages(row5)}, identifier=formula5)
    mpfile.add_data_table(formula5,
                          df_dct["Kondorsky"],
                          name="Angular Dependence of Switching Field")

    # one entry per measured composition in the remaining sheets
    skipped_sheets = ("formula", "Kondorsky", "total")
    for sheet, df in df_dct.items():
        if sheet in skipped_sheets:
            continue
        for idx, row in df.iterrows():
            formula = formula_of(row[elements] * 10)
            mpfile.add_hierarchical_data(
                {"data": percentages(row[elements])}, identifier=formula)
            columns = [x for x in row.index if x not in elements]
            if not columns:
                continue
            data = row[columns].round(decimals=1)
            dct = dict(
                (k, clean_value(v)) for k, v in data.to_dict().items())
            mpfile.add_hierarchical_data({"data": dct}, identifier=formula)
def run(mpfile, **kwargs):
    """Read the Google sheet referenced by *mpfile* and add its rows to it.

    Steps: pop the sheet URL from the first-level section of the MPFile
    document, load every worksheet, rename the columns, round the
    compositions to 100 %, then add each Fe/V/Co composition as hierarchical
    data keyed by its formula. The 'Kondorsky' sheet is attached as a data
    table to the composition found in the 'formula' sheet.
    """
    import pymatgen
    import pandas as pd
    from mpcontribs.users.swf.rest.rester import SwfRester

    # load data from google sheet
    google_sheet = mpfile.document[mp_level01_titles[0]].pop('google_sheet')
    df_dct = pd.read_excel(google_sheet + '/export?format=xlsx',
                           sheet_name=None)

    # rename sheet columns
    elements = ['Fe', 'V', 'Co']
    for sheet_name, names in (
            ('IP Energy Product', ['IP_Energy_product'] + elements),
            ('total', elements),
            ('MOKE', elements + ['thickness', 'MOKE_IP_Hc']),
            ('VSM', elements + ['thickness', 'VSM_IP_Hc']),
            ('formula', elements),
            ('Kondorsky', ['angle', 'Kondorsky_Model', 'Experiment']),
    ):
        df_dct[sheet_name].columns = names

    # round all compositions to 100%
    for sheet, df in df_dct.items():
        if sheet == 'Kondorsky':
            continue
        for idx, row in df.iterrows():
            df.loc[idx:idx, elements] = round_to_100_percent(row[elements])

    # reference composition taken from the first row of the 'formula' sheet
    row5 = df_dct['formula'].iloc[0]
    compact5 = pymatgen.Composition(10 * row5).formula.replace(' ', '')
    formula5 = get_composition_from_string(compact5)
    dct = dict((k, clean_value(v, '%')) for k, v in row5.to_dict().items())
    mpfile.add_hierarchical_data({'data': dct}, identifier=formula5)
    mpfile.add_data_table(formula5,
                          df_dct['Kondorsky'],
                          name='Angular Dependence of Switching Field')

    # remaining sheets: one entry per measured composition
    for sheet, df in df_dct.items():
        if sheet in ('formula', 'Kondorsky', 'total'):
            continue
        for idx, row in df.iterrows():
            fractions = row[elements]
            compact = pymatgen.Composition(
                fractions * 10).formula.replace(' ', '')
            formula = get_composition_from_string(compact)
            dct = dict((k, clean_value(v, '%'))
                       for k, v in fractions.to_dict().items())
            mpfile.add_hierarchical_data({'data': dct}, identifier=formula)

            # any non-composition columns are stored rounded to one decimal
            columns = [x for x in row.index if x not in elements]
            if columns:
                data = row[columns].round(decimals=1)
                dct = dict(
                    (k, clean_value(v)) for k, v in data.to_dict().items())
                mpfile.add_hierarchical_data({'data': dct},
                                             identifier=formula)
def get_class(self, formula):
    """Classify a formula by the elements it contains.

    The formula is parsed non-strictly. If that fails, it is parsed again
    after removing every sign followed by x, y or z (either case). If both
    attempts fail an empty string is returned.

    The entries of self.composition_map are then tried in order. An entry
    matches when all of its 'and_compounds' are present, or, if it has no
    'and_compounds', when any of its 'or_compounds' is present. The 'name'
    of the first matching entry is returned, or "Alloy" when nothing matches.
    """
    output = ''
    try:
        input_formula = list(
            pg.Composition(formula, strict=False).as_dict().keys())
    except Exception as ce:
        print("Exception when parsing " + str(formula) + ". Error: " + str(ce))
        # Trying with some tricks
        c_with_replacements = re.sub(r'[+-][ZXYzxy]', '', formula)
        try:
            print("Trying to parse " + str(c_with_replacements))
            input_formula = list(
                pg.Composition(c_with_replacements,
                               strict=False).as_dict().keys())
        except Exception as ce:
            print("Exception when parsing " + str(c_with_replacements) +
                  ". Error: " + str(ce))
            # We give up... skipping this record
            return output

    for entry in self.composition_map:
        required = entry['and_compounds'] if 'and_compounds' in entry else []
        alternatives = entry['or_compounds'] if 'or_compounds' in entry else []
        label = entry['name']

        if len(required) > 0:
            matched = all(elem in input_formula for elem in required)
        elif len(alternatives) > 0:
            matched = any(elem in input_formula for elem in alternatives)
        else:
            matched = False

        if matched:
            output = label
            break

    return "Alloy" if output == '' else output
def check_neutrality_4(formula):
    """Return True if a four-element formula can be charge balanced.

    Every choice of four elements from the formula (in formula order) is
    combined with every assignment of their smact oxidation states. The
    formula passes when neutral_ratios reports a neutral combination whose
    ratio tuple equals the element amounts of the formula. Ratio tuples have
    four entries, so only formulas with exactly four elements can match.

    On failure two summary lines are printed before returning False. The
    timing line reads `start_time`, which must exist at module level.
    """
    import itertools  # function-scope import keeps this block self-contained

    charge_neutral_count = 0  # never incremented; only feeds the summary line
    comp = mg.Composition(formula)
    reduce_formula = comp.get_el_amt_dict()
    list_of_elements = list(reduce_formula.keys())
    max_num = max(reduce_formula.values())
    amounts = tuple(reduce_formula.values())
    threshold = int(max_num)

    # One smact lookup per element instead of one per loop iteration.
    ox_states = {
        ele: smact.Element(ele).oxidation_states for ele in list_of_elements
    }

    for combo in itertools.combinations(list_of_elements, 4):
        for oxidations in itertools.product(*(ox_states[e] for e in combo)):
            cn_e, cn_r = neutral_ratios(list(oxidations), threshold=threshold)
            if cn_e and any(amounts == num for num in cn_r):
                return True

    print('Number of combinations = {0}'.format(charge_neutral_count))
    print("--- {0} seconds to run ---".format(time.time() - start_time))
    return False
def ox_states_from_binary_formula(self,formula,anion=None,anion_ox_state=None):
    """
    Determine oxidation states from binary formula.
    Could also use mg.Composition.oxi_state_guesses(), but the logic used is more complex.

    Args:
        formula: chemical formula
        anion: Element symbol of anion. If None, search for common anion
        anion_ox_state: oxidation state of anion. If None, assume common oxidation state

    Returns:
        dict mapping the metal and the anion symbol to their oxidation states.
        The metal's state is chosen so that the formula is charge neutral.

    Raises:
        ValueError: formula is not binary, or the anion cannot be identified
            unambiguously from self.common_anions
        Exception: the anion has several negative common oxidation states
    """
    comp = mg.Composition(formula)
    if len(comp.elements) != 2:
        raise ValueError('Formula must be binary')

    # determine anion
    if anion is None:
        candidates = np.intersect1d(
            [e.name for e in comp.elements], self.common_anions)
        n_candidates = len(candidates)
        if n_candidates > 1:
            raise ValueError('Found multiple possible anions in formula. Please specify anion')
        if n_candidates == 0:
            raise ValueError('No common anions found in formula. Please specify anion')
        anion = candidates[0]

    # the metal is whichever element is not the anion
    metal = np.setdiff1d(comp.elements,mg.Element(anion))[0].name

    # get common oxidation state for anion
    if anion_ox_state is None:
        negative_states = [
            ox for ox in mg.Element(anion).common_oxidation_states if ox < 0
        ]
        if len(negative_states) > 1:
            raise Exception(f"Multiple common oxidation states for {anion}. Please specify anion_ox_state")
        anion_ox_state = negative_states[0]

    # charge balance: metal amount * metal state = -(anion amount * anion state)
    metal_ox_state = -comp.get(anion)*anion_ox_state/comp.get(metal)
    return {metal:metal_ox_state,anion:anion_ox_state}
def check_electronegativity_2(formula):
    """Return True if a two-element formula passes both screening checks.

    Every pair of elements from the formula (in formula order) is combined
    with every assignment of their smact oxidation states. The formula passes
    when pauling_test accepts the assignment and neutral_ratios reports a
    neutral combination whose ratio tuple equals the element amounts of the
    formula. Ratio tuples have two entries, so only formulas with exactly two
    elements can match.

    On failure two summary lines are printed before returning False. The
    first reports how many assignments passed both checks without matching
    the amounts. The timing line reads `start_time`, which must exist at
    module level.
    """
    import itertools  # function-scope import keeps this block self-contained

    pauling_count = 0
    comp = mg.Composition(formula)
    reduce_formula = comp.get_el_amt_dict()
    list_of_elements = list(reduce_formula.keys())
    max_num = max(reduce_formula.values())
    amounts = tuple(reduce_formula.values())
    threshold = int(max_num)

    # One smact lookup per element instead of one per loop iteration.
    paulings = {}
    ox_states = {}
    for ele in list_of_elements:
        smact_element = smact.Element(ele)
        paulings[ele] = smact_element.pauling_eneg
        ox_states[ele] = smact_element.oxidation_states

    for combo in itertools.combinations(list_of_elements, 2):
        for oxidations in itertools.product(*(ox_states[e] for e in combo)):
            # Fresh lists per call, in case the helpers modify their inputs.
            elements = list(combo)
            oxidation_states = list(oxidations)
            pauling_electro = [paulings[e] for e in combo]

            electroneg_makes_sense = pauling_test(
                oxidation_states, pauling_electro, elements)
            cn_e, cn_r = neutral_ratios(list(oxidations), threshold=threshold)
            if cn_e and electroneg_makes_sense:
                pauling_count = pauling_count + 1
                if any(amounts == num for num in cn_r):
                    return True

    print('Number of combinations = {0}'.format(pauling_count))
    print("--- {0} seconds to run ---".format(time.time() - start_time))
    return False
def check_neutrality_2(formula):
    """Return True if a two-element formula can be charge balanced.

    Every pair of elements from the formula (in formula order) is combined
    with every assignment of their smact oxidation states. The formula passes
    when neutral_ratios reports a neutral combination whose ratio tuple
    equals the element amounts of the formula. Ratio tuples have two entries,
    so only formulas with exactly two elements can match.

    On failure two summary lines are printed before returning False. The
    timing line reads `start_time`, which must exist at module level.
    """
    import itertools  # function-scope import keeps this block self-contained

    charge_neutral_count = 0  # never incremented; only feeds the summary line
    comp = mg.Composition(formula)
    reduce_formula = comp.get_el_amt_dict()
    list_of_elements = list(reduce_formula.keys())
    max_num = max(reduce_formula.values())
    amounts = tuple(reduce_formula.values())
    threshold = int(max_num)

    # One smact lookup per element instead of one per loop iteration.
    ox_states = {
        ele: smact.Element(ele).oxidation_states for ele in list_of_elements
    }

    for combo in itertools.combinations(list_of_elements, 2):
        for oxidations in itertools.product(*(ox_states[e] for e in combo)):
            cn_e, cn_r = neutral_ratios(list(oxidations), threshold=threshold)
            if cn_e and any(amounts == num for num in cn_r):
                return True

    print('Number of combinations = {0}'.format(charge_neutral_count))
    print("--- {0} seconds to run ---".format(time.time() - start_time))
    return False
def get_features(self, formula):
    """
    Build the composition-based feature vector for one chemical formula.

    The result is a 1-D np.array of length 5*N, where N is the number of
    columns of self.element_df, laid out as [fraction-weighted average,
    max - min, max, min, population standard deviation] of the element
    properties.

    If an element of the formula cannot be looked up in self.element_df, an
    np.array of NaN with that length is returned. On any other failure a
    plain list of NaN with that length is returned.
    """
    n_properties = len(self.element_df.iloc[0])
    try:
        fractions = mg.Composition(formula).fractional_composition.as_dict()
        symbols = list(fractions)
        if not symbols:
            # Nothing to aggregate; routed to the general failure path below.
            raise ValueError('formula contains no elements')

        avg_feature = np.zeros(n_properties)
        for symbol in symbols:
            # Look elements up one at a time so the message names the
            # element that is actually missing.
            try:
                avg_feature += self.element_df.loc[symbol].values * fractions[symbol]
            except Exception:
                print('The element:', symbol, 'from formula', formula,
                      'is not currently supported in our database')
                return np.array([np.nan] * n_properties * 5)

        # Every symbol is known to be present here, so the per-property
        # statistics only need to be computed once.
        rows = self.element_df.loc[symbols]
        max_feature = rows.max()
        min_feature = rows.min()
        diff_feature = max_feature - min_feature
        std_feature = rows.std(ddof=0)
        return np.concatenate([
            avg_feature,
            np.array(diff_feature),
            np.array(max_feature),
            np.array(min_feature),
            np.array(std_feature),
        ])
    except Exception:
        # str() keeps the handler itself from failing on non-string input.
        print('There was an error with the Formula: ' + str(formula) +
              ', this is a general exception with an unkown error')
        return [np.nan] * n_properties * 5
def get_elem_of_interest(formula_str):
    """Return the symbol of the second-to-last element of the formula.

    Raises IndexError when the formula contains fewer than two elements.
    """
    # the element of interest sits one position before the final element
    second_last = mg.Composition(formula_str).elements[-2]
    return second_last.symbol
def check_electronegativity_8(formula):
    """Return True if an eight-element formula passes both screening checks.

    Every choice of eight elements from the formula (in formula order) is
    combined with every assignment of their smact oxidation states. The
    formula passes when pauling_test accepts the assignment and
    neutral_ratios reports a neutral combination whose ratio tuple equals the
    element amounts of the formula. Ratio tuples have eight entries, so only
    formulas with exactly eight elements can match.

    If an element taking part in an assignment has no Pauling
    electronegativity, a message is printed and False is returned.
    """
    import itertools  # function-scope import keeps this block self-contained

    comp = mg.Composition(formula)
    reduce_formula = comp.get_el_amt_dict()
    list_of_elements = list(reduce_formula.keys())
    max_num = max(reduce_formula.values())
    amounts = tuple(reduce_formula.values())
    threshold = int(max_num)

    # One smact lookup per element instead of one per loop iteration.
    paulings = {}
    ox_states = {}
    for ele in list_of_elements:
        smact_element = smact.Element(ele)
        paulings[ele] = smact_element.pauling_eneg
        ox_states[ele] = smact_element.oxidation_states

    for combo in itertools.combinations(list_of_elements, 8):
        for oxidations in itertools.product(*(ox_states[e] for e in combo)):
            # Fresh lists per call, in case the helpers modify their inputs.
            elements = list(combo)
            oxidation_states = list(oxidations)
            pauling_electro = [paulings[e] for e in combo]
            # Checked inside the loop so nothing is printed when there is no
            # oxidation-state assignment to test at all.
            if None in pauling_electro:
                print("No pauling electronegativity data")
                return False

            electroneg_makes_sense = pauling_test(
                oxidation_states, pauling_electro, elements)
            cn_e, cn_r = neutral_ratios(list(oxidations), threshold=threshold)
            if (cn_e and electroneg_makes_sense
                    and any(amounts == num for num in cn_r)):
                return True
    return False
def check_electronegativity_4(formula):
    """Return True if a four-element formula passes both screening checks.

    Every choice of four elements from the formula (in formula order) is
    combined with every assignment of their smact oxidation states. The
    formula passes when pauling_test accepts the assignment and
    neutral_ratios, called with the formula's integer stoichiometry, reports
    a neutral combination whose ratio tuple equals the element amounts of the
    formula. Ratio tuples have four entries, so only formulas with exactly
    four elements can match.

    If an element taking part in an assignment has no Pauling
    electronegativity, a message is printed and False is returned.
    """
    import itertools  # function-scope import keeps this block self-contained

    comp = mg.Composition(formula)
    reduce_formula = comp.get_el_amt_dict()
    list_of_elements = list(reduce_formula.keys())
    # One length-1 integer array per element, as handed to neutral_ratios.
    stoichs = list(
        np.array(list(reduce_formula.values())).astype(np.int32)[:, np.newaxis])
    max_num = max(reduce_formula.values())
    amounts = tuple(reduce_formula.values())
    threshold = int(max_num)

    # One smact lookup per element instead of one per loop iteration.
    paulings = {}
    ox_states = {}
    for ele in list_of_elements:
        smact_element = smact.Element(ele)
        paulings[ele] = smact_element.pauling_eneg
        ox_states[ele] = smact_element.oxidation_states

    for combo in itertools.combinations(list_of_elements, 4):
        for oxidations in itertools.product(*(ox_states[e] for e in combo)):
            # Fresh lists per call, in case the helpers modify their inputs.
            elements = list(combo)
            oxidation_states = list(oxidations)
            pauling_electro = [paulings[e] for e in combo]
            # Checked inside the loop so nothing is printed when there is no
            # oxidation-state assignment to test at all.
            if None in pauling_electro:
                print("No pauling electronegativity data")
                return False

            electroneg_makes_sense = pauling_test(
                oxidation_states, pauling_electro, elements)
            cn_e, cn_r = neutral_ratios(
                list(oxidations), stoichs=stoichs, threshold=threshold)
            if (cn_e and electroneg_makes_sense
                    and any(amounts == num for num in cn_r)):
                return True
    return False
def transform(self, X_df):
    """Encode the `formula` column of X_df as a matrix of element amounts.

    Returns a float32 array with one row per formula and one column per entry
    of the module-level `elements` sequence. Each cell holds the amount of
    that element in the formula, or 0 when absent. An element that is not in
    `elements` raises ValueError from the index lookup.
    """
    formulas = X_df.formula.values
    amounts_matrix = np.zeros(
        shape=(len(formulas), len(elements)), dtype=np.float32)
    for row, formula in enumerate(formulas):
        for symbol, amount in mg.Composition(formula).as_dict().items():
            amounts_matrix[row][elements.index(symbol)] = amount
    return amounts_matrix
def get_fH(self,formula, phase='solid', data_type='exp',silent=True,exclude_phases=[]):
    """
    Get average formation enthalpy for formula and phase

    Results are cached in self.fH_dict under the key
    (formula, phase, data_type, comma-joined exclude_phases), together with
    an info message naming the phases that were averaged.

    Parameters:
    -----------
    formula: chemical formula string
    phase: phase string. Can be 'solid', 'liquid', 'gas', or a specific solid phase (e.g. 'monoclinic').
        If 'solid', returns average across all solid phases
    data_type: 'exp' or 'vasp'; passed on to self.mp.get_data
    silent: if False, print the info message
    exclude_phases: phases left out of the 'solid' aggregate for 'exp' data

    Raises:
    -------
    ValueError: 'vasp' data requested for the liquid or gas phase
    LookupError: no matching data found
    """
    cache_key = (formula, phase, data_type, ','.join(exclude_phases))
    announce = silent == False  # equality test, so 0 also enables output

    # first check for corrected/saved data in fH_dict
    try:
        fH, msg = self.fH_dict[cache_key]
    except KeyError:
        pass
    else:
        if announce:
            print(msg)
        return fH

    # no entry exists: look up in MP
    results = self.mp.get_data(formula, data_type=data_type)
    if data_type == 'exp':
        if phase == 'solid':
            not_solid = ['liquid', 'gas'] + exclude_phases
            phase_results = [
                r for r in results
                if r.type == 'fH' and r.phaseinfo not in not_solid
            ]
        else:
            phase_results = [
                r for r in results
                if r.type == 'fH' and r.phaseinfo == phase
            ]
        phases = np.unique([r.phaseinfo for r in phase_results])
        fH = [r.value for r in phase_results]
    elif data_type == 'vasp':
        if phase in ('liquid', 'gas'):
            raise ValueError('VASP data only valid for solid phases')
        if phase == 'solid':
            # keep only the entry with lowest energy above hull
            phase_results = sorted(
                results, key=lambda entry: entry['e_above_hull'])[0:1]
        else:
            phase_results = [
                r for r in results
                if r['spacegroup']['crystal_system'] == phase
            ]
        phases = np.unique(
            [r['spacegroup']['crystal_system'] for r in phase_results])
        n_atoms = mg.Composition(formula).num_atoms
        # DFT formation energies given in eV per atom - need to convert to kJ/mol
        fH = [
            r['formation_energy_per_atom'] * n_atoms * 96.485
            for r in phase_results
        ]

    if len(fH) == 0:
        raise LookupError('No {} data for {} in {} phase'.format(data_type,formula,phase))

    # warn when the averaged values disagree by more than 15
    maxdiff = np.max(fH) - np.min(fH)
    if maxdiff > 15:
        warnings.warn('Max discrepancy of {} in formation enthalpies for {} exceeds limit'.format(maxdiff,formula))

    fH = np.mean(fH)
    msg = 'Formation enthalpy for {} in {} phase includes {} data from phases: {}'.format(formula,phase,data_type,', '.join(phases))
    if announce:
        print(msg)

    # store value and info message for future lookup
    self.fH_dict[cache_key] = (fH, msg)
    return fH
def _contains_element(self, comp):
    """
    Returns 1 if comp contains self.element, and 0 if not.

    Ints are used rather than bools because sklearn and numpy handle number
    classes better. The flag could equally be a pair of labels such as
    "contains {element}" / "does not contain {element}".
    """
    amount = pymatgen.Composition(comp)[self.element]
    return 0 if amount == 0 else 1
def normalize_and_alphabetize_formula(formula):
    '''Normalizes composition labels.

    Used to enable matching / groupby on compositions. Atomic fractions are
    scaled so the most abundant element has weight 1, rounded to three
    decimals, and the result is returned as an alphabetical formula.

    Returns None for an empty/falsy formula, and also (after printing an
    "INVALID" line) when the formula cannot be processed.
    '''
    if not formula:
        return None
    try:
        comp = mg.Composition(formula)
        weights = [comp.get_atomic_fraction(ele) for ele in comp.elements]
        largest = max(weights)
        normalized_comp = "".join(
            str(ele) + str(round(weight / largest, 3))
            for ele, weight in zip(comp.elements, weights))
        return mg.Composition(normalized_comp).alphabetical_formula
    except Exception:
        # Best effort by design: report and skip, but let KeyboardInterrupt
        # and SystemExit propagate.
        print("INVALID: ", formula)
        return None
def __init__(self,composition,radius_type='ionic_radius',normalize_formula=False):
    """
    Store an oxide composition together with its cation sub-composition.

    Args:
        composition: composition object; every element other than 'O' is
            treated as a cation
        radius_type: 'crystal_radius' or 'ionic_radius'
        normalize_formula: if True, rescale all amounts so that the cation
            amounts sum to one

    Raises:
        Exception: the composition holds no cations, or radius_type is not
            one of the two supported options
    """
    cations = [el.name for el in composition.elements if el.name!='O']

    # Validate before normalising: with no cations the scaling below would
    # divide by zero and hide the real problem.
    if len(cations)==0:
        raise Exception('No cations in composition')
    if radius_type not in ('crystal_radius','ionic_radius'):
        raise Exception(f'Invalid radius type {radius_type}. Options are crystal_radius and ionic_radius')

    self.cations = cations
    self.radius_type = radius_type

    if normalize_formula:
        # scale to single total unit of cations
        tot_cat_amt = sum(composition[c] for c in cations)
        composition = mg.Composition({
            el: amt/tot_cat_amt
            for el, amt in composition.get_el_amt_dict().items()
        })

    self.composition = composition
    self.metal_composition = mg.Composition(
        {c: self.composition[c] for c in self.cations})
def featurize(self, comp):
    """
    Args:
        comp: (Composition)
            pymatgen Composition object

    Returns:
        HOMO_character: (str) orbital symbol ('s', 'p', 'd', or 'f')
        HOMO_element: (str) symbol of element for HOMO
        HOMO_energy: (float in eV) absolute energy of HOMO
        LUMO_character: (str) orbital symbol ('s', 'p', 'd', or 'f')
        LUMO_element: (str) symbol of element for LUMO
        LUMO_energy: (float in eV) absolute energy of LUMO
        gap_AO: (float in eV)
            the estimated bandgap from HOMO and LUMO energies

        The values are returned as a list in the order given above. When a
        band edge is missing, its character and element are 'na' and its
        energy is 0.
    """
    integer_comp, factor = comp.get_integer_formula_and_factor()

    # warning message if composition is dilute and truncated
    n_original = len(mg.Composition(comp).elements)
    n_integer = len(mg.Composition(integer_comp).elements)
    if n_original != n_integer:
        warn('AtomicOrbitals: {} truncated to {}'.format(comp, integer_comp))

    homo_lumo = MolecularOrbitals(integer_comp).band_edges

    feat = collections.OrderedDict()
    # The edge names double as keys of band_edges and as the prefixes of the
    # feature names read back below, so they must be spelled 'HOMO'/'LUMO'.
    for edge in ['HOMO', 'LUMO']:
        edge_data = homo_lumo[edge]
        if edge_data is not None:
            # last character of the orbital label is the orbital symbol
            feat['{}_character'.format(edge)] = edge_data[1][-1]
            feat['{}_element'.format(edge)] = edge_data[0]
            feat['{}_energy'.format(edge)] = edge_data[2]
        else:
            # band edge is None
            feat['{}_character'.format(edge)] = 'na'
            feat['{}_element'.format(edge)] = 'na'
            # unclear what this value should be. Arbitrarily set to 0.
            # Don't want NaN for modeling
            feat['{}_energy'.format(edge)] = 0
    feat['gap_AO'] = feat['LUMO_energy'] - feat['HOMO_energy']

    return list(feat.values())
def calculate_density(formula):
    '''Calculates density based on Rule of Mixtures (ROM).

    The result is the fraction-weighted atomic mass divided by the
    fraction-weighted molar volume, rounded to one decimal.
    '''
    comp = mg.Composition(formula)
    species = comp.elements
    fractions = [comp.get_atomic_fraction(el) for el in species]
    molar_volumes = np.array([el.molar_volume for el in species])
    masses = np.array([el.atomic_mass for el in species])

    # NOTE(review): units follow pymatgen's element data, presumably g/mol
    # over cm^3/mol - confirm before relying on the unit.
    weighted_mass = np.sum(fractions * masses)
    weighted_volume = np.sum(fractions * molar_volumes)
    return round(weighted_mass / weighted_volume, 1)
def _contains_all_elements(self, compositions):
    """Build a has_<element> indicator column for every element seen.

    All compositions are parsed to collect the distinct elements in order of
    first appearance. For each element, self.element and
    self.new_column_name are set and self._contains_element is applied to
    every composition. Returns the DataFrame of indicator columns.
    """
    # dict.fromkeys drops duplicates while keeping first-seen order
    distinct_elements = list(dict.fromkeys(
        element
        for entry in compositions.values
        for element in pymatgen.Composition(entry).elements))

    df_trans = pd.DataFrame()
    for element in distinct_elements:
        # _contains_element reads the element under test from self
        self.element = element
        self.new_column_name = "has_" + str(self.element)
        df_trans[self.new_column_name] = compositions.apply(
            self._contains_element)
    return df_trans
def calculate_youngs_modulus(formula):
    '''Calculates Young Modulus based on Rule of Mixtures (ROM).

    Each element's modulus is weighted by atomic fraction times molar volume.
    Returns the weighted mean rounded to an int, or an empty string when any
    element has no Young's modulus value.
    '''
    comp = mg.Composition(formula)
    species = comp.elements
    fractions = np.array([comp.get_atomic_fraction(el) for el in species])
    molar_volumes = np.array([el.molar_volume for el in species])
    moduli = np.array([el.youngs_modulus for el in species])
    if None in moduli:
        return ''

    volume_weights = fractions * molar_volumes
    mean_modulus = np.sum(volume_weights * moduli) / np.sum(volume_weights)
    return int(round(mean_modulus, 0))
def load_data(file_path):
    """Read one formula per line and return their integer element amounts.

    If file_path does not exist an empty file is created first, so the
    result is then an empty list. A line that is exactly
    "reduced_cell_formula" followed by a newline is treated as a header and
    skipped. Every other line is parsed as a composition and its element
    amounts are truncated to int.
    """
    if not os.path.exists(file_path):
        with open(file_path, "w"):
            pass

    all_formula = []
    # Context manager so the read handle is always closed.
    with open(file_path, "r") as handle:
        for form in handle:
            if form == "reduced_cell_formula\n":
                continue
            reduce_formula = mg.Composition(form).get_el_amt_dict()
            # Convert in place so the mapping type returned by pymatgen is
            # what callers receive.
            for key in reduce_formula:
                reduce_formula[key] = int(reduce_formula[key])
            all_formula.append(reduce_formula)
    return all_formula
def MX_bond_energy(self,formula,data_type='exp',ordered_formula=False,silent=True,exclude_phases=[]):
    """
    Get metal-anion bond energy per mole of metal for binary ionic compound

    Results are cached in self.calc_MX_bond_energy under the key
    (reduced formula, data_type, comma-joined exclude_phases).

    Parameters:
    -----------
    formula: chemical formula string
    data_type: data source passed to get_fH for the compound ('exp' or 'vasp')
    ordered_formula: if true, assume that first element in formula is metal, and second is anion (i.e. MmXn)
    silent: if False, print the info messages
    exclude_phases: phases to exclude from aggregate over all solid phases

    Raises:
    -------
    Exception: formula is not binary, or the anion cannot be identified
        unambiguously from self.common_anions
    """
    comp = mg.Composition(formula)
    formula = comp.reduced_formula
    cache_key = (formula, data_type, ','.join(exclude_phases))

    # look up compound if already calculated
    try:
        abe, msg = self.calc_MX_bond_energy[cache_key]
    except KeyError:
        pass
    else:
        if silent == False:  # equality test, so None stays silent
            print(msg)
        return abe

    if len(comp.elements) != 2:
        raise Exception("Formula is not a binary compound")

    if ordered_formula:
        # element order as written by the caller: metal first, anion second
        metal = comp.elements[0].name
        anion = comp.elements[1].name
    else:
        anions = [el.name for el in comp.elements if el.name in self.common_anions]
        if len(anions) == 0:
            raise Exception('No common anions found in formula. Use ordered formula to indicate metal and anion')
        if len(anions) > 1:
            raise Exception('Multiple anions found in formula. Use ordered formula to indicate metal and anion')
        anion = anions[0]
        metal = [el.name for el in comp.elements if el.name != anion][0]

    # Take the stoichiometry from the reduced formula: the enthalpy below is
    # looked up for the reduced formula and the result is cached under it, so
    # amounts from an unreduced input (e.g. Fe4O6) would not match.
    amounts = mg.Composition(formula).get_el_amt_dict()
    m = amounts[metal]
    n = amounts[anion]

    # compound formation enthalpy
    fH = self.get_fH(formula, data_type=data_type, silent=silent, exclude_phases=exclude_phases)
    # metal sublimation enthalpy - must be exp data (no vasp data for gas)
    H_sub = self.get_fH(metal, phase='gas', silent=silent, exclude_phases=[])

    # look up info messages from get_fH to store in dict
    msg = self.fH_dict[formula,'solid',data_type,','.join(exclude_phases)][1] + '\n'
    msg += self.fH_dict[metal,'gas','exp',''][1]

    DX2 = self.dissocation_energy[anion]  # anion dissociation energy
    abe = (fH - m*H_sub - (n/2)*DX2)/m  # bond energy per mole of metal
    self.calc_MX_bond_energy[cache_key] = (abe, msg)
    return abe
def __call__(self, formulas):
    """Encode formulas as rows of element amounts over self.Periodic_table.

    Accepts one formula string or an iterable of them. Returns a 2-D array
    with one row per formula and one column per entry of
    self.Periodic_table. Each cell holds the amount of that element, or 0
    when absent.
    """
    if isinstance(formulas, str):
        formulas = [formulas]

    # parse everything first, then encode
    amount_dicts = [mg.Composition(f).get_el_amt_dict() for f in formulas]

    encoded_rows = []
    for amounts in amount_dicts:
        row = np.zeros((1, len(self.Periodic_table)))
        for symbol in amounts.keys():
            column = list(self.Periodic_table).index(symbol)
            row[0, column] = float(amounts[symbol])
        encoded_rows.append(row)
    return np.concatenate(encoded_rows, axis=0)
def create_inequalities(compounds, eoi, control_element = None):
    """Build the inequality lists for each compound.

    Each compound is a mapping with 'pretty_formula' and 'final_energy'.
    The result maps every pretty formula to a list holding:
      * [element amounts, '<', final_energy]
      * for every element other than the control element,
        [[element, amount]] '>' final_energy and '<' 0
      * for every other compound, [its formula, amount differences, '>',
        energy difference], after both formulas are scaled to a common
        amount of the control element.

    control_element defaults to the last entry of eoi.
    """
    # Assumes that the anion is placed last
    if control_element is None:
        control_element = eoi[-1]

    # element/amount pairs of every compound, keyed by formula
    formulas = {}
    for compound in compounds:
        amounts = pmg.Composition(compound['pretty_formula']).get_el_amt_dict()
        formulas[compound['pretty_formula']] = [
            [symbol, amounts[symbol]] for symbol in list(amounts.keys())
        ]

    free_elements = [x for x in eoi if x != control_element]

    def control_amount(pairs):
        # amount of the control element; IndexError when it is absent
        return [amt for symbol, amt in pairs if symbol == control_element][0]

    inequalities = {}
    for compound in compounds:
        c_formula = formulas[compound['pretty_formula']]

        # Basic inequalities
        per_comp = [[c_formula, '<', compound['final_energy']]]
        for items in c_formula:
            if items[0] != control_element:
                per_comp.append([[items], '>', compound['final_energy']])
                per_comp.append([[items], '<', 0])

        # Generates inequalities outside the first one
        for o_comp in compounds:
            if o_comp == compound:
                continue
            o_formula = formulas[o_comp['pretty_formula']]
            compul = control_amount(c_formula)
            o_compul = control_amount(o_formula)
            f = lcm(compul, o_compul)

            # amounts after scaling both compounds to f control atoms
            scaled = {symbol: amt*f/compul for symbol, amt in c_formula}
            o_scaled = {symbol: amt*f/o_compul for symbol, amt in o_formula}
            f_constants = [
                [elm, scaled.get(elm, 0) - o_scaled.get(elm, 0)]
                for elm in free_elements
            ]
            per_comp.append([
                o_comp['pretty_formula'], f_constants, '>',
                f*(compound['final_energy']/compul-o_comp['final_energy']/o_compul)
            ])

        inequalities[compound['pretty_formula']] = per_comp
    return inequalities
def check_neutrality_8(formula):
    """Return True if an eight-element formula can be charge balanced.

    Every choice of eight elements from the formula (in formula order) is
    combined with every assignment of their smact oxidation states. The
    formula passes when neutral_ratios, called with the formula's integer
    stoichiometry, reports a neutral combination whose ratio tuple equals the
    element amounts of the formula. Ratio tuples have eight entries, so only
    formulas with exactly eight elements can match.

    The number of oxidation states of each element is printed first. On
    failure two summary lines are printed before returning False. The timing
    line reads `start_time`, which must exist at module level.
    """
    import itertools  # function-scope import keeps this block self-contained

    charge_neutral_count = 0  # never incremented; only feeds the summary line
    comp = mg.Composition(formula)
    reduce_formula = comp.get_el_amt_dict()
    list_of_elements = list(reduce_formula.keys())
    # One length-1 integer array per element, as handed to neutral_ratios.
    stoichs = list(
        np.array(list(reduce_formula.values())).astype(np.int32)[:, np.newaxis])
    max_num = max(reduce_formula.values())
    amounts = tuple(reduce_formula.values())
    threshold = int(max_num)

    # One smact lookup per element, reused for the size report and the search.
    ox_states = {}
    for element in list_of_elements:
        ox_states[element] = smact.Element(element).oxidation_states
        print(len(ox_states[element]), end = ",")

    for combo in itertools.combinations(list_of_elements, 8):
        for oxidations in itertools.product(*(ox_states[e] for e in combo)):
            cn_e, cn_r = neutral_ratios(
                list(oxidations), stoichs=stoichs, threshold=threshold)
            if cn_e and any(amounts == num for num in cn_r):
                return True

    print('Number of combinations = {0}'.format(charge_neutral_count))
    print("--- {0} seconds to run ---".format(time.time() - start_time))
    return False
def get_comp_from_coords(coords, tern_axes=['Ca', 'Al', 'Ba'], scale=1):
    """Convert ternary-diagram coordinates into an oxide composition.

    coords gives the amount of each axis oxide in the order of tern_axes.
    When only two coordinates are given, the third is scale minus their sum.
    Each axis metal is mapped to its oxide and the amounts are combined into
    one composition.
    """
    oxides = {
        'Ba': 'BaO',
        'Ca': 'CaO',
        'Al': 'Al2O3',
        'B': 'B2O3',
        'Mg': 'MgO',
        'Sr': 'SrO'
    }

    if len(coords) == 2:
        first, second = coords
        coords = (first, second, scale - first - second)

    # "(oxide)amount" for each axis, concatenated into one formula string
    terms = []
    for metal, amount in zip(tern_axes, coords):
        terms.append('({}){}'.format(oxides[metal], amount))
    return mg.Composition(''.join(terms))
def __call__(self, formulas):
    """Embed formulas as amount-weighted sums of per-element vectors.

    Accepts one formula string or an iterable of them. For each formula, the
    row of self.atom_fec belonging to each element (located through its
    position in self.atom) is multiplied by that element's amount and the
    products are summed. Returns the stacked results, one row per formula.
    """
    if isinstance(formulas, str):
        formulas = [formulas]

    # parse everything first, then embed
    amount_dicts = [mg.Composition(f).get_el_amt_dict() for f in formulas]

    embedded_rows = []
    for amounts in amount_dicts:
        vector = 0
        for symbol in amounts.keys():
            position = list(self.atom).index(symbol)
            vector += self.atom_fec[position] * float(amounts[symbol])
        # add a leading axis so the rows can be concatenated
        embedded_rows.append(vector[np.newaxis, :])
    return np.concatenate(embedded_rows, axis=0)