def test_composition_to_oxidcomposition(self):
    """Exercise CompositionToOxidComposition on Fe2O3, including error paths.

    Three scenarios are checked in order: default decoration, a strict
    converter that is expected to raise, and a lenient converter that is
    expected to hand back the undecorated composition.
    """
    # Default settings: the output column carries oxidation-state species.
    frame = DataFrame(data={"composition": [Composition("Fe2O3")]})
    default_converter = CompositionToOxidComposition()
    frame = default_converter.featurize_dataframe(frame, 'composition')
    decorated = Composition({"Fe3+": 2, "O2-": 3})
    self.assertEqual(frame["composition_oxid"].tolist()[0], decorated)

    # test error handling: with return_original_on_error=False and
    # max_sites=2 the featurization is expected to raise ValueError.
    frame = DataFrame(data={"composition": [Composition("Fe2O3")]})
    strict_converter = CompositionToOxidComposition(
        return_original_on_error=False, max_sites=2)
    with self.assertRaises(ValueError):
        strict_converter.featurize_dataframe(frame, 'composition')

    # check non oxi state structure returned correctly: same max_sites, but
    # the original (undecorated) composition is expected back.
    lenient_converter = CompositionToOxidComposition(
        return_original_on_error=True, max_sites=2)
    frame = lenient_converter.featurize_dataframe(frame, 'composition')
    undecorated = Composition({"Fe": 2, "O": 3})
    self.assertEqual(frame["composition_oxid"].tolist()[0], undecorated)
def composition_featurizer(df_input: pd.DataFrame, **kwargs) -> pd.DataFrame:
    """Return a Pandas DataFrame with all compositional features.

    Args:
        df_input: frame whose "Compound" column is fed to StrToComposition.
        **kwargs: forwarded to the CompositionToOxidComposition constructor.

    Returns:
        The featurized frame restricted to rows whose
        "minimum oxidation state" value is not 0.
    """
    # generate the "composition" column
    to_composition = StrToComposition()
    features = to_composition.featurize_dataframe(df_input, col_id="Compound")

    # generate features based on elemental properties (appended in place)
    magpie = ElementProperty.from_preset(preset_name="magpie")
    magpie.featurize_dataframe(features, col_id="composition", inplace=True)

    # generate the "composition_oxid" column based on guessed oxidation
    # states; errors from non-integer stoichiometries are ignored
    to_oxid = CompositionToOxidComposition(
        return_original_on_error=True, **kwargs)
    to_oxid.featurize_dataframe(
        features, "composition", ignore_errors=True, inplace=True)

    # correct oxidation states
    features = correct_comp_oxid(features)

    # generate features based on oxidation states
    oxidation_states = OxidationStates()
    oxidation_states.featurize_dataframe(
        features, "composition_oxid", ignore_errors=True, inplace=True)

    # remove compounds with predicted oxidation states of 0
    has_nonzero_minimum = features["minimum oxidation state"] != 0
    return features[has_nonzero_minimum]
def featurize_composition(df: pd.DataFrame) -> pd.DataFrame:
    """Add matminer composition features to a DataFrame of structures.

    Args:
        df (pandas.DataFrame): input frame with a `"structure"` column whose
            entries expose a `.composition` attribute.

    Returns:
        pandas.DataFrame: the result of `clean_df` applied to the decorated
        copy, with inf/-inf/NaN replaced by 0.
    """
    # Integer codes used for the HOMO/LUMO orbital character columns.
    orbital_codes = {"s": 1, "p": 2, "d": 3, "f": 4}

    def _atomic_number(symbol):
        # Anything that is not a string is encoded as -1.
        if isinstance(symbol, str):
            return Element(symbol).Z
        return -1

    logging.info("Applying composition featurizers...")
    df = df.copy()
    df['composition'] = df['structure'].apply(lambda s: s.composition)

    composition_featurizers = [
        ElementProperty.from_preset("magpie"),
        AtomicOrbitals(),
        BandCenter(),
        # ElectronAffinity() is left out: not used in the paper preset.
        Stoichiometry(),
        ValenceOrbital(),
        IonProperty(),
        ElementFraction(),
        TMetalFraction(),
        # CohesiveEnergy() is left out: not used in the paper preset.
        Miedema(),
        YangSolidSolution(),
        AtomicPackingEfficiency(),
    ]
    featurizer = MultipleFeaturizer(composition_featurizers)
    df = featurizer.featurize_dataframe(
        df, "composition", multiindex=True, ignore_errors=True)
    # Flatten the two-level column labels into "Featurizer|label" strings.
    df.columns = df.columns.map('|'.join).str.strip('|')

    ox_featurizer = MultipleFeaturizer(
        [OxidationStates(), ElectronegativityDiff()])
    to_oxid = CompositionToOxidComposition()
    df = to_oxid.featurize_dataframe(df, "Input Data|composition")
    df = ox_featurizer.featurize_dataframe(
        df, "composition_oxid", multiindex=True, ignore_errors=True)
    df = df.rename(columns={'Input Data': ''})
    df.columns = df.columns.map('|'.join).str.strip('|')

    # Encode the categorical AtomicOrbitals outputs numerically.
    for column in ('AtomicOrbitals|HOMO_character',
                   'AtomicOrbitals|LUMO_character'):
        df[column] = df[column].map(orbital_codes)
    for column in ('AtomicOrbitals|HOMO_element',
                   'AtomicOrbitals|LUMO_element'):
        df[column] = df[column].apply(_atomic_number)

    df = df.replace([np.inf, -np.inf, np.nan], 0)
    return clean_df(df)
def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame:
    """Decorate a DataFrame of structures with matminer composition features.

    The featurizers applied are those listed in
    `self.composition_featurizers` and `self.oxid_composition_featurizers`.

    Arguments:
        df: the input dataframe with a `"structure"` column whose entries
            expose a `.composition` attribute.

    Returns:
        pandas.DataFrame: the decorated copy of `df`. When neither list of
        featurizers is set, the copy is returned unchanged.
    """
    df = df.copy()

    if self.composition_featurizers:
        LOG.info("Applying composition featurizers...")
        df["composition"] = df["structure"].apply(lambda s: s.composition)
        df = self._fit_apply_featurizers(
            df, self.composition_featurizers, "composition"
        )
        df = df.rename(columns={"Input Data": ""})
        # Flatten the two-level column labels into "Featurizer|label".
        df.columns = df.columns.map("|".join).str.strip("|")

    if self.oxid_composition_featurizers:
        LOG.info("Applying oxidation state featurizers...")
        # The oxidation-state conversion reads the "composition" column,
        # which is otherwise only created by the branch above; build it here
        # so that a preset with oxidation featurizers alone still works.
        if "composition" not in df.columns:
            df["composition"] = df["structure"].apply(
                lambda s: s.composition
            )

        # `fast_oxid` is an optional attribute; when truthy the converter is
        # built with all_oxi_states=False and max_sites=-1.
        if getattr(self, "fast_oxid", False):
            to_oxid = CompositionToOxidComposition(
                all_oxi_states=False, max_sites=-1
            )
        else:
            to_oxid = CompositionToOxidComposition()
        df = to_oxid.featurize_dataframe(df, "composition")

        df = self._fit_apply_featurizers(
            df, self.oxid_composition_featurizers, "composition_oxid"
        )
        df = df.rename(columns={"Input Data": ""})
        df.columns = df.columns.map("|".join).str.strip("|")

    return df
def test_composition_to_oxidcomposition(self):
    """Fe2O3 should be decorated as Fe3+ / O2- by the default converter."""
    frame = DataFrame(data={"composition": [Composition("Fe2O3")]})
    converter = CompositionToOxidComposition()
    frame = converter.featurize_dataframe(frame, 'composition')

    expected = Composition({"Fe3+": 2, "O2-": 3})
    first_decorated = frame["composition_oxid"].tolist()[0]
    self.assertEqual(first_decorated, expected)
def test_miedema_all(self):
    """Check Miedema prechecks and features for plain and oxidation-state
    decorated compositions.

    The frame holds two alloys (TiZr, Mg10Cu50Ca40) and one oxide (Fe2O3);
    the oxide is expected to fail the precheck and to yield NaN features.
    """
    df = pd.DataFrame({
        "composition": [
            Composition("TiZr"),
            Composition("Mg10Cu50Ca40"),
            Composition("Fe2O3"),
        ]
    })
    miedema = Miedema(struct_types='all')
    self.assertTrue(miedema.precheck(df["composition"].iloc[0]))
    self.assertFalse(miedema.precheck(df["composition"].iloc[-1]))
    self.assertAlmostEqual(
        miedema.precheck_dataframe(df, "composition"), 2 / 3)

    # test precheck for oxidation-state decorated compositions
    df = CompositionToOxidComposition(return_original_on_error=True).\
        featurize_dataframe(df, 'composition')
    self.assertTrue(miedema.precheck(df["composition_oxid"].iloc[0]))
    self.assertFalse(miedema.precheck(df["composition_oxid"].iloc[-1]))
    self.assertAlmostEqual(
        miedema.precheck_dataframe(df, "composition_oxid"), 2 / 3)

    # Reference values for the first row (TiZr); reused below for the
    # oxidation-state decorated column.
    tizr_expected = {
        'Miedema_deltaH_inter': -0.003445022152,
        'Miedema_deltaH_amor': 0.0707658836300,
        'Miedema_deltaH_ss_min': 0.03663599755,
    }
    # Reference values for the second row (Mg10Cu50Ca40).
    mgcuca_expected = {
        'Miedema_deltaH_inter': -0.235125978427,
        'Miedema_deltaH_amor': -0.164541848271,
        'Miedema_deltaH_ss_min': -0.05280843311,
    }

    mfps = miedema.featurize_dataframe(df, col_id="composition")
    for label, value in tizr_expected.items():
        self.assertAlmostEqual(mfps[label][0], value)
    for label, value in mgcuca_expected.items():
        self.assertAlmostEqual(mfps[label][1], value)
    # The third row (Fe2O3) must come back as NaN. assertTrue states the
    # boolean check directly instead of routing it through a numeric
    # tolerance comparison.
    for label in tizr_expected:
        self.assertTrue(math.isnan(mfps[label][2]))

    # make sure featurization works equally for compositions with or without
    # oxidation states
    mfps = miedema.featurize_dataframe(df, col_id="composition_oxid")
    for label, value in tizr_expected.items():
        self.assertAlmostEqual(mfps[label][0], value)
def generate_data(name):
    """Build the featurized table for the CSV file `name` and save it.

    Steps, in order:
      1. read `name` and "gaps.csv" (both indexed by their first column);
      2. append a "gaps" column (default -10.0) and fill it from "gaps.csv";
      3. append an "is_daoti" column: 1 where the gap is 0, otherwise 0
         (presumably "daoti" = conductor -- TODO confirm with the author);
      4. write `describe()` to 'general_look_jie.csv';
      5. add matminer composition, magpie and oxidation-state features;
      6. write the result to '2d_vector_plus.csv'.

    The original author notes that the final matrix has 145 features.

    Args:
        name: path of the input CSV, e.g. 'test_plus_gaps.csv'. It must
            provide a 'full_formula' column.

    Returns:
        None. Results are written to CSV files and progress is printed.
    """
    df = pd.read_csv(name, index_col=[0])
    df['gaps'] = -10.0
    df_gap = pd.read_csv("gaps.csv", index_col=[0])
    print(df_gap.index)

    # Ordered merge walk: both tables are assumed to list materials in the
    # same order, with "gaps.csv" holding the bare id and `df` the 'mp-<id>'
    # form. `row` only advances on a match.
    row = 0
    n_rows = len(df.index)
    for j, gap_id in enumerate(df_gap.index):
        # Stop once every row of `df` is filled; without this guard the
        # lookup below runs past the end when "gaps.csv" has rows left over.
        if row >= n_rows:
            break
        if 'mp-' + str(gap_id) == df.index[row]:
            df.iloc[row, -1] = df_gap.iloc[j, 0]
            row += 1
    print("合并完毕")

    # Build the classification label the same positional way as before:
    # the new last column is 1 where the second-to-last column equals 0.
    # (`DataFrame.ix` was removed in pandas 1.0, so use `iloc`.)
    df['is_daoti'] = -2
    df.iloc[:, -1] = (df.iloc[:, -2] == 0).astype("int64").values
    print("分类feature建立完成")

    # First use describe() to get an overall picture of the data.
    print(df.describe())
    df.describe().to_csv('general_look_jie.csv')
    # Inspection of the data showed nothing abnormal.

    # Convert the formula strings to compositions, then featurize them.
    df = StrToComposition().featurize_dataframe(
        df, 'full_formula', ignore_errors=True)
    print(df.head())
    ep_feat = ElementProperty.from_preset(preset_name='magpie')
    # The "composition" column is the featurizer input.
    df = ep_feat.featurize_dataframe(
        df, col_id='composition', ignore_errors=True)
    print(df.head())

    # Add oxidation-state related features.
    df = CompositionToOxidComposition().featurize_dataframe(
        df, col_id='composition')
    os_feat = OxidationStates()
    df = os_feat.featurize_dataframe(df, col_id='composition_oxid')

    new_name = '2d_vector_plus.csv'
    df.to_csv(new_name)
def generate_data():
    """Featurize the elastic-tensor dataset step by step.

    A CSV snapshot is written after each stage and intermediate frames are
    printed. Nothing is returned.
    """
    frame = load_elastic_tensor()
    frame.to_csv('原始elastic数据.csv')
    print(frame.columns)

    # Drop the columns that are not needed downstream.
    unwanted_columns = [
        'volume',
        'nsites',
        'compliance_tensor',
        'elastic_tensor',
        'elastic_tensor_original',
        'K_Voigt',
        'G_Voigt',
        'K_Reuss',
        'G_Reuss',
    ]
    frame = frame.drop(unwanted_columns, axis=1)
    print(frame.head())
    frame.to_csv('扔掉不需要的部分.csv')

    # First use describe() to get an overall picture of the data.
    summary = frame.describe()
    print(summary)
    frame.describe().to_csv('general_look.csv')
    # Inspection of the data showed nothing abnormal.

    to_composition = StrToComposition()
    frame = to_composition.featurize_dataframe(frame, 'formula')
    print(frame.head())
    frame.to_csv('引入composition.csv')

    # Next, a featurizer adds a series of element-property features; the
    # "composition" column is its input.
    element_features = ElementProperty.from_preset(preset_name='magpie')
    frame = element_features.featurize_dataframe(frame, col_id='composition')
    print(frame.head())
    print(element_features.citations())
    frame.to_csv('将composition特征化后.csv')

    # Bring in oxidation-state related features.
    to_oxid = CompositionToOxidComposition()
    frame = to_oxid.featurize_dataframe(frame, 'composition')
    oxidation_features = OxidationStates()
    frame = oxidation_features.featurize_dataframe(
        frame, col_id='composition_oxid')
    print(frame.head())
    frame.to_csv('引入氧化态之后.csv')

    # Besides composition-based features there are others, e.g.
    # structure-based ones such as density.
    density_features = DensityFeatures()
    frame = density_features.featurize_dataframe(frame, 'structure')
    print(frame.head())
    frame.to_csv('引入结构中的密度.csv')
    print(density_features.feature_labels())
def test_featurizers():
    """Featurize 'test.csv' and write the result to 'after_test.csv'.

    Applies formula-to-composition conversion, magpie element properties and
    oxidation-state features, printing the head of the frame along the way.
    """
    frame = pd.read_csv('test.csv', index_col=[0])

    to_composition = StrToComposition()
    frame = to_composition.featurize_dataframe(frame, 'formula')
    print(frame.head())

    # Next, a featurizer adds a series of element-property features; the
    # "composition" column is its input.
    element_features = ElementProperty.from_preset(preset_name='magpie')
    frame = element_features.featurize_dataframe(frame, col_id='composition')
    print(frame.head())
    print(element_features.citations())

    # Bring in oxidation-state related features.
    to_oxid = CompositionToOxidComposition()
    frame = to_oxid.featurize_dataframe(frame, 'composition')
    oxidation_features = OxidationStates()
    frame = oxidation_features.featurize_dataframe(
        frame, col_id='composition_oxid')
    print(frame.head())

    frame.to_csv('after_test.csv')
def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame:
    """Decorate a DataFrame of structures with matminer composition features.

    The featurizers applied are those listed in
    `self.composition_featurizers` and `self.oxide_composition_featurizers`.

    Arguments:
        df: the input dataframe with a `"structure"` column whose entries
            expose a `.composition` attribute.

    Returns:
        pandas.DataFrame: the decorated copy of `df`, or an empty DataFrame
        if no composition/oxidation featurizers exist for this class.
    """
    if not (self.composition_featurizers
            or self.oxide_composition_featurizers):
        return pd.DataFrame([])

    df = df.copy()

    if self.composition_featurizers:
        logging.info("Applying composition featurizers...")
        df['composition'] = df['structure'].apply(lambda s: s.composition)
        df = self._fit_apply_featurizers(
            df, self.composition_featurizers, "composition")
        df = df.replace([np.inf, -np.inf, np.nan], 0)
        df = df.rename(columns={'Input Data': ''})
        # Flatten the two-level column labels into "Featurizer|label".
        df.columns = df.columns.map('|'.join).str.strip('|')

    if self.oxide_composition_featurizers:
        logging.info("Applying oxidation state featurizers...")
        # The oxidation-state conversion reads the "composition" column,
        # which is otherwise only created by the branch above; build it here
        # so that a preset with oxidation featurizers alone still works.
        if 'composition' not in df.columns:
            df['composition'] = df['structure'].apply(
                lambda s: s.composition)
        df = CompositionToOxidComposition().featurize_dataframe(
            df, "composition")
        df = self._fit_apply_featurizers(
            df, self.oxide_composition_featurizers, "composition_oxid")
        df = df.rename(columns={'Input Data': ''})
        df.columns = df.columns.map('|'.join).str.strip('|')

    return df
def test_yang(self):
    """Check YangSolidSolution prechecks and features on five compositions."""
    formulas = [
        "ZrHfTiCuNi", "CuNi", "CoCrFeNiCuAl0.3", "CoCrFeNiCuAl", "LaO3"
    ]
    comps = [Composition(formula) for formula in formulas]

    # Run the featurization
    feat = YangSolidSolution()
    df = pd.DataFrame({"composition": comps})
    last_plain = df["composition"].iloc[-1]
    self.assertFalse(feat.precheck(last_plain))
    plain_fraction = feat.precheck_dataframe(df, "composition")
    self.assertAlmostEqual(plain_fraction, 0.8, places=2)

    # test precheck for oxidation-state decorated compositions
    to_oxid = CompositionToOxidComposition(return_original_on_error=True)
    df = to_oxid.featurize_dataframe(df, 'composition')
    last_decorated = df["composition_oxid"].iloc[-1]
    self.assertFalse(feat.precheck(last_decorated))
    decorated_fraction = feat.precheck_dataframe(df, "composition_oxid")
    self.assertAlmostEqual(decorated_fraction, 0.8, places=2)

    feat.set_n_jobs(1)
    features = feat.featurize_many(comps)

    # Check the results
    # These are compared to results from the original paper,
    #  except for CoCrFeNiCuAl0.3, where the paper reports a value
    #  exactly 1/10th of what I compute using Excel and matminer
    # I use a high tolerance because matminer uses a different source
    #  of radii than the original paper (do not have Kittel's atomic
    #  radii available)
    self.assertEqual((5, 2), np.array(features).shape)
    expectations = (
        (0, [0.95, 0.1021], 2),
        (1, [2.22, 0.0], 2),
        (2, [158.5, 0.0315], 1),
        (3, [5.06, 0.0482], 1),
    )
    for row, expected, decimal in expectations:
        self.assertArrayAlmostEqual(expected, features[row], decimal=decimal)
import numpy as np
# Save `data_3` under the key "data" in a compressed .npz archive.
np.savez_compressed("heusler_all.npz", data=data_3)

# In[ ]:

# Featurization
# This part is done with reference to the matminer examples.
# Each step below reads one column of `data_3` and rebinds `data_3` to the
# frame returned by the featurizer.

# Element-property features (magpie preset) from the "composition" column.
from matminer.featurizers.composition import ElementProperty
ep_feat = ElementProperty.from_preset(preset_name="magpie")
data_3 = ep_feat.featurize_dataframe(data_3, col_id="composition")

# Convert "composition" with CompositionToOxidComposition, then featurize
# the "composition_oxid" column with OxidationStates.
from matminer.featurizers.conversions import CompositionToOxidComposition
from matminer.featurizers.composition import OxidationStates
data_3 = CompositionToOxidComposition().featurize_dataframe(
    data_3, "composition")
os_feat = OxidationStates()
data_3 = os_feat.featurize_dataframe(data_3, "composition_oxid")

# Density features computed from the "structure" column.
from matminer.featurizers.structure import DensityFeatures
df_feat = DensityFeatures()
data_3 = df_feat.featurize_dataframe(data_3, "structure")

# Columns removed from the featurized table; the result is kept separately
# as `data_4`, so `data_3` still holds every column.
unwanted_columns = [
    "elasticity", "material_id", "nsites", "compliance_tensor",
    "elastic_tensor", "elastic_tensor_original", "K_Voigt", "G_Voigt",
    "K_Reuss", "G_Reuss", "warnings"
]
data_4 = data_3.drop(unwanted_columns, axis=1)
#Block 1 - Loading dataframe ''' # arbitrary inputs - Li must be excluded to ensure consistency data = [['mp-1025496', 'Nb1 Se2'], ['mp-977563', 'Nb1 Ir2'], ['mp-864631', 'Nb1 Rh2'], ['mp-3368', 'Nb3 O8']] fdf = pd.DataFrame(data, columns=['Id', 'Reduced Formula']) ## Initial conversion to matminer objects from matminer.featurizers.conversions import StrToComposition fdf = StrToComposition().featurize_dataframe(fdf, 'Reduced Formula') from matminer.featurizers.conversions import CompositionToOxidComposition fdf = CompositionToOxidComposition().featurize_dataframe(fdf, 'composition') print("The initial dataset has {}".format(fdf.shape)) print(fdf.head()) ''' Block 2 - Featurization ''' # # -- start F1 from matminer.featurizers.composition import ElementProperty ep_feat = ElementProperty.from_preset(preset_name='magpie') fdf = ep_feat.featurize_dataframe(fdf, col_id='composition', ignore_errors=True)
"elastic_tensor_original", "K_Voigt", "G_Voigt", "K_Reuss", "G_Reuss"] df = df.drop(unwanted_columns, axis=1) from matminer.featurizers.conversions import StrToComposition df = StrToComposition().featurize_dataframe(df, 'formula') from matminer.featurizers.composition import ElementProperty ep_feat = ElementProperty.from_preset(preset_name="magpie") df = ep_feat.featurize_dataframe(df, col_id='composition') from matminer.featurizers.conversions import CompositionToOxidComposition from matminer.featurizers.composition import OxidationStates df = CompositionToOxidComposition().featurize_dataframe(df, "composition") os_feat = OxidationStates() df = os_feat.featurize_dataframe(df, "composition_oxid") from matminer.featurizers.structure import DensityFeatures df_feat = DensityFeatures() df = df_feat.featurize_dataframe(df, col_id='structure') y = df['K_VRH'].values excluded = ["G_VRH", "K_VRH", "elastic_anisotropy", "formula", "material_id", "poisson_ratio", "structure", "composition", "composition_oxid"] X = df.drop(excluded, axis=1) print("There are {} possible descriptors:\n\n{}".format(X.shape[1], X.columns.values))
'cif', 'kpoint_density', 'poscar'] df = df.drop(unwanted_columns, axis=1) from matminer.featurizers.conversions import StrToComposition sc_feat = StrToComposition() df = sc_feat.featurize_dataframe(df, col_id='formula') from matminer.featurizers.composition import ElementProperty ep_feat = ElementProperty.from_preset(preset_name='magpie') df = ep_feat.featurize_dataframe(df, col_id='composition') from matminer.featurizers.conversions import CompositionToOxidComposition co_feat = CompositionToOxidComposition() df = co_feat.featurize_dataframe(df, col_id='composition') from matminer.featurizers.composition import OxidationStates os_feat = OxidationStates() df = os_feat.featurize_dataframe(df, col_id='composition_oxid') from matminer.featurizers.structure import DensityFeatures df_feat = DensityFeatures() df = df_feat.featurize_dataframe(df, col_id='structure') """ formula, structure, elastic_anisotropy, G_Reuss, G_VRH, G_Voigt, K_Reuss, K_VRH, K_Voigt, poisson_ratio, compliance_tensor, elastic_tensor, elastic_tensor_original, composition
def _tidy_column(self, df, featurizer_type): """ Various conversions to homogenize columns for featurization input. For example, take a column of compositions and ensure they are decorated with oxidation states, are not strings, etc. Args: df (pandas.DataFrame) featurizer_type: The key defining the featurizer input. For example, composition featurizers should have featurizer_type of "composition". Returns: df (pandas.DataFrame): DataFrame with featurizer_type column ready for featurization. """ # todo: Make the following conversions more robust (no [0] type checking) type_tester = df[featurizer_type].iloc[0] if featurizer_type == self.composition_col: # Convert formulas to composition objects if isinstance(type_tester, str): self.logger.info( self._log_prefix + "Compositions detected as strings. Attempting " "conversion to Composition objects...") stc = StrToComposition(overwrite_data=True, target_col_id=featurizer_type) df = stc.featurize_dataframe(df, featurizer_type, multiindex=self.multiindex, ignore_errors=True, inplace=False) elif isinstance(type_tester, dict): self.logger.info(self._log_prefix + "Compositions detected as dicts. Attempting " "conversion to Composition objects...") df[featurizer_type] = [ Composition.from_dict(d) for d in df[featurizer_type] ] # Convert non-oxidstate containing comps to oxidstate comps if self.guess_oxistates: self.logger.info( self._log_prefix + "Guessing oxidation states of compositions, as" " they were not present in input.") cto = CompositionToOxidComposition( target_col_id=featurizer_type, overwrite_data=True, return_original_on_error=True, max_sites=-50) try: df = cto.featurize_dataframe(df, featurizer_type, multiindex=self.multiindex, inplace=False) except Exception as e: self.logger.info(self._log_prefix + "Could not decorate oxidation states due " "to {}. 
Excluding featurizers based on " "composition oxistates".format(e)) classes_require_oxi = [ c.__class__.__name__ for c in CompositionFeaturizers().need_oxi ] self.exclude.extend(classes_require_oxi) else: # Convert structure/bs/dos dicts to objects (robust already) if isinstance(type_tester, (dict, str)): self.logger.info(self._log_prefix.capitalize() + "{} detected as string or dict. Attempting " "conversion to {} objects..." "".format(featurizer_type, featurizer_type)) if isinstance(type_tester, str): raise ValueError("{} column is type {}. Cannot convert." "".format(featurizer_type, type(type_tester))) dto = DictToObject(overwrite_data=True, target_col_id=featurizer_type) df = dto.featurize_dataframe(df, featurizer_type, inplace=False) # Decorate with oxidstates if featurizer_type == self.structure_col and \ self.guess_oxistates: self.logger.info( self._log_prefix + "Guessing oxidation states of structures if they were " "not present in input.") sto = StructureToOxidStructure( target_col_id=featurizer_type, overwrite_data=True, return_original_on_error=True, max_sites=-50) try: df = sto.featurize_dataframe( df, featurizer_type, multiindex=self.multiindex, inplace=False) except Exception as e: self.logger.info( self._log_prefix + "Could not decorate oxidation states on structures " "due to {}.".format(e)) return df
# ##structural heterogeneity #from matminer.featurizers.structure import StructuralHeterogeneity #structural_heterogeneity = StructuralHeterogeneity() #structural_heterogeneity.set_n_jobs(28) #labels.append(structural_heterogeneity.feature_labels()) #df = structural_heterogeneity.featurize_dataframe(df, 'structures',ignore_errors=False) #convert structure to composition from matminer.featurizers.conversions import StructureToComposition structures_to_compositions = StructureToComposition() df = structures_to_compositions.featurize_dataframe(df, 'structures') #convert composition to oxidcomposition from matminer.featurizers.conversions import CompositionToOxidComposition OxidCompositions = CompositionToOxidComposition() print(OxidCompositions.feature_labels()) df = OxidCompositions.featurize_dataframe(df, 'composition') #CohesiveEnergy from matminer.featurizers.composition import CohesiveEnergy cohesive_energy = CohesiveEnergy() cohesive_energy.set_n_jobs(28) labels.append(cohesive_energy.feature_labels()) df = cohesive_energy.featurize_dataframe(df, 'composition', ignore_errors=True) #ValenceOrbital from matminer.featurizers.composition import ValenceOrbital valence_orbital = ValenceOrbital()
def AddFeatures(df):
    """Add matminer compositional and structural features to `df`.

    `df` must provide a "formula" column (converted to "composition" and
    "composition_oxid") and a "structure" column for the structural
    featurizers. The featurized DataFrame is returned.
    """
    from matminer.featurizers.conversions import StrToComposition
    from matminer.featurizers.composition import ElementProperty
    from matminer.featurizers.conversions import CompositionToOxidComposition
    from matminer.featurizers.composition import OxidationStates
    from matminer.featurizers.composition import ElectronAffinity
    from matminer.featurizers.composition import BandCenter
    from matminer.featurizers.composition import CohesiveEnergy
    from matminer.featurizers.composition import Miedema
    from matminer.featurizers.composition import TMetalFraction
    from matminer.featurizers.composition import ValenceOrbital
    from matminer.featurizers.composition import YangSolidSolution
    from matminer.featurizers.structure import GlobalSymmetryFeatures
    from matminer.featurizers.structure import StructuralComplexity
    from matminer.featurizers.structure import ChemicalOrdering
    from matminer.featurizers.structure import MaximumPackingEfficiency
    from matminer.featurizers.structure import MinimumRelativeDistances
    from matminer.featurizers.structure import StructuralHeterogeneity
    from matminer.featurizers.structure import SiteStatsFingerprint
    from matminer.featurizers.site import AverageBondLength
    from pymatgen.analysis.local_env import CrystalNN
    from matminer.featurizers.site import AverageBondAngle
    from matminer.featurizers.site import BondOrientationalParameter
    from matminer.featurizers.site import CoordinationNumber
    from matminer.featurizers.structure import DensityFeatures

    # Base conversions and features; these run without ignore_errors.
    df = StrToComposition().featurize_dataframe(df, "formula")
    magpie = ElementProperty.from_preset(preset_name="magpie")
    # input the "composition" column to the featurizer
    df = magpie.featurize_dataframe(df, col_id="composition")
    df = CompositionToOxidComposition().featurize_dataframe(df, "composition")
    df = OxidationStates().featurize_dataframe(df, "composition_oxid")

    # Compositional featurizers, each built right before it is applied to
    # the "composition_oxid" column.
    composition_factories = (
        ElectronAffinity,
        BandCenter,
        CohesiveEnergy,
        Miedema,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    for build in composition_factories:
        df = build().featurize_dataframe(
            df, "composition_oxid", ignore_errors=True)

    # This is the border between compositional features and structural
    # features. Empty the tuple below to use only compositional features.
    structure_factories = (
        GlobalSymmetryFeatures,
        StructuralComplexity,
        ChemicalOrdering,
        MaximumPackingEfficiency,
        MinimumRelativeDistances,
        StructuralHeterogeneity,
        lambda: SiteStatsFingerprint(
            AverageBondLength(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(
            AverageBondAngle(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(BondOrientationalParameter()),
        lambda: SiteStatsFingerprint(CoordinationNumber()),
        DensityFeatures,
    )
    for build in structure_factories:
        df = build().featurize_dataframe(df, "structure", ignore_errors=True)

    return df