def test_miedema_all(self):
    """Check Miedema features for all structure types plus precheck results.

    Featurizes TiZr, Mg10Cu50Ca40 and Fe2O3 with ``struct_types='all'``.
    The first two compositions are compared against reference enthalpies;
    the third (Fe2O3) is expected to fail the precheck and to produce NaN
    for every feature.
    """
    df = pd.DataFrame({
        "composition": [
            Composition("TiZr"),
            Composition("Mg10Cu50Ca40"),
            Composition("Fe2O3"),
        ]
    })
    miedema = Miedema(struct_types='all')

    # Only the last composition is expected to be rejected, so two thirds
    # of the dataframe should pass the precheck.
    self.assertFalse(miedema.precheck(df["composition"].iloc[-1]))
    self.assertAlmostEqual(
        miedema.precheck_dataframe(df, "composition"), 2 / 3)

    mfps = miedema.featurize_dataframe(df, col_id="composition")

    # Reference values per feature label, one entry per featurizable row.
    expected = (
        ('Miedema_deltaH_inter', (-0.003445022152, -0.235125978427)),
        ('Miedema_deltaH_amor', (0.0707658836300, -0.164541848271)),
        ('Miedema_deltaH_ss_min', (0.03663599755, -0.05280843311)),
    )
    for row in (0, 1):
        for label, values in expected:
            self.assertAlmostEqual(mfps[label][row], values[row])

    # A boolean check belongs in assertTrue, not assertAlmostEqual.
    for label, _ in expected:
        self.assertTrue(math.isnan(mfps[label][2]))
def test_miedema_all(self):
    """Check Miedema features and prechecks, with and without oxidation states.

    Featurizes TiZr, Mg10Cu50Ca40 and Fe2O3 with ``struct_types='all'``.
    The precheck is exercised on both the plain ``composition`` column and
    the oxidation-state decorated ``composition_oxid`` column, and the
    features computed from either column are compared against the same
    reference values.
    """
    df = pd.DataFrame({
        "composition": [
            Composition("TiZr"),
            Composition("Mg10Cu50Ca40"),
            Composition("Fe2O3"),
        ]
    })
    miedema = Miedema(struct_types='all')
    self.assertTrue(miedema.precheck(df["composition"].iloc[0]))
    self.assertFalse(miedema.precheck(df["composition"].iloc[-1]))
    self.assertAlmostEqual(
        miedema.precheck_dataframe(df, "composition"), 2 / 3)

    # test precheck for oxidation-state decorated compositions
    df = CompositionToOxidComposition(
        return_original_on_error=True).featurize_dataframe(df, 'composition')
    self.assertTrue(miedema.precheck(df["composition_oxid"].iloc[0]))
    self.assertFalse(miedema.precheck(df["composition_oxid"].iloc[-1]))
    self.assertAlmostEqual(
        miedema.precheck_dataframe(df, "composition_oxid"), 2 / 3)

    # Reference values per feature label, one entry per featurizable row.
    expected = (
        ('Miedema_deltaH_inter', (-0.003445022152, -0.235125978427)),
        ('Miedema_deltaH_amor', (0.0707658836300, -0.164541848271)),
        ('Miedema_deltaH_ss_min', (0.03663599755, -0.05280843311)),
    )

    mfps = miedema.featurize_dataframe(df, col_id="composition")
    for row in (0, 1):
        for label, values in expected:
            self.assertAlmostEqual(mfps[label][row], values[row])
    # A boolean check belongs in assertTrue, not assertAlmostEqual.
    for label, _ in expected:
        self.assertTrue(math.isnan(mfps[label][2]))

    # make sure featurization works equally for compositions with or without
    # oxidation states
    mfps = miedema.featurize_dataframe(df, col_id="composition_oxid")
    for label, values in expected:
        self.assertAlmostEqual(mfps[label][0], values[0])
def test_miedema_all(self):
    """Check ``formation_enthalpy_*`` features for all structure types.

    Featurizes TiZr, Mg10Cu50Ca40 and Fe2O3 with ``struct_types='all'``.
    The first two rows are compared against reference enthalpies; the third
    row (Fe2O3) is expected to be NaN for every feature.
    """
    miedema_df = pd.DataFrame({
        "composition": [
            Composition("TiZr"),
            Composition("Mg10Cu50Ca40"),
            Composition("Fe2O3"),
        ]
    })
    df_miedema = Miedema(struct_types='all').featurize_dataframe(
        miedema_df, col_id="composition")

    # Reference values per feature label, one entry per featurizable row.
    expected = (
        ('formation_enthalpy_inter',
         (-0.0034450221522328503, -0.23512597842733007)),
        ('formation_enthalpy_amor',
         (0.070765883630040161, -0.16454184827089643)),
        ('formation_enthalpy_ss_min',
         (0.036635997549833224, -0.052808433113994087)),
    )
    for row in (0, 1):
        for label, values in expected:
            self.assertAlmostEqual(df_miedema[label][row], values[row])

    # A boolean check belongs in assertTrue, not assertAlmostEqual.
    for label, _ in expected:
        self.assertTrue(math.isnan(df_miedema[label][2]))
def featurize_composition(df: pd.DataFrame) -> pd.DataFrame:
    """ Decorate a dataframe of structures with matminer composition features.

    A copy of the input is extended with a ``composition`` column derived
    from each structure, featurized with the composition featurizers listed
    in the body, then featurized again on an oxidation-state decorated
    composition. Multi-index column labels are flattened into
    ``"Featurizer|feature"`` strings, the ``AtomicOrbitals`` HOMO/LUMO
    descriptors are converted to numbers, and infinities/NaNs are replaced
    by 0 before the frame is handed to ``clean_df``.

    Args:
        df (pandas.DataFrame): the input dataframe with a `"structure"`
            column whose entries expose a ``composition`` attribute.

    Returns:
        pandas.DataFrame: the decorated DataFrame, as returned by
            ``clean_df``.

    """
    logging.info("Applying composition featurizers...")

    df = df.copy()
    df['composition'] = df['structure'].apply(
        lambda structure: structure.composition)

    # ElectronAffinity and CohesiveEnergy are intentionally absent: these
    # descriptors were not used in the paper preset.
    composition_featurizers = [
        ElementProperty.from_preset("magpie"),
        AtomicOrbitals(),
        BandCenter(),
        Stoichiometry(),
        ValenceOrbital(),
        IonProperty(),
        ElementFraction(),
        TMetalFraction(),
        Miedema(),
        YangSolidSolution(),
        AtomicPackingEfficiency(),
    ]
    featurizer = MultipleFeaturizer(composition_featurizers)
    df = featurizer.featurize_dataframe(
        df, "composition", multiindex=True, ignore_errors=True)
    # Flatten the (featurizer, feature) column pairs into single labels.
    df.columns = df.columns.map('|'.join).str.strip('|')

    ox_featurizer = MultipleFeaturizer(
        [OxidationStates(), ElectronegativityDiff()])
    df = CompositionToOxidComposition().featurize_dataframe(
        df, "Input Data|composition")
    df = ox_featurizer.featurize_dataframe(
        df, "composition_oxid", multiindex=True, ignore_errors=True)
    df = df.rename(columns={'Input Data': ''})
    df.columns = df.columns.map('|'.join).str.strip('|')

    # Orbital characters become small integers.
    _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
    for character_column in ('AtomicOrbitals|HOMO_character',
                             'AtomicOrbitals|LUMO_character'):
        df[character_column] = df[character_column].map(_orbitals)

    def _atomic_number(symbol):
        """Return the atomic number for a symbol string, else -1."""
        if isinstance(symbol, str):
            return Element(symbol).Z
        return -1

    for element_column in ('AtomicOrbitals|HOMO_element',
                           'AtomicOrbitals|LUMO_element'):
        df[element_column] = df[element_column].apply(_atomic_number)

    df = df.replace([np.inf, -np.inf, np.nan], 0)
    return clean_df(df)
def test_miedema_all(self):
    """Check Miedema features for all structure types.

    Featurizes TiZr, Mg10Cu50Ca40 and Fe2O3 with ``struct_types='all'``.
    The first two rows are compared against reference enthalpies; the third
    row (Fe2O3) is expected to be NaN for every feature.
    """
    df = pd.DataFrame({
        "composition": [
            Composition("TiZr"),
            Composition("Mg10Cu50Ca40"),
            Composition("Fe2O3"),
        ]
    })
    miedema = Miedema(struct_types='all')
    mfps = miedema.featurize_dataframe(df, col_id="composition")

    # Reference values per feature label, one entry per featurizable row.
    expected = (
        ('Miedema_deltaH_inter', (-0.003445022152, -0.235125978427)),
        ('Miedema_deltaH_amor', (0.0707658836300, -0.164541848271)),
        ('Miedema_deltaH_ss_min', (0.03663599755, -0.05280843311)),
    )
    for row in (0, 1):
        for label, values in expected:
            self.assertAlmostEqual(mfps[label][row], values[row])

    # A boolean check belongs in assertTrue, not assertAlmostEqual.
    for label, _ in expected:
        self.assertTrue(math.isnan(mfps[label][2]))
def test_miedema_all(self):
    """Check the ``struct='all'`` Miedema features for TiZr.

    Compares the intermetallic, amorphous and minimum solid-solution
    formation enthalpies of the single input row with reference values.
    """
    frame = pd.DataFrame({"composition": [Composition("TiZr")]})
    featurizer = Miedema(struct='all')
    result = featurizer.featurize_dataframe(frame, col_id="composition")

    reference = (
        ('formation_enthalpy_inter', -0.0034450221522328503),
        ('formation_enthalpy_amor', 0.070765883630040161),
        ('formation_enthalpy_ss_min', 0.036635997549833224),
    )
    for label, value in reference:
        self.assertAlmostEqual(result[label][0], value)
def _generate_pairwise_features(self):
    """Generates features for each bonding pair.

    For every bonding pair ("bcc_tet1", "bcc_tet2" and "tet1_tet2") the
    column "pairwise_composition {pair}" of ``self.memory`` is featurized
    twice: first with ``PairwiseElementProperty`` and then, after converting
    its entries to ``Composition`` objects, with ``Miedema``. The resulting
    feature columns are appended to ``self.memory`` with their labels
    prefixed by "pairwise_feature {pair} ".
    """
    # initializes requested matminer featurizers
    feat_element_property = PairwiseElementProperty(
        data_source='deml',
        features=[
            'atom_radius', 'electronegativity', 'first_ioniz', 'col_num',
            'row_num', 'molar_vol', 'heat_fusion', 'melting_point',
            'GGAU_Etot', 'mus_fere', 'FERE correction'
        ],
        stats=['difference', 'mean'])
    feat_miedema = Miedema(struct_types=['inter', 'amor'])

    # generates features for each bonding pair
    for bonding_pair in ['bcc_tet1', 'bcc_tet2', 'tet1_tet2']:
        prefix = 'pairwise_feature {} '.format(bonding_pair)

        # gets the string composition of the bonding pairs; an explicit copy
        # keeps the column assignment below from writing into a selection of
        # self.memory (which raises pandas' SettingWithCopyWarning)
        composition_index = 'pairwise_composition {}'.format(bonding_pair)
        composition = self.memory[[composition_index]].copy()

        # adds ElementProperty features
        features = feat_element_property.featurize_dataframe(
            df=composition, col_id=composition_index, inplace=False).drop(
                labels=composition_index, axis=1).add_prefix(prefix=prefix)
        self.memory = concat([self.memory, features], axis=1)

        # gets the pymatgen.Composition of the bonding pairs
        composition[composition_index] = [
            Composition(i) for i in composition[composition_index]
        ]

        # adds Miedema features
        features = feat_miedema.featurize_dataframe(
            df=composition, col_id=composition_index, inplace=False).drop(
                labels=composition_index, axis=1).add_prefix(prefix=prefix)
        self.memory = concat([self.memory, features], axis=1)
def test_miedema_ss(self):
    """Check Miedema solid-solution features for every lattice type.

    Featurizes TiZr, Mg10Cu50Ca40 and Fe2O3 with ``struct_types='ss'`` and
    the ``min``, ``fcc``, ``bcc``, ``hcp`` and ``no_latt`` solid-solution
    types. The first two rows are compared against reference enthalpies;
    the third row (Fe2O3) is expected to be NaN for every feature.
    """
    df = pd.DataFrame({
        "composition": [
            Composition("TiZr"),
            Composition("Mg10Cu50Ca40"),
            Composition("Fe2O3"),
        ]
    })
    miedema = Miedema(struct_types='ss',
                      ss_types=['min', 'fcc', 'bcc', 'hcp', 'no_latt'])
    mfps = miedema.featurize_dataframe(df, col_id="composition")

    # Reference values per feature label, one entry per featurizable row.
    expected = (
        ('Miedema_deltaH_ss_min', (0.03663599755, -0.05280843311)),
        ('Miedema_deltaH_ss_fcc', (0.04700027066, 0.03010575174)),
        ('Miedema_deltaH_ss_bcc', (0.08327522653, -0.05280843311)),
        ('Miedema_deltaH_ss_hcp', (0.03663599755, 0.03010575174)),
        ('Miedema_deltaH_ss_no_latt', (0.036635998, -0.0035781359)),
    )
    for row in (0, 1):
        for label, values in expected:
            self.assertAlmostEqual(mfps[label][row], values[row])

    # A boolean check belongs in assertTrue, not assertAlmostEqual.
    for label, _ in expected:
        self.assertTrue(math.isnan(mfps[label][2]))
def test_miedema_ss(self):
    """Check the solid-solution Miedema features for TiZr.

    Uses the ``struct='ss|ss,none,bcc,fcc,hcp'`` specification and compares
    each resulting ``formation_enthalpy_ss_*`` value of the single input
    row with its reference value.
    """
    frame = pd.DataFrame({"composition": [Composition("TiZr")]})
    featurizer = Miedema(struct='ss|ss,none,bcc,fcc,hcp')
    result = featurizer.featurize_dataframe(frame, col_id="composition")

    reference = (
        ('formation_enthalpy_ss_min', 0.036635997549833224),
        ('formation_enthalpy_ss_none', 0.036635997549833224),
        ('formation_enthalpy_ss_bcc', 0.083275226530828264),
        ('formation_enthalpy_ss_fcc', 0.047000270656721008),
        ('formation_enthalpy_ss_hcp', 0.036635997549833224),
    )
    for label, value in reference:
        self.assertAlmostEqual(result[label][0], value)
def test_miedema_ss(self):
    """Check Miedema solid-solution features for every lattice type.

    Featurizes TiZr, Mg10Cu50Ca40 and Fe2O3 with ``struct_types='ss'`` and
    the ``min``, ``fcc``, ``bcc``, ``hcp`` and ``no_latt`` solid-solution
    types. The first two rows are compared against reference enthalpies;
    the third row (Fe2O3) is expected to be NaN for every feature.
    """
    df = pd.DataFrame({
        "composition": [
            Composition("TiZr"),
            Composition("Mg10Cu50Ca40"),
            Composition("Fe2O3"),
        ]
    })
    miedema = Miedema(struct_types='ss',
                      ss_types=['min', 'fcc', 'bcc', 'hcp', 'no_latt'])
    mfps = miedema.featurize_dataframe(df, col_id="composition")

    # Reference values per lattice type, one entry per featurizable row.
    expected = (
        ('min', (0.03663599755, -0.05280843311)),
        ('fcc', (0.04700027066, 0.03010575174)),
        ('bcc', (0.08327522653, -0.05280843311)),
        ('hcp', (0.03663599755, 0.03010575174)),
        ('no_latt', (0.036635998, -0.0035781359)),
    )
    for row in (0, 1):
        for ss_type, values in expected:
            label = 'Miedema_deltaH_ss_' + ss_type
            self.assertAlmostEqual(mfps[label][row], values[row])

    # A boolean check belongs in assertTrue, not assertAlmostEqual.
    for ss_type, _ in expected:
        self.assertTrue(math.isnan(mfps['Miedema_deltaH_ss_' + ss_type][2]))
def test_miedema_ss(self):
    """Check ``formation_enthalpy_ss_*`` features for every lattice type.

    Featurizes TiZr, Mg10Cu50Ca40 and Fe2O3 with ``struct_types='ss'`` and
    the ``min``, ``fcc``, ``bcc``, ``hcp`` and ``no_latt`` solid-solution
    types. The first two rows are compared against reference enthalpies;
    the third row (Fe2O3) is expected to be NaN for every feature.
    """
    miedema_df = pd.DataFrame({
        "composition": [
            Composition("TiZr"),
            Composition("Mg10Cu50Ca40"),
            Composition("Fe2O3"),
        ]
    })
    df_miedema = Miedema(
        struct_types='ss',
        ss_types=['min', 'fcc', 'bcc', 'hcp', 'no_latt'],
    ).featurize_dataframe(miedema_df, col_id="composition")

    # Reference values per feature label, one entry per featurizable row.
    expected = (
        ('formation_enthalpy_ss_min',
         (0.036635997549833224, -0.052808433113994087)),
        ('formation_enthalpy_ss_fcc',
         (0.047000270656721008, 0.030105751741108196)),
        ('formation_enthalpy_ss_bcc',
         (0.083275226530828264, -0.052808433113994087)),
        ('formation_enthalpy_ss_hcp',
         (0.036635997549833224, 0.030105751741108196)),
        ('formation_enthalpy_ss_no_latt',
         (0.036635997549833224, -0.0035781358562771083)),
    )
    for row in (0, 1):
        for label, values in expected:
            self.assertAlmostEqual(df_miedema[label][row], values[row])

    # A boolean check belongs in assertTrue, not assertAlmostEqual.
    for label, _ in expected:
        self.assertTrue(math.isnan(df_miedema[label][2]))
class DeBreuck2020Featurizer(modnet.featurizers.MODFeaturizer):
    """ Featurizer presets used for the paper 'Machine learning materials
    properties for small datasets' by Pierre-Paul De Breuck, Geoffroy Hautier
    & Gian-Marco Rignanese, arXiv:2004.14766 (2020).

    Uses most of the featurizers implemented by matminer at the time of
    writing with their default hyperparameters and presets.

    """

    # CohesiveEnergy and ElectronAffinity are not imported: these
    # descriptors were not used in the paper preset.
    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    # BagofBonds (not used in the paper preset) and
    # PartialRadialDistributionFunction are not imported.
    from matminer.featurizers.structure import (
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )
    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxide_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
    )

    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    def featurize_composition(self, df):
        """ Applies the preset composition featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The ``AtomicOrbitals`` HOMO/LUMO characters are mapped to integers
        (s=1, p=2, d=3, f=4), the HOMO/LUMO element symbols are replaced by
        atomic numbers (-1 for non-string entries), and infinities/NaNs are
        replaced by 0 before cleaning.

        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        for column in ('AtomicOrbitals|HOMO_character',
                       'AtomicOrbitals|LUMO_character'):
            df[column] = df[column].map(_orbitals)

        for column in ('AtomicOrbitals|HOMO_element',
                       'AtomicOrbitals|LUMO_element'):
            df[column] = df[column].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)

        df = df.replace([np.inf, -np.inf, np.nan], 0)

        return modnet.featurizers.clean_df(df)

    def featurize_structure(self, df):
        """ Applies the preset structural featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one column
        per distance (the first 50 distances of the first row are used for
        the labels), the crystal system is encoded as an integer and the
        centrosymmetry flag as 0/1.

        """
        df = super().featurize_structure(df)

        rdf_column = "RadialDistributionFunction|radial distribution function"
        dist = df[rdf_column].iloc[0]['distances'][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(d)
            df[_rdf_key] = df[rdf_column].apply(
                lambda x: x['distribution'][i])

        df = df.drop(rdf_column, axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            # The misspelled key is kept for backwards compatibility; the
            # correctly spelled one is added so that "orthorhombic" entries
            # are no longer mapped to NaN.
            # NOTE(review): this changes the encoded value for orthorhombic
            # structures - confirm against any pretrained models.
            "orthorombic": 3,
            "orthorhombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7,
        }

        def _int_map(x):
            """Encode a centrosymmetry flag as 0/1, mapping NaN to 0."""
            # NaN never compares equal to anything (itself included), so it
            # has to be detected with isnan instead of `== np.nan`; without
            # this a NaN would fall through as truthy and be encoded as 1.
            if isinstance(x, float) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"].map(_int_map)

        return modnet.featurizers.clean_df(df)

    def featurize_site(self, df):
        """ Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns that contain only zeros are dropped before cleaning.

        """

        # rename some features for backwards compatibility with pretrained models
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        df = df.loc[:, (df != 0).any(axis=0)]

        return modnet.featurizers.clean_df(df)
class FUTURE_PROSPECTS_2021(featurizer.extendedMODFeaturizer):
    """Featurizer preset covering composition, structure, site, DOS and
    band structure matminer featurizers.

    Each ``featurize_*`` method delegates to the parent class and then
    post-processes the resulting columns into numeric features before
    passing the dataframe to ``clean_df``.

    Args:
        n_jobs: stored as ``self._n_jobs``; not otherwise used in this class.
    """

    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        BagofBonds,
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )
    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )
    from matminer.featurizers.dos import (
        DOSFeaturizer,
        SiteDOS,
        Hybridization,
        DosAsymmetry,
    )
    from matminer.featurizers.bandstructure import (
        BandFeaturizer,
        BranchPointEnergy
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxid_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    # PartialRadialDistributionFunction is left out because it introduces a
    # large amount of features.
    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
    )

    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    dos_featurizers = (
        DOSFeaturizer(),
        SiteDOS(),
        Hybridization()
    )

    band_featurizers = (
        BandFeaturizer(),
        BranchPointEnergy()
    )

    def __init__(self, n_jobs=None):
        self._n_jobs = n_jobs

    def featurize_composition(self, df):
        """Applies the preset composition featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The ``AtomicOrbitals`` HOMO/LUMO characters are mapped to integers
        (s=1, p=2, d=3, f=4) and the HOMO/LUMO element symbols are replaced
        by atomic numbers (-1 for non-string entries).

        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        for column in ("AtomicOrbitals|HOMO_character",
                       "AtomicOrbitals|LUMO_character"):
            df[column] = df[column].map(_orbitals)

        for column in ("AtomicOrbitals|HOMO_element",
                       "AtomicOrbitals|LUMO_element"):
            df[column] = df[column].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z
            )

        return clean_df(df)

    def featurize_structure(self, df):
        """Applies the preset structural featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one column
        per distance (the first 50 distances of the first row are used for
        the labels), the crystal system is encoded as an integer and the
        centrosymmetry flag as 0/1.

        """
        df = super().featurize_structure(df)

        rdf_column = "RadialDistributionFunction|radial distribution function"
        dist = df[rdf_column].iloc[0]["distances"][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(d)
            df[_rdf_key] = df[rdf_column].apply(
                lambda x: x["distribution"][i])

        df = df.drop(rdf_column, axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            # The misspelled key is kept for backwards compatibility; the
            # correctly spelled one is added so that "orthorhombic" entries
            # are no longer mapped to NaN.
            # NOTE(review): this changes the encoded value for orthorhombic
            # structures - confirm against any pretrained models.
            "orthorombic": 3,
            "orthorhombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7,
        }

        def _int_map(x):
            """Encode a centrosymmetry flag as 0/1, mapping NaN to 0."""
            # NaN never compares equal to anything (itself included), so it
            # has to be detected with isnan instead of `== np.nan`; without
            # this a NaN would fall through as truthy and be encoded as 1.
            if isinstance(x, float) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"
        ].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"
        ].map(_int_map)

        return clean_df(df)

    def featurize_dos(self, df):
        """Applies the preset DOS featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The VBM/CBM species columns are one-hot encoded, the VBM/CBM orbital
        characters are mapped to integers, the ``number;number;number``
        location columns are split into three float columns each, and the
        raw ``dos`` column is dropped.

        """
        df = super().featurize_dos(df)

        hotencodeColumns = ["DOSFeaturizer|vbm_specie_1", "DOSFeaturizer|cbm_specie_1"]
        one_hot = pd.get_dummies(df[hotencodeColumns])
        df = df.drop(hotencodeColumns, axis=1).join(one_hot)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        for column in ("DOSFeaturizer|vbm_character_1",
                       "DOSFeaturizer|cbm_character_1"):
            df[column] = df[column].map(_orbitals)

        # Splitting one feature into several floating features
        # e.g. number;number;number into three columns
        splitColumns = ["DOSFeaturizer|cbm_location_1", "DOSFeaturizer|vbm_location_1"]

        for column in splitColumns:
            try:
                newColumns = df[column].str.split(";", n=2, expand=True)
                for i in range(3):
                    # The builtin float replaces the `np.float` alias, which
                    # no longer exists in current NumPy; the resulting
                    # AttributeError used to be swallowed here, silently
                    # skipping the split.
                    df[column + "_" + str(i)] = np.array(newColumns[i]).astype(float)
            except (AttributeError, IndexError, KeyError, TypeError, ValueError):
                # Best effort: leave out columns that cannot be split or
                # converted instead of failing the whole featurization.
                continue
        df = df.drop(splitColumns, axis=1)
        df = df.drop(["dos"], axis=1)
        return clean_df(df)

    def featurize_bandstructure(self, df):
        """Applies the preset band structure featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The ``is_gap_direct`` flag is encoded as 0/1 and the raw
        ``bandstructure`` column is dropped.

        """
        df = super().featurize_bandstructure(df)

        def _int_map(x):
            """Encode "False"/"True" (by string form) as 0/1, else None."""
            if str(x) == "False":
                return 0
            elif str(x) == "True":
                return 1
            return None

        df["BandFeaturizer|is_gap_direct"] = df[
            "BandFeaturizer|is_gap_direct"
        ].map(_int_map)

        df = df.drop(["bandstructure"], axis=1)

        return clean_df(df)

    def featurize_site(self, df):
        """Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns that contain only zeros are dropped before cleaning.

        """

        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        df = df.loc[:, (df != 0).any(axis=0)]

        return clean_df(df)
def AddFeatures(df):
    """Add matminer compositional and structural features to ``df``.

    The ``formula`` column is converted to a ``composition`` column, which
    is featurized with the magpie element properties and converted to an
    oxidation-state decorated ``composition_oxid`` column. The remaining
    compositional featurizers run on ``composition_oxid`` and the structural
    ones on the ``structure`` column.

    Args:
        df: dataframe with a ``formula`` column and a ``structure`` column.

    Returns:
        The dataframe returned by the last featurizer call.
    """
    from matminer.featurizers.conversions import (
        CompositionToOxidComposition,
        StrToComposition,
    )
    from matminer.featurizers.composition import (
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElementProperty,
        Miedema,
        OxidationStates,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        ChemicalOrdering,
        DensityFeatures,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        MinimumRelativeDistances,
        SiteStatsFingerprint,
        StructuralComplexity,
        StructuralHeterogeneity,
    )
    from matminer.featurizers.site import (
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        CoordinationNumber,
    )
    from pymatgen.analysis.local_env import CrystalNN

    # Add features by Matminer
    df = StrToComposition().featurize_dataframe(df, "formula")

    # input the "composition" column to the featurizer
    magpie = ElementProperty.from_preset(preset_name="magpie")
    df = magpie.featurize_dataframe(df, col_id="composition")

    df = CompositionToOxidComposition().featurize_dataframe(df, "composition")
    df = OxidationStates().featurize_dataframe(df, "composition_oxid")

    oxid_composition_featurizers = (
        ElectronAffinity(),
        BandCenter(),
        CohesiveEnergy(),
        Miedema(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )
    for feat in oxid_composition_featurizers:
        df = feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True)

    # This is the border between compositional features and structural
    # features. Comment out the following featurizers to use only
    # compositional features.
    structure_featurizers = (
        GlobalSymmetryFeatures(),
        StructuralComplexity(),
        ChemicalOrdering(),
        MaximumPackingEfficiency(),
        MinimumRelativeDistances(),
        StructuralHeterogeneity(),
        SiteStatsFingerprint(AverageBondLength(CrystalNN(search_cutoff=20))),
        SiteStatsFingerprint(AverageBondAngle(CrystalNN(search_cutoff=20))),
        SiteStatsFingerprint(BondOrientationalParameter()),
        SiteStatsFingerprint(CoordinationNumber()),
        DensityFeatures(),
    )
    for feat in structure_featurizers:
        df = feat.featurize_dataframe(df, "structure", ignore_errors=True)

    return df