def test_BandFeaturizer(self):
    """Check the scalar features BandFeaturizer produces for ``self.df``.

    Featurizes the ``'bs'`` column of the fixture dataframe with a
    default-constructed ``BandFeaturizer`` and compares the first row's
    gap and extremum features against reference values.
    """
    df_bf = BandFeaturizer().featurize_dataframe(self.df, col_id='bs')

    # Gap values are compared to 3 decimal places, norms to 5.
    self.assertAlmostEqual(df_bf['band_gap'][0], 0.612, 3)
    self.assertAlmostEqual(df_bf['direct_gap'][0], 2.557, 3)
    self.assertAlmostEqual(df_bf['n_ex1_norm'][0], 0.58413, 5)
    self.assertAlmostEqual(df_bf['p_ex1_norm'][0], 0.0, 5)

    # ``assertEquals`` is a deprecated alias that was removed in
    # Python 3.12; ``assertEqual`` is the supported spelling.
    self.assertEqual(df_bf['is_gap_direct'][0], False)
def test_BandFeaturizer(self):
    """Check BandFeaturizer output when specific k-points are requested.

    Builds a ``BandFeaturizer`` from ``self.si_kpts``, featurizes the
    ``'bs_line'`` column of ``self.df`` and compares the first row's
    features against reference values.
    """
    bs_featurizer = BandFeaturizer(self.si_kpts)
    df_bf = bs_featurizer.featurize_dataframe(self.df, col_id='bs_line')

    # Gap and band-extremum features.
    self.assertAlmostEqual(df_bf['band_gap'][0], 0.612, 3)
    self.assertAlmostEqual(df_bf['direct_gap'][0], 2.557, 3)
    self.assertAlmostEqual(df_bf['n_ex1_norm'][0], 0.58413, 5)
    self.assertAlmostEqual(df_bf['p_ex1_norm'][0], 0.0, 5)
    self.assertEqual(df_bf['is_gap_direct'][0], False)
    self.assertEqual(df_bf['n_ex1_degen'][0], 6)
    self.assertEqual(df_bf['p_ex1_degen'][0], 1)

    # "p_<kpoint>_en" columns. The check on 'p_0.0;0.0;0.0_en' used to be
    # repeated twice verbatim; one copy is enough.
    self.assertEqual(df_bf['p_0.0;0.0;0.0_en'][0], 0.0)
    self.assertAlmostEqual(df_bf['p_0.375;0.375;0.75_en'][0], -2.3745, 4)
    self.assertAlmostEqual(df_bf['p_0.5;0.0;0.5_en'][0], -2.7928, 4)
    self.assertAlmostEqual(df_bf['p_0.625;0.25;0.625_en'][0], -2.3745, 4)
    self.assertAlmostEqual(df_bf['p_0.5;0.5;0.5_en'][0], -1.1779, 4)

    # "n_<kpoint>_en" columns.
    self.assertAlmostEqual(df_bf['n_0.0;0.0;0.0_en'][0], 1.945, 4)
    self.assertAlmostEqual(df_bf['n_0.5;0.25;0.75_en'][0], 3.6587, 4)
    self.assertAlmostEqual(df_bf['n_0.5;0.5;0.5_en'][0], 0.8534, 4)
class FUTURE_PROSPECTS_2021(featurizer.extendedMODFeaturizer):
    """Featurizer preset bundling matminer composition, structure, site,
    DOS and band structure featurizers.

    The class attributes below hold tuples of instantiated featurizers; each
    ``featurize_*`` method delegates to the parent implementation
    (presumably the one applying those tuples -- confirm against
    ``extendedMODFeaturizer``) and then converts the non-numeric output
    columns into numeric ones before handing the frame to ``clean_df``.
    """

    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        BagofBonds,
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )
    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )
    from matminer.featurizers.dos import (
        DOSFeaturizer,
        SiteDOS,
        Hybridization,
        DosAsymmetry,
    )
    from matminer.featurizers.bandstructure import (
        BandFeaturizer,
        BranchPointEnergy
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxid_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        # PartialRadialDistributionFunction() is left out on purpose:
        # it introduces a large amount of features.
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
    )

    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    dos_featurizers = (
        DOSFeaturizer(),
        SiteDOS(),
        Hybridization()
    )

    band_featurizers = (
        BandFeaturizer(),
        BranchPointEnergy()
    )

    def __init__(self, n_jobs=None):
        """Store ``n_jobs`` on the instance as ``_n_jobs``."""
        self._n_jobs = n_jobs

    def featurize_composition(self, df):
        """Applies the preset composition featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The HOMO/LUMO orbital characters are mapped to integers
        (s=1, p=2, d=3, f=4) and the HOMO/LUMO element symbols are replaced
        by ``Element(x).Z`` (``-1`` when the entry is not a string).
        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df["AtomicOrbitals|HOMO_character"] = df["AtomicOrbitals|HOMO_character"].map(
            _orbitals
        )
        df["AtomicOrbitals|LUMO_character"] = df["AtomicOrbitals|LUMO_character"].map(
            _orbitals
        )

        df["AtomicOrbitals|HOMO_element"] = df["AtomicOrbitals|HOMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )
        df["AtomicOrbitals|LUMO_element"] = df["AtomicOrbitals|LUMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )

        return clean_df(df)

    def featurize_structure(self, df):
        """Applies the preset structural featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one column
        per distance bin (first 50 bins, labelled from the first row's
        distances), the crystal system is mapped to an integer code and the
        centrosymmetry flag to 0/1.
        """
        df = super().featurize_structure(df)

        rdf_col = "RadialDistributionFunction|radial distribution function"
        # Bin labels are taken from the first row only and reused for all rows.
        dist = df[rdf_col].iloc[0]["distances"][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d
            )
            # The lambda is consumed immediately by ``apply``, so capturing
            # the loop variable ``i`` is safe here.
            df[_rdf_key] = df[rdf_col].apply(lambda x: x["distribution"][i])

        df = df.drop(rdf_col, axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            # Correct spelling; without it orthorhombic rows mapped to NaN.
            "orthorhombic": 3,
            # Legacy misspelling kept so inputs using it still map to 3.
            "orthorombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7,
        }

        def _int_map(x):
            # ``x == np.nan`` is always False and NaN is truthy, so missing
            # values have to be detected explicitly to be encoded as 0.
            if pd.isna(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"
        ].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"
        ].map(_int_map)

        return clean_df(df)

    def featurize_dos(self, df):
        """Applies the preset DOS featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The VBM/CBM species columns are one-hot encoded, the VBM/CBM orbital
        characters are mapped to integers, the ``a;b;c`` location strings are
        split into three float columns and the raw ``dos`` column is dropped.
        """
        df = super().featurize_dos(df)

        hotencodeColumns = ["DOSFeaturizer|vbm_specie_1", "DOSFeaturizer|cbm_specie_1"]
        one_hot = pd.get_dummies(df[hotencodeColumns])
        df = df.drop(hotencodeColumns, axis=1).join(one_hot)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df["DOSFeaturizer|vbm_character_1"] = df[
            "DOSFeaturizer|vbm_character_1"
        ].map(_orbitals)
        df["DOSFeaturizer|cbm_character_1"] = df[
            "DOSFeaturizer|cbm_character_1"
        ].map(_orbitals)

        # Splitting one feature into several floating features
        # e.g. number;number;number into three columns
        splitColumns = ["DOSFeaturizer|cbm_location_1", "DOSFeaturizer|vbm_location_1"]

        for column in splitColumns:
            try:
                newColumns = df[column].str.split(";", n=2, expand=True)
                for i in range(3):
                    # ``np.float`` was removed in NumPy 1.24; the builtin
                    # ``float`` is the same dtype. With the old bare
                    # ``except`` the resulting AttributeError was swallowed
                    # and these columns were silently never created.
                    df[column + "_" + str(i)] = np.array(newColumns[i]).astype(float)
            except (KeyError, AttributeError, TypeError, ValueError):
                # Best effort: skip a column that is missing, is not
                # string-typed, or does not hold three numeric components.
                continue
        df = df.drop(splitColumns, axis=1)
        df = df.drop(["dos"], axis=1)
        return clean_df(df)

    def featurize_bandstructure(self, df):
        """Applies the preset band structure featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        ``BandFeaturizer|is_gap_direct`` is mapped to 0/1 and the raw
        ``bandstructure`` column is dropped.
        """
        df = super().featurize_bandstructure(df)

        def _int_map(x):
            # Compare on the string form so any value printing as
            # "True"/"False" is handled; anything else yields None.
            if str(x) == "False":
                return 0
            elif str(x) == "True":
                return 1
            return None

        df["BandFeaturizer|is_gap_direct"] = df[
            "BandFeaturizer|is_gap_direct"
        ].map(_int_map)

        df = df.drop(["bandstructure"], axis=1)

        return clean_df(df)

    def featurize_site(self, df):
        """Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns whose values are all zero are removed before cleaning.
        """
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # Keep only columns that contain at least one non-zero entry.
        df = df.loc[:, (df != 0).any(axis=0)]

        return clean_df(df)
def test_BandFeaturizer(self):
    """Check BandFeaturizer output for the silicon and VBr2 fixtures.

    For each fixture a ``BandFeaturizer`` is built with explicit k-points
    and a number of bands, the ``'bs_line'`` column is featurized and the
    first row's features are compared against reference values.
    """
    # silicon:
    bs_featurizer = BandFeaturizer(kpoints=self.si_kpts, nbands=5)
    self.assertGreater(len(bs_featurizer.feature_labels()), 0)
    df_bf = bs_featurizer.featurize_dataframe(self.df, col_id='bs_line')
    self.assertAlmostEqual(df_bf['band_gap'][0], 0.612, 3)
    self.assertAlmostEqual(df_bf['direct_gap'][0], 2.557, 3)
    self.assertAlmostEqual(df_bf['n_ex1_norm'][0], 0.58413, 5)
    self.assertAlmostEqual(df_bf['p_ex1_norm'][0], 0.0, 5)
    self.assertEqual(df_bf['is_gap_direct'][0], False)
    self.assertEqual(df_bf['n_ex1_degen'][0], 6)
    self.assertEqual(df_bf['p_ex1_degen'][0], 1)

    # Gamma:
    self.assertAlmostEqual(df_bf['n_0.0;0.0;0.0_en4'][0], 2.5169, 4)
    self.assertAlmostEqual(df_bf['n_0.0;0.0;0.0_en1'][0], 1.945, 4)
    self.assertEqual(df_bf['p_0.0;0.0;0.0_en1'][0], 0.0)
    self.assertEqual(df_bf['p_0.0;0.0;0.0_en2'][0], 0.0)
    # A computed, non-zero energy: compare to 4 places like every other
    # energy here instead of relying on exact float equality.
    self.assertAlmostEqual(df_bf['p_0.0;0.0;0.0_en4'][0], -11.8118, 4)

    # K:
    self.assertAlmostEqual(df_bf['p_0.375;0.375;0.75_en1'][0], -2.3745, 4)

    # X:
    self.assertAlmostEqual(df_bf['n_0.5;0.0;0.5_en2'][0], 0.1409, 4)
    self.assertAlmostEqual(df_bf['n_0.5;0.0;0.5_en1'][0], 0.1409, 4)
    self.assertAlmostEqual(df_bf['p_0.5;0.0;0.5_en1'][0], -2.7928, 4)

    # U:
    self.assertAlmostEqual(df_bf['p_0.625;0.25;0.625_en1'][0], -2.3745, 4)
    self.assertAlmostEqual(df_bf['p_0.625;0.25;0.625_en4'][0], -8.1598, 4)
    self.assertTrue(math.isnan(df_bf['p_0.625;0.25;0.625_en5'][0]))

    # L:
    self.assertAlmostEqual(df_bf['n_0.5;0.5;0.5_en2'][0], 2.7381, 4)
    self.assertAlmostEqual(df_bf['n_0.5;0.5;0.5_en1'][0], 0.8534, 4)
    self.assertAlmostEqual(df_bf['p_0.5;0.5;0.5_en1'][0], -1.1779, 4)

    # W:
    self.assertAlmostEqual(df_bf['n_0.5;0.25;0.75_en1'][0], 3.6587, 4)

    # VBr2 with unoccupied Spin.down electrons for ib<ib_VBM but E>E_CBM:
    bs_featurizer = BandFeaturizer(kpoints=self.vbr2kpts, nbands=3)
    df_bf2 = bs_featurizer.featurize_dataframe(self.df2, col_id='bs_line')
    self.assertTrue(math.isnan(df_bf2['p_ex1_degen'][0]))

    # Gamma:
    self.assertAlmostEqual(df_bf2['n_0.0;0.0;0.0_en3'][0], 0.8020, 4)
    self.assertAlmostEqual(df_bf2['n_0.0;0.0;0.0_en2'][0], 0.4243, 4)
    self.assertAlmostEqual(df_bf2['n_0.0;0.0;0.0_en1'][0], 0.4243, 4)
    self.assertAlmostEqual(df_bf2['p_0.0;0.0;0.0_en1'][0], -0.3312, 4)
    self.assertAlmostEqual(df_bf2['p_0.0;0.0;0.0_en2'][0], -0.6076, 4)
    self.assertAlmostEqual(df_bf2['p_0.0;0.0;0.0_en3'][0], -0.6076, 4)

    # M:
    self.assertAlmostEqual(df_bf2['n_0.5;0.0;0.0_en3'][0], 0.5524, 4)
    self.assertAlmostEqual(df_bf2['n_0.5;0.0;0.0_en2'][0], 0.5074, 4)
    self.assertAlmostEqual(df_bf2['n_0.5;0.0;0.0_en1'][0], 0.2985, 4)
    self.assertAlmostEqual(df_bf2['p_0.5;0.0;0.0_en1'][0], -0.0636, 4)
    self.assertAlmostEqual(df_bf2['p_0.5;0.0;0.0_en2'][0], -0.1134, 4)
    self.assertAlmostEqual(df_bf2['p_0.5;0.0;0.0_en3'][0], -0.8091, 4)

    # between Gamma and M:
    self.assertAlmostEqual(df_bf2['n_0.2;0.0;0.0_en3'][0], 0.6250, 4)
    self.assertAlmostEqual(df_bf2['n_0.2;0.0;0.0_en2'][0], 0.3779, 4)
    self.assertAlmostEqual(df_bf2['n_0.2;0.0;0.0_en1'][0], 0.1349, 4)
    self.assertAlmostEqual(df_bf2['p_0.2;0.0;0.0_en1'][0], -0.1049, 4)
    self.assertAlmostEqual(df_bf2['p_0.2;0.0;0.0_en2'][0], -0.3044, 4)
    self.assertAlmostEqual(df_bf2['p_0.2;0.0;0.0_en3'][0], -0.6399, 4)

    # L:
    self.assertAlmostEqual(df_bf2['n_0.5;0.0;0.5_en2'][0], 0.4448, 4)
    self.assertAlmostEqual(df_bf2['n_0.5;0.0;0.5_en1'][0], 0.3076, 4)
    self.assertAlmostEqual(df_bf2['p_0.5;0.0;0.5_en1'][0], -0.0639, 4)
    self.assertAlmostEqual(df_bf2['p_0.5;0.0;0.5_en2'][0], -0.1133, 4)