コード例 #1
0
 def test_BandFeaturizer(self):
     """Featurize the ``'bs'`` column of ``self.df`` and check row 0.

     The gap values and the ``*_ex1_norm`` columns are compared against
     reference numbers to the given number of decimal places; the
     ``is_gap_direct`` flag is expected to be False.
     """
     df_bf = BandFeaturizer().featurize_dataframe(self.df, col_id='bs')
     self.assertAlmostEqual(df_bf['band_gap'][0], 0.612, 3)
     self.assertAlmostEqual(df_bf['direct_gap'][0], 2.557, 3)
     self.assertAlmostEqual(df_bf['n_ex1_norm'][0], 0.58413, 5)
     self.assertAlmostEqual(df_bf['p_ex1_norm'][0], 0.0, 5)
     # assertEquals is a deprecated alias that was removed in Python 3.12;
     # assertEqual is the supported spelling.
     self.assertEqual(df_bf['is_gap_direct'][0], False)
コード例 #2
0
 def test_BandFeaturizer(self):
     """Featurize the ``'bs_line'`` column of ``self.df`` and check row 0.

     The featurizer is built with ``self.si_kpts``; the test checks the
     global gap features first and then the per-k-point ``p_*_en`` and
     ``n_*_en`` columns against reference values.
     """
     bs_featurizer = BandFeaturizer(self.si_kpts)
     df_bf = bs_featurizer.featurize_dataframe(self.df, col_id='bs_line')

     # Global band-gap features.
     self.assertAlmostEqual(df_bf['band_gap'][0], 0.612, 3)
     self.assertAlmostEqual(df_bf['direct_gap'][0], 2.557, 3)
     self.assertAlmostEqual(df_bf['n_ex1_norm'][0], 0.58413, 5)
     self.assertAlmostEqual(df_bf['p_ex1_norm'][0], 0.0, 5)
     self.assertEqual(df_bf['is_gap_direct'][0], False)
     self.assertEqual(df_bf['n_ex1_degen'][0], 6)
     self.assertEqual(df_bf['p_ex1_degen'][0], 1)

     # Per-k-point "p_" columns. The original asserted the
     # 'p_0.0;0.0;0.0_en' value twice in a row; once is enough.
     self.assertEqual(df_bf['p_0.0;0.0;0.0_en'][0], 0.0)
     self.assertAlmostEqual(df_bf['p_0.375;0.375;0.75_en'][0], -2.3745, 4)
     self.assertAlmostEqual(df_bf['p_0.5;0.0;0.5_en'][0], -2.7928, 4)
     self.assertAlmostEqual(df_bf['p_0.625;0.25;0.625_en'][0], -2.3745, 4)
     self.assertAlmostEqual(df_bf['p_0.5;0.5;0.5_en'][0], -1.1779, 4)

     # Per-k-point "n_" columns.
     self.assertAlmostEqual(df_bf['n_0.0;0.0;0.0_en'][0], 1.945, 4)
     self.assertAlmostEqual(df_bf['n_0.5;0.25;0.75_en'][0], 3.6587, 4)
     self.assertAlmostEqual(df_bf['n_0.5;0.5;0.5_en'][0], 0.8534, 4)
コード例 #3
0
class FUTURE_PROSPECTS_2021(featurizer.extendedMODFeaturizer):
    """Featurizer preset built from matminer featurizers.

    The class declares tuples of composition, oxidation-state-aware
    composition, structure, site, DOS and band-structure featurizers as
    class attributes. Each ``featurize_*`` method delegates to the parent
    implementation, converts categorical/string outputs to numeric columns,
    and passes the result through ``clean_df``.

    The matminer imports below are executed inside the class body, so the
    imported names are also class attributes; they are intentionally left
    untouched even when a featurizer is not listed in any tuple.
    """

    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        BagofBonds,
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )
    from matminer.featurizers.dos import (
        DOSFeaturizer,
        SiteDOS,
        Hybridization,
        DosAsymmetry,
    )
    from matminer.featurizers.bandstructure import (
        BandFeaturizer,
        BranchPointEnergy
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxid_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        # PartialRadialDistributionFunction() is left out on purpose:
        # it introduces a large amount of features.
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    dos_featurizers = (
        DOSFeaturizer(),
        SiteDOS(),
        Hybridization()
    )

    band_featurizers = (
        BandFeaturizer(),
        BranchPointEnergy()
    )

    def __init__(self, n_jobs=None):
        """Store ``n_jobs`` on the instance as ``self._n_jobs``.

        NOTE(review): the parent ``__init__`` is not called here, matching
        the original behaviour -- confirm this is intended.
        """
        self._n_jobs = n_jobs

    def featurize_composition(self, df):
        """Apply the preset composition featurizers to ``df``.

        After the parent featurization, the HOMO/LUMO orbital characters
        (``s``/``p``/``d``/``f``) are mapped to the integers 1-4 and the
        HOMO/LUMO element symbols are replaced by their atomic number
        (``-1`` when the value is not a string).

        Returns:
            The dataframe returned by ``clean_df``.
        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        for level in ("HOMO", "LUMO"):
            character_col = "AtomicOrbitals|{}_character".format(level)
            element_col = "AtomicOrbitals|{}_element".format(level)

            df[character_col] = df[character_col].map(_orbitals)
            # Non-string entries (e.g. missing values) get the sentinel -1.
            df[element_col] = df[element_col].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z
            )

        return clean_df(df)

    def featurize_structure(self, df):
        """Apply the preset structural featurizers to ``df``.

        Post-processing steps:

        * the radial distribution function column (a dict with
          ``"distances"`` and ``"distribution"`` entries) is expanded into
          one column per distance, using the first 50 distances of the
          first row to name the columns, and then dropped;
        * the crystal system name is mapped to an integer 1-7;
        * the centrosymmetry flag is mapped to 0/1.

        Returns:
            The dataframe returned by ``clean_df``.
        """
        df = super().featurize_structure(df)

        rdf_col = "RadialDistributionFunction|radial distribution function"

        # Column names come from the distance grid of the first row only.
        dist = df[rdf_col].iloc[0]["distances"][:50]
        for i, d in enumerate(dist):
            _rdf_key = "{}|d_{:.2f}".format(rdf_col, d)
            # Bind the index as a default so the lambda does not rely on
            # the loop variable's late binding.
            df[_rdf_key] = df[rdf_col].apply(lambda x, i=i: x["distribution"][i])

        df = df.drop(rdf_col, axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            # Correct spelling; the previous key "orthorombic" never
            # matched, so these rows were mapped to NaN.
            "orthorhombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7,
        }

        def _int_map(x):
            # ``x == np.nan`` is always False and NaN is truthy, so missing
            # values used to be encoded as 1; test for NaN explicitly.
            if pd.isna(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"
        ].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"
        ].map(_int_map)

        return clean_df(df)

    def featurize_dos(self, df):
        """Apply the preset DOS featurizers to ``df``.

        Post-processing steps:

        * the VBM/CBM specie columns are one-hot encoded;
        * the VBM/CBM orbital characters are mapped to the integers 1-4;
        * the ``"a;b;c"`` VBM/CBM location strings are split into three
          float columns suffixed ``_0``, ``_1`` and ``_2``;
        * the original location columns and the ``"dos"`` column are
          dropped.

        Returns:
            The dataframe returned by ``clean_df``.
        """
        df = super().featurize_dos(df)

        hot_encode_columns = [
            "DOSFeaturizer|vbm_specie_1",
            "DOSFeaturizer|cbm_specie_1",
        ]
        one_hot = pd.get_dummies(df[hot_encode_columns])
        df = df.drop(hot_encode_columns, axis=1).join(one_hot)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        for character_col in (
            "DOSFeaturizer|vbm_character_1",
            "DOSFeaturizer|cbm_character_1",
        ):
            df[character_col] = df[character_col].map(_orbitals)

        # Split one "number;number;number" feature into three float columns.
        split_columns = [
            "DOSFeaturizer|cbm_location_1",
            "DOSFeaturizer|vbm_location_1",
        ]
        for column in split_columns:
            try:
                parts = df[column].str.split(";", n=2, expand=True)
                for i in range(3):
                    # The builtin ``float`` replaces ``np.float``, which was
                    # removed in NumPy 1.24; the resulting AttributeError
                    # used to be swallowed, silently skipping these columns.
                    df[column + "_" + str(i)] = np.array(parts[i]).astype(float)
            except (AttributeError, KeyError, TypeError, ValueError):
                # Best effort: a column that cannot be split or converted
                # is skipped rather than aborting the featurization.
                continue

        df = df.drop(split_columns, axis=1)
        df = df.drop(["dos"], axis=1)
        return clean_df(df)

    def featurize_bandstructure(self, df):
        """Apply the preset band structure featurizers to ``df``.

        The ``is_gap_direct`` flag is mapped to 0/1 (any value whose string
        form is neither ``"True"`` nor ``"False"`` maps to ``None``) and the
        ``"bandstructure"`` column is dropped.

        Returns:
            The dataframe returned by ``clean_df``.
        """
        df = super().featurize_bandstructure(df)

        def _int_map(x):
            # Compare on the string form so that Python and NumPy booleans
            # are handled alike.
            if str(x) == "False":
                return 0
            if str(x) == "True":
                return 1
            return None

        df["BandFeaturizer|is_gap_direct"] = df[
            "BandFeaturizer|is_gap_direct"
        ].map(_int_map)

        df = df.drop(["bandstructure"], axis=1)

        return clean_df(df)

    def featurize_site(self, df):
        """Apply the preset site featurizers to ``df``.

        A few featurizer names are passed to the parent method as shorter
        aliases, and columns that contain only zeros are removed.

        Returns:
            The dataframe returned by ``clean_df``.
        """
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # Keep only the columns that have at least one non-zero entry.
        df = df.loc[:, (df != 0).any(axis=0)]

        return clean_df(df)
コード例 #4
0
ファイル: test_bandstructure.py プロジェクト: FilipchukB/P1
    def test_BandFeaturizer(self):
        """Check BandFeaturizer on the two band structures of the fixture.

        First ``self.df`` is featurized with ``self.si_kpts`` and
        ``nbands=5``, then ``self.df2`` with ``self.vbr2kpts`` and
        ``nbands=3``. In both cases row 0 of the resulting dataframe is
        compared against reference values, grouped by k-point.
        """
        # silicon:
        bs_featurizer = BandFeaturizer(kpoints=self.si_kpts, nbands=5)
        # assertGreater reports the actual length on failure.
        self.assertGreater(len(bs_featurizer.feature_labels()), 0)
        df_bf = bs_featurizer.featurize_dataframe(self.df, col_id='bs_line')
        self.assertAlmostEqual(df_bf['band_gap'][0], 0.612, 3)
        self.assertAlmostEqual(df_bf['direct_gap'][0], 2.557, 3)
        self.assertAlmostEqual(df_bf['n_ex1_norm'][0], 0.58413, 5)
        self.assertAlmostEqual(df_bf['p_ex1_norm'][0], 0.0, 5)
        self.assertEqual(df_bf['is_gap_direct'][0], False)
        self.assertEqual(df_bf['n_ex1_degen'][0], 6)
        self.assertEqual(df_bf['p_ex1_degen'][0], 1)
        # Gamma:
        self.assertAlmostEqual(df_bf['n_0.0;0.0;0.0_en4'][0], 2.5169, 4)
        self.assertAlmostEqual(df_bf['n_0.0;0.0;0.0_en1'][0], 1.945, 4)
        self.assertEqual(df_bf['p_0.0;0.0;0.0_en1'][0], 0.0)
        self.assertEqual(df_bf['p_0.0;0.0;0.0_en2'][0], 0.0)
        # A 4-decimal reference energy should not be compared with exact
        # float equality; use the same tolerance as the other energies.
        self.assertAlmostEqual(df_bf['p_0.0;0.0;0.0_en4'][0], -11.8118, 4)
        # K:
        self.assertAlmostEqual(df_bf['p_0.375;0.375;0.75_en1'][0], -2.3745, 4)
        # X:
        self.assertAlmostEqual(df_bf['n_0.5;0.0;0.5_en2'][0], 0.1409, 4)
        self.assertAlmostEqual(df_bf['n_0.5;0.0;0.5_en1'][0], 0.1409, 4)
        self.assertAlmostEqual(df_bf['p_0.5;0.0;0.5_en1'][0], -2.7928, 4)
        # U:
        self.assertAlmostEqual(df_bf['p_0.625;0.25;0.625_en1'][0], -2.3745, 4)
        self.assertAlmostEqual(df_bf['p_0.625;0.25;0.625_en4'][0], -8.1598, 4)
        self.assertTrue(math.isnan(df_bf['p_0.625;0.25;0.625_en5'][0]))
        # L:
        self.assertAlmostEqual(df_bf['n_0.5;0.5;0.5_en2'][0], 2.7381, 4)
        self.assertAlmostEqual(df_bf['n_0.5;0.5;0.5_en1'][0], 0.8534, 4)
        self.assertAlmostEqual(df_bf['p_0.5;0.5;0.5_en1'][0], -1.1779, 4)
        # W:
        self.assertAlmostEqual(df_bf['n_0.5;0.25;0.75_en1'][0], 3.6587, 4)

        # VBr2 with unoccupied Spin.down electrons for ib<ib_VBM but E>E_CBM:
        bs_featurizer = BandFeaturizer(kpoints=self.vbr2kpts, nbands=3)
        df_bf2 = bs_featurizer.featurize_dataframe(self.df2, col_id='bs_line')
        self.assertTrue(math.isnan(df_bf2['p_ex1_degen'][0]))
        # Gamma:
        self.assertAlmostEqual(df_bf2['n_0.0;0.0;0.0_en3'][0], 0.8020, 4)
        self.assertAlmostEqual(df_bf2['n_0.0;0.0;0.0_en2'][0], 0.4243, 4)
        self.assertAlmostEqual(df_bf2['n_0.0;0.0;0.0_en1'][0], 0.4243, 4)
        self.assertAlmostEqual(df_bf2['p_0.0;0.0;0.0_en1'][0], -0.3312, 4)
        self.assertAlmostEqual(df_bf2['p_0.0;0.0;0.0_en2'][0], -0.6076, 4)
        self.assertAlmostEqual(df_bf2['p_0.0;0.0;0.0_en3'][0], -0.6076, 4)
        # M:
        self.assertAlmostEqual(df_bf2['n_0.5;0.0;0.0_en3'][0], 0.5524, 4)
        self.assertAlmostEqual(df_bf2['n_0.5;0.0;0.0_en2'][0], 0.5074, 4)
        self.assertAlmostEqual(df_bf2['n_0.5;0.0;0.0_en1'][0], 0.2985, 4)
        self.assertAlmostEqual(df_bf2['p_0.5;0.0;0.0_en1'][0], -0.0636, 4)
        self.assertAlmostEqual(df_bf2['p_0.5;0.0;0.0_en2'][0], -0.1134, 4)
        self.assertAlmostEqual(df_bf2['p_0.5;0.0;0.0_en3'][0], -0.8091, 4)
        # between Gamma and M:
        self.assertAlmostEqual(df_bf2['n_0.2;0.0;0.0_en3'][0], 0.6250, 4)
        self.assertAlmostEqual(df_bf2['n_0.2;0.0;0.0_en2'][0], 0.3779, 4)
        self.assertAlmostEqual(df_bf2['n_0.2;0.0;0.0_en1'][0], 0.1349, 4)
        self.assertAlmostEqual(df_bf2['p_0.2;0.0;0.0_en1'][0], -0.1049, 4)
        self.assertAlmostEqual(df_bf2['p_0.2;0.0;0.0_en2'][0], -0.3044, 4)
        self.assertAlmostEqual(df_bf2['p_0.2;0.0;0.0_en3'][0], -0.6399, 4)
        # L:
        self.assertAlmostEqual(df_bf2['n_0.5;0.0;0.5_en2'][0], 0.4448, 4)
        self.assertAlmostEqual(df_bf2['n_0.5;0.0;0.5_en1'][0], 0.3076, 4)
        self.assertAlmostEqual(df_bf2['p_0.5;0.0;0.5_en1'][0], -0.0639, 4)
        self.assertAlmostEqual(df_bf2['p_0.5;0.0;0.5_en2'][0], -0.1133, 4)