예제 #1
0
 def test_band_center(self):
     """Check BandCenter through the dataframe API and the direct API."""
     # Dataframe path: featurize the fixture's "composition" column and
     # compare the first row of the generated "band center" column.
     featurized = BandCenter().featurize_dataframe(self.df,
                                                   col_id="composition")
     first_row_value = featurized["band center"][0]
     self.assertAlmostEqual(first_row_value, -2.672486385)

     # Direct path: featurize a single composition and compare the first
     # (index 0) returned feature.
     composition = Composition('Ag33O500V200')
     direct_value = BandCenter().featurize(composition)[0]
     self.assertAlmostEqual(direct_value, -2.7337150991)
예제 #2
0
def featurize_composition(df: pd.DataFrame) -> pd.DataFrame:
    """ Add matminer composition features to a dataframe of structures.

    A copy of the input is extended with a `"composition"` column (taken
    from each structure's `.composition`), run through a set of matminer
    composition featurizers and then through two oxidation-state based
    featurizers. Multi-index column names are flattened with `|`, the
    HOMO/LUMO character and element columns are converted to integers, and
    infinities/NaNs are replaced by 0 before the result is passed to
    `clean_df`.

    Args:
        df (pandas.DataFrame): the input dataframe with `"structure"`
            column containing `pymatgen.Structure` objects.

    Returns:
        pandas.DataFrame: the decorated DataFrame.

    """

    def _atomic_number(symbol):
        # Non-string entries (e.g. missing values) are encoded as -1.
        if isinstance(symbol, str):
            return Element(symbol).Z
        return -1

    logging.info("Applying composition featurizers...")
    df = df.copy()
    df['composition'] = df['structure'].apply(lambda s: s.composition)

    # ElectronAffinity() and CohesiveEnergy() are intentionally left out:
    # these descriptors were not used in the paper preset.
    composition_featurizers = [
        ElementProperty.from_preset("magpie"),
        AtomicOrbitals(),
        BandCenter(),
        Stoichiometry(),
        ValenceOrbital(),
        IonProperty(),
        ElementFraction(),
        TMetalFraction(),
        Miedema(),
        YangSolidSolution(),
        AtomicPackingEfficiency(),
    ]
    featurizer = MultipleFeaturizer(composition_featurizers)

    df = featurizer.featurize_dataframe(
        df, "composition", multiindex=True, ignore_errors=True
    )
    # Flatten the two-level column index into "Featurizer|label" names.
    df.columns = df.columns.map('|'.join).str.strip('|')

    oxidation_featurizers = [OxidationStates(), ElectronegativityDiff()]
    ox_featurizer = MultipleFeaturizer(oxidation_featurizers)

    # Build the oxidation-state-decorated composition column that the
    # oxidation featurizers read from.
    df = CompositionToOxidComposition().featurize_dataframe(
        df, "Input Data|composition"
    )

    df = ox_featurizer.featurize_dataframe(
        df, "composition_oxid", multiindex=True, ignore_errors=True
    )
    df = df.rename(columns={'Input Data': ''})
    df.columns = df.columns.map('|'.join).str.strip('|')

    # Encode orbital characters as integers.
    orbital_codes = {"s": 1, "p": 2, "d": 3, "f": 4}
    for column in ('AtomicOrbitals|HOMO_character',
                   'AtomicOrbitals|LUMO_character'):
        df[column] = df[column].map(orbital_codes)

    # Encode element symbols as atomic numbers.
    for column in ('AtomicOrbitals|HOMO_element',
                   'AtomicOrbitals|LUMO_element'):
        df[column] = df[column].apply(_atomic_number)

    df = df.replace([np.inf, -np.inf, np.nan], 0)

    return clean_df(df)
예제 #3
0
 def __init__(self, dopings=None, eref="midgap", T=300, return_eref=False):
     """
     Store the featurizer settings and create the BandCenter helper.

     Args:
         dopings ([float]): list of doping concentrations 1/cm3. Note that a
             negative concentration is treated as electron majority carrier
             (n-type) and positive for holes (p-type). A falsy value
             (None or an empty list) selects [-1e20, 1e20].
         eref (str or int or float): energy alignment reference. Defaults
             to midgap (equilibrium fermi). A fixed number can also be used.
             str options: "midgap", "vbm", "cbm", "dos_fermi", "band_center"
         T (float): absolute temperature in Kelvin
         return_eref: if True, instead of aligning the fermi levels based
             on eref, it (eref) will be explicitly returned as a feature
     """
     # NOTE(review): the option list above is carried over unchanged; the
     # exact spelling accepted for the band-center option should be confirmed
     # against the featurize() implementation.
     if not dopings:
         dopings = [-1e20, 1e20]
     self.dopings = dopings
     self.eref = eref
     self.T = T
     self.return_eref = return_eref
     self.BC = BandCenter()
예제 #4
0
	def __init__(self, radius_type='ionic_radius', normalize_formula=False):
		"""Store the options and build the featurizers used by this object.

		Args:
			radius_type: stored unchanged on ``self.radius_type``.
			normalize_formula: stored unchanged on ``self.normalize_formula``.
		"""
		self.radius_type = radius_type
		self.normalize_formula = normalize_formula

		# Featurizers constructed with their default arguments.
		self.ValenceOrbital = ValenceOrbital()
		self.AtomicOrbitals = AtomicOrbitalsMod()
		self.CohesiveEnergy = CohesiveEnergy()
		self.BandCenter = BandCenter()
		self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()

		# Custom ElementProperty featurizer: a hand-picked list of magpie
		# properties, summarised by mean and standard deviation only.
		magpie_features = [
			'BoilingT',
			'MeltingT',
			'BulkModulus',
			'ShearModulus',
			'Row',
			'Column',
			'Number',
			'MendeleevNumber',
			'SpaceGroupNumber',
			'Density',
			'MolarVolume',
			'FusionEnthalpy',
			'HeatVaporization',
			'Polarizability',
			'ThermalConductivity',
		]
		self.ElementProperty = ElementProperty(
			data_source='magpie',
			features=magpie_features,
			stats=["mean", "std_dev"],
		)

		# Run the featurizer check last, once every attribute above exists.
		self.check_matminer_featurizers()
예제 #5
0
	def __init__(self, normalize_formula=False):
		"""Store the option and build the featurizers used by this object.

		Args:
			normalize_formula: stored unchanged on ``self.normalize_formula``.
		"""
		self.normalize_formula = normalize_formula

		# ValenceOrbital is deliberately not created here: valence counts etc.
		# are already covered by ElementProperty.from_preset('magpie').
		self.AtomicOrbitals = AtomicOrbitalsMod()
		self.CohesiveEnergy = CohesiveEnergy()
		self.BandCenter = BandCenter()
		self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()

		# Start from the magpie preset and append extra properties to its
		# feature list.
		extra_features = [
			'BoilingT',
			'BulkModulus',
			'ShearModulus',
			'Density',
			'MolarVolume',
			'FusionEnthalpy',
			'HeatVaporization',
			'Polarizability',
			'ThermalConductivity',
		]
		self.ElementProperty = ElementProperty.from_preset('magpie')
		self.ElementProperty.features += extra_features
		# The preset's stats are left untouched. Original author's note: range,
		# min and max are irrelevant inside the ternary, and restricting stats
		# to ['mean', 'avg_dev', 'mode'] was considered but not enabled.

		# Run the featurizer check last, once every attribute above exists.
		self.check_matminer_featurizers()
예제 #6
0
# Last statement of section F3: run the `os_feat` featurizer on the
# 'composition_oxid' column of `fdf` and keep the returned dataframe.
# ignore_errors=True is passed so per-row failures do not abort the run.
fdf = os_feat.featurize_dataframe(fdf, 'composition_oxid', ignore_errors=True)
# -- end F3

# -- start F4 --
# F4: apply the AtomicOrbitals featurizer to the 'composition' column.
from matminer.featurizers.composition import AtomicOrbitals

ao_feat = AtomicOrbitals()
fdf = ao_feat.featurize_dataframe(fdf,
                                  col_id='composition',
                                  ignore_errors=True)
# -- end F4

# -- start F5
# F5: apply the BandCenter featurizer to the same 'composition' column.
from matminer.featurizers.composition import BandCenter

bce_feat = BandCenter()
fdf = bce_feat.featurize_dataframe(fdf,
                                   col_id='composition',
                                   ignore_errors=True)
# -- end F5

# -- start F6
# F6: electronegativity-difference features.
from matminer.featurizers.composition import ElectronegativityDiff

# ElectronegativityDiff needs compositions that carry oxidation states, so it
# is applied to the oxidation-state-decorated 'composition_oxid' column (the
# same column used in F3). On the plain 'composition' column every row would
# fail, and ignore_errors=True would silently turn those failures into NaNs.
eld_feat = ElectronegativityDiff()
fdf = eld_feat.featurize_dataframe(fdf,
                                   col_id='composition_oxid',
                                   ignore_errors=True)
# -- end F6

# -- start F7
예제 #7
0
 def test_band_center(self):
     """Check the first "band center" value produced for the fixture."""
     featurizer = BandCenter()
     featurized = featurizer.featurize_dataframe(self.df,
                                                 col_id="composition")
     first_row_value = featurized["band center"][0]
     self.assertAlmostEqual(first_row_value, -2.672486385)
예제 #8
0
 def __init__(self, dopings=None, eref="midgap", T=300, return_eref=False):
     """Store the settings and create the BandCenter helper.

     A falsy ``dopings`` (None or an empty list) is replaced by
     ``[-1e20, 1e20]``; the other arguments are stored unchanged.
     """
     if not dopings:
         dopings = [-1e20, 1e20]
     self.dopings = dopings
     self.eref = eref
     self.T = T
     self.return_eref = return_eref
     self.BC = BandCenter()
예제 #9
0
class DopingFermi(BaseFeaturizer):
    """
    The fermi level (w.r.t. selected reference energy) associated with a
    specified carrier concentration (1/cm3) and temperature. This featurizer
    requires the total density of states and structure. The Structure
    as dos.structure (e.g. in CompleteDos) is required by FermiDos class.

    Args:
        dopings ([float]): list of doping concentrations 1/cm3. Note that a
            negative concentration is treated as electron majority carrier
            (n-type) and positive for holes (p-type)
        eref (str or int or float): energy alignment reference. Defaults
            to midgap (equilibrium fermi). A fixed number can also be used.
            str options: "midgap", "vbm", "cbm", "dos_fermi", "band_center"
            ("band center", with a space, is accepted as well)
        T (float): absolute temperature in Kelvin
        return_eref: if True, instead of aligning the fermi levels based
            on eref, it (eref) will be explicitly returned as a feature

    Returns (featurize returns [float] and featurize_labels returns [str]):
        examples:
            fermi_c-1e+20T300 (float): the fermi level for the electron
                concentration of 1e20 and the temperature of 300K.
            fermi_c1e+18T600 (float): the fermi level for the hole concentration
                of 1e18 and the temperature of 600K.
            midgap eref (float): if return_eref==True then eref (midgap here)
                energy is returned. In this case other fermi levels returned are
                absolute as opposed to relative to eref (i.e. if not return_eref)
    """
    def __init__(self, dopings=None, eref="midgap", T=300, return_eref=False):
        # A falsy `dopings` (None or empty) selects one n-type and one p-type
        # concentration.
        self.dopings = dopings or [-1e20, 1e20]
        self.eref = eref
        self.T = T
        self.return_eref = return_eref
        self.BC = BandCenter()

    def _resolve_eref(self, dos):
        """
        Return the reference energy selected by ``self.eref``.

        Args:
            dos (FermiDos): the density of states the reference is read from.

        Returns (float): ``self.eref`` itself when it is not a string,
            otherwise the energy named by the string option.

        Raises:
            ValueError: if ``self.eref`` is a string that is not one of the
                supported options.
        """
        if not isinstance(self.eref, str):
            return self.eref
        if self.eref == "dos_fermi":
            return dos.efermi
        if self.eref in ("midgap", "vbm", "cbm"):
            ecbm, evbm = dos.get_cbm_vbm()
            if self.eref == "midgap":
                return (evbm + ecbm) / 2.0
            return evbm if self.eref == "vbm" else ecbm
        # The documented spelling is "band_center"; "band center" is the
        # spelling that was historically accepted, so both are supported.
        if self.eref in ("band center", "band_center"):
            return self.BC.featurize(dos.structure.composition)[0]
        raise ValueError('Unsupported "eref": {}'.format(self.eref))

    def featurize(self, dos, bandgap=None):
        """
        Args:
            dos (pymatgen Dos, CompleteDos or FermiDos):
            bandgap (float): for example the experimentally measured band gap
                or one that is calculated via more accurate methods than the
                one used to generate dos. dos will be scissored to have the
                same electronic band gap as bandgap.

        Returns ([float]): features are fermi levels in eV at the given
            concentrations and temperature + eref in eV if return_eref

        Raises:
            ValueError: if ``self.eref`` is an unsupported string.
        """
        dos = FermiDos(dos, bandgap=bandgap)
        # The reference energy does not depend on the doping level, so it is
        # resolved once instead of once per concentration.
        eref = self._resolve_eref(dos)
        feats = []
        for c in self.dopings:
            fermi = dos.get_fermi(c=c, T=self.T, nstep=50)
            if not self.return_eref:
                # Report the fermi level relative to the reference energy.
                fermi -= eref
            feats.append(fermi)
        if self.return_eref:
            feats.append(eref)
        return feats

    def feature_labels(self):
        """
        Returns ([str]): list of names of the features generated by featurize
            example: "fermi_c-1e+20T300" that is the fermi level for the
            electron concentration of 1e20 (c-1e+20) and temperature of 300K.
        """
        labels = ["fermi_c{}T{}".format(c, self.T) for c in self.dopings]
        if self.return_eref:
            labels.append("{} eref".format(self.eref))
        return labels

    def implementors(self):
        """Returns ([str]): names of the people who implemented this class."""
        return ["Alireza Faghaninia"]

    def citations(self):
        """Returns ([str]): citations for this featurizer (none)."""
        return []
예제 #10
0
class FUTURE_PROSPECTS_2021(featurizer.extendedMODFeaturizer):
    """Featurizer preset bundling matminer composition, structure, site, DOS
    and band-structure featurizers.

    The tuples defined at class level list the featurizer instances for each
    category. Every ``featurize_*`` method first delegates to the parent
    class and then converts non-numeric outputs into numeric columns before
    handing the dataframe to ``clean_df``.
    """

    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        BagofBonds,
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )
    from matminer.featurizers.dos import (
        DOSFeaturizer,
        SiteDOS,
        Hybridization,
        DosAsymmetry,
    )
    from matminer.featurizers.bandstructure import (
        BandFeaturizer,
        BranchPointEnergy
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxid_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        #PartialRadialDistributionFunction(), #Introduces a large amount of features
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    dos_featurizers = (
        DOSFeaturizer(),
        SiteDOS(),
        Hybridization()
    )

    band_featurizers = (
        BandFeaturizer(),
        BranchPointEnergy()
    )

    def __init__(self, n_jobs=None):
        """Store ``n_jobs`` on the instance as ``_n_jobs``."""
        self._n_jobs = n_jobs

    def featurize_composition(self, df):
        """Applies the preset composition featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        HOMO/LUMO orbital characters are encoded as integers (s=1 ... f=4)
        and HOMO/LUMO element symbols as atomic numbers (-1 when the entry
        is not a string).
        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df["AtomicOrbitals|HOMO_character"] = df["AtomicOrbitals|HOMO_character"].map(
            _orbitals
        )
        df["AtomicOrbitals|LUMO_character"] = df["AtomicOrbitals|LUMO_character"].map(
            _orbitals
        )

        df["AtomicOrbitals|HOMO_element"] = df["AtomicOrbitals|HOMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )
        df["AtomicOrbitals|LUMO_element"] = df["AtomicOrbitals|LUMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )

        return clean_df(df)

    def featurize_structure(self, df):
        """Applies the preset structural featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one column
        per distance (first 50 distances of the first row), the crystal
        system is encoded as an integer and the centrosymmetry flag as 0/1.
        """
        df = super().featurize_structure(df)

        dist = df["RadialDistributionFunction|radial distribution function"].iloc[0][
            "distances"
        ][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d
            )
            # `i=i` binds the current index explicitly instead of relying on
            # the lambda being evaluated inside this iteration.
            df[_rdf_key] = df[
                "RadialDistributionFunction|radial distribution function"
            ].apply(lambda x, i=i: x["distribution"][i])

        df = df.drop("RadialDistributionFunction|radial distribution function", axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            "orthorhombic": 3,
            # Legacy misspelling kept so previously accepted input still maps.
            "orthorombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7,
        }

        def _int_map(x):
            # NaN never compares equal to anything (including itself), so a
            # missing value has to be detected with pd.isna; it maps to 0.
            if pd.isna(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"
        ].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"
        ].map(_int_map)

        return clean_df(df)

    def featurize_dos(self, df):
        """Applies the preset dos featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The VBM/CBM specie columns are one-hot encoded, the VBM/CBM orbital
        characters are encoded as integers, and the ``;``-separated location
        columns are split into three float columns each. The raw location
        columns and the ``dos`` column are dropped.
        """

        df = super().featurize_dos(df)

        hotencodeColumns = ["DOSFeaturizer|vbm_specie_1", "DOSFeaturizer|cbm_specie_1"]

        one_hot = pd.get_dummies(df[hotencodeColumns])
        df = df.drop(hotencodeColumns, axis=1).join(one_hot)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}

        df["DOSFeaturizer|vbm_character_1"] = df[
            "DOSFeaturizer|vbm_character_1"
        ].map(_orbitals)
        df["DOSFeaturizer|cbm_character_1"] = df[
            "DOSFeaturizer|cbm_character_1"
        ].map(_orbitals)

        # Splitting one feature into several floating features
        # e.g. number;number;number into three columns
        splitColumns = ["DOSFeaturizer|cbm_location_1", "DOSFeaturizer|vbm_location_1"]

        for column in splitColumns:
            try:
                newColumns = df[column].str.split(";", n=2, expand=True)
                for i in range(0, 3):
                    # The builtin `float` replaces the removed `np.float`
                    # alias, which made this conversion fail on every call
                    # with recent NumPy releases.
                    df[column + "_" + str(i)] = np.array(newColumns[i]).astype(float)
            except (AttributeError, KeyError, TypeError, ValueError):
                # Best effort: a column that is missing, not string-valued or
                # not parseable as three floats is skipped.
                continue
        df = df.drop(splitColumns, axis=1)
        df = df.drop(["dos"], axis=1)
        return clean_df(df)

    def featurize_bandstructure(self, df):
        """Applies the preset band structure featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The ``is_gap_direct`` flag is encoded as 0/1 and the
        ``bandstructure`` column is dropped.
        """

        df = super().featurize_bandstructure(df)

        def _int_map(x):
            # Compare via str() so both bools and the strings "True"/"False"
            # are handled; any other value yields None.
            if str(x) == "False":
                return 0
            elif str(x) == "True":
                return 1

        df["BandFeaturizer|is_gap_direct"] = df[
            "BandFeaturizer|is_gap_direct"
        ].map(_int_map)

        df = df.drop(["bandstructure"], axis=1)

        return clean_df(df)

    def featurize_site(self, df):
        """Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns whose values are all zero are removed.
        """

        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # Keep only the columns that contain at least one non-zero entry.
        df = df.loc[:, (df != 0).any(axis=0)]

        return clean_df(df)
예제 #11
0
def predict_log10_eps(
    target: Union[Structure, Composition],
    dielectric_type: str,
    model_type: str,
) -> float:
    """
    :param target: structure or composition to predict dielectric constants
    :param dielectric_type: "el" or "ion"
    :param model_type: "comp" or "comp_st"
    :return: Descriptor vector
    """
    if dielectric_type not in ["el", "ion"]:
        raise ValueError(
            f'Specify dielectric type "el" or "ion"\nInput: {dielectric_type}')
    if model_type not in ["comp", "comp_st"]:
        raise ValueError(
            f'Specify regression_type "comp" or "comp_st"\nInput: {model_type}'
        )

    if model_type == "comp":
        if isinstance(target, Structure):
            comp = target.composition
        else:
            comp = target
        comp_oxi = comp.add_charges_from_oxi_state_guesses()
        if dielectric_type == "el":
            ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"),
                                  comp)
            valence = ScalarFeaturizer(ValenceOrbital(), comp)
            ion_prop = ScalarFeaturizer(IonProperty(), comp)
            en_diff = ScalarFeaturizer(ElectronegativityDiff(), comp_oxi)
            oxi_state = ScalarFeaturizer(OxidationStates.from_preset("deml"),
                                         comp_oxi)
            atomic_orbital = ScalarFeaturizer(AtomicOrbitals(), comp)
            descriptor = [
                ep.get_from_label("PymatgenData minimum X"),
                ep.get_from_label("PymatgenData range X"),
                ep.get_from_label("PymatgenData std_dev X"),
                ep.get_from_label("PymatgenData mean row"),
                ep.get_from_label("PymatgenData std_dev row"),
                ep.get_from_label("PymatgenData mean group"),
                ep.get_from_label("PymatgenData mean block"),
                ep.get_from_label("PymatgenData std_dev block"),
                ep.get_from_label("PymatgenData mean atomic_mass"),
                ep.get_from_label("PymatgenData std_dev atomic_mass"),
                ep.get_from_label("PymatgenData std_dev atomic_radius"),
                ep.get_from_label("PymatgenData minimum mendeleev_no"),
                ep.get_from_label("PymatgenData range mendeleev_no"),
                ep.get_from_label("PymatgenData std_dev mendeleev_no"),
                ep.get_from_label("PymatgenData mean thermal_conductivity"),
                ep.get_from_label("PymatgenData std_dev thermal_conductivity"),
                ep.get_from_label("PymatgenData mean melting_point"),
                ep.get_from_label("PymatgenData std_dev melting_point"),
                valence.get_from_label("avg s valence electrons"),
                valence.get_from_label("avg d valence electrons"),
                valence.get_from_label("frac s valence electrons"),
                valence.get_from_label("frac p valence electrons"),
                valence.get_from_label("frac d valence electrons"),
                ion_prop.get_from_label("avg ionic char"),
                TMetalFraction().featurize(comp)[0],
                en_diff.get_from_label("maximum EN difference"),
                en_diff.get_from_label("range EN difference"),
                en_diff.get_from_label("mean EN difference"),
                en_diff.get_from_label("std_dev EN difference"),
                BandCenter().featurize(comp)[0],
                oxi_state.get_from_label("std_dev oxidation state"),
                atomic_orbital.get_from_label("HOMO_energy"),
                atomic_orbital.get_from_label("LUMO_energy"),
                atomic_orbital.get_from_label("gap_AO"),
            ]
        elif dielectric_type == "ion":
            stoich = ScalarFeaturizer(Stoichiometry(), comp)
            ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"),
                                  comp)
            valence = ScalarFeaturizer(ValenceOrbital(), comp)
            ion_prop = ScalarFeaturizer(IonProperty(), comp)
            en_diff = ScalarFeaturizer(ElectronegativityDiff(), comp_oxi)
            oxi_state = ScalarFeaturizer(OxidationStates.from_preset("deml"),
                                         comp_oxi)
            atomic_orbital = ScalarFeaturizer(AtomicOrbitals(), comp)
            at_pack_eff = ScalarFeaturizer(AtomicPackingEfficiency(), comp)
            descriptor = [
                stoich.get_from_label("3-norm"),
                stoich.get_from_label("5-norm"),
                ep.get_from_label("PymatgenData mean X"),
                ep.get_from_label("PymatgenData mean row"),
                ep.get_from_label("PymatgenData std_dev row"),
                ep.get_from_label("PymatgenData std_dev group"),
                ep.get_from_label("PymatgenData mean block"),
                ep.get_from_label("PymatgenData std_dev block"),
                ep.get_from_label("PymatgenData maximum atomic_mass"),
                ep.get_from_label("PymatgenData range atomic_mass"),
                ep.get_from_label("PymatgenData mean atomic_mass"),
                ep.get_from_label("PymatgenData std_dev atomic_mass"),
                ep.get_from_label("PymatgenData maximum atomic_radius"),
                ep.get_from_label("PymatgenData range atomic_radius"),
                ep.get_from_label("PymatgenData mean atomic_radius"),
                ep.get_from_label("PymatgenData std_dev atomic_radius"),
                ep.get_from_label("PymatgenData minimum mendeleev_no"),
                ep.get_from_label("PymatgenData mean mendeleev_no"),
                ep.get_from_label("PymatgenData std_dev mendeleev_no"),
                ep.get_from_label("PymatgenData mean thermal_conductivity"),
                ep.get_from_label("PymatgenData std_dev thermal_conductivity"),
                ep.get_from_label("PymatgenData mean melting_point"),
                ep.get_from_label("PymatgenData std_dev melting_point"),
                valence.get_from_label("avg s valence electrons"),
                valence.get_from_label("frac s valence electrons"),
                valence.get_from_label("frac p valence electrons"),
                valence.get_from_label("frac d valence electrons"),
                ion_prop.get_from_label("avg ionic char"),
                TMetalFraction().featurize(comp)[0],
                en_diff.get_from_label("minimum EN difference"),
                en_diff.get_from_label("range EN difference"),
                en_diff.get_from_label("mean EN difference"),
                en_diff.get_from_label("std_dev EN difference"),
                oxi_state.get_from_label("range oxidation state"),
                oxi_state.get_from_label("std_dev oxidation state"),
                atomic_orbital.get_from_label("LUMO_energy"),
                atomic_orbital.get_from_label("gap_AO"),
                at_pack_eff.get_from_label("mean simul. packing efficiency"),
                at_pack_eff.get_from_label(
                    "mean abs simul. packing efficiency"),
                at_pack_eff.get_from_label(
                    "dist from 1 clusters |APE| < 0.010"),
                at_pack_eff.get_from_label(
                    "dist from 3 clusters |APE| < 0.010"),
                at_pack_eff.get_from_label(
                    "dist from 5 clusters |APE| < 0.010"),
            ]
    elif model_type == "comp_st":
        if isinstance(target, Composition):
            raise ValueError(
                'comp_st (Using compositional and structural descriptor) is specified, '
                'but target is composition')
        comp: Composition = target.composition
        comp_oxi = comp.add_charges_from_oxi_state_guesses()
        target_orig = deepcopy(target)
        target.add_oxidation_state_by_guess()
        if dielectric_type == "el":
            ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"),
                                  comp)
            valence = ScalarFeaturizer(ValenceOrbital(), comp)
            en_diff = ScalarFeaturizer(ElectronegativityDiff(), comp_oxi)
            atomic_orbital = ScalarFeaturizer(AtomicOrbitals(), comp)
            density = ScalarFeaturizer(DensityFeatures(), target)
            dist_btw_nn = MinimumRelativeDistances().featurize(target_orig)
            opsf = SiteFeaturizer(OPSiteFingerprint(), target)
            voro_fp = SiteFeaturizer(VoronoiFingerprint(use_symm_weights=True),
                                     target)
            gsf = SiteFeaturizer(GaussianSymmFunc(), target)
            lpd = SiteFeaturizer(
                LocalPropertyDifference.from_preset("ward-prb-2017"), target)
            descriptor = [
                ep.get_from_label("PymatgenData std_dev X"),
                ep.get_from_label("PymatgenData mean block"),
                ep.get_from_label("PymatgenData std_dev atomic_mass"),
                valence.get_from_label("frac d valence electrons"),
                TMetalFraction().featurize(comp)[0],
                en_diff.get_from_label("maximum EN difference"),
                en_diff.get_from_label("mean EN difference"),
                atomic_orbital.get_from_label("HOMO_energy"),
                atomic_orbital.get_from_label("LUMO_energy"),
                density.get_from_label("density"),
                np.mean(dist_btw_nn),
                np.std(dist_btw_nn),
                opsf.get_from_label_func("tetrahedral CN_4", np.max),
                opsf.get_from_label_func("rectangular see-saw-like CN_4",
                                         np.max),
                np.max([
                    EwaldSiteEnergy(accuracy=4).featurize(target, i)
                    for i in range(target.num_sites)
                ]),
                voro_fp.get_from_label_func("Voro_area_std_dev", np.max),
                voro_fp.get_from_label_func("Voro_area_std_dev", np.mean),
                voro_fp.get_from_label_func("Voro_dist_minimum", np.min),
                voro_fp.get_from_label_func("Voro_dist_minimum", np.std),
                gsf.get_from_label_func("G2_20.0", np.std),
                gsf.get_from_label_func("G2_80.0", np.max),
                gsf.get_from_label_func("G4_0.005_4.0_-1.0", np.mean),
                lpd.get_from_label_func("local difference in NdValence",
                                        np.mean),
                lpd.get_from_label_func("local difference in NValence",
                                        np.min),
                lpd.get_from_label_func("local difference in NValence",
                                        np.std),
                lpd.get_from_label_func("local difference in NdUnfilled",
                                        np.mean),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.min),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.mean),
                lpd.get_from_label_func("local difference in GSmagmom",
                                        np.mean)
            ]
        elif dielectric_type == "ion":
            ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"),
                                  comp)
            atomic_orbitals = ScalarFeaturizer(AtomicOrbitals(), comp)
            density = ScalarFeaturizer(DensityFeatures(), target)
            str_het = ScalarFeaturizer(StructuralHeterogeneity(), target)
            opsf = SiteFeaturizer(OPSiteFingerprint(), target)
            voro_fp = SiteFeaturizer(VoronoiFingerprint(use_symm_weights=True),
                                     target)
            gsf = SiteFeaturizer(GaussianSymmFunc(), target)
            lpd = SiteFeaturizer(
                LocalPropertyDifference.from_preset("ward-prb-2017"), target)
            descriptor = [
                ep.get_from_label("PymatgenData std_dev row"),
                ep.get_from_label("PymatgenData mean thermal_conductivity"),
                ep.get_from_label("PymatgenData std_dev melting_point"),
                TMetalFraction().featurize(comp)[0],
                atomic_orbitals.get_from_label("gap_AO"),
                density.get_from_label("density"),
                density.get_from_label("packing fraction"),
                str_het.get_from_label("mean neighbor distance variation"),
                str_het.get_from_label("avg_dev neighbor distance variation"),
                opsf.get_from_label_func("sgl_bd CN_1", np.mean),
                opsf.get_from_label_func("bent 150 degrees CN_2", np.mean),
                opsf.get_from_label_func("linear CN_2", np.mean),
                opsf.get_from_label_func("trigonal planar CN_3", np.mean),
                opsf.get_from_label_func("pentagonal planar CN_5", np.std),
                opsf.get_from_label_func("octahedral CN_6", np.max),
                opsf.get_from_label_func("octahedral CN_6", np.std),
                opsf.get_from_label_func("q6 CN_12", np.mean),
                np.max([
                    EwaldSiteEnergy(accuracy=4).featurize(target, i)
                    for i in range(target.num_sites)
                ]),
                voro_fp.get_from_label_func("Symmetry_weighted_index_4",
                                            np.std),
                voro_fp.get_from_label_func("Voro_vol_maximum", np.mean),
                voro_fp.get_from_label_func("Voro_area_std_dev", np.mean),
                voro_fp.get_from_label_func("Voro_area_minimum", np.std),
                voro_fp.get_from_label_func("Voro_area_maximum", np.min),
                voro_fp.get_from_label_func("Voro_dist_std_dev", np.mean),
                gsf.get_from_label_func("G2_80.0", np.min),
                gsf.get_from_label_func("G4_0.005_4.0_1.0", np.std),
                lpd.get_from_label_func("local difference in Number", np.max),
                lpd.get_from_label_func("local difference in MendeleevNumber",
                                        np.max),
                lpd.get_from_label_func("local difference in MendeleevNumber",
                                        np.min),
                lpd.get_from_label_func("local difference in AtomicWeight",
                                        np.max),
                lpd.get_from_label_func("local difference in AtomicWeight",
                                        np.mean),
                lpd.get_from_label_func("local difference in MeltingT",
                                        np.mean),
                lpd.get_from_label_func("local difference in Row", np.max),
                lpd.get_from_label_func(
                    "local difference in Electronegativity", np.min),
                lpd.get_from_label_func("local difference in NValence",
                                        np.std),
                lpd.get_from_label_func("local difference in NsUnfilled",
                                        np.mean),
                lpd.get_from_label_func("local difference in NdUnfilled",
                                        np.max),
                lpd.get_from_label_func("local difference in NdUnfilled",
                                        np.std),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.max),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.min),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.mean),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.std),
                lpd.get_from_label_func("local difference in GSvolume_pa",
                                        np.max),
                lpd.get_from_label_func("local difference in GSvolume_pa",
                                        np.min),
                lpd.get_from_label_func("local difference in SpaceGroupNumber",
                                        np.max),
            ]
    with open(
            f"{os.path.dirname(__file__)}/{dielectric_type}_{model_type}.joblib",
            "rb") as fr:
        model: RandomForestRegressor = joblib.load(fr)
    with open(
            f"{os.path.dirname(__file__)}/{dielectric_type}_{model_type}_scaler.joblib",
            "rb") as fr:
        scaler: StandardScaler = joblib.load(fr)
    descriptor = scaler.transform([descriptor])
    return model.predict(descriptor)[0]
    structlist.append([Structure.from_file(directoryname + i)
                       ])  #Converts CIF to pymatgen structure object
    namelist.append(os.path.splitext(i)[0])  #Collects all the structure names
    structs.append(Structure.from_file(directoryname + i))
# Build a DataFrame from the structures collected above: the data is the list
# of structures, the row index is the structure name, and the column names
# come from `namecolumns`.
dftest = pd.DataFrame(data=structlist, index=namelist, columns=namecolumns)

# These two featurizers are fitted on the full array of structures before use.
p = PartialRadialDistributionFunction()
p.fit(np.asarray(structs))

c = CoulombMatrix()
c.fit(np.asarray(structs))

erdf = ElectronicRadialDistributionFunction()
erdf.cutoff = 10  # meant to be the longest lattice diagonal; this value was picked semi-arbitrarily

# Bundle every featurizer (including the fitted ones above) into one object
# so they can all be applied in a single call.
featurizer = MultipleFeaturizer([
    ElementProperty.from_preset('magpie'),
    OxidationStates(),
    AtomicOrbitals(),
    BandCenter(),
    ElectronegativityDiff(),
    DensityFeatures(),
    RadialDistributionFunction(), p, c, erdf
])

# NOTE(review): featurize_many is handed the whole DataFrame plus a list
# holding the column name; presumably featurize_dataframe(dftest, 'structure')
# was intended -- confirm against the matminer BaseFeaturizer API.
# NOTE(review): the bundle mixes featurizers imported from composition and
# structure modules elsewhere in this file, yet only a structure column is
# supplied -- verify that every featurizer accepts that input.
r = (featurizer.featurize_many(dftest, ['structure'])
     )  # Featurize the entire DataFrame
# Original author's remark: this runs.
예제 #13
0
파일: dos.py 프로젝트: ardunn/MatMiner
 def __init__(self, dopings=None, eref="midgap", T=300, return_eref=False):
     """Store the featurizer settings and create the band-center helper.

     Args:
         dopings: doping concentrations; when omitted (or falsy) a default
             pair of one negative and one positive concentration is used.
         eref: energy reference, kept as given.
         T: temperature, kept as given.
         return_eref: flag, kept as given.
     """
     # A falsy `dopings` (None, empty list) falls back to the default pair.
     self.dopings = dopings if dopings else [-1e20, 1e20]
     self.eref, self.T = eref, T
     self.return_eref = return_eref
     # Helper featurizer instance kept on the object for later use.
     self.BC = BandCenter()
예제 #14
0
파일: dos.py 프로젝트: ardunn/MatMiner
class DopingFermi(BaseFeaturizer):
    """
    The fermi level (w.r.t. selected reference energy) associated with a
    specified carrier concentration (1/cm3) and temperature. This featurizer
    requires the total density of states and structure. The Structure
    as dos.structure (e.g. in CompleteDos) is required by FermiDos class.

    Args:
        dopings ([float]): list of doping concentrations 1/cm3. Note that a
            negative concentration is treated as electron majority carrier
            (n-type) and positive for holes (p-type)
        eref (str or int or float): energy alignment reference. Defaults
            to midgap (equilibrium fermi). A fixed number can also be used.
            str options: "midgap", "vbm", "cbm", "dos_fermi", "band_center"
            (the spelling "band center" is accepted as well)
        T (float): absolute temperature in Kelvin
        return_eref: if True, instead of aligning the fermi levels based
            on eref, it (eref) will be explicitly returned as a feature

    Returns (featurize returns [float] and featurize_labels returns [str]):
        examples:
            fermi_c-1e+20T300 (float): the fermi level for the electron
                concentration of 1e20 and the temperature of 300K.
            fermi_c1e+18T600 (float): fermi level for the hole concentration
                of 1e18 and the temperature of 600K.
            midgap eref (float): if return_eref==True then eref (midgap here)
                energy is returned. In this case, fermi levels are absolute as
                opposed to relative to eref (i.e. if not return_eref)
    """

    # The docstring has always advertised "band_center" while the code only
    # recognised "band center"; both spellings are accepted so neither the
    # documented nor the previously working value raises.
    _BAND_CENTER_KEYS = ("band_center", "band center")

    def __init__(self, dopings=None, eref="midgap", T=300, return_eref=False):
        self.dopings = dopings or [-1e20, 1e20]
        self.eref = eref
        self.T = T
        self.return_eref = return_eref
        self.BC = BandCenter()

    def _reference_energy(self, dos):
        """Resolve ``self.eref`` to a number for the given FermiDos object.

        Args:
            dos (FermiDos): the (possibly scissored) density of states.

        Returns (float): ``self.eref`` itself when it is not a string,
            otherwise the energy selected by the string option.

        Raises:
            ValueError: if ``self.eref`` is a string that is not a
                supported option.
        """
        if not isinstance(self.eref, str):
            return self.eref
        if self.eref == "dos_fermi":
            return dos.efermi
        if self.eref in ("midgap", "vbm", "cbm"):
            ecbm, evbm = dos.get_cbm_vbm()
            if self.eref == "midgap":
                return (evbm + ecbm) / 2.0
            return evbm if self.eref == "vbm" else ecbm
        if self.eref in self._BAND_CENTER_KEYS:
            return self.BC.featurize(dos.structure.composition)[0]
        raise ValueError('Unsupported "eref": {}'.format(self.eref))

    def featurize(self, dos, bandgap=None):
        """
        Args:
            dos (pymatgen Dos, CompleteDos or FermiDos):
            bandgap (float): for example the experimentally measured band gap
                or one that is calculated via more accurate methods than the
                one used to generate dos. dos will be scissored to have the
                same electronic band gap as bandgap.

        Returns ([float]): features are fermi levels in eV at the given
            concentrations and temperature + eref in eV if return_eref

        Raises:
            ValueError: if ``self.eref`` is an unsupported string.
        """
        dos = FermiDos(dos, bandgap=bandgap)
        # The reference does not depend on the doping level, so it is
        # resolved once instead of once per concentration.
        eref = self._reference_energy(dos)
        feats = []
        for c in self.dopings:
            fermi = dos.get_fermi(c=c, T=self.T, nstep=50)
            if not self.return_eref:
                fermi -= eref
            feats.append(fermi)
        if self.return_eref:
            feats.append(eref)
        return feats

    def feature_labels(self):
        """
        Returns ([str]): list of names of the features generated by featurize
            example: "fermi_c-1e+20T300" that is the fermi level for the
            electron concentration of 1e20 (c-1e+20) and temperature of 300K.
        """
        labels = ["fermi_c{}T{}".format(c, self.T) for c in self.dopings]
        if self.return_eref:
            labels.append("{} eref".format(self.eref))
        return labels

    def implementors(self):
        """Returns ([str]): names of the people who implemented this class."""
        return ["Alireza Faghaninia"]

    def citations(self):
        """Returns ([str]): citations for this featurizer (none)."""
        return []
예제 #15
0
class GenericFeaturizer(BaseFeaturizer):
	"""
	Featurizer to use generic properties available in matminer featurizers; no features from BCA class utilized

	The BCA class is only used to obtain the composition and the metal
	composition; every feature is computed by a matminer featurizer on one
	or both of those compositions.
	"""
	def __init__(self,normalize_formula=False):
		"""
		Create the matminer featurizers and verify their label order.

		Args:
			normalize_formula: stored and forwarded to BCA in featurize()
		"""
		self.normalize_formula = normalize_formula
		# don't need ValenceOrbital - valence counts etc. covered in ElementProperty.from_preset('magpie')
		# self.ValenceOrbital = ValenceOrbital()
		self.AtomicOrbitals = AtomicOrbitalsMod()
		self.CohesiveEnergy = CohesiveEnergy()
		self.BandCenter = BandCenter()
		self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()
		# ElementProperty featurizer with magpie properties plus additional properties
		self.ElementProperty = ElementProperty.from_preset('magpie')
		self.ElementProperty.features += ['BoilingT', 
					'BulkModulus', 'ShearModulus', 
					'Density','MolarVolume',
					'FusionEnthalpy','HeatVaporization',
					'Polarizability', 
					'ThermalConductivity']
		# range, min, max are irrelevant inside the ternary
		# self.ElementProperty.stats = ['mean', 'avg_dev','mode']

		# check matminer featurizers
		self.check_matminer_featurizers()
		
	def featurize(self,composition):
		"""
		Compute the matminer-derived features for a composition.

		Returns a flat list ordered as: AtomicOrbitals (metal composition,
		categorical entries removed), CohesiveEnergy (metal), BandCenter
		(metal, full), ValenceOrbitalEnergy (metal, full), ElementProperty
		(metal, full).
		"""
		# use BCA just to get composition and metal_composition
		bca = BCA(composition,'ionic_radius',self.normalize_formula)
		
		ao_features = self.AtomicOrbitals.featurize(bca.metal_composition) # HOMO and LUMO character and energy levels for metals from atomic orbitals
		ao_features = [ao_features[i] for i in range(len(ao_features)) if i not in (0,1,3,4)] # exclude HOMO_character,HOMO_element, LUMO_character, LUMO_element - categoricals
		ce_features = self.CohesiveEnergy.featurize(bca.metal_composition,formation_energy_per_atom=1e-10) # avg metal elemental cohesive energy
		bc_features = self.BandCenter.featurize(bca.metal_composition) + self.BandCenter.featurize(bca.composition)
		ve_features = self.ValenceOrbitalEnergy.featurize(bca.metal_composition) + self.ValenceOrbitalEnergy.featurize(bca.composition)
		ep_features = self.ElementProperty.featurize(bca.metal_composition) + self.ElementProperty.featurize(bca.composition)
		
		# concatenation order must match feature_labels()
		mm_features = ao_features + ce_features + bc_features + ve_features + ep_features 
		
		return mm_features
	
	def feature_labels(self):
		"""
		Feature labels for matminer-derived features

		Labels prefixed with 'M' refer to the metal composition, labels
		prefixed with 'BCA' to the full composition.
		"""
		labels = [
			#AtomicOrbitals labels
			#'M_HOMO_character',
			'M_HOMO_energy',
			#'M_LUMO_character',
			'M_LUMO_energy',
			'M_AO_gap',
			#CohesiveEnergy labels
			'M_cohesive_energy_mean',
			#BandCenter labels
			'M_BandCenter',
			'BCA_BandCenter',
			#ValenceOrbitalEnergy labels
			'M_ValenceEnergy_mean',
			'BCA_ValenceEnergy_mean'
			]
			
		labels += [f'M {l}' for l in self.ElementProperty.feature_labels()]
		labels += [f'BCA {l}' for l in self.ElementProperty.feature_labels()]
		
		return labels	
	
	@property
	def matminer_units(self):
		"""
		Feature units for matminer-derived features

		NOTE(review): the list below starts with nine 'none' entries marked
		as ValenceOrbital units, but featurize() and feature_labels() in this
		class emit no ValenceOrbital features, so the units look misaligned
		with the labels -- confirm.
		NOTE(review): ElementProperty_units is not defined anywhere in this
		class as shown; unless a base class provides it, accessing this
		property would raise AttributeError -- verify.
		"""
		units = [
			#ValenceOrbital units
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			#AtomicOrbitals units
			#'M_HOMO_character',
			'energy',
			#'M_LUMO_character',
			'energy',
			'energy',
			#CohesiveEnergy units
			'energy',
			#BandCenter units
			'energy',
			'energy',
			#ValenceOrbitalEnergy units
			'energy',
			'energy'
			]
			
		units += self.ElementProperty_units
		
		return units
		
	def feature_units(self):
		"""
		Units from the BCA class followed by the matminer units.

		NOTE(review): featurize() in this class returns no BCA features, so
		prepending the BCA units looks inconsistent with feature_labels() --
		confirm the intended behavior.
		"""
		bca_units = BCA(mg.Composition('BaO')).feature_units()
		
		return bca_units + self.matminer_units
		
	def check_matminer_featurizers(self):
		"""
		Check that features and feature order for matminer featurizers are as expected
		If features or feature order have changed, featurize() may return unexpected features that do not align with feature_labels()

		Only AtomicOrbitals, CohesiveEnergy and BandCenter are checked here.

		Raises:
			Exception: if any checked featurizer reports different labels
		"""
		#verify that matminer feature labels haven't changed
		if self.AtomicOrbitals.feature_labels() != ['HOMO_character',
											 'HOMO_element',
											 'HOMO_energy',
											 'LUMO_character',
											 'LUMO_element',
											 'LUMO_energy',
											 'gap_AO']:
			raise Exception('AtomicOrbitals features or labels have changed')

		if self.CohesiveEnergy.feature_labels() != ['cohesive energy']:
			raise Exception('CohesiveEnergy features or labels have changed')
			
		if self.BandCenter.feature_labels() != ['band center']:
			raise Exception('BandCenter features or labels have changed')
			
	def citations(self):
		"""
		De-duplicated citations of the wrapped featurizers plus the pymatgen citation.

		NOTE(review): self.ElementProperty is not among the featurizers whose
		citations are collected -- confirm whether that is intentional.
		"""
		featurizers = [self.AtomicOrbitals, self.CohesiveEnergy, self.BandCenter, self.ValenceOrbitalEnergy]
		# sum(..., []) flattens the per-featurizer citation lists into one list
		citations = sum([f.citations() for f in featurizers],[])
		# add pymatgen citation
		citations += [
			"@article{Ong2012b,"
				"author = {Ong, Shyue Ping and Richards, William Davidson and Jain, Anubhav and Hautier, Geoffroy and Kocher, Michael and Cholia, Shreyas and Gunter, Dan and Chevrier, Vincent L. and Persson, Kristin A. and Ceder, Gerbrand},"
				"doi = {10.1016/j.commatsci.2012.10.028},"
				"file = {:Users/shyue/Mendeley Desktop/Ong et al/Computational Materials Science/2013 - Ong et al. - Python Materials Genomics (pymatgen) A robust, open-source python library for materials analysis.pdf:pdf;:Users/shyue/Mendeley Desktop/Ong et al/Computational Materials Science/2013 - Ong et al. - Python Materials Genomics (pymatgen) A robust, open-source python library for materials analysis(2).pdf:pdf},"
				"issn = {09270256},"
				"journal = {Computational Materials Science},"
				"month = feb,"
				"pages = {314--319},"
				"title = {{Python Materials Genomics (pymatgen): A robust, open-source python library for materials analysis}},"
				"url = {http://linkinghub.elsevier.com/retrieve/pii/S0927025612006295},"
				"volume = {68},"
				"year = {2013}"
			"}"
			]
		return list(np.unique(citations))
예제 #16
0
class BCA_Featurizer(BaseFeaturizer):
	"""
	Featurizer combining the features of the BCA class with matminer-derived features

	The matminer featurizers are applied to the metal composition (labels
	prefixed with 'M_') and, for some of them, to the full composition
	(labels prefixed with 'BCA_'), both taken from a BCA object.
	"""
	def __init__(self,radius_type='ionic_radius',normalize_formula=False):
		"""
		Create the matminer featurizers and verify their label order.

		Args:
			radius_type: forwarded to BCA
			normalize_formula: forwarded to BCA
		"""
		self.radius_type = radius_type
		self.normalize_formula = normalize_formula
		self.ValenceOrbital = ValenceOrbital()
		self.AtomicOrbitals = AtomicOrbitalsMod()
		self.CohesiveEnergy = CohesiveEnergy()
		self.BandCenter = BandCenter()
		self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()
		#custom ElementProperty featurizer
		elemental_properties = ['BoilingT', 'MeltingT',
			'BulkModulus', 'ShearModulus',
			'Row', 'Column', 'Number', 'MendeleevNumber', 'SpaceGroupNumber',
			'Density','MolarVolume',
			'FusionEnthalpy','HeatVaporization',
			'Polarizability',
			'ThermalConductivity']
		self.ElementProperty = ElementProperty(data_source='magpie',features=elemental_properties,
						  stats=["mean", "std_dev"])
		#check matminer featurizers
		self.check_matminer_featurizers()

	def featurize(self,composition):
		"""
		Compute BCA features followed by the matminer-derived features.

		The order of the returned list matches feature_labels().
		"""
		bca = BCA(composition,self.radius_type,self.normalize_formula)
		bca_features = bca.featurize()

		vo_features = self.ValenceOrbital.featurize(bca.metal_composition) #avg and frac s, p , d, f electrons for metals
		# The first four entries are the avg s, p, d and f valence electron
		# counts (see check_matminer_featurizers); the total must include all
		# four. The previous slice [0:3] silently dropped the f electrons.
		vo_features += [sum(vo_features[0:4])] #avg total valence electrons for metals
		ao_features = self.AtomicOrbitals.featurize(bca.metal_composition) #HOMO and LUMO character and energy levels for metals from atomic orbitals
		#exclude HOMO_character,HOMO_element, LUMO_character, LUMO_element - categoricals
		ao_features = [feat for i, feat in enumerate(ao_features) if i not in (0,1,3,4)]
		ce_features = self.CohesiveEnergy.featurize(bca.metal_composition,formation_energy_per_atom=1e-10) #avg metal elemental cohesive energy
		bc_features = self.BandCenter.featurize(bca.metal_composition) + self.BandCenter.featurize(bca.composition)
		ve_features = self.ValenceOrbitalEnergy.featurize(bca.metal_composition) + self.ValenceOrbitalEnergy.featurize(bca.composition)
		ep_features = self.ElementProperty.featurize(bca.metal_composition) + self.ElementProperty.featurize(bca.composition)

		#concatenation order must match matminer_labels
		mm_features = vo_features + ao_features + ce_features + bc_features + ve_features + ep_features

		return list(bca_features.values()) + mm_features

	@property
	def ElementProperty_custom_labels(self):
		"""
		Generate custom labels for ElementProperty featurizer that follow same naming convention as Perovskite class

		All 'M_' labels come first, then all 'BCA_' labels, matching the
		order in which featurize() concatenates the two ElementProperty calls.
		"""
		elemental_property_label_map = {'BoilingT':'boil_temp','MeltingT':'melt_temp',
							'BulkModulus':'bulk_mod','ShearModulus':'shear_mod',
							'Row':'row','Column':'column','Number':'number','MendeleevNumber':'mendeleev','SpaceGroupNumber':'space_group',
							'Density':'density','MolarVolume':'molar_vol',
							'FusionEnthalpy':'H_fus','HeatVaporization':'H_vap',
							'Polarizability':'polarizability',
							'ThermalConductivity':'sigma_therm'}

		element_property_labels = list(map(elemental_property_label_map.get,self.ElementProperty.features))
		#'std_dev' is abbreviated to 'std' in the custom labels
		stats = ['std' if stat=='std_dev' else stat for stat in self.ElementProperty.stats]
		return [f'{prefix}_{attr}_{stat}'
				for prefix in ('M','BCA')
				for attr in element_property_labels
				for stat in stats]

	@property
	def ElementProperty_units(self):
		"""
		Generate units for ElementProperty featurizer that follow same naming convention as Perovskite class

		One unit per (property, stat) pair, repeated twice: once for the
		metal composition and once for the full composition.
		"""
		elemental_property_unit_map = {'BoilingT':'temperature','MeltingT':'temperature',
							'BulkModulus':'pressure','ShearModulus':'pressure',
							'Row':'none','Column':'none','Number':'none','MendeleevNumber':'none','SpaceGroupNumber':'none',
							'Density':'density','MolarVolume':'volume',
							'FusionEnthalpy':'energy','HeatVaporization':'energy',
							'Polarizability':'polarizability',
							'ThermalConductivity':'therm'}

		element_property_units = list(map(elemental_property_unit_map.get,self.ElementProperty.features))
		units = [ep_unit for ep_unit in element_property_units for _ in self.ElementProperty.stats]
		return units*2

	def ElementProperty_label_check(self):
		"""
		Check that ElementProperty feature labels are as expected
		If not, features may not align with feature labels

		Raises:
			Exception: if the labels reported by ElementProperty differ from the expected pattern
		"""
		#ElementProperty.feature_labels() code as of 1/24/20
		src = self.ElementProperty.data_source.__class__.__name__
		labels = ["{} {} {}".format(src, stat, attr)
				for attr in self.ElementProperty.features
				for stat in self.ElementProperty.stats]

		if labels!=self.ElementProperty.feature_labels():
			raise Exception('ElementProperty features or labels have changed')


	@property
	def matminer_labels(self):
		"""
		Feature labels for matminer-derived features
		"""
		labels = [
			#ValenceOrbital labels
			'M_ValenceElec_s_mean',
			'M_ValenceElec_p_mean',
			'M_ValenceElec_d_mean',
			'M_ValenceElec_f_mean',
			'M_ValenceElec_s_frac',
			'M_ValenceElec_p_frac',
			'M_ValenceElec_d_frac',
			'M_ValenceElec_f_frac',
			'M_ValenceElec_tot_mean',
			#AtomicOrbitals labels
			#'M_HOMO_character',
			'M_HOMO_energy',
			#'M_LUMO_character',
			'M_LUMO_energy',
			'M_AO_gap',
			#CohesiveEnergy labels
			'M_cohesive_energy_mean',
			#BandCenter labels
			'M_BandCenter',
			'BCA_BandCenter',
			#ValenceOrbitalEnergy labels
			'M_ValenceEnergy_mean',
			'BCA_ValenceEnergy_mean'
			]

		labels += self.ElementProperty_custom_labels

		return labels

	@property
	def matminer_units(self):
		"""
		Feature units for matminer-derived features, in the same order as matminer_labels
		"""
		units = [
			#ValenceOrbital units
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			#AtomicOrbitals units
			#'M_HOMO_character',
			'energy',
			#'M_LUMO_character',
			'energy',
			'energy',
			#CohesiveEnergy units
			'energy',
			#BandCenter units
			'energy',
			'energy',
			#ValenceOrbitalEnergy units
			'energy',
			'energy'
			]

		units += self.ElementProperty_units

		return units

	def feature_labels(self):
		"""
		BCA feature labels (taken from a reference 'BaO' composition) followed by the matminer labels
		"""
		bca_feature_labels = list(BCA(mg.Composition('BaO'),self.radius_type,self.normalize_formula).featurize().keys())

		return bca_feature_labels + self.matminer_labels

	def feature_units(self):
		"""
		BCA feature units followed by the matminer units
		"""
		#build the reference BCA with this instance's settings, exactly as feature_labels() does
		bca_units = BCA(mg.Composition('BaO'),self.radius_type,self.normalize_formula).feature_units()

		return bca_units + self.matminer_units

	def check_matminer_featurizers(self):
		"""
		Check that features and feature order for matminer featurizers are as expected
		If features or feature order have changed, featurize() may return unexpected features that do not align with feature_labels()

		Raises:
			Exception: if any checked featurizer reports different labels
		"""
		#verify that matminer feature labels haven't changed
		if self.ValenceOrbital.feature_labels() != ['avg s valence electrons',
											 'avg p valence electrons',
											 'avg d valence electrons',
											 'avg f valence electrons',
											 'frac s valence electrons',
											 'frac p valence electrons',
											 'frac d valence electrons',
											 'frac f valence electrons']:
			raise Exception('ValenceOrbital features or labels have changed')

		if self.AtomicOrbitals.feature_labels() != ['HOMO_character',
											 'HOMO_element',
											 'HOMO_energy',
											 'LUMO_character',
											 'LUMO_element',
											 'LUMO_energy',
											 'gap_AO']:
			raise Exception('AtomicOrbitals features or labels have changed')

		if self.CohesiveEnergy.feature_labels() != ['cohesive energy']:
			raise Exception('CohesiveEnergy features or labels have changed')

		if self.BandCenter.feature_labels() != ['band center']:
			raise Exception('BandCenter features or labels have changed')

		self.ElementProperty_label_check()

	def citations(self):
		"""
		De-duplicated citations of the wrapped featurizers and of the BCA class
		"""
		featurizers = [self.ValenceOrbital, self.AtomicOrbitals, self.CohesiveEnergy, self.BandCenter, self.ValenceOrbitalEnergy, BCA(mg.Composition('BaO'))]
		#sum(..., []) flattens the per-featurizer citation lists into one list
		return list(np.unique(sum([f.citations() for f in featurizers],[])))
예제 #17
0
class DeBreuck2020Featurizer(modnet.featurizers.MODFeaturizer):
    """ Featurizer presets used for the paper 'Machine learning
    materials properties for small datasets' by Pierre-Paul De Breuck,
    Geoffroy Hautier & Gian-Marco Rignanese, arXiv:2004.14766 (2020).

    Uses most of the featurizers implemented by matminer at the time of
    writing with their default hyperparameters and presets.

    The featurizer classes are imported inside the class body so that the
    preset tuples below can be built at class-definition time.

    """
    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        # CohesiveEnergy, - This descriptor was not used in the paper preset
        # ElectronAffinity, - This descriptor was not used in the paper preset
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        # BagofBonds, - This descriptor was not used in the paper preset
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        # PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxide_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        # PartialRadialDistributionFunction(),
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
        # BagofBonds(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    def featurize_composition(self, df):
        """ Applies the preset composition featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The categorical AtomicOrbitals columns are converted to numbers:
        orbital characters s/p/d/f become 1-4 and element symbols become
        atomic numbers (-1 when the value is not a string). Infinite and
        NaN values are replaced by 0 before the final cleaning step.

        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df['AtomicOrbitals|HOMO_character'] = df[
            'AtomicOrbitals|HOMO_character'].map(_orbitals)
        df['AtomicOrbitals|LUMO_character'] = df[
            'AtomicOrbitals|LUMO_character'].map(_orbitals)

        df['AtomicOrbitals|HOMO_element'] = df[
            'AtomicOrbitals|HOMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)
        df['AtomicOrbitals|LUMO_element'] = df[
            'AtomicOrbitals|LUMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)

        df = df.replace([np.inf, -np.inf, np.nan], 0)

        return modnet.featurizers.clean_df(df)

    def featurize_structure(self, df):
        """ Applies the preset structural featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one column
        per distance (first 50 distances of the first row), the crystal
        system is mapped to an integer code and the centrosymmetry flag to
        0/1.

        """
        df = super().featurize_structure(df)

        dist = df[
            "RadialDistributionFunction|radial distribution function"].iloc[0][
                'distances'][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d)
            # `i=i` binds the current index to the lambda explicitly
            df[_rdf_key] = df[
                "RadialDistributionFunction|radial distribution function"].apply(
                    lambda x, i=i: x['distribution'][i])

        df = df.drop("RadialDistributionFunction|radial distribution function",
                     axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            # Correct spelling: without this key orthorhombic crystals are
            # mapped to NaN by Series.map.
            "orthorhombic": 3,
            # Historical misspelling, kept so any input that carried it
            # still maps to the same code.
            "orthorombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7
        }

        def _int_map(x):
            """Map a truthy flag to 1 and a falsy or missing (NaN) flag to 0."""
            # NaN never compares equal to anything (including itself) and is
            # truthy, so `x == np.nan` could not catch it and NaN was mapped
            # to 1; test for it explicitly.
            if isinstance(x, float) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"].map(_int_map)

        return modnet.featurizers.clean_df(df)

    def featurize_site(self, df):
        """ Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns that contain only zeros are dropped before cleaning.

        """

        # rename some features for backwards compatibility with pretrained models
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # keep only the columns with at least one non-zero entry
        df = df.loc[:, (df != 0).any(axis=0)]

        return modnet.featurizers.clean_df(df)
def AddFeatures(df):
    """Add matminer composition and structure features to ``df``.

    The dataframe is threaded through a fixed sequence of matminer
    featurizers; the return value of each ``featurize_dataframe`` call
    is the input of the next one:

    1. ``StrToComposition`` on the ``"formula"`` column.
    2. The ``"magpie"`` preset of ``ElementProperty`` on ``"composition"``.
    3. ``CompositionToOxidComposition`` on ``"composition"``.
    4. ``OxidationStates`` on ``"composition_oxid"``.
    5. Further composition featurizers on ``"composition_oxid"``.
    6. Structure and site-statistics featurizers on ``"structure"``.

    Steps 1-4 are called without ``ignore_errors``; every featurizer in
    steps 5 and 6 is called with ``ignore_errors=True``.

    Args:
        df: dataframe (presumably a ``pandas.DataFrame`` -- confirm with
            callers) providing a ``"formula"`` column and, for step 6,
            a ``"structure"`` column.

    Returns:
        The dataframe returned by the last featurizer call.
    """
    from matminer.featurizers.composition import (
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElementProperty,
        Miedema,
        OxidationStates,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.conversions import (
        CompositionToOxidComposition,
        StrToComposition,
    )
    from matminer.featurizers.site import (
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        CoordinationNumber,
    )
    from matminer.featurizers.structure import (
        ChemicalOrdering,
        DensityFeatures,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        MinimumRelativeDistances,
        SiteStatsFingerprint,
        StructuralComplexity,
        StructuralHeterogeneity,
    )
    from pymatgen.analysis.local_env import CrystalNN

    # Conversions and base descriptors: these run without ignore_errors.
    df = StrToComposition().featurize_dataframe(df, "formula")
    # Feed the "composition" column to the magpie preset.
    magpie = ElementProperty.from_preset(preset_name="magpie")
    df = magpie.featurize_dataframe(df, col_id="composition")
    df = CompositionToOxidComposition().featurize_dataframe(df, "composition")
    df = OxidationStates().featurize_dataframe(df, "composition_oxid")

    # Each entry is a zero-argument factory, so a featurizer is only
    # instantiated immediately before it is applied.
    composition_factories = (
        ElectronAffinity,
        BandCenter,
        CohesiveEnergy,
        Miedema,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    for make_featurizer in composition_factories:
        df = make_featurizer().featurize_dataframe(df,
                                                   "composition_oxid",
                                                   ignore_errors=True)

    # This is the border between compositional features and structural
    # features. Comment out everything from here down to the final
    # return to use only compositional features.
    structure_factories = (
        GlobalSymmetryFeatures,
        StructuralComplexity,
        ChemicalOrdering,
        MaximumPackingEfficiency,
        MinimumRelativeDistances,
        StructuralHeterogeneity,
        lambda: SiteStatsFingerprint(
            AverageBondLength(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(
            AverageBondAngle(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(BondOrientationalParameter()),
        lambda: SiteStatsFingerprint(CoordinationNumber()),
        DensityFeatures,
    )
    for make_featurizer in structure_factories:
        df = make_featurizer().featurize_dataframe(df,
                                                   "structure",
                                                   ignore_errors=True)

    return df