Example #1
0
def similarity(_parents, target):
    """Score how similar each parent structure is to ``target``.

    Every structure is featurized with a fixed set of matminer featurizers,
    the feature rows are transformed with the scaler loaded from
    ``scaler2.pickle``, and the pairwise distance between each parent row and
    the target row is converted to a score by looking up the closest entry
    in the reference array loaded from ``quantiles.pickle``.

    Args:
        _parents: iterable of structures passed to
            ``featurizer.featurize_many``.
        target: single structure passed to ``featurizer.featurize``.

    Returns:
        numpy.ndarray with one entry per parent. Each entry is a value taken
        from ``np.linspace(0, 1, 101)``, or ``-1`` for parents whose feature
        row contained at least one missing value.
    """
    featurizer = MultipleFeaturizer([
        SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"),
        StructuralHeterogeneity(),
        ChemicalOrdering(),
        MaximumPackingEfficiency(),
        SiteStatsFingerprint.from_preset(
            "LocalPropertyDifference_ward-prb-2017"),
        StructureComposition(Stoichiometry()),
        StructureComposition(ElementProperty.from_preset("magpie")),
        StructureComposition(ValenceOrbital(props=["frac"])),
        StructureComposition(IonProperty(fast=True)),
    ])

    # HACK celery doesn't work with multiprocessing (used by matminer)
    try:
        from celery import current_task
        if current_task:
            featurizer.set_n_jobs(1)
    except ImportError:
        pass

    x_target = pd.DataFrame.from_records([featurizer.featurize(target)],
                                         columns=featurizer.feature_labels())
    x_parent = pd.DataFrame.from_records(
        featurizer.featurize_many(_parents, ignore_errors=True, pbar=False),
        columns=featurizer.feature_labels(),
    )

    # Remember which parents have missing features before the gaps are
    # overwritten with a placeholder value; their score is forced to -1 below.
    nulls = x_parent[x_parent.isnull().any(axis=1)].index.values
    x_parent.fillna(100000, inplace=True)

    # Put the columns of both frames in the same (sorted) order.
    x_target = x_target.reindex(sorted(x_target.columns), axis=1)
    x_parent = x_parent.reindex(sorted(x_parent.columns), axis=1)

    # NOTE: pickle.load can execute arbitrary code; both files must come from
    # a trusted location (settings.rxn_files).
    with open(os.path.join(settings.rxn_files, "scaler2.pickle"), "rb") as f:
        scaler = pickle.load(f)
    with open(os.path.join(settings.rxn_files, "quantiles.pickle"), "rb") as f:
        quantiles = pickle.load(f)

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the target
    # row below the parent rows in exactly the same way.
    X = scaler.transform(pd.concat([x_parent, x_target]))

    # The last row of X is the target; every other row is a parent.
    target_row = X[-1]
    D = [
        pairwise_distances(np.array([row, target_row]))[0, 1]
        for row in X[:-1]
    ]

    # Score grid is loop-invariant, so build it once.
    # assumes `quantiles` holds one reference distance per grid point
    # (101 entries) -- TODO confirm against the code that wrote the pickle.
    grid = np.linspace(0, 1, 101)
    _res = np.array([grid[np.abs(quantiles - d).argmin()] for d in D])
    _res[nulls] = -1
    return _res
Example #2
0
class DeBreuck2020Featurizer(modnet.featurizers.MODFeaturizer):
    """Featurizer presets used for the paper 'Machine learning
    materials properties for small datasets' by Pierre-Paul De Breuck,
    Geoffroy Hautier & Gian-Marco Rignanese, arXiv:2004.14766 (2020).

    Uses most of the featurizers implemented by matminer at the time of
    writing with their default hyperparameters and presets.

    The featurizer tuples below are class attributes; the ``featurize_*``
    methods delegate to the parent class and then post-process the
    resulting dataframe so that categorical outputs become numeric.

    """
    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        # CohesiveEnergy, - This descriptor was not used in the paper preset
        # ElectronAffinity, - This descriptor was not used in the paper preset
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        # BagofBonds, - This descriptor was not used in the paper preset
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        # PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxide_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        # PartialRadialDistributionFunction(),
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
        # BagofBonds(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    def featurize_composition(self, df):
        """Apply the preset composition featurizers and make the output numeric.

        After the parent class has featurized ``df``, the HOMO/LUMO orbital
        characters are mapped to integers (s=1, p=2, d=3, f=4), the HOMO/LUMO
        element symbols are replaced by atomic numbers (-1 for non-string
        values), and infinities and NaNs are replaced by 0.

        Returns the dataframe after ``modnet.featurizers.clean_df``.
        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df['AtomicOrbitals|HOMO_character'] = df[
            'AtomicOrbitals|HOMO_character'].map(_orbitals)
        df['AtomicOrbitals|LUMO_character'] = df[
            'AtomicOrbitals|LUMO_character'].map(_orbitals)

        # Non-string entries (e.g. missing values) cannot be turned into an
        # Element, so they are encoded as -1.
        df['AtomicOrbitals|HOMO_element'] = df[
            'AtomicOrbitals|HOMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)
        df['AtomicOrbitals|LUMO_element'] = df[
            'AtomicOrbitals|LUMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)

        df = df.replace([np.inf, -np.inf, np.nan], 0)

        return modnet.featurizers.clean_df(df)

    def featurize_structure(self, df):
        """Apply the preset structure featurizers and make the output numeric.

        The radial distribution function column (one dict per row) is
        expanded into one column per distance bin, using the first 50
        distances of the first row to name the columns. The crystal system
        is mapped to an integer code and the centrosymmetry flag to 0/1.

        Returns the dataframe after ``modnet.featurizers.clean_df``.
        """
        df = super().featurize_structure(df)

        # Bin positions are read from the first row only and reused for
        # every row.
        dist = df[
            "RadialDistributionFunction|radial distribution function"].iloc[0][
                'distances'][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d)
            # The lambda is consumed immediately by apply(), so capturing the
            # loop variable ``i`` is safe here.
            df[_rdf_key] = df[
                "RadialDistributionFunction|radial distribution function"].apply(
                    lambda x: x['distribution'][i])

        df = df.drop("RadialDistributionFunction|radial distribution function",
                     axis=1)

        # NOTE(review): "orthorombic" looks like a misspelling of
        # "orthorhombic"; if the featurizer emits the correct spelling, such
        # rows map to NaN. Left unchanged because altering it would change
        # the feature values seen by already-trained models -- confirm.
        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            "orthorombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7
        }

        def _int_map(x):
            # NaN never compares equal to anything (not even itself), so
            # ``x == np.nan`` can never be true; detect it explicitly,
            # otherwise the truthy NaN would be encoded as 1.
            if isinstance(x, float) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"].map(_int_map)

        return modnet.featurizers.clean_df(df)

    def featurize_site(self, df):
        """Apply the preset site featurizers and drop all-zero columns.

        Some featurizer names are aliased when calling the parent class, and
        every column whose values are all zero is removed afterwards.

        Returns the dataframe after ``modnet.featurizers.clean_df``.
        """

        # rename some features for backwards compatibility with pretrained models
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # Keep only the columns that contain at least one non-zero value.
        df = df.loc[:, (df != 0).any(axis=0)]

        return modnet.featurizers.clean_df(df)
Example #3
0
                                   ignore_errors=True)
# -- end F6

# -- start F7
# Electron-affinity features: featurize the 'composition' column of fdf and
# keep the returned dataframe as the new fdf.
# NOTE(review): ignore_errors=True presumably lets rows that fail
# featurization pass instead of raising -- confirm against the matminer docs.
from matminer.featurizers.composition import ElectronAffinity

ela_feat = ElectronAffinity()
fdf = ela_feat.featurize_dataframe(fdf,
                                   col_id='composition',
                                   ignore_errors=True)
# -- end F7

# -- start F9
# Valence-orbital features, computed from the same 'composition' column.
from matminer.featurizers.composition import ValenceOrbital

vlo_feat = ValenceOrbital()
fdf = vlo_feat.featurize_dataframe(fdf,
                                   col_id='composition',
                                   ignore_errors=True)
# -- end F9

# -- start F10
# Ion-property features, computed from the same 'composition' column.
from matminer.featurizers.composition import IonProperty

iop_feat = IonProperty()
fdf = iop_feat.featurize_dataframe(fdf,
                                   col_id='composition',
                                   ignore_errors=True)
# -- end F10

# -- start F12
Example #4
0
class FUTURE_PROSPECTS_2021(featurizer.extendedMODFeaturizer):
    """Featurizer preset combining matminer composition, structure, site,
    density-of-states and band-structure featurizers.

    The featurizer tuples below are class attributes; each ``featurize_*``
    method delegates to the parent class and then post-processes the
    resulting dataframe so that categorical outputs become numeric.
    """

    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        BagofBonds,
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )
    from matminer.featurizers.dos import (
        DOSFeaturizer,
        SiteDOS,
        Hybridization,
        DosAsymmetry,
    )
    from matminer.featurizers.bandstructure import (
        BandFeaturizer,
        BranchPointEnergy
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxid_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        #PartialRadialDistributionFunction(), #Introduces a large amount of features
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    dos_featurizers = (
        DOSFeaturizer(),
        SiteDOS(),
        Hybridization()
    )

    band_featurizers = (
        BandFeaturizer(),
        BranchPointEnergy()
    )

    def __init__(self, n_jobs=None):
        """Store the requested number of parallel jobs on ``self._n_jobs``."""
        # NOTE(review): the parent initializer is not invoked here -- confirm
        # that extendedMODFeaturizer needs no further setup.
        self._n_jobs = n_jobs

    def featurize_composition(self, df):
        """Apply the preset composition featurizers and make the output numeric.

        HOMO/LUMO orbital characters are mapped to integers (s=1, p=2, d=3,
        f=4) and HOMO/LUMO element symbols to atomic numbers (-1 for
        non-string values). Returns the dataframe after ``clean_df``.
        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df["AtomicOrbitals|HOMO_character"] = df["AtomicOrbitals|HOMO_character"].map(
            _orbitals
        )
        df["AtomicOrbitals|LUMO_character"] = df["AtomicOrbitals|LUMO_character"].map(
            _orbitals
        )

        # Non-string entries (e.g. missing values) cannot be turned into an
        # Element, so they are encoded as -1.
        df["AtomicOrbitals|HOMO_element"] = df["AtomicOrbitals|HOMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )
        df["AtomicOrbitals|LUMO_element"] = df["AtomicOrbitals|LUMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )

        return clean_df(df)

    def featurize_structure(self, df):
        """Apply the preset structure featurizers and make the output numeric.

        The radial distribution function column (one dict per row) is
        expanded into one column per distance bin, using the first 50
        distances of the first row to name the columns. The crystal system
        is mapped to an integer code and the centrosymmetry flag to 0/1.
        Returns the dataframe after ``clean_df``.
        """
        df = super().featurize_structure(df)

        # Bin positions are read from the first row only and reused for
        # every row.
        dist = df["RadialDistributionFunction|radial distribution function"].iloc[0][
            "distances"
        ][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d
            )
            # The lambda is consumed immediately by apply(), so capturing the
            # loop variable ``i`` is safe here.
            df[_rdf_key] = df[
                "RadialDistributionFunction|radial distribution function"
            ].apply(lambda x: x["distribution"][i])

        df = df.drop("RadialDistributionFunction|radial distribution function", axis=1)

        # NOTE(review): "orthorombic" looks like a misspelling of
        # "orthorhombic"; if the featurizer emits the correct spelling, such
        # rows map to NaN. Left unchanged because altering it would change
        # feature values for already-trained models -- confirm.
        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            "orthorombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7,
        }

        def _int_map(x):
            # NaN never compares equal to anything (not even itself), so
            # ``x == np.nan`` can never be true; detect it explicitly,
            # otherwise the truthy NaN would be encoded as 1.
            if isinstance(x, float) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"
        ].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"
        ].map(_int_map)

        return clean_df(df)

    def featurize_dos(self, df):
        """Apply the preset DOS featurizers and make the output numeric.

        The VBM/CBM species columns are one-hot encoded, the VBM/CBM orbital
        characters are mapped to integers, and the ";"-separated VBM/CBM
        location strings are split into three float columns each. The
        original location columns and the ``dos`` column are dropped.
        Returns the dataframe after ``clean_df``.
        """

        df = super().featurize_dos(df)

        hotencodeColumns = ["DOSFeaturizer|vbm_specie_1", "DOSFeaturizer|cbm_specie_1"]

        one_hot = pd.get_dummies(df[hotencodeColumns])
        df = df.drop(hotencodeColumns, axis=1).join(one_hot)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}

        df["DOSFeaturizer|vbm_character_1"] = df[
            "DOSFeaturizer|vbm_character_1"
        ].map(_orbitals)
        df["DOSFeaturizer|cbm_character_1"] = df[
            "DOSFeaturizer|cbm_character_1"
        ].map(_orbitals)

        # Splitting one feature into several floating features
        # e.g. number;number;number into three columns
        splitColumns = ["DOSFeaturizer|cbm_location_1", "DOSFeaturizer|vbm_location_1"]

        for column in splitColumns:
            try:
                newColumns = df[column].str.split(";", n=2, expand=True)
                for i in range(0, 3):
                    # The ``np.float`` alias was removed from NumPy; the
                    # builtin ``float`` is the same dtype. With the old alias
                    # the resulting AttributeError was swallowed below and no
                    # location column was ever produced.
                    df[column + "_" + str(i)] = np.array(newColumns[i]).astype(float)
            except (AttributeError, KeyError, TypeError, ValueError):
                # Best effort: a column that is missing, not string-typed, or
                # not parseable as three floats is skipped.
                continue
        df = df.drop(splitColumns, axis=1)
        df = df.drop(["dos"], axis=1)
        return clean_df(df)

    def featurize_bandstructure(self, df):
        """Apply the preset band structure featurizers and make the output numeric.

        The ``is_gap_direct`` flag is mapped to 1/0 and the ``bandstructure``
        column is dropped. Returns the dataframe after ``clean_df``.
        """

        df = super().featurize_bandstructure(df)

        def _int_map(x):
            # Compare on the string form so that bools and their string
            # spellings are treated alike; anything else maps to None.
            if str(x) == "False":
                return 0
            elif str(x) == "True":
                return 1

        df["BandFeaturizer|is_gap_direct"] = df[
            "BandFeaturizer|is_gap_direct"
        ].map(_int_map)

        df = df.drop(["bandstructure"], axis=1)

        return clean_df(df)

    def featurize_site(self, df):
        """Apply the preset site featurizers and drop all-zero columns.

        Some featurizer names are aliased when calling the parent class, and
        every column whose values are all zero is removed afterwards.
        Returns the dataframe after ``clean_df``.
        """

        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # Keep only the columns that contain at least one non-zero value.
        df = df.loc[:, (df != 0).any(axis=0)]

        return clean_df(df)
class PerovskiteProperty(BaseFeaturizer):
    """
	Class to calculate perovskite features. Includes custom features from the Perovskite class and generic features from ElementProperty, 
	AtomicOrbitals, ValenceOrbital, and CohesiveEnergy matminer featurizers.
	
	Options for initializing:
		ordered_formula_featurizer(): for featurizing ordered formulas
		cation_site_featurizer(): for featurizing unordered formulas based on user-provided cation site assignments
		from_preset(): load a preset
		The class can also be called manually, but be aware that different parameter sets are required for an ordered formula featurizer instance than for a cation site featurizer instance.	
	
	Parameters:
	-----------
	cation_site: dict of site assignments for cations, i.e. {el:site}. Elements not in cation_site are assumed to be anions on X-site
	site_ox_lim: dict of oxidation state limits for each site, i.e. {site:[min,max]}. Elements on sites are limited to oxidation states within these limits
	site_base_ox: dict of base oxidation state for each site, i.e. {site:ox}. Used for determining aliovalent ions and acceptor/donor dopants
	ordered_formulas: if True, determine cation site assignments from order 
	A_site_occupancy: Number of atoms on A site. Used when ordered_formulas is True
	anions: list of anions. Used when ordered_formulas is True
	
	Parameters for ordered formula featurizer: site_ox_lim, site_base_ox, A_site_occupancy, anions
	Parameters for cation site featurizer: cation_site, site_ox_lim, site_base_ox
	"""
    def __init__(self,
                 cation_site=None,
                 site_ox_lim=None,
                 site_base_ox=None,
                 ordered_formulas=False,
                 A_site_occupancy=1,
                 anions=None):
        """
        Store the site configuration and build the matminer featurizers.

        Parameters:
        -----------
        cation_site: dict of site assignments for cations, i.e. {el:site}
        site_ox_lim: dict of oxidation state limits per site, i.e. {site:[min,max]}.
            Defaults to {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
        site_base_ox: dict of base oxidation state per site, i.e. {site:ox}.
            Defaults to {'A': 2, 'B': 4, 'X': -2}
        ordered_formulas: if True, site assignments are taken from formula order
        A_site_occupancy: number of atoms on A site (ordered formulas only)
        anions: list of anions (ordered formulas only)

        Raises ValueError when neither cation_site nor ordered_formulas is given.
        """

        if cation_site is None and ordered_formulas is False:
            raise ValueError(
                'Either cation sites must be assigned, or formulas must be ordered. Otherwise site assignments can not be determined'
            )

        # Build the default dicts per call: a dict literal in the signature
        # would be one object shared by every instance that relies on it.
        if site_ox_lim is None:
            site_ox_lim = {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
        if site_base_ox is None:
            site_base_ox = {'A': 2, 'B': 4, 'X': -2}

        self.cation_site = cation_site
        self.site_ox_lim = site_ox_lim
        self.site_base_ox = site_base_ox
        self.ordered_formulas = ordered_formulas
        self.A_site_occupancy = A_site_occupancy
        self.anions = anions

        #matminer featurizers
        self.ValenceOrbital = ValenceOrbital()
        self.AtomicOrbitals = AtomicOrbitalsMod()
        self.CohesiveEnergy = CohesiveEnergy()
        #custom ElementProperty featurizer
        elemental_properties = [
            'BoilingT', 'MeltingT', 'BulkModulus', 'ShearModulus', 'Row',
            'Column', 'Number', 'MendeleevNumber', 'SpaceGroupNumber',
            'Density', 'MolarVolume', 'FusionEnthalpy', 'HeatVaporization',
            'NsUnfilled', 'NpUnfilled', 'NdUnfilled', 'NfUnfilled',
            'Polarizability', 'ThermalConductivity'
        ]
        self.ElementProperty = ElementProperty(
            data_source='magpie',
            features=elemental_properties,
            stats=["mean", "std_dev", "range"])

        self.check_matminer_featurizers()
        # Stays empty until set_featurize_options() is called; featurize()
        # refuses to run while it is empty.
        self.featurize_options = {}

    @classmethod
    def from_preset(cls, preset_name):
        """
        Build a featurizer from a named preset

        Parameters:
        -----------
        preset_name: name of preset to load. Currently accepts 'BCFZY'

        Raises ValueError for any other preset name.
        """
        if preset_name != 'BCFZY':
            raise ValueError("Invalid preset_name specified!")

        #Ba(Co,Fe,Zr,Y)O_3-d system: Ba sits on the A site, the rest on B
        site_assignments = {'Ba': 'A'}
        for b_cation in ('Co', 'Fe', 'Zr', 'Y'):
            site_assignments[b_cation] = 'B'
        ox_limits = {'A': [2, 2], 'B': [2, 4], 'X': [-2, -2]}
        base_ox = {'A': 2, 'B': 4, 'X': -2}

        return cls(site_assignments, ox_limits, base_ox)

    @classmethod
    def ordered_formula_featurizer(cls,
                                   A_site_occupancy=1,
                                   anions=None,
                                   site_ox_lim=None,
                                   site_base_ox=None):
        """
        Convenience method for instantiating a featurizer for ordered formulas

        Parameters:
        -----------
        A_site_occupancy: number of atoms on A site
        anions: list of anions
        site_ox_lim: dict of oxidation state limits per site, i.e. {site:[min,max]}.
            Defaults to {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
        site_base_ox: dict of base oxidation state per site, i.e. {site:ox}.
            Defaults to {'A': 2, 'B': 4, 'X': -2}
        """
        # Fresh dicts per call: defaults written as dict literals in the
        # signature would be shared by every featurizer created with them.
        if site_ox_lim is None:
            site_ox_lim = {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
        if site_base_ox is None:
            site_base_ox = {'A': 2, 'B': 4, 'X': -2}

        return cls(cation_site=None,
                   site_ox_lim=site_ox_lim,
                   site_base_ox=site_base_ox,
                   ordered_formulas=True,
                   A_site_occupancy=A_site_occupancy,
                   anions=anions)

    @classmethod
    def cation_site_featurizer(cls,
                               cation_site,
                               site_ox_lim=None,
                               site_base_ox=None):
        """
        Convenience method for instantiating a featurizer for unordered formulas, based on site assignments

        Parameters:
        -----------
        cation_site: dict of site assignments for cations, i.e. {el:site}
        site_ox_lim: dict of oxidation state limits per site, i.e. {site:[min,max]}.
            Defaults to {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
        site_base_ox: dict of base oxidation state per site, i.e. {site:ox}.
            Defaults to {'A': 2, 'B': 4, 'X': -2}
        """
        # Fresh dicts per call: defaults written as dict literals in the
        # signature would be shared by every featurizer created with them.
        if site_ox_lim is None:
            site_ox_lim = {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
        if site_base_ox is None:
            site_base_ox = {'A': 2, 'B': 4, 'X': -2}

        return cls(cation_site, site_ox_lim, site_base_ox)

    @property
    def ElementProperty_custom_labels(self):
        """
        Build '<short name>_<stat>' labels for the ElementProperty featurizer,
        using the same naming convention as the Perovskite class.

        Labels are ordered feature by feature, with every configured stat
        listed for one feature before moving on to the next. The 'std_dev'
        stat is shortened to 'std'.
        """
        short_names = {
            'BoilingT': 'boil_temp',
            'MeltingT': 'melt_temp',
            'BulkModulus': 'bulk_mod',
            'ShearModulus': 'shear_mod',
            'Row': 'row',
            'Column': 'column',
            'Number': 'number',
            'MendeleevNumber': 'mendeleev',
            'SpaceGroupNumber': 'space_group',
            'Density': 'density',
            'MolarVolume': 'molar_vol',
            'FusionEnthalpy': 'H_fus',
            'HeatVaporization': 'H_vap',
            'NsUnfilled': 'valence_unfilled_s',
            'NpUnfilled': 'valence_unfilled_p',
            'NdUnfilled': 'valence_unfilled_d',
            'NfUnfilled': 'valence_unfilled_f',
            'Polarizability': 'polarizability',
            'ThermalConductivity': 'sigma_therm'
        }

        ep = self.ElementProperty
        return [
            f"{short_names.get(feature)}_{'std' if stat == 'std_dev' else stat}"
            for feature in ep.features
            for stat in ep.stats
        ]

    @property
    def ElementProperty_categories(self):
        """
        Category of every ElementProperty feature column

        Each feature's category is repeated once per configured stat, so the
        result lines up with the feature-by-feature, stat-by-stat label order.
        """
        category_of = {
            'BoilingT': 'elemental',
            'MeltingT': 'elemental',
            'BulkModulus': 'elemental',
            'ShearModulus': 'elemental',
            'Row': 'periodic',
            'Column': 'periodic',
            'Number': 'periodic',
            'MendeleevNumber': 'periodic',
            'SpaceGroupNumber': 'periodic',
            'Density': 'elemental',
            'MolarVolume': 'elemental',
            'FusionEnthalpy': 'elemental',
            'HeatVaporization': 'elemental',
            'NsUnfilled': 'electronic',
            'NpUnfilled': 'electronic',
            'NdUnfilled': 'electronic',
            'NfUnfilled': 'electronic',
            'Polarizability': 'elemental',
            'ThermalConductivity': 'elemental'
        }

        ep = self.ElementProperty
        return [
            category_of.get(feature)
            for feature in ep.features
            for _ in ep.stats
        ]

    @property
    def ElementProperty_units(self):
        """
        Unit tag of every ElementProperty feature column

        Each feature's unit tag is repeated once per configured stat, so the
        result lines up with the feature-by-feature, stat-by-stat label order.
        """
        unit_of = {
            'BoilingT': 'temp',
            'MeltingT': 'temp',
            'BulkModulus': 'pressure',
            'ShearModulus': 'pressure',
            'Row': 'none',
            'Column': 'none',
            'Number': 'none',
            'MendeleevNumber': 'none',
            'SpaceGroupNumber': 'none',
            'Density': 'density',
            'MolarVolume': 'volume',
            'FusionEnthalpy': 'energy',
            'HeatVaporization': 'energy',
            'NsUnfilled': 'none',
            'NpUnfilled': 'none',
            'NdUnfilled': 'none',
            'NfUnfilled': 'none',
            #complex units - doesn't matter
            'Polarizability': 'polarizability',
            #complex units - doesn't matter
            'ThermalConductivity': 'therm'
        }

        ep = self.ElementProperty
        return [
            unit_of.get(feature)
            for feature in ep.features
            for _ in ep.stats
        ]

    def ElementProperty_label_check(self):
        """
        Verify that ElementProperty still names its features as expected

        The expected labels are rebuilt as '<data source class> <stat> <feature>'
        and compared with ElementProperty.feature_labels(). A mismatch means
        feature values may no longer line up with the custom labels, so an
        Exception is raised.
        """
        ep = self.ElementProperty
        #mirrors ElementProperty.feature_labels() code as of 2/17/19
        expected = [
            "{} {} {}".format(ep.data_source.__class__.__name__, stat, feature)
            for feature in ep.features
            for stat in ep.stats
        ]

        if expected != ep.feature_labels():
            raise Exception('ElementProperty features or labels have changed')

    def set_featurize_options(
            self,
            sites,
            ox_stats=None,
            ep_stats=None,
            radius_type='ionic_radius',
            normalize_formula=True,
            silent=True,
            categories=None):
        """
        Set options for featurization. Since these options should be the same for all compositions in a batch, set for the featurizer instance rather than passing as args to featurize()
        so that they do not have to be duplicated in every row of a DataFrame when calling featurize_dataframe().
        Since these options change the number and meaning of features returned, it's also safest to set for the whole instance for consistency.

        Parameters:
        -----------
        sites: list or string of sites to featurize. Any combination of 'A', 'B', 'X', and/or 'comp' accepted.
            Composition-level, oxidation-state-dependent features are always calculated by the Perovskite class. Passing '' or [] will return only these features.
            Specifying 'A','B', and/or 'X' sites will calculate site-level features for these sites (oxidation-state independent and dependent features, and matminer features).
            Including 'comp' will calculate oxidation-state-independent features and matminer features for the full composition.
        ox_stats: list of aggregate functions to apply to oxidation state combinations for feature generation using Perovskite class.
            Options: 'min','max','mean','median','std','range'. Defaults to all six.
        ep_stats: ElementProperty stats. Options: "minimum", "maximum", "range", "mean", "avg_dev", "mode".
            Defaults to ["mean", "std_dev", "range"].
        radius_type: Shannon radius type to use in features. Accepts 'crystal_radius' or 'ionic_radius'
        normalize_formula: if True, normalize formula such that higher occupancy cation site has one formula unit (applies to Perovskite class only)
        silent: if False, print informational messages from Perovskite class
        categories: list of feature categories to return. If None, return all. Options: 'bonding','structure','charge','composition','electronic','elemental','periodic'
            NOTE(review): this argument is accepted but not stored or used by this method.
        """
        # Build the default lists per call: list literals in the signature
        # would be shared by every featurizer that relies on them, because
        # they are stored on the instance below.
        if ox_stats is None:
            ox_stats = ['min', 'max', 'mean', 'median', 'std', 'range']
        if ep_stats is None:
            ep_stats = ["mean", "std_dev", "range"]

        feat_options = dict(sites=sites,
                            ox_stats=ox_stats,
                            radius_type=radius_type,
                            normalize_formula=normalize_formula,
                            silent=silent)
        self.featurize_options.update(feat_options)
        self.ElementProperty.stats = ep_stats

    def featurize(self, formula):
        """
        Compute the feature vector for one formula

        Perovskite-class features come first, followed by the matminer
        features of every site listed in the 'sites' featurize option.

        Parameters:
        -----------
        formula: chemical formula string

        Returns: list of feature values

        Raises Exception if set_featurize_options has not been called yet.
        """
        opts = self.featurize_options
        if opts == {}:
            raise Exception(
                'Featurize options have not been set. Use set_featurize_options before featurizing'
            )

        #build the Perovskite object according to how sites are determined
        if self.ordered_formulas is True:
            pvsk = Perovskite.from_ordered_formula(
                formula,
                self.A_site_occupancy,
                self.anions,
                site_ox_lim=self.site_ox_lim,
                site_base_ox=self.site_base_ox,
                radius_type=opts['radius_type'],
                silent=opts['silent'])
        elif self.ordered_formulas is False:
            pvsk = Perovskite(formula, self.cation_site, self.site_ox_lim,
                              self.site_base_ox, opts['radius_type'],
                              opts['normalize_formula'], opts['silent'])

        pvsk_features = pvsk.featurize(opts['sites'], opts['ox_stats'])

        mm_features = []
        for site in opts['sites']:
            site_comp = pvsk.site_composition[site]

            #avg and frac s, p, d, f electrons
            valence = self.ValenceOrbital.featurize(site_comp)
            #avg total valence electrons
            #NOTE(review): the [0:3] slice sums only the first three entries;
            #if entry 3 is the f-electron mean it is left out of the total - confirm
            valence += [sum(valence[0:3])]

            #HOMO and LUMO character and energy levels (from atomic orbitals)
            orbitals = self.AtomicOrbitals.featurize(site_comp)
            #exclude HOMO_character, HOMO_element, LUMO_character, LUMO_element - categoricals
            orbital_numeric = [
                value for idx, value in enumerate(orbitals)
                if idx not in (0, 1, 3, 4)
            ]

            #avg elemental cohesive energy
            cohesive = self.CohesiveEnergy.featurize(
                site_comp, formation_energy_per_atom=1e-10)

            #elemental property features
            elemental = self.ElementProperty.featurize(site_comp)

            mm_features += valence + orbital_numeric + cohesive + elemental

        return list(pvsk_features) + mm_features

    @property
    def matminer_labels(self):
        """
        Labels of the matminer-derived features, in featurize() order:
        ValenceOrbital, AtomicOrbitals, CohesiveEnergy, then the custom
        ElementProperty labels.
        """
        orbitals = ('s', 'p', 'd', 'f')

        #ValenceOrbital labels: per-orbital mean, per-orbital frac, total mean
        labels = [
            f'valence_elec_{orbital}_{stat}' for stat in ('mean', 'frac')
            for orbital in orbitals
        ]
        labels.append('valence_elec_tot_mean')

        #AtomicOrbitals labels (the categorical character entries are omitted)
        labels.extend(['HOMO_energy', 'LUMO_energy', 'AO_gap'])

        #CohesiveEnergy labels
        labels.append('cohesive_energy_mean')

        #ElementProperty labels
        labels.extend(self.ElementProperty_custom_labels)

        return labels

    @property
    def matminer_categories(self):
        """
        Category of each matminer-derived feature, in the same order as the
        ValenceOrbital, AtomicOrbitals, CohesiveEnergy and ElementProperty
        entries.
        """
        categories = []
        #ValenceOrbital categories
        categories.extend(['electronic'] * 9)
        #AtomicOrbitals categories (HOMO energy, LUMO energy, gap)
        categories.extend(['electronic'] * 3)
        #CohesiveEnergy categories
        categories.append('bonding')
        #ElementProperty categories
        categories.extend(self.ElementProperty_categories)

        return categories

    @property
    def matminer_units(self):
        """
        Unit of each matminer-derived feature, in the same order as the
        ValenceOrbital, AtomicOrbitals, CohesiveEnergy and ElementProperty
        entries.
        """
        units = []
        #ValenceOrbital units
        units.extend(['none'] * 9)
        #AtomicOrbitals units (HOMO energy, LUMO energy, gap)
        units.extend(['energy'] * 3)
        #CohesiveEnergy units
        units.append('energy')
        #ElementProperty units
        units.extend(self.ElementProperty_units)

        return units

    def feature_labels(self):
        """
        Get list of feature labels: the Perovskite labels followed by the
        matminer labels, the latter prefixed per site ('comp_' for the
        'comp' site, '<site>site_' otherwise).

        Raises Exception when a KeyError occurs while obtaining the
        Perovskite labels (e.g. featurize options not set).
        """
        try:
            # A preset perovskite is only used as a source of label names
            reference = Perovskite.from_preset('BaCoO3', 'BCFZY', silent=True)
            pvsk_labels = reference.feature_labels(
                self.featurize_options['sites'],
                self.featurize_options['ox_stats'])
        except KeyError:
            raise Exception(
                'Featurize options have not been set. Use set_featurize_options before accessing feature labels'
            )

        mm_labels = []
        for site in self.featurize_options['sites']:
            prefix = 'comp' if site == 'comp' else f'{site}site'
            mm_labels.extend(f'{prefix}_{label}'
                             for label in self.matminer_labels)

        return pvsk_labels + mm_labels

    def feature_categories(self):
        """
        Get list of feature categories (for quick filtering): the Perovskite
        categories followed by the matminer categories repeated once per
        site.

        Raises Exception when a KeyError occurs while obtaining the
        Perovskite categories (e.g. featurize options not set).
        """
        try:
            # A preset perovskite is only used as a source of category names
            reference = Perovskite.from_preset('BaCoO3', 'BCFZY', silent=True)
            pvsk_categories = reference.feature_categories(
                self.featurize_options['sites'],
                self.featurize_options['ox_stats'])
        except KeyError:
            raise Exception(
                'Featurize options have not been set. Use set_featurize_options before accessing feature labels'
            )

        mm_categories = [
            category for _ in self.featurize_options['sites']
            for category in self.matminer_categories
        ]

        return pvsk_categories + mm_categories

    def feature_units(self):
        """
        Get list of feature units (for dimensional analysis): the Perovskite
        units followed by the matminer units repeated once per site.

        Raises Exception when a KeyError occurs while obtaining the
        Perovskite units (e.g. featurize options not set).
        """
        try:
            # A preset perovskite is only used as a source of unit names
            reference = Perovskite.from_preset('BaCoO3', 'BCFZY', silent=True)
            pvsk_units = reference.feature_units(
                self.featurize_options['sites'],
                self.featurize_options['ox_stats'])
        except KeyError:
            raise Exception(
                'Featurize options have not been set. Use set_featurize_options before accessing feature labels'
            )

        mm_units = [
            unit for _ in self.featurize_options['sites']
            for unit in self.matminer_units
        ]

        return pvsk_units + mm_units

    def check_matminer_featurizers(self):
        """
        Verify that the matminer featurizers still report the expected
        feature labels, in the expected order.

        featurize() picks matminer features by position, so a change in the
        features or their order would make its output disagree with
        feature_labels(). Raises Exception on the first mismatch; finishes
        by running ElementProperty_label_check().
        """
        expectations = (
            ('ValenceOrbital', [
                'avg s valence electrons', 'avg p valence electrons',
                'avg d valence electrons', 'avg f valence electrons',
                'frac s valence electrons', 'frac p valence electrons',
                'frac d valence electrons', 'frac f valence electrons'
            ], 'ValenceOrbital features or labels have changed'),
            ('AtomicOrbitals', [
                'HOMO_character', 'HOMO_element', 'HOMO_energy',
                'LUMO_character', 'LUMO_element', 'LUMO_energy', 'gap_AO'
            ], 'AtomicOrbitals features or labels have changed'),
            ('CohesiveEnergy', ['cohesive energy'],
             'CohesiveEnergy features or labels have changed'),
        )

        #verify that matminer feature labels haven't changed
        for attribute, expected_labels, message in expectations:
            if getattr(self, attribute).feature_labels() != expected_labels:
                raise Exception(message)

        self.ElementProperty_label_check()
Example #6
0
def predict_log10_eps(
    target: Union[Structure, Composition],
    dielectric_type: str,
    model_type: str,
) -> float:
    """
    Predict a dielectric constant (log10, per the function name) with a
    stored model.

    A descriptor vector is assembled from matminer featurizers; which
    descriptors are used depends on ``dielectric_type`` and ``model_type``.
    The vector is scaled with the scaler loaded from
    ``<dielectric_type>_<model_type>_scaler.joblib`` and passed to the model
    loaded from ``<dielectric_type>_<model_type>.joblib``; both files are
    looked up in the directory of this module.

    The ``target`` passed by the caller is not modified: in "comp_st" mode
    oxidation states are added to a private copy.

    :param target: structure or composition to predict dielectric constants
    :param dielectric_type: "el" or "ion"
    :param model_type: "comp" or "comp_st"
    :return: first element of the model prediction for the scaled descriptor
    :raises ValueError: unknown ``dielectric_type`` or ``model_type``, or
        ``model_type`` is "comp_st" while ``target`` is a Composition
    """
    if dielectric_type not in ["el", "ion"]:
        raise ValueError(
            f'Specify dielectric type "el" or "ion"\nInput: {dielectric_type}')
    if model_type not in ["comp", "comp_st"]:
        raise ValueError(
            f'Specify regression_type "comp" or "comp_st"\nInput: {model_type}'
        )

    if model_type == "comp":
        if isinstance(target, Structure):
            comp = target.composition
        else:
            comp = target
        comp_oxi = comp.add_charges_from_oxi_state_guesses()
        if dielectric_type == "el":
            ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"),
                                  comp)
            valence = ScalarFeaturizer(ValenceOrbital(), comp)
            ion_prop = ScalarFeaturizer(IonProperty(), comp)
            en_diff = ScalarFeaturizer(ElectronegativityDiff(), comp_oxi)
            oxi_state = ScalarFeaturizer(OxidationStates.from_preset("deml"),
                                         comp_oxi)
            atomic_orbital = ScalarFeaturizer(AtomicOrbitals(), comp)
            descriptor = [
                ep.get_from_label("PymatgenData minimum X"),
                ep.get_from_label("PymatgenData range X"),
                ep.get_from_label("PymatgenData std_dev X"),
                ep.get_from_label("PymatgenData mean row"),
                ep.get_from_label("PymatgenData std_dev row"),
                ep.get_from_label("PymatgenData mean group"),
                ep.get_from_label("PymatgenData mean block"),
                ep.get_from_label("PymatgenData std_dev block"),
                ep.get_from_label("PymatgenData mean atomic_mass"),
                ep.get_from_label("PymatgenData std_dev atomic_mass"),
                ep.get_from_label("PymatgenData std_dev atomic_radius"),
                ep.get_from_label("PymatgenData minimum mendeleev_no"),
                ep.get_from_label("PymatgenData range mendeleev_no"),
                ep.get_from_label("PymatgenData std_dev mendeleev_no"),
                ep.get_from_label("PymatgenData mean thermal_conductivity"),
                ep.get_from_label("PymatgenData std_dev thermal_conductivity"),
                ep.get_from_label("PymatgenData mean melting_point"),
                ep.get_from_label("PymatgenData std_dev melting_point"),
                valence.get_from_label("avg s valence electrons"),
                valence.get_from_label("avg d valence electrons"),
                valence.get_from_label("frac s valence electrons"),
                valence.get_from_label("frac p valence electrons"),
                valence.get_from_label("frac d valence electrons"),
                ion_prop.get_from_label("avg ionic char"),
                TMetalFraction().featurize(comp)[0],
                en_diff.get_from_label("maximum EN difference"),
                en_diff.get_from_label("range EN difference"),
                en_diff.get_from_label("mean EN difference"),
                en_diff.get_from_label("std_dev EN difference"),
                BandCenter().featurize(comp)[0],
                oxi_state.get_from_label("std_dev oxidation state"),
                atomic_orbital.get_from_label("HOMO_energy"),
                atomic_orbital.get_from_label("LUMO_energy"),
                atomic_orbital.get_from_label("gap_AO"),
            ]
        elif dielectric_type == "ion":
            stoich = ScalarFeaturizer(Stoichiometry(), comp)
            ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"),
                                  comp)
            valence = ScalarFeaturizer(ValenceOrbital(), comp)
            ion_prop = ScalarFeaturizer(IonProperty(), comp)
            en_diff = ScalarFeaturizer(ElectronegativityDiff(), comp_oxi)
            oxi_state = ScalarFeaturizer(OxidationStates.from_preset("deml"),
                                         comp_oxi)
            atomic_orbital = ScalarFeaturizer(AtomicOrbitals(), comp)
            at_pack_eff = ScalarFeaturizer(AtomicPackingEfficiency(), comp)
            descriptor = [
                stoich.get_from_label("3-norm"),
                stoich.get_from_label("5-norm"),
                ep.get_from_label("PymatgenData mean X"),
                ep.get_from_label("PymatgenData mean row"),
                ep.get_from_label("PymatgenData std_dev row"),
                ep.get_from_label("PymatgenData std_dev group"),
                ep.get_from_label("PymatgenData mean block"),
                ep.get_from_label("PymatgenData std_dev block"),
                ep.get_from_label("PymatgenData maximum atomic_mass"),
                ep.get_from_label("PymatgenData range atomic_mass"),
                ep.get_from_label("PymatgenData mean atomic_mass"),
                ep.get_from_label("PymatgenData std_dev atomic_mass"),
                ep.get_from_label("PymatgenData maximum atomic_radius"),
                ep.get_from_label("PymatgenData range atomic_radius"),
                ep.get_from_label("PymatgenData mean atomic_radius"),
                ep.get_from_label("PymatgenData std_dev atomic_radius"),
                ep.get_from_label("PymatgenData minimum mendeleev_no"),
                ep.get_from_label("PymatgenData mean mendeleev_no"),
                ep.get_from_label("PymatgenData std_dev mendeleev_no"),
                ep.get_from_label("PymatgenData mean thermal_conductivity"),
                ep.get_from_label("PymatgenData std_dev thermal_conductivity"),
                ep.get_from_label("PymatgenData mean melting_point"),
                ep.get_from_label("PymatgenData std_dev melting_point"),
                valence.get_from_label("avg s valence electrons"),
                valence.get_from_label("frac s valence electrons"),
                valence.get_from_label("frac p valence electrons"),
                valence.get_from_label("frac d valence electrons"),
                ion_prop.get_from_label("avg ionic char"),
                TMetalFraction().featurize(comp)[0],
                en_diff.get_from_label("minimum EN difference"),
                en_diff.get_from_label("range EN difference"),
                en_diff.get_from_label("mean EN difference"),
                en_diff.get_from_label("std_dev EN difference"),
                oxi_state.get_from_label("range oxidation state"),
                oxi_state.get_from_label("std_dev oxidation state"),
                atomic_orbital.get_from_label("LUMO_energy"),
                atomic_orbital.get_from_label("gap_AO"),
                at_pack_eff.get_from_label("mean simul. packing efficiency"),
                at_pack_eff.get_from_label(
                    "mean abs simul. packing efficiency"),
                at_pack_eff.get_from_label(
                    "dist from 1 clusters |APE| < 0.010"),
                at_pack_eff.get_from_label(
                    "dist from 3 clusters |APE| < 0.010"),
                at_pack_eff.get_from_label(
                    "dist from 5 clusters |APE| < 0.010"),
            ]
    elif model_type == "comp_st":
        if isinstance(target, Composition):
            raise ValueError(
                'comp_st (Using compositional and structural descriptor) is specified, '
                'but target is composition')
        comp: Composition = target.composition
        comp_oxi = comp.add_charges_from_oxi_state_guesses()
        # add_oxidation_state_by_guess() works in place. Decorate a private
        # copy so the caller's structure is left untouched; the undecorated
        # original is still needed for the neighbour-distance descriptor.
        target_orig = target
        target = deepcopy(target)
        target.add_oxidation_state_by_guess()
        # One featurizer instance serves every site instead of building a
        # new one per site.
        ewald = EwaldSiteEnergy(accuracy=4)
        if dielectric_type == "el":
            ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"),
                                  comp)
            valence = ScalarFeaturizer(ValenceOrbital(), comp)
            en_diff = ScalarFeaturizer(ElectronegativityDiff(), comp_oxi)
            atomic_orbital = ScalarFeaturizer(AtomicOrbitals(), comp)
            density = ScalarFeaturizer(DensityFeatures(), target)
            dist_btw_nn = MinimumRelativeDistances().featurize(target_orig)
            opsf = SiteFeaturizer(OPSiteFingerprint(), target)
            voro_fp = SiteFeaturizer(VoronoiFingerprint(use_symm_weights=True),
                                     target)
            gsf = SiteFeaturizer(GaussianSymmFunc(), target)
            lpd = SiteFeaturizer(
                LocalPropertyDifference.from_preset("ward-prb-2017"), target)
            descriptor = [
                ep.get_from_label("PymatgenData std_dev X"),
                ep.get_from_label("PymatgenData mean block"),
                ep.get_from_label("PymatgenData std_dev atomic_mass"),
                valence.get_from_label("frac d valence electrons"),
                TMetalFraction().featurize(comp)[0],
                en_diff.get_from_label("maximum EN difference"),
                en_diff.get_from_label("mean EN difference"),
                atomic_orbital.get_from_label("HOMO_energy"),
                atomic_orbital.get_from_label("LUMO_energy"),
                density.get_from_label("density"),
                np.mean(dist_btw_nn),
                np.std(dist_btw_nn),
                opsf.get_from_label_func("tetrahedral CN_4", np.max),
                opsf.get_from_label_func("rectangular see-saw-like CN_4",
                                         np.max),
                np.max([
                    ewald.featurize(target, i)
                    for i in range(target.num_sites)
                ]),
                voro_fp.get_from_label_func("Voro_area_std_dev", np.max),
                voro_fp.get_from_label_func("Voro_area_std_dev", np.mean),
                voro_fp.get_from_label_func("Voro_dist_minimum", np.min),
                voro_fp.get_from_label_func("Voro_dist_minimum", np.std),
                gsf.get_from_label_func("G2_20.0", np.std),
                gsf.get_from_label_func("G2_80.0", np.max),
                gsf.get_from_label_func("G4_0.005_4.0_-1.0", np.mean),
                lpd.get_from_label_func("local difference in NdValence",
                                        np.mean),
                lpd.get_from_label_func("local difference in NValence",
                                        np.min),
                lpd.get_from_label_func("local difference in NValence",
                                        np.std),
                lpd.get_from_label_func("local difference in NdUnfilled",
                                        np.mean),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.min),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.mean),
                lpd.get_from_label_func("local difference in GSmagmom",
                                        np.mean)
            ]
        elif dielectric_type == "ion":
            ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"),
                                  comp)
            atomic_orbitals = ScalarFeaturizer(AtomicOrbitals(), comp)
            density = ScalarFeaturizer(DensityFeatures(), target)
            str_het = ScalarFeaturizer(StructuralHeterogeneity(), target)
            opsf = SiteFeaturizer(OPSiteFingerprint(), target)
            voro_fp = SiteFeaturizer(VoronoiFingerprint(use_symm_weights=True),
                                     target)
            gsf = SiteFeaturizer(GaussianSymmFunc(), target)
            lpd = SiteFeaturizer(
                LocalPropertyDifference.from_preset("ward-prb-2017"), target)
            descriptor = [
                ep.get_from_label("PymatgenData std_dev row"),
                ep.get_from_label("PymatgenData mean thermal_conductivity"),
                ep.get_from_label("PymatgenData std_dev melting_point"),
                TMetalFraction().featurize(comp)[0],
                atomic_orbitals.get_from_label("gap_AO"),
                density.get_from_label("density"),
                density.get_from_label("packing fraction"),
                str_het.get_from_label("mean neighbor distance variation"),
                str_het.get_from_label("avg_dev neighbor distance variation"),
                opsf.get_from_label_func("sgl_bd CN_1", np.mean),
                opsf.get_from_label_func("bent 150 degrees CN_2", np.mean),
                opsf.get_from_label_func("linear CN_2", np.mean),
                opsf.get_from_label_func("trigonal planar CN_3", np.mean),
                opsf.get_from_label_func("pentagonal planar CN_5", np.std),
                opsf.get_from_label_func("octahedral CN_6", np.max),
                opsf.get_from_label_func("octahedral CN_6", np.std),
                opsf.get_from_label_func("q6 CN_12", np.mean),
                np.max([
                    ewald.featurize(target, i)
                    for i in range(target.num_sites)
                ]),
                voro_fp.get_from_label_func("Symmetry_weighted_index_4",
                                            np.std),
                voro_fp.get_from_label_func("Voro_vol_maximum", np.mean),
                voro_fp.get_from_label_func("Voro_area_std_dev", np.mean),
                voro_fp.get_from_label_func("Voro_area_minimum", np.std),
                voro_fp.get_from_label_func("Voro_area_maximum", np.min),
                voro_fp.get_from_label_func("Voro_dist_std_dev", np.mean),
                gsf.get_from_label_func("G2_80.0", np.min),
                gsf.get_from_label_func("G4_0.005_4.0_1.0", np.std),
                lpd.get_from_label_func("local difference in Number", np.max),
                lpd.get_from_label_func("local difference in MendeleevNumber",
                                        np.max),
                lpd.get_from_label_func("local difference in MendeleevNumber",
                                        np.min),
                lpd.get_from_label_func("local difference in AtomicWeight",
                                        np.max),
                lpd.get_from_label_func("local difference in AtomicWeight",
                                        np.mean),
                lpd.get_from_label_func("local difference in MeltingT",
                                        np.mean),
                lpd.get_from_label_func("local difference in Row", np.max),
                lpd.get_from_label_func(
                    "local difference in Electronegativity", np.min),
                lpd.get_from_label_func("local difference in NValence",
                                        np.std),
                lpd.get_from_label_func("local difference in NsUnfilled",
                                        np.mean),
                lpd.get_from_label_func("local difference in NdUnfilled",
                                        np.max),
                lpd.get_from_label_func("local difference in NdUnfilled",
                                        np.std),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.max),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.min),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.mean),
                lpd.get_from_label_func("local difference in NUnfilled",
                                        np.std),
                lpd.get_from_label_func("local difference in GSvolume_pa",
                                        np.max),
                lpd.get_from_label_func("local difference in GSvolume_pa",
                                        np.min),
                lpd.get_from_label_func("local difference in SpaceGroupNumber",
                                        np.max),
            ]
    # Model and scaler files live next to this module and are named after
    # the requested dielectric/model type.
    with open(
            f"{os.path.dirname(__file__)}/{dielectric_type}_{model_type}.joblib",
            "rb") as fr:
        model: RandomForestRegressor = joblib.load(fr)
    with open(
            f"{os.path.dirname(__file__)}/{dielectric_type}_{model_type}_scaler.joblib",
            "rb") as fr:
        scaler: StandardScaler = joblib.load(fr)
    # The scaler and the model both take a 2-D input, hence the single-row
    # list and the [0] on the prediction.
    descriptor = scaler.transform([descriptor])
    return model.predict(descriptor)[0]
Example #7
0
    def featurize_structures(self, featurizer=None, **kwargs):
        """
        Compute features for the structures held in hypo_structures.

        When no structures are available yet they are generated first (a
        warning is emitted). Rows containing any missing feature value are
        dropped; the remaining rows, with columns sorted by name, are
        stored as ``features`` and the matching structures as
        ``valid_structures``. The unfiltered table is kept in
        ``_features_df``.

        Args:
            featurizer (Featurizer): A MatMiner Featurizer.
                Defaults to MultipleFeaturizer with PRB Ward
                Voronoi descriptors.
            **kwargs (dict): kwargs passed to featurize_many
                method of featurizer.

        Returns:
            pandas.DataFrame: features
        """
        if self.hypo_structures is None:
            warnings.warn("No structures available. Generating structures.")
            self.get_structures()

        print("Generating features")
        if not featurizer:
            featurizer = MultipleFeaturizer([
                SiteStatsFingerprint.from_preset(
                    "CoordinationNumber_ward-prb-2017"),
                StructuralHeterogeneity(),
                ChemicalOrdering(),
                MaximumPackingEfficiency(),
                SiteStatsFingerprint.from_preset(
                    "LocalPropertyDifference_ward-prb-2017"),
                StructureComposition(Stoichiometry()),
                StructureComposition(ElementProperty.from_preset("magpie")),
                StructureComposition(ValenceOrbital(props=['frac'])),
                StructureComposition(IonProperty(fast=True))
            ])

        # ignore_errors lets failed structures through; their rows are
        # removed by the dropna below.
        feature_rows = featurizer.featurize_many(
            self.hypo_structures['pmg_structures'],
            ignore_errors=True,
            **kwargs)

        structures = self.hypo_structures['pmg_structures']
        species_counts = [len(s.composition.elements) for s in structures]
        formulas = [s.composition.formula for s in structures]

        self._features_df = pd.DataFrame.from_records(
            feature_rows, columns=featurizer.feature_labels())
        self._features_df.index = self.hypo_structures.index
        self._features_df['N_species'] = species_counts
        self._features_df['Composition'] = formulas

        complete_rows = self._features_df.dropna(axis=0, how='any')
        self.features = complete_rows.reindex(sorted(complete_rows.columns),
                                              axis=1)

        self._valid_structure_labels = list(self.features.index)
        self.valid_structures = self.hypo_structures.loc[
            self._valid_structure_labels]

        n_valid = self.features.shape[0]
        n_total = self._features_df.shape[0]
        print("{} out of {} structures were successfully featurized.".format(
            n_valid, n_total))
        return self.features
def AddFeatures(df):  # Add features by Matminer
    """
    Run a fixed sequence of matminer featurizers over ``df`` and return the
    resulting dataframe.

    The "formula" column is converted to a "composition" column, which in
    turn yields "composition_oxid"; composition featurizers are applied to
    those columns and structure featurizers to the "structure" column.
    Apart from the first conversions, ElementProperty and OxidationStates,
    every featurizer runs with ignore_errors=True.
    """
    from matminer.featurizers.conversions import (
        CompositionToOxidComposition,
        StrToComposition,
    )
    from matminer.featurizers.composition import (
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElementProperty,
        Miedema,
        OxidationStates,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        ChemicalOrdering,
        DensityFeatures,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        MinimumRelativeDistances,
        SiteStatsFingerprint,
        StructuralComplexity,
        StructuralHeterogeneity,
    )
    from matminer.featurizers.site import (
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        CoordinationNumber,
    )
    from pymatgen.analysis.local_env import CrystalNN

    # Conversions and the featurizers that run without ignore_errors
    frame = StrToComposition().featurize_dataframe(df, "formula")
    frame = ElementProperty.from_preset(
        preset_name="magpie").featurize_dataframe(frame, col_id="composition")
    frame = CompositionToOxidComposition().featurize_dataframe(
        frame, "composition")
    frame = OxidationStates().featurize_dataframe(frame, "composition_oxid")

    # Compositional featurizers; each is instantiated right before it runs
    composition_factories = (
        ElectronAffinity,
        BandCenter,
        CohesiveEnergy,
        Miedema,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    for make_featurizer in composition_factories:
        frame = make_featurizer().featurize_dataframe(frame,
                                                      "composition_oxid",
                                                      ignore_errors=True)

    # This is the border between compositional features and structural
    # features. Drop the structure featurizers below to use only
    # compositional features.
    structure_factories = (
        GlobalSymmetryFeatures,
        StructuralComplexity,
        ChemicalOrdering,
        MaximumPackingEfficiency,
        MinimumRelativeDistances,
        StructuralHeterogeneity,
        lambda: SiteStatsFingerprint(
            AverageBondLength(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(
            AverageBondAngle(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(BondOrientationalParameter()),
        lambda: SiteStatsFingerprint(CoordinationNumber()),
        DensityFeatures,
    )
    for make_featurizer in structure_factories:
        frame = make_featurizer().featurize_dataframe(frame,
                                                      "structure",
                                                      ignore_errors=True)

    return frame
Example #9
0
    OxidCompositions = CompositionToOxidComposition()
    print(OxidCompositions.feature_labels())
    df = OxidCompositions.featurize_dataframe(df, 'composition')

    #CohesiveEnergy
    from matminer.featurizers.composition import CohesiveEnergy
    cohesive_energy = CohesiveEnergy()
    cohesive_energy.set_n_jobs(28)
    labels.append(cohesive_energy.feature_labels())
    df = cohesive_energy.featurize_dataframe(df,
                                             'composition',
                                             ignore_errors=True)

    #ValenceOrbital
    from matminer.featurizers.composition import ValenceOrbital
    valence_orbital = ValenceOrbital()
    valence_orbital.set_n_jobs(28)
    labels.append(valence_orbital.feature_labels())
    df = valence_orbital.featurize_dataframe(df,
                                             'composition',
                                             ignore_errors=True)

    #AtomicOrbital
    from matminer.featurizers.composition import AtomicOrbitals
    atomic_orbitals = AtomicOrbitals()
    atomic_orbitals.set_n_jobs(28)
    labels.append(atomic_orbitals.feature_labels())
    df = atomic_orbitals.featurize_dataframe(df,
                                             'composition',
                                             ignore_errors=True)
Example #10
0
class BCA_Featurizer(BaseFeaturizer):
	"""
	Composition featurizer that concatenates BCA features with matminer-derived features.

	featurize() returns the values produced by BCA(...).featurize() followed by
	features from ValenceOrbital, AtomicOrbitalsMod, CohesiveEnergy, BandCenter,
	ValenceOrbitalEnergy and a custom ElementProperty featurizer.
	The matminer-derived labels and units are hard-coded in this class, so the
	constructor verifies that the upstream feature labels still match expectations.

	Parameters:
	-----------
	radius_type: passed through to BCA
	normalize_formula: passed through to BCA
	"""
	def __init__(self,radius_type='ionic_radius',normalize_formula=False):
		self.radius_type = radius_type
		self.normalize_formula = normalize_formula
		self.ValenceOrbital = ValenceOrbital()
		self.AtomicOrbitals = AtomicOrbitalsMod()
		self.CohesiveEnergy = CohesiveEnergy()
		self.BandCenter = BandCenter()
		self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()
		#custom ElementProperty featurizer
		elemental_properties = ['BoilingT', 'MeltingT',
			'BulkModulus', 'ShearModulus',
			'Row', 'Column', 'Number', 'MendeleevNumber', 'SpaceGroupNumber',
			'Density','MolarVolume',
			'FusionEnthalpy','HeatVaporization',
			'Polarizability',
			'ThermalConductivity']
		self.ElementProperty = ElementProperty(data_source='magpie',features=elemental_properties,
			stats=["mean", "std_dev"])
		#check matminer featurizers - raises if upstream labels no longer match the hard-coded ones
		self.check_matminer_featurizers()

	def featurize(self,composition):
		"""
		Compute the feature vector for a composition.

		Parameters:
		-----------
		composition: composition passed to BCA together with radius_type and normalize_formula

		Returns a list: BCA feature values followed by the matminer-derived features,
		in the order given by feature_labels()
		"""
		bca = BCA(composition,self.radius_type,self.normalize_formula)
		bca_features = bca.featurize()

		#avg and frac s, p, d, f electrons for metals
		vo_features = list(self.ValenceOrbital.featurize(bca.metal_composition))
		#avg total valence electrons for metals
		#entries 0-3 are the avg s, p, d and f counts (see check_matminer_featurizers), so all four must be summed
		vo_features.append(sum(vo_features[:4]))
		#HOMO and LUMO character and energy levels for metals from atomic orbitals
		ao_features = self.AtomicOrbitals.featurize(bca.metal_composition)
		#exclude HOMO_character, HOMO_element, LUMO_character, LUMO_element - categoricals
		ao_features = [value for idx, value in enumerate(ao_features) if idx not in (0,1,3,4)]
		#avg metal elemental cohesive energy
		ce_features = self.CohesiveEnergy.featurize(bca.metal_composition,formation_energy_per_atom=1e-10)
		#remaining featurizers are applied to the metal composition first, then to the full composition
		bc_features = self.BandCenter.featurize(bca.metal_composition) + self.BandCenter.featurize(bca.composition)
		ve_features = self.ValenceOrbitalEnergy.featurize(bca.metal_composition) + self.ValenceOrbitalEnergy.featurize(bca.composition)
		ep_features = self.ElementProperty.featurize(bca.metal_composition) + self.ElementProperty.featurize(bca.composition)

		mm_features = vo_features + ao_features + ce_features + bc_features + ve_features + ep_features

		return list(bca_features.values()) + mm_features

	@property
	def ElementProperty_custom_labels(self):
		"""
		Generate custom labels for ElementProperty featurizer that follow same naming convention as Perovskite class

		Labels are ordered to match featurize(): all 'M_' (metal composition) labels first,
		then all 'BCA_' (full composition) labels; within each, stats vary fastest
		"""
		elemental_property_label_map = {'BoilingT':'boil_temp','MeltingT':'melt_temp',
			'BulkModulus':'bulk_mod','ShearModulus':'shear_mod',
			'Row':'row','Column':'column','Number':'number','MendeleevNumber':'mendeleev','SpaceGroupNumber':'space_group',
			'Density':'density','MolarVolume':'molar_vol',
			'FusionEnthalpy':'H_fus','HeatVaporization':'H_vap',
			'Polarizability':'polarizability',
			'ThermalConductivity':'sigma_therm'}

		element_property_labels = [elemental_property_label_map.get(feat) for feat in self.ElementProperty.features]
		#matminer names the statistic 'std_dev'; the custom labels abbreviate it to 'std'
		stat_names = ['std' if stat=='std_dev' else stat for stat in self.ElementProperty.stats]
		labels = []
		for prefix in ('M','BCA'):
			for attr in element_property_labels:
				for stat in stat_names:
					labels.append(f'{prefix}_{attr}_{stat}')
		return labels

	@property
	def ElementProperty_units(self):
		"""
		Generate units for ElementProperty featurizer that follow same naming convention as Perovskite class

		One unit per (property, stat) pair, repeated twice: once for the metal-composition
		features and once for the full-composition features
		"""
		elemental_property_unit_map = {'BoilingT':'temperature','MeltingT':'temperature',
			'BulkModulus':'pressure','ShearModulus':'pressure',
			'Row':'none','Column':'none','Number':'none','MendeleevNumber':'none','SpaceGroupNumber':'none',
			'Density':'density','MolarVolume':'volume',
			'FusionEnthalpy':'energy','HeatVaporization':'energy',
			'Polarizability':'polarizability',
			'ThermalConductivity':'therm'}

		n_stats = len(self.ElementProperty.stats)
		units = []
		for feat in self.ElementProperty.features:
			#every statistic of a property shares that property's unit
			units.extend([elemental_property_unit_map.get(feat)]*n_stats)
		return units*2

	def ElementProperty_label_check(self):
		"""
		Check that ElementProperty feature labels are as expected
		If not, features may not align with feature labels

		Raises Exception if the labels rebuilt here differ from ElementProperty.feature_labels()
		"""
		#ElementProperty.feature_labels() code as of 1/24/20
		src = self.ElementProperty.data_source.__class__.__name__
		labels = ["{} {} {}".format(src, stat, attr)
			for attr in self.ElementProperty.features
			for stat in self.ElementProperty.stats]

		if labels!=self.ElementProperty.feature_labels():
			raise Exception('ElementProperty features or labels have changed')


	@property
	def matminer_labels(self):
		"""
		Feature labels for matminer-derived features
		Order must match the order in which featurize() concatenates the matminer features
		"""
		labels = [
			#ValenceOrbital labels
			'M_ValenceElec_s_mean',
			'M_ValenceElec_p_mean',
			'M_ValenceElec_d_mean',
			'M_ValenceElec_f_mean',
			'M_ValenceElec_s_frac',
			'M_ValenceElec_p_frac',
			'M_ValenceElec_d_frac',
			'M_ValenceElec_f_frac',
			'M_ValenceElec_tot_mean',
			#AtomicOrbitals labels
			#'M_HOMO_character',
			'M_HOMO_energy',
			#'M_LUMO_character',
			'M_LUMO_energy',
			'M_AO_gap',
			#CohesiveEnergy labels
			'M_cohesive_energy_mean',
			#BandCenter labels
			'M_BandCenter',
			'BCA_BandCenter',
			#ValenceOrbitalEnergy labels
			'M_ValenceEnergy_mean',
			'BCA_ValenceEnergy_mean'
			]

		labels += self.ElementProperty_custom_labels

		return labels

	@property
	def matminer_units(self):
		"""
		Feature units for matminer-derived features
		Entries are positionally aligned with matminer_labels
		"""
		units = [
			#ValenceOrbital units
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			#AtomicOrbitals units
			#'M_HOMO_character',
			'energy',
			#'M_LUMO_character',
			'energy',
			'energy',
			#CohesiveEnergy units
			'energy',
			#BandCenter units
			'energy',
			'energy',
			#ValenceOrbitalEnergy units
			'energy',
			'energy'
			]

		units += self.ElementProperty_units

		return units

	def feature_labels(self):
		"""
		Labels for all features returned by featurize(): BCA labels followed by matminer_labels
		BCA labels are obtained by featurizing a placeholder composition ('BaO') and reading the keys
		"""
		bca_feature_labels = list(BCA(mg.Composition('BaO'),self.radius_type,self.normalize_formula).featurize().keys())

		return bca_feature_labels + self.matminer_labels

	def feature_units(self):
		"""
		Units for all features returned by featurize(): BCA units followed by matminer_units
		"""
		#build BCA with the same settings used in featurize() and feature_labels() so units stay aligned with labels
		bca_units = BCA(mg.Composition('BaO'),self.radius_type,self.normalize_formula).feature_units()

		return bca_units + self.matminer_units

	def check_matminer_featurizers(self):
		"""
		Check that features and feature order for matminer featurizers are as expected
		If features or feature order have changed, featurize() may return unexpected features that do not align with feature_labels()

		Raises Exception naming the first featurizer whose labels differ from the expected list
		"""
		#verify that matminer feature labels haven't changed
		expected_vo_labels = [
			'avg s valence electrons',
			'avg p valence electrons',
			'avg d valence electrons',
			'avg f valence electrons',
			'frac s valence electrons',
			'frac p valence electrons',
			'frac d valence electrons',
			'frac f valence electrons']
		if self.ValenceOrbital.feature_labels() != expected_vo_labels:
			raise Exception('ValenceOrbital features or labels have changed')

		expected_ao_labels = [
			'HOMO_character',
			'HOMO_element',
			'HOMO_energy',
			'LUMO_character',
			'LUMO_element',
			'LUMO_energy',
			'gap_AO']
		if self.AtomicOrbitals.feature_labels() != expected_ao_labels:
			raise Exception('AtomicOrbitals features or labels have changed')

		if self.CohesiveEnergy.feature_labels() != ['cohesive energy']:
			raise Exception('CohesiveEnergy features or labels have changed')

		if self.BandCenter.feature_labels() != ['band center']:
			raise Exception('BandCenter features or labels have changed')

		self.ElementProperty_label_check()

	def citations(self):
		"""
		Unique citations collected from the component featurizers and BCA
		"""
		#NOTE(review): self.ElementProperty is not included in this list - confirm whether that is intentional
		featurizers = [self.ValenceOrbital, self.AtomicOrbitals, self.CohesiveEnergy, self.BandCenter, self.ValenceOrbitalEnergy, BCA(mg.Composition('BaO'))]
		return list(np.unique(sum([f.citations() for f in featurizers],[])))
Example #11
0
                      'NdUnfilled', 'NfUnfilled', 'NUnfilled', 'GSvolume_pa',
                      'SpaceGroupNumber', 'GSbandgap', 'GSmagmom')

# The following features are created with the matminer package.
featurizer = MultipleFeaturizer([
    # from_preset is a factory, so call it on the class rather than on a
    # throwaway CoordinationNumber() instance.
    SiteStatsFingerprint(CoordinationNumber.from_preset('VoronoiNN'),
                         stats=('mean', 'std_dev', 'minimum', 'maximum')),
    StructuralHeterogeneity(),
    ChemicalOrdering(),
    MaximumPackingEfficiency(),
    SiteStatsFingerprint(
        LocalPropertyDifference(properties=element_properties),
        stats=('mean', 'std_dev', 'minimum', 'maximum', 'range')),
    StructureComposition(Stoichiometry()),
    StructureComposition(ElementProperty.from_preset("magpie")),
    StructureComposition(ValenceOrbital(props=['frac'])),
    StructureComposition(IonProperty(fast=True))
])

# Generate the features from the 'structure' column of df.
# ignore_errors=True keeps the run going when a row fails to featurize.
feature_data = featurizer.featurize_dataframe(df,
                                              col_id=['structure'],
                                              ignore_errors=True)
# "structure", "compound possible" and "material_id" are not reasonable
# physical features, so drop these three columns.
feature_data = feature_data.drop(
    ["structure", "compound possible", "material_id"], axis=1)
# Write the data to a csv file for later use.
feature_data.to_csv("data_delta_e_data.csv", index=False)
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import train_test_split, KFold, ShuffleSplit
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
Example #12
0
 def feature_labels(self):
     """Return labels for element fractions, element properties, then valence orbitals."""
     orbital_labels = ValenceOrbital().feature_labels()
     # Element symbols alone would be ambiguous next to the other features,
     # so each one is suffixed with ' fraction'.
     fraction_labels = [
         "{} fraction".format(symbol)
         for symbol in ElementFraction().feature_labels()
     ]
     combined = fraction_labels + self._element_property_feature_labels
     return combined + orbital_labels