def similarity(_parents, target): featurizer = MultipleFeaturizer([ SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"), StructuralHeterogeneity(), ChemicalOrdering(), MaximumPackingEfficiency(), SiteStatsFingerprint.from_preset( "LocalPropertyDifference_ward-prb-2017"), StructureComposition(Stoichiometry()), StructureComposition(ElementProperty.from_preset("magpie")), StructureComposition(ValenceOrbital(props=["frac"])), StructureComposition(IonProperty(fast=True)), ]) # HACK celery doesn't work with multiprocessing (used by matminer) try: from celery import current_task if current_task: featurizer.set_n_jobs(1) except ImportError: pass x_target = pd.DataFrame.from_records([featurizer.featurize(target)], columns=featurizer.feature_labels()) x_parent = pd.DataFrame.from_records( featurizer.featurize_many(_parents, ignore_errors=True, pbar=False), columns=featurizer.feature_labels(), ) nulls = x_parent[x_parent.isnull().any(axis=1)].index.values x_parent.fillna(100000, inplace=True) x_target = x_target.reindex(sorted(x_target.columns), axis=1) x_parent = x_parent.reindex(sorted(x_parent.columns), axis=1) with open(os.path.join(settings.rxn_files, "scaler2.pickle"), "rb") as f: scaler = pickle.load(f) with open(os.path.join(settings.rxn_files, "quantiles.pickle"), "rb") as f: quantiles = pickle.load(f) X = scaler.transform(x_parent.append(x_target)) D = [pairwise_distances(np.array([row, X[-1]]))[0, 1] for row in X[:-1]] _res = [] for d in D: _res.append(np.linspace(0, 1, 101)[np.abs(quantiles - d).argmin()]) _res = np.array(_res) _res[nulls] = -1 return _res
class DeBreuck2020Featurizer(modnet.featurizers.MODFeaturizer): """ Featurizer presets used for the paper 'Machine learning materials properties for small datasets' by Pierre-Paul De Breuck, Geoffroy Hautier & Gian-Marco Rignanese, arXiv:2004.14766 (2020). Uses most of the featurizers implemented by matminer at the time of writing with their default hyperparameters and presets. """ from matminer.featurizers.composition import ( AtomicOrbitals, AtomicPackingEfficiency, BandCenter, # CohesiveEnergy, - This descriptor was not used in the paper preset # ElectronAffinity, - This descriptor was not used in the paper preset ElectronegativityDiff, ElementFraction, ElementProperty, IonProperty, Miedema, OxidationStates, Stoichiometry, TMetalFraction, ValenceOrbital, YangSolidSolution, ) from matminer.featurizers.structure import ( # BagofBonds, - This descriptor was not used in the paper preset BondFractions, ChemicalOrdering, CoulombMatrix, DensityFeatures, EwaldEnergy, GlobalSymmetryFeatures, MaximumPackingEfficiency, # PartialRadialDistributionFunction, RadialDistributionFunction, SineCoulombMatrix, StructuralHeterogeneity, XRDPowderPattern, ) from matminer.featurizers.site import ( AGNIFingerprints, AverageBondAngle, AverageBondLength, BondOrientationalParameter, ChemEnvSiteFingerprint, CoordinationNumber, CrystalNNFingerprint, GaussianSymmFunc, GeneralizedRadialDistributionFunction, LocalPropertyDifference, OPSiteFingerprint, VoronoiFingerprint, ) composition_featurizers = ( AtomicOrbitals(), AtomicPackingEfficiency(), BandCenter(), ElementFraction(), ElementProperty.from_preset("magpie"), IonProperty(), Miedema(), Stoichiometry(), TMetalFraction(), ValenceOrbital(), YangSolidSolution(), ) oxide_composition_featurizers = ( ElectronegativityDiff(), OxidationStates(), ) structure_featurizers = ( DensityFeatures(), GlobalSymmetryFeatures(), RadialDistributionFunction(), CoulombMatrix(), # PartialRadialDistributionFunction(), SineCoulombMatrix(), EwaldEnergy(), BondFractions(), StructuralHeterogeneity(), MaximumPackingEfficiency(), ChemicalOrdering(), XRDPowderPattern(), # BagofBonds(), ) site_featurizers = ( AGNIFingerprints(), AverageBondAngle(VoronoiNN()), AverageBondLength(VoronoiNN()), BondOrientationalParameter(), ChemEnvSiteFingerprint.from_preset("simple"), CoordinationNumber(), CrystalNNFingerprint.from_preset("ops"), GaussianSymmFunc(), GeneralizedRadialDistributionFunction.from_preset("gaussian"), LocalPropertyDifference(), OPSiteFingerprint(), VoronoiFingerprint(), ) def featurize_composition(self, df): """ Applies the preset composition featurizers to the input dataframe, renames some fields and cleans the output dataframe. """ df = super().featurize_composition(df) _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4} df['AtomicOrbitals|HOMO_character'] = df[ 'AtomicOrbitals|HOMO_character'].map(_orbitals) df['AtomicOrbitals|LUMO_character'] = df[ 'AtomicOrbitals|LUMO_character'].map(_orbitals) df['AtomicOrbitals|HOMO_element'] = df[ 'AtomicOrbitals|HOMO_element'].apply( lambda x: -1 if not isinstance(x, str) else Element(x).Z) df['AtomicOrbitals|LUMO_element'] = df[ 'AtomicOrbitals|LUMO_element'].apply( lambda x: -1 if not isinstance(x, str) else Element(x).Z) df = df.replace([np.inf, -np.inf, np.nan], 0) return modnet.featurizers.clean_df(df) def featurize_structure(self, df): """ Applies the preset structural featurizers to the input dataframe, renames some fields and cleans the output dataframe. """ df = super().featurize_structure(df) dist = df[ "RadialDistributionFunction|radial distribution function"].iloc[0][ 'distances'][:50] for i, d in enumerate(dist): _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format( d) df[_rdf_key] = df[ "RadialDistributionFunction|radial distribution function"].apply( lambda x: x['distribution'][i]) df = df.drop("RadialDistributionFunction|radial distribution function", axis=1) _crystal_system = { "cubic": 1, "tetragonal": 2, "orthorombic": 3, "hexagonal": 4, "trigonal": 5, "monoclinic": 6, "triclinic": 7 } def _int_map(x): if x == np.nan: return 0 elif x: return 1 else: return 0 df["GlobalSymmetryFeatures|crystal_system"] = df[ "GlobalSymmetryFeatures|crystal_system"].map(_crystal_system) df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[ "GlobalSymmetryFeatures|is_centrosymmetric"].map(_int_map) return modnet.featurizers.clean_df(df) def featurize_site(self, df): """ Applies the preset site featurizers to the input dataframe, renames some fields and cleans the output dataframe. """ # rename some features for backwards compatibility with pretrained models aliases = { "GeneralizedRadialDistributionFunction": "GeneralizedRDF", "AGNIFingerprints": "AGNIFingerPrint", "BondOrientationalParameter": "BondOrientationParameter", "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc", } df = super().featurize_site(df, aliases=aliases) df = df.loc[:, (df != 0).any(axis=0)] return modnet.featurizers.clean_df(df)
ignore_errors=True) # -- end F6 # -- start F7 from matminer.featurizers.composition import ElectronAffinity ela_feat = ElectronAffinity() fdf = ela_feat.featurize_dataframe(fdf, col_id='composition', ignore_errors=True) # -- end F7 # -- start F9 from matminer.featurizers.composition import ValenceOrbital vlo_feat = ValenceOrbital() fdf = vlo_feat.featurize_dataframe(fdf, col_id='composition', ignore_errors=True) # -- end F9 # -- start F10 from matminer.featurizers.composition import IonProperty iop_feat = IonProperty() fdf = iop_feat.featurize_dataframe(fdf, col_id='composition', ignore_errors=True) # -- end F10 # -- start F12
class FUTURE_PROSPECTS_2021(featurizer.extendedMODFeaturizer): from matminer.featurizers.composition import ( AtomicOrbitals, AtomicPackingEfficiency, BandCenter, CohesiveEnergy, ElectronAffinity, ElectronegativityDiff, ElementFraction, ElementProperty, IonProperty, Miedema, OxidationStates, Stoichiometry, TMetalFraction, ValenceOrbital, YangSolidSolution, ) from matminer.featurizers.structure import ( BagofBonds, BondFractions, ChemicalOrdering, CoulombMatrix, DensityFeatures, EwaldEnergy, GlobalSymmetryFeatures, MaximumPackingEfficiency, PartialRadialDistributionFunction, RadialDistributionFunction, SineCoulombMatrix, StructuralHeterogeneity, XRDPowderPattern, ) from matminer.featurizers.site import ( AGNIFingerprints, AverageBondAngle, AverageBondLength, BondOrientationalParameter, ChemEnvSiteFingerprint, CoordinationNumber, CrystalNNFingerprint, GaussianSymmFunc, GeneralizedRadialDistributionFunction, LocalPropertyDifference, OPSiteFingerprint, VoronoiFingerprint, ) from matminer.featurizers.dos import ( DOSFeaturizer, SiteDOS, Hybridization, DosAsymmetry, ) from matminer.featurizers.bandstructure import ( BandFeaturizer, BranchPointEnergy ) composition_featurizers = ( AtomicOrbitals(), AtomicPackingEfficiency(), BandCenter(), ElementFraction(), ElementProperty.from_preset("magpie"), IonProperty(), Miedema(), Stoichiometry(), TMetalFraction(), ValenceOrbital(), YangSolidSolution(), ) oxid_composition_featurizers = ( ElectronegativityDiff(), OxidationStates(), ) structure_featurizers = ( DensityFeatures(), GlobalSymmetryFeatures(), RadialDistributionFunction(), CoulombMatrix(), #PartialRadialDistributionFunction(), #Introduces a large amount of features SineCoulombMatrix(), EwaldEnergy(), BondFractions(), StructuralHeterogeneity(), MaximumPackingEfficiency(), ChemicalOrdering(), XRDPowderPattern(), ) site_featurizers = ( AGNIFingerprints(), AverageBondAngle(VoronoiNN()), AverageBondLength(VoronoiNN()), BondOrientationalParameter(), ChemEnvSiteFingerprint.from_preset("simple"), CoordinationNumber(), CrystalNNFingerprint.from_preset("ops"), GaussianSymmFunc(), GeneralizedRadialDistributionFunction.from_preset("gaussian"), LocalPropertyDifference(), OPSiteFingerprint(), VoronoiFingerprint(), ) dos_featurizers = ( DOSFeaturizer(), SiteDOS(), Hybridization() ) band_featurizers = ( BandFeaturizer(), BranchPointEnergy() ) def __init__(self, n_jobs=None): self._n_jobs = n_jobs def featurize_composition(self, df): """Applies the preset composition featurizers to the input dataframe, renames some fields and cleans the output dataframe. """ df = super().featurize_composition(df) _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4} df["AtomicOrbitals|HOMO_character"] = df["AtomicOrbitals|HOMO_character"].map( _orbitals ) df["AtomicOrbitals|LUMO_character"] = df["AtomicOrbitals|LUMO_character"].map( _orbitals ) df["AtomicOrbitals|HOMO_element"] = df["AtomicOrbitals|HOMO_element"].apply( lambda x: -1 if not isinstance(x, str) else Element(x).Z ) df["AtomicOrbitals|LUMO_element"] = df["AtomicOrbitals|LUMO_element"].apply( lambda x: -1 if not isinstance(x, str) else Element(x).Z ) return clean_df(df) def featurize_structure(self, df): """Applies the preset structural featurizers to the input dataframe, renames some fields and cleans the output dataframe. """ df = super().featurize_structure(df) dist = df["RadialDistributionFunction|radial distribution function"].iloc[0][ "distances" ][:50] for i, d in enumerate(dist): _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format( d ) df[_rdf_key] = df[ "RadialDistributionFunction|radial distribution function" ].apply(lambda x: x["distribution"][i]) df = df.drop("RadialDistributionFunction|radial distribution function", axis=1) _crystal_system = { "cubic": 1, "tetragonal": 2, "orthorombic": 3, "hexagonal": 4, "trigonal": 5, "monoclinic": 6, "triclinic": 7, } def _int_map(x): if x == np.nan: return 0 elif x: return 1 else: return 0 df["GlobalSymmetryFeatures|crystal_system"] = df[ "GlobalSymmetryFeatures|crystal_system" ].map(_crystal_system) df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[ "GlobalSymmetryFeatures|is_centrosymmetric" ].map(_int_map) return clean_df(df) def featurize_dos(self, df): """Applies the presetdos featurizers to the input dataframe, renames some fields and cleans the output dataframe. """ df = super().featurize_dos(df) hotencodeColumns = ["DOSFeaturizer|vbm_specie_1","DOSFeaturizer|cbm_specie_1"] one_hot = pd.get_dummies(df[hotencodeColumns]) df = df.drop(hotencodeColumns, axis = 1).join(one_hot) _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4} df["DOSFeaturizer|vbm_character_1"] = df[ "DOSFeaturizer|vbm_character_1" ].map(_orbitals) df["DOSFeaturizer|cbm_character_1"] = df[ "DOSFeaturizer|cbm_character_1" ].map(_orbitals) # Splitting one feature into several floating features # e.g. number;number;number into three columns splitColumns = ["DOSFeaturizer|cbm_location_1", "DOSFeaturizer|vbm_location_1"] for column in splitColumns: try: newColumns = df[column].str.split(";", n = 2, expand = True) for i in range(0,3): df[column + "_" + str(i)] = np.array(newColumns[i]).astype(np.float) except: continue df = df.drop(splitColumns, axis=1) df = df.drop(["dos"], axis=1) return clean_df(df) def featurize_bandstructure(self, df): """Applies the preset band structure featurizers to the input dataframe, renames some fields and cleans the output dataframe. """ df = super().featurize_bandstructure(df) def _int_map(x): if str(x) == "False": return 0 elif str(x) == "True": return 1 df["BandFeaturizer|is_gap_direct"] = df[ "BandFeaturizer|is_gap_direct" ].map(_int_map) df = df.drop(["bandstructure"], axis=1) return clean_df(df) def featurize_site(self, df): """Applies the preset site featurizers to the input dataframe, renames some fields and cleans the output dataframe. """ aliases = { "GeneralizedRadialDistributionFunction": "GeneralizedRDF", "AGNIFingerprints": "AGNIFingerPrint", "BondOrientationalParameter": "BondOrientationParameter", "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc", } df = super().featurize_site(df, aliases=aliases) df = df.loc[:, (df != 0).any(axis=0)] return clean_df(df)
class PerovskiteProperty(BaseFeaturizer): """ Class to calculate perovskite features. Includes custom features from the Perovskite class and generic features from ElementProperty, AtomicOrbitals, ValenceOrbital, and CohesiveEnergy matminer featurizers. Options for initializing: ordered_formula_featurizer(): for featurizing ordered formulas cation_site_featurizer(): for featurizing unordered formulas based on user-provided cation site assignments from_preset(): load a preset The class can also be called manually, but be aware that different parameter sets are required for an ordered formula featurizer instance than for a cation site featurizer instance. Parameters: ----------- cation_site: dict of site assignments for cations, i.e. {el:site}. Elements not in cation_site are assumed to be anions on X-site site_ox_lim: dict of oxidation state limits for each site, i.e. {site:[min,max]}. Elements on sites are limited to oxidation states within these limits site_base_ox: dict of base oxidation state for each site, i.e. {site:ox}. Used for determining aliovalent ions and acceptor/donor dopants ordered_formulas: if True, determine cation site assignments from order A_site_occupancy: Number of atoms on A site. Used when ordered_formulas is True anions: list of anions. Used when ordered_formulas is True Parameters for ordered formula featurizer: site_ox_lim, site_base_ox, A_site_occupancy, anions Parameters for cation site featurizer: cation_site, site_ox_lim, site_base_ox """ def __init__(self, cation_site=None, site_ox_lim={ 'A': [0, 10], 'B': [0, 10], 'X': [-10, 0] }, site_base_ox={ 'A': 2, 'B': 4, 'X': -2 }, ordered_formulas=False, A_site_occupancy=1, anions=None): if cation_site is None and ordered_formulas is False: raise ValueError( 'Either cation sites must be assigned, or formulas must be ordered. Otherwise site assignments can not be determined' ) self.cation_site = cation_site self.site_ox_lim = site_ox_lim self.site_base_ox = site_base_ox self.ordered_formulas = ordered_formulas self.A_site_occupancy = A_site_occupancy self.anions = anions #matminer featurizers self.ValenceOrbital = ValenceOrbital() self.AtomicOrbitals = AtomicOrbitalsMod() self.CohesiveEnergy = CohesiveEnergy() #custom ElementProperty featurizer elemental_properties = [ 'BoilingT', 'MeltingT', 'BulkModulus', 'ShearModulus', 'Row', 'Column', 'Number', 'MendeleevNumber', 'SpaceGroupNumber', 'Density', 'MolarVolume', 'FusionEnthalpy', 'HeatVaporization', 'NsUnfilled', 'NpUnfilled', 'NdUnfilled', 'NfUnfilled', 'Polarizability', 'ThermalConductivity' ] self.ElementProperty = ElementProperty( data_source='magpie', features=elemental_properties, stats=["mean", "std_dev", "range"]) self.check_matminer_featurizers() self.featurize_options = {} @classmethod def from_preset(cls, preset_name): """ Initialize from preset Parameters: ----------- preset_name: name of preset to load. Currently accepts 'BCFZY' """ if preset_name == 'BCFZY': #Ba(Co,Fe,Zr,Y)O_3-d system cation_site = { 'Ba': 'A', 'Co': 'B', 'Fe': 'B', 'Zr': 'B', 'Y': 'B' } site_ox_lim = {'A': [2, 2], 'B': [2, 4], 'X': [-2, -2]} site_base_ox = {'A': 2, 'B': 4, 'X': -2} else: raise ValueError("Invalid preset_name specified!") return cls(cation_site, site_ox_lim, site_base_ox) @classmethod def ordered_formula_featurizer(cls, A_site_occupancy=1, anions=None, site_ox_lim={ 'A': [0, 10], 'B': [0, 10], 'X': [-10, 0] }, site_base_ox={ 'A': 2, 'B': 4, 'X': -2 }): """ Convenience method for instantiating a featurizer for ordered formulas """ return cls(cation_site=None, site_ox_lim=site_ox_lim, site_base_ox=site_base_ox, ordered_formulas=True, A_site_occupancy=A_site_occupancy, anions=anions) @classmethod def cation_site_featurizer(cls, cation_site, site_ox_lim={ 'A': [0, 10], 'B': [0, 10], 'X': [-10, 0] }, site_base_ox={ 'A': 2, 'B': 4, 'X': -2 }): """ Convenience method for instantiating a featurizer for unordered formulas, based on site assignments """ return cls(cation_site, site_ox_lim, site_base_ox) @property def ElementProperty_custom_labels(self): """ Generate custom labels for ElementProperty featurizer that follow same naming convention as Perovskite class """ elemental_property_label_map = { 'BoilingT': 'boil_temp', 'MeltingT': 'melt_temp', 'BulkModulus': 'bulk_mod', 'ShearModulus': 'shear_mod', 'Row': 'row', 'Column': 'column', 'Number': 'number', 'MendeleevNumber': 'mendeleev', 'SpaceGroupNumber': 'space_group', 'Density': 'density', 'MolarVolume': 'molar_vol', 'FusionEnthalpy': 'H_fus', 'HeatVaporization': 'H_vap', 'NsUnfilled': 'valence_unfilled_s', 'NpUnfilled': 'valence_unfilled_p', 'NdUnfilled': 'valence_unfilled_d', 'NfUnfilled': 'valence_unfilled_f', 'Polarizability': 'polarizability', 'ThermalConductivity': 'sigma_therm' } element_property_labels = list( map(elemental_property_label_map.get, self.ElementProperty.features)) labels = [] for attr in element_property_labels: for stat in self.ElementProperty.stats: if stat == 'std_dev': stat = 'std' labels.append(f'{attr}_{stat}') return labels @property def ElementProperty_categories(self): """ Generate categories for ElementProperty featurizer """ elemental_property_category_map = { 'BoilingT': 'elemental', 'MeltingT': 'elemental', 'BulkModulus': 'elemental', 'ShearModulus': 'elemental', 'Row': 'periodic', 'Column': 'periodic', 'Number': 'periodic', 'MendeleevNumber': 'periodic', 'SpaceGroupNumber': 'periodic', 'Density': 'elemental', 'MolarVolume': 'elemental', 'FusionEnthalpy': 'elemental', 'HeatVaporization': 'elemental', 'NsUnfilled': 'electronic', 'NpUnfilled': 'electronic', 'NdUnfilled': 'electronic', 'NfUnfilled': 'electronic', 'Polarizability': 'elemental', 'ThermalConductivity': 'elemental' } element_property_categories = list( map(elemental_property_category_map.get, self.ElementProperty.features)) categories = [] for ep_cat in element_property_categories: for stat in self.ElementProperty.stats: categories.append(ep_cat) return categories @property def ElementProperty_units(self): """ Generate units for ElementProperty featurizer """ elemental_property_unit_map = { 'BoilingT': 'temp', 'MeltingT': 'temp', 'BulkModulus': 'pressure', 'ShearModulus': 'pressure', 'Row': 'none', 'Column': 'none', 'Number': 'none', 'MendeleevNumber': 'none', 'SpaceGroupNumber': 'none', 'Density': 'density', 'MolarVolume': 'volume', 'FusionEnthalpy': 'energy', 'HeatVaporization': 'energy', 'NsUnfilled': 'none', 'NpUnfilled': 'none', 'NdUnfilled': 'none', 'NfUnfilled': 'none', 'Polarizability': 'polarizability', #complex units - doesn't matter 'ThermalConductivity': 'therm' } #complex units - doesn't matter element_property_units = list( map(elemental_property_unit_map.get, self.ElementProperty.features)) units = [] for ep_unit in element_property_units: for stat in self.ElementProperty.stats: units.append(ep_unit) return units def ElementProperty_label_check(self): """ Check that ElementProperty feature labels are as expected If not, features may not align with feature labels """ #ElementProperty.feature_labels() code as of 2/17/19 labels = [] for attr in self.ElementProperty.features: src = self.ElementProperty.data_source.__class__.__name__ for stat in self.ElementProperty.stats: labels.append("{} {} {}".format(src, stat, attr)) if labels != self.ElementProperty.feature_labels(): raise Exception('ElementProperty features or labels have changed') def set_featurize_options( self, sites, ox_stats=['min', 'max', 'mean', 'median', 'std', 'range'], ep_stats=["mean", "std_dev", "range"], radius_type='ionic_radius', normalize_formula=True, silent=True, categories=None): """ Set options for featurization. Since these options should be the same for all compositions in a batch, set for the featurizer instance rather than passing as args to featurize() so that they do not have to be duplicated in every row of a DataFrame when calling featurize_dataframe(). Since these options change the number and meaning of features returned, it's also safest to set for the whole instance for consistency. Parameters: ----------- sites: list or string of sites to featurize. Any combination of 'A', 'B', 'X', and/or 'comp' accepted. Composition-level, oxidation-state-dependent features are always calculated by the Perovskite class. Passing '' or [] will return only these features. Specifying 'A','B', and/or 'X' sites will calculate site-level features for these sites (oxidation-state independent and dependent features, and matminer features). Including 'comp' will calculate oxidation-state-independent features and matminer features for the full composition. ox_stats: list of aggregate functions to apply to oxidation state combinations for feature generation using Perovskite class. Options: 'min','max','mean','median','std','range' ep_stats: ElementProperty stats. Options: "minimum", "maximum", "range", "mean", "avg_dev", "mode" radius_type: Shannon radius type to use in features. Accepts 'crystal_radius' or 'ionic_radius' normalize_formula: if True, normalize formula such that higher occupancy cation site has one formula unit (applies to Perovskite class only) silent: if False, print informational messages from Perovksite class categories: list of feature categories to return. If None, return all. Options: 'bonding','structure','charge','composition','electronic','elemental','periodic' """ feat_options = dict(sites=sites, ox_stats=ox_stats, radius_type=radius_type, normalize_formula=normalize_formula, silent=silent) self.featurize_options.update(feat_options) self.ElementProperty.stats = ep_stats def featurize(self, formula): """ Calculate features Parameters: ----------- formula: chemical formula string Returns: list of feature values """ if self.featurize_options == {}: raise Exception( 'Featurize options have not been set. Use set_featurize_options before featurizing' ) if self.ordered_formulas is True: pvsk = Perovskite.from_ordered_formula( formula, self.A_site_occupancy, self.anions, site_ox_lim=self.site_ox_lim, site_base_ox=self.site_base_ox, radius_type=self.featurize_options['radius_type'], silent=self.featurize_options['silent']) elif self.ordered_formulas is False: pvsk = Perovskite(formula, self.cation_site, self.site_ox_lim, self.site_base_ox, self.featurize_options['radius_type'], self.featurize_options['normalize_formula'], self.featurize_options['silent']) pvsk_features = pvsk.featurize(self.featurize_options['sites'], self.featurize_options['ox_stats']) mm_features = [] for site in self.featurize_options['sites']: vo_features = self.ValenceOrbital.featurize( pvsk.site_composition[site] ) #avg and frac s, p , d, f electrons vo_features += [sum(vo_features[0:3]) ] #avg total valence electrons ao_features = self.AtomicOrbitals.featurize( pvsk.site_composition[site] ) #H**O and LUMO character and energy levels (from atomic orbitals) ao_features = [ ao_features[i] for i in range(len(ao_features)) if i not in (0, 1, 3, 4) ] #exclude HOMO_character,HOMO_element, LUMO_character, LUMO_element - categoricals ce_features = self.CohesiveEnergy.featurize( pvsk.site_composition[site], formation_energy_per_atom=1e-10 ) #avg elemental cohesive energy ep_features = self.ElementProperty.featurize( pvsk.site_composition[site]) #elemental property features mm_features += vo_features + ao_features + ce_features + ep_features features = list(pvsk_features) + mm_features return features @property def matminer_labels(self): """ Feature labels for matminer-derived features """ labels = [ #ValenceOrbital labels 'valence_elec_s_mean', 'valence_elec_p_mean', 'valence_elec_d_mean', 'valence_elec_f_mean', 'valence_elec_s_frac', 'valence_elec_p_frac', 'valence_elec_d_frac', 'valence_elec_f_frac', 'valence_elec_tot_mean', #AtomicOrbitals labels #'HOMO_character', 'HOMO_energy', #'LUMO_character', 'LUMO_energy', 'AO_gap', #CohesiveEnergy labels 'cohesive_energy_mean' ] #ElementProperty labels labels += self.ElementProperty_custom_labels return labels @property def matminer_categories(self): """ Feature categories for matminer-derived features """ categories = [ #ValenceOrbital categories 'electronic', 'electronic', 'electronic', 'electronic', 'electronic', 'electronic', 'electronic', 'electronic', 'electronic', #AtomicOrbitals categories #'HOMO_character', 'electronic', #'LUMO_character', 'electronic', 'electronic', #CohesiveEnergy categories 'bonding' ] #ElementProperty categories categories += self.ElementProperty_categories return categories @property def matminer_units(self): """ Feature units for matminer-derived features """ units = [ #ValenceOrbital units 'none', 'none', 'none', 'none', 'none', 'none', 'none', 'none', 'none', #AtomicOrbitals units #'HOMO_character', 'energy', #'LUMO_character', 'energy', 'energy', #CohesiveEnergy units 'energy' ] #ElementProperty units units += self.ElementProperty_units return units def feature_labels(self): """ Get list of feature labels """ try: pvsk_labels = Perovskite.from_preset( 'BaCoO3', 'BCFZY', silent=True).feature_labels(self.featurize_options['sites'], self.featurize_options['ox_stats']) except KeyError: raise Exception( 'Featurize options have not been set. Use set_featurize_options before accessing feature labels' ) mm_labels = [] for site in self.featurize_options['sites']: if site == 'comp': site_label = 'comp' else: site_label = f'{site}site' mm_labels += [ f'{site_label}_{label}' for label in self.matminer_labels ] return pvsk_labels + mm_labels def feature_categories(self): """ Get list of feature categories. For quick filtering """ try: pvsk_categories = Perovskite.from_preset( 'BaCoO3', 'BCFZY', silent=True).feature_categories( self.featurize_options['sites'], self.featurize_options['ox_stats']) except KeyError: raise Exception( 'Featurize options have not been set. Use set_featurize_options before accessing feature labels' ) mm_categories = [] for site in self.featurize_options['sites']: mm_categories += self.matminer_categories return pvsk_categories + mm_categories def feature_units(self): """ Get list of feature labels. For dimensional analysis """ try: pvsk_units = Perovskite.from_preset( 'BaCoO3', 'BCFZY', silent=True).feature_units(self.featurize_options['sites'], self.featurize_options['ox_stats']) except KeyError: raise Exception( 'Featurize options have not been set. Use set_featurize_options before accessing feature labels' ) mm_units = [] for site in self.featurize_options['sites']: mm_units += self.matminer_units return pvsk_units + mm_units def check_matminer_featurizers(self): """ Check that features and feature order for matminer featurizers are as expected If features or feature order have changed, featurize() may return unexpected features that do not align with feature_labels() """ #verify that matminer feature labels haven't changed if self.ValenceOrbital.feature_labels() != [ 'avg s valence electrons', 'avg p valence electrons', 'avg d valence electrons', 'avg f valence electrons', 'frac s valence electrons', 'frac p valence electrons', 'frac d valence electrons', 'frac f valence electrons' ]: raise Exception('ValenceOrbital features or labels have changed') if self.AtomicOrbitals.feature_labels() != [ 'HOMO_character', 'HOMO_element', 'HOMO_energy', 'LUMO_character', 'LUMO_element', 'LUMO_energy', 'gap_AO' ]: raise Exception('AtomicOrbitals features or labels have changed') if self.CohesiveEnergy.feature_labels() != ['cohesive energy']: raise Exception('CohesiveEnergy features or labels have changed') self.ElementProperty_label_check()
def predict_log10_eps( target: Union[Structure, Composition], dielectric_type: str, model_type: str, ) -> float: """ :param target: structure or composition to predict dielectric constants :param dielectric_type: "el" or "ion" :param model_type: "comp" or "comp_st" :return: Descriptor vector """ if dielectric_type not in ["el", "ion"]: raise ValueError( f'Specify dielectric type "el" or "ion"\nInput: {dielectric_type}') if model_type not in ["comp", "comp_st"]: raise ValueError( f'Specify regression_type "comp" or "comp_st"\nInput: {model_type}' ) if model_type == "comp": if isinstance(target, Structure): comp = target.composition else: comp = target comp_oxi = comp.add_charges_from_oxi_state_guesses() if dielectric_type == "el": ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"), comp) valence = ScalarFeaturizer(ValenceOrbital(), comp) ion_prop = ScalarFeaturizer(IonProperty(), comp) en_diff = ScalarFeaturizer(ElectronegativityDiff(), comp_oxi) oxi_state = ScalarFeaturizer(OxidationStates.from_preset("deml"), comp_oxi) atomic_orbital = ScalarFeaturizer(AtomicOrbitals(), comp) descriptor = [ ep.get_from_label("PymatgenData minimum X"), ep.get_from_label("PymatgenData range X"), ep.get_from_label("PymatgenData std_dev X"), ep.get_from_label("PymatgenData mean row"), ep.get_from_label("PymatgenData std_dev row"), ep.get_from_label("PymatgenData mean group"), ep.get_from_label("PymatgenData mean block"), ep.get_from_label("PymatgenData std_dev block"), ep.get_from_label("PymatgenData mean atomic_mass"), ep.get_from_label("PymatgenData std_dev atomic_mass"), ep.get_from_label("PymatgenData std_dev atomic_radius"), ep.get_from_label("PymatgenData minimum mendeleev_no"), ep.get_from_label("PymatgenData range mendeleev_no"), ep.get_from_label("PymatgenData std_dev mendeleev_no"), ep.get_from_label("PymatgenData mean thermal_conductivity"), ep.get_from_label("PymatgenData std_dev thermal_conductivity"), ep.get_from_label("PymatgenData mean melting_point"), ep.get_from_label("PymatgenData std_dev melting_point"), valence.get_from_label("avg s valence electrons"), valence.get_from_label("avg d valence electrons"), valence.get_from_label("frac s valence electrons"), valence.get_from_label("frac p valence electrons"), valence.get_from_label("frac d valence electrons"), ion_prop.get_from_label("avg ionic char"), TMetalFraction().featurize(comp)[0], en_diff.get_from_label("maximum EN difference"), en_diff.get_from_label("range EN difference"), en_diff.get_from_label("mean EN difference"), en_diff.get_from_label("std_dev EN difference"), BandCenter().featurize(comp)[0], oxi_state.get_from_label("std_dev oxidation state"), atomic_orbital.get_from_label("HOMO_energy"), atomic_orbital.get_from_label("LUMO_energy"), atomic_orbital.get_from_label("gap_AO"), ] elif dielectric_type == "ion": stoich = ScalarFeaturizer(Stoichiometry(), comp) ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"), comp) valence = ScalarFeaturizer(ValenceOrbital(), comp) ion_prop = ScalarFeaturizer(IonProperty(), comp) en_diff = ScalarFeaturizer(ElectronegativityDiff(), comp_oxi) oxi_state = ScalarFeaturizer(OxidationStates.from_preset("deml"), comp_oxi) atomic_orbital = ScalarFeaturizer(AtomicOrbitals(), comp) at_pack_eff = ScalarFeaturizer(AtomicPackingEfficiency(), comp) descriptor = [ stoich.get_from_label("3-norm"), stoich.get_from_label("5-norm"), ep.get_from_label("PymatgenData mean X"), ep.get_from_label("PymatgenData mean row"), ep.get_from_label("PymatgenData std_dev row"), ep.get_from_label("PymatgenData std_dev group"), ep.get_from_label("PymatgenData mean block"), ep.get_from_label("PymatgenData std_dev block"), ep.get_from_label("PymatgenData maximum atomic_mass"), ep.get_from_label("PymatgenData range atomic_mass"), ep.get_from_label("PymatgenData mean atomic_mass"), ep.get_from_label("PymatgenData std_dev atomic_mass"), ep.get_from_label("PymatgenData maximum atomic_radius"), ep.get_from_label("PymatgenData range atomic_radius"), ep.get_from_label("PymatgenData mean atomic_radius"), ep.get_from_label("PymatgenData std_dev atomic_radius"), ep.get_from_label("PymatgenData minimum mendeleev_no"), ep.get_from_label("PymatgenData mean mendeleev_no"), ep.get_from_label("PymatgenData std_dev mendeleev_no"), ep.get_from_label("PymatgenData mean thermal_conductivity"), ep.get_from_label("PymatgenData std_dev thermal_conductivity"), ep.get_from_label("PymatgenData mean melting_point"), ep.get_from_label("PymatgenData std_dev melting_point"), valence.get_from_label("avg s valence electrons"), valence.get_from_label("frac s valence electrons"), valence.get_from_label("frac p valence electrons"), valence.get_from_label("frac d valence electrons"), ion_prop.get_from_label("avg ionic char"), TMetalFraction().featurize(comp)[0], en_diff.get_from_label("minimum EN difference"), en_diff.get_from_label("range EN difference"), en_diff.get_from_label("mean EN difference"), en_diff.get_from_label("std_dev EN difference"), oxi_state.get_from_label("range oxidation state"), oxi_state.get_from_label("std_dev oxidation state"), atomic_orbital.get_from_label("LUMO_energy"), atomic_orbital.get_from_label("gap_AO"), at_pack_eff.get_from_label("mean simul. packing efficiency"), at_pack_eff.get_from_label( "mean abs simul. packing efficiency"), at_pack_eff.get_from_label( "dist from 1 clusters |APE| < 0.010"), at_pack_eff.get_from_label( "dist from 3 clusters |APE| < 0.010"), at_pack_eff.get_from_label( "dist from 5 clusters |APE| < 0.010"), ] elif model_type == "comp_st": if isinstance(target, Composition): raise ValueError( 'comp_st (Using compositional and structural descriptor) is specified, ' 'but target is composition') comp: Composition = target.composition comp_oxi = comp.add_charges_from_oxi_state_guesses() target_orig = deepcopy(target) target.add_oxidation_state_by_guess() if dielectric_type == "el": ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"), comp) valence = ScalarFeaturizer(ValenceOrbital(), comp) en_diff = ScalarFeaturizer(ElectronegativityDiff(), comp_oxi) atomic_orbital = ScalarFeaturizer(AtomicOrbitals(), comp) density = ScalarFeaturizer(DensityFeatures(), target) dist_btw_nn = MinimumRelativeDistances().featurize(target_orig) opsf = SiteFeaturizer(OPSiteFingerprint(), target) voro_fp = SiteFeaturizer(VoronoiFingerprint(use_symm_weights=True), target) gsf = SiteFeaturizer(GaussianSymmFunc(), target) lpd = SiteFeaturizer( LocalPropertyDifference.from_preset("ward-prb-2017"), target) descriptor = [ ep.get_from_label("PymatgenData std_dev X"), ep.get_from_label("PymatgenData mean block"), ep.get_from_label("PymatgenData std_dev atomic_mass"), valence.get_from_label("frac d valence electrons"), TMetalFraction().featurize(comp)[0], en_diff.get_from_label("maximum EN difference"), en_diff.get_from_label("mean EN difference"), atomic_orbital.get_from_label("HOMO_energy"), atomic_orbital.get_from_label("LUMO_energy"), density.get_from_label("density"), np.mean(dist_btw_nn), np.std(dist_btw_nn), opsf.get_from_label_func("tetrahedral CN_4", np.max), opsf.get_from_label_func("rectangular see-saw-like CN_4", np.max), np.max([ EwaldSiteEnergy(accuracy=4).featurize(target, i) for i in range(target.num_sites) ]), voro_fp.get_from_label_func("Voro_area_std_dev", np.max), voro_fp.get_from_label_func("Voro_area_std_dev", np.mean), voro_fp.get_from_label_func("Voro_dist_minimum", np.min), voro_fp.get_from_label_func("Voro_dist_minimum", np.std), gsf.get_from_label_func("G2_20.0", np.std), gsf.get_from_label_func("G2_80.0", np.max), gsf.get_from_label_func("G4_0.005_4.0_-1.0", np.mean), lpd.get_from_label_func("local difference in NdValence", np.mean), lpd.get_from_label_func("local difference in NValence", np.min), lpd.get_from_label_func("local difference in NValence", np.std), lpd.get_from_label_func("local difference in NdUnfilled", np.mean), lpd.get_from_label_func("local difference in NUnfilled", np.min), lpd.get_from_label_func("local difference in NUnfilled", np.mean), lpd.get_from_label_func("local difference in GSmagmom", np.mean) ] elif dielectric_type == "ion": ep = ScalarFeaturizer(ElementProperty.from_preset("matminer"), comp) atomic_orbitals = ScalarFeaturizer(AtomicOrbitals(), comp) density = ScalarFeaturizer(DensityFeatures(), target) str_het = ScalarFeaturizer(StructuralHeterogeneity(), target) opsf = SiteFeaturizer(OPSiteFingerprint(), target) voro_fp = SiteFeaturizer(VoronoiFingerprint(use_symm_weights=True), target) gsf = SiteFeaturizer(GaussianSymmFunc(), target) lpd = SiteFeaturizer( LocalPropertyDifference.from_preset("ward-prb-2017"), target) descriptor = [ ep.get_from_label("PymatgenData std_dev row"), ep.get_from_label("PymatgenData mean thermal_conductivity"), ep.get_from_label("PymatgenData std_dev melting_point"), TMetalFraction().featurize(comp)[0], atomic_orbitals.get_from_label("gap_AO"), density.get_from_label("density"), density.get_from_label("packing fraction"), str_het.get_from_label("mean neighbor distance variation"), str_het.get_from_label("avg_dev neighbor distance variation"), opsf.get_from_label_func("sgl_bd CN_1", np.mean), opsf.get_from_label_func("bent 150 degrees CN_2", np.mean), opsf.get_from_label_func("linear CN_2", np.mean), opsf.get_from_label_func("trigonal planar CN_3", np.mean), opsf.get_from_label_func("pentagonal planar CN_5", np.std), opsf.get_from_label_func("octahedral CN_6", np.max), opsf.get_from_label_func("octahedral CN_6", np.std), opsf.get_from_label_func("q6 CN_12", np.mean), np.max([ EwaldSiteEnergy(accuracy=4).featurize(target, i) for i in range(target.num_sites) ]), voro_fp.get_from_label_func("Symmetry_weighted_index_4", np.std), voro_fp.get_from_label_func("Voro_vol_maximum", np.mean), voro_fp.get_from_label_func("Voro_area_std_dev", np.mean), voro_fp.get_from_label_func("Voro_area_minimum", np.std), voro_fp.get_from_label_func("Voro_area_maximum", np.min), voro_fp.get_from_label_func("Voro_dist_std_dev", np.mean), gsf.get_from_label_func("G2_80.0", np.min), gsf.get_from_label_func("G4_0.005_4.0_1.0", np.std), lpd.get_from_label_func("local difference in Number", np.max), lpd.get_from_label_func("local difference in MendeleevNumber", np.max), lpd.get_from_label_func("local difference in MendeleevNumber", np.min), lpd.get_from_label_func("local difference in AtomicWeight", np.max), lpd.get_from_label_func("local difference in AtomicWeight", np.mean), lpd.get_from_label_func("local difference in MeltingT", np.mean), lpd.get_from_label_func("local difference in Row", np.max), lpd.get_from_label_func( "local difference in Electronegativity", np.min), lpd.get_from_label_func("local difference in NValence", np.std), lpd.get_from_label_func("local difference in NsUnfilled", np.mean), lpd.get_from_label_func("local difference in NdUnfilled", np.max), lpd.get_from_label_func("local difference in NdUnfilled", np.std), lpd.get_from_label_func("local difference in NUnfilled", np.max), lpd.get_from_label_func("local difference in NUnfilled", np.min), lpd.get_from_label_func("local difference in NUnfilled", np.mean), lpd.get_from_label_func("local difference in NUnfilled", np.std), lpd.get_from_label_func("local difference in GSvolume_pa", np.max), lpd.get_from_label_func("local difference in GSvolume_pa", np.min), lpd.get_from_label_func("local difference in SpaceGroupNumber", np.max), ] with open( f"{os.path.dirname(__file__)}/{dielectric_type}_{model_type}.joblib", "rb") as fr: model: RandomForestRegressor = joblib.load(fr) with open( f"{os.path.dirname(__file__)}/{dielectric_type}_{model_type}_scaler.joblib", "rb") as fr: scaler: StandardScaler = joblib.load(fr) descriptor = scaler.transform([descriptor]) return model.predict(descriptor)[0]
def featurize_structures(self, featurizer=None, **kwargs): """ Featurizes the hypothetical structures available from hypo_structures method. Hypothetical structures for which featurization fails is removed and valid structures are made available as valid_structures Args: featurizer (Featurizer): A MatMiner Featurizer. Defaults to MultipleFeaturizer with PRB Ward Voronoi descriptors. **kwargs (dict): kwargs passed to featurize_many method of featurizer. Returns: pandas.DataFrame: features """ # Note the redundancy here is for pandas to work if self.hypo_structures is None: warnings.warn("No structures available. Generating structures.") self.get_structures() print("Generating features") featurizer = featurizer if featurizer else MultipleFeaturizer([ SiteStatsFingerprint.from_preset( "CoordinationNumber_ward-prb-2017"), StructuralHeterogeneity(), ChemicalOrdering(), MaximumPackingEfficiency(), SiteStatsFingerprint.from_preset( "LocalPropertyDifference_ward-prb-2017"), StructureComposition(Stoichiometry()), StructureComposition(ElementProperty.from_preset("magpie")), StructureComposition(ValenceOrbital(props=['frac'])), StructureComposition(IonProperty(fast=True)) ]) features = featurizer.featurize_many( self.hypo_structures['pmg_structures'], ignore_errors=True, **kwargs) n_species, formula = [], [] for s in self.hypo_structures['pmg_structures']: n_species.append(len(s.composition.elements)) formula.append(s.composition.formula) self._features_df = pd.DataFrame.from_records( features, columns=featurizer.feature_labels()) self._features_df.index = self.hypo_structures.index self._features_df['N_species'] = n_species self._features_df['Composition'] = formula self.features = self._features_df.dropna(axis=0, how='any') self.features = self.features.reindex(sorted(self.features.columns), axis=1) self._valid_structure_labels = list(self.features.index) self.valid_structures = self.hypo_structures.loc[ self._valid_structure_labels] print("{} out of {} structures were successfully featurized.".format( self.features.shape[0], self._features_df.shape[0])) return self.features
def AddFeatures(df): # Add features by Matminer from matminer.featurizers.conversions import StrToComposition df = StrToComposition().featurize_dataframe(df, "formula") from matminer.featurizers.composition import ElementProperty ep_feat = ElementProperty.from_preset(preset_name="magpie") df = ep_feat.featurize_dataframe( df, col_id="composition" ) # input the "composition" column to the featurizer from matminer.featurizers.conversions import CompositionToOxidComposition from matminer.featurizers.composition import OxidationStates df = CompositionToOxidComposition().featurize_dataframe(df, "composition") os_feat = OxidationStates() df = os_feat.featurize_dataframe(df, "composition_oxid") from matminer.featurizers.composition import ElectronAffinity ea_feat = ElectronAffinity() df = ea_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import BandCenter bc_feat = BandCenter() df = bc_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import CohesiveEnergy ce_feat = CohesiveEnergy() df = ce_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import Miedema m_feat = Miedema() df = m_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import TMetalFraction tmf_feat = TMetalFraction() df = tmf_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import ValenceOrbital vo_feat = ValenceOrbital() df = vo_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import YangSolidSolution yss_feat = YangSolidSolution() df = yss_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.structure import GlobalSymmetryFeatures # This is the border between compositional features and structural features. Comment out the following featurizers to use only compostional features. gsf_feat = GlobalSymmetryFeatures() df = gsf_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import StructuralComplexity sc_feat = StructuralComplexity() df = sc_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import ChemicalOrdering co_feat = ChemicalOrdering() df = co_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import MaximumPackingEfficiency mpe_feat = MaximumPackingEfficiency() df = mpe_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import MinimumRelativeDistances mrd_feat = MinimumRelativeDistances() df = mrd_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import StructuralHeterogeneity sh_feat = StructuralHeterogeneity() df = sh_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import SiteStatsFingerprint from matminer.featurizers.site import AverageBondLength from pymatgen.analysis.local_env import CrystalNN bl_feat = SiteStatsFingerprint( AverageBondLength(CrystalNN(search_cutoff=20))) df = bl_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.site import AverageBondAngle ba_feat = SiteStatsFingerprint( AverageBondAngle(CrystalNN(search_cutoff=20))) df = ba_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.site import BondOrientationalParameter bop_feat = SiteStatsFingerprint(BondOrientationalParameter()) df = bop_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.site import CoordinationNumber cn_feat = SiteStatsFingerprint(CoordinationNumber()) df = cn_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import DensityFeatures df_feat = DensityFeatures() df = df_feat.featurize_dataframe(df, "structure", ignore_errors=True) return (df)
OxidCompositions = CompositionToOxidComposition() print(OxidCompositions.feature_labels()) df = OxidCompositions.featurize_dataframe(df, 'composition') #CohesiveEnergy from matminer.featurizers.composition import CohesiveEnergy cohesive_energy = CohesiveEnergy() cohesive_energy.set_n_jobs(28) labels.append(cohesive_energy.feature_labels()) df = cohesive_energy.featurize_dataframe(df, 'composition', ignore_errors=True) #ValenceOrbital from matminer.featurizers.composition import ValenceOrbital valence_orbital = ValenceOrbital() valence_orbital.set_n_jobs(28) labels.append(valence_orbital.feature_labels()) df = valence_orbital.featurize_dataframe(df, 'composition', ignore_errors=True) #AtomicOrbital from matminer.featurizers.composition import AtomicOrbitals atomic_orbitals = AtomicOrbitals() atomic_orbitals.set_n_jobs(28) labels.append(atomic_orbitals.feature_labels()) df = atomic_orbitals.featurize_dataframe(df, 'composition', ignore_errors=True)
class BCA_Featurizer(BaseFeaturizer): def __init__(self,radius_type='ionic_radius',normalize_formula=False): self.radius_type = radius_type self.normalize_formula = normalize_formula self.ValenceOrbital = ValenceOrbital() self.AtomicOrbitals = AtomicOrbitalsMod() self.CohesiveEnergy = CohesiveEnergy() self.BandCenter = BandCenter() self.ValenceOrbitalEnergy = ValenceOrbitalEnergy() #custom ElementProperty featurizer elemental_properties = ['BoilingT', 'MeltingT', 'BulkModulus', 'ShearModulus', 'Row', 'Column', 'Number', 'MendeleevNumber', 'SpaceGroupNumber', 'Density','MolarVolume', 'FusionEnthalpy','HeatVaporization', 'Polarizability', 'ThermalConductivity'] self.ElementProperty = ElementProperty(data_source='magpie',features=elemental_properties, stats=["mean", "std_dev"]) #check matminer featurizers self.check_matminer_featurizers() def featurize(self,composition): bca = BCA(composition,self.radius_type,self.normalize_formula) bca_features = bca.featurize() vo_features = self.ValenceOrbital.featurize(bca.metal_composition) #avg and frac s, p , d, f electrons for metals vo_features += [sum(vo_features[0:3])] #avg total valence electrons for metals ao_features = self.AtomicOrbitals.featurize(bca.metal_composition) #H**O and LUMO character and energy levels for metals from atomic orbitals) ao_features = [ao_features[i] for i in range(len(ao_features)) if i not in (0,1,3,4)]#exclude HOMO_character,HOMO_element, LUMO_character, LUMO_element - categoricals ce_features = self.CohesiveEnergy.featurize(bca.metal_composition,formation_energy_per_atom=1e-10) #avg metal elemental cohesive energy bc_features = self.BandCenter.featurize(bca.metal_composition) + self.BandCenter.featurize(bca.composition) ve_features = self.ValenceOrbitalEnergy.featurize(bca.metal_composition) + self.ValenceOrbitalEnergy.featurize(bca.composition) ep_features = self.ElementProperty.featurize(bca.metal_composition) + self.ElementProperty.featurize(bca.composition) mm_features = vo_features + ao_features + ce_features + bc_features + ve_features + ep_features return list(bca_features.values()) + mm_features @property def ElementProperty_custom_labels(self): """ Generate custom labels for ElementProperty featurizer that follow same naming convention as Perovskite class """ elemental_property_label_map = {'BoilingT':'boil_temp','MeltingT':'melt_temp', 'BulkModulus':'bulk_mod','ShearModulus':'shear_mod', 'Row':'row','Column':'column','Number':'number','MendeleevNumber':'mendeleev','SpaceGroupNumber':'space_group', 'Density':'density','MolarVolume':'molar_vol', 'FusionEnthalpy':'H_fus','HeatVaporization':'H_vap', 'Polarizability':'polarizability', 'ThermalConductivity':'sigma_therm'} element_property_labels = list(map(elemental_property_label_map.get,self.ElementProperty.features)) labels = [] for attr in element_property_labels: for stat in self.ElementProperty.stats: if stat=='std_dev': stat = 'std' labels.append(f'M_{attr}_{stat}') for attr in element_property_labels: for stat in self.ElementProperty.stats: if stat=='std_dev': stat = 'std' labels.append(f'BCA_{attr}_{stat}') return labels @property def ElementProperty_units(self): """ Generate units for ElementProperty featurizer that follow same naming convention as Perovskite class """ elemental_property_unit_map = {'BoilingT':'temperature','MeltingT':'temperature', 'BulkModulus':'pressure','ShearModulus':'pressure', 'Row':'none','Column':'none','Number':'none','MendeleevNumber':'none','SpaceGroupNumber':'none', 'Density':'density','MolarVolume':'volume', 'FusionEnthalpy':'energy','HeatVaporization':'energy', 'Polarizability':'polarizability', 'ThermalConductivity':'therm'} element_property_units = list(map(elemental_property_unit_map.get,self.ElementProperty.features)) units = [] for ep_unit in element_property_units: for stat in self.ElementProperty.stats: units.append(ep_unit) return units*2 def ElementProperty_label_check(self): """ Check that ElementProperty feature labels are as expected If not, features may not align with feature labels """ #ElementProperty.feature_labels() code as of 1/24/20 labels = [] for attr in self.ElementProperty.features: src = self.ElementProperty.data_source.__class__.__name__ for stat in self.ElementProperty.stats: labels.append("{} {} {}".format(src, stat, attr)) if labels!=self.ElementProperty.feature_labels(): raise Exception('ElementProperty features or labels have changed') @property def matminer_labels(self): """ Feature labels for matminer-derived features """ labels = [ #ValenceOrbital labels 'M_ValenceElec_s_mean', 'M_ValenceElec_p_mean', 'M_ValenceElec_d_mean', 'M_ValenceElec_f_mean', 'M_ValenceElec_s_frac', 'M_ValenceElec_p_frac', 'M_ValenceElec_d_frac', 'M_ValenceElec_f_frac', 'M_ValenceElec_tot_mean', #AtomicOrbitals labels #'M_HOMO_character', 'M_HOMO_energy', #'M_LUMO_character', 'M_LUMO_energy', 'M_AO_gap', #CohesiveEnergy labels 'M_cohesive_energy_mean', #BandCenter labels 'M_BandCenter', 'BCA_BandCenter', #ValenceOrbitalEnergy labels 'M_ValenceEnergy_mean', 'BCA_ValenceEnergy_mean' ] labels += self.ElementProperty_custom_labels return labels @property def matminer_units(self): """ Feature units for matminer-derived features """ units = [ #ValenceOrbital units 'none', 'none', 'none', 'none', 'none', 'none', 'none', 'none', 'none', #AtomicOrbitals units #'M_HOMO_character', 'energy', #'M_LUMO_character', 'energy', 'energy', #CohesiveEnergy units 'energy', #BandCenter units 'energy', 'energy', #ValenceOrbitalEnergy units 'energy', 'energy' ] units += self.ElementProperty_units return units def feature_labels(self): bca_feature_labels = list(BCA(mg.Composition('BaO'),self.radius_type,self.normalize_formula).featurize().keys()) return bca_feature_labels + self.matminer_labels def feature_units(self): bca_units = BCA(mg.Composition('BaO')).feature_units() return bca_units + self.matminer_units def check_matminer_featurizers(self): """ Check that features and feature order for matminer featurizers are as expected If features or feature order have changed, featurize() may return unexpected features that do not align with feature_labels() """ #verify that matminer feature labels haven't changed if self.ValenceOrbital.feature_labels() != ['avg s valence electrons', 'avg p valence electrons', 'avg d valence electrons', 'avg f valence electrons', 'frac s valence electrons', 'frac p valence electrons', 'frac d valence electrons', 'frac f valence electrons']: raise Exception('ValenceOrbital features or labels have changed') if self.AtomicOrbitals.feature_labels() != ['HOMO_character', 'HOMO_element', 'HOMO_energy', 'LUMO_character', 'LUMO_element', 'LUMO_energy', 'gap_AO']: raise Exception('AtomicOrbitals features or labels have changed') if self.CohesiveEnergy.feature_labels() != ['cohesive energy']: raise Exception('CohesiveEnergy features or labels have changed') if self.BandCenter.feature_labels() != ['band center']: raise Exception('BandCenter features or labels have changed') self.ElementProperty_label_check() def citations(self): featurizers = [self.ValenceOrbital, self.AtomicOrbitals, self.CohesiveEnergy, self.BandCenter, self.ValenceOrbitalEnergy, BCA(mg.Composition('BaO'))] return list(np.unique(sum([f.citations() for f in featurizers],[])))
'NdUnfilled', 'NfUnfilled', 'NUnfilled', 'GSvolume_pa', 'SpaceGroupNumber', 'GSbandgap', 'GSmagmom') #The following features will be created by using matminer package. featurizer = MultipleFeaturizer([ SiteStatsFingerprint(CoordinationNumber().from_preset('VoronoiNN'), stats=('mean', 'std_dev', 'minimum', 'maximum')), StructuralHeterogeneity(), ChemicalOrdering(), MaximumPackingEfficiency(), SiteStatsFingerprint( LocalPropertyDifference(properties=element_properties), stats=('mean', 'std_dev', 'minimum', 'maximum', 'range')), StructureComposition(Stoichiometry()), StructureComposition(ElementProperty.from_preset("magpie")), StructureComposition(ValenceOrbital(props=['frac'])), StructureComposition(IonProperty(fast=True)) ]) #Generate VT based features from the material's crystal lat_params. feature_data = featurizer.featurize_dataframe(df, col_id=['structure'], ignore_errors=True) #"lat_params","compound possible" and "material_id" are not resonable physical features, so we drop these three columns feature_data = feature_data.drop( ["structure", "compound possible", "material_id"], axis=1) #write the data into a csv file for later use feature_data.to_csv("data_delta_e_data.csv", index=False) from sklearn.model_selection import KFold, cross_val_score from sklearn.model_selection import train_test_split, KFold, ShuffleSplit from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
def feature_labels(self): # Since we have more features than just element fractions, append 'fraction' to element symbols for clarity element_fraction_features = [e + " fraction" for e in ElementFraction().feature_labels()] valence_orbital_features = ValenceOrbital().feature_labels() return element_fraction_features+self._element_property_feature_labels+valence_orbital_features