def __init__(self,
             cation_site=None,
             site_ox_lim=None,
             site_base_ox=None,
             ordered_formulas=False,
             A_site_occupancy=1,
             anions=None):
    """
    Store the site configuration and build the wrapped matminer featurizers.

    Parameters:
    -----------
    cation_site: dict of site assignments for cations, i.e. {el:site}.
        Must be given unless ordered_formulas is truthy
    site_ox_lim: dict of oxidation state limits for each site, i.e.
        {site:[min,max]}. If None, a fresh
        {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]} is used
    site_base_ox: dict of base oxidation state for each site, i.e.
        {site:ox}. If None, a fresh {'A': 2, 'B': 4, 'X': -2} is used
    ordered_formulas: stored as given; when falsy, cation_site is required
    A_site_occupancy: stored as given
    anions: stored as given

    Raises:
    -------
    ValueError: if cation_site is None and ordered_formulas is falsy
    """
    # Truthiness (rather than ``is False``) so that 0 / numpy.False_ are
    # validated the same way as the builtin False.
    if cation_site is None and not ordered_formulas:
        raise ValueError(
            'Either cation sites must be assigned, or formulas must be ordered. Otherwise site assignments can not be determined'
        )

    # The defaults are created per call: a dict literal in the signature
    # would be a single object shared (and mutable) across all instances.
    # Caller-supplied dicts are stored as-is, without copying.
    if site_ox_lim is None:
        site_ox_lim = {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
    if site_base_ox is None:
        site_base_ox = {'A': 2, 'B': 4, 'X': -2}

    self.cation_site = cation_site
    self.site_ox_lim = site_ox_lim
    self.site_base_ox = site_base_ox
    self.ordered_formulas = ordered_formulas
    self.A_site_occupancy = A_site_occupancy
    self.anions = anions

    #matminer featurizers
    self.ValenceOrbital = ValenceOrbital()
    self.AtomicOrbitals = AtomicOrbitalsMod()
    self.CohesiveEnergy = CohesiveEnergy()

    #custom ElementProperty featurizer
    elemental_properties = [
        'BoilingT', 'MeltingT', 'BulkModulus', 'ShearModulus', 'Row',
        'Column', 'Number', 'MendeleevNumber', 'SpaceGroupNumber',
        'Density', 'MolarVolume', 'FusionEnthalpy', 'HeatVaporization',
        'NsUnfilled', 'NpUnfilled', 'NdUnfilled', 'NfUnfilled',
        'Polarizability', 'ThermalConductivity'
    ]
    self.ElementProperty = ElementProperty(
        data_source='magpie',
        features=elemental_properties,
        stats=["mean", "std_dev", "range"])

    # Must run after all wrapped featurizers exist.
    self.check_matminer_featurizers()

    # Left empty here; filled in later, outside this constructor.
    self.featurize_options = {}
def test_cohesive_energy(self):
    """
    Featurize self.df with CohesiveEnergy and compare the first row's
    "cohesive energy" with the reference value 5.179 (2 decimal places).

    Skipped when MPRester reports no API key.
    """
    rester = MPRester()
    if not rester.api_key:
        raise SkipTest(
            "Materials Project API key not set; Skipping cohesive energy test"
        )

    featurizer = CohesiveEnergy()
    featurized = featurizer.featurize_dataframe(self.df, col_id="composition")
    first_value = featurized["cohesive energy"][0]
    self.assertAlmostEqual(first_value, 5.179, 2)
def __init__(self, radius_type='ionic_radius', normalize_formula=False):
    """
    Store the featurization settings and build the wrapped matminer
    featurizers.

    Parameters:
    -----------
    radius_type: stored on the instance as given
    normalize_formula: stored on the instance as given
    """
    # settings supplied by the caller
    self.radius_type = radius_type
    self.normalize_formula = normalize_formula

    # featurizers that take no configuration
    self.ValenceOrbital = ValenceOrbital()
    self.AtomicOrbitals = AtomicOrbitalsMod()
    self.CohesiveEnergy = CohesiveEnergy()
    self.BandCenter = BandCenter()
    self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()

    # ElementProperty limited to a hand-picked subset of magpie properties,
    # aggregated with mean and standard deviation only
    magpie_properties = [
        'BoilingT', 'MeltingT',
        'BulkModulus', 'ShearModulus',
        'Row', 'Column', 'Number', 'MendeleevNumber', 'SpaceGroupNumber',
        'Density', 'MolarVolume',
        'FusionEnthalpy', 'HeatVaporization',
        'Polarizability',
        'ThermalConductivity',
    ]
    self.ElementProperty = ElementProperty(
        data_source='magpie',
        features=magpie_properties,
        stats=["mean", "std_dev"],
    )

    # runs last: it inspects the featurizers created above
    self.check_matminer_featurizers()
def __init__(self, normalize_formula=False):
    """
    Store the normalization flag and build the wrapped matminer featurizers.

    Parameters:
    -----------
    normalize_formula: stored on the instance as given
    """
    self.normalize_formula = normalize_formula

    # ValenceOrbital is deliberately not instantiated - valence counts etc.
    # are covered in ElementProperty.from_preset('magpie')
    self.AtomicOrbitals = AtomicOrbitalsMod()
    self.CohesiveEnergy = CohesiveEnergy()
    self.BandCenter = BandCenter()
    self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()

    # magpie preset, extended with further elemental properties
    extra_properties = [
        'BoilingT',
        'BulkModulus', 'ShearModulus',
        'Density', 'MolarVolume',
        'FusionEnthalpy', 'HeatVaporization',
        'Polarizability',
        'ThermalConductivity',
    ]
    self.ElementProperty = ElementProperty.from_preset('magpie')
    self.ElementProperty.features += extra_properties
    # The preset's stats are left untouched. An override to
    # ['mean', 'avg_dev', 'mode'] was considered (range, min, max are
    # irrelevant inside the ternary) but is not applied.

    # runs last: it inspects the featurizers created above
    self.check_matminer_featurizers()
def _extract_features(self, df_input):
    """
    Extract features using Matminer from the 'structure' column in df_input

    Args:
        df_input (DataFrame): Pandas DataFrame which contains features from
            the Materials Project Database of the input samples. It must
            provide the 'theoretical', 'structure',
            'formation_energy_per_atom', 'PU_label' and 'icsd_ids' columns.
            NOTE: df_input is modified in place - 'theoretical' and
            'structure' are dropped and the feature columns are added.

    Returns:
        df_extracted (DataFrame): Pandas DataFrame which contains features
            of input samples extracted using Matminer. Missing values are
            replaced by the column mean, only rows with
            cohesive_energy > 0.0 are kept, 'PU_label' is moved to the last
            column and 'icsd_ids' is removed.
    """
    # Dropping the 'theoretical' column
    df_input.drop(columns=["theoretical"], inplace=True)

    # Extracting the features
    dfeat = DensityFeatures()
    symmfeat = GlobalSymmetryFeatures()
    mfeat = Meredig()
    cefeat = CohesiveEnergy()

    structures = list(df_input.structure)

    # Featurize every structure once and slice the result, instead of
    # re-running the featurizer for each extracted value.
    density_rows = [dfeat.featurize(structure) for structure in structures]
    df_input["density"] = [row[0] for row in density_rows]
    df_input["vpa"] = [row[1] for row in density_rows]
    df_input["packing fraction"] = [row[2] for row in density_rows]

    df_input["spacegroup_num"] = [
        symmfeat.featurize(structure)[0] for structure in structures
    ]

    df_input["cohesive_energy"] = df_input.apply(
        lambda row: cefeat.featurize(
            row.structure.composition,
            formation_energy_per_atom=row.formation_energy_per_atom,
        )[0],
        axis=1,
    )

    # Positions of the wanted values in the Meredig feature vector,
    # counted from its end.
    meredig_columns = (
        ("mean AtomicWeight", -17),
        ("range AtomicRadius", -12),
        ("mean AtomicRadius", -11),
        ("range Electronegativity", -10),
        ("mean Electronegativity", -9),
    )
    meredig_rows = [
        mfeat.featurize(structure.composition) for structure in structures
    ]
    for column, position in meredig_columns:
        df_input[column] = [row[position] for row in meredig_rows]

    # Drop 'structure' column
    df_input.drop(columns=["structure"], inplace=True)

    # Fill gaps with the column means, then ignore compounds that failed to
    # featurize. numeric_only=True keeps the mean working when the frame
    # still holds non-numeric columns (pandas >= 2 raises otherwise).
    column_means = df_input.mean(numeric_only=True)
    df_extracted = df_input.fillna(column_means).query("cohesive_energy > 0.0")

    # Re-arranging the 'PU Label' column so that it comes last
    pu_label = df_extracted["PU_label"]
    df_extracted = df_extracted.drop(["PU_label"], axis=1)
    df_extracted["PU_label"] = pu_label

    # Drop the icsd_ids column (reassign rather than mutate a derived frame)
    df_extracted = df_extracted.drop(columns=["icsd_ids"])

    return df_extracted
class PerovskiteProperty(BaseFeaturizer):
    """
    Class to calculate perovskite features. Includes custom features from the
    Perovskite class and generic features from ElementProperty,
    AtomicOrbitals, ValenceOrbital, and CohesiveEnergy matminer featurizers.

    Options for initializing:
        ordered_formula_featurizer(): for featurizing ordered formulas
        cation_site_featurizer(): for featurizing unordered formulas based on
            user-provided cation site assignments
        from_preset(): load a preset
    The class can also be called manually, but be aware that different
    parameter sets are required for an ordered formula featurizer instance
    than for a cation site featurizer instance.

    Parameters:
    -----------
    cation_site: dict of site assignments for cations, i.e. {el:site}.
        Elements not in cation_site are assumed to be anions on X-site
    site_ox_lim: dict of oxidation state limits for each site, i.e.
        {site:[min,max]}. Elements on sites are limited to oxidation states
        within these limits. If None, defaults to
        {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
    site_base_ox: dict of base oxidation state for each site, i.e. {site:ox}.
        Used for determining aliovalent ions and acceptor/donor dopants.
        If None, defaults to {'A': 2, 'B': 4, 'X': -2}
    ordered_formulas: if True, determine cation site assignments from order
    A_site_occupancy: Number of atoms on A site. Used when ordered_formulas
        is True
    anions: list of anions. Used when ordered_formulas is True

    Parameters for ordered formula featurizer: site_ox_lim, site_base_ox,
        A_site_occupancy, anions
    Parameters for cation site featurizer: cation_site, site_ox_lim,
        site_base_ox
    """

    def __init__(self,
                 cation_site=None,
                 site_ox_lim=None,
                 site_base_ox=None,
                 ordered_formulas=False,
                 A_site_occupancy=1,
                 anions=None):
        # Truthiness (rather than ``is False``) so that 0 / numpy.False_ are
        # validated the same way as the builtin False.
        if cation_site is None and not ordered_formulas:
            raise ValueError(
                'Either cation sites must be assigned, or formulas must be ordered. Otherwise site assignments can not be determined'
            )

        # Defaults are built per instance: dict literals in the signature
        # would be shared (and mutable) across every instance. Dicts passed
        # in by the caller are stored as-is, without copying.
        if site_ox_lim is None:
            site_ox_lim = {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
        if site_base_ox is None:
            site_base_ox = {'A': 2, 'B': 4, 'X': -2}

        self.cation_site = cation_site
        self.site_ox_lim = site_ox_lim
        self.site_base_ox = site_base_ox
        self.ordered_formulas = ordered_formulas
        self.A_site_occupancy = A_site_occupancy
        self.anions = anions

        #matminer featurizers
        self.ValenceOrbital = ValenceOrbital()
        self.AtomicOrbitals = AtomicOrbitalsMod()
        self.CohesiveEnergy = CohesiveEnergy()

        #custom ElementProperty featurizer
        elemental_properties = [
            'BoilingT', 'MeltingT', 'BulkModulus', 'ShearModulus', 'Row',
            'Column', 'Number', 'MendeleevNumber', 'SpaceGroupNumber',
            'Density', 'MolarVolume', 'FusionEnthalpy', 'HeatVaporization',
            'NsUnfilled', 'NpUnfilled', 'NdUnfilled', 'NfUnfilled',
            'Polarizability', 'ThermalConductivity'
        ]
        self.ElementProperty = ElementProperty(
            data_source='magpie',
            features=elemental_properties,
            stats=["mean", "std_dev", "range"])

        self.check_matminer_featurizers()

        # Empty until set_featurize_options() is called; featurize() and the
        # feature_* accessors refuse to run before that.
        self.featurize_options = {}

    @classmethod
    def from_preset(cls, preset_name):
        """
        Initialize from preset

        Parameters:
        -----------
        preset_name: name of preset to load. Currently accepts 'BCFZY'

        Raises:
        -------
        ValueError: if preset_name is not a known preset
        """
        if preset_name == 'BCFZY':
            #Ba(Co,Fe,Zr,Y)O_3-d system
            cation_site = {
                'Ba': 'A',
                'Co': 'B',
                'Fe': 'B',
                'Zr': 'B',
                'Y': 'B'
            }
            site_ox_lim = {'A': [2, 2], 'B': [2, 4], 'X': [-2, -2]}
            site_base_ox = {'A': 2, 'B': 4, 'X': -2}
        else:
            raise ValueError("Invalid preset_name specified!")

        return cls(cation_site, site_ox_lim, site_base_ox)

    @classmethod
    def ordered_formula_featurizer(cls,
                                   A_site_occupancy=1,
                                   anions=None,
                                   site_ox_lim=None,
                                   site_base_ox=None):
        """
        Convenience method for instantiating a featurizer for ordered formulas

        site_ox_lim and site_base_ox fall back to the constructor defaults
        when left as None.
        """
        return cls(cation_site=None,
                   site_ox_lim=site_ox_lim,
                   site_base_ox=site_base_ox,
                   ordered_formulas=True,
                   A_site_occupancy=A_site_occupancy,
                   anions=anions)

    @classmethod
    def cation_site_featurizer(cls,
                               cation_site,
                               site_ox_lim=None,
                               site_base_ox=None):
        """
        Convenience method for instantiating a featurizer for unordered
        formulas, based on site assignments

        site_ox_lim and site_base_ox fall back to the constructor defaults
        when left as None.
        """
        return cls(cation_site, site_ox_lim, site_base_ox)

    @property
    def ElementProperty_custom_labels(self):
        """
        Generate custom labels for ElementProperty featurizer that follow
        same naming convention as Perovskite class

        One label per (feature, stat) pair, feature-major, in the order of
        self.ElementProperty.features and self.ElementProperty.stats.
        """
        elemental_property_label_map = {
            'BoilingT': 'boil_temp',
            'MeltingT': 'melt_temp',
            'BulkModulus': 'bulk_mod',
            'ShearModulus': 'shear_mod',
            'Row': 'row',
            'Column': 'column',
            'Number': 'number',
            'MendeleevNumber': 'mendeleev',
            'SpaceGroupNumber': 'space_group',
            'Density': 'density',
            'MolarVolume': 'molar_vol',
            'FusionEnthalpy': 'H_fus',
            'HeatVaporization': 'H_vap',
            'NsUnfilled': 'valence_unfilled_s',
            'NpUnfilled': 'valence_unfilled_p',
            'NdUnfilled': 'valence_unfilled_d',
            'NfUnfilled': 'valence_unfilled_f',
            'Polarizability': 'polarizability',
            'ThermalConductivity': 'sigma_therm'
        }

        labels = []
        for feature in self.ElementProperty.features:
            # features missing from the map yield None, as with dict.get
            attr = elemental_property_label_map.get(feature)
            for stat in self.ElementProperty.stats:
                # 'std_dev' is shortened to 'std' in the custom labels
                stat_label = 'std' if stat == 'std_dev' else stat
                labels.append(f'{attr}_{stat_label}')
        return labels

    @property
    def ElementProperty_categories(self):
        """
        Generate categories for ElementProperty featurizer

        Each feature's category is repeated once per configured stat so the
        result lines up with ElementProperty_custom_labels.
        """
        elemental_property_category_map = {
            'BoilingT': 'elemental',
            'MeltingT': 'elemental',
            'BulkModulus': 'elemental',
            'ShearModulus': 'elemental',
            'Row': 'periodic',
            'Column': 'periodic',
            'Number': 'periodic',
            'MendeleevNumber': 'periodic',
            'SpaceGroupNumber': 'periodic',
            'Density': 'elemental',
            'MolarVolume': 'elemental',
            'FusionEnthalpy': 'elemental',
            'HeatVaporization': 'elemental',
            'NsUnfilled': 'electronic',
            'NpUnfilled': 'electronic',
            'NdUnfilled': 'electronic',
            'NfUnfilled': 'electronic',
            'Polarizability': 'elemental',
            'ThermalConductivity': 'elemental'
        }

        return [
            elemental_property_category_map.get(feature)
            for feature in self.ElementProperty.features
            for _ in self.ElementProperty.stats
        ]

    @property
    def ElementProperty_units(self):
        """
        Generate units for ElementProperty featurizer

        Each feature's unit is repeated once per configured stat so the
        result lines up with ElementProperty_custom_labels.
        """
        elemental_property_unit_map = {
            'BoilingT': 'temp',
            'MeltingT': 'temp',
            'BulkModulus': 'pressure',
            'ShearModulus': 'pressure',
            'Row': 'none',
            'Column': 'none',
            'Number': 'none',
            'MendeleevNumber': 'none',
            'SpaceGroupNumber': 'none',
            'Density': 'density',
            'MolarVolume': 'volume',
            'FusionEnthalpy': 'energy',
            'HeatVaporization': 'energy',
            'NsUnfilled': 'none',
            'NpUnfilled': 'none',
            'NdUnfilled': 'none',
            'NfUnfilled': 'none',
            'Polarizability': 'polarizability',  #complex units - doesn't matter
            'ThermalConductivity': 'therm'  #complex units - doesn't matter
        }

        return [
            elemental_property_unit_map.get(feature)
            for feature in self.ElementProperty.features
            for _ in self.ElementProperty.stats
        ]

    def ElementProperty_label_check(self):
        """
        Check that ElementProperty feature labels are as expected
        If not, features may not align with feature labels

        Raises:
        -------
        Exception: if the labels rebuilt here differ from
            self.ElementProperty.feature_labels()
        """
        #ElementProperty.feature_labels() code as of 2/17/19
        src = self.ElementProperty.data_source.__class__.__name__
        labels = [
            "{} {} {}".format(src, stat, attr)
            for attr in self.ElementProperty.features
            for stat in self.ElementProperty.stats
        ]

        if labels != self.ElementProperty.feature_labels():
            raise Exception('ElementProperty features or labels have changed')

    def set_featurize_options(self,
                              sites,
                              ox_stats=None,
                              ep_stats=None,
                              radius_type='ionic_radius',
                              normalize_formula=True,
                              silent=True,
                              categories=None):
        """
        Set options for featurization. Since these options should be the same
        for all compositions in a batch, set for the featurizer instance
        rather than passing as args to featurize() so that they do not have
        to be duplicated in every row of a DataFrame when calling
        featurize_dataframe(). Since these options change the number and
        meaning of features returned, it's also safest to set for the whole
        instance for consistency.

        Parameters:
        -----------
        sites: list or string of sites to featurize. Any combination of 'A',
            'B', 'X', and/or 'comp' accepted. Composition-level,
            oxidation-state-dependent features are always calculated by the
            Perovskite class. Passing '' or [] will return only these
            features. Specifying 'A','B', and/or 'X' sites will calculate
            site-level features for these sites (oxidation-state independent
            and dependent features, and matminer features). Including 'comp'
            will calculate oxidation-state-independent features and matminer
            features for the full composition.
            NOTE(review): the matminer feature loop in featurize() iterates
            `sites` element by element, so a string is processed one
            character at a time - confirm that multi-character entries such
            as 'comp' are always passed inside a list.
        ox_stats: list of aggregate functions to apply to oxidation state
            combinations for feature generation using Perovskite class.
            Options: 'min','max','mean','median','std','range'.
            If None, all six are used
        ep_stats: ElementProperty stats. Options: "minimum", "maximum",
            "range", "mean", "avg_dev", "mode".
            If None, ["mean", "std_dev", "range"] is used
        radius_type: Shannon radius type to use in features. Accepts
            'crystal_radius' or 'ionic_radius'
        normalize_formula: if True, normalize formula such that higher
            occupancy cation site has one formula unit (applies to Perovskite
            class only)
        silent: if False, print informational messages from Perovskite class
        categories: list of feature categories to return. If None, return
            all. Options: 'bonding','structure','charge','composition',
            'electronic','elemental','periodic'.
            NOTE(review): this argument is accepted but not used or stored
            by this method.
        """
        # Fresh lists per call: list literals in the signature would be
        # shared between instances through featurize_options and
        # ElementProperty.stats.
        if ox_stats is None:
            ox_stats = ['min', 'max', 'mean', 'median', 'std', 'range']
        if ep_stats is None:
            ep_stats = ["mean", "std_dev", "range"]

        feat_options = dict(sites=sites,
                            ox_stats=ox_stats,
                            radius_type=radius_type,
                            normalize_formula=normalize_formula,
                            silent=silent)
        self.featurize_options.update(feat_options)
        self.ElementProperty.stats = ep_stats

    def featurize(self, formula):
        """
        Calculate features

        Parameters:
        -----------
        formula: chemical formula string

        Returns: list of feature values (Perovskite features first, then the
            matminer features of every requested site in order)

        Raises:
        -------
        Exception: if set_featurize_options() has not been called
        """
        if not self.featurize_options:
            raise Exception(
                'Featurize options have not been set. Use set_featurize_options before featurizing'
            )
        options = self.featurize_options

        # if/else on truthiness: the former ``is True`` / ``elif is False``
        # pair left pvsk undefined for truthy non-bool flags (1, numpy.True_)
        if self.ordered_formulas:
            pvsk = Perovskite.from_ordered_formula(
                formula,
                self.A_site_occupancy,
                self.anions,
                site_ox_lim=self.site_ox_lim,
                site_base_ox=self.site_base_ox,
                radius_type=options['radius_type'],
                silent=options['silent'])
        else:
            pvsk = Perovskite(formula, self.cation_site, self.site_ox_lim,
                              self.site_base_ox, options['radius_type'],
                              options['normalize_formula'], options['silent'])

        pvsk_features = pvsk.featurize(options['sites'], options['ox_stats'])

        mm_features = []
        for site in options['sites']:
            site_composition = pvsk.site_composition[site]

            #avg and frac s, p, d, f electrons
            vo_features = self.ValenceOrbital.featurize(site_composition)
            #avg total valence electrons: the first four entries are the
            #s, p, d and f averages (see check_matminer_featurizers), so all
            #four are summed - the previous [0:3] slice left out f electrons
            vo_features += [sum(vo_features[0:4])]

            #HOMO and LUMO character and energy levels (from atomic orbitals)
            ao_features = self.AtomicOrbitals.featurize(site_composition)
            #exclude HOMO_character, HOMO_element, LUMO_character,
            #LUMO_element - categoricals
            ao_features = [
                value for idx, value in enumerate(ao_features)
                if idx not in (0, 1, 3, 4)
            ]

            #avg elemental cohesive energy
            #NOTE(review): presumably the tiny formation energy is passed so
            #CohesiveEnergy does not look the value up itself - confirm
            ce_features = self.CohesiveEnergy.featurize(
                site_composition, formation_energy_per_atom=1e-10)

            #elemental property features
            ep_features = self.ElementProperty.featurize(site_composition)

            mm_features += vo_features + ao_features + ce_features + ep_features

        return list(pvsk_features) + mm_features

    @property
    def matminer_labels(self):
        """
        Feature labels for matminer-derived features
        """
        labels = [
            #ValenceOrbital labels
            'valence_elec_s_mean',
            'valence_elec_p_mean',
            'valence_elec_d_mean',
            'valence_elec_f_mean',
            'valence_elec_s_frac',
            'valence_elec_p_frac',
            'valence_elec_d_frac',
            'valence_elec_f_frac',
            'valence_elec_tot_mean',
            #AtomicOrbitals labels (HOMO/LUMO character are excluded)
            'HOMO_energy',
            'LUMO_energy',
            'AO_gap',
            #CohesiveEnergy labels
            'cohesive_energy_mean'
        ]

        #ElementProperty labels
        labels += self.ElementProperty_custom_labels

        return labels

    @property
    def matminer_categories(self):
        """
        Feature categories for matminer-derived features
        """
        categories = [
            #ValenceOrbital categories
            'electronic',
            'electronic',
            'electronic',
            'electronic',
            'electronic',
            'electronic',
            'electronic',
            'electronic',
            'electronic',
            #AtomicOrbitals categories (HOMO/LUMO character are excluded)
            'electronic',
            'electronic',
            'electronic',
            #CohesiveEnergy categories
            'bonding'
        ]

        #ElementProperty categories
        categories += self.ElementProperty_categories

        return categories

    @property
    def matminer_units(self):
        """
        Feature units for matminer-derived features
        """
        units = [
            #ValenceOrbital units
            'none',
            'none',
            'none',
            'none',
            'none',
            'none',
            'none',
            'none',
            'none',
            #AtomicOrbitals units (HOMO/LUMO character are excluded)
            'energy',
            'energy',
            'energy',
            #CohesiveEnergy units
            'energy'
        ]

        #ElementProperty units
        units += self.ElementProperty_units

        return units

    def _sites_and_ox_stats(self):
        """
        Return (sites, ox_stats) from the featurize options.

        Kept separate so that only the option lookup - not the Perovskite
        calls made with the result - is translated into the
        "options have not been set" error.
        """
        try:
            return (self.featurize_options['sites'],
                    self.featurize_options['ox_stats'])
        except KeyError as err:
            raise Exception(
                'Featurize options have not been set. Use set_featurize_options before accessing feature labels'
            ) from err

    def feature_labels(self):
        """
        Get list of feature labels

        Raises:
        -------
        Exception: if set_featurize_options() has not been called
        """
        sites, ox_stats = self._sites_and_ox_stats()
        # A preset Perovskite instance is only used as a label generator.
        pvsk_labels = Perovskite.from_preset(
            'BaCoO3', 'BCFZY', silent=True).feature_labels(sites, ox_stats)

        mm_labels = []
        for site in sites:
            site_label = 'comp' if site == 'comp' else f'{site}site'
            mm_labels += [
                f'{site_label}_{label}' for label in self.matminer_labels
            ]

        return pvsk_labels + mm_labels

    def feature_categories(self):
        """
        Get list of feature categories. For quick filtering

        Raises:
        -------
        Exception: if set_featurize_options() has not been called
        """
        sites, ox_stats = self._sites_and_ox_stats()
        pvsk_categories = Perovskite.from_preset(
            'BaCoO3', 'BCFZY',
            silent=True).feature_categories(sites, ox_stats)

        # same matminer block repeated once per requested site
        mm_categories = []
        for _ in sites:
            mm_categories += self.matminer_categories

        return pvsk_categories + mm_categories

    def feature_units(self):
        """
        Get list of feature units. For dimensional analysis

        Raises:
        -------
        Exception: if set_featurize_options() has not been called
        """
        sites, ox_stats = self._sites_and_ox_stats()
        pvsk_units = Perovskite.from_preset(
            'BaCoO3', 'BCFZY', silent=True).feature_units(sites, ox_stats)

        # same matminer block repeated once per requested site
        mm_units = []
        for _ in sites:
            mm_units += self.matminer_units

        return pvsk_units + mm_units

    def check_matminer_featurizers(self):
        """
        Check that features and feature order for matminer featurizers are as
        expected. If features or feature order have changed, featurize() may
        return unexpected features that do not align with feature_labels()

        Raises:
        -------
        Exception: if any wrapped featurizer reports unexpected labels
        """
        #verify that matminer feature labels haven't changed
        if self.ValenceOrbital.feature_labels() != [
                'avg s valence electrons', 'avg p valence electrons',
                'avg d valence electrons', 'avg f valence electrons',
                'frac s valence electrons', 'frac p valence electrons',
                'frac d valence electrons', 'frac f valence electrons'
        ]:
            raise Exception('ValenceOrbital features or labels have changed')

        if self.AtomicOrbitals.feature_labels() != [
                'HOMO_character', 'HOMO_element', 'HOMO_energy',
                'LUMO_character', 'LUMO_element', 'LUMO_energy', 'gap_AO'
        ]:
            raise Exception('AtomicOrbitals features or labels have changed')

        if self.CohesiveEnergy.feature_labels() != ['cohesive energy']:
            raise Exception('CohesiveEnergy features or labels have changed')

        self.ElementProperty_label_check()
def AddFeatures(df):
    """
    Add features by Matminer.

    Runs a fixed sequence of matminer featurizers and returns the resulting
    DataFrame: first the "formula" -> "composition" -> "composition_oxid"
    conversions and composition featurizers, then the featurizers that read
    the "structure" column. Apart from the first four steps, every
    featurizer is run with ignore_errors=True.
    """
    from matminer.featurizers.conversions import StrToComposition
    from matminer.featurizers.conversions import CompositionToOxidComposition
    from matminer.featurizers.composition import ElementProperty
    from matminer.featurizers.composition import OxidationStates
    from matminer.featurizers.composition import ElectronAffinity
    from matminer.featurizers.composition import BandCenter
    from matminer.featurizers.composition import CohesiveEnergy
    from matminer.featurizers.composition import Miedema
    from matminer.featurizers.composition import TMetalFraction
    from matminer.featurizers.composition import ValenceOrbital
    from matminer.featurizers.composition import YangSolidSolution
    from matminer.featurizers.structure import GlobalSymmetryFeatures
    from matminer.featurizers.structure import StructuralComplexity
    from matminer.featurizers.structure import ChemicalOrdering
    from matminer.featurizers.structure import MaximumPackingEfficiency
    from matminer.featurizers.structure import MinimumRelativeDistances
    from matminer.featurizers.structure import StructuralHeterogeneity
    from matminer.featurizers.structure import SiteStatsFingerprint
    from matminer.featurizers.structure import DensityFeatures
    from matminer.featurizers.site import AverageBondLength
    from matminer.featurizers.site import AverageBondAngle
    from matminer.featurizers.site import BondOrientationalParameter
    from matminer.featurizers.site import CoordinationNumber
    from pymatgen.analysis.local_env import CrystalNN

    # Conversions and featurizers that run without ignore_errors.
    df = StrToComposition().featurize_dataframe(df, "formula")
    magpie = ElementProperty.from_preset(preset_name="magpie")
    # input the "composition" column to the featurizer
    df = magpie.featurize_dataframe(df, col_id="composition")
    df = CompositionToOxidComposition().featurize_dataframe(df, "composition")
    df = OxidationStates().featurize_dataframe(df, "composition_oxid")

    # Compositional featurizers fed from "composition_oxid". Each entry is a
    # zero-argument factory so a featurizer is only built right before use.
    compositional = (
        ElectronAffinity,
        BandCenter,
        CohesiveEnergy,
        Miedema,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    for make_featurizer in compositional:
        df = make_featurizer().featurize_dataframe(
            df, "composition_oxid", ignore_errors=True)

    # This is the border between compositional features and structural
    # features. Comment out the following featurizers to use only
    # compositional features.
    structural = (
        GlobalSymmetryFeatures,
        StructuralComplexity,
        ChemicalOrdering,
        MaximumPackingEfficiency,
        MinimumRelativeDistances,
        StructuralHeterogeneity,
        lambda: SiteStatsFingerprint(
            AverageBondLength(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(
            AverageBondAngle(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(BondOrientationalParameter()),
        lambda: SiteStatsFingerprint(CoordinationNumber()),
        DensityFeatures,
    )
    for make_featurizer in structural:
        df = make_featurizer().featurize_dataframe(
            df, "structure", ignore_errors=True)

    return df
# Disabled step, kept for reference:
#df = structural_heterogeneity.featurize_dataframe(df, 'structures',ignore_errors=False)

from matminer.featurizers.conversions import StructureToComposition
from matminer.featurizers.conversions import CompositionToOxidComposition
from matminer.featurizers.composition import CohesiveEnergy
from matminer.featurizers.composition import ValenceOrbital

# convert structure to composition
structures_to_compositions = StructureToComposition()
df = structures_to_compositions.featurize_dataframe(df, 'structures')

# convert composition to oxidcomposition
OxidCompositions = CompositionToOxidComposition()
print(OxidCompositions.feature_labels())
df = OxidCompositions.featurize_dataframe(df, 'composition')

# CohesiveEnergy, then ValenceOrbital: both read the 'composition' column,
# use 28 jobs, and have their label list appended to `labels` as one entry.
cohesive_energy = CohesiveEnergy()
valence_orbital = ValenceOrbital()
for _composition_featurizer in (cohesive_energy, valence_orbital):
    _composition_featurizer.set_n_jobs(28)
    labels.append(_composition_featurizer.feature_labels())
    df = _composition_featurizer.featurize_dataframe(
        df, 'composition', ignore_errors=True)
def test_cohesive_energy(self):
    """
    Featurize self.df with CohesiveEnergy and compare the first row's
    "Cohesive Energy" with the reference value -18.24568582.
    """
    featurizer = CohesiveEnergy()
    featurized = featurizer.featurize_dataframe(self.df, col_id="composition")
    first_value = featurized["Cohesive Energy"][0]
    self.assertAlmostEqual(first_value, -18.24568582)
class GenericFeaturizer(BaseFeaturizer):
    """
    Featurizer to use generic properties available in matminer featurizers;
    no features from BCA class utilized
    """

    def __init__(self, normalize_formula=False):
        """
        Parameters:
        -----------
        normalize_formula: passed to BCA when a composition is featurized
        """
        self.normalize_formula = normalize_formula

        # don't need ValenceOrbital - valence counts etc. covered in
        # ElementProperty.from_preset('magpie')
        self.AtomicOrbitals = AtomicOrbitalsMod()
        self.CohesiveEnergy = CohesiveEnergy()
        self.BandCenter = BandCenter()
        self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()

        # ElementProperty featurizer with magpie properties plus additional
        # properties
        self.ElementProperty = ElementProperty.from_preset('magpie')
        self.ElementProperty.features += [
            'BoilingT',
            'BulkModulus', 'ShearModulus',
            'Density', 'MolarVolume',
            'FusionEnthalpy', 'HeatVaporization',
            'Polarizability',
            'ThermalConductivity'
        ]
        # The preset's stats are kept. (range, min, max are irrelevant inside
        # the ternary; an override to ['mean', 'avg_dev', 'mode'] was
        # considered but is not applied.)

        # check matminer featurizers
        self.check_matminer_featurizers()

    def featurize(self, composition):
        """
        Calculate matminer features for the metals and for the full
        composition.

        Parameters:
        -----------
        composition: passed to BCA as its first argument

        Returns: list of feature values in the order
            AtomicOrbitals, CohesiveEnergy, BandCenter, ValenceOrbitalEnergy,
            ElementProperty. The last three are computed for the metal
            composition first and the full composition second.
        """
        # use BCA just to get composition and metal_composition
        bca = BCA(composition, 'ionic_radius', self.normalize_formula)
        metal_composition = bca.metal_composition
        full_composition = bca.composition

        # HOMO and LUMO character and energy levels for metals (from atomic
        # orbitals)
        ao_features = self.AtomicOrbitals.featurize(metal_composition)
        # exclude HOMO_character, HOMO_element, LUMO_character, LUMO_element
        # - categoricals
        ao_features = [
            value for idx, value in enumerate(ao_features)
            if idx not in (0, 1, 3, 4)
        ]

        # avg metal elemental cohesive energy
        # NOTE(review): presumably the tiny formation energy is passed so
        # CohesiveEnergy does not look the value up itself - confirm
        ce_features = self.CohesiveEnergy.featurize(
            metal_composition, formation_energy_per_atom=1e-10)

        bc_features = (self.BandCenter.featurize(metal_composition)
                       + self.BandCenter.featurize(full_composition))
        ve_features = (self.ValenceOrbitalEnergy.featurize(metal_composition)
                       + self.ValenceOrbitalEnergy.featurize(full_composition))
        ep_features = (self.ElementProperty.featurize(metal_composition)
                       + self.ElementProperty.featurize(full_composition))

        mm_features = (ao_features + ce_features + bc_features + ve_features
                       + ep_features)

        return mm_features

    def feature_labels(self):
        """
        Feature labels for matminer-derived features
        """
        labels = [
            #AtomicOrbitals labels (HOMO/LUMO character are excluded)
            'M_HOMO_energy',
            'M_LUMO_energy',
            'M_AO_gap',
            #CohesiveEnergy labels
            'M_cohesive_energy_mean',
            #BandCenter labels
            'M_BandCenter',
            'BCA_BandCenter',
            #ValenceOrbitalEnergy labels
            'M_ValenceEnergy_mean',
            'BCA_ValenceEnergy_mean'
        ]

        ep_labels = self.ElementProperty.feature_labels()
        labels += [f'M {l}' for l in ep_labels]
        labels += [f'BCA {l}' for l in ep_labels]

        return labels

    @property
    def ElementProperty_units(self):
        """
        Generate units for ElementProperty featurizer

        One unit per (feature, stat) pair, feature-major, repeated twice
        because featurize() runs ElementProperty on the metal composition and
        on the full composition.

        NOTE(review): this property was referenced by matminer_units but not
        defined on this class. The unit names below follow the vocabulary of
        BCA_Featurizer.ElementProperty_units; features without an entry (for
        example most of the magpie preset) yield None - confirm or extend the
        map before relying on those entries.
        """
        elemental_property_unit_map = {
            'BoilingT': 'temperature',
            'MeltingT': 'temperature',
            'BulkModulus': 'pressure',
            'ShearModulus': 'pressure',
            'Row': 'none',
            'Column': 'none',
            'Number': 'none',
            'MendeleevNumber': 'none',
            'SpaceGroupNumber': 'none',
            'Density': 'density',
            'MolarVolume': 'volume',
            'FusionEnthalpy': 'energy',
            'HeatVaporization': 'energy',
            'NsUnfilled': 'none',
            'NpUnfilled': 'none',
            'NdUnfilled': 'none',
            'NfUnfilled': 'none',
            'Polarizability': 'polarizability',
            'ThermalConductivity': 'therm'
        }

        units = [
            elemental_property_unit_map.get(feature)
            for feature in self.ElementProperty.features
            for _ in self.ElementProperty.stats
        ]
        return units * 2

    @property
    def matminer_units(self):
        """
        Feature units for matminer-derived features

        Aligned entry-for-entry with feature_labels(). No ValenceOrbital
        units are listed because this featurizer does not use ValenceOrbital.
        """
        units = [
            #AtomicOrbitals units (HOMO/LUMO character are excluded)
            'energy',
            'energy',
            'energy',
            #CohesiveEnergy units
            'energy',
            #BandCenter units
            'energy',
            'energy',
            #ValenceOrbitalEnergy units
            'energy',
            'energy'
        ]

        units += self.ElementProperty_units

        return units

    def feature_units(self):
        """
        Get list of feature units, aligned with feature_labels().

        featurize() returns matminer features only, so no BCA units are
        prepended.
        """
        return self.matminer_units

    def check_matminer_featurizers(self):
        """
        Check that features and feature order for matminer featurizers are as
        expected. If features or feature order have changed, featurize() may
        return unexpected features that do not align with feature_labels()

        Raises:
        -------
        Exception: if a wrapped featurizer reports unexpected labels
        """
        #verify that matminer feature labels haven't changed
        if self.AtomicOrbitals.feature_labels() != [
                'HOMO_character', 'HOMO_element', 'HOMO_energy',
                'LUMO_character', 'LUMO_element', 'LUMO_energy', 'gap_AO'
        ]:
            raise Exception('AtomicOrbitals features or labels have changed')

        if self.CohesiveEnergy.feature_labels() != ['cohesive energy']:
            raise Exception('CohesiveEnergy features or labels have changed')

        if self.BandCenter.feature_labels() != ['band center']:
            raise Exception('BandCenter features or labels have changed')

    def citations(self):
        """
        Collect the citations of the wrapped featurizers plus the pymatgen
        citation, de-duplicated (and sorted) by np.unique.
        """
        featurizers = [
            self.AtomicOrbitals, self.CohesiveEnergy, self.BandCenter,
            self.ValenceOrbitalEnergy
        ]
        citations = []
        for featurizer in featurizers:
            citations += featurizer.citations()

        # add pymatgen citation
        citations += [
            "@article{Ong2012b,"
            "author = {Ong, Shyue Ping and Richards, William Davidson and Jain, Anubhav and Hautier, Geoffroy and Kocher, Michael and Cholia, Shreyas and Gunter, Dan and Chevrier, Vincent L. and Persson, Kristin A. and Ceder, Gerbrand},"
            "doi = {10.1016/j.commatsci.2012.10.028},"
            "file = {:Users/shyue/Mendeley Desktop/Ong et al/Computational Materials Science/2013 - Ong et al. - Python Materials Genomics (pymatgen) A robust, open-source python library for materials analysis.pdf:pdf;:Users/shyue/Mendeley Desktop/Ong et al/Computational Materials Science/2013 - Ong et al. - Python Materials Genomics (pymatgen) A robust, open-source python library for materials analysis(2).pdf:pdf},"
            "issn = {09270256},"
            "journal = {Computational Materials Science},"
            "month = feb,"
            "pages = {314--319},"
            "title = {{Python Materials Genomics (pymatgen): A robust, open-source python library for materials analysis}},"
            "url = {http://linkinghub.elsevier.com/retrieve/pii/S0927025612006295},"
            "volume = {68},"
            "year = {2013}"
            "}"
        ]
        return list(np.unique(citations))
class BCA_Featurizer(BaseFeaturizer):
    """
    Featurizer that concatenates the features of the BCA class with
    matminer-derived features.

    The matminer featurizers (ValenceOrbital, AtomicOrbitals, CohesiveEnergy,
    BandCenter, ValenceOrbitalEnergy and a custom ElementProperty) are applied
    to ``bca.metal_composition`` and, for some of them, to ``bca.composition``
    as well. Labels prefixed with ``M_`` belong to the former, labels prefixed
    with ``BCA_`` to the latter.

    Args:
        radius_type: forwarded to BCA as its second positional argument.
        normalize_formula: forwarded to BCA as its third positional argument.
    """

    def __init__(self, radius_type='ionic_radius', normalize_formula=False):
        self.radius_type = radius_type
        self.normalize_formula = normalize_formula

        # matminer featurizers
        self.ValenceOrbital = ValenceOrbital()
        self.AtomicOrbitals = AtomicOrbitalsMod()
        self.CohesiveEnergy = CohesiveEnergy()
        self.BandCenter = BandCenter()
        self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()

        # custom ElementProperty featurizer
        elemental_properties = [
            'BoilingT', 'MeltingT',
            'BulkModulus', 'ShearModulus',
            'Row', 'Column', 'Number', 'MendeleevNumber', 'SpaceGroupNumber',
            'Density', 'MolarVolume',
            'FusionEnthalpy', 'HeatVaporization',
            'Polarizability',
            'ThermalConductivity',
        ]
        self.ElementProperty = ElementProperty(
            data_source='magpie',
            features=elemental_properties,
            stats=["mean", "std_dev"],
        )

        # fail early if matminer's labels no longer match the hard-coded ones
        self.check_matminer_featurizers()

    def featurize(self, composition):
        """
        Compute BCA features followed by matminer-derived features.

        Args:
            composition: composition passed to BCA.

        Returns:
            list: values of ``BCA.featurize()`` followed by the ValenceOrbital,
            AtomicOrbitals, CohesiveEnergy, BandCenter, ValenceOrbitalEnergy
            and ElementProperty features, in the order of ``feature_labels()``.
        """
        bca = BCA(composition, self.radius_type, self.normalize_formula)
        bca_features = bca.featurize()

        # avg and frac s, p, d, f electrons for metals
        vo_features = list(self.ValenceOrbital.featurize(bca.metal_composition))
        # avg total valence electrons for metals: the first four entries are
        # the s, p, d and f averages (see check_matminer_featurizers), so all
        # four are summed. The previous [0:3] slice dropped the f electrons.
        vo_features += [sum(vo_features[:4])]

        # HOMO and LUMO character and energy levels for metals (from atomic orbitals)
        ao_features = self.AtomicOrbitals.featurize(bca.metal_composition)
        # exclude HOMO_character, HOMO_element, LUMO_character, LUMO_element - categoricals
        ao_features = [value for idx, value in enumerate(ao_features) if idx not in (0, 1, 3, 4)]

        # avg metal elemental cohesive energy
        ce_features = self.CohesiveEnergy.featurize(bca.metal_composition, formation_energy_per_atom=1e-10)

        # each of the following is evaluated for the metals and for the full composition
        bc_features = self.BandCenter.featurize(bca.metal_composition) + self.BandCenter.featurize(bca.composition)
        ve_features = (self.ValenceOrbitalEnergy.featurize(bca.metal_composition)
                       + self.ValenceOrbitalEnergy.featurize(bca.composition))
        ep_features = (self.ElementProperty.featurize(bca.metal_composition)
                       + self.ElementProperty.featurize(bca.composition))

        mm_features = vo_features + ao_features + ce_features + bc_features + ve_features + ep_features

        return list(bca_features.values()) + mm_features

    @property
    def ElementProperty_custom_labels(self):
        """
        Generate custom labels for ElementProperty featurizer that follow same naming convention as Perovskite class

        Returns:
            list: ``M_<attr>_<stat>`` labels for every feature/stat pair,
            followed by the same labels with the ``BCA_`` prefix. The stat
            name 'std_dev' is shortened to 'std'.
        """
        elemental_property_label_map = {
            'BoilingT': 'boil_temp', 'MeltingT': 'melt_temp',
            'BulkModulus': 'bulk_mod', 'ShearModulus': 'shear_mod',
            'Row': 'row', 'Column': 'column', 'Number': 'number',
            'MendeleevNumber': 'mendeleev', 'SpaceGroupNumber': 'space_group',
            'Density': 'density', 'MolarVolume': 'molar_vol',
            'FusionEnthalpy': 'H_fus', 'HeatVaporization': 'H_vap',
            'Polarizability': 'polarizability',
            'ThermalConductivity': 'sigma_therm',
        }

        element_property_labels = [elemental_property_label_map.get(feat) for feat in self.ElementProperty.features]
        stat_names = ['std' if stat == 'std_dev' else stat for stat in self.ElementProperty.stats]

        # metal ("M") labels first, then full-composition ("BCA") labels
        return [
            f'{prefix}_{attr}_{stat}'
            for prefix in ('M', 'BCA')
            for attr in element_property_labels
            for stat in stat_names
        ]

    @property
    def ElementProperty_units(self):
        """
        Generate units for ElementProperty featurizer that follow same naming convention as Perovskite class

        Returns:
            list: one unit name per feature/stat pair, repeated twice so that
            it lines up with ``ElementProperty_custom_labels``.
        """
        elemental_property_unit_map = {
            'BoilingT': 'temperature', 'MeltingT': 'temperature',
            'BulkModulus': 'pressure', 'ShearModulus': 'pressure',
            'Row': 'none', 'Column': 'none', 'Number': 'none',
            'MendeleevNumber': 'none', 'SpaceGroupNumber': 'none',
            'Density': 'density', 'MolarVolume': 'volume',
            'FusionEnthalpy': 'energy', 'HeatVaporization': 'energy',
            'Polarizability': 'polarizability',
            'ThermalConductivity': 'therm',
        }

        element_property_units = [elemental_property_unit_map.get(feat) for feat in self.ElementProperty.features]

        # every stat of a property shares that property's unit
        units = [ep_unit for ep_unit in element_property_units for _ in self.ElementProperty.stats]

        return units * 2

    def ElementProperty_label_check(self):
        """
        Check that ElementProperty feature labels are as expected
        If not, features may not align with feature labels

        Raises:
            Exception: if the labels rebuilt here differ from
                ``self.ElementProperty.feature_labels()``.
        """
        # ElementProperty.feature_labels() code as of 1/24/20
        src = self.ElementProperty.data_source.__class__.__name__
        labels = [
            "{} {} {}".format(src, stat, attr)
            for attr in self.ElementProperty.features
            for stat in self.ElementProperty.stats
        ]

        if labels != self.ElementProperty.feature_labels():
            raise Exception('ElementProperty features or labels have changed')

    @property
    def matminer_labels(self):
        """
        Feature labels for matminer-derived features
        """
        labels = [
            # ValenceOrbital labels
            'M_ValenceElec_s_mean',
            'M_ValenceElec_p_mean',
            'M_ValenceElec_d_mean',
            'M_ValenceElec_f_mean',
            'M_ValenceElec_s_frac',
            'M_ValenceElec_p_frac',
            'M_ValenceElec_d_frac',
            'M_ValenceElec_f_frac',
            'M_ValenceElec_tot_mean',
            # AtomicOrbitals labels (the categorical character/element entries are dropped in featurize)
            'M_HOMO_energy',
            'M_LUMO_energy',
            'M_AO_gap',
            # CohesiveEnergy labels
            'M_cohesive_energy_mean',
            # BandCenter labels
            'M_BandCenter',
            'BCA_BandCenter',
            # ValenceOrbitalEnergy labels
            'M_ValenceEnergy_mean',
            'BCA_ValenceEnergy_mean',
        ]

        labels += self.ElementProperty_custom_labels

        return labels

    @property
    def matminer_units(self):
        """
        Feature units for matminer-derived features
        """
        units = [
            # ValenceOrbital units
            'none',
            'none',
            'none',
            'none',
            'none',
            'none',
            'none',
            'none',
            'none',
            # AtomicOrbitals units (HOMO energy, LUMO energy, gap)
            'energy',
            'energy',
            'energy',
            # CohesiveEnergy units
            'energy',
            # BandCenter units
            'energy',
            'energy',
            # ValenceOrbitalEnergy units
            'energy',
            'energy',
        ]

        units += self.ElementProperty_units

        return units

    def feature_labels(self):
        """
        Labels for all features: the keys of ``BCA.featurize()`` for the
        placeholder composition 'BaO', followed by ``matminer_labels``.
        """
        placeholder = BCA(mg.Composition('BaO'), self.radius_type, self.normalize_formula)
        bca_feature_labels = list(placeholder.featurize().keys())

        return bca_feature_labels + self.matminer_labels

    def feature_units(self):
        """
        Units for all features: ``BCA.feature_units()`` for the placeholder
        composition 'BaO', followed by ``matminer_units``.
        """
        # build the placeholder with the same settings as feature_labels() and
        # featurize() so that the three stay aligned
        placeholder = BCA(mg.Composition('BaO'), self.radius_type, self.normalize_formula)
        bca_units = placeholder.feature_units()

        return bca_units + self.matminer_units

    def check_matminer_featurizers(self):
        """
        Check that features and feature order for matminer featurizers are as expected
        If features or feature order have changed, featurize() may return unexpected features that do not align with feature_labels()

        Raises:
            Exception: if any featurizer reports labels that differ from the
                expected lists below.
        """
        # verify that matminer feature labels haven't changed
        if self.ValenceOrbital.feature_labels() != ['avg s valence electrons',
                                                    'avg p valence electrons',
                                                    'avg d valence electrons',
                                                    'avg f valence electrons',
                                                    'frac s valence electrons',
                                                    'frac p valence electrons',
                                                    'frac d valence electrons',
                                                    'frac f valence electrons']:
            raise Exception('ValenceOrbital features or labels have changed')

        if self.AtomicOrbitals.feature_labels() != ['HOMO_character',
                                                    'HOMO_element',
                                                    'HOMO_energy',
                                                    'LUMO_character',
                                                    'LUMO_element',
                                                    'LUMO_energy',
                                                    'gap_AO']:
            raise Exception('AtomicOrbitals features or labels have changed')

        if self.CohesiveEnergy.feature_labels() != ['cohesive energy']:
            raise Exception('CohesiveEnergy features or labels have changed')

        if self.BandCenter.feature_labels() != ['band center']:
            raise Exception('BandCenter features or labels have changed')

        self.ElementProperty_label_check()

    def citations(self):
        """
        Return the de-duplicated citations of all matminer featurizers used
        here and of the BCA class.
        """
        featurizers = [self.ValenceOrbital, self.AtomicOrbitals, self.CohesiveEnergy,
                       self.BandCenter, self.ValenceOrbitalEnergy,
                       BCA(mg.Composition('BaO'))]
        # each featurizer returns a list; sum(..., []) flattens them before de-duplication
        return list(np.unique(sum([f.citations() for f in featurizers], [])))