Beispiel #1
0
    def _extract_features(self, df_input):
        """
        Extract Matminer features from the 'structure' column of df_input.

        Note that df_input is modified in place: the 'theoretical' and
        'structure' columns are dropped from it and the feature columns are
        added to it. The returned frame is a separate object.

        Args:
            df_input (DataFrame): Pandas DataFrame of the input samples. The
                code reads the columns 'theoretical', 'structure',
                'formation_energy_per_atom', 'PU_label' and 'icsd_ids'; a
                missing column raises a KeyError.

        Returns:
            df_extracted (DataFrame): Pandas DataFrame with the extracted
                feature columns, without the 'structure' and 'icsd_ids'
                columns, and with 'PU_label' moved to the last position.
                Missing values are replaced by the column mean and only rows
                with cohesive_energy > 0.0 are kept.

        """

        # Dropping the 'theoretical' column
        df_input.drop(columns=["theoretical"], inplace=True)

        # Featurizers
        dfeat = DensityFeatures()
        symmfeat = GlobalSymmetryFeatures()
        mfeat = Meredig()
        cefeat = CohesiveEnergy()

        structures = list(df_input["structure"])
        compositions = [structure.composition for structure in structures]

        # Featurize every structure once and split the result into columns,
        # instead of re-running the featurizer for each output column.
        density_rows = [dfeat.featurize(structure) for structure in structures]
        df_input["density"] = [row[0] for row in density_rows]
        df_input["vpa"] = [row[1] for row in density_rows]
        df_input["packing fraction"] = [row[2] for row in density_rows]

        df_input["spacegroup_num"] = [
            symmfeat.featurize(structure)[0] for structure in structures
        ]

        df_input["cohesive_energy"] = [
            cefeat.featurize(
                composition,
                formation_energy_per_atom=formation_energy,
            )[0]
            for composition, formation_energy in zip(
                compositions, df_input["formation_energy_per_atom"])
        ]

        # Positions are counted from the end of the Meredig feature vector.
        # NOTE(review): presumably these match the column names below -
        # verify against Meredig().feature_labels() when upgrading matminer.
        meredig_columns = (
            ("mean AtomicWeight", -17),
            ("range AtomicRadius", -12),
            ("mean AtomicRadius", -11),
            ("range Electronegativity", -10),
            ("mean Electronegativity", -9),
        )
        meredig_rows = [
            mfeat.featurize(composition) for composition in compositions
        ]
        for column, position in meredig_columns:
            df_input[column] = [row[position] for row in meredig_rows]

        # Drop 'structure' column
        df_input.drop(columns=["structure"], inplace=True)

        # Fill missing values with the column mean, then keep only rows with a
        # positive cohesive energy. numeric_only=True is required because
        # non-numeric columns (e.g. 'icsd_ids') are still present here and
        # DataFrame.mean() raises a TypeError on them in pandas >= 2.0.
        column_means = df_input.mean(numeric_only=True)
        df_extracted = df_input.fillna(column_means).query(
            "cohesive_energy > 0.0")

        # Move 'PU_label' to the last column and drop the icsd_ids column
        pu_label = df_extracted["PU_label"]
        df_extracted = df_extracted.drop(columns=["PU_label", "icsd_ids"])
        df_extracted["PU_label"] = pu_label

        return df_extracted
Beispiel #2
0
class GenericFeaturizer(BaseFeaturizer):
	"""
	Featurizer built from generic matminer featurizers only; featurize() emits no features from the BCA class.
	BCA is used solely to obtain the full composition and the metal composition of the input.
	"""
	def __init__(self,normalize_formula=False):
		"""
		Parameters:
		-----------
		normalize_formula: forwarded to BCA when each composition is parsed in featurize()
		"""
		self.normalize_formula = normalize_formula
		# ValenceOrbital is deliberately not used: valence counts etc. are covered by ElementProperty.from_preset('magpie')
		self.AtomicOrbitals = AtomicOrbitalsMod()
		self.CohesiveEnergy = CohesiveEnergy()
		self.BandCenter = BandCenter()
		self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()
		# ElementProperty featurizer: magpie preset properties plus the additional properties below
		self.ElementProperty = ElementProperty.from_preset('magpie')
		extra_properties = [
			'BoilingT',
			'BulkModulus', 'ShearModulus',
			'Density', 'MolarVolume',
			'FusionEnthalpy', 'HeatVaporization',
			'Polarizability',
			'ThermalConductivity',
		]
		self.ElementProperty.features += extra_properties
		# The preset stats are kept as-is (an earlier idea was to restrict them to mean/avg_dev/mode)

		# fail early if the matminer featurizers no longer return the expected features
		self.check_matminer_featurizers()

	def featurize(self,composition):
		"""
		Calculate matminer features for the metal composition and the full composition.

		Returns: list of feature values in the order AtomicOrbitals, CohesiveEnergy, BandCenter,
			ValenceOrbitalEnergy, ElementProperty
		"""
		# BCA is only used to get composition and metal_composition
		bca = BCA(composition,'ionic_radius',self.normalize_formula)

		# HOMO/LUMO character and element (positions 0, 1, 3, 4) are categoricals and are left out
		categorical_positions = (0, 1, 3, 4)
		ao_all = self.AtomicOrbitals.featurize(bca.metal_composition)
		ao_numeric = [value for position, value in enumerate(ao_all) if position not in categorical_positions]

		# presumably the tiny formation energy makes this the average elemental cohesive energy of the metals - TODO confirm
		cohesive = self.CohesiveEnergy.featurize(bca.metal_composition,formation_energy_per_atom=1e-10)

		# these featurizers are each applied to the metal composition first, then to the full composition
		paired = []
		for featurizer in (self.BandCenter, self.ValenceOrbitalEnergy, self.ElementProperty):
			paired += featurizer.featurize(bca.metal_composition) + featurizer.featurize(bca.composition)

		return ao_numeric + cohesive + paired

	def feature_labels(self):
		"""
		Feature labels for matminer-derived features, in the order returned by featurize()
		"""
		labels = [
			# AtomicOrbitals (categorical character/element entries are excluded)
			'M_HOMO_energy',
			'M_LUMO_energy',
			'M_AO_gap',
			# CohesiveEnergy
			'M_cohesive_energy_mean',
			# BandCenter
			'M_BandCenter',
			'BCA_BandCenter',
			# ValenceOrbitalEnergy
			'M_ValenceEnergy_mean',
			'BCA_ValenceEnergy_mean',
		]

		# ElementProperty labels: metal composition first, then full composition
		for prefix in ('M', 'BCA'):
			labels.extend(f'{prefix} {name}' for name in self.ElementProperty.feature_labels())

		return labels

	@property
	def matminer_units(self):
		"""
		Feature units for matminer-derived features

		NOTE(review): the nine leading 'none' entries are ValenceOrbital units, but featurize() and
		feature_labels() contain no ValenceOrbital features, and ElementProperty_units is not defined
		anywhere in this class - confirm whether this property is still expected to work.
		"""
		# ValenceOrbital units (9), then AtomicOrbitals (3), CohesiveEnergy (1), BandCenter (2), ValenceOrbitalEnergy (2)
		units = ['none'] * 9 + ['energy'] * 8

		units += self.ElementProperty_units

		return units

	def feature_units(self):
		"""
		Units of a reference BCA instance followed by matminer_units

		NOTE(review): featurize() returns no BCA features, so the BCA units prepended here have no
		counterpart in its output - confirm intent.
		"""
		reference_units = BCA(mg.Composition('BaO')).feature_units()

		return reference_units + self.matminer_units

	def check_matminer_featurizers(self):
		"""
		Check that features and feature order for matminer featurizers are as expected
		If features or feature order have changed, featurize() may return unexpected features that do not align with feature_labels()
		"""
		# (featurizer, labels it must report, error raised otherwise)
		expectations = (
			(
				self.AtomicOrbitals,
				['HOMO_character', 'HOMO_element', 'HOMO_energy',
				 'LUMO_character', 'LUMO_element', 'LUMO_energy', 'gap_AO'],
				'AtomicOrbitals features or labels have changed',
			),
			(
				self.CohesiveEnergy,
				['cohesive energy'],
				'CohesiveEnergy features or labels have changed',
			),
			(
				self.BandCenter,
				['band center'],
				'BandCenter features or labels have changed',
			),
		)
		for featurizer, expected_labels, message in expectations:
			if featurizer.feature_labels() != expected_labels:
				raise Exception(message)

	def citations(self):
		"""
		Unique citations of the AtomicOrbitals, CohesiveEnergy, BandCenter and ValenceOrbitalEnergy featurizers plus the pymatgen citation
		"""
		sources = (self.AtomicOrbitals, self.CohesiveEnergy, self.BandCenter, self.ValenceOrbitalEnergy)
		collected = [entry for featurizer in sources for entry in featurizer.citations()]
		# pymatgen citation
		pymatgen_entry = (
			"@article{Ong2012b,"
			"author = {Ong, Shyue Ping and Richards, William Davidson and Jain, Anubhav and Hautier, Geoffroy and Kocher, Michael and Cholia, Shreyas and Gunter, Dan and Chevrier, Vincent L. and Persson, Kristin A. and Ceder, Gerbrand},"
			"doi = {10.1016/j.commatsci.2012.10.028},"
			"file = {:Users/shyue/Mendeley Desktop/Ong et al/Computational Materials Science/2013 - Ong et al. - Python Materials Genomics (pymatgen) A robust, open-source python library for materials analysis.pdf:pdf;:Users/shyue/Mendeley Desktop/Ong et al/Computational Materials Science/2013 - Ong et al. - Python Materials Genomics (pymatgen) A robust, open-source python library for materials analysis(2).pdf:pdf},"
			"issn = {09270256},"
			"journal = {Computational Materials Science},"
			"month = feb,"
			"pages = {314--319},"
			"title = {{Python Materials Genomics (pymatgen): A robust, open-source python library for materials analysis}},"
			"url = {http://linkinghub.elsevier.com/retrieve/pii/S0927025612006295},"
			"volume = {68},"
			"year = {2013}"
			"}"
		)
		collected.append(pymatgen_entry)
		return list(np.unique(collected))
class PerovskiteProperty(BaseFeaturizer):
    """
    Class to calculate perovskite features. Includes custom features from the Perovskite class and generic features from the
    ElementProperty, AtomicOrbitals, ValenceOrbital, and CohesiveEnergy matminer featurizers.

    Options for initializing:
        ordered_formula_featurizer(): for featurizing ordered formulas
        cation_site_featurizer(): for featurizing unordered formulas based on user-provided cation site assignments
        from_preset(): load a preset
        The class can also be instantiated directly, but be aware that different parameter sets are required for an
        ordered formula featurizer instance than for a cation site featurizer instance.

    Parameters:
    -----------
    cation_site: dict of site assignments for cations, i.e. {el:site}. Elements not in cation_site are assumed to be anions on X-site
    site_ox_lim: dict of oxidation state limits for each site, i.e. {site:[min,max]}. Elements on sites are limited to oxidation states within these limits.
        If None, defaults to {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
    site_base_ox: dict of base oxidation state for each site, i.e. {site:ox}. Used for determining aliovalent ions and acceptor/donor dopants.
        If None, defaults to {'A': 2, 'B': 4, 'X': -2}
    ordered_formulas: if True, determine cation site assignments from order
    A_site_occupancy: Number of atoms on A site. Used when ordered_formulas is True
    anions: list of anions. Used when ordered_formulas is True

    Parameters for ordered formula featurizer: site_ox_lim, site_base_ox, A_site_occupancy, anions
    Parameters for cation site featurizer: cation_site, site_ox_lim, site_base_ox

    Before calling featurize() or any of the feature_labels()/feature_categories()/feature_units() methods,
    set_featurize_options() must be called.
    """

    # Per ElementProperty feature: (custom label, category, unit).
    # Features that are not listed map to None in each of the three derived lists.
    _ELEMENT_PROPERTY_INFO = {
        'BoilingT': ('boil_temp', 'elemental', 'temp'),
        'MeltingT': ('melt_temp', 'elemental', 'temp'),
        'BulkModulus': ('bulk_mod', 'elemental', 'pressure'),
        'ShearModulus': ('shear_mod', 'elemental', 'pressure'),
        'Row': ('row', 'periodic', 'none'),
        'Column': ('column', 'periodic', 'none'),
        'Number': ('number', 'periodic', 'none'),
        'MendeleevNumber': ('mendeleev', 'periodic', 'none'),
        'SpaceGroupNumber': ('space_group', 'periodic', 'none'),
        'Density': ('density', 'elemental', 'density'),
        'MolarVolume': ('molar_vol', 'elemental', 'volume'),
        'FusionEnthalpy': ('H_fus', 'elemental', 'energy'),
        'HeatVaporization': ('H_vap', 'elemental', 'energy'),
        'NsUnfilled': ('valence_unfilled_s', 'electronic', 'none'),
        'NpUnfilled': ('valence_unfilled_p', 'electronic', 'none'),
        'NdUnfilled': ('valence_unfilled_d', 'electronic', 'none'),
        'NfUnfilled': ('valence_unfilled_f', 'electronic', 'none'),
        # complex units for the next two - the exact unit name doesn't matter
        'Polarizability': ('polarizability', 'elemental', 'polarizability'),
        'ThermalConductivity': ('sigma_therm', 'elemental', 'therm'),
    }

    def __init__(self,
                 cation_site=None,
                 site_ox_lim=None,
                 site_base_ox=None,
                 ordered_formulas=False,
                 A_site_occupancy=1,
                 anions=None):
        """
        Store the site configuration and build the matminer featurizers.

        Raises ValueError if neither cation_site nor ordered_formulas is given, and Exception (via
        check_matminer_featurizers) if a matminer featurizer reports unexpected feature labels.
        """
        if cation_site is None and not ordered_formulas:
            raise ValueError(
                'Either cation sites must be assigned, or formulas must be ordered. Otherwise site assignments can not be determined'
            )

        # Build the defaults per instance so that instances never share (and cannot mutate) the same dicts/lists
        if site_ox_lim is None:
            site_ox_lim = {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
        if site_base_ox is None:
            site_base_ox = {'A': 2, 'B': 4, 'X': -2}

        self.cation_site = cation_site
        self.site_ox_lim = site_ox_lim
        self.site_base_ox = site_base_ox
        self.ordered_formulas = ordered_formulas
        self.A_site_occupancy = A_site_occupancy
        self.anions = anions

        #matminer featurizers
        self.ValenceOrbital = ValenceOrbital()
        self.AtomicOrbitals = AtomicOrbitalsMod()
        self.CohesiveEnergy = CohesiveEnergy()
        #custom ElementProperty featurizer
        elemental_properties = [
            'BoilingT', 'MeltingT', 'BulkModulus', 'ShearModulus', 'Row',
            'Column', 'Number', 'MendeleevNumber', 'SpaceGroupNumber',
            'Density', 'MolarVolume', 'FusionEnthalpy', 'HeatVaporization',
            'NsUnfilled', 'NpUnfilled', 'NdUnfilled', 'NfUnfilled',
            'Polarizability', 'ThermalConductivity'
        ]
        self.ElementProperty = ElementProperty(
            data_source='magpie',
            features=elemental_properties,
            stats=["mean", "std_dev", "range"])

        self.check_matminer_featurizers()
        # filled by set_featurize_options(); empty means "not configured yet"
        self.featurize_options = {}

    @classmethod
    def from_preset(cls, preset_name):
        """
        Initialize from preset

        Parameters:
        -----------
        preset_name: name of preset to load. Currently accepts 'BCFZY'

        Raises ValueError for any other preset name.
        """
        if preset_name != 'BCFZY':
            raise ValueError("Invalid preset_name specified!")

        #Ba(Co,Fe,Zr,Y)O_3-d system
        cation_site = {
            'Ba': 'A',
            'Co': 'B',
            'Fe': 'B',
            'Zr': 'B',
            'Y': 'B'
        }
        site_ox_lim = {'A': [2, 2], 'B': [2, 4], 'X': [-2, -2]}
        site_base_ox = {'A': 2, 'B': 4, 'X': -2}

        return cls(cation_site, site_ox_lim, site_base_ox)

    @classmethod
    def ordered_formula_featurizer(cls,
                                   A_site_occupancy=1,
                                   anions=None,
                                   site_ox_lim=None,
                                   site_base_ox=None):
        """
        Convenience method for instantiating a featurizer for ordered formulas.
        site_ox_lim and site_base_ox fall back to the class defaults when None.
        """
        return cls(cation_site=None,
                   site_ox_lim=site_ox_lim,
                   site_base_ox=site_base_ox,
                   ordered_formulas=True,
                   A_site_occupancy=A_site_occupancy,
                   anions=anions)

    @classmethod
    def cation_site_featurizer(cls,
                               cation_site,
                               site_ox_lim=None,
                               site_base_ox=None):
        """
        Convenience method for instantiating a featurizer for unordered formulas, based on site assignments.
        site_ox_lim and site_base_ox fall back to the class defaults when None.
        """
        return cls(cation_site, site_ox_lim, site_base_ox)

    def _element_property_column(self, column):
        """
        Return one entry per (ElementProperty feature, stat) pair, taken from the given column of
        _ELEMENT_PROPERTY_INFO (0: label, 1: category, 2: unit). Unknown features yield None.
        """
        missing = (None, None, None)
        return [
            (self._ELEMENT_PROPERTY_INFO.get(feature, missing)[column], stat)
            for feature in self.ElementProperty.features
            for stat in self.ElementProperty.stats
        ]

    @property
    def ElementProperty_custom_labels(self):
        """
        Generate custom labels for ElementProperty featurizer that follow same naming convention as Perovskite class
        """
        labels = []
        for attr, stat in self._element_property_column(0):
            # the Perovskite naming convention uses 'std' rather than 'std_dev'
            if stat == 'std_dev':
                stat = 'std'
            labels.append(f'{attr}_{stat}')
        return labels

    @property
    def ElementProperty_categories(self):
        """
        Generate categories for ElementProperty featurizer (one entry per feature and stat)
        """
        return [category for category, _ in self._element_property_column(1)]

    @property
    def ElementProperty_units(self):
        """
        Generate units for ElementProperty featurizer (one entry per feature and stat)
        """
        return [unit for unit, _ in self._element_property_column(2)]

    def ElementProperty_label_check(self):
        """
        Check that ElementProperty feature labels are as expected
        If not, features may not align with feature labels
        """
        #mirrors ElementProperty.feature_labels() code as of 2/17/19
        source_name = self.ElementProperty.data_source.__class__.__name__
        expected = [
            "{} {} {}".format(source_name, stat, attr)
            for attr in self.ElementProperty.features
            for stat in self.ElementProperty.stats
        ]

        if expected != self.ElementProperty.feature_labels():
            raise Exception('ElementProperty features or labels have changed')

    def set_featurize_options(
            self,
            sites,
            ox_stats=None,
            ep_stats=None,
            radius_type='ionic_radius',
            normalize_formula=True,
            silent=True,
            categories=None):
        """
        Set options for featurization. Since these options should be the same for all compositions in a batch, set for the featurizer instance rather than passing as args to featurize()
        so that they do not have to be duplicated in every row of a DataFrame when calling featurize_dataframe().
        Since these options change the number and meaning of features returned, it's also safest to set for the whole instance for consistency.

        Parameters:
        -----------
        sites: list or string of sites to featurize. Any combination of 'A', 'B', 'X', and/or 'comp' accepted.
            Composition-level, oxidation-state-dependent features are always calculated by the Perovskite class. Passing '' or [] will return only these features.
            Specifying 'A','B', and/or 'X' sites will calculate site-level features for these sites (oxidation-state independent and dependent features, and matminer features).
            Including 'comp' will calculate oxidation-state-independent features and matminer features for the full composition.
        ox_stats: list of aggregate functions to apply to oxidation state combinations for feature generation using Perovskite class.
            Options: 'min','max','mean','median','std','range'. If None, all six are used
        ep_stats: ElementProperty stats. Options: "minimum", "maximum", "range", "mean", "avg_dev", "mode".
            If None, ["mean", "std_dev", "range"] is used
        radius_type: Shannon radius type to use in features. Accepts 'crystal_radius' or 'ionic_radius'
        normalize_formula: if True, normalize formula such that higher occupancy cation site has one formula unit (applies to Perovskite class only)
        silent: if False, print informational messages from Perovskite class
        categories: list of feature categories to return. If None, return all. Options: 'bonding','structure','charge','composition','electronic','elemental','periodic'
            NOTE(review): this argument is accepted but not stored or applied anywhere in this class - all features are always returned.
        """
        # fresh lists per call, so the stats assigned to ElementProperty below are never shared between instances
        if ox_stats is None:
            ox_stats = ['min', 'max', 'mean', 'median', 'std', 'range']
        if ep_stats is None:
            ep_stats = ["mean", "std_dev", "range"]

        self.featurize_options.update(sites=sites,
                                      ox_stats=ox_stats,
                                      radius_type=radius_type,
                                      normalize_formula=normalize_formula,
                                      silent=silent)
        self.ElementProperty.stats = ep_stats

    def _require_featurize_options(self, action):
        """
        Raise if set_featurize_options() has not been called yet. action completes the error message.
        """
        if not self.featurize_options:
            raise Exception(
                f'Featurize options have not been set. Use set_featurize_options before {action}'
            )

    def _reference_perovskite(self, action):
        """
        Return the preset Perovskite instance used to look up labels, categories and units.
        Checks the featurize options first so that only a missing configuration is reported as such;
        errors raised inside Perovskite propagate unchanged.
        """
        self._require_featurize_options(action)
        return Perovskite.from_preset('BaCoO3', 'BCFZY', silent=True)

    def featurize(self, formula):
        """
        Calculate features

        Parameters:
        -----------
        formula: chemical formula string

        Returns: list of feature values (Perovskite features followed by the matminer features of each site)
        """
        self._require_featurize_options('featurizing')
        options = self.featurize_options

        # plain truthiness: a truthy non-bool flag must not leave pvsk undefined
        if self.ordered_formulas:
            pvsk = Perovskite.from_ordered_formula(
                formula,
                self.A_site_occupancy,
                self.anions,
                site_ox_lim=self.site_ox_lim,
                site_base_ox=self.site_base_ox,
                radius_type=options['radius_type'],
                silent=options['silent'])
        else:
            pvsk = Perovskite(formula, self.cation_site, self.site_ox_lim,
                              self.site_base_ox, options['radius_type'],
                              options['normalize_formula'], options['silent'])

        pvsk_features = pvsk.featurize(options['sites'], options['ox_stats'])

        mm_features = []
        for site in options['sites']:
            site_composition = pvsk.site_composition[site]

            #avg and frac s, p, d, f electrons
            vo_features = self.ValenceOrbital.featurize(site_composition)
            #avg total valence electrons
            # NOTE(review): the slice covers only the first three entries (avg s, p, d per
            # check_matminer_featurizers), so avg f electrons are not part of this "total" - confirm intent
            vo_features += [sum(vo_features[0:3])]

            #HOMO and LUMO energy levels (from atomic orbitals);
            #HOMO_character, HOMO_element, LUMO_character, LUMO_element are categoricals and are excluded
            ao_features = [
                value for position, value in enumerate(
                    self.AtomicOrbitals.featurize(site_composition))
                if position not in (0, 1, 3, 4)
            ]

            #avg elemental cohesive energy
            ce_features = self.CohesiveEnergy.featurize(
                site_composition, formation_energy_per_atom=1e-10)

            #elemental property features
            ep_features = self.ElementProperty.featurize(site_composition)

            mm_features += vo_features + ao_features + ce_features + ep_features

        return list(pvsk_features) + mm_features

    @property
    def matminer_labels(self):
        """
        Feature labels for matminer-derived features (for a single site)
        """
        labels = [
            #ValenceOrbital labels
            'valence_elec_s_mean',
            'valence_elec_p_mean',
            'valence_elec_d_mean',
            'valence_elec_f_mean',
            'valence_elec_s_frac',
            'valence_elec_p_frac',
            'valence_elec_d_frac',
            'valence_elec_f_frac',
            'valence_elec_tot_mean',
            #AtomicOrbitals labels (categorical character/element entries are excluded)
            'HOMO_energy',
            'LUMO_energy',
            'AO_gap',
            #CohesiveEnergy labels
            'cohesive_energy_mean'
        ]

        #ElementProperty labels
        return labels + self.ElementProperty_custom_labels

    @property
    def matminer_categories(self):
        """
        Feature categories for matminer-derived features (for a single site)
        """
        #ValenceOrbital (9) and AtomicOrbitals (3) categories, then CohesiveEnergy
        categories = ['electronic'] * 12 + ['bonding']

        #ElementProperty categories
        return categories + self.ElementProperty_categories

    @property
    def matminer_units(self):
        """
        Feature units for matminer-derived features (for a single site)
        """
        #ValenceOrbital units (9), then AtomicOrbitals (3) and CohesiveEnergy (1) units
        units = ['none'] * 9 + ['energy'] * 4

        #ElementProperty units
        return units + self.ElementProperty_units

    def feature_labels(self):
        """
        Get list of feature labels

        Raises Exception if set_featurize_options() has not been called.
        """
        reference = self._reference_perovskite('accessing feature labels')
        sites = self.featurize_options['sites']
        pvsk_labels = reference.feature_labels(
            sites, self.featurize_options['ox_stats'])

        site_labels = self.matminer_labels
        mm_labels = []
        for site in sites:
            prefix = 'comp' if site == 'comp' else f'{site}site'
            mm_labels += [f'{prefix}_{label}' for label in site_labels]

        return pvsk_labels + mm_labels

    def feature_categories(self):
        """
        Get list of feature categories. For quick filtering

        Raises Exception if set_featurize_options() has not been called.
        """
        reference = self._reference_perovskite('accessing feature categories')
        sites = self.featurize_options['sites']
        pvsk_categories = reference.feature_categories(
            sites, self.featurize_options['ox_stats'])

        # the same block of matminer categories is repeated once per site
        site_categories = self.matminer_categories
        mm_categories = [
            category for _ in sites for category in site_categories
        ]

        return pvsk_categories + mm_categories

    def feature_units(self):
        """
        Get list of feature units. For dimensional analysis

        Raises Exception if set_featurize_options() has not been called.
        """
        reference = self._reference_perovskite('accessing feature units')
        sites = self.featurize_options['sites']
        pvsk_units = reference.feature_units(
            sites, self.featurize_options['ox_stats'])

        # the same block of matminer units is repeated once per site
        site_units = self.matminer_units
        mm_units = [unit for _ in sites for unit in site_units]

        return pvsk_units + mm_units

    def check_matminer_featurizers(self):
        """
        Check that features and feature order for matminer featurizers are as expected
        If features or feature order have changed, featurize() may return unexpected features that do not align with feature_labels()
        """
        # (featurizer, labels it must report, error raised otherwise)
        expectations = (
            (
                self.ValenceOrbital,
                [
                    'avg s valence electrons', 'avg p valence electrons',
                    'avg d valence electrons', 'avg f valence electrons',
                    'frac s valence electrons', 'frac p valence electrons',
                    'frac d valence electrons', 'frac f valence electrons'
                ],
                'ValenceOrbital features or labels have changed',
            ),
            (
                self.AtomicOrbitals,
                [
                    'HOMO_character', 'HOMO_element', 'HOMO_energy',
                    'LUMO_character', 'LUMO_element', 'LUMO_energy', 'gap_AO'
                ],
                'AtomicOrbitals features or labels have changed',
            ),
            (
                self.CohesiveEnergy,
                ['cohesive energy'],
                'CohesiveEnergy features or labels have changed',
            ),
        )
        for featurizer, expected_labels, message in expectations:
            if featurizer.feature_labels() != expected_labels:
                raise Exception(message)

        self.ElementProperty_label_check()
Beispiel #4
0
class BCA_Featurizer(BaseFeaturizer):
	def __init__(self,radius_type='ionic_radius',normalize_formula=False):
		"""
		Store the BCA options and build the featurizers that featurize() relies on

		Args:
			radius_type: forwarded to BCA as its second positional argument
			normalize_formula: forwarded to BCA as its third positional argument

		Raises:
			Exception: raised by check_matminer_featurizers() if a checked featurizer's labels differ from the expected ones
		"""
		#options handed to BCA whenever one is constructed
		self.radius_type = radius_type
		self.normalize_formula = normalize_formula

		#featurizers applied to the compositions in featurize()
		self.ValenceOrbital = ValenceOrbital()
		self.AtomicOrbitals = AtomicOrbitalsMod()
		self.CohesiveEnergy = CohesiveEnergy()
		self.BandCenter = BandCenter()
		self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()

		#custom ElementProperty featurizer: mean and std_dev of the magpie properties below
		magpie_properties = [
			'BoilingT',
			'MeltingT',
			'BulkModulus',
			'ShearModulus',
			'Row',
			'Column',
			'Number',
			'MendeleevNumber',
			'SpaceGroupNumber',
			'Density',
			'MolarVolume',
			'FusionEnthalpy',
			'HeatVaporization',
			'Polarizability',
			'ThermalConductivity',
		]
		self.ElementProperty = ElementProperty(
			data_source='magpie',
			features=magpie_properties,
			stats=['mean', 'std_dev'],
		)

		#check matminer featurizers before any features are computed
		self.check_matminer_featurizers()
		
	def featurize(self,composition):
		"""
		Compute BCA features followed by matminer-derived features for a composition

		Args:
			composition: composition handed to BCA together with radius_type and normalize_formula

		Returns:
			list: values of the BCA feature dict, then the matminer features in the order listed by matminer_labels
		"""
		bca = BCA(composition,self.radius_type,self.normalize_formula)
		bca_features = bca.featurize()

		#avg and frac s, p, d, f electrons for metals
		vo_features = self.ValenceOrbital.featurize(bca.metal_composition)
		#avg total valence electrons for metals: entries 0-3 are the avg s, p, d and f counts
		#(order verified by check_matminer_featurizers), so all four are summed;
		#the earlier slice [0:3] left out the f electrons
		vo_features += [sum(vo_features[0:4])]

		#HOMO and LUMO character and energy levels for metals from atomic orbitals
		ao_features = self.AtomicOrbitals.featurize(bca.metal_composition)
		#exclude HOMO_character, HOMO_element, LUMO_character, LUMO_element - categoricals
		categorical_positions = (0,1,3,4)
		ao_features = [value for position, value in enumerate(ao_features) if position not in categorical_positions]

		#avg metal elemental cohesive energy
		#NOTE(review): 1e-10 looks like a near-zero placeholder formation energy - confirm against matminer CohesiveEnergy
		ce_features = self.CohesiveEnergy.featurize(bca.metal_composition,formation_energy_per_atom=1e-10)

		#each of the following is computed for the metal composition first, then for the full composition
		bc_features = self.BandCenter.featurize(bca.metal_composition) + self.BandCenter.featurize(bca.composition)
		ve_features = self.ValenceOrbitalEnergy.featurize(bca.metal_composition) + self.ValenceOrbitalEnergy.featurize(bca.composition)
		ep_features = self.ElementProperty.featurize(bca.metal_composition) + self.ElementProperty.featurize(bca.composition)

		mm_features = vo_features + ao_features + ce_features + bc_features + ve_features + ep_features

		return list(bca_features.values()) + mm_features
		
	@property
	def ElementProperty_custom_labels(self):
		"""
		Generate custom labels for ElementProperty featurizer that follow same naming convention as Perovskite class
		"""
		elemental_property_label_map = {'BoilingT':'boil_temp','MeltingT':'melt_temp',
							'BulkModulus':'bulk_mod','ShearModulus':'shear_mod',
							'Row':'row','Column':'column','Number':'number','MendeleevNumber':'mendeleev','SpaceGroupNumber':'space_group',
							'Density':'density','MolarVolume':'molar_vol',
							'FusionEnthalpy':'H_fus','HeatVaporization':'H_vap',
							'Polarizability':'polarizability',
							'ThermalConductivity':'sigma_therm'}
							
		element_property_labels = list(map(elemental_property_label_map.get,self.ElementProperty.features))
		labels = []
		for attr in element_property_labels:
			for stat in self.ElementProperty.stats:
				if stat=='std_dev':
					stat = 'std'
				labels.append(f'M_{attr}_{stat}')
		for attr in element_property_labels:
			for stat in self.ElementProperty.stats:
				if stat=='std_dev':
					stat = 'std'
				labels.append(f'BCA_{attr}_{stat}')
		return labels
		
	@property
	def ElementProperty_units(self):
		"""
		Generate units for ElementProperty featurizer that follow same naming convention as Perovskite class
		"""
		elemental_property_unit_map = {'BoilingT':'temperature','MeltingT':'temperature',
							'BulkModulus':'pressure','ShearModulus':'pressure',
							'Row':'none','Column':'none','Number':'none','MendeleevNumber':'none','SpaceGroupNumber':'none',
							'Density':'density','MolarVolume':'volume',
							'FusionEnthalpy':'energy','HeatVaporization':'energy',
							'Polarizability':'polarizability',
							'ThermalConductivity':'therm'}
							
		element_property_units = list(map(elemental_property_unit_map.get,self.ElementProperty.features))
		units = []
		for ep_unit in element_property_units:
			for stat in self.ElementProperty.stats:
				units.append(ep_unit)
		return units*2
		
	def ElementProperty_label_check(self):
		"""
		Check that ElementProperty feature labels are as expected
		If not, features may not align with feature labels
		"""
		#ElementProperty.feature_labels() code as of 1/24/20
		labels = []
		for attr in self.ElementProperty.features:
			src = self.ElementProperty.data_source.__class__.__name__
			for stat in self.ElementProperty.stats:
				labels.append("{} {} {}".format(src, stat, attr))
		
		if labels!=self.ElementProperty.feature_labels():
			raise Exception('ElementProperty features or labels have changed')
	
	
	@property
	def matminer_labels(self):
		"""
		Feature labels for matminer-derived features
		"""
		labels = [
			#ValenceOrbital labels
			'M_ValenceElec_s_mean',
			'M_ValenceElec_p_mean',
			'M_ValenceElec_d_mean',
			'M_ValenceElec_f_mean',
			'M_ValenceElec_s_frac',
			'M_ValenceElec_p_frac',
			'M_ValenceElec_d_frac',
			'M_ValenceElec_f_frac',
			'M_ValenceElec_tot_mean',
			#AtomicOrbitals labels
			#'M_HOMO_character',
			'M_HOMO_energy',
			#'M_LUMO_character',
			'M_LUMO_energy',
			'M_AO_gap',
			#CohesiveEnergy labels
			'M_cohesive_energy_mean',
			#BandCenter labels
			'M_BandCenter',
			'BCA_BandCenter',
			#ValenceOrbitalEnergy labels
			'M_ValenceEnergy_mean',
			'BCA_ValenceEnergy_mean'
			]
			
		labels += self.ElementProperty_custom_labels
		
		return labels	
	
	@property
	def matminer_units(self):
		"""
		Feature units for matminer-derived features
		"""
		units = [
			#ValenceOrbital units
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			'none',
			#AtomicOrbitals units
			#'M_HOMO_character',
			'energy',
			#'M_LUMO_character',
			'energy',
			'energy',
			#CohesiveEnergy units
			'energy',
			#BandCenter units
			'energy',
			'energy',
			#ValenceOrbitalEnergy units
			'energy',
			'energy'
			]
			
		units += self.ElementProperty_units
		
		return units
	
	def feature_labels(self):
		"""
		Labels for every value returned by featurize(): BCA labels first, then matminer labels

		The BCA labels are the keys of the feature dict produced by a BCA built from
		the fixed composition 'BaO' with this featurizer's radius_type and normalize_formula.
		"""
		reference_bca = BCA(mg.Composition('BaO'),self.radius_type,self.normalize_formula)
		reference_features = reference_bca.featurize()
		bca_feature_labels = list(reference_features.keys())

		return bca_feature_labels + self.matminer_labels
		
	def feature_units(self):
		"""
		Units for every value returned by featurize(): BCA units first, then matminer units

		The reference BCA is built with the same radius_type and normalize_formula
		that featurize() and feature_labels() use, so the units come from the same
		BCA configuration as the labels.
		"""
		reference_bca = BCA(mg.Composition('BaO'),self.radius_type,self.normalize_formula)
		bca_units = reference_bca.feature_units()

		return bca_units + self.matminer_units
		
	def check_matminer_featurizers(self):
		"""
		Check that features and feature order for matminer featurizers are as expected
		If features or feature order have changed, featurize() may return unexpected features that do not align with feature_labels()
		"""
		#verify that matminer feature labels haven't changed
		if self.ValenceOrbital.feature_labels() != ['avg s valence electrons',
											 'avg p valence electrons',
											 'avg d valence electrons',
											 'avg f valence electrons',
											 'frac s valence electrons',
											 'frac p valence electrons',
											 'frac d valence electrons',
											 'frac f valence electrons']:
			raise Exception('ValenceOrbital features or labels have changed')
			
		if self.AtomicOrbitals.feature_labels() != ['HOMO_character',
											 'HOMO_element',
											 'HOMO_energy',
											 'LUMO_character',
											 'LUMO_element',
											 'LUMO_energy',
											 'gap_AO']:
			raise Exception('AtomicOrbitals features or labels have changed')

		if self.CohesiveEnergy.feature_labels() != ['cohesive energy']:
			raise Exception('CohesiveEnergy features or labels have changed')
			
		if self.BandCenter.feature_labels() != ['band center']:
			raise Exception('BandCenter features or labels have changed')
	
		self.ElementProperty_label_check()
		
	def citations(self):
		"""
		Citations for every featurizer that contributes to featurize()

		Returns:
			list: the unique citations of all contributing featurizers, de-duplicated and sorted by np.unique
		"""
		featurizers = [
			self.ValenceOrbital,
			self.AtomicOrbitals,
			self.CohesiveEnergy,
			self.BandCenter,
			self.ValenceOrbitalEnergy,
			#ElementProperty features are part of featurize(), so its citations are collected as well
			self.ElementProperty,
			BCA(mg.Composition('BaO')),
		]
		#flatten the per-featurizer citation lists in one pass
		collected = [citation for featurizer in featurizers for citation in featurizer.citations()]
		return list(np.unique(collected))