def test_composition_to_oxidcomposition(self):
    """Exercise CompositionToOxidComposition on Fe2O3, including error paths.

    Three expectations are checked in order:
      1. With default settings the ``composition_oxid`` entry equals the
         Fe3+/O2- decorated composition.
      2. With ``return_original_on_error=False`` and ``max_sites=2`` the
         featurization raises ``ValueError``.
      3. With ``return_original_on_error=True`` and ``max_sites=2`` the
         ``composition_oxid`` entry equals the undecorated composition.
    """
    source_col = 'composition'
    result_col = "composition_oxid"

    # Default featurizer: expect oxidation-state decorated output.
    frame = DataFrame(data={"composition": [Composition("Fe2O3")]})
    converter = CompositionToOxidComposition()
    frame = converter.featurize_dataframe(frame, source_col)
    expected_decorated = Composition({"Fe3+": 2, "O2-": 3})
    self.assertEqual(frame[result_col].tolist()[0], expected_decorated)

    # test error handling
    frame = DataFrame(data={"composition": [Composition("Fe2O3")]})
    strict_converter = CompositionToOxidComposition(
        return_original_on_error=False,
        max_sites=2,
    )
    with self.assertRaises(ValueError):
        strict_converter.featurize_dataframe(frame, source_col)

    # check non oxi state structure returned correctly
    lenient_converter = CompositionToOxidComposition(
        return_original_on_error=True,
        max_sites=2,
    )
    frame = lenient_converter.featurize_dataframe(frame, source_col)
    expected_plain = Composition({"Fe": 2, "O": 3})
    self.assertEqual(frame[result_col].tolist()[0], expected_plain)
def test_composition_to_oxidcomposition(self):
    """Check that Fe2O3 is decorated as Fe3+ / O2- with default settings.

    Featurizes a one-row frame and compares the first ``composition_oxid``
    entry against the expected oxidation-state decorated composition.
    """
    frame = DataFrame(data={"composition": [Composition("Fe2O3")]})
    converter = CompositionToOxidComposition()
    frame = converter.featurize_dataframe(frame, 'composition')

    oxid_values = frame["composition_oxid"].tolist()
    expected = Composition({"Fe3+": 2, "O2-": 3})
    self.assertEqual(oxid_values[0], expected)
def test_composition_to_oxidcomposition(self):
    """Test CompositionToOxidComposition output and its failure modes on Fe2O3.

    The default featurizer is expected to yield the Fe3+/O2- composition.
    With ``max_sites=2`` the test expects a ``ValueError`` when
    ``return_original_on_error`` is False, and the plain Fe/O composition
    when it is True.
    """
    col = 'composition'

    # Happy path with default arguments.
    data = DataFrame(data={"composition": [Composition("Fe2O3")]})
    data = CompositionToOxidComposition().featurize_dataframe(data, col)
    first_oxid = data["composition_oxid"].tolist()[0]
    self.assertEqual(first_oxid, Composition({"Fe3+": 2, "O2-": 3}))

    # test error handling
    data = DataFrame(data={"composition": [Composition("Fe2O3")]})
    raising = CompositionToOxidComposition(
        return_original_on_error=False, max_sites=2)
    with self.assertRaises(ValueError):
        raising.featurize_dataframe(data, col)

    # check non oxi state structure returned correctly
    passthrough = CompositionToOxidComposition(
        return_original_on_error=True, max_sites=2)
    data = passthrough.featurize_dataframe(data, col)
    first_oxid = data["composition_oxid"].tolist()[0]
    self.assertEqual(first_oxid, Composition({"Fe": 2, "O": 3}))
def _tidy_column(self, df, featurizer_type):
    """
    Various conversions to homogenize columns for featurization input.
    For example, take a column of compositions and ensure they are
    decorated with oxidation states, are not strings, etc.

    The type of the column is inferred from its first entry only. A column
    with no rows has nothing to convert and is returned unchanged.

    Args:
        df (pandas.DataFrame)
        featurizer_type: The key defining the featurizer input. For
            example, composition featurizers should have featurizer_type
            of "composition".

    Returns:
        df (pandas.DataFrame): DataFrame with featurizer_type column
            ready for featurization.

    Raises:
        ValueError: If a non-composition column holds strings, which
            cannot be converted to objects.
    """
    column = df[featurizer_type]
    if column.empty:
        # Without this guard, .iloc[0] below raises IndexError on a
        # zero-row frame.
        return df

    # todo: Make the following conversions more robust (no [0] type checking)
    type_tester = column.iloc[0]

    if featurizer_type == self.composition_col:
        # Convert formulas to composition objects
        if isinstance(type_tester, str):
            self.logger.info(
                self._log_prefix +
                "Compositions detected as strings. Attempting "
                "conversion to Composition objects...")
            stc = StrToComposition(overwrite_data=True,
                                   target_col_id=featurizer_type)
            df = stc.featurize_dataframe(df, featurizer_type,
                                         multiindex=self.multiindex,
                                         ignore_errors=True,
                                         inplace=False)

        elif isinstance(type_tester, dict):
            self.logger.info(self._log_prefix +
                             "Compositions detected as dicts. Attempting "
                             "conversion to Composition objects...")
            df[featurizer_type] = [
                Composition.from_dict(d) for d in df[featurizer_type]
            ]

        # Convert non-oxidstate containing comps to oxidstate comps
        if self.guess_oxistates:
            self.logger.info(
                self._log_prefix +
                "Guessing oxidation states of compositions, as"
                " they were not present in input.")
            cto = CompositionToOxidComposition(
                target_col_id=featurizer_type,
                overwrite_data=True,
                return_original_on_error=True,
                max_sites=-50)
            try:
                df = cto.featurize_dataframe(df, featurizer_type,
                                             multiindex=self.multiindex,
                                             inplace=False)
            except Exception as e:
                # Deliberate best-effort: log the failure and drop the
                # featurizers that need oxidation states instead of
                # aborting.
                self.logger.info(self._log_prefix +
                                 "Could not decorate oxidation states due "
                                 "to {}. Excluding featurizers based on "
                                 "composition oxistates".format(e))
                classes_require_oxi = [
                    c.__class__.__name__
                    for c in CompositionFeaturizers().need_oxi
                ]
                self.exclude.extend(classes_require_oxi)

    else:
        # Convert structure/bs/dos dicts to objects (robust already)
        if isinstance(type_tester, (dict, str)):
            self.logger.info(self._log_prefix.capitalize() +
                             "{} detected as string or dict. Attempting "
                             "conversion to {} objects..."
                             "".format(featurizer_type, featurizer_type))
            if isinstance(type_tester, str):
                raise ValueError("{} column is type {}. Cannot convert."
                                 "".format(featurizer_type,
                                           type(type_tester)))
            dto = DictToObject(overwrite_data=True,
                               target_col_id=featurizer_type)
            df = dto.featurize_dataframe(df, featurizer_type,
                                         inplace=False)

            # Decorate with oxidstates
            # NOTE(review): as reconstructed, this step only runs when the
            # column was just converted from dicts; confirm whether it is
            # meant to apply to columns that already hold objects too.
            if featurizer_type == self.structure_col and \
                    self.guess_oxistates:
                self.logger.info(
                    self._log_prefix +
                    "Guessing oxidation states of structures if they were "
                    "not present in input.")
                sto = StructureToOxidStructure(
                    target_col_id=featurizer_type,
                    overwrite_data=True,
                    return_original_on_error=True,
                    max_sites=-50)
                try:
                    df = sto.featurize_dataframe(
                        df, featurizer_type,
                        multiindex=self.multiindex,
                        inplace=False)
                except Exception as e:
                    # Best-effort: keep the undecorated structures.
                    self.logger.info(
                        self._log_prefix +
                        "Could not decorate oxidation states on structures "
                        "due to {}.".format(e))
    return df
#from matminer.featurizers.structure import StructuralHeterogeneity #structural_heterogeneity = StructuralHeterogeneity() #structural_heterogeneity.set_n_jobs(28) #labels.append(structural_heterogeneity.feature_labels()) #df = structural_heterogeneity.featurize_dataframe(df, 'structures',ignore_errors=False) #convert structure to composition from matminer.featurizers.conversions import StructureToComposition structures_to_compositions = StructureToComposition() df = structures_to_compositions.featurize_dataframe(df, 'structures') #convert composition to oxidcomposition from matminer.featurizers.conversions import CompositionToOxidComposition OxidCompositions = CompositionToOxidComposition() print(OxidCompositions.feature_labels()) df = OxidCompositions.featurize_dataframe(df, 'composition') #CohesiveEnergy from matminer.featurizers.composition import CohesiveEnergy cohesive_energy = CohesiveEnergy() cohesive_energy.set_n_jobs(28) labels.append(cohesive_energy.feature_labels()) df = cohesive_energy.featurize_dataframe(df, 'composition', ignore_errors=True) #ValenceOrbital from matminer.featurizers.composition import ValenceOrbital valence_orbital = ValenceOrbital() valence_orbital.set_n_jobs(28) labels.append(valence_orbital.feature_labels())
# Script excerpt: drops unwanted columns, then applies a chain of matminer
# featurizers to `df`. `df` and `unwanted_columns` are defined outside this
# excerpt.
df = df.drop(unwanted_columns, axis=1)

# Featurize the 'formula' column with StrToComposition. The next step reads
# a 'composition' column, presumably produced here -- TODO confirm.
from matminer.featurizers.conversions import StrToComposition
sc_feat = StrToComposition()
df = sc_feat.featurize_dataframe(df, col_id='formula')

# Element-property features built from the 'magpie' preset.
from matminer.featurizers.composition import ElementProperty
ep_feat = ElementProperty.from_preset(preset_name='magpie')
df = ep_feat.featurize_dataframe(df, col_id='composition')

# Convert compositions to oxidation-state decorated compositions. The next
# step reads 'composition_oxid', presumably the column this adds.
from matminer.featurizers.conversions import CompositionToOxidComposition
co_feat = CompositionToOxidComposition()
df = co_feat.featurize_dataframe(df, col_id='composition')

# Oxidation-state features, computed from the 'composition_oxid' column.
from matminer.featurizers.composition import OxidationStates
os_feat = OxidationStates()
df = os_feat.featurize_dataframe(df, col_id='composition_oxid')

# Density features, computed from the 'structure' column.
from matminer.featurizers.structure import DensityFeatures
df_feat = DensityFeatures()
df = df_feat.featurize_dataframe(df, col_id='structure')

# Bare string literal with no runtime effect; it looks like a note of the
# DataFrame's column names -- TODO confirm which stage it describes.
""" formula, structure, elastic_anisotropy, G_Reuss, G_VRH, G_Voigt, K_Reuss, K_VRH, K_Voigt, poisson_ratio, compliance_tensor, elastic_tensor, elastic_tensor_original, composition """