def test_composition_features(self):
    """Check that StructureComposition agrees with the featurizer it wraps.

    The wrapper is fitted on two structures, then its output for NaCl is
    compared with the wrapped composition featurizer applied directly to
    the NaCl composition. Citation and implementor metadata must also be
    forwarded unchanged.
    """
    element_featurizer = ElementProperty.from_preset("magpie")
    wrapper = StructureComposition(featurizer=element_featurizer)

    # Fitting only has to complete without raising.
    wrapper.fit([self.nacl, self.diamond])

    # Feature values: structure-level output vs. direct composition output.
    structure_features = wrapper.featurize(self.nacl)
    composition_features = element_featurizer.featurize(self.nacl.composition)
    self.assertArrayAlmostEqual(composition_features, structure_features)

    # Metadata accessors must return the same thing on both objects.
    for accessor in ("citations", "implementors"):
        self.assertEqual(
            getattr(element_featurizer, accessor)(),
            getattr(wrapper, accessor)(),
        )
def featurize_structures(self, featurizer=None, **kwargs):
    """
    Compute features for the structures held in ``self.hypo_structures``.

    Rows containing any missing feature value are dropped from the
    returned frame; the matching subset of ``self.hypo_structures`` is
    stored on ``self.valid_structures``. The unfiltered frame is kept on
    ``self._features_df``.

    Args:
        featurizer (Featurizer): A MatMiner Featurizer. Defaults to a
            MultipleFeaturizer with PRB Ward Voronoi descriptors.
        **kwargs (dict): extra keyword arguments forwarded to the
            featurizer's ``featurize_many`` method.

    Returns:
        pandas.DataFrame: the filtered features, columns sorted by name.
    """
    # NOTE: the explicit (redundant-looking) None comparison is kept on
    # purpose; the original author needed it for pandas to work.
    if self.hypo_structures is None:
        warnings.warn("No structures available. Generating structures.")
        self.get_structures()

    print("Generating features")
    if not featurizer:
        featurizer = MultipleFeaturizer([
            SiteStatsFingerprint.from_preset(
                "CoordinationNumber_ward-prb-2017"),
            StructuralHeterogeneity(),
            ChemicalOrdering(),
            MaximumPackingEfficiency(),
            SiteStatsFingerprint.from_preset(
                "LocalPropertyDifference_ward-prb-2017"),
            StructureComposition(Stoichiometry()),
            StructureComposition(ElementProperty.from_preset("magpie")),
            StructureComposition(ValenceOrbital(props=['frac'])),
            StructureComposition(IonProperty(fast=True)),
        ])

    structures = self.hypo_structures['pmg_structures']
    features = featurizer.featurize_many(
        structures, ignore_errors=True, **kwargs)

    # Two bookkeeping columns derived from each structure's composition.
    n_species = [len(s.composition.elements) for s in structures]
    formula = [s.composition.formula for s in structures]

    # Unfiltered frame, indexed like the structure table.
    all_rows = pd.DataFrame.from_records(
        features, columns=featurizer.feature_labels())
    all_rows.index = self.hypo_structures.index
    all_rows['N_species'] = n_species
    all_rows['Composition'] = formula
    self._features_df = all_rows

    # Keep only rows without any missing value, then sort the columns.
    complete_rows = self._features_df.dropna(axis=0, how='any')
    self.features = complete_rows.reindex(
        sorted(complete_rows.columns), axis=1)

    self._valid_structure_labels = list(self.features.index)
    self.valid_structures = self.hypo_structures.loc[
        self._valid_structure_labels]

    summary = "{} out of {} structures were successfully featurized."
    print(summary.format(self.features.shape[0],
                         self._features_df.shape[0]))
    return self.features
# (continuation) Tail of a tuple of property names whose opening lies above
# this excerpt; presumably it is bound to `element_properties`, which is
# used below -- TODO confirm against the preceding lines.
'MeltingT', 'NsValence', 'NpValence', 'NdValence', 'NfValence', 'NValence',
'NsUnfilled', 'NpUnfilled', 'NdUnfilled', 'NfUnfilled', 'NUnfilled',
'GSvolume_pa', 'SpaceGroupNumber', 'GSbandgap', 'GSmagmom')

# The following features are created with the matminer package: site
# statistics, structural descriptors, and composition-based descriptors
# wrapped in StructureComposition.
# NOTE(review): `from_preset` is called on a freshly constructed
# CoordinationNumber instance; if it is a classmethod,
# `CoordinationNumber.from_preset('VoronoiNN')` would be the direct
# spelling -- confirm before changing.
featurizer = MultipleFeaturizer([
    SiteStatsFingerprint(CoordinationNumber().from_preset('VoronoiNN'),
                         stats=('mean', 'std_dev', 'minimum', 'maximum')),
    StructuralHeterogeneity(),
    ChemicalOrdering(),
    MaximumPackingEfficiency(),
    SiteStatsFingerprint(
        LocalPropertyDifference(properties=element_properties),
        stats=('mean', 'std_dev', 'minimum', 'maximum', 'range')),
    StructureComposition(Stoichiometry()),
    StructureComposition(ElementProperty.from_preset("magpie")),
    StructureComposition(ValenceOrbital(props=['frac'])),
    StructureComposition(IonProperty(fast=True))
])

# Generate the features from the 'structure' column of `df`.
# ignore_errors=True is passed to the featurizer; presumably rows that fail
# are kept rather than aborting the run -- confirm in the matminer docs.
feature_data = featurizer.featurize_dataframe(df,
                                              col_id=['structure'],
                                              ignore_errors=True)

# "structure", "compound possible" and "material_id" are not reasonable
# physical features, so these three columns are dropped.
feature_data = feature_data.drop(
    ["structure", "compound possible", "material_id"], axis=1)

# Write the data to a CSV file (without the index) for later use.
feature_data.to_csv("data_delta_e_data.csv", index=False)

from sklearn.model_selection import KFold, cross_val_score
# Collect every CIF file found in the examples directory.
directoryname = '../examples/'  # directory that is scanned
allfiles = os.listdir(directoryname)
CIFfiles = [i for i in allfiles if os.path.splitext(i)[-1] == '.cif']

# Load each CIF file as a pymatgen structure and remember its base name.
# Every structure is wrapped in a one-element list so that it forms one
# row of the single-column DataFrame built below.
namecolumns = ['structure']
structlist = []
namelist = []
for i in CIFfiles:
    loaded = Structure.from_file(directoryname + i)
    structlist.append([loaded])
    namelist.append(os.path.splitext(i)[0])

# DataFrame of structures, one row per file, indexed by structure name.
dftest = pd.DataFrame(data=structlist, index=namelist, columns=namecolumns)

# Featurizers to apply; more can be appended to this list.
# StructureComposition applies the wrapped featurizer to the composition
# taken from each structure.
featurizer = MultipleFeaturizer([
    StructuralHeterogeneity(),
    StructureComposition(ElementProperty.from_preset('magpie')),
])

# Featurize the entire pandas DataFrame.
r = featurizer.featurize_dataframe(dftest, ['structure'])