Exemple #1
0
    def test_composition_features(self):
        """StructureComposition must reproduce the wrapped featurizer's output."""
        magpie = ElementProperty.from_preset("magpie")
        wrapped = StructureComposition(featurizer=magpie)

        # Fitting on a couple of structures has to complete without raising
        wrapped.fit([self.nacl, self.diamond])

        # Featurizing the structure must agree with featurizing its
        # composition directly through the wrapped featurizer
        from_structure = wrapped.featurize(self.nacl)
        from_composition = magpie.featurize(self.nacl.composition)
        self.assertArrayAlmostEqual(from_composition, from_structure)

        # Citations and implementors must match those of the wrapped featurizer
        for accessor in ("citations", "implementors"):
            self.assertEqual(getattr(magpie, accessor)(),
                             getattr(wrapped, accessor)())
Exemple #2
0
    def test_composition_features(self):
        """StructureComposition must reproduce the wrapped featurizer's output."""
        magpie = ElementProperty.from_preset("magpie")
        wrapped = StructureComposition(featurizer=magpie)

        # Fitting on a couple of structures has to complete without raising
        wrapped.fit([self.nacl, self.diamond])

        # Featurizing the structure must agree with featurizing its
        # composition directly through the wrapped featurizer
        from_structure = wrapped.featurize(self.nacl)
        from_composition = magpie.featurize(self.nacl.composition)
        self.assertArrayAlmostEqual(from_composition, from_structure)

        # Citations and implementors must match those of the wrapped featurizer
        for accessor in ("citations", "implementors"):
            self.assertEqual(getattr(magpie, accessor)(),
                             getattr(wrapped, accessor)())
Exemple #3
0
    def featurize_structures(self, featurizer=None, **kwargs):
        """
        Compute features for the hypothetical structures stored in
        hypo_structures. Rows that end up with any missing feature
        value are dropped, and the structures that survive are
        exposed as valid_structures.

        Args:
            featurizer (Featurizer): A MatMiner Featurizer. When
                omitted (or falsy), a MultipleFeaturizer built from
                the Ward PRB 2017 site-statistics presets, structural
                featurizers and composition-based featurizers is used.
            **kwargs (dict): forwarded to the featurize_many
                method of the featurizer.

        Returns:
            pandas.DataFrame: features of the structures that were
                featurized without missing values, with the columns
                sorted by name.
        """
        # Compare against None explicitly; the original note on this
        # check says the redundant-looking form is needed for pandas.
        if self.hypo_structures is None:
            warnings.warn("No structures available. Generating structures.")
            self.get_structures()

        print("Generating features")
        if not featurizer:
            # Fall back to the default descriptor set
            featurizer = MultipleFeaturizer([
                SiteStatsFingerprint.from_preset(
                    "CoordinationNumber_ward-prb-2017"),
                StructuralHeterogeneity(),
                ChemicalOrdering(),
                MaximumPackingEfficiency(),
                SiteStatsFingerprint.from_preset(
                    "LocalPropertyDifference_ward-prb-2017"),
                StructureComposition(Stoichiometry()),
                StructureComposition(ElementProperty.from_preset("magpie")),
                StructureComposition(ValenceOrbital(props=['frac'])),
                StructureComposition(IonProperty(fast=True))
            ])

        structures = self.hypo_structures['pmg_structures']
        raw_features = featurizer.featurize_many(
            structures, ignore_errors=True, **kwargs)

        # Per-structure bookkeeping columns derived from the composition
        compositions = [struct.composition for struct in structures]
        species_counts = [len(comp.elements) for comp in compositions]
        formulas = [comp.formula for comp in compositions]

        # Full table, including rows whose featurization left gaps
        all_rows = pd.DataFrame.from_records(
            raw_features, columns=featurizer.feature_labels())
        self._features_df = all_rows
        all_rows.index = self.hypo_structures.index
        all_rows['N_species'] = species_counts
        all_rows['Composition'] = formulas

        # Keep only complete rows and order the columns alphabetically
        complete_rows = all_rows.dropna(axis=0, how='any')
        self.features = complete_rows.reindex(
            sorted(complete_rows.columns), axis=1)

        self._valid_structure_labels = list(self.features.index)
        self.valid_structures = self.hypo_structures.loc[
            self._valid_structure_labels]

        n_valid = self.features.shape[0]
        n_total = self._features_df.shape[0]
        print("{} out of {} structures were successfully featurized.".format(
            n_valid, n_total))
        return self.features
Exemple #4
0
                      'MeltingT', 'NsValence', 'NpValence', 'NdValence',
                      'NfValence', 'NValence', 'NsUnfilled', 'NpUnfilled',
                      'NdUnfilled', 'NfUnfilled', 'NUnfilled', 'GSvolume_pa',
                      'SpaceGroupNumber', 'GSbandgap', 'GSmagmom')

# Build the structure-based feature set with the matminer package.
featurizer = MultipleFeaturizer([
    # from_preset is a factory, so call it on the class instead of first
    # building a throwaway CoordinationNumber() instance.
    SiteStatsFingerprint(CoordinationNumber.from_preset('VoronoiNN'),
                         stats=('mean', 'std_dev', 'minimum', 'maximum')),
    StructuralHeterogeneity(),
    ChemicalOrdering(),
    MaximumPackingEfficiency(),
    # element_properties is defined above this section of the script.
    SiteStatsFingerprint(
        LocalPropertyDifference(properties=element_properties),
        stats=('mean', 'std_dev', 'minimum', 'maximum', 'range')),
    StructureComposition(Stoichiometry()),
    StructureComposition(ElementProperty.from_preset("magpie")),
    StructureComposition(ValenceOrbital(props=['frac'])),
    StructureComposition(IonProperty(fast=True))
])

# Generate the features from the 'structure' column of df.
# ignore_errors=True is passed so that a structure that fails to featurize
# does not abort the whole run.
feature_data = featurizer.featurize_dataframe(df,
                                              col_id=['structure'],
                                              ignore_errors=True)
# "structure", "compound possible" and "material_id" are not reasonable
# physical features, so these three columns are dropped.
feature_data = feature_data.drop(
    ["structure", "compound possible", "material_id"], axis=1)
# Write the data into a csv file for later use.
feature_data.to_csv("data_delta_e_data.csv", index=False)
from sklearn.model_selection import KFold, cross_val_score
# Collect the names of all CIF files found in the examples directory
directoryname = '../examples/'  # directory that is scanned
allfiles = os.listdir(directoryname)
CIFfiles = [
    entry for entry in allfiles if os.path.splitext(entry)[-1] == '.cif'
]

# Build one single-element row per CIF file holding the parsed pymatgen
# structure, plus a parallel list of names (file name without extension)
namecolumns = ['structure']
structlist = [[Structure.from_file(directoryname + cif)] for cif in CIFfiles]
namelist = [os.path.splitext(cif)[0] for cif in CIFfiles]

# DataFrame with one 'structure' column, indexed by structure name
dftest = pd.DataFrame(data=structlist, index=namelist, columns=namecolumns)

# Featurizers to apply; more can be appended to this list
featurizer = MultipleFeaturizer([
    StructuralHeterogeneity(),
    # Wraps a composition featurizer so it can be applied to structures
    StructureComposition(ElementProperty.from_preset('magpie'))
])

# Featurize the whole DataFrame using its 'structure' column
r = featurizer.featurize_dataframe(dftest, ['structure'])