Ejemplo n.º 1
0
    def test_composition_to_oxidcomposition(self):
        """Exercise CompositionToOxidComposition on a single Fe2O3 entry.

        Three scenarios are covered:
          1. default settings yield a composition carrying Fe3+ / O2-;
          2. ``max_sites=2`` with ``return_original_on_error=False`` raises
             ValueError;
          3. ``max_sites=2`` with ``return_original_on_error=True`` hands
             back the undecorated Fe2O3 composition.
        """
        column = 'composition'

        # Scenario 1: default featurizer attaches oxidation states.
        frame = DataFrame(data={"composition": [Composition("Fe2O3")]})
        featurizer = CompositionToOxidComposition()
        frame = featurizer.featurize_dataframe(frame, column)
        decorated = frame["composition_oxid"].tolist()[0]
        self.assertEqual(decorated, Composition({"Fe3+": 2, "O2-": 3}))

        # Scenario 2: the failure is propagated when the original may not
        # be returned.
        frame = DataFrame(data={"composition": [Composition("Fe2O3")]})
        featurizer = CompositionToOxidComposition(
            max_sites=2, return_original_on_error=False)
        with self.assertRaises(ValueError):
            featurizer.featurize_dataframe(frame, column)

        # Scenario 3: same limit, but the plain composition is returned.
        featurizer = CompositionToOxidComposition(
            max_sites=2, return_original_on_error=True)
        frame = featurizer.featurize_dataframe(frame, column)
        fallback = frame["composition_oxid"].tolist()[0]
        self.assertEqual(fallback, Composition({"Fe": 2, "O": 3}))
Ejemplo n.º 2
0
 def test_composition_to_oxidcomposition(self):
     """Default CompositionToOxidComposition turns Fe2O3 into Fe3+ / O2-."""
     source_col = 'composition'
     table = DataFrame(data={"composition": [Composition("Fe2O3")]})

     converter = CompositionToOxidComposition()
     table = converter.featurize_dataframe(table, source_col)

     # Only the first (and only) row is inspected.
     first_result = table["composition_oxid"].tolist()[0]
     self.assertEqual(first_result, Composition({"Fe3+": 2, "O2-": 3}))
Ejemplo n.º 3
0
    def test_composition_to_oxidcomposition(self):
        """Verify CompositionToOxidComposition for Fe2O3 in three modes.

        Covers the default conversion, the ValueError raised when
        ``return_original_on_error=False`` is combined with ``max_sites=2``,
        and the undecorated result returned when
        ``return_original_on_error=True`` is used with the same limit.
        """
        data = DataFrame(data={"composition": [Composition("Fe2O3")]})
        oxidizer = CompositionToOxidComposition()
        data = oxidizer.featurize_dataframe(data, 'composition')
        with_oxi_states = data["composition_oxid"].tolist()[0]
        self.assertEqual(
            with_oxi_states,
            Composition({
                "Fe3+": 2,
                "O2-": 3,
            }),
        )

        # test error handling
        data = DataFrame(data={"composition": [Composition("Fe2O3")]})
        strict_kwargs = {"return_original_on_error": False, "max_sites": 2}
        oxidizer = CompositionToOxidComposition(**strict_kwargs)
        with self.assertRaises(ValueError):
            oxidizer.featurize_dataframe(data, 'composition')

        # check non oxi state structure returned correctly
        lenient_kwargs = {"return_original_on_error": True, "max_sites": 2}
        oxidizer = CompositionToOxidComposition(**lenient_kwargs)
        data = oxidizer.featurize_dataframe(data, 'composition')
        without_oxi_states = data["composition_oxid"].tolist()[0]
        self.assertEqual(
            without_oxi_states,
            Composition({
                "Fe": 2,
                "O": 3,
            }),
        )
Ejemplo n.º 4
0
    def _tidy_column(self, df, featurizer_type):
        """
        Various conversions to homogenize columns for featurization input.
        For example, take a column of compositions and ensure they are decorated
        with oxidation states, are not strings, etc.

        Args:
            df (pandas.DataFrame)
            featurizer_type: The key defining the featurizer input. For example,
                composition featurizers should have featurizer_type of
                "composition".

        Returns:
            df (pandas.DataFrame): DataFrame with featurizer_type column
                ready for featurization.
        """
        # todo: Make the following conversions more robust (no [0] type checking)
        type_tester = df[featurizer_type].iloc[0]

        if featurizer_type == self.composition_col:
            # Convert formulas to composition objects
            if isinstance(type_tester, str):
                self.logger.info(
                    self._log_prefix +
                    "Compositions detected as strings. Attempting "
                    "conversion to Composition objects...")
                stc = StrToComposition(overwrite_data=True,
                                       target_col_id=featurizer_type)
                df = stc.featurize_dataframe(df,
                                             featurizer_type,
                                             multiindex=self.multiindex,
                                             ignore_errors=True,
                                             inplace=False)

            elif isinstance(type_tester, dict):
                self.logger.info(self._log_prefix +
                                 "Compositions detected as dicts. Attempting "
                                 "conversion to Composition objects...")
                df[featurizer_type] = [
                    Composition.from_dict(d) for d in df[featurizer_type]
                ]

            # Convert non-oxidstate containing comps to oxidstate comps
            if self.guess_oxistates:
                self.logger.info(
                    self._log_prefix +
                    "Guessing oxidation states of compositions, as"
                    " they were not present in input.")
                cto = CompositionToOxidComposition(
                    target_col_id=featurizer_type,
                    overwrite_data=True,
                    return_original_on_error=True,
                    max_sites=-50)
                try:
                    df = cto.featurize_dataframe(df,
                                                 featurizer_type,
                                                 multiindex=self.multiindex,
                                                 inplace=False)
                except Exception as e:
                    self.logger.info(self._log_prefix +
                                     "Could not decorate oxidation states due "
                                     "to {}. Excluding featurizers based on "
                                     "composition oxistates".format(e))
                    classes_require_oxi = [
                        c.__class__.__name__
                        for c in CompositionFeaturizers().need_oxi
                    ]
                    self.exclude.extend(classes_require_oxi)

        else:
            # Convert structure/bs/dos dicts to objects (robust already)
            if isinstance(type_tester, (dict, str)):
                self.logger.info(self._log_prefix.capitalize() +
                                 "{} detected as string or dict. Attempting "
                                 "conversion to {} objects..."
                                 "".format(featurizer_type, featurizer_type))
                if isinstance(type_tester, str):
                    raise ValueError("{} column is type {}. Cannot convert."
                                     "".format(featurizer_type,
                                               type(type_tester)))
                dto = DictToObject(overwrite_data=True,
                                   target_col_id=featurizer_type)
                df = dto.featurize_dataframe(df,
                                             featurizer_type,
                                             inplace=False)

                # Decorate with oxidstates
                if featurizer_type == self.structure_col and \
                        self.guess_oxistates:
                    self.logger.info(
                        self._log_prefix +
                        "Guessing oxidation states of structures if they were "
                        "not present in input.")
                    sto = StructureToOxidStructure(
                        target_col_id=featurizer_type,
                        overwrite_data=True,
                        return_original_on_error=True,
                        max_sites=-50)
                    try:
                        df = sto.featurize_dataframe(
                            df,
                            featurizer_type,
                            multiindex=self.multiindex,
                            inplace=False)
                    except Exception as e:
                        self.logger.info(
                            self._log_prefix +
                            "Could not decorate oxidation states on structures "
                            "due to {}.".format(e))
        return df
Ejemplo n.º 5
0
    # Disabled step: StructuralHeterogeneity featurization of the
    # 'structures' column (kept commented out).
    #from matminer.featurizers.structure import StructuralHeterogeneity
    #structural_heterogeneity = StructuralHeterogeneity()
    #structural_heterogeneity.set_n_jobs(28)
    #labels.append(structural_heterogeneity.feature_labels())
    #df  = structural_heterogeneity.featurize_dataframe(df, 'structures',ignore_errors=False)

    # Convert the entries of the 'structures' column to compositions.
    # NOTE(review): presumably this writes the 'composition' column that the
    # steps below read -- confirm against StructureToComposition's defaults.
    from matminer.featurizers.conversions import StructureToComposition
    structures_to_compositions = StructureToComposition()
    df = structures_to_compositions.featurize_dataframe(df, 'structures')

    # Convert the 'composition' column to oxidation-state-decorated
    # compositions; the featurizer's labels are printed first.
    from matminer.featurizers.conversions import CompositionToOxidComposition
    OxidCompositions = CompositionToOxidComposition()
    print(OxidCompositions.feature_labels())
    df = OxidCompositions.featurize_dataframe(df, 'composition')

    # CohesiveEnergy features, computed from the 'composition' column with
    # n_jobs set to 28 and ignore_errors=True.
    # NOTE(review): feature_labels() is appended to `labels` as a single
    # element, so `labels` presumably ends up nested -- confirm that is
    # what the consumer of `labels` expects.
    from matminer.featurizers.composition import CohesiveEnergy
    cohesive_energy = CohesiveEnergy()
    cohesive_energy.set_n_jobs(28)
    labels.append(cohesive_energy.feature_labels())
    df = cohesive_energy.featurize_dataframe(df,
                                             'composition',
                                             ignore_errors=True)

    # ValenceOrbital features: the featurizer is configured and its labels
    # recorded here; no featurize_dataframe call is visible in this fragment.
    from matminer.featurizers.composition import ValenceOrbital
    valence_orbital = ValenceOrbital()
    valence_orbital.set_n_jobs(28)
    labels.append(valence_orbital.feature_labels())
# Featurizers used by the pipeline below, gathered in one place.
from matminer.featurizers.conversions import (
    CompositionToOxidComposition,
    StrToComposition,
)
from matminer.featurizers.composition import ElementProperty, OxidationStates
from matminer.featurizers.structure import DensityFeatures

# Drop the columns that are not wanted before featurizing.
df = df.drop(columns=unwanted_columns)

# 1. Parse the 'formula' strings; the steps below read the result from
#    the 'composition' column.
sc_feat = StrToComposition()
df = sc_feat.featurize_dataframe(df, col_id="formula")

# 2. Element-property features (magpie preset) from 'composition'.
ep_feat = ElementProperty.from_preset(preset_name="magpie")
df = ep_feat.featurize_dataframe(df, col_id="composition")

# 3. Decorate compositions with oxidation states; step 4 reads the result
#    from the 'composition_oxid' column.
co_feat = CompositionToOxidComposition()
df = co_feat.featurize_dataframe(df, col_id="composition")

# 4. Oxidation-state features from the decorated compositions.
os_feat = OxidationStates()
df = os_feat.featurize_dataframe(df, col_id="composition_oxid")

# 5. Density features from the 'structure' column.
df_feat = DensityFeatures()
df = df_feat.featurize_dataframe(df, col_id="structure")

"""
formula, structure, elastic_anisotropy, G_Reuss, G_VRH, G_Voigt, K_Reuss, K_VRH, K_Voigt,
poisson_ratio, compliance_tensor, elastic_tensor, elastic_tensor_original, composition
"""