Esempio n. 1
0
    def test_conversion_multiindex(self):
        d = {'comp_str': ["Fe2", "MnO2"]}

        df_1lvl = DataFrame(data=d)

        df_1lvl = StrToComposition().featurize_dataframe(df_1lvl,
                                                         'comp_str',
                                                         multiindex=True)
        self.assertEqual(
            df_1lvl[("StrToComposition", "composition")].tolist(),
            [Composition("Fe2"), Composition("MnO2")])

        df_2lvl = DataFrame(data=d)
        df_2lvl.columns = MultiIndex.from_product(
            (["custom"], df_2lvl.columns.values))

        df_2lvl = StrToComposition().featurize_dataframe(
            df_2lvl, ("custom", "comp_str"), multiindex=True)
        self.assertEqual(
            df_2lvl[("StrToComposition", "composition")].tolist(),
            [Composition("Fe2"), Composition("MnO2")])

        df_2lvl = DataFrame(data=d)
        df_2lvl.columns = MultiIndex.from_product(
            (["custom"], df_2lvl.columns.values))

        sto = StrToComposition(target_col_id='test')
        df_2lvl = sto.featurize_dataframe(df_2lvl, ("custom", "comp_str"),
                                          multiindex=True)
        self.assertEqual(
            df_2lvl[("StrToComposition", "test")].tolist(),
            [Composition("Fe2"), Composition("MnO2")])

        # if two level multiindex provided as target, it should be written there
        # here we test converting multiindex in place
        df_2lvl = DataFrame(data=d)
        df_2lvl.columns = MultiIndex.from_product(
            (["custom"], df_2lvl.columns.values))

        sto = StrToComposition(target_col_id=None, overwrite_data=True)
        df_2lvl = sto.featurize_dataframe(df_2lvl, ("custom", "comp_str"),
                                          multiindex=True)
        self.assertEqual(
            df_2lvl[("custom",
                     "comp_str")][df_2lvl[("custom",
                                           "comp_str")].columns[3]].tolist(),
            [Composition("Fe2"), Composition("MnO2")])

        # Try inplace multiindex conversion with return errors
        df_2lvl = DataFrame(data=d)
        df_2lvl.columns = MultiIndex.from_product(
            (["custom"], df_2lvl.columns.values))

        sto = StrToComposition(target_col_id=None, overwrite_data=True)
        df_2lvl = sto.featurize_dataframe(df_2lvl, ("custom", "comp_str"),
                                          multiindex=True,
                                          return_errors=True,
                                          ignore_errors=True)
        self.assertTrue(
            all(df_2lvl[("custom", "StrToComposition Exceptions")].isnull()))
Esempio n. 2
0
    def has_polymorphs(self):
        """Determine if a task's raw data contains polymorphs.

        Returns:
            (bool) If true, contains polymorphs.
        """
        checker_key = "pmg_composition"
        self._check_is_loaded()
        if self.metadata.input_type == "composition":
            stc = StrToComposition(target_col_id=checker_key, reduce=True)
            comps = stc.featurize_dataframe(self.df,
                                            "composition")[checker_key].values
        elif self.metadata.input_type == "structure":
            stc = StructureToComposition(target_col_id=checker_key,
                                         reduce=True)
            comps = stc.featurize_dataframe(self.df,
                                            "structure")[checker_key].values
        else:
            raise ValueError(
                "Cannot check for polymorphs without input type in "
                "(structure, composition)!")

        unique_comps = set(comps)
        if len(unique_comps) != len(comps):
            return True
        else:
            return False
Esempio n. 3
0
    def test_conversion_multiindex(self):
        d = {'comp_str': ["Fe2", "MnO2"]}

        df_1lvl = DataFrame(data=d)

        df_1lvl = StrToComposition().featurize_dataframe(
            df_1lvl, 'comp_str', multiindex=True)
        self.assertEqual(df_1lvl[("StrToComposition", "composition")].tolist(),
                         [Composition("Fe2"), Composition("MnO2")])

        df_2lvl = DataFrame(data=d)
        df_2lvl.columns = MultiIndex.from_product((["custom"],
                                                   df_2lvl.columns.values))

        df_2lvl = StrToComposition().featurize_dataframe(
            df_2lvl, ("custom", "comp_str"), multiindex=True)
        self.assertEqual(df_2lvl[("StrToComposition", "composition")].tolist(),
                         [Composition("Fe2"), Composition("MnO2")])

        df_2lvl = DataFrame(data=d)
        df_2lvl.columns = MultiIndex.from_product((["custom"],
                                                   df_2lvl.columns.values))

        sto = StrToComposition(target_col_id='test')
        df_2lvl = sto.featurize_dataframe(
            df_2lvl, ("custom", "comp_str"), multiindex=True)
        self.assertEqual(df_2lvl[("StrToComposition", "test")].tolist(),
                         [Composition("Fe2"), Composition("MnO2")])

        # if two level multiindex provided as target, it should be written there
        # here we test converting multiindex in place
        df_2lvl = DataFrame(data=d)
        df_2lvl.columns = MultiIndex.from_product((["custom"],
                                                   df_2lvl.columns.values))

        sto = StrToComposition(target_col_id=None, overwrite_data=True)

        df_2lvl = sto.featurize_dataframe(
            df_2lvl, ("custom", "comp_str"), multiindex=True, inplace=False)
        self.assertEqual(df_2lvl[("custom", "comp_str")].tolist(),
                         [Composition("Fe2"), Composition("MnO2")])

        # Try inplace multiindex conversion with return errors
        df_2lvl = DataFrame(data=d)
        df_2lvl.columns = MultiIndex.from_product((["custom"],
                                                   df_2lvl.columns.values))

        sto = StrToComposition(target_col_id=None, overwrite_data=True)
        df_2lvl = sto.featurize_dataframe(
            df_2lvl, ("custom", "comp_str"), multiindex=True,
            return_errors=True, ignore_errors=True)

        self.assertTrue(
            all(df_2lvl[("custom", "StrToComposition Exceptions")].isnull()))
Esempio n. 4
0
class FeatureGenerator:
    """
        A wraper class to generate multiple type of elemental features
    """
    def __init__(self):
        self.feature_calculators = MultipleFeaturizer([
            cf.ElementProperty.from_preset(preset_name="magpie"),
            cf.Stoichiometry(),
            cf.ValenceOrbital(props=['frac']),
            cf.IonProperty(fast=True),
            cf.BandCenter(),
            cf.ElementFraction(),
        ])

        self.str2composition = StrToComposition()

    def generate(self, df: pd.DataFrame, ignore_errors: bool = False):
        """
            generate feature from a dataframe with a "formula" column that contains 
            chemical formulas of the compositions.
        """
        df = self.str2composition.featurize_dataframe(
            df, "formula", ignore_errors=ignore_errors)
        df = df.dropna()
        df = self.feature_calculators.featurize_dataframe(
            df, col_id='composition', ignore_errors=ignore_errors)
        df["NComp"] = df["composition"].apply(len)
        return df
Esempio n. 5
0
    def test_conversion_overwrite(self):
        # Test with overwrite
        d = {'comp_str': ["Fe2", "MnO2"]}
        df = DataFrame(data=d)

        stc = StrToComposition(target_col_id='comp_str', overwrite_data=False)
        with self.assertRaises(ValueError):
            df = stc.featurize_dataframe(df, 'comp_str', inplace=True)

        with self.assertRaises(ValueError):
            df = stc.featurize_dataframe(df, 'comp_str', inplace=False)

        stc = StrToComposition(target_col_id='comp_str', overwrite_data=True)

        dfres_ipt = df.copy()
        stc.featurize_dataframe(dfres_ipt, 'comp_str', inplace=True)
        self.assertListEqual(dfres_ipt.columns.tolist(), ["comp_str"])

        dfres_ipf = stc.featurize_dataframe(df, 'comp_str', inplace=False)
        self.assertListEqual(dfres_ipf.columns.tolist(), ["comp_str"])
Esempio n. 6
0
    def test_str_to_composition(self):
        d = {'comp_str': ["Fe2", "MnO2"]}

        df = DataFrame(data=d)
        df = StrToComposition().featurize_dataframe(df, 'comp_str')

        self.assertEqual(df["composition"].tolist(),
                         [Composition("Fe2"), Composition("MnO2")])

        stc = StrToComposition(reduce=True, target_col_id='composition_red')
        df = stc.featurize_dataframe(df, 'comp_str')

        self.assertEqual(df["composition_red"].tolist(),
                         [Composition("Fe"), Composition("MnO2")])
Esempio n. 7
0
class FeatureGenerator:
    """
        A wraper class to generate multiple type of elemental features
    """
    def __init__(self):
        self.feature_calculators = MultipleFeaturizer([
            cf.ElementProperty.from_preset(preset_name="magpie"),
            cf.Stoichiometry(),
            cf.ValenceOrbital(props=['frac']),
            cf.IonProperty(fast=True),
            cf.BandCenter(),
            cf.ElementFraction(),
        ])

        self.str2composition = StrToComposition()

    def generate(self,
                 df: pd.DataFrame,
                 ignore_errors: bool = False,
                 drop_mode=True):
        """
            generate feature from a dataframe with a "formula" column that contains 
            chemical formulas of the compositions.

            df : a dataframe with a column name formula
            ignore_errors : ignore errors when generating features
            drop_mode : drop property that generated from mode aggregation function

        """
        df = self.str2composition.featurize_dataframe(
            df, "formula", ignore_errors=ignore_errors)
        df = df.dropna()
        df = self.feature_calculators.featurize_dataframe(
            df, col_id='composition', ignore_errors=ignore_errors)
        df["NComp"] = df["composition"].apply(len)
        if drop_mode:
            df = df.drop(columns=[
                c for c in df.columns if "mode" in c and c.startswith("Magpie")
            ])
        return df
Esempio n. 8
0
    def _tidy_column(self, df, featurizer_type):
        """
        Various conversions to homogenize columns for featurization input.
        For example, take a column of compositions and ensure they are decorated
        with oxidation states, are not strings, etc.

        Args:
            df (pandas.DataFrame)
            featurizer_type: The key defining the featurizer input. For example,
                composition featurizers should have featurizer_type of
                "composition".

        Returns:
            df (pandas.DataFrame): DataFrame with featurizer_type column
                ready for featurization.
        """
        # todo: Make the following conversions more robust (no [0] type checking)
        type_tester = df[featurizer_type].iloc[0]

        if featurizer_type == self.composition_col:
            # Convert formulas to composition objects
            if isinstance(type_tester, str):
                self.logger.info(
                    self._log_prefix +
                    "Compositions detected as strings. Attempting "
                    "conversion to Composition objects...")
                stc = StrToComposition(overwrite_data=True,
                                       target_col_id=featurizer_type)
                df = stc.featurize_dataframe(df,
                                             featurizer_type,
                                             multiindex=self.multiindex,
                                             ignore_errors=True,
                                             inplace=False)

            elif isinstance(type_tester, dict):
                self.logger.info(self._log_prefix +
                                 "Compositions detected as dicts. Attempting "
                                 "conversion to Composition objects...")
                df[featurizer_type] = [
                    Composition.from_dict(d) for d in df[featurizer_type]
                ]

            # Convert non-oxidstate containing comps to oxidstate comps
            if self.guess_oxistates:
                self.logger.info(
                    self._log_prefix +
                    "Guessing oxidation states of compositions, as"
                    " they were not present in input.")
                cto = CompositionToOxidComposition(
                    target_col_id=featurizer_type,
                    overwrite_data=True,
                    return_original_on_error=True,
                    max_sites=-50)
                try:
                    df = cto.featurize_dataframe(df,
                                                 featurizer_type,
                                                 multiindex=self.multiindex,
                                                 inplace=False)
                except Exception as e:
                    self.logger.info(self._log_prefix +
                                     "Could not decorate oxidation states due "
                                     "to {}. Excluding featurizers based on "
                                     "composition oxistates".format(e))
                    classes_require_oxi = [
                        c.__class__.__name__
                        for c in CompositionFeaturizers().need_oxi
                    ]
                    self.exclude.extend(classes_require_oxi)

        else:
            # Convert structure/bs/dos dicts to objects (robust already)
            if isinstance(type_tester, (dict, str)):
                self.logger.info(self._log_prefix.capitalize() +
                                 "{} detected as string or dict. Attempting "
                                 "conversion to {} objects..."
                                 "".format(featurizer_type, featurizer_type))
                if isinstance(type_tester, str):
                    raise ValueError("{} column is type {}. Cannot convert."
                                     "".format(featurizer_type,
                                               type(type_tester)))
                dto = DictToObject(overwrite_data=True,
                                   target_col_id=featurizer_type)
                df = dto.featurize_dataframe(df,
                                             featurizer_type,
                                             inplace=False)

                # Decorate with oxidstates
                if featurizer_type == self.structure_col and \
                        self.guess_oxistates:
                    self.logger.info(
                        self._log_prefix +
                        "Guessing oxidation states of structures if they were "
                        "not present in input.")
                    sto = StructureToOxidStructure(
                        target_col_id=featurizer_type,
                        overwrite_data=True,
                        return_original_on_error=True,
                        max_sites=-50)
                    try:
                        df = sto.featurize_dataframe(
                            df,
                            featurizer_type,
                            multiindex=self.multiindex,
                            inplace=False)
                    except Exception as e:
                        self.logger.info(
                            self._log_prefix +
                            "Could not decorate oxidation states on structures "
                            "due to {}.".format(e))
        return df
"""
['_id', 'material_id', 'formula', 'nsites', 'space_group', 'volume',
       'structure', 'elastic_anisotropy', 'G_Reuss', 'G_VRH', 'G_Voigt',
       'K_Reuss', 'K_VRH', 'K_Voigt', 'poisson_ratio', 'compliance_tensor',
       'elastic_tensor', 'elastic_tensor_original', 'cif', 'kpoint_density',
       'poscar']
"""
unwanted_columns = ['_id', 'material_id', 'nsites', 'volume',
                    'cif', 'kpoint_density', 'poscar']
df = df.drop(unwanted_columns, axis=1)

from matminer.featurizers.conversions import StrToComposition

sc_feat = StrToComposition()
df = sc_feat.featurize_dataframe(df, col_id='formula')

from matminer.featurizers.composition import ElementProperty

ep_feat = ElementProperty.from_preset(preset_name='magpie')
df = ep_feat.featurize_dataframe(df, col_id='composition')

from matminer.featurizers.conversions import CompositionToOxidComposition

co_feat = CompositionToOxidComposition()
df = co_feat.featurize_dataframe(df, col_id='composition')

from matminer.featurizers.composition import OxidationStates

os_feat = OxidationStates()
df = os_feat.featurize_dataframe(df, col_id='composition_oxid')