Esempio n. 1
0
 def test_StructureFeaturizers_needs_fitting(self):
     """Check ``needs_fit`` for two structure featurizer sets.

     An AutoFeaturizer built from ``StructureFeaturizers().all`` is
     expected to report ``needs_fit`` as true, while one built from
     ``StructureFeaturizers().best`` is expected to report it as false.
     """
     no_fit_set = StructureFeaturizers().best
     fittable_set = StructureFeaturizers().all

     plain_af = AutoFeaturizer(featurizers=dict(structure=no_fit_set))
     fittable_af = AutoFeaturizer(featurizers=dict(structure=fittable_set))

     self.assertTrue(fittable_af.needs_fit)
     self.assertFalse(plain_af.needs_fit)
Esempio n. 2
0
 def test_structure_featurizers(self):
     """Compare the structure featurizer set against the reference list.

     The reference list comes from ``self.get_featurizers(sf, skipped)``,
     where ``skipped`` holds class names to leave out: two fixed names,
     every class in ``StructureFeaturizers().matrix`` and, when
     ``dscribe`` is falsy, ``"SOAP"``.
     """
     skipped = ['StructureComposition', 'CGCNNFeaturizer']
     skipped.extend(
         featurizer.__class__.__name__
         for featurizer in StructureFeaturizers().matrix
     )
     if not dscribe:
         skipped.append("SOAP")

     expected = self.get_featurizers(sf, skipped)
     implemented = self.allfs.structure
     self._test_features_implemented(implemented, expected)
Esempio n. 3
0
 def test_presets(self):
     """Check the featurizer count produced by the "fast" preset.

     After ``fit_transform`` on the first ``self.limit`` rows of the test
     dataframe, the number of structure plus composition featurizers held
     by the AutoFeaturizer must equal the combined length of the "fast"
     composition and structure featurizer sets.
     """
     subset = copy.copy(self.test_df.iloc[:self.limit])
     autofeaturizer = AutoFeaturizer(preset="fast")
     # Only the fitted featurizer state is inspected; the output is unused.
     autofeaturizer.fit_transform(subset, "K_VRH")

     expected = CompositionFeaturizers().fast + StructureFeaturizers().fast
     n_found = sum(
         len(autofeaturizer.featurizers[ftype])
         for ftype in ("structure", "composition")
     )
     self.assertEqual(n_found, len(expected))
Esempio n. 4
0
 def test_presets(self):
     """Check the featurizer count produced by the "express" preset.

     After ``fit_transform`` on the first ``self.limit`` rows of the test
     dataframe, the structure featurizers, composition featurizers and
     removed featurizers together must number as many as the combined
     "express" composition and structure featurizer sets.
     """
     subset = copy.copy(self.test_df.iloc[: self.limit])
     autofeaturizer = AutoFeaturizer(preset="express")
     # Only the fitted featurizer state is inspected; the output is unused.
     autofeaturizer.fit_transform(subset, "K_VRH")

     expected = (
         CompositionFeaturizers().express + StructureFeaturizers().express
     )
     counts = (
         len(autofeaturizer.featurizers["structure"]),
         len(autofeaturizer.featurizers["composition"]),
         len(autofeaturizer.removed_featurizers),
     )
     self.assertEqual(sum(counts), len(expected))
Esempio n. 5
0
    def __init__(self,
                 cache_src=None,
                 preset=None,
                 featurizers=None,
                 exclude=None,
                 functionalize=False,
                 ignore_cols=None,
                 ignore_errors=True,
                 drop_inputs=True,
                 guess_oxistates=True,
                 multiindex=False,
                 do_precheck=True,
                 n_jobs=None,
                 logger=True,
                 composition_col="composition",
                 structure_col="structure",
                 bandstructure_col="bandstructure",
                 dos_col="dos"):
        """Validate the configuration and resolve the featurizer sets.

        Exactly one of ``preset`` and ``featurizers`` must be truthy.

        Args:
            cache_src: Optional cache filename. If given, it must contain
                "json" (case-insensitive).
            preset: Name of the attribute read from each featurizer-set
                class to obtain the featurizers when ``featurizers`` is not
                given. Stored as "express" when None.
            featurizers: Dict whose keys are a subset of the four column
                names below and whose values are iterables of featurizer
                objects. Each featurizer's class name must appear in the
                ``all`` set of the matching featurizer-set class.
            exclude: Passed as ``exclude=`` to each featurizer-set class
                when featurizers are resolved from ``preset``. A falsy
                value is stored as an empty list.
            ignore_cols: Stored on the instance; a falsy value is stored
                as an empty list.
            functionalize, ignore_errors, drop_inputs, guess_oxistates,
            multiindex, do_precheck, n_jobs: Stored unchanged on the
                instance; not otherwise used in this method.
            logger: Passed to ``self.get_logger``.
            composition_col, structure_col, bandstructure_col, dos_col:
                Column names; also the keys of ``self.featurizers``.

        Raises:
            AutomatminerError: If both or neither of ``preset`` and
                ``featurizers`` are set.
            TypeError: If ``featurizers`` is truthy but not a dict.
            KeyError: If ``featurizers`` has a key that is not one of the
                four column names.
            ValueError: If a supplied featurizer is not supported, or if
                ``cache_src`` does not contain "json".
        """
        if featurizers and preset:
            raise AutomatminerError("Featurizers and preset were both set. "
                                    "Please either use a preset ('express', "
                                    "'all', 'debug', 'heavy') or set "
                                    "featurizers manually.")
        if not featurizers and not preset:
            raise AutomatminerError("Please specify set(s) of featurizers to "
                                    "use either through the featurizers "
                                    "argument or through the preset argument.")

        self.cache_src = cache_src
        self.preset = "express" if preset is None else preset
        self._logger = self.get_logger(logger)
        self.featurizers = featurizers
        self.exclude = exclude if exclude else []
        self.functionalize = functionalize
        self.ignore_cols = ignore_cols or []
        self.is_fit = False
        self.fitted_input_df = None
        self.converted_input_df = None
        self.ignore_errors = ignore_errors
        self.drop_inputs = drop_inputs
        self.multiindex = multiindex
        self.do_precheck = do_precheck
        self.n_jobs = n_jobs
        self.guess_oxistates = guess_oxistates
        self.features = []
        self.auto_featurizer = self.featurizers is None
        self.removed_featurizers = None
        self.composition_col = composition_col
        self.structure_col = structure_col
        self.bandstruct_col = bandstructure_col
        self.dos_col = dos_col

        _supported_featurizers = {
            composition_col: CompositionFeaturizers,
            structure_col: StructureFeaturizers,
            bandstructure_col: BSFeaturizers,
            dos_col: DOSFeaturizers
        }

        # user-set featurizers
        if self.featurizers:
            if not isinstance(self.featurizers, dict):
                raise TypeError("Featurizers must be a dictionary with keys "
                                "matching your {}".format(_COMMON_COL_ERR_STR))

            invalid_ftypes = [
                f for f in self.featurizers
                if f not in _supported_featurizers
            ]
            if invalid_ftypes:
                # Name the offending keys so the user can see what to fix.
                raise KeyError(
                    "The following keys were specified as featurizer"
                    " types but were not set in {}: {}"
                    "".format(_COMMON_COL_ERR_STR, invalid_ftypes))

            for ftype, fset in self.featurizers.items():
                _allowed = {
                    f.__class__.__name__
                    for f in _supported_featurizers[ftype]().all
                }
                for f in fset:
                    if f.__class__.__name__ not in _allowed:
                        raise ValueError(
                            "The {} featurizer {} is not supported by "
                            "AutoFeaturizer. Try updating your version of "
                            "automatminer and matminer.".format(ftype, f))

        # auto-set featurizers
        else:
            featurizers = dict()
            for featurizer_type, featurizer_set in \
                    _supported_featurizers.items():
                featurizers[featurizer_type] = getattr(
                    featurizer_set(exclude=self.exclude), self.preset)
            self.featurizers = featurizers

        # Check if any featurizers need fitting (useful for MatPipe)
        fittable_fs = StructureFeaturizers().need_fit
        self.fittable_fcls = {f.__class__.__name__ for f in fittable_fs}

        # Currently structure featurizers are the only featurizer types which
        # can be fittable. A user-supplied dict may omit the structure key
        # entirely, so fall back to an empty set instead of raising KeyError.
        structure_fs = self.featurizers.get(self.structure_col, [])
        self.needs_fit = any(
            f.__class__.__name__ in self.fittable_fcls for f in structure_fs
        )

        if self.needs_fit and self.cache_src:
            self.logger.warning(
                self._log_prefix +
                "Using cached features on fittable featurizers! "
                "Please make sure you are not benchmarking with "
                "these options enabled; it is likely you will be "
                "leaking data (i.e., features) from the testing "
                "sets into the training.")

        if self.cache_src and "json" not in self.cache_src.lower():
            raise ValueError("The cache_src filename does not contain json. "
                             "JSON is the required file type for featurizer "
                             "caching.")

        self.min_precheck_frac = 0.9
Esempio n. 6
0
 def setUp(self):
     """Store the preset names and one instance of each featurizer set.

     The instances are kept under the short attribute names ``c``
     (composition), ``s`` (structure), ``b`` (band structure) and ``d``
     (density of states).
     """
     self.required_attrs = ["express", "heavy", "debug", "all"]
     featurizer_sets = (
         ("c", CompositionFeaturizers),
         ("s", StructureFeaturizers),
         ("b", BSFeaturizers),
         ("d", DOSFeaturizers),
     )
     for attr_name, set_class in featurizer_sets:
         setattr(self, attr_name, set_class())