def setUp(self):
    """
    Build the dataframes, target name and debug pipes shared by the tests.

    Loads the "elastic_tensor_2015" dataset, renames its "formula" column to
    "composition", and creates two "debug_single" pipes: one plain and one
    configured with cache_src=CACHE_SRC.
    """
    full_df = load_dataset("elastic_tensor_2015").rename(
        columns={"formula": "composition"})

    # Column subsets of the same loaded dataframe.
    self.df = full_df[["composition", "K_VRH"]]
    self.df_struc = full_df[["composition", "structure", "K_VRH"]]
    self.extra_features = full_df["G_VRH"]
    self.target = "K_VRH"

    # One uncached and one cached configuration of the same preset.
    self.config = get_preset_config("debug_single")
    self.config_cached = get_preset_config(
        "debug_single", cache_src=CACHE_SRC)
    self.pipe = MatPipe(**self.config)
    self.pipe_cached = MatPipe(**self.config_cached)
def __init__(self, autofeaturizer=None, cleaner=None, reducer=None,
             learner=None):
    """
    Set up the pipe from four dataframe transformers.

    Either all four transformers must be given, or none of them. When none
    are given, they are taken from the "express" preset config.

    Args:
        autofeaturizer: Stored as self.autofeaturizer.
        cleaner: Stored as self.cleaner.
        reducer: Stored as self.reducer.
        learner: Stored as self.learner.

    Raises:
        AutomatminerError: If some, but not all, of the four transformers
            are specified.
    """
    transformers = [autofeaturizer, cleaner, reducer, learner]
    # Truthiness is used here, so a falsy transformer counts as "missing".
    if not all(transformers):
        if any(transformers):
            # Each fragment ends with a space so the implicitly
            # concatenated message reads as proper words.
            raise AutomatminerError(
                "Please specify all dataframe "
                "transformers (autofeaturizer, learner, "
                "reducer, and cleaner), or none (to use "
                "default).")
        config = get_preset_config("express")
        autofeaturizer = config["autofeaturizer"]
        cleaner = config["cleaner"]
        reducer = config["reducer"]
        learner = config["learner"]

    self.autofeaturizer = autofeaturizer
    self.cleaner = cleaner
    self.reducer = reducer
    self.learner = learner
    self.pre_fit_df = None
    self.post_fit_df = None
    self.ml_type = None
    self.target = None
    self.version = get_version()
    super(MatPipe, self).__init__()
def from_preset(preset: str = "express", **powerups):
    """
    Get a preset MatPipe from a string using
    automatminer.presets.get_preset_config

    See get_preset_config for more information.

    Args:
        preset (str): The preset configuration to use.
            Current presets are:
             - production
             - express (recommended for most problems)
             - express_single (no AutoML, XGBoost only)
             - heavy
             - debug
             - debug_single (no AutoML, XGBoost only)
        powerups (kwargs): General upgrades/changes to apply.
            Current powerups are:
             - cache_src (str): The cache source if you want to save
                features.
             - n_jobs (int): The number of parallel process to use when
                running.

    Returns:
        A MatPipe constructed from the preset config.
    """
    return MatPipe(**get_preset_config(preset, **powerups))
def __init__(self, autofeaturizer=None, cleaner=None, reducer=None,
             learner=None, logger=True, log_level=None):
    """
    Set up the pipe from four dataframe transformers and attach loggers.

    Either all four transformers must be given, or none of them. When none
    are given, they are taken from the "production" preset config.

    Args:
        autofeaturizer: Stored as self.autofeaturizer.
        cleaner: Stored as self.cleaner.
        reducer: Stored as self.reducer.
        learner: Stored as self.learner.
        logger: Passed to self.get_logger to obtain the logger for the
            pipe itself and for each of the four transformers.
        log_level: Passed as ``level`` to self.get_logger for the pipe's
            own logger only.

    Raises:
        AutomatminerError: If some, but not all, of the four transformers
            are specified.
    """
    transformers = [autofeaturizer, cleaner, reducer, learner]
    # Truthiness is used here, so a falsy transformer counts as "missing".
    if not all(transformers):
        if any(transformers):
            # Each fragment ends with a space so the implicitly
            # concatenated message reads as proper words.
            raise AutomatminerError(
                "Please specify all dataframe "
                "transformers (autofeaturizer, learner, "
                "reducer, and cleaner), or none (to use "
                "default).")
        config = get_preset_config("production")
        autofeaturizer = config["autofeaturizer"]
        cleaner = config["cleaner"]
        reducer = config["reducer"]
        learner = config["learner"]

    self._logger = self.get_logger(logger, level=log_level)
    self.autofeaturizer = autofeaturizer
    self.cleaner = cleaner
    self.reducer = reducer
    self.learner = learner

    # The transformers' loggers are requested without log_level.
    self.autofeaturizer._logger = self.get_logger(logger)
    self.cleaner._logger = self.get_logger(logger)
    self.reducer._logger = self.get_logger(logger)
    self.learner._logger = self.get_logger(logger)

    self.pre_fit_df = None
    self.post_fit_df = None
    self.is_fit = False
    self.ml_type = None
def test_missing(self):
    """Requesting an unknown preset name must raise ValueError."""
    bogus_preset = "QWERTYUIOP1234567890"
    with self.assertRaises(ValueError):
        get_preset_config(bogus_preset)
def test_caching_powerup(self):
    """The cache_src powerup must show up on the AF_KEY config entry."""
    expected_src = "./somefile.json"
    config = get_preset_config("production", cache_src=expected_src)
    af_entry = config[AF_KEY]
    self.assertEqual(af_entry.cache_src, expected_src)
    # Smoke check: the config must also be accepted by MatPipe.
    MatPipe(**config)
def test_heavy(self):
    """The "heavy" preset has every key in KEYSET and builds a MatPipe."""
    heavy = get_preset_config("heavy")
    for k in KEYSET:
        # assertIn names the missing key on failure, unlike assertTrue.
        self.assertIn(k, heavy)
    # Smoke check: constructing the pipe from the config must not raise.
    MatPipe(**heavy)
def test_express_single(self):
    """The "express_single" preset has every KEYSET key and builds a MatPipe."""
    express_single = get_preset_config("express_single")
    for k in KEYSET:
        # assertIn names the missing key on failure, unlike assertTrue.
        self.assertIn(k, express_single)
    # Smoke check: constructing the pipe from the config must not raise.
    MatPipe(**express_single)
def test_express(self):
    """The "express" preset has every key in KEYSET and builds a MatPipe."""
    express = get_preset_config("express")
    for k in KEYSET:
        # assertIn names the missing key on failure, unlike assertTrue.
        self.assertIn(k, express)
    # Smoke check: constructing the pipe from the config must not raise.
    MatPipe(**express)
def test_debug_single(self):
    """The "debug_single" preset has every KEYSET key and builds a MatPipe."""
    debug_single = get_preset_config("debug_single")
    for k in KEYSET:
        # assertIn names the missing key on failure, unlike assertTrue.
        self.assertIn(k, debug_single)
    # Smoke check: constructing the pipe from the config must not raise.
    MatPipe(**debug_single)
def test_debug(self):
    """The "debug" preset has every key in KEYSET and builds a MatPipe."""
    debug = get_preset_config("debug")
    for k in KEYSET:
        # assertIn names the missing key on failure, unlike assertTrue.
        self.assertIn(k, debug)
    # Smoke check: constructing the pipe from the config must not raise.
    MatPipe(**debug)
def test_production(self):
    """The "production" preset has every KEYSET key and builds a MatPipe."""
    prod = get_preset_config("production")
    for k in KEYSET:
        # assertIn names the missing key on failure, unlike assertTrue.
        self.assertIn(k, prod)
    # Smoke check: constructing the pipe from the config must not raise.
    MatPipe(**prod)
def setUp(self):
    """
    Load the mini AutoML dataframe and prepare train/test splits.

    Reads "mini_automl_df.csv" from the directory containing this file
    (first column as index), slices it into train and test frames, and takes
    the learner from the "debug" preset config.
    """
    basedir = os.path.dirname(os.path.realpath(__file__))
    # Join path components instead of concatenating with a hard-coded "/".
    df = pd.read_csv(os.path.join(basedir, "mini_automl_df.csv"),
                     index_col=0)
    # NOTE(review): the slices [:450] and [451:] leave positional row 450
    # out of both sets - confirm whether that gap is intended.
    self.train_df = df.copy(deep=True).iloc[:450]
    self.test_df = df.copy(deep=True).iloc[451:]
    self.tpot = get_preset_config("debug")["learner"]
def test_n_jobs_powerup(self):
    """The n_jobs powerup must reach both the AF_KEY and ML_KEY entries."""
    expected_jobs = 1
    config = get_preset_config("production", n_jobs=expected_jobs)

    af_entry = config[AF_KEY]
    self.assertEqual(af_entry.n_jobs, expected_jobs)

    ml_entry = config[ML_KEY]
    self.assertEqual(ml_entry.tpot_kwargs["n_jobs"], expected_jobs)

    # Smoke check: the config must also be accepted by MatPipe.
    MatPipe(**config)
def setUp(self):
    """Fetch the "debug" preset config and store it on the test case."""
    debug_config = get_preset_config("debug")
    self.config = debug_config
def setUp(self):
    """Fetch the 'debug' preset config and store it on the test case."""
    debug_config = get_preset_config('debug')
    self.config = debug_config