def test_caching_powerup(self):
    """The cache_src powerup must be forwarded into the autofeaturizer
    entry of the preset config, and the result must build a valid MatPipe."""
    cache_file = "./somefile.json"
    config = get_preset_config("production", cache_src=cache_file)
    self.assertEqual(config[AF_KEY].cache_src, cache_file)
    MatPipe(**config)
def test_express_single(self):
    """The "express_single" preset must contain every required pipeline key
    and must be usable to construct a MatPipe."""
    express_single = get_preset_config("express_single")
    for k in KEYSET:
        # assertIn reports the missing key on failure, unlike
        # assertTrue(k in d.keys()) which only prints "False is not true".
        self.assertIn(k, express_single)
    MatPipe(**express_single)
def test_heavy(self):
    """The "heavy" preset must contain every required pipeline key
    and must be usable to construct a MatPipe."""
    heavy = get_preset_config("heavy")
    for k in KEYSET:
        # assertIn reports the missing key on failure, unlike
        # assertTrue(k in d.keys()) which only prints "False is not true".
        self.assertIn(k, heavy)
    MatPipe(**heavy)
def test_debug_single(self):
    """The "debug_single" preset must contain every required pipeline key
    and must be usable to construct a MatPipe."""
    debug_single = get_preset_config("debug_single")
    for k in KEYSET:
        # assertIn reports the missing key on failure, unlike
        # assertTrue(k in d.keys()) which only prints "False is not true".
        self.assertIn(k, debug_single)
    MatPipe(**debug_single)
def test_express(self):
    """The "express" preset must contain every required pipeline key
    and must be usable to construct a MatPipe."""
    express = get_preset_config("express")
    for k in KEYSET:
        # assertIn reports the missing key on failure, unlike
        # assertTrue(k in d.keys()) which only prints "False is not true".
        self.assertIn(k, express)
    MatPipe(**express)
def test_debug(self):
    """The "debug" preset must contain every required pipeline key
    and must be usable to construct a MatPipe."""
    debug = get_preset_config("debug")
    for k in KEYSET:
        # assertIn reports the missing key on failure, unlike
        # assertTrue(k in d.keys()) which only prints "False is not true".
        self.assertIn(k, debug)
    MatPipe(**debug)
def test_production(self):
    """The "production" preset must contain every required pipeline key
    and must be usable to construct a MatPipe."""
    prod = get_preset_config("production")
    for k in KEYSET:
        # assertIn reports the missing key on failure, unlike
        # assertTrue(k in d.keys()) which only prints "False is not true".
        self.assertIn(k, prod)
    MatPipe(**prod)
def test_n_jobs_powerup(self):
    """The n_jobs powerup must propagate to both the autofeaturizer and
    the learner's TPOT kwargs, and the result must build a valid MatPipe."""
    jobs = 1
    config = get_preset_config("production", n_jobs=jobs)
    self.assertEqual(config[AF_KEY].n_jobs, jobs)
    self.assertEqual(config[ML_KEY].tpot_kwargs["n_jobs"], jobs)
    MatPipe(**config)
) pipe_config = { "learner": learner, "reducer": FeatureReducer(reducers=[]), "cleaner": DataCleaner(feature_na_method="mean", max_na_frac=0.01, na_method_fit="drop", na_method_transform="mean"), "autofeaturizer": AutoFeaturizer(n_jobs=10, preset="debug"), } pipe = MatPipe(**pipe_config) mb = MatbenchBenchmark(autoload=False) for task in mb.tasks: task.load() for fold in task.folds: df_train = task.get_train_and_val_data(fold, as_type="df") # Fit the RF with matpipe pipe.fit(df_train, task.metadata.target) df_test = task.get_test_data(fold, include_target=False, as_type="df") predictions = pipe.predict( df_test)[f"{task.metadata.target} predicted"]
# Automatminer's most basic workflow needs only a single class: MatPipe.
# Once fit, a MatPipe is a complete pipeline able to transform compositions,
# structures, bandstructures, and DOS objects into property predictions.
#
# A configured MatPipe featurizes, cleans, and learns on a dataset
# automatically. It is composed of 4 classes -- AutoFeaturizer, DataCleaner,
# FeatureReducer, and an ML adaptor (e.g., TPOTAdaptor) -- and the exact
# operations MatPipe performs are determined entirely by how those 4 classes
# are configured.
#
# The quickest way to get started is to hand MatPipe a preset configuration
# from get_preset_config. Here we use the "express" preset, which gives
# decent results in a reasonable time frame (an hour or two).
from matminer.datasets.dataset_retrieval import load_dataset
from sklearn.model_selection import train_test_split

pipe = MatPipe(**get_preset_config("express"))

# Download an example dataset and try predicting bulk moduli (K_VRH).
df = load_dataset("elastic_tensor_2015")[["structure", "K_VRH"]]
train, test = train_test_split(
    df, shuffle=True, random_state=20190301, test_size=0.2
)
test_true = test['K_VRH']
test = test.drop(columns=["K_VRH"])

# MatPipe exposes an sklearn-esque BaseEstimator API for fitting pipelines
# and predicting properties: fitting a pipe trains it on the input data;
# predicting with a fit pipe outputs predictions.