예제 #1
0
 def test_caching_powerup(self):
     """Check that ``cache_src`` passed to the preset reaches the AF_KEY entry.

     Also verifies that the resulting config can be used to build a MatPipe.
     """
     expected_src = "./somefile.json"
     config = get_preset_config("production", cache_src=expected_src)
     # The object stored under AF_KEY must carry the requested cache path.
     self.assertEqual(config[AF_KEY].cache_src, expected_src)
     # Constructing the pipe must not raise with this config.
     MatPipe(**config)
예제 #2
0
 def test_express_single(self):
     """Check the "express_single" preset has every KEYSET key and builds a MatPipe."""
     config = get_preset_config("express_single")
     for key in KEYSET:
         # assertIn names the missing key on failure; assertTrue would only
         # report "False is not true".
         self.assertIn(key, config)
     # Constructing the pipe must not raise with this preset.
     MatPipe(**config)
예제 #3
0
 def test_heavy(self):
     """Check the "heavy" preset has every KEYSET key and builds a MatPipe."""
     config = get_preset_config("heavy")
     for key in KEYSET:
         # assertIn names the missing key on failure; assertTrue would only
         # report "False is not true".
         self.assertIn(key, config)
     # Constructing the pipe must not raise with this preset.
     MatPipe(**config)
예제 #4
0
 def test_debug_single(self):
     """Check the "debug_single" preset has every KEYSET key and builds a MatPipe."""
     config = get_preset_config("debug_single")
     for key in KEYSET:
         # assertIn names the missing key on failure; assertTrue would only
         # report "False is not true".
         self.assertIn(key, config)
     # Constructing the pipe must not raise with this preset.
     MatPipe(**config)
예제 #5
0
 def test_express(self):
     """Check the "express" preset has every KEYSET key and builds a MatPipe."""
     config = get_preset_config("express")
     for key in KEYSET:
         # assertIn names the missing key on failure; assertTrue would only
         # report "False is not true".
         self.assertIn(key, config)
     # Constructing the pipe must not raise with this preset.
     MatPipe(**config)
예제 #6
0
 def test_debug(self):
     """Check the "debug" preset has every KEYSET key and builds a MatPipe."""
     config = get_preset_config("debug")
     for key in KEYSET:
         # assertIn names the missing key on failure; assertTrue would only
         # report "False is not true".
         self.assertIn(key, config)
     # Constructing the pipe must not raise with this preset.
     MatPipe(**config)
예제 #7
0
 def test_production(self):
     """Check the "production" preset has every KEYSET key and builds a MatPipe."""
     config = get_preset_config("production")
     for key in KEYSET:
         # assertIn names the missing key on failure; assertTrue would only
         # report "False is not true".
         self.assertIn(key, config)
     # Constructing the pipe must not raise with this preset.
     MatPipe(**config)
예제 #8
0
 def test_n_jobs_powerup(self):
     """Check that ``n_jobs`` passed to the preset reaches both consumers.

     The value must appear on the AF_KEY entry's ``n_jobs`` attribute and in
     the ML_KEY entry's ``tpot_kwargs["n_jobs"]``; the config must also be
     usable to build a MatPipe.
     """
     requested_jobs = 1
     config = get_preset_config("production", n_jobs=requested_jobs)
     self.assertEqual(config[AF_KEY].n_jobs, requested_jobs)
     self.assertEqual(config[ML_KEY].tpot_kwargs["n_jobs"], requested_jobs)
     # Constructing the pipe must not raise with this config.
     MatPipe(**config)
예제 #9
0
)
# Components handed to MatPipe. `learner` is built earlier in the script.
pipe_config = {
    "learner": learner,
    # No reducers configured for the feature-reduction step.
    "reducer": FeatureReducer(reducers=[]),
    "cleaner": DataCleaner(
        feature_na_method="mean",
        max_na_frac=0.01,
        na_method_fit="drop",
        na_method_transform="mean",
    ),
    "autofeaturizer": AutoFeaturizer(n_jobs=10, preset="debug"),
}

pipe = MatPipe(**pipe_config)

# autoload=False: each task's data is loaded explicitly inside the loop below.
mb = MatbenchBenchmark(autoload=False)

for task in mb.tasks:
    task.load()
    for fold in task.folds:
        # Training + validation data for this fold, as a DataFrame.
        df_train = task.get_train_and_val_data(fold, as_type="df")

        # Fit the whole pipeline on this fold, using the task's target column.
        pipe.fit(df_train, task.metadata.target)

        # Test inputs only; the target column is excluded.
        df_test = task.get_test_data(fold, include_target=False, as_type="df")

        # Select the prediction column from the DataFrame returned by predict.
        predicted_df = pipe.predict(df_test)
        predictions = predicted_df[f"{task.metadata.target} predicted"]
예제 #10
0
# The most basic usage of automatminer requires interacting with only one class,
# MatPipe. This class, once fit, is a complete pipeline, and is able to
# transform compositions, structures, bandstructures, and DOS into property
# predictions.

# A configured MatPipe object will featurize, clean, and learn on a dataset
# automatically, and it is made of 4 classes: AutoFeaturizer, DataCleaner,
# FeatureReducer, and an ML adaptor (e.g., TPOTAdaptor). The exact operations
# MatPipe executes are based entirely on how these 4 classes are configured.

# The easiest way to get started is by passing in a preset configuration to
# MatPipe. We can do this with the get_preset_config function; here, we'll use
# the "express" config, which will provide decent results in a reasonable time
# frame (an hour or two).
# Fetch the "express" preset, then unpack it into MatPipe's keyword arguments.
express_config = get_preset_config("express")
pipe = MatPipe(**express_config)

# Let's download an example dataset and try predicting bulk moduli.
from sklearn.model_selection import train_test_split
from matminer.datasets.dataset_retrieval import load_dataset
# Load the dataset and keep only the structure inputs and the K_VRH target.
df = load_dataset("elastic_tensor_2015")
df = df[["structure", "K_VRH"]]

# Hold out 20% of the rows for testing; the fixed seed keeps the shuffled
# split reproducible.
train, test = train_test_split(
    df,
    test_size=0.2,
    shuffle=True,
    random_state=20190301,
)

# Set the true target values aside, then drop them from the test inputs.
test_true = test["K_VRH"]
test = test.drop(columns=["K_VRH"])

# MatPipe uses an sklearn-esque BaseEstimator API for fitting pipelines and
# predicting properties. Fitting a pipe trains it to the input data; predicting
# with a pipe will output predictions.