예제 #1
0
                na_method_fit="drop",
                na_method_transform="mean"),
    "autofeaturizer":
    AutoFeaturizer(n_jobs=10, preset="debug"),
}

# Build the pipeline from the configuration dict assembled above.
pipe = MatPipe(**pipe_config)

# The benchmark is created with autoload=False; each task is loaded
# explicitly inside the loop below.
mb = MatbenchBenchmark(autoload=False)

for task in mb.tasks:
    task.load()
    for fold in task.folds:
        # Training + validation data for this fold, as a DataFrame.
        train_df = task.get_train_and_val_data(fold, as_type="df")

        # Name of the target column and of the column read back from the
        # prediction output.
        target_col = task.metadata.target
        predicted_col = f"{target_col} predicted"

        # Fit the pipeline on this fold's data.
        pipe.fit(train_df, target_col)

        # Predict on the test inputs (requested without the target column).
        test_df = task.get_test_data(
            fold,
            include_target=False,
            as_type="df",
        )
        predicted_df = pipe.predict(test_df)
        fold_predictions = predicted_df[predicted_col]

        # The same single configuration is used for every fold; a fresh dict
        # is still built each time it is recorded.
        fold_params = {"note": "single config; see benchmark user metadata"}

        task.record(fold, fold_predictions, params=fold_params)

# Write the recorded benchmark results to disk.
mb.to_file("results.json.gz")
예제 #2
0
# Imports used by this example: the train/test splitter, the error metric,
# and the matminer dataset loader.
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from matminer.datasets.dataset_retrieval import load_dataset

# Download an example dataset and keep only the structure column and the
# bulk-modulus column ("K_VRH") that we will try to predict.
df = load_dataset("elastic_tensor_2015")
df = df[["structure", "K_VRH"]]

# Shuffle and hold out 20% of the rows for testing; the fixed random_state
# keeps the split reproducible.
train, test = train_test_split(
    df,
    test_size=0.2,
    shuffle=True,
    random_state=20190301,
)

# Keep the true target values aside, then remove them from the test frame so
# the pipeline only sees the inputs.
test_true = test["K_VRH"]
test = test.drop(columns=["K_VRH"])

# MatPipe follows an sklearn-like estimator API: fit() trains the pipeline on
# the input data and predict() returns a frame containing the predictions.
# NOTE(review): `pipe` is not created in this snippet -- presumably an
# already-constructed MatPipe; confirm it is defined before this point.
pipe.fit(train, target="K_VRH")

# Predictions are read from the "K_VRH predicted" column of the output.
# NOTE(review): the benchmark example earlier in this file calls
# pipe.predict(df) with no second argument -- confirm which signature the
# installed automatminer version expects.
predicted_df = pipe.predict(test, "K_VRH")
test_predicted = predicted_df["K_VRH predicted"]

# Report the mean absolute error on the held-out samples.
mae = mean_absolute_error(test_true, test_predicted)
n_samples = len(test_true)
print("MAE on {} samples: {}".format(n_samples, mae))

# Write a text digest of the pipeline.
pipe.digest(filename="digest.txt")

# Persist the fitted pipeline to disk.
pipe.save("mat.pipe")