na_method_fit="drop", na_method_transform="mean"), "autofeaturizer": AutoFeaturizer(n_jobs=10, preset="debug"), } pipe = MatPipe(**pipe_config) mb = MatbenchBenchmark(autoload=False) for task in mb.tasks: task.load() for fold in task.folds: df_train = task.get_train_and_val_data(fold, as_type="df") # Fit the RF with matpipe pipe.fit(df_train, task.metadata.target) df_test = task.get_test_data(fold, include_target=False, as_type="df") predictions = pipe.predict( df_test)[f"{task.metadata.target} predicted"] # A single configuration is used params = {'note': 'single config; see benchmark user metadata'} task.record(fold, predictions, params=params) # Save your results mb.to_file("results.json.gz")
# Let's download an example dataset and try predicting bulk moduli.
# NOTE: `pipe` is not created in this section; it must already be defined
# before these statements run.
from matminer.datasets.dataset_retrieval import load_dataset
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

# Column holding the property we want to learn.
target = "K_VRH"

# Keep only the structure column and the target column.
columns = ["structure", target]
df = load_dataset("elastic_tensor_2015")[columns]

# Hold out 20% of the rows for testing; the fixed seed makes the shuffled
# split repeatable.
train, test = train_test_split(
    df,
    test_size=0.2,
    shuffle=True,
    random_state=20190301,
)

# Set the true values aside, then remove them from the frame we predict on.
test_true = test[target]
test = test.drop(columns=[target])

# MatPipe uses an sklearn-esque BaseEstimator API for fitting pipelines and
# predicting properties. Fitting a pipe trains it to the input data; predicting
# with a pipe will output predictions.
pipe.fit(train, target=target)

# Now we can predict our outputs. They'll appear in a column called
# "K_VRH predicted".
predicted_df = pipe.predict(test, target)
test_predicted = predicted_df["K_VRH predicted"]

# Let's see how we did:
mae = mean_absolute_error(test_true, test_predicted)
summary = "MAE on {} samples: {}".format(len(test_true), mae)
print(summary)

# Save a text digest of the pipeline.
pipe.digest(filename="digest.txt")

# You can now save your model
pipe.save("mat.pipe")