def test_heavy(self):
    """Check that the "heavy" preset is complete and usable.

    The preset config must contain every key listed in KEYSET, and MatPipe
    must accept the config as keyword arguments.
    """
    heavy = get_preset_config("heavy")
    for k in KEYSET:
        # assertIn names the missing key in the failure message, which
        # assertTrue(k in ...) cannot do.
        self.assertIn(k, heavy)
    # Any exception raised while constructing the pipe fails the test.
    MatPipe(**heavy)
def test_caching_powerup(self):
    """Check that cache_src given to the "production" preset is kept.

    The object stored under AF_KEY must expose the same cache_src that was
    requested, and MatPipe must accept the resulting config.
    """
    expected_cache = "./somefile.json"
    config = get_preset_config("production", cache_src=expected_cache)
    af_entry = config[AF_KEY]
    self.assertEqual(af_entry.cache_src, expected_cache)
    # Building the pipe confirms the config is still valid as kwargs.
    MatPipe(**config)
def test_express(self):
    """Check that the "express" preset is complete and usable.

    Every key in KEYSET must be present in the preset config, and MatPipe
    must be constructible from it.
    """
    express = get_preset_config("express")
    for k in KEYSET:
        # On failure assertIn reports which key is absent.
        self.assertIn(k, express)
    # Any exception raised while constructing the pipe fails the test.
    MatPipe(**express)
def test_express_single(self):
    """Check that the "express_single" preset is complete and usable.

    Every key in KEYSET must be present in the preset config, and MatPipe
    must be constructible from it.
    """
    express_single = get_preset_config("express_single")
    for k in KEYSET:
        # Membership is tested on the mapping directly; assertIn reports
        # the offending key if it is missing.
        self.assertIn(k, express_single)
    # Any exception raised while constructing the pipe fails the test.
    MatPipe(**express_single)
def test_debug(self):
    """Check that the "debug" preset is complete and usable.

    Every key in KEYSET must be present in the preset config, and MatPipe
    must be constructible from it.
    """
    debug = get_preset_config("debug")
    for k in KEYSET:
        # assertIn gives a descriptive failure ("<key> not found in ...").
        self.assertIn(k, debug)
    # Any exception raised while constructing the pipe fails the test.
    MatPipe(**debug)
def test_debug_single(self):
    """Check that the "debug_single" preset is complete and usable.

    Every key in KEYSET must be present in the preset config, and MatPipe
    must be constructible from it.
    """
    debug_single = get_preset_config("debug_single")
    for k in KEYSET:
        # assertIn identifies the missing key in its failure message.
        self.assertIn(k, debug_single)
    # Any exception raised while constructing the pipe fails the test.
    MatPipe(**debug_single)
from automatminer import MatPipe
from sklearn.model_selection import train_test_split
import sys
import pandas as pd
import numpy as np
import os

# Command-line script: load a saved MatPipe and predict for one composition.
#   argv[1] -> value placed in the 'composition' column
#   argv[2] -> directory that receives elongation.csv

# One-row input table built from the first command-line argument.
i = {'composition': [sys.argv[1]]}
# Output directory, taken from the second command-line argument.
rpath = sys.argv[2]
df = pd.DataFrame(i)
# Hard-coded location of the saved pipeline that gets loaded below.
filename = 'D:/FYP_files/Machine_learning/pipeline/p_files/MatPipe_predict_thirdelongation_from_composition.p'
# Pipeline file names noted by the author (presumably interchangeable
# models -- TODO confirm):
#   MatPipe_predict_thirdelongation_from_composition.p
#   MatPipe_predict_Ultimate_fourthtime_from_composition.p
# NOTE: argv parsing and the pipeline load run at import time; only the
# prediction and the CSV write are behind the __main__ guard.
pipe = MatPipe.load(filename)
if __name__ == '__main__':
    # Predict, then write the returned frame to <rpath>/elongation.csv.
    df = pipe.predict(df)
    df.to_csv('%s/elongation.csv' % rpath)
def test_production(self):
    """Check that the "production" preset is complete and usable.

    Every key in KEYSET must be present in the preset config, and MatPipe
    must be constructible from it.
    """
    prod = get_preset_config("production")
    for k in KEYSET:
        # assertIn reports the absent key, so a failure is self-explanatory.
        self.assertIn(k, prod)
    # Any exception raised while constructing the pipe fails the test.
    MatPipe(**prod)
importances = rf.feature_importances_ # included = np.asarray(included) included = X.columns.values indices = np.argsort(importances)[::-1] pf = PlotlyFig(y_title='Importance (%)', title='Feature by importances', filename='E:/importances.html', fontsize=20, ticksize=15) pf.bar(x=included[indices][0:10], y=importances[indices][0:10]) #---------------------------------------------------------------- #---------------------------------------------------------------- #---------------------------------------------------------------- pipe = MatPipe.from_preset("express")#the heavy can change to express or light, judge on how exactly the data you want to get pipe.fit(train_df, target)#this will take a long time prediction_df = pipe.predict(prediction_df) prediction_df.to_csv('C:/Users/DELL/Documents/predictionK_VRH.csv') from sklearn.metrics import mean_absolute_error from sklearn.dummy import DummyRegressor # fit the dummy dr = DummyRegressor() dr.fit(train_df["composition"], train_df[target]) dummy_test = dr.predict(test_df["composition"]) # Score dummy and MatPipe true = test_df[target] matpipe_test = prediction_df[target + " predicted"] mae_matpipe = mean_absolute_error(true, matpipe_test) mae_dummy = mean_absolute_error(true, dummy_test) print("K_VRH Dummy MAE: {} ".format(mae_dummy))
def test_n_jobs_powerup(self):
    """Check that n_jobs given to the "production" preset is propagated.

    The value must appear on the object stored under AF_KEY and in the
    tpot_kwargs of the object stored under ML_KEY; MatPipe must then accept
    the config.
    """
    expected_jobs = 1
    config = get_preset_config("production", n_jobs=expected_jobs)

    af_jobs = config[AF_KEY].n_jobs
    self.assertEqual(af_jobs, expected_jobs)

    tpot_jobs = config[ML_KEY].tpot_kwargs["n_jobs"]
    self.assertEqual(tpot_jobs, expected_jobs)

    # Building the pipe confirms the config is still valid as kwargs.
    MatPipe(**config)
# Assemble a two-column table (structure, K_VRH) from values prepared earlier.
df = pd.DataFrame(columns=['structure', 'K_VRH'])
df['structure'] = centro_structs
df['K_VRH'] = K_VRH
# Discard rows with any missing value before saving and splitting.
df = df.dropna()
df.to_csv('centro_elastic.csv')
print(df.describe())
# Shuffled 90/10 train/test split with a fixed seed.
train_df, test_df = train_test_split(df, test_size=0.1, shuffle=True, random_state=1)
target = "K_VRH"
# Prediction input: the test rows without the target column.
prediction_df = test_df.drop(columns=[target])
pipe = MatPipe.from_preset("express")
pipe.fit(train_df, target)
prediction_df = pipe.predict(prediction_df)
# Fit a DummyRegressor on the same split to serve as a trivial baseline.
dr = DummyRegressor()
dr.fit(train_df["structure"], train_df[target])
dummy_test = dr.predict(test_df["structure"])
# Score the dummy and MatPipe against the held-out targets.
true = test_df[target]
# MatPipe's predictions are read from the "<target> predicted" column.
matpipe_test = prediction_df[target + " predicted"]
mae_matpipe = mean_absolute_error(true, matpipe_test)
mse_matpipe = mean_squared_error(true, matpipe_test)
# Add the 'Mh' target column computed earlier in the script.
df['Mh'] = mh  #df['diel']*df['K_VRH']
# Turn +/-inf into NaN so that dropna() removes those rows as well.
df = df.replace([np.inf, -np.inf], np.nan)
df = df.dropna()
df.to_csv('Mh_test.csv')
print(df.describe())
target = 'Mh'
# Shuffled 90/10 train/test split with a fixed seed.
train_df, test_df = train_test_split(df, test_size=0.1, shuffle=True, random_state=1)
# Remove the target *column* from the prediction input. A bare
# drop(target) uses axis=0, searches the row index for the label 'Mh'
# and raises KeyError, so the column must be named explicitly.
prediction_df = test_df.drop(columns=[target])
print(prediction_df.columns)
from automatminer import MatPipe
# Caching through cache_src='Mh_cache.json' is left disabled here.
pipe = MatPipe.from_preset("debug", n_jobs=28)
pipe.fit(train_df, target)
prediction_df = pipe.predict(prediction_df)
from sklearn.metrics import mean_absolute_error
from sklearn.dummy import DummyRegressor
# Fit a DummyRegressor on the same split to serve as a trivial baseline.
dr = DummyRegressor()
dr.fit(train_df["structure"], train_df[target])
dummy_test = dr.predict(test_df["structure"])
# Score the dummy and MatPipe against the held-out targets.
true = test_df[target]
# MatPipe's predictions are read from the "<target> predicted" column.
matpipe_test = prediction_df[target + " predicted"]
)
# Components handed to MatPipe as keyword arguments. The reducer is given an
# empty reducers list, and the autofeaturizer uses the "debug" preset.
pipe_config = {
    "learner": learner,
    "reducer": FeatureReducer(reducers=[]),
    "cleaner": DataCleaner(feature_na_method="mean", max_na_frac=0.01,
                           na_method_fit="drop", na_method_transform="mean"),
    "autofeaturizer": AutoFeaturizer(n_jobs=10, preset="debug"),
}
pipe = MatPipe(**pipe_config)
# autoload=False: each task is loaded explicitly inside the loop below.
mb = MatbenchBenchmark(autoload=False)
for task in mb.tasks:
    task.load()
    for fold in task.folds:
        df_train = task.get_train_and_val_data(fold, as_type="df")
        # Fit the RF with matpipe
        # The same pipe object is re-fit on each fold's training data.
        pipe.fit(df_train, task.metadata.target)
        # Test data is requested without the target column.
        df_test = task.get_test_data(fold, include_target=False, as_type="df")
        # Keep only the "<target> predicted" column of the prediction frame.
        predictions = pipe.predict(
            df_test)[f"{task.metadata.target} predicted"]
# Assemble a two-column table (structure, dielectric) from values prepared
# earlier.
df = pd.DataFrame(columns=['structure', 'dielectric'])
df['structure'] = centro_structs
df['dielectric'] = diel
# Discard rows with any missing value before saving and splitting.
df = df.dropna()
df.to_csv('centro_diel.csv')
print(df.describe())
# Shuffled 90/10 train/test split with a fixed seed.
train_df, test_df = train_test_split(df, test_size=0.1, shuffle=True, random_state=1)
target = "dielectric"
# Prediction input: the test rows without the target column.
prediction_df = test_df.drop(columns=[target])
# "express" preset with 28 jobs and cache_src pointing at cache_diel.json.
pipe = MatPipe.from_preset("express", n_jobs=28, cache_src="cache_diel.json")
pipe.fit(train_df, target)
prediction_df = pipe.predict(prediction_df)
# Fit a DummyRegressor on the same split to serve as a trivial baseline.
dr = DummyRegressor()
dr.fit(train_df["structure"], train_df[target])
dummy_test = dr.predict(test_df["structure"])
# Score the dummy and MatPipe against the held-out targets.
true = test_df[target]
# MatPipe's predictions are read from the "<target> predicted" column.
matpipe_test = prediction_df[target + " predicted"]
mae_matpipe = mean_absolute_error(true, matpipe_test)
mse_matpipe = mean_squared_error(true, matpipe_test)
# The most basic usage of automatminer requires interacting with only one class, # MatPipe. This class, once fit, is a complete pipeline, and is able to # transform compositions, structures, bandstructures, and DOS into property # predictions. # A configured MatPipe object will featurize, clean, and learn on a dataset # automatically, and it made of 4 classes: AutoFeaturizer, DataCleaner, # FeatureReducer, and an ML adaptor (e.g., TPOTAdaptor). The exact operations # MatPipe executes are based entirely on how these 4 classes are configured. # The easiest way to get started is by passing in a preset configuration to # MatPipe. We can do this with the get_preset_config function; here, we'll use # the "express" config, which will provide decent results in a reasonable time # frame (an hour or two). pipe = MatPipe(**get_preset_config("express")) # Let's download an example dataset and try predicting bulk moduli. from sklearn.model_selection import train_test_split from matminer.datasets.dataset_retrieval import load_dataset df = load_dataset("elastic_tensor_2015")[["structure", "K_VRH"]] train, test = train_test_split(df, shuffle=True, random_state=20190301, test_size=0.2) test_true = test['K_VRH'] test = test.drop(columns=["K_VRH"]) # MatPipe uses an sklearn-esque BaseEstimator API for fitting pipelines and # predicting properties. Fitting a pipe trains it to the input data; predicting # with a pipe will output predictions.