# ----------------------------------------------------------------
# Plot the ten largest feature importances reported by `rf`.
# ----------------------------------------------------------------
importances = rf.feature_importances_
# Feature labels are taken from the columns of X
# (an earlier variant used np.asarray(included) instead).
included = X.columns.values
# argsort is ascending, so reverse it to get most-important-first.
indices = np.argsort(importances)[::-1]
pf = PlotlyFig(
    title='Feature by importances',
    y_title='Importance (%)',
    filename='E:/importances.html',
    fontsize=20,
    ticksize=15,
)
pf.bar(x=included[indices[:10]], y=importances[indices[:10]])

# ----------------------------------------------------------------
# Fit an automatminer pipeline, predict, and save the predictions.
# ----------------------------------------------------------------
# The preset name can be swapped for a heavier or lighter one depending
# on how precise the results need to be.
pipe = MatPipe.from_preset("express")
# Fitting is the slow step of this script.
pipe.fit(train_df, target)
prediction_df = pipe.predict(prediction_df)
prediction_df.to_csv('C:/Users/DELL/Documents/predictionK_VRH.csv')

from sklearn.metrics import mean_absolute_error
from sklearn.dummy import DummyRegressor

# Baseline: a DummyRegressor (default settings) fitted on the same split.
dr = DummyRegressor()
dr.fit(train_df["composition"], train_df[target])
dummy_test = dr.predict(test_df["composition"])

# Compare both sets of predictions against the held-out target values.
true = test_df[target]
matpipe_test = prediction_df[target + " predicted"]
mae_dummy = mean_absolute_error(y_true=true, y_pred=dummy_test)
mae_matpipe = mean_absolute_error(y_true=true, y_pred=matpipe_test)
print("K_VRH Dummy MAE: {} ".format(mae_dummy))
# ----------------------------------------------------------------
# Build the 'Mh' dataset, clean it, and run a quick MatPipe fit/predict.
# ----------------------------------------------------------------
df['Mh'] = mh  # earlier variant: df['diel'] * df['K_VRH']
# Turn +/-inf into NaN so the dropna() below removes those rows as well.
df = df.replace([np.inf, -np.inf], np.nan)
df = df.dropna()
df.to_csv('Mh_test.csv')
print(df.describe())

target = 'Mh'
train_df, test_df = train_test_split(df, test_size=0.1, shuffle=True, random_state=1)
# DataFrame.drop defaults to axis=0 (row labels), so the target must be named
# through `columns=`; a bare drop(target) raises KeyError here.
# NOTE(review): an earlier variant also dropped 'K_VRH' and 'diel'. If df still
# carries those columns and Mh is derived from them, they would leak the
# target into the features - confirm against the code that builds df.
prediction_df = test_df.drop(columns=[target])
print(prediction_df.columns)

from automatminer import MatPipe

# "debug" preset with 28 parallel jobs; cache_src='Mh_cache.json' was
# previously passed here as well and is currently disabled.
pipe = MatPipe.from_preset("debug", n_jobs=28)
pipe.fit(train_df, target)
prediction_df = pipe.predict(prediction_df)

from sklearn.metrics import mean_absolute_error
from sklearn.dummy import DummyRegressor

# Baseline: a DummyRegressor (default settings) fitted on the same split.
dr = DummyRegressor()
dr.fit(train_df["structure"], train_df[target])
dummy_test = dr.predict(test_df["structure"])

# Ground truth and MatPipe predictions for scoring.
true = test_df[target]
matpipe_test = prediction_df[target + " predicted"]
# ----------------------------------------------------------------
# structure -> K_VRH: build the table, fit MatPipe, score vs. a dummy.
# ----------------------------------------------------------------
df = pd.DataFrame(columns=['structure', 'K_VRH'])
df['structure'] = centro_structs
df['K_VRH'] = K_VRH
# Discard rows with any missing value, then save and summarise the result.
df = df.dropna()
df.to_csv('centro_elastic.csv')
print(df.describe())

target = "K_VRH"
# Fixed seed so the 90/10 split is reproducible.
train_df, test_df = train_test_split(df, test_size=0.1, random_state=1, shuffle=True)
# The frame handed to predict() must not contain the target column.
prediction_df = test_df.drop(columns=target)

pipe = MatPipe.from_preset("express")
pipe.fit(train_df, target)
prediction_df = pipe.predict(prediction_df)

# Baseline: a DummyRegressor (default settings) fitted on the same split.
dr = DummyRegressor()
dr.fit(train_df["structure"], train_df[target])
dummy_test = dr.predict(test_df["structure"])

# Score the MatPipe predictions against the held-out target values.
true = test_df[target]
matpipe_test = prediction_df[target + " predicted"]
mae_matpipe = mean_absolute_error(y_true=true, y_pred=matpipe_test)
mse_matpipe = mean_squared_error(y_true=true, y_pred=matpipe_test)
# ----------------------------------------------------------------
# structure -> dielectric: build the table, fit MatPipe, score vs. a dummy.
# ----------------------------------------------------------------
df = pd.DataFrame(columns=['structure', 'dielectric'])
df['structure'] = centro_structs
df['dielectric'] = diel
# Discard rows with any missing value, then save and summarise the result.
df = df.dropna()
df.to_csv('centro_diel.csv')
print(df.describe())

target = "dielectric"
# Fixed seed so the 90/10 split is reproducible.
train_df, test_df = train_test_split(df, test_size=0.1, random_state=1, shuffle=True)
# The frame handed to predict() must not contain the target column.
prediction_df = test_df.drop(columns=target)

# "express" preset with 28 parallel jobs and cache_src set to "cache_diel.json".
pipe = MatPipe.from_preset(
    "express",
    n_jobs=28,
    cache_src="cache_diel.json",
)
pipe.fit(train_df, target)
prediction_df = pipe.predict(prediction_df)

# Baseline: a DummyRegressor (default settings) fitted on the same split.
dr = DummyRegressor()
dr.fit(train_df["structure"], train_df[target])
dummy_test = dr.predict(test_df["structure"])

# Score the MatPipe predictions against the held-out target values.
true = test_df[target]
matpipe_test = prediction_df[target + " predicted"]
mae_matpipe = mean_absolute_error(y_true=true, y_pred=matpipe_test)
mse_matpipe = mean_squared_error(y_true=true, y_pred=matpipe_test)