async def preprocess(ctx):
    """Run the preprocessing step and report its progress through ``ctx``.

    A start message is sent first, then ``pre()`` is called. If ``pre``
    raises, the exception text is sent as an error message; the completion
    message is sent only when ``pre`` returned without raising.

    Parameters
    ----------
    ctx
        Object exposing an awaitable ``send(str)`` method.
    """
    await ctx.send("Running preprocessing...")
    try:
        # NOTE(review): pre() is a plain synchronous call, so this coroutine
        # does not yield to the event loop until it returns.
        pre()
    except Exception as e:
        # Command boundary: surface the failure to the user instead of raising.
        await ctx.send('ERROR:\n' + str(e))
    else:
        # Only claim completion when preprocessing actually succeeded.
        await ctx.send("Completed running preprocessing.")
def learning_process_final(train_path, test_path, sample_path, clf_code,
                           n_estimators, num_leaves, learning_rate, top_k):
    """Returns a pd.DataFrame for test prediction.

    Loads the train/test data through ``pre``, reads the sample csv, runs
    ``tp(...).main_experience()`` and returns its result. The elapsed time
    of the loading phase and of the learning phase is printed.

    Parameters
    ----------
    train_path : str
        The path of the train dataset.
    test_path : str
        The path of the test dataset.
    sample_path : str
        The path of the sample csv.
    clf_code : str
        The type of classifier for model
    n_estimators : int
        The number of estimators for model
    num_leaves : int
        The number of leaf nodes for model
    learning_rate : float
        The learning rate for model (forwarded as ``lr``)
    top_k : int
        The number of candidates estimators for final prediction of model

    Returns
    -------
    The value returned by ``main_experience()``.
    """
    start = time.time()
    # Both loaders are constructed before either one is run, as in the
    # original statement order.
    train_cls = pre(train_path, True)
    test_cls = pre(test_path, False)
    train_df = train_cls.main()
    test_df = test_cls.main()
    sample_df = pd.read_csv(sample_path)
    # '%.2f' prints two decimals; the former '%f.2' printed six decimals
    # followed by a literal ".2".
    print('data_loading_time : %.2f' % (time.time() - start))

    start = time.time()
    main_cls = tp(
        train_df=train_df,
        test_df=test_df,
        sample_df=sample_df,
        clf_code=clf_code,
        # Passed as (value, value) pairs -- presumably a (low, high) range
        # collapsed to a single value; confirm against tp.
        n_estimators=(n_estimators, n_estimators),
        num_leaves=(num_leaves, num_leaves),
        lr=learning_rate,
        top_k=top_k,
    )
    result_sample = main_cls.main_experience()
    print('learning_time : %.2f' % (time.time() - start))
    return result_sample
def MLP(data_directory, model_dir, features):
    """Grid-search an MLP model, save the best estimator and print its test predictions.

    The data split comes from ``pre(data_directory, features)``. The best
    estimator found by the grid search is dumped with ``joblib`` into
    ``model_dir`` under the name ``mlp_<n_features>_<test score>.m``, and a
    DataFrame with the predicted (``ml_bandgap``) and reference
    (``pbe_bandgap``) values for the test split is printed.

    Parameters
    ----------
    data_directory
        Forwarded to ``pre``.
    model_dir
        Directory the process changes into before saving the model.
    features
        Forwarded to ``pre``; replaced by the feature list ``pre`` returns.

    Notes
    -----
    ``mlp`` and ``gs`` are imported elsewhere in the file -- presumably
    scikit-learn's MLP estimator and ``GridSearchCV``; confirm.
    """
    # The fifth value returned by pre() (predict_X) is not used here.
    X_train, X_test, y_train, y_test, _predict_X, features = pre(
        data_directory, features)

    # NOTE: this changes the process-wide working directory; the model file
    # below is written relative to it.
    os.chdir(model_dir)

    param_grid = {
        'hidden_layer_sizes': [(500, 500)],
        'activation': ['logistic', 'tanh', 'relu'],
        # exp(2.303 * k) ~= 10**k, i.e. roughly 1e-8 ... 1e-1.
        'alpha': np.exp(2.303 * np.arange(-8, 0)),
        'learning_rate': ['constant'],
    }
    model = mlp(random_state=1, max_iter=10000)
    grid = gs(estimator=model, param_grid=param_grid, cv=5, n_jobs=6)
    grid.fit(X_train, y_train)
    print(grid.best_params_)

    # Score the held-out split once and reuse it for both the log line and
    # the file name (it was previously computed twice).
    best_model = grid.best_estimator_
    test_score = best_model.score(X_test, y_test)
    print(test_score)
    joblib.dump(best_model, 'mlp_%d_%.4f.m' % (len(features), test_score))

    df = pd.DataFrame(columns=['ml_bandgap', 'pbe_bandgap'])
    df['pbe_bandgap'] = y_test
    df['ml_bandgap'] = best_model.predict(X_test)
    print(df)
def DecisionTree(data_directory, model_dir, features):
    """Grid-search a decision-tree model, save the best estimator and print its test predictions.

    The data split comes from ``pre(data_directory, features)``. The best
    estimator found by the grid search is dumped with ``joblib`` into
    ``model_dir`` under the name ``dtr_<n_features>_<test score>.m``, and a
    DataFrame with the predicted (``ml_bandgap``) and reference
    (``pbe_bandgap``) values for the test split is printed.

    Parameters
    ----------
    data_directory
        Forwarded to ``pre``.
    model_dir
        Directory the process changes into before saving the model.
    features
        Forwarded to ``pre``; replaced by the feature list ``pre`` returns.

    Notes
    -----
    ``dt`` and ``gs`` are imported elsewhere in the file -- presumably
    scikit-learn's decision-tree regressor and ``GridSearchCV``; confirm.
    """
    # The fifth value returned by pre() (predict_X) is not used here.
    X_train, X_test, y_train, y_test, _predict_X, features = pre(
        data_directory, features)

    # NOTE: this changes the process-wide working directory; the model file
    # below is written relative to it.
    os.chdir(model_dir)

    # NOTE(review): if `dt` is scikit-learn's DecisionTreeRegressor, the
    # 'mse'/'mae' criteria and max_features='auto' are legacy spellings that
    # newer releases no longer accept -- confirm against the pinned version.
    param_grid = {
        'criterion': ['mse', 'friedman_mse', 'mae'],
        'splitter': ['best', 'random'],
        'max_features': ['auto', 'sqrt', 'log2'],
    }
    model = dt(random_state=1)
    grid = gs(estimator=model, param_grid=param_grid, cv=5)
    grid.fit(X_train, y_train)
    print(grid.best_params_)

    # Score the held-out split once and reuse it for both the log line and
    # the file name (it was previously computed twice).
    best_model = grid.best_estimator_
    test_score = best_model.score(X_test, y_test)
    print(test_score)
    joblib.dump(best_model, 'dtr_%d_%.4f.m' % (len(features), test_score))

    df = pd.DataFrame(columns=['ml_bandgap', 'pbe_bandgap'])
    df['pbe_bandgap'] = y_test
    df['ml_bandgap'] = best_model.predict(X_test)
    print(df)
def learning_process_vali(train_path, clf_code, n_estimators, num_leaves,
                          learning_rate, top_k):
    """Returns a pd.DataFrame for validation results.

    Loads the train data through ``pre``, runs
    ``vp(...).process_learning()`` on it and returns its result. The
    elapsed time of the loading phase and of the learning phase is printed.

    Parameters
    ----------
    train_path : str
        The path of the train dataset.
    clf_code : str
        The type of classifier for model
    n_estimators : int
        The number of estimators for model
    num_leaves : int
        The number of leaf nodes for model
    learning_rate : float
        The learning rate for model (forwarded as ``lr``)
    top_k : int
        The number of candidates estimators for final prediction of model

    Returns
    -------
    The value returned by ``process_learning()``.
    """
    start = time.time()
    train_cls = pre(train_path, True)
    train_df = train_cls.main()
    # '%.2f' prints two decimals; the former '%f.2' printed six decimals
    # followed by a literal ".2".
    print('data_loading_time : %.2f' % (time.time() - start))

    start = time.time()
    main_cls = vp(
        base_df=train_df,
        clf_code=clf_code,
        n_estimators=n_estimators,
        num_leaves=num_leaves,
        lr=learning_rate,
        top_k=top_k,
    )
    result_sample = main_cls.process_learning()
    print('learning_time : %.2f' % (time.time() - start))
    return result_sample
from sklearn.preprocessing import StandardScaler from sklearn.model_selection import KFold import xgboost as xgb from xgboost import XGBRegressor from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error import numpy as np import pandas as pd from preprocessing import preprocessing as pre from to_csv import to_csv # 讀取預處理資料 train_x, val_x, test_x, train_y, val_y = pre() # 標準化 ss = StandardScaler() train_x = ss.fit_transform(train_x) val_x = ss.transform(val_x) test_x = ss.transform(test_x) kf = KFold(n_splits=10, shuffle=False) fold = 0 train_array = [] val_dic = {} test_dic = {} val_dic['id'] = pd.read_csv(r'../ntut-ml-2020-regression/valid-v3.csv').id test_dic['id'] = pd.read_csv(r'../ntut-ml-2020-regression/test-v3.csv').id for train_index, test_index in kf.split(train_x): # print('train_index:%s , test_index: %s ' % (train_index, test_index)) trainfold_x = train_x[train_index]