def LoadModelEvaluator(imputer_fname, scaler_fname, encoder_fname, model_fname):
    """Rebuild a ModelEvaluator from its pickled components on disk.

    Each filename is handed to ``joblib.load``; the four deserialized
    objects (imputer, scaler, encoder, fitted model) are wrapped in a
    fresh ModelEvaluator and returned.
    """
    # Load every component lazily inline — no intermediates are needed.
    return ModelEvaluator(
        imputer=joblib.load(imputer_fname),
        scaler=joblib.load(scaler_fname),
        encoder=joblib.load(encoder_fname),
        model=joblib.load(model_fname),
    )
def load_data_and_construct_model(model: Model, model_dict, save_model, img_settings, training_dict, train_val_test_split, data_path, data_file, save_path, plots_in_row):
    """Load the dataset, build and train *model*, then plot evaluation curves.

    Trains via ``model.construct`` on the training split, plots the
    train/val history, then scores the validation split and renders both
    ROC and precision-recall summaries.  When *save_model* is falsy no
    artifacts are written (``save_path`` is cleared).
    """
    # Opting out of saving means every downstream call gets save_path=None.
    if not save_model:
        save_path = None

    # Set up the data source and pull the dataframe into memory.
    loader = Data(data_path=data_path, data_file=data_file, img_settings=img_settings)
    loader.load_dataframe()

    # Build the in-memory data / generators, then construct and fit the model.
    batches = loader.load_data(batch_size=training_dict['batch_size'], split=train_val_test_split)
    model.construct(**model_dict, **training_dict, data_dict=batches, save_path=save_path)

    # Training-history curves.
    ModelEvaluator.plot_train_val_history(model, save_path=save_path, plots_in_row=plots_in_row)

    # Reload the validation split and score it.
    batches = loader.load_data(batch_size=training_dict['batch_size'], split=train_val_test_split, mode='val')
    targets = loader.get_labels(split=train_val_test_split)
    preds = model.predict(batches)

    # Same predictions, two summary plots: ROC then precision-recall.
    for curve in ('roc', 'pr'):
        ModelEvaluator.evaluate_classifier(predictions=preds, labels=targets,
                                           labels_name=data_file["used_labels"],
                                           mode=curve, save_path=save_path,
                                           plots_in_row=plots_in_row)
from ModelBuilder import ModelBuilder
from ModelEvaluator import ModelEvaluator
from DataTransformer import multi_csv_to_dataset
from ModelLoader import ModelLoader

# Only SHOP is active; the other tickers are kept as easy toggles.
dataset = multi_csv_to_dataset([
    'test_data/SHOP_daily.csv',
    # 'test_data/TD_daily.csv',
    # 'test_data/ENB_daily.csv',
    # 'test_data/BA_daily.csv',
    # 'test_data/TSLA_daily.csv'
])

loader = ModelLoader()

# One-time training/saving path, disabled once the model exists on disk:
# test_data = ModelBuilder().build_model(dataset, 150)
# loader.save_model(test_data.model, 'multistock-2020-04-09')

# Split off the evaluation portion and attach the previously saved model.
test_data = ModelBuilder().split_test_data(dataset, 0.7)
test_data.model = loader.load_model('multistock-2020-04-09.h5')

# Score and visualize on the held-out data.
evaluator = ModelEvaluator()
evaluator.analyze(test_data)
evaluator.plot(test_data)
# NOTE(review): this chunk arrived with its original line breaks collapsed and is
# cut off mid-construct (the `for model in model_list:` body is not visible), so
# the code is left byte-identical. Apparent flow (Python 2): select a headless
# matplotlib backend, load the validation sample through LearningDataAdapter,
# unpickle imputer/scaler/encoder, preprocess features with ModelEvaluator, then
# iterate over every pickled model under ./models to plot ROC curves — TODO
# confirm against the full file before editing.
plt.switch_backend('agg') # obtain valid data print 'Importing test sample... ' adapter = LearningDataAdapter(for_learning=True) adapter.adapt_file('data/validate.csv') X_num, X_cat = adapter.X_num, adapter.X_cat imp = joblib.load("./models/imputer.pkl") scaler = joblib.load("./models/scaler.pkl") enc = joblib.load("./models/encoder.pkl") evaluator = ModelEvaluator( imputer=imp, scaler=scaler, encoder=enc) pred_x = evaluator.preprocess(X_num,X_cat) pred_w, pred_y = adapter.w, adapter.y print # plot # this step will plot ROC curve, probability distribution for signal/background # and learning curve for each model. print 'Plotting ROC curve...' plt.figure(1) model_list = glob.glob('./models/*.pkl') for model in model_list:
# NOTE(review): chunk with collapsed line breaks, truncated mid list literal in
# `if model not in [...]`; code left byte-identical. Apparent flow (Python 2):
# under the __main__ guard, load the validation sample, unpickle the
# preprocessing objects, preprocess via ModelEvaluator, then loop over pickled
# models under ./models, skipping the preprocessing pickles — TODO confirm the
# remainder of the exclusion list against the full file.
from LearningScore import learning_curve if __name__ == '__main__': plt.switch_backend('agg') # obtain valid data print 'Importing test sample... ' adapter = LearningDataAdapter(for_learning=True) adapter.adapt_file('data/validate.csv') X_num, X_cat = adapter.X_num, adapter.X_cat imp = joblib.load("./models/imputer.pkl") scaler = joblib.load("./models/scaler.pkl") enc = joblib.load("./models/encoder.pkl") evaluator = ModelEvaluator(imputer=imp, scaler=scaler, encoder=enc) pred_x = evaluator.preprocess(X_num, X_cat) pred_w, pred_y = adapter.w, adapter.y print # plot # this step will plot ROC curve, probability distribution for signal/background # and learning curve for each model. print 'Plotting ROC curve...' plt.figure(1) model_list = glob.glob('./models/*.pkl') for model in model_list: if model not in [ './models/encoder.pkl', './models/scaler.pkl',
X_cat_trans = enc.transform(X_cat) joblib.dump(enc, 'models/encoder.pkl') X_trans = np.hstack((X_num_trans, X_cat_trans)) print 'Training model... ' #rf = RandomForestClassifier(n_estimators=10) #rf = RandomForestClassifier(n_estimators=100) rf = RandomForestClassifier(n_estimators=1000) rf.fit(X_trans, y, sample_weight=w) joblib.dump(rf, 'models/rf.pkl') print print 'Predicting in sample... ' evaluator = ModelEvaluator( imputer=imp, scaler=scaler, encoder=enc, model=rf ) y_pred = evaluator.predict(X_num, X_cat) print 'Training Error = {0}'.format(np.sum(w[y != y_pred]) / np.sum(w)) print 'Predicted +, - counts = {0}, {1}'.format(y_pred[y_pred==1].shape[0], y_pred[y_pred==0].shape[0]) print print 'Importing test sample... ' adapter = LearningDataAdapter(for_learning=True) adapter.adapt_file('data/validate.csv') X_num, X_cat = adapter.X_num, adapter.X_cat w, y = adapter.w, adapter.y print print 'Predicting out of sample... '
X_trans = np.hstack((X_num_trans, X_cat_trans)) print 'Training model... ' #rf = RandomForestClassifier(n_estimators=10) #rf = RandomForestClassifier(n_estimators=100) #rf = RandomForestClassifier(n_estimators=200) rf = RandomForestClassifier(n_estimators=1000) #rf.fit(X_trans, y, sample_weight=w) rf.fit(X_trans, y) joblib.dump(rf, 'models/rf.pkl') print print 'Predicting in sample... ' evaluator = ModelEvaluator(imputer=imp, scaler=scaler, encoder=enc, model=rf) y_pred = evaluator.predict(X_num, X_cat) #print 'Training Error = {0}'.format(np.sum(w[y != y_pred]) / np.sum(w)) print 'Training Error = {0}'.format(y_pred[y != y_pred].shape[0] / float(y_pred.shape[0])) print 'Predicted +, - counts = {0}, {1}'.format( y_pred[y_pred == 1].shape[0], y_pred[y_pred == 0].shape[0]) print print 'Importing test sample... ' adapter = LearningDataAdapter(for_learning=True) adapter.adapt_file('data/validate.csv') X_num, X_cat = adapter.X_num, adapter.X_cat w, y = adapter.w, adapter.y
# NOTE(review): chunk with collapsed line breaks, truncated inside the
# `while sql_loader.curr_records:` body; code left byte-identical. Apparent
# flow (Python 2): announce the target database/table, add a new column via
# add_table_column, unpickle imputer/scaler/encoder/rf into a ModelEvaluator,
# then stream batches from SqlDataLoader (200k per fetch, rollback disabled),
# writing a '.' per batch — the predict/update body is outside this view;
# TODO confirm against the full file.
print print '+ Connecting to {0} to populate column in table {1}'.format( args.dbname, args.table_name) print print ' Adding new column to table.' add_table_column(args.dbname, args.table_name) print print ' Loading models.' imp = joblib.load('models/imputer.pkl') scaler = joblib.load('models/scaler.pkl') enc = joblib.load('models/encoder.pkl') rf = joblib.load('models/rf.pkl') evaluator = ModelEvaluator(imputer=imp, scaler=scaler, encoder=enc, model=rf) adapter = LearningDataAdapter(for_learning=False) print print ' Predicting and updating.' print ' Started on {0}'.format(time.ctime(time.time())) with SqlDataLoader(database=args.dbname, table_name=args.table_name, itersize=200000, arraysize=200000, rollback=False, debug=False) as sql_loader: sql_loader.start() while sql_loader.curr_records: sys.stdout.write('.')
print '+ Loading pickled objects... ' imp = joblib.load('models/imputer.pkl') scaler = joblib.load('models/scaler.pkl') enc = joblib.load('models/encoder.pkl') rf = joblib.load('models/rf.pkl') print print '+ Importing test sample... ' adapter = LearningDataAdapter(for_learning=True) adapter.adapt_file('data/validate.csv') print print '+ Predicting test sample candidate scores... ' evaluator = ModelEvaluator( imputer=imp, scaler=scaler, encoder=enc, model=rf ) score = evaluator.predict_proba(adapter.X_num, adapter.X_cat)[:,1] print print '+ Assessing model results... ' print print ' Selecting best candidate... ' C_id = adapter.record_id C_att = np.hstack(( adapter.X_num, adapter.X_cat, score.reshape(score.shape[0], 1), adapter.y.reshape(adapter.y.shape[0], 1), )) E_id, E_att, E_meta = select_best_candidate(C_id, C_att)