import joblib

from ModelEvaluator import ModelEvaluator


def LoadModelEvaluator(imputer_fname, scaler_fname, encoder_fname,
                       model_fname):
    """Rebuild a ModelEvaluator from its four pickled components."""

    imp = joblib.load(imputer_fname)
    scaler = joblib.load(scaler_fname)
    enc = joblib.load(encoder_fname)
    model = joblib.load(model_fname)
    evaluator = ModelEvaluator(imputer=imp,
                               scaler=scaler,
                               encoder=enc,
                               model=model)

    return evaluator
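
A minimal usage sketch, assuming the pickle paths used elsewhere on this page and feature arrays X_num/X_cat produced by a data adapter:

evaluator = LoadModelEvaluator('./models/imputer.pkl',
                               './models/scaler.pkl',
                               './models/encoder.pkl',
                               './models/rf.pkl')
y_pred = evaluator.predict(X_num, X_cat)
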
def load_data_and_construct_model(model: Model, model_dict, save_model,
                                  img_settings, training_dict,
                                  train_val_test_split, data_path, data_file,
                                  save_path, plots_in_row):
    """Load image data, train `model` on it, and plot its training history
    plus ROC and precision-recall evaluations."""

    if not save_model:
        save_path = None

    # load data
    data_loader = Data(data_path=data_path,
                       data_file=data_file,
                       img_settings=img_settings)

    # load dataframe
    data_loader.load_dataframe()

    # load the data to the memory / create data generators
    data_dict = data_loader.load_data(batch_size=training_dict['batch_size'],
                                      split=train_val_test_split)

    # create and train model
    model.construct(**model_dict,
                    **training_dict,
                    data_dict=data_dict,
                    save_path=save_path)

    # plot results

    ModelEvaluator.plot_train_val_history(model,
                                          save_path=save_path,
                                          plots_in_row=plots_in_row)

    data_dict = data_loader.load_data(batch_size=training_dict['batch_size'],
                                      split=train_val_test_split,
                                      mode='val')

    labels = data_loader.get_labels(split=train_val_test_split)

    predictions = model.predict(data_dict)

    ModelEvaluator.evaluate_classifier(predictions=predictions,
                                       labels=labels,
                                       labels_name=data_file["used_labels"],
                                       mode='roc',
                                       save_path=save_path,
                                       plots_in_row=plots_in_row)
    ModelEvaluator.evaluate_classifier(predictions=predictions,
                                       labels=labels,
                                       labels_name=data_file["used_labels"],
                                       mode='pr',
                                       save_path=save_path,
                                       plots_in_row=plots_in_row)
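
A hedged invocation sketch; every argument value below is illustrative (not taken from this page), but note that data_file must be a mapping, since the function reads data_file["used_labels"]:

load_data_and_construct_model(
    model=Model(),                               # any Model with construct()/predict()
    model_dict={'n_layers': 3},                  # illustrative hyperparameters
    save_model=True,
    img_settings={'img_size': (64, 64)},         # illustrative
    training_dict={'batch_size': 32, 'epochs': 10},
    train_val_test_split=(0.7, 0.15, 0.15),      # illustrative split
    data_path='data/',
    data_file={'file': 'images.csv', 'used_labels': ['cat', 'dog']},
    save_path='results/',
    plots_in_row=3,
)
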
Example #3

from ModelBuilder import ModelBuilder
from ModelEvaluator import ModelEvaluator
from DataTransformer import multi_csv_to_dataset
from ModelLoader import ModelLoader

dataset = multi_csv_to_dataset([
    'test_data/SHOP_daily.csv',
    # 'test_data/TD_daily.csv',
    # 'test_data/ENB_daily.csv',
    # 'test_data/BA_daily.csv',
    # 'test_data/TSLA_daily.csv'
])
model_loader = ModelLoader()

#test_data = ModelBuilder().build_model(dataset, 150)
#model_loader.save_model(test_data.model, 'multistock-2020-04-09')

test_data = ModelBuilder().split_test_data(dataset, 0.7)
test_data.model = model_loader.load_model('multistock-2020-04-09.h5')

evaluator = ModelEvaluator()
evaluator.analyze(test_data)
evaluator.plot(test_data)
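
To retrain and save the network instead of reusing the stored weights, the two commented-out lines above are the intended path; expanded, with build_model's second argument (150) carried over from the original snippet:

test_data = ModelBuilder().build_model(dataset, 150)
model_loader.save_model(test_data.model, 'multistock-2020-04-09')
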
Example #5

import glob

import joblib
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve

from LearningScore import learning_curve
# import paths assumed for the project's own modules
from LearningDataAdapter import LearningDataAdapter
from ModelEvaluator import ModelEvaluator

if __name__ == '__main__':

    plt.switch_backend('agg')

    # obtain validation data
    print('Importing test sample... ')
    adapter = LearningDataAdapter(for_learning=True)
    adapter.adapt_file('data/validate.csv')
    X_num, X_cat = adapter.X_num, adapter.X_cat

    imp = joblib.load("./models/imputer.pkl")
    scaler = joblib.load("./models/scaler.pkl")
    enc = joblib.load("./models/encoder.pkl")
    evaluator = ModelEvaluator(imputer=imp, scaler=scaler, encoder=enc)

    pred_x = evaluator.preprocess(X_num, X_cat)
    pred_w, pred_y = adapter.w, adapter.y
    print()

    # plot
    # this step plots the ROC curve, the signal/background probability
    # distributions, and the learning curve for each model

    print('Plotting ROC curve...')
    plt.figure(1)
    model_list = glob.glob('./models/*.pkl')
    for model in model_list:
        # skip the preprocessing pickles; only classifiers get a ROC curve
        if model not in [
                './models/encoder.pkl', './models/scaler.pkl',
                './models/imputer.pkl']:
            # the loop body is truncated on this page; reconstructed here
            # from the comment above as an assumption
            clf = joblib.load(model)
            prob = clf.predict_proba(pred_x)[:, 1]
            fpr, tpr, _ = roc_curve(pred_y, prob, sample_weight=pred_w)
            plt.plot(fpr, tpr, label=model)
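
The next two fragments pick up mid-pipeline: they assume the numeric features were already imputed and scaled into X_num_trans and that a fitted encoder enc exists. A minimal sketch of that missing preamble, assuming scikit-learn's SimpleImputer, StandardScaler, and OneHotEncoder stand in for whatever transformers the original used:

import joblib
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# fit the preprocessors on the training sample produced by the adapter
imp = SimpleImputer(strategy='mean').fit(X_num)
X_num_imp = imp.transform(X_num)
scaler = StandardScaler().fit(X_num_imp)
X_num_trans = scaler.transform(X_num_imp)
# dense output so np.hstack works below (sparse_output needs sklearn >= 1.2)
enc = OneHotEncoder(sparse_output=False, handle_unknown='ignore').fit(X_cat)

# persist them under the paths the fragments below reload from
joblib.dump(imp, 'models/imputer.pkl')
joblib.dump(scaler, 'models/scaler.pkl')
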
    X_cat_trans = enc.transform(X_cat)
    joblib.dump(enc, 'models/encoder.pkl')

    X_trans = np.hstack((X_num_trans, X_cat_trans))

    print('Training model... ')
    #rf = RandomForestClassifier(n_estimators=10)
    #rf = RandomForestClassifier(n_estimators=100)
    rf = RandomForestClassifier(n_estimators=1000)
    rf.fit(X_trans, y, sample_weight=w)
    joblib.dump(rf, 'models/rf.pkl')
    print()

    print('Predicting in sample... ')
    evaluator = ModelEvaluator(
        imputer=imp, scaler=scaler,
        encoder=enc, model=rf
    )
    y_pred = evaluator.predict(X_num, X_cat)

    print('Training Error = {0}'.format(np.sum(w[y != y_pred]) / np.sum(w)))
    print('Predicted +, - counts = {0}, {1}'.format(
        y_pred[y_pred == 1].shape[0], y_pred[y_pred == 0].shape[0]))
    print()

    print('Importing test sample... ')
    adapter = LearningDataAdapter(for_learning=True)
    adapter.adapt_file('data/validate.csv')
    X_num, X_cat = adapter.X_num, adapter.X_cat
    w, y = adapter.w, adapter.y
    print()

    print('Predicting out of sample... ')

    X_trans = np.hstack((X_num_trans, X_cat_trans))

    print('Training model... ')
    #rf = RandomForestClassifier(n_estimators=10)
    #rf = RandomForestClassifier(n_estimators=100)
    #rf = RandomForestClassifier(n_estimators=200)
    rf = RandomForestClassifier(n_estimators=1000)
    #rf.fit(X_trans, y, sample_weight=w)
    rf.fit(X_trans, y)
    joblib.dump(rf, 'models/rf.pkl')
    print()

    print('Predicting in sample... ')
    evaluator = ModelEvaluator(imputer=imp,
                               scaler=scaler,
                               encoder=enc,
                               model=rf)
    y_pred = evaluator.predict(X_num, X_cat)

    # print('Training Error = {0}'.format(np.sum(w[y != y_pred]) / np.sum(w)))
    print('Training Error = {0}'.format(y_pred[y != y_pred].shape[0] /
                                        float(y_pred.shape[0])))
    print('Predicted +, - counts = {0}, {1}'.format(
        y_pred[y_pred == 1].shape[0], y_pred[y_pred == 0].shape[0]))
    print()

    print('Importing test sample... ')
    adapter = LearningDataAdapter(for_learning=True)
    adapter.adapt_file('data/validate.csv')
    X_num, X_cat = adapter.X_num, adapter.X_cat
    w, y = adapter.w, adapter.y
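
Both training fragments break off right after reloading the validation sample; the out-of-sample step presumably mirrors the in-sample one above. A hedged sketch of that continuation:

    # hedged continuation: score the reloaded sample with the same evaluator
    # and report the weighted test error, mirroring the in-sample computation
    y_pred = evaluator.predict(X_num, X_cat)
    print('Test Error = {0}'.format(np.sum(w[y != y_pred]) / np.sum(w)))
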
Example #8

    print()
    print('+ Connecting to {0} to populate column in table {1}'.format(
        args.dbname, args.table_name))
    print()

    print('  Adding new column to table.')
    add_table_column(args.dbname, args.table_name)
    print()

    print('  Loading models.')
    imp = joblib.load('models/imputer.pkl')
    scaler = joblib.load('models/scaler.pkl')
    enc = joblib.load('models/encoder.pkl')
    rf = joblib.load('models/rf.pkl')
    evaluator = ModelEvaluator(imputer=imp,
                               scaler=scaler,
                               encoder=enc,
                               model=rf)
    adapter = LearningDataAdapter(for_learning=False)
    print()

    print('  Predicting and updating.')
    print('  Started on {0}'.format(time.ctime(time.time())))
    with SqlDataLoader(database=args.dbname,
                       table_name=args.table_name,
                       itersize=200000,
                       arraysize=200000,
                       rollback=False,
                       debug=False) as sql_loader:
        sql_loader.start()
        while sql_loader.curr_records:
            sys.stdout.write('.')
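
The batch loop is truncated here; conceptually each iteration scores the current batch of records and writes the scores back before fetching the next one. A sketch with hypothetical SqlDataLoader/adapter method names (adapt, update_scores, and fetch_next are illustrative, not confirmed by this page):

            # hypothetical batch step: adapt the raw records, score them,
            # and write the scores back to the new column
            adapter.adapt(sql_loader.curr_records)   # method name assumed
            scores = evaluator.predict_proba(adapter.X_num,
                                             adapter.X_cat)[:, 1]
            sql_loader.update_scores(scores)         # method name assumed
            sql_loader.fetch_next()                  # method name assumed
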
    print('+ Loading pickled objects... ')
    imp = joblib.load('models/imputer.pkl')
    scaler = joblib.load('models/scaler.pkl')
    enc = joblib.load('models/encoder.pkl')
    rf = joblib.load('models/rf.pkl')
    print()

    print('+ Importing test sample... ')
    adapter = LearningDataAdapter(for_learning=True)
    adapter.adapt_file('data/validate.csv')
    print()

    print('+ Predicting test sample candidate scores... ')
    evaluator = ModelEvaluator(
        imputer=imp, scaler=scaler,
        encoder=enc, model=rf
    )
    score = evaluator.predict_proba(adapter.X_num, adapter.X_cat)[:, 1]
    print()

    print('+ Assessing model results... ')
    print()

    print('  Selecting best candidate... ')
    C_id = adapter.record_id
    C_att = np.hstack((
        adapter.X_num, adapter.X_cat,
        score.reshape(score.shape[0], 1),
        adapter.y.reshape(adapter.y.shape[0], 1),
    ))
    E_id, E_att, E_meta = select_best_candidate(C_id, C_att)