def knn_by_minute(data, features, prefix, name, minute):
    # Evaluate a previously trained k-NN model for the given in-game minute.
    features = get_features(features, prefix, minute)
    target = 'result'
    train_x, train_y, test_x, test_y = utils.split_train_test(
        data, features, target)

    dirname = PATH_JOBS_MODELS + name + "/"
    model_file = dirname + "2_" + str(minute) + ".joblib"
    with open(model_file, "rb") as f:
        model = pickle.load(f)

    predict_knn(minute, model, name, test_x, test_y, train_x, train_y)
def gnb_by_minute(data, features, prefix, name, minute):
    # Evaluate a previously trained Gaussian Naive Bayes model for the given minute.
    features = get_features(features, prefix, minute)
    target = 'result'
    train_x, train_y, test_x, test_y = utils.split_train_test(
        data, features, target)

    dirname = PATH_JOBS_MODELS + name + "/"
    model_file = dirname + str(minute) + ".joblib"
    with open(model_file, "rb") as f:
        model = pickle.load(f)

    pd.options.mode.chained_assignment = None  # default='warn'
    predict(minute, model, name, test_x, test_y, train_x, train_y)
def gnb_by_minute(data, features, prefix, name, minute):
    # Train a Gaussian Naive Bayes model for the given minute, persist it, and
    # optionally evaluate it on the held-out split.
    features = get_features(features, prefix, minute)
    target = 'result'
    train_x, train_y, test_x, test_y = utils.split_train_test(
        data, features, target)

    model = GaussianNB()
    model.fit(train_x, train_y)

    dirname = PATH_JOBS_MODELS + name + "/"
    if not os.path.exists(dirname):
        os.mkdir(dirname)
    model_file = dirname + str(minute) + ".joblib"
    with open(model_file, "wb") as f:
        pickle.dump(model, f)

    evaluate = True  # evaluation toggle
    if evaluate:
        testing.predict(minute, model, name, test_x, test_y, train_x, train_y)
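# A minimal driver sketch (not part of the original module) showing how the
# per-minute trainer above can be run over a range of in-game minutes. It assumes
# `data`, `features`, and `prefix` have already been prepared upstream; the minute
# step and the model name "gnb_stats" are illustrative assumptions, not values
# taken from the repository.
def train_gnb_all_minutes(data, features, prefix, name="gnb_stats"):
    for minute in range(0, 95, 5):  # every 5 minutes of play; the range is an assumption
        gnb_by_minute(data, features, prefix, name, minute)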
def xgb_by_minute(data, features, prefix, name, minute, odds):
    # Evaluate a previously trained XGBoost model for the given minute,
    # optionally adding the bookmaker-odds features to the feature set.
    features = get_features(features, prefix, minute)
    target = 'result'
    if odds:
        features.extend([
            'odds_p_avg_home', 'odds_p_avg_draw', 'odds_p_avg_away',
            'odds_p_std_home', 'odds_p_std_draw', 'odds_p_std_away',
        ])
    train_x, train_y, test_x, test_y = utils.split_train_test(
        data, features, target, odds)

    dirname = PATH_JOBS_MODELS + name + "/"
    model_file = dirname + "1_" + str(minute) + ".joblib"
    with open(model_file, "rb") as f:
        model = pickle.load(f)

    predict_knn(minute, model, name, test_x, test_y, train_x, train_y)
def mlp_by_minute(data, features, prefix, name, config):
    # Load a persisted Keras MLP for the given minute/config, re-compile it, and
    # report accuracy and RPS on the train and test splits.
    cnn_encoder_kernel_sizes, cnn_encoder_layer_sizes, minute, optim_batch_size, optim_learning_rate, \
        optim_num_epochs, method, dropout = utils.get_config(config)

    features = utils.get_features(features, prefix, minute)
    target = 'result'
    train_x, train_y, test_x, test_y = utils.split_train_test(
        data, features, target)

    def get_target(row):
        # Map the match result label to a class index: home win, draw, away win.
        if row == 'H':
            return 0
        elif row == 'D':
            return 1
        elif row == 'A':
            return 2

    train_y = train_y.apply(lambda row: get_target(row))
    train_y_cat = to_categorical(train_y)
    test_y = test_y.apply(lambda row: get_target(row))
    test_y_cat = to_categorical(test_y)

    model_dir = PATH_JOBS_MODELS + 'mlp_stats' + "/"
    model_label = config['minute'] + "_" + config['method'] + "_" + \
        config['optim_num_epochs'] + "_" + \
        config['optim_batch_size'] + "_" + \
        config['optim_learning_rate'] + "_" + \
        config['cnn_encoder_layer_sizes'] + "_" + config['optim:dropout_rate']
    model_file_json = model_dir + model_label + ".json"
    model_file_h5 = model_dir + model_label + ".h5"

    # Load the architecture from JSON and the weights from HDF5.
    with open(model_file_json, 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = model_from_json(loaded_model_json)
    loaded_model.load_weights(model_file_h5)

    optimizer = Adam(lr=optim_learning_rate)
    loaded_model.compile(loss='categorical_crossentropy',
                         optimizer=optimizer,
                         metrics=['accuracy', rps_loss])

    eval_train = loaded_model.evaluate(train_x, train_y_cat)
    eval_test = loaded_model.evaluate(test_x, test_y_cat)
    print(eval_train)
    print(eval_test)

    # Collect accuracy and RPS for both splits and write them to a per-minute CSV.
    met = pd.DataFrame()
    met["acc_train"] = [eval_train[1]]
    met["rps_train"] = [eval_train[2]]
    met["acc_test"] = [eval_test[1]]
    met["rps_test"] = [eval_test[2]]

    dirname = PATH_JOBS_RESULTS + name + "/"
    if not os.path.exists(dirname):
        os.mkdir(dirname)
    results_file = dirname + str(minute) + ".csv"
    met.to_csv(results_file)
    print(met)
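# A sketch of the `config` dictionary consumed by mlp_by_minute. The keys are the
# ones referenced above when building `model_label`; the values are illustrative
# assumptions only (kept as strings because the label is built by concatenation).
# `utils.get_config` also exposes kernel sizes and dropout, so the real config may
# carry further keys such as 'cnn_encoder_kernel_sizes'.
example_config = {
    'minute': '45',
    'method': 'mlp',
    'optim_num_epochs': '100',
    'optim_batch_size': '64',
    'optim_learning_rate': '0.001',
    'cnn_encoder_layer_sizes': '128-64',
    'optim:dropout_rate': '0.2',
}
# Illustrative call, assuming `data`, `features`, and `prefix` are prepared upstream:
# mlp_by_minute(data, features, prefix, 'mlp_stats', example_config)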