("rfe_features", rfe_feature),
                ("no_elo_features", no_elo_features)]

# Log file that every write_log call in this script section appends to.
file_name = "outcome_report_full.txt"

reports = []
for name, feature_set in feature_sets:
    # Record when this feature set's run started, then announce it.
    write_log(file_name, str(datetime.datetime.now()))
    write_log(
        file_name, f"Running test for feature set: {name}", print_text=True
    )

    # Build a loader restricted to this feature set and pull the
    # "home_win" data from it.
    data_loader = DataLoader(feature_set)
    X, y = data_loader.get_all_data("home_win")

    # Run the hyper-parameter search helpers, store the raw results per
    # feature set, and log the best parameter combination.
    params = get_default_parameters()
    arguments = get_cv_grid_search_arguments(params, X)
    results = run_grid_search_for_outcome(arguments, X, y)
    results.to_csv(f"outcome_hyperparam_optimization_{name}.csv")
    best_params_dict = get_best_params(results)
    write_log(file_name, str(best_params_dict), print_text=True)

    # Start from a copy of the defaults so `params` itself stays untouched,
    # then override only the keys taken from the search result.
    optimal_params = params.copy()
    for tuned_key in ("max_depth", "min_samples_leaf", "max_features"):
        optimal_params[tuned_key] = best_params_dict[tuned_key]

    for (tt_file, bet_file, filter_start) in tournament_parameters:
        data_loader.set_filter_start(filter_start)
        simulations, units, kellys = iterate_simulations(
            data_loader, tt_file, bet_file, run_outcome_model_for_features,
Beispiel #2
0
        tt_file = 'data/original/wc_2010_games_real.csv'
        mb_file = 'data/original/wc_2010_bets.csv'
        filter_start = "2010-06-11"
    elif args.y == 2014:
        tt_file = 'data/original/wc_2014_games_real.csv'
        mb_file = 'data/original/wc_2014_bets.csv'
        filter_start = "2014-06-12"
    else:
        tt_file = 'data/original/wc_2018_games_real.csv'
        mb_file = 'data/original/wc_2018_bets.csv'
        filter_start = "2018-06-13"

    # Label prefix shared by every simulation below, built from the `f` and
    # `y` attributes of `args`.
    prefix = f"{args.f}_{args.y}"

    # --- Simulation 1: loader built from `all_features` ---
    dl = DataLoader(all_features, filter_start=filter_start)
    # Start from the default parameters and override three of them.
    # NOTE(review): this same `model_parameters` object is mutated again
    # before each later simulate() call; if simulate() keeps a reference to
    # it, the later overrides would be visible through that reference —
    # confirm simulate() copies or consumes it immediately.
    model_parameters = get_default_parameters()
    model_parameters["max_depth"] = 8
    model_parameters["max_features"] = "sqrt"
    model_parameters["min_samples_leaf"] = 1
    af_data = simulate(tt_file, mb_file, dl, model_parameters,
                       f"{prefix}_all_features")

    # --- Simulation 2: loader built from `other_features` ---
    # NOTE(review): the loader uses `other_features` while the label says
    # "general_features" — presumably the same feature group; verify.
    dl = DataLoader(other_features, filter_start=filter_start)
    # All three overridden keys are reassigned, so nothing set for
    # simulation 1 above carries over through these keys.
    model_parameters["max_depth"] = 8
    model_parameters["max_features"] = "log2"
    model_parameters["min_samples_leaf"] = 10
    gf_data = simulate(tt_file, mb_file, dl, model_parameters,
                       f"{prefix}_general_features")

    dl = DataLoader(player_features, filter_start=filter_start)
    model_parameters["max_depth"] = None