def __init__(self, params):
    """Store model hyper-parameters and prepare the output directory.

    :param params: hyper-parameter dict for the model — TODO confirm keys
    """
    self.params = params
    # Populated later by the training / data-prep methods.
    self.model = self.features = self.target = None
    # All trained artefacts are written below this directory.
    self.model_output_dir = "./data/model_outputs/"
    check_create_dir(self.model_output_dir)
def train(self, xy_train, xy_test):
    """Train a fastai tabular model and export it to disk.

    Picks the metric from ``self.target``: classification accuracy for the
    binary "star_target", MAE otherwise (regression targets).

    :param xy_train: training frame consumed by ``self.prepare_data``
    :param xy_test: test frame consumed by ``self.prepare_data``
    :return: tuple ``(train_loss, valid_loss)`` taken from the fastai
        recorder — per-batch training losses and per-epoch validation
        losses, presumably; confirm against the fastai version in use
    """
    # choose metric based on target type: binary vs continuous
    if self.target == "star_target":
        metric_fn = accuracy
    else:
        metric_fn = mae
    fast_data = self.prepare_data(xy_train, xy_test)
    learn = tabular_learner(fast_data, layers=[256, 128], emb_drop=0.2, metrics=metric_fn)
    learn.fit_one_cycle(4, 1e-4)  # set this to 4
    self.model = learn
    # get training results
    # tr = learn.validate(learn.data.train_dl)
    # va = learn.validate(learn.data.valid_dl)
    # print("The Metrics used In Evaluating The Network: {}".format(learn.metrics))
    # print("The Training Set Loss: {}".format(tr))
    # print("The Validation Set Loss: {}".format(va))
    # get test set predictions
    # test_predictions = learn.get_preds(ds_type=DatasetType.Test)[0]
    # xy_test["fastai_pred"] = test_predictions
    train_loss, valid_loss = learn.recorder.losses, learn.recorder.val_losses
    # save model: export the learner so it can be re-loaded for scoring
    save_dir = os.path.join(self.model_output_dir, "fastai_{}_model".format(self.target))
    check_create_dir(save_dir)
    save_path = os.path.join(save_dir, "export.pkl")
    learn.export(file=save_path)
    return train_loss, valid_loss
def __init__(self, cat_features, num_features, target):
    """Record feature metadata and make sure the output directory exists.

    :param cat_features: categorical feature names
    :param num_features: numerical feature names
    :param target: name of the target column this model predicts
    """
    # feature lists used when assembling model input frames
    self.cat_features = cat_features
    self.num_features = num_features
    self.target = target
    # no fitted model yet; set by the training routine
    self.model = None
    # trained artefacts land here
    self.model_output_dir = "./data/model_outputs/"
    check_create_dir(self.model_output_dir)
def __init__(self, config):
    """Set up per-season raw/clean data directories and a scraper.

    :param config: dict with at least "source_dir" and "season" keys
    """
    self.config = config
    # season-specific root, e.g. <source_dir>/<season>/
    season_dir = os.path.join(config["source_dir"], config["season"])
    self.data_dir = season_dir
    self.data_dir_raw = os.path.join(season_dir, 'raw')
    self.data_dir_clean = os.path.join(season_dir, 'clean')
    # make sure both sub-directories exist before any download/cleaning
    check_create_dir(self.data_dir_raw)
    check_create_dir(self.data_dir_clean)
    self.data_scraper = DataScraper(config)
def __init__(self, config):
    """
    initialize data scraper and log in to the FPL session

    :param config: config file specifying filepaths ("source_dir", "season")
    :type config: dict
    """
    self.name = "FPL-Scraper"
    self.config = config
    self.data = None
    self.data_dir = os.path.join(config["source_dir"], config["season"])
    check_create_dir(self.data_dir)
    # set fpl urls
    self.fpl_url = "https://fantasy.premierleague.com/api/"
    self.login_url = "https://users.premierleague.com/accounts/login/"
    self.manager_url = "https://fantasy.premierleague.com/api/entry/"
    self.classic_league_suburl = "leagues-classic/"
    self.team_entry_suburl = "entry/"
    self.bootstrap_suburl = "bootstrap-static/"
    self.player_suburl = "element-summary/"
    self.fixtures_suburl = "fixtures/"
    self.league_standing_url = self.fpl_url + self.classic_league_suburl
    # FIX: the original bare `except:` swallowed the KeyError from os.environ
    # and then crashed with AttributeError when building `payload` below.
    # Pre-set credentials to None and catch only KeyError so a missing env
    # var degrades to an unauthenticated session instead of a crash.
    self.username = None
    self.password = None
    try:
        self.username = os.environ["fpl_email"]
        self.password = os.environ["fpl_pwd"]
    except KeyError:
        print("Error: Set FPL Email and Password in your OS environment")
    payload = {
        'login': self.username,
        'password': self.password,
        'redirect_uri': "https://fantasy.premierleague.com/",
        'app': 'plfpl-web'
    }
    # authenticate once; subsequent API calls reuse this session's cookies
    self.session = requests.session()
    self.session.post(self.login_url, data=payload)
def make_XY_data(scoring_gw=None, dataset_dir="./data/model_data/xy_data/"):
    """Build train / test / scoring XY datasets across three FPL seasons.

    Runs feature engineering for 2018-19, 2019-20 and 2020-21, concatenates
    the results, encodes categorical columns, derives the regression /
    star / potential targets, splits by global gameweek id and writes the
    three CSVs plus the feature dict pickle into ``dataset_dir``.

    :param scoring_gw: gameweek to score; when falsy, the next gameweek is
        fetched from the live FPL API via DataScraper
    :param dataset_dir: output directory for the XY CSV/pickle files
    """
    # configs
    check_create_dir(dataset_dir)
    scraper_config = {"season": "2020_21", "source_dir": "./data/raw/"}
    data_scraper = DataScraper(scraper_config)
    if scoring_gw:
        pass
    else:
        print("getting latest scoring gameweek ...")
        scoring_gw = data_scraper.get_next_gameweek_id()
    fe_2020 = FeatureEngineering()
    config_2020 = {
        "data_dir": "./data/model_data/2020_21/",
        "file_fixture": "fixtures.csv",
        "file_team": "teams.csv",
        "file_gw": "merged_gw.csv",
        "file_player": "players_raw.csv",
        "file_understat_team": "understat_team_data.pkl",
        "scoring_gw": scoring_gw
    }
    df_2020 = fe_2020.execute_fe(config_2020)
    # for imputing opponent next in scoring df
    data_maker_2020 = ModelDataMaker(config_2020)
    # team-strength ("tbf") features describing the upcoming opponents
    tbf_feats = [
        "strength", "strength_overall_home", "strength_overall_away",
        "strength_attack_home", "strength_attack_away",
        "strength_defence_home", "strength_defence_away"
    ]
    tbf_feats = ["opp_" + feat for feat in tbf_feats]
    # rename maps: opponent strength for gw+1 and gw+2 get distinct suffixes
    tbf_feats_next_1_map = dict()
    tbf_feats_next_2_map = dict()
    for feat in tbf_feats:
        tbf_feats_next_1_map[feat] = feat + "_next_1"
        tbf_feats_next_2_map[feat] = feat + "_next_2"
    df_next_1_gw = data_maker_2020.make_nth_gw_scoring_base(scoring_gw + 1)
    df_next_2_gw = data_maker_2020.make_nth_gw_scoring_base(scoring_gw + 2)
    df_next_1_gw = df_next_1_gw.rename(columns=tbf_feats_next_1_map)
    df_next_2_gw = df_next_2_gw.rename(columns=tbf_feats_next_2_map)
    df_next_1_gw = df_next_1_gw.drop(columns=["opp_id", "opp_name"])
    df_next_2_gw = df_next_2_gw.drop(columns=["opp_id", "opp_name"])
    # pdb.set_trace()
    fe_2019 = FeatureEngineering()
    config_2019 = {
        "data_dir": "./data/model_data/2019_20/",
        "file_fixture": "fixtures.csv",
        "file_team": "teams.csv",
        "file_gw": "merged_gw.csv",
        "file_player": "players_raw.csv",
        "file_understat_team": "understat_team_data.pkl",
        "scoring_gw": "NA"
    }
    df_2019 = fe_2019.execute_fe(config_2019)
    fe_2018 = FeatureEngineering()
    config_2018 = {
        "data_dir": "./data/model_data/2018_19/",
        "file_fixture": "fixtures.csv",
        "file_team": "teams.csv",
        "file_gw": "merged_gw.csv",
        "file_player": "players_raw.csv",
        "file_understat_team": "understat_team_data.pkl",
        "scoring_gw": "NA"
    }
    df_2018 = fe_2018.execute_fe(config_2018)
    # season ids give every (season, gw) pair a unique, ordered global id
    df_2018["season_id"] = 0
    df_2019["season_id"] = 1
    df_2020["season_id"] = 2
    df_XY = pd.concat([df_2018, df_2019, df_2020])
    # global_gw_id = season_id * 100 + gw_id (assumes < 100 gws per season)
    df_XY["global_gw_id"] = df_XY[["season_id", "gw_id"]].apply(lambda x: x[0] * 100 + x[1], axis=1)
    # FIX: was home — force lagged home/away flags to proper booleans
    df_XY["was_home_lag_1"] = df_XY["was_home_lag_1"].astype(bool)
    df_XY["was_home_lag_2"] = df_XY["was_home_lag_2"].astype(bool)
    df_XY["was_home_lag_3"] = df_XY["was_home_lag_3"].astype(bool)
    # cat cols
    features_dict = fe_2020.feature_dict
    cat_features = features_dict["cat_features"]
    # collect object-dtype columns; only those need integer encoding
    cat_list = []
    type_dict = dict(df_XY.dtypes)
    for k, v in type_dict.items():
        if str(v) == 'object':
            cat_list.append(k)
    # print(cat_list)
    for feat in cat_features:
        if feat in cat_list:
            # print(feat)
            df_XY[feat] = df_XY[feat].astype('category').cat.codes
    # target engineering: clip outliers and derive a binary "star" label
    pts_clip = 10
    star_clip = 5
    pot_clip = 24
    df_XY["reg_target"] = df_XY["total_points"].clip(upper=pts_clip)
    df_XY["star_target"] = df_XY["total_points"].apply(
        lambda x: 1 if x >= star_clip else 0)
    df_XY["pot_target"] = df_XY["potential"].clip(upper=pot_clip)
    df_XY["global_gw_id"] = df_XY["global_gw_id"].fillna(-1)
    df_XY["global_gw_id"] = df_XY["global_gw_id"].astype(int)
    # split: latest gw = scoring, previous gw = test, everything older = train
    global_scoring_gw = df_XY["global_gw_id"].max()
    global_test_gw = global_scoring_gw - 1
    df_XY_train = df_XY[df_XY["global_gw_id"] < global_test_gw].copy()
    df_XY_test = df_XY[df_XY["global_gw_id"] == global_test_gw].copy()
    df_XY_scoring = df_XY[df_XY["global_gw_id"] == global_scoring_gw].copy()
    # impute missing values in scoring df: drop the (empty) next-gw opponent
    # strength columns and re-attach them from the freshly built gw+1/gw+2 bases
    tbf_feats_next_1 = [feat + "_next_1" for feat in tbf_feats]
    tbf_feats_next_2 = [feat + "_next_2" for feat in tbf_feats]
    impute_feats = tbf_feats_next_1 + tbf_feats_next_2
    df_XY_scoring = df_XY_scoring.drop(columns=impute_feats)
    df_next_1_gw["gw_id"] = scoring_gw
    df_next_2_gw["gw_id"] = scoring_gw
    df_XY_scoring = pd.merge(df_XY_scoring, df_next_1_gw, how='left', on=["player_id", "gw_id"])
    df_XY_scoring = pd.merge(df_XY_scoring, df_next_2_gw, how='left', on=["player_id", "gw_id"])
    # save XY data
    df_XY_train.to_csv(os.path.join(dataset_dir, "xy_train_gw_{}.csv".format(scoring_gw)), index=False)
    df_XY_test.to_csv(os.path.join(dataset_dir, "xy_test_gw_{}.csv".format(scoring_gw)), index=False)
    df_XY_scoring.to_csv(os.path.join(
        dataset_dir, "xy_scoring_gw_{}.csv".format(scoring_gw)), index=False)
    with open(os.path.join(dataset_dir, "features_after_fe.pkl"), 'wb') as f:
        pickle.dump(features_dict, f)