Ejemplo n.º 1
0
def perform_shap_analysis(model_name, gw_id):
    """Explain a LightGBM model's scoring predictions with SHAP values.

    Trains the model selected by ``model_name`` for ``gw_id``, runs
    ``shap.TreeExplainer`` on the scoring rows and appends one
    ``shap_<feature>`` column per model feature, plus a ``name`` column,
    to the scoring frame.

    Args:
        model_name: ``'LGBM Point'``, ``'LGBM Potential'`` or
            ``'LGBM Return'``.
        gw_id: Gameweek identifier forwarded to ``load_data`` and to the
            training routine.

    Returns:
        Tuple ``(df_exp, ave_score)``: the scoring frame extended with the
        SHAP columns, and the explainer's expected value (element ``[1]``
        of it for ``'LGBM Return'``).

    Raises:
        ValueError: If ``model_name`` is not one of the supported models.
    """
    trainers = {
        'LGBM Point': perform_lgbm_point_training,
        'LGBM Potential': perform_lgbm_potential_training,
        'LGBM Return': perform_lgbm_return_training,
    }
    # Fail early with a clear message instead of crashing later on a
    # ``None`` model / feature list.
    if model_name not in trainers:
        raise ValueError(
            "Unsupported model for SHAP analysis: {!r}".format(model_name))

    _, _, XY_scoring, _ = load_data(gw_id)
    model, _ = trainers[model_name](gw_id)
    features = model.features

    X_scoring = XY_scoring[features].copy()
    explainer = shap.TreeExplainer(model.model)

    shap_values = explainer.shap_values(X_scoring)
    ave_score = explainer.expected_value
    if model_name == 'LGBM Return':
        # The return model's explainer output is indexed per class here;
        # element 1 is used (presumably the positive class - confirm).
        shap_values = shap_values[1]
        ave_score = explainer.expected_value[1]

    shap_cols = ["shap_" + feat for feat in features]
    # Reuse the scoring index so the axis=1 concat lines rows up by label
    # even when XY_scoring does not carry a default RangeIndex.
    df_shap = pd.DataFrame(shap_values,
                           columns=shap_cols,
                           index=XY_scoring.index)
    df_exp = pd.concat([XY_scoring, df_shap], axis=1)

    data_maker = ModelDataMaker(CONFIG_2020)
    player_id_player_name_map = data_maker.get_player_id_player_name_map()
    # Unknown ids fall back to the id itself.
    df_exp["name"] = df_exp["player_id"].apply(
        lambda x: player_id_player_name_map.get(x, x))

    return df_exp, ave_score
Ejemplo n.º 2
0
def make_player_comparison_section():
    """Build the "Player Comparison" layout section.

    Returns:
        An ``html.Div`` holding a subtitle, a row with two player
        dropdowns (ids ``player-selection-dropdown-a`` / ``-b``) and an
        empty output container with id ``player-compare-output``.
    """
    name_by_player_id = ModelDataMaker(
        CONFIG_2020).get_player_id_player_name_map()
    # De-duplicate and sort the display names for the dropdowns.
    unique_names = sorted(set(name_by_player_id.values()))
    options = [dict(label=name, value=name) for name in unique_names]

    dropdown_columns = []
    for dropdown_id in ('player-selection-dropdown-a',
                        'player-selection-dropdown-b'):
        dropdown = make_dropdown(dropdown_id,
                                 options,
                                 placeholder="Select Player ...")
        dropdown_columns.append(html.Div(dropdown, className='col-6'))

    dropdown_row = html.Div(children=dropdown_columns, className='row')
    output_box = html.Div(
        id='player-compare-output',
        style={"margin-top": "1rem", "margin-bottom": "2rem"})
    header = html.Div("Player Comparison", className='subtitle inline-header')

    return html.Div(children=[header, dropdown_row, output_box])
Ejemplo n.º 3
0
def make_lead_generation_section():
    """Build the lead-generation layout section.

    Returns:
        An ``html.Div`` with a "Select Team & Model" subtitle, a row with
        the team and model dropdowns, and one loading-wrapped output
        container per position (ids ``gk-leads``, ``def-leads``,
        ``mid-leads``, ``fwd-leads``).
    """
    spacing = {"margin-top": "1rem", "margin-bottom": "2rem"}

    name_by_team_id = ModelDataMaker(CONFIG_2020).get_team_id_team_name_map()
    # Unique team names plus the catch-all "All" entry, alphabetically.
    team_choices = sorted([*set(name_by_team_id.values()), "All"])
    team_dropdown = make_dropdown(
        'team-selection-dropdown-leads',
        [dict(label=name, value=name) for name in team_choices],
        placeholder="Select Team ...")

    model_choices = [
        "LGBM Point", "LGBM Potential", "LGBM Return", "Fast Point",
        "Fast Potential", "Fast Return"
    ]
    model_dropdown = make_dropdown(
        'model-selection-dropdown-leads',
        [dict(label=name, value=name) for name in model_choices],
        placeholder="Select Model ...")

    selector_row = html.Div(children=[
        html.Div(team_dropdown, className='col-6'),
        html.Div(model_dropdown, className='col-6'),
    ],
                            className='row')

    # One header + loading-wrapped placeholder per position.
    position_blocks = []
    for title, container_id in (("GK", 'gk-leads'), ("DEF", 'def-leads'),
                                ("MID", 'mid-leads'), ("FWD", 'fwd-leads')):
        position_blocks.append(
            html.Div(title, className='subtitle inline-header'))
        position_blocks.append(
            dcc.Loading(html.Div(id=container_id, style=spacing),
                        color='black'))
    leads_area = html.Div(children=position_blocks)

    return html.Div(children=[
        html.Div("Select Team & Model", className='subtitle inline-header'),
        selector_row,
        leads_area,
    ])
Ejemplo n.º 4
0
def make_shap_explanation_section():
    """Build the SHAP explanation layout section.

    Returns:
        An ``html.Div`` with a row holding the player and model dropdowns
        (ids ``player-selection-dropdown-shap`` and
        ``model-selection-dropdown-shap``) followed by a loading-wrapped
        output container with id ``shap-output``.
    """
    name_by_player_id = ModelDataMaker(
        CONFIG_2020).get_player_id_player_name_map()
    # De-duplicate and sort the display names for the dropdown.
    unique_names = sorted(set(name_by_player_id.values()))
    player_dropdown = make_dropdown(
        'player-selection-dropdown-shap',
        [dict(label=name, value=name) for name in unique_names],
        placeholder="Select Player ...")

    # Only the LightGBM models are offered in this section.
    explainable_models = ["LGBM Point", "LGBM Potential", "LGBM Return"]
    model_dropdown = make_dropdown(
        'model-selection-dropdown-shap',
        [dict(label=name, value=name) for name in explainable_models],
        placeholder="Select Model ...")

    selector_row = html.Div(children=[
        html.Div(player_dropdown, className='col-6'),
        html.Div(model_dropdown, className='col-6'),
    ],
                            className='row')

    output_placeholder = html.Div(
        id='shap-output',
        style={"margin-top": "1rem", "margin-bottom": "2rem"})
    output_area = html.Div(
        children=[dcc.Loading(output_placeholder, color='black')])

    # html.Div("Select Player & Model", className='subtitle inline-header'),
    return html.Div(children=[selector_row, output_area])
Ejemplo n.º 5
0
    def execute_fe(self, config):
        """Build the feature table for one season.

        Registers the static (non-lag) features in ``self.feature_dict``,
        loads the base and understat frames through ``ModelDataMaker``,
        drops rows without a positive ``effective_gw_id``, generates lag /
        next frames with ``self.make_lag_features`` and
        ``self.make_next_features`` and left-merges them onto the base
        frame.

        Args:
            config: Configuration object handed to ``ModelDataMaker``.

        Returns:
            The base frame with every generated feature frame merged in.
        """
        gw_col = "effective_gw_id"

        def prefixed(prefix, names):
            """Return ``names`` with ``prefix`` prepended to each entry."""
            return [prefix + name for name in names]

        def player_keys():
            """Fresh ``[player_id, effective_gw_id]`` key list."""
            return ["player_id", gw_col]

        def team_keys():
            """Fresh ``[team_id, effective_gw_id]`` key list."""
            return ["team_id", gw_col]

        def merge_frames(base, frames, banner, dedup_keys, join_keys,
                         renames=None, report_before=True):
            """Left-merge each de-duplicated frame onto ``base`` with logs."""
            print(banner)
            if report_before:
                print("Shape before merge: {}".format(base.shape))
            for frame in frames:
                frame = frame.drop_duplicates(subset=dedup_keys).copy()
                if renames is not None:
                    frame = frame.rename(columns=renames)
                base = pd.merge(base, frame, how="left", on=join_keys)
            print("Shape after merge: {}".format(base.shape))
            return base

        # ---- raw column groups -------------------------------------------
        gw_cat_features = [
            "was_home", "team_h_score", "team_a_score", "goals_scored",
            "assists", "clean_sheets", "goals_conceded", "own_goals",
            "penalties_saved", "penalties_missed", "yellow_cards", "red_cards",
            "saves", "bonus"
        ]
        gw_num_features = [
            "minutes", "bps", "influence", "creativity", "threat", "ict_index",
            "selected", "transfers_in", "transfers_out", "transfers_balance"
        ]
        team_cat_features = ["strength"]
        team_num_features = [
            "strength_overall_home", "strength_overall_away",
            "strength_attack_home", "strength_attack_away",
            "strength_defence_home", "strength_defence_away"
        ]
        understat_cat_features = []
        understat_num_features = [
            "xg", "xga", "npxg", "npxga", "deep", "deep_allowed", "xpts",
            "npxgd", "ppda_att", "ppda_def", "ppda_allowed_att",
            "ppda_allowed_def"
        ]

        own_team_cat_features = prefixed("own_", team_cat_features)
        own_team_num_features = prefixed("own_", team_num_features)
        opp_team_cat_features = prefixed("opp_", team_cat_features)
        opp_team_num_features = prefixed("opp_", team_num_features)

        # mis-leading name? these are non-lag feats
        static_cat_features = (["position", "is_home"] +
                               own_team_cat_features + opp_team_cat_features)
        static_num_features = own_team_num_features + opp_team_num_features

        registry = self.feature_dict
        registry["features"].extend(static_cat_features)
        registry["features"].extend(static_num_features)
        registry["cat_features"].extend(static_cat_features)
        registry["num_features"].extend(static_num_features)

        # ---- load and clean the source frames ----------------------------
        data_maker = ModelDataMaker(config)
        df_base = data_maker.make_base_data()
        df_understat = data_maker.prepare_understat_data()

        print("Removing invalid data points from GW Data...")
        print("Shape before removal: {}".format(df_base.shape))
        df_base = df_base[df_base[gw_col].notna()].copy()
        df_base = df_base[df_base[gw_col] > 0].copy()
        print("Shape after removal: {}".format(df_base.shape))

        for id_col in ("player_id", gw_col, "own_team_id", "opp_team_id"):
            df_base[id_col] = df_base[id_col].astype(int)

        # Two renamed views of the understat data: own-team and opponent.
        understat_focus_cols = understat_cat_features + understat_num_features
        df_understat_own = df_understat.copy().rename(
            columns={col: "own_" + col for col in understat_focus_cols})
        df_understat_opp = df_understat.copy().rename(
            columns={col: "opp_" + col for col in understat_focus_cols})

        # ---- feature generation ------------------------------------------
        # The helper calls below keep their original order in case the
        # helpers have order-dependent side effects.
        opp_cat_lag_dfs = self.make_lag_features(
            df_base, player_keys(), gw_col, opp_team_cat_features, 'cat')
        opp_num_lag_dfs = self.make_lag_features(
            df_base, player_keys(), gw_col, opp_team_num_features, 'num')
        opp_cat_next_dfs = self.make_next_features(
            df_base, player_keys(), gw_col, opp_team_cat_features, 'cat')
        opp_num_next_dfs = self.make_next_features(
            df_base, player_keys(), gw_col, opp_team_num_features, 'num')

        gw_cat_lag_dfs = self.make_lag_features(
            df_base, player_keys(), gw_col, gw_cat_features, 'cat')
        gw_num_lag_dfs = self.make_lag_features(
            df_base, player_keys(), gw_col, gw_num_features, 'num')

        # The understat 'cat' results are generated but never merged below
        # (the understat categorical feature list is empty).
        understat_own_cat_lag_dfs = self.make_lag_features(
            df_understat_own, team_keys(), gw_col,
            prefixed("own_", understat_cat_features), 'cat')
        understat_own_num_lag_dfs = self.make_lag_features(
            df_understat_own, team_keys(), gw_col,
            prefixed("own_", understat_num_features), 'num')

        understat_opp_cat_lag_dfs = self.make_lag_features(
            df_understat_opp, team_keys(), gw_col,
            prefixed("opp_", understat_cat_features), 'cat')
        understat_opp_num_lag_dfs = self.make_lag_features(
            df_understat_opp, team_keys(), gw_col,
            prefixed("opp_", understat_num_features), 'num')

        # ---- merge player-level feature frames ---------------------------
        player_level_merges = (
            ("Merging Gameweek Cat Features", gw_cat_lag_dfs),
            ("Merging Gameweek Num Features", gw_num_lag_dfs),
            ("Merging Opp Cat Team Features", opp_cat_lag_dfs),
            ("Merging Opp Num Team Features", opp_num_lag_dfs),
            ("Merging Opp Cat Team Next Features", opp_cat_next_dfs),
            ("Merging Opp Num Team Next Features", opp_num_next_dfs),
        )
        for banner, frames in player_level_merges:
            df_base = merge_frames(df_base, frames, banner, player_keys(),
                                   player_keys())

        # ---- merge team-level understat frames ---------------------------
        # add understat data own team
        df_base = merge_frames(df_base,
                               understat_own_num_lag_dfs,
                               "Merging understat own team data",
                               team_keys(), ["own_team_id", gw_col],
                               renames={"team_id": "own_team_id"},
                               report_before=False)

        # add understat data opp team
        df_base = merge_frames(df_base,
                               understat_opp_num_lag_dfs,
                               "Merging understat opp team data",
                               team_keys(), ["opp_team_id", gw_col],
                               renames={"team_id": "opp_team_id"},
                               report_before=False)

        return df_base
Ejemplo n.º 6
0
def _make_season_config(season, scoring_gw):
    """Return the feature-engineering config dict for one season folder."""
    return {
        "data_dir": "./data/model_data/{}/".format(season),
        "file_fixture": "fixtures.csv",
        "file_team": "teams.csv",
        "file_gw": "merged_gw.csv",
        "file_player": "players_raw.csv",
        "file_understat_team": "understat_team_data.pkl",
        "scoring_gw": scoring_gw
    }


def make_XY_data(scoring_gw=None, dataset_dir="./data/model_data/xy_data/"):
    """Build and persist the train / test / scoring datasets.

    Runs feature engineering for the 2018_19, 2019_20 and 2020_21 seasons,
    stacks the results, derives the targets, splits the rows by
    ``global_gw_id`` and writes three CSV files plus the pickled feature
    dictionary into ``dataset_dir``.

    Args:
        scoring_gw: Gameweek to score. When falsy, the next gameweek id is
            requested from ``DataScraper``.
        dataset_dir: Output directory (created via ``check_create_dir``).

    Returns:
        None. Results are written to disk.
    """
    check_create_dir(dataset_dir)
    scraper_config = {"season": "2020_21", "source_dir": "./data/raw/"}
    data_scraper = DataScraper(scraper_config)

    if not scoring_gw:
        print("getting latest scoring gameweek ...")
        scoring_gw = data_scraper.get_next_gameweek_id()

    # --- current season -----------------------------------------------------
    fe_2020 = FeatureEngineering()
    config_2020 = _make_season_config("2020_21", scoring_gw)
    df_2020 = fe_2020.execute_fe(config_2020)

    # Opponent-strength columns of the two gameweeks after the scoring one,
    # used further down to fill the "_next_N" columns of the scoring rows.
    data_maker_2020 = ModelDataMaker(config_2020)
    tbf_feats = [
        "opp_" + feat for feat in [
            "strength", "strength_overall_home", "strength_overall_away",
            "strength_attack_home", "strength_attack_away",
            "strength_defence_home", "strength_defence_away"
        ]
    ]
    tbf_feats_next_1 = [feat + "_next_1" for feat in tbf_feats]
    tbf_feats_next_2 = [feat + "_next_2" for feat in tbf_feats]

    df_next_1_gw = data_maker_2020.make_nth_gw_scoring_base(scoring_gw + 1)
    df_next_2_gw = data_maker_2020.make_nth_gw_scoring_base(scoring_gw + 2)

    df_next_1_gw = df_next_1_gw.rename(
        columns=dict(zip(tbf_feats, tbf_feats_next_1)))
    df_next_2_gw = df_next_2_gw.rename(
        columns=dict(zip(tbf_feats, tbf_feats_next_2)))
    df_next_1_gw = df_next_1_gw.drop(columns=["opp_id", "opp_name"])
    df_next_2_gw = df_next_2_gw.drop(columns=["opp_id", "opp_name"])

    # --- previous seasons ---------------------------------------------------
    fe_2019 = FeatureEngineering()
    df_2019 = fe_2019.execute_fe(_make_season_config("2019_20", "NA"))

    fe_2018 = FeatureEngineering()
    df_2018 = fe_2018.execute_fe(_make_season_config("2018_19", "NA"))

    df_2018["season_id"] = 0
    df_2019["season_id"] = 1
    df_2020["season_id"] = 2

    df_XY = pd.concat([df_2018, df_2019, df_2020])
    # Season-aware gameweek id. Computed column-wise: the previous row-wise
    # apply indexed each row positionally (x[0], x[1]), which pandas
    # deprecates for label-indexed Series.
    df_XY["global_gw_id"] = df_XY["season_id"] * 100 + df_XY["gw_id"]

    # FIX: was home
    for lag in (1, 2, 3):
        col = "was_home_lag_{}".format(lag)
        df_XY[col] = df_XY[col].astype(bool)

    # Encode categorical features that are still stored as objects.
    features_dict = fe_2020.feature_dict
    object_cols = {
        col
        for col, dtype in df_XY.dtypes.items() if str(dtype) == 'object'
    }
    for feat in features_dict["cat_features"]:
        if feat in object_cols:
            df_XY[feat] = df_XY[feat].astype('category').cat.codes

    # Targets: clipped points, a binary flag for >= star_clip points, and
    # clipped potential.
    pts_clip = 10
    star_clip = 5
    pot_clip = 24
    df_XY["reg_target"] = df_XY["total_points"].clip(upper=pts_clip)
    df_XY["star_target"] = df_XY["total_points"].apply(
        lambda x: 1 if x >= star_clip else 0)
    df_XY["pot_target"] = df_XY["potential"].clip(upper=pot_clip)

    # Split: the largest global gameweek is scored, the one before it is the
    # test set, everything earlier is training data.
    df_XY["global_gw_id"] = df_XY["global_gw_id"].fillna(-1).astype(int)
    global_scoring_gw = df_XY["global_gw_id"].max()
    global_test_gw = global_scoring_gw - 1
    df_XY_train = df_XY[df_XY["global_gw_id"] < global_test_gw].copy()
    df_XY_test = df_XY[df_XY["global_gw_id"] == global_test_gw].copy()
    df_XY_scoring = df_XY[df_XY["global_gw_id"] == global_scoring_gw].copy()

    # impute missing values in scoring df: replace its "_next_N" opponent
    # columns with the values taken from the following two gameweeks.
    df_XY_scoring = df_XY_scoring.drop(columns=tbf_feats_next_1 +
                                       tbf_feats_next_2)
    for df_next in (df_next_1_gw, df_next_2_gw):
        df_next["gw_id"] = scoring_gw
        df_XY_scoring = pd.merge(df_XY_scoring,
                                 df_next,
                                 how='left',
                                 on=["player_id", "gw_id"])

    # save XY data
    outputs = (
        ("xy_train_gw_{}.csv", df_XY_train),
        ("xy_test_gw_{}.csv", df_XY_test),
        ("xy_scoring_gw_{}.csv", df_XY_scoring),
    )
    for file_pattern, frame in outputs:
        frame.to_csv(os.path.join(dataset_dir,
                                  file_pattern.format(scoring_gw)),
                     index=False)

    with open(os.path.join(dataset_dir, "features_after_fe.pkl"), 'wb') as f:
        pickle.dump(features_dict, f)
Ejemplo n.º 7
0
def execute_fastai_return_scoring(team_name, model_name, gw_id):
    """Build the per-position lead tables for a team, model and gameweek.

    Loads the six model prediction files for ``gw_id``, joins them onto a
    player information frame derived from the scoring data, sorts by the
    selected model's prediction and renders one table per position.

    Args:
        team_name: Team to filter on; ``"All"`` keeps every team.
        model_name: Display name of the model used for sorting, one of the
            keys of ``model_name_col_map`` below.
        gw_id: Gameweek whose predictions are shown.

    Returns:
        Tuple ``(gk_table, def_table, mid_table, fwd_table)`` produced by
        ``make_table``. When the gameweek or model is missing, or any
        prediction file is empty / unavailable, the same ``html.P`` message
        is returned in all four slots.

    Raises:
        KeyError: If ``model_name`` is not a key of ``model_name_col_map``.
    """
    if not gw_id:
        msg = html.P("Please select GW for scoring")
        return msg, msg, msg, msg

    if not model_name:
        msg = html.P("Please select Model")
        return msg, msg, msg, msg

    model_name_col_map = {
        "LGBM Point": "lgbm_point_pred",
        "LGBM Potential": "lgbm_potential_pred",
        "LGBM Return": "lgbm_return_pred",
        "Fast Point": "fastai_point_pred",
        "Fast Potential": "fastai_potential_pred",
        "Fast Return": "fastai_return_pred"
    }

    print("Leads for {} in gw {}".format(team_name, gw_id))
    data_maker = ModelDataMaker(CONFIG_2020)
    output_dir = "./data/model_outputs/"

    # load model predictions (one file per model)
    prediction_prefixes = [
        "lgbm_point", "lgbm_potential", "lgbm_return", "fastai_point",
        "fastai_potential", "fastai_return"
    ]
    all_preds_df = [
        load_dataframe(
            os.path.join(output_dir,
                         "{}_predictions_gw_{}.csv".format(prefix, gw_id)))
        for prefix in prediction_prefixes
    ]

    # Explicit check instead of ``assert`` inside a bare ``except``: asserts
    # are stripped under ``python -O`` and a bare except hides real errors.
    for df in all_preds_df:
        if df is None or len(df) == 0:
            msg = html.P("Run scoring for models before generating leads")
            return msg, msg, msg, msg

    # prepare prediction base dataframe
    _, _, XY_scoring, _ = load_data(gw_id)
    player_id_team_id_map = data_maker.get_player_id_team_id_map()
    player_id_player_name_map = data_maker.get_player_id_player_name_map()
    player_id_player_position_map = (
        data_maker.get_player_id_player_position_map())
    team_id_team_name_map = data_maker.get_team_id_team_name_map()
    player_id_cost_map = data_maker.get_player_id_cost_map()
    player_id_play_chance_map = data_maker.get_player_id_play_chance_map()
    player_id_selection_map = data_maker.get_player_id_selection_map()
    player_id_ave_points_map = data_maker.get_player_id_ave_points_map()

    df_leads = pd.DataFrame()
    df_leads["player_id"] = XY_scoring["player_id"].values
    df_leads["name"] = df_leads["player_id"].apply(
        lambda x: player_id_player_name_map.get(x, x))
    # Unknown players get no team rather than having their player id looked
    # up as if it were a team id.
    df_leads["team"] = df_leads["player_id"].apply(
        lambda x: team_id_team_name_map.get(player_id_team_id_map.get(x)))
    # ``.values`` keeps this positional, like ``player_id`` above, so a
    # non-default XY_scoring index cannot misalign the rows.
    df_leads["next_opponent"] = XY_scoring["opp_team_id"].apply(
        lambda x: team_id_team_name_map.get(x, x)).values
    df_leads["position"] = df_leads["player_id"].apply(
        lambda x: player_id_player_position_map.get(x, x))
    df_leads["chance_of_play"] = df_leads["player_id"].apply(
        lambda x: player_id_play_chance_map.get(x, x))
    df_leads["cost"] = df_leads["player_id"].apply(
        lambda x: player_id_cost_map.get(x, x))
    df_leads["selection_pct"] = df_leads["player_id"].apply(
        lambda x: player_id_selection_map.get(x, x))
    df_leads["ave_pts"] = df_leads["player_id"].apply(
        lambda x: player_id_ave_points_map.get(x, x))
    df_leads["gw"] = gw_id
    df_leads = df_leads.drop_duplicates(subset=["player_id"])

    if team_name != "All":
        df_leads = df_leads[df_leads["team"] == team_name].copy()

    # merge predictions
    for df in all_preds_df:
        df_leads = pd.merge(df_leads,
                            df.drop_duplicates(),
                            how='left',
                            on=['player_id', 'gw'])

    # make tables
    df_leads["cost"] = df_leads["cost"] / 10
    model_col = model_name_col_map[model_name]
    df_leads = df_leads.sort_values(by=model_col, ascending=False)

    # column round up
    for prefix in prediction_prefixes:
        pred_col = prefix + "_pred"
        df_leads[pred_col] = df_leads[pred_col].round(2)

    col_map = {
        "name": "Player",
        "cost": "Cost",
        "next_opponent": "Opponent",
        "selection_pct": "TSB",
        model_col: model_name
    }
    base_cols = ["name", "cost", "selection_pct", "next_opponent", model_col]

    # One table per position, in GK / DEF / MID / FWD order.
    tables = []
    for position in ("GK", "DEF", "MID", "FWD"):
        df_position = df_leads[df_leads["position"] == position]
        df_position = df_position[base_cols].rename(columns=col_map)
        tables.append(make_table(df_position))

    gk_table, def_table, mid_table, fwd_table = tables
    return gk_table, def_table, mid_table, fwd_table
Ejemplo n.º 8
0
def load_leads(gw_id):
    """Load all model predictions for a gameweek into one leads frame.

    Reads the six prediction CSVs for ``gw_id``, joins them onto a player
    information frame derived from the scoring data, renames the
    prediction columns to the models' display names and adds a combined
    ``Net`` score scaled by its maximum.

    Args:
        gw_id: Gameweek whose predictions are loaded.

    Returns:
        The leads ``DataFrame``; or ``html.P("refresh model scores")`` as
        soon as one prediction file fails ``check_cache_validity``.
    """
    data_maker = ModelDataMaker(CONFIG_2020)
    output_dir = "./data/model_outputs/"
    prediction_prefixes = [
        "lgbm_point", "lgbm_potential", "lgbm_return", "fastai_point",
        "fastai_potential", "fastai_return"
    ]

    dfs = []
    for prefix in prediction_prefixes:
        file_path = os.path.join(
            output_dir, "{}_predictions_gw_{}.csv".format(prefix, gw_id))
        # Stale or missing predictions: ask the user to re-run scoring.
        if not check_cache_validity(file_path, valid_days=2.0):
            return html.P("refresh model scores")
        dfs.append(pd.read_csv(file_path))

    _, _, XY_scoring, _ = load_data(gw_id)
    player_id_team_id_map = data_maker.get_player_id_team_id_map()
    player_id_player_name_map = data_maker.get_player_id_player_name_map()
    player_id_player_position_map = (
        data_maker.get_player_id_player_position_map())
    team_id_team_name_map = data_maker.get_team_id_team_name_map()
    player_id_cost_map = data_maker.get_player_id_cost_map()
    player_id_play_chance_map = data_maker.get_player_id_play_chance_map()
    player_id_selection_map = data_maker.get_player_id_selection_map()
    player_id_ave_points_map = data_maker.get_player_id_ave_points_map()

    df_leads = pd.DataFrame()
    df_leads["player_id"] = XY_scoring["player_id"].values
    df_leads["name"] = df_leads["player_id"].apply(
        lambda x: player_id_player_name_map.get(x, x))
    # Unknown players get no team rather than having their player id looked
    # up as if it were a team id.
    df_leads["team"] = df_leads["player_id"].apply(
        lambda x: team_id_team_name_map.get(player_id_team_id_map.get(x)))
    # ``.values`` keeps this positional, like ``player_id`` above, so a
    # non-default XY_scoring index cannot misalign the rows.
    df_leads["next_opponent"] = XY_scoring["opp_team_id"].apply(
        lambda x: team_id_team_name_map.get(x, x)).values
    df_leads["position"] = df_leads["player_id"].apply(
        lambda x: player_id_player_position_map.get(x, x))
    df_leads["chance_of_play"] = df_leads["player_id"].apply(
        lambda x: player_id_play_chance_map.get(x, x))
    df_leads["cost"] = df_leads["player_id"].apply(
        lambda x: player_id_cost_map.get(x, x))
    df_leads["selection_pct"] = df_leads["player_id"].apply(
        lambda x: player_id_selection_map.get(x, x))
    df_leads["ave_pts"] = df_leads["player_id"].apply(
        lambda x: player_id_ave_points_map.get(x, x))
    df_leads["gw"] = gw_id
    df_leads = df_leads.drop_duplicates(subset=["player_id"])

    # merge predictions
    for df in dfs:
        df_leads = pd.merge(df_leads,
                            df.drop_duplicates(),
                            how='left',
                            on=['player_id', 'gw'])
    df_leads["cost"] = df_leads["cost"] / 10

    model_name_col_map = {
        "LGBM Point": "lgbm_point_pred",
        "LGBM Potential": "lgbm_potential_pred",
        "LGBM Return": "lgbm_return_pred",
        "Fast Point": "fastai_point_pred",
        "Fast Potential": "fastai_potential_pred",
        "Fast Return": "fastai_return_pred"
    }
    # Rename prediction columns to their display names.
    col_model_name_map = {
        col: display_name
        for display_name, col in model_name_col_map.items()
    }
    df_leads = df_leads.rename(columns=col_model_name_map)

    # Combined score: point predictions weighted double, scaled by both
    # return predictions, then divided by the maximum.
    df_leads["Net"] = (2 * df_leads["LGBM Point"] +
                       df_leads["LGBM Potential"] +
                       2 * df_leads["Fast Point"] + df_leads["Fast Potential"]
                       ) * df_leads["Fast Return"] * df_leads["LGBM Return"]
    max_net = df_leads["Net"].max()
    df_leads["Net"] = df_leads["Net"] / max_net
    return df_leads
Ejemplo n.º 9
0
def query_manager_current_gw_picks(manager_id, league_id):
    """Return a manager's current-gameweek picks with points and ownership.

    Fetches the picks through ``DataLoader``, attaches live gameweek
    points, player / team / position labels, the ownership frames from
    ``get_top_eo`` and ``get_league_eo`` and the ``xP`` column from
    ``load_leads_current_gw``. Rows are ordered by position with bench
    players (multiplier 0) last.

    Args:
        manager_id: Manager whose picks are queried.
        league_id: League passed to ``get_league_eo``.

    Returns:
        ``DataFrame`` with the columns ``Player``, ``multiplier``,
        ``Team``, ``Position``, ``Top EO``, ``League EO``, ``xP`` and
        ``Points``.

    Raises:
        KeyError: If a picked element has no team mapping or its position
            is not one of GK / DEF / MID / FWD.
    """
    config = load_config()
    data_loader = DataLoader(config)
    data = data_loader.get_manager_current_gw_picks(manager_id)
    df = pd.DataFrame(data)

    data_maker = ModelDataMaker(CONFIG_2020)
    player_id_team_id_map = data_maker.get_player_id_team_id_map()
    player_id_player_name_map = data_maker.get_player_id_player_name_map()
    player_id_player_position_map = (
        data_maker.get_player_id_player_position_map())
    team_id_team_name_map = data_maker.get_team_id_team_name_map()
    player_id_cost_map = data_maker.get_player_id_cost_map()
    player_id_selection_map = data_maker.get_player_id_selection_map()

    # points
    df_gw = data_loader.get_live_gameweek_data()
    df_gw = df_gw.rename(columns={"id": "element", "event_points": "Points"})
    df_gw = df_gw[["element", "Points"]].drop_duplicates(subset=["element"])
    df = pd.merge(df, df_gw, how='left', on="element")

    df["Player"] = df["element"].apply(
        lambda x: player_id_player_name_map.get(x, x))
    # Keep the first two name tokens. ``str`` guards the fallback case in
    # which the (non-string) element id stood in for a missing name.
    df["Player"] = df["Player"].apply(
        lambda x: " ".join(str(x).split(" ")[:2]))
    df["Team"] = df["element"].apply(
        lambda x: team_id_team_name_map[player_id_team_id_map[x]])
    df["Position"] = df["element"].apply(
        lambda x: player_id_player_position_map.get(x, x))

    # Captaincy markers. Built with zip instead of a row-wise apply that
    # indexed each row positionally (x[0], x[1]), which pandas deprecates
    # for label-indexed Series.
    df["Player"] = [
        name + " (C)" if is_captain else name
        for name, is_captain in zip(df["Player"], df["is_captain"])
    ]
    df["Player"] = [
        name + " (VC)" if is_vice else name
        for name, is_vice in zip(df["Player"], df["is_vice_captain"])
    ]

    df["Cost"] = df["element"].apply(lambda x: player_id_cost_map.get(x, x))
    df["Cost"] = df["Cost"] / 10
    df["TSB"] = df["element"].apply(
        lambda x: player_id_selection_map.get(x, x))

    # Get Effective ownership
    df_stats = get_top_eo()
    df_league_eo = get_league_eo(league_id)
    df = pd.merge(df, df_stats, on="element", how="left")
    df = pd.merge(df, df_league_eo, on="element", how="left")

    df_leads = load_leads_current_gw()
    df_leads = df_leads[["player_id", "xP"]].copy()
    df = pd.merge(df,
                  df_leads,
                  how='left',
                  left_on="element",
                  right_on="player_id")

    # Order by position, then starters (multiplier > 0) ahead of the bench.
    position_map = {"GK": 1, "DEF": 2, "MID": 3, "FWD": 4}
    df["pos"] = df["Position"].apply(lambda x: position_map[x])
    df = df.sort_values(by=["pos"])
    df_xi = df[df["multiplier"] > 0].copy()
    df_bench = df[df["multiplier"] == 0].copy()
    df = pd.concat([df_xi, df_bench])

    keep_cols = [
        "Player", "multiplier", "Team", "Position", "Top EO", "League EO",
        "xP", "Points"
    ]
    df = df[keep_cols].copy()
    return df