Python format_dataframe Exemples, utils.format_dataframe Python Exemples

Exemple #1

0

Afficher le fichier

def graph_player_distributions(stats_df,
                               player_name,
                               team,
                               season,
                               teams_df,
                               cols,
                               pct_cols,
                               filter_by_position=False,
                               position_to_use=None):
    """Draw one distribution plot per stat column for a player and save a PNG.

    The figure is a single column of ``len(cols)`` stacked subplots, with the
    player name as the title, the sub-title returned by ``format_dataframe``
    at the top right and the team's full name at the top left.

    Args:
        stats_df: Stats table forwarded to ``get_team_info`` and
            ``format_dataframe``.
        player_name: Player to plot; also used as the chart title and in the
            output file name.
        team: Team identifier forwarded to ``get_team_info`` and
            ``format_dataframe``.
        season: Season forwarded to ``format_dataframe`` and embedded in the
            output file name.
        teams_df: Team table forwarded to ``get_team_info``.
        cols: Columns to plot, one subplot each, in order.
        pct_cols: Columns that ``dist_plot`` should treat as percentages.
        filter_by_position: Forwarded to ``format_dataframe``.
        position_to_use: Forwarded to ``format_dataframe``.

    Side effects:
        Writes ``output/{player_name} {season} Shooting Distribution.png``
        (creating ``output/`` if needed) and closes all pyplot figures, even
        when plotting fails.
    """
    # Function-scope import: the file's import block is not part of this block.
    import os

    team_info = get_team_info(stats_df=stats_df, teams_df=teams_df, team=team)
    df_to_graph, player_stats_df, sub_title = format_dataframe(
        stats_df=stats_df,
        player_name=player_name,
        team=team,
        season=season,
        cols=cols,
        filter_by_position=filter_by_position,
        position_to_use=position_to_use)
    fig = plt.figure(figsize=(8, len(cols) * 4), constrained_layout=True)
    try:
        gs = fig.add_gridspec(len(cols), 1)
        fig.suptitle(player_name,
                     fontsize=30,
                     fontweight='bold',
                     y=1.02,
                     x=-.05,
                     horizontalalignment='left',
                     verticalalignment='bottom')
        fig.text(x=1.05,
                 y=1.02,
                 s=sub_title,
                 horizontalalignment='right',
                 verticalalignment='top',
                 fontsize=24,
                 style='italic')
        # NOTE(review): the season label is hard-coded while the file name
        # below uses ``season`` -- confirm whether it should be derived.
        fig.text(x=-.05,
                 y=1.02,
                 s=f'{team_info["Full"]}, 2019-20',
                 horizontalalignment='left',
                 verticalalignment='top',
                 fontsize=24,
                 style='italic')
        for index, col in enumerate(cols):
            dist_plot(totals=df_to_graph,
                      stats_to_graph=player_stats_df,
                      col=col,
                      team=team_info,
                      ax=fig.add_subplot(gs[index, 0]),
                      is_pct_col=col in pct_cols)
        # Without this, a missing directory only surfaces as an error after
        # all the plotting work has been done.
        os.makedirs('output', exist_ok=True)
        # Save this figure explicitly instead of pyplot's "current" figure.
        fig.savefig(
            f'output/{player_name} {season} Shooting Distribution.png',
            bbox_inches='tight',
            pad_inches=2)
    finally:
        # Release the figure even if plotting or saving raised.
        plt.close('all')

Exemple #2

0

Afficher le fichier

 def predict(self):
     """Run the stored model on a single sample or on the configured table.

     Reads ``self.config``: ``algorithm`` and ``model`` select the model to
     load, ``oneSample`` selects the mode, ``X`` holds the feature values in
     one-sample mode and ``tableName`` must be set in multi-sample mode, where
     the rows come from ``self.table_data``.

     Returns:
         ``{"res": ..., "code": "200", "msg": "ok!"}`` on success, or
         ``{"data": "", "code": "500", "msg": ...}`` if any exception is
         raised; the exception is logged rather than propagated.
     """
     try:
         model = self.load_model_by_database(self.config["algorithm"],
                                             self.config["model"])
         res = {}
         if self.config['oneSample']:
             features = self.config['X']
             if not features:
                 raise ValueError(
                     "feature must not be empty when one-sample")
             X = [[float(x) for x in features]]
             # Run the model once and reuse the value; the previous code
             # re-ran the prediction for the type check and the formatting.
             raw_prediction = model.predict(X)[0]
             if isinstance(raw_prediction, str):
                 prediction = raw_prediction
             else:
                 prediction = "{:.0f}".format(raw_prediction)
             res.update({
                 "data": [[",".join(str(s) for s in features), prediction]],
                 "title": "单样本预测结果",
                 "col": ["样本特征", "模型预测结果"],
             })
         else:
             # Fetch the rows from the database-backed table.
             if not self.config['tableName']:
                 raise ValueError(
                     "cannot find table data when multi-sample")
             data = self.table_data
             log.info("输入数据大小:{}".format(len(data)))
             data = data.astype(float)
             data["predict"] = model.predict(data.values)
             # Only numeric predictions get the ".0f" format; string labels
             # (object dtype) are left as they are.
             if data["predict"].dtypes != "object":
                 data = format_dataframe(data, {"predict": ".0f"})
             res.update(
                 transform_table_data_to_html({
                     "data": data.values.tolist(),
                     "title": "多样本预测结果",
                     "col": data.columns.tolist(),
                     "row": data.index.tolist()
                 }))
         return {"res": res, "code": "200", "msg": "ok!"}
     except Exception as e:
         # Top-level boundary: log the traceback and report the failure
         # through the response payload instead of raising.
         log.exception("Exception Logged")
         return {"data": "", "code": "500", "msg": "{}".format(e.args)}

Exemple #3

0

Afficher le fichier

def graph_player_with_shot_chart(stats_df,
                                 player_name,
                                 team,
                                 season,
                                 teams_df,
                                 cols,
                                 pct_cols,
                                 shot_df,
                                 filter_by_position=False,
                                 position_to_use=None):
    """Draw a player's shot chart next to per-stat distribution plots.

    The figure uses a ``len(cols)`` x 6 grid: the court and shot chart span
    every row of the first five columns, and the sixth column holds one
    distribution plot per entry of ``cols``.

    Args:
        stats_df: Stats table forwarded to ``get_team_info`` and
            ``format_dataframe``.
        player_name: Player to plot; used as the chart title, to filter
            ``shot_df`` on its ``PLAYER_NAME`` column, and in the file name.
        team: Team identifier forwarded to ``get_team_info`` and
            ``format_dataframe``.
        season: Season forwarded to ``format_dataframe`` and embedded in the
            output file name.
        teams_df: Team table forwarded to ``get_team_info``.
        cols: Columns to plot in the right-hand column, in order.
        pct_cols: Columns that ``dist_plot`` should treat as percentages.
        shot_df: Shot table with a ``PLAYER_NAME`` column.
        filter_by_position: Forwarded to ``format_dataframe``.
        position_to_use: Forwarded to ``format_dataframe``.

    Side effects:
        Writes
        ``output/player-shot-charts/{player_name} {season} Shot Chart.png``
        (creating the directory if needed) and closes all pyplot figures,
        even when plotting fails.
    """
    # Function-scope import: the file's import block is not part of this block.
    import os

    team_info = get_team_info(stats_df=stats_df, teams_df=teams_df, team=team)
    df_to_graph, player_stats_df, sub_title = format_dataframe(
        stats_df=stats_df,
        player_name=player_name,
        team=team,
        season=season,
        cols=cols,
        filter_by_position=filter_by_position,
        position_to_use=position_to_use)
    fig = plt.figure(figsize=(40, len(cols) * 4), constrained_layout=True)
    try:
        gs = fig.add_gridspec(len(cols), 6)
        fig.suptitle(player_name,
                     fontsize=80,
                     fontweight='bold',
                     y=1.02,
                     x=0,
                     horizontalalignment='left',
                     verticalalignment='bottom')
        fig.text(x=1,
                 y=1.02,
                 s=sub_title,
                 horizontalalignment='right',
                 verticalalignment='top',
                 fontsize=40,
                 style='italic')
        # NOTE(review): the season label is hard-coded while the file name
        # below uses ``season`` -- confirm whether it should be derived.
        fig.text(x=0,
                 y=1.02,
                 s=f'{team_info["Full"]}, 2019-20 Regular Season',
                 horizontalalignment='left',
                 verticalalignment='top',
                 fontsize=40,
                 style='italic')
        # Attribution lines placed below the axes (negative figure y).
        fig.text(x=0,
                 y=-.06,
                 s='By Andrew Lawlor, Twitter: @lawlorpalooza',
                 horizontalalignment='left',
                 verticalalignment='top',
                 fontsize=40,
                 style='italic')
        fig.text(
            x=0,
            y=-.08,
            s='Shot Data from stats.nba.com, Statistics from Basketball Reference',
            horizontalalignment='left',
            verticalalignment='top',
            fontsize=40,
            style='italic')
        # Court and shots: all rows, first five grid columns.
        court_ax = draw_court(ax=fig.add_subplot(gs[:, :5]),
                              outer_lines=False)
        player_shot_df = shot_df[shot_df['PLAYER_NAME'] == player_name]
        shot_chart(shot_df=player_shot_df, ax=court_ax)
        # Distribution plots: one per stat, stacked in the last grid column.
        for index, col in enumerate(cols):
            dist_plot(totals=df_to_graph,
                      stats_to_graph=player_stats_df,
                      col=col,
                      team=team_info,
                      ax=fig.add_subplot(gs[index, 5]),
                      is_pct_col=col in pct_cols,
                      show_colors=False,
                      fontsize=32)
        # Without this, a missing directory only surfaces as an error after
        # all the plotting work has been done.
        os.makedirs('output/player-shot-charts', exist_ok=True)
        # Save this figure explicitly instead of pyplot's "current" figure.
        fig.savefig(
            f'output/player-shot-charts/{player_name} {season} Shot Chart.png',
            bbox_inches='tight',
            pad_inches=2)
    finally:
        # Release the figure even if plotting or saving raised.
        plt.close('all')

Exemple #4

0

Afficher le fichier

    def show_regression_result(self, x, y, model, options=None):
        """
        Build the fit-diagnostic sections for a trained regression model.

        :param x: feature columns (DataFrame); read for the VIF and outlier
            tables. It is not modified.
        :param y: label column; read for the predicted-vs-true scatter plot
        :param model: trained regression model. The code calls ``summary()``,
            ``resid``, ``predict()`` and ``get_influence()`` on it --
            presumably a statsmodels results object; confirm with the caller.
        :param options: names of the sections to include. Recognised values:
            "r2", "coff", "independence", "resid_normal", "pp", "qq", "var",
            "vif", "outliers", "pred_y_contrast". ``None`` (the default)
            selects nothing.
        :return: list of section dicts for the front end, always in the
            order listed above regardless of the order in ``options``
        """
        # ``None`` instead of a mutable ``[]`` default; both mean "no sections".
        if options is None:
            options = ()
        res = []

        # Goodness of fit / coefficients / independence test: all three are
        # tables of the same summary, so build the summary at most once.
        summary_sections = (
            ("r2", False, "拟合优度", 0),
            ("coff", False, "系数解读", 1),
            ("independence", True, "独立性检验", 2),
        )
        requested = [
            section for section in summary_sections if section[0] in options
        ]
        if requested:
            tables = model.summary().tables
            for _, is_test, title, table_index in requested:
                res.append({
                    "is_test": is_test,
                    "title": title,
                    "str": str(tables[table_index]).replace("\n", "<br/>")
                })

        # NOTE(review): the plot sections below draw on pyplot's current
        # figure; this assumes plot_and_output_base64_png resets or closes
        # it between sections -- confirm.

        # Residual normality check: histogram with KDE and fitted normal.
        if "resid_normal" in options:
            sns.distplot(a=model.resid,
                         bins=10,
                         fit=stats.norm,
                         norm_hist=True,
                         hist_kws={
                             'color': 'green',
                             'edgecolor': 'black'
                         },
                         kde_kws={
                             'color': 'black',
                             'linestyle': '--',
                             'label': 'kernel density curve'
                         },
                         fit_kws={
                             'color': 'red',
                             'linestyle': ':',
                             'label': 'normal density curve'
                         })
            plt.legend()
            plt.title("残差正态性检验")
            res.append({
                "is_test": True,
                "title": "残差正态性检验",
                "base64": "{}".format(self.plot_and_output_base64_png(plt))
            })

        # P-P plot of the residuals.
        if "pp" in options:
            sm.ProbPlot(model.resid).ppplot(line='45')
            res.append({
                "is_test": True,
                "title": "残差pp图",
                "base64": "{}".format(self.plot_and_output_base64_png(plt))
            })

        # Q-Q plot of the residuals.
        if "qq" in options:
            sm.ProbPlot(model.resid).qqplot(line='q')
            res.append({
                "is_test": True,
                "title": "残差qq图",
                "base64": "{}".format(self.plot_and_output_base64_png(plt))
            })

        # Standardized residuals vs. predicted values (homoscedasticity).
        if "var" in options:
            residuals = model.resid
            plt.scatter(model.predict(),
                        (residuals - residuals.mean()) / residuals.std())
            plt.xlabel('predict value')
            plt.ylabel('standardized residual ')
            # Horizontal reference line at zero.
            plt.axhline(y=0, color='r', linewidth=2)
            res.append({
                "is_test": True,
                "title": "方差齐性检验",
                "base64": "{}".format(self.plot_and_output_base64_png(plt))
            })

        # Multicollinearity check (VIF); needs at least two features.
        if "vif" in options and len(x.columns) > 1:
            X = sm.add_constant(x)
            vif = pd.DataFrame()
            vif['features'] = X.columns
            vif["VIF Factor"] = [
                variance_inflation_factor(X.values, i)
                for i in range(X.shape[1])
            ]
            vif = format_dataframe(vif, {"VIF Factor": ".4f"})
            # NOTE(review): "data" is one list of n values while "row" has n
            # labels and "col" has one; the outlier table below passes one
            # list per row instead -- confirm which layout the HTML
            # transformer expects.
            res.append(
                self.transform_table_data_to_html({
                    "is_test": True,
                    "title": "多重共线性检验",
                    "row": vif['features'].values.tolist(),
                    "col": ["VIF Factor"],
                    "data": [vif["VIF Factor"].values.tolist()],
                    "remarks": "VIF>10，存在多重共线性，>100则变量间存在严重的多重共线性"
                }))

        # Linear-correlation checks are shown in the data-exploration view.

        # Outlier detection: leverage (hat matrix diagonal), DFFITS,
        # externally studentized residuals and Cook's distance.
        if "outliers" in options:
            influence = model.get_influence()
            diagnostics = pd.concat([
                pd.Series(influence.hat_matrix_diag, name='leverage'),
                pd.Series(influence.dffits[0], name='dffits'),
                pd.Series(influence.resid_studentized_external,
                          name='resid_stu'),
                pd.Series(influence.cooks_distance[0], name='cook'),
            ],
                                    axis=1)

            # Align the features with the 0..n-1 index of the diagnostics on
            # a copy; assigning to ``x.index`` would renumber the caller's
            # DataFrame as a side effect.
            features = x.reset_index(drop=True)
            profit_outliers = pd.concat([features, diagnostics], axis=1)
            profit_outliers = format_dataframe(
                profit_outliers, {
                    "leverage": ".4f",
                    "dffits": ".4f",
                    "resid_stu": ".4f",
                    "cook": ".4f"
                })
            res.append(
                self.transform_table_data_to_html({
                    "is_test": True,
                    "title": "异常值检测",
                    "row": profit_outliers.index.tolist(),
                    "col": profit_outliers.columns.tolist(),
                    "data": profit_outliers.values.tolist(),
                    "remarks": "当高杠杆值点（或帽子矩阵）大于2(p+1)/n时，则认为该样本点可能存在异常（其中p为自变量的个数，n为观测的个数）；当DFFITS统计值大于2sqrt((p+1)/n)时，则认为该样本点可能存在异常；当学生化残差的绝对值大于2，则认为该样本点可能存在异常；对于cook距离来说，则没有明确的判断标准，一般来说，值越大则为异常点的可能性就越高；对于covratio值来说，如果一个样本的covratio值离数值1越远，则认为该样本越可能是异常值。"
                }))

        # Scatter of predicted vs. true values with a min-to-max reference line.
        if "pred_y_contrast" in options:
            predicted = model.predict()
            plt.scatter(predicted, y)
            plt.plot([predicted.min(), predicted.max()], [y.min(), y.max()],
                     'r-',
                     linewidth=3)
            plt.xlabel("predict value")
            plt.ylabel('true value')
            res.append({
                "is_test": False,
                "title": "预测值与真实值对比散点图",
                "base64": "{}".format(self.plot_and_output_base64_png(plt))
            })
        return res

Exemple #5

0

Afficher le fichier

def plot_player_profile(player_name, team, season):
    """Build a one-page player profile figure and save it as a PNG.

    Reads ``nba_per_poss.csv``, ``nba_team_colors.csv``, ``nba_advanced.csv``
    and ``all_shots.csv`` from ``./data``, merges the advanced and
    per-possession stats on player/team/season, then lays out a radar plot,
    a shot chart and one distribution plot per stat on a shared grid.

    Args:
        player_name: Player to plot; matched against the ``player`` column of
            the stats and the ``PLAYER_NAME`` column of the shot data.
        team: Team identifier; matched against ``team_id``.
        season: Season; matched against the ``season`` column.

    Side effects:
        Writes ``output/{player_name} 2019-20 Profile.png``.

    NOTE(review): the title, the file name and the ``season=2020`` passed to
    ``radar_plot`` are hard-coded rather than derived from ``season``, and
    the visible code does not close the figure after saving -- confirm both
    are intended.
    """
    stats_df = pd.read_csv('./data/nba_per_poss.csv')
    teams_df = pd.read_csv('./data/nba_team_colors.csv')
    advanced_stats_df = pd.read_csv('./data/nba_advanced.csv')
    # Advanced stats: keys plus the columns used by the radar/distributions.
    radar_df = advanced_stats_df[[
        'player', 'team_id', 'season', 'mp', 'per', 'obpm', 'dbpm', 'ast_pct',
        'fg3a_per_fga_pct', 'ts_pct', 'fta_per_fga_pct', 'trb_pct', 'blk_pct',
        'stl_pct', 'tov_pct', 'usg_pct'
    ]]
    # Per-possession stats: keys plus attempt rates, shooting pcts and fouls.
    dists_df = stats_df[[
        'player', 'team_id', 'season', 'fga_per_poss', 'fg2a_per_poss',
        'fg3a_per_poss', 'fta_per_poss', 'fg2_pct', 'fg3_pct', 'ft_pct',
        'pf_per_poss'
    ]]
    shot_df = pd.read_csv('./data/all_shots.csv')

    # Default (inner) merge: only rows present in both tables are kept.
    merged_df = radar_df.merge(dists_df, on=['player', 'team_id', 'season'])
    # Give the per-possession columns display-style names.
    merged_df.rename(columns={
        'pf_per_poss': 'PF/100',
        'fga_per_poss': 'FGA/100',
        'fta_per_poss': 'FTA/100',
        'fg3a_per_poss': '3PA/100',
        'fg2a_per_poss': '2PA/100'
    },
                     inplace=True)

    # Columns that get a distribution plot, in top-to-bottom order.
    cols = [
        'mp',
        'per',
        'obpm',
        'dbpm',
        'ast_pct',
        'fg3a_per_fga_pct',
        'ts_pct',
        'fta_per_fga_pct',
        'trb_pct',
        'blk_pct',
        'stl_pct',
        'tov_pct',
        'usg_pct',
        '3PA/100',
        '2PA/100',
        'FTA/100',
        'fg2_pct',
        'fg3_pct',
        'ft_pct',
        'PF/100',
    ]
    # Subset of ``cols`` flagged to dist_plot via ``is_pct_col``.
    pct_cols = [
        'fg2_pct',
        'fg3_pct',
        'ft_pct',
        'fg3a_per_fga_pct',
        'ts_pct',
        'fta_per_fga_pct',
    ]

    team_info = get_team_info(stats_df=stats_df, teams_df=teams_df, team=team)
    df_to_graph, player_stats_df, sub_title = format_dataframe(
        stats_df=merged_df,
        player_name=player_name,
        team=team,
        season=season,
        cols=cols,
    )
    # One grid row per distribution plot plus row 0, which no subplot uses.
    N = len(cols) + 1
    fig = plt.figure(figsize=(50, N * 2), constrained_layout=True)
    gs = fig.add_gridspec(N, 6)
    chart_title = player_name
    fig.suptitle(chart_title,
                 fontsize=120,
                 fontweight='bold',
                 y=1.02,
                 x=0,
                 horizontalalignment='left',
                 verticalalignment='bottom')
    fig.text(x=1,
             y=1.02,
             s=sub_title,
             horizontalalignment='right',
             verticalalignment='top',
             fontsize=60,
             style='italic')
    fig.text(x=0,
             y=1.02,
             s=f'{team_info["Full"]}, 2019-20',
             horizontalalignment='left',
             verticalalignment='top',
             fontsize=60,
             style='italic')
    # Shot chart: grid rows 9 and below, first five columns.
    ax1 = fig.add_subplot(gs[9:, :5])
    ax1 = draw_court(ax=ax1, outer_lines=False)
    player_shot_df = shot_df[shot_df['PLAYER_NAME'] == player_name]
    shot_chart(shot_df=player_shot_df, ax=ax1, ylim=(-47.5, 300))
    # squeeze() collapses a single matching row to a Series; presumably
    # exactly one row matches -- verify for players with several stints.
    player_stats = merged_df[(merged_df['player'] == player_name)
                             & (merged_df['season'] == season) &
                             (merged_df['team_id'] == team)].squeeze()
    # Radar plot: grid rows 1-8, first five columns.
    radar_plot(
        totals_df=merged_df,
        stats_to_graph=player_stats,
        player_name=player_name,
        team_info=team_info,
        season=2020,
        cols=[
            'fta_per_fga_pct',
            'ts_pct',
            'fg3a_per_fga_pct',
            'ast_pct',
            'usg_pct',
            'tov_pct',
            'PF/100',
            'stl_pct',
            'blk_pct',
            'trb_pct',
        ],
        pct_cols=[
            'ts_pct',
            'fg3a_per_fga_pct',
            'fta_per_fga_pct',
        ],
        inverse_cols=[
            'tov_pct',
            'PF/100',
        ],
        fig=fig,
        gs=gs[1:9, :5],
        label_font_size=48,
        tick_font_size=32,
        label_padding=60,
    )
    # Distribution plots: last grid column, starting at row 1.
    for index, col in enumerate(cols):
        ax = fig.add_subplot(gs[index + 1, 5])
        is_pct_col = col in pct_cols
        dist_plot(totals=df_to_graph,
                  stats_to_graph=player_stats_df,
                  col=col,
                  team=team_info,
                  ax=ax,
                  is_pct_col=is_pct_col)
    plt.savefig(f'output/{player_name} 2019-20 Profile.png',
                bbox_inches='tight',
                pad_inches=2)