def graph_player_distributions(stats_df,
                               player_name,
                               team,
                               season,
                               teams_df,
                               cols,
                               pct_cols,
                               filter_by_position=False,
                               position_to_use=None):
    """Save a single-column figure with one distribution plot per stat.

    The figure has ``len(cols)`` stacked panels. Each panel is drawn by
    ``dist_plot`` for one entry of ``cols``. The player name is used as the
    figure title, and the image is written to
    ``output/<player_name> <season> Shooting Distribution.png``.

    Args:
        stats_df: Stats table forwarded to ``get_team_info`` and
            ``format_dataframe``.
        player_name: Player to plot; also used in the title and file name.
        team: Team identifier forwarded to the helpers.
        season: Season forwarded to ``format_dataframe`` and used in the
            output file name.
        teams_df: Team table forwarded to ``get_team_info``.
        cols: Columns to plot, one panel each.
        pct_cols: Columns flagged to ``dist_plot`` via ``is_pct_col``.
        filter_by_position: Forwarded to ``format_dataframe``.
        position_to_use: Forwarded to ``format_dataframe``.
    """
    team_info = get_team_info(stats_df=stats_df, teams_df=teams_df, team=team)
    totals_df, player_row, position_label = format_dataframe(
        stats_df=stats_df,
        player_name=player_name,
        team=team,
        season=season,
        cols=cols,
        filter_by_position=filter_by_position,
        position_to_use=position_to_use)

    panel_count = len(cols)
    figure = plt.figure(figsize=(8, panel_count * 4), constrained_layout=True)
    grid = figure.add_gridspec(panel_count, 1)

    # Header: bold player name, with two italic captions underneath it.
    figure.suptitle(player_name,
                    fontsize=30,
                    fontweight='bold',
                    y=1.02,
                    x=-.05,
                    horizontalalignment='left',
                    verticalalignment='bottom')
    caption_style = dict(verticalalignment='top', fontsize=24, style='italic')
    figure.text(x=1.05,
                y=1.02,
                s=position_label,
                horizontalalignment='right',
                **caption_style)
    # NOTE(review): the season label is hard-coded and ignores `season`.
    figure.text(x=-.05,
                y=1.02,
                s=f'{team_info["Full"]}, 2019-20',
                horizontalalignment='left',
                **caption_style)

    for row, col in enumerate(cols):
        panel = figure.add_subplot(grid[row, 0])
        dist_plot(totals=totals_df,
                  stats_to_graph=player_row,
                  col=col,
                  team=team_info,
                  ax=panel,
                  is_pct_col=col in pct_cols)

    plt.savefig(f'output/{player_name} {season} Shooting Distribution.png',
                bbox_inches='tight',
                pad_inches=2)
    plt.close('all')
def predict(self):
    """Run the configured model on one sample or on a whole table.

    The model is loaded with ``self.load_model_by_database`` using
    ``self.config["algorithm"]`` and ``self.config["model"]``.

    * When ``self.config['oneSample']`` is truthy, ``self.config['X']`` is
      converted to a single row of floats and predicted. A string
      prediction is returned unchanged; any other value is formatted with
      ``"{:.0f}"``.
    * Otherwise ``self.table_data`` is cast to float, a ``"predict"``
      column is appended, and the table is passed through
      ``transform_table_data_to_html``.

    Returns:
        dict: ``{"res": ..., "code": "200", "msg": "ok!"}`` on success, or
        ``{"data": "", "code": "500", "msg": ...}`` when any exception is
        raised (the exception is logged, not propagated).
    """
    try:
        model = self.load_model_by_database(self.config["algorithm"],
                                            self.config["model"])
        res = {}
        if self.config['oneSample']:
            features = self.config['X']
            if not features:
                raise ValueError(
                    "feature must not be empty when one-sample")
            sample = [[float(value) for value in features]]
            # Run inference exactly once and reuse the result; the label
            # is either a class name (str) or a number rendered as an int.
            raw_prediction = model.predict(sample)[0]
            if isinstance(raw_prediction, str):
                label = raw_prediction
            else:
                label = "{:.0f}".format(raw_prediction)
            res.update({
                "data": [[",".join([str(s) for s in features]), label]],
                "title": "单样本预测结果",
                "col": ["样本特征", "模型预测结果"],
            })
        else:
            # Multi-sample mode: the samples come from the configured table.
            if not self.config['tableName']:
                raise ValueError(
                    "cannot find table data when multi-sample")
            data = self.table_data
            log.info("输入数据大小:{}".format(len(data)))
            data = data.astype(float)
            data["predict"] = model.predict(data.values)
            # Numeric predictions are rendered without decimals; string
            # (object dtype) predictions are left untouched.
            if data["predict"].dtypes != "object":
                data = format_dataframe(data, {"predict": ".0f"})
            res.update(
                transform_table_data_to_html({
                    "data": data.values.tolist(),
                    "title": "多样本预测结果",
                    "col": data.columns.tolist(),
                    "row": data.index.tolist()
                }))
        return {"res": res, "code": "200", "msg": "ok!"}
    except Exception as e:
        # Service boundary: log the traceback and report the failure to the
        # caller as a payload instead of raising.
        log.exception("Exception Logged")
        return {"data": "", "code": "500", "msg": "{}".format(e.args)}
def graph_player_with_shot_chart(stats_df,
                                 player_name,
                                 team,
                                 season,
                                 teams_df,
                                 cols,
                                 pct_cols,
                                 shot_df,
                                 filter_by_position=False,
                                 position_to_use=None):
    """Save a shot chart with a side column of distribution plots.

    The figure grid has ``len(cols)`` rows and 6 columns. The first five
    columns hold the court and the player's shots; the sixth holds one
    ``dist_plot`` panel per entry of ``cols``. The image is written to
    ``output/player-shot-charts/<player_name> <season> Shot Chart.png``.

    Args:
        stats_df: Stats table forwarded to ``get_team_info`` and
            ``format_dataframe``.
        player_name: Player to plot; also used to filter ``shot_df`` on its
            ``PLAYER_NAME`` column, and in the title and file name.
        team: Team identifier forwarded to the helpers.
        season: Season forwarded to ``format_dataframe`` and used in the
            output file name.
        teams_df: Team table forwarded to ``get_team_info``.
        cols: Columns to plot, one side panel each.
        pct_cols: Columns flagged to ``dist_plot`` via ``is_pct_col``.
        shot_df: Shot table with a ``PLAYER_NAME`` column.
        filter_by_position: Forwarded to ``format_dataframe``.
        position_to_use: Forwarded to ``format_dataframe``.
    """
    team_info = get_team_info(stats_df=stats_df, teams_df=teams_df, team=team)
    totals_df, player_row, position_label = format_dataframe(
        stats_df=stats_df,
        player_name=player_name,
        team=team,
        season=season,
        cols=cols,
        filter_by_position=filter_by_position,
        position_to_use=position_to_use)

    panel_count = len(cols)
    figure = plt.figure(figsize=(40, panel_count * 4),
                        constrained_layout=True)
    grid = figure.add_gridspec(panel_count, 6)

    # Header and footer text; every caption shares the same italic style.
    figure.suptitle(player_name,
                    fontsize=80,
                    fontweight='bold',
                    y=1.02,
                    x=0,
                    horizontalalignment='left',
                    verticalalignment='bottom')
    caption_style = dict(verticalalignment='top', fontsize=40, style='italic')
    figure.text(x=1,
                y=1.02,
                s=position_label,
                horizontalalignment='right',
                **caption_style)
    # NOTE(review): the season label is hard-coded and ignores `season`.
    left_captions = (
        (1.02, f'{team_info["Full"]}, 2019-20 Regular Season'),
        (-.06, 'By Andrew Lawlor, Twitter: @lawlorpalooza'),
        (-.08,
         'Shot Data from stats.nba.com, Statistics from Basketball Reference'),
    )
    for y_pos, caption in left_captions:
        figure.text(x=0,
                    y=y_pos,
                    s=caption,
                    horizontalalignment='left',
                    **caption_style)

    # Court with the player's shots, spanning all rows of columns 0-4.
    court_ax = draw_court(ax=figure.add_subplot(grid[:, :5]),
                          outer_lines=False)
    is_player = shot_df['PLAYER_NAME'] == player_name
    shot_chart(shot_df=shot_df[is_player], ax=court_ax)

    # One distribution panel per stat in the last column.
    for row, col in enumerate(cols):
        panel = figure.add_subplot(grid[row, 5])
        dist_plot(totals=totals_df,
                  stats_to_graph=player_row,
                  col=col,
                  team=team_info,
                  ax=panel,
                  is_pct_col=col in pct_cols,
                  show_colors=False,
                  fontsize=32)

    plt.savefig(
        f'output/player-shot-charts/{player_name} {season} Shot Chart.png',
        bbox_inches='tight',
        pad_inches=2)
    plt.close('all')
def show_regression_result(self, x, y, model, options=None):
    """
    Build the display results describing how well a regression model fits.

    Sections are appended in a fixed order, and only when their key is
    present in ``options``: "r2", "coff", "independence", "resid_normal",
    "pp", "qq", "var", "vif", "outliers", "pred_y_contrast".

    :param x: feature columns (DataFrame); only read for "vif" and
        "outliers", and never modified
    :param y: label column; only read for "pred_y_contrast"
    :param model: an already fitted regression model; this method uses its
        ``summary()``, ``resid``, ``predict()`` and ``get_influence()``
    :param options: keys selecting which sections to produce; ``None``
        (the default) selects nothing
    :return: list of result dicts for the front end
    """
    options = () if options is None else options
    res = []

    # Goodness of fit / coefficients / independence test: these are the
    # three tables of the model summary, which is built once on first use.
    summary_sections = (
        ("r2", False, "拟合优度", 0),
        ("coff", False, "系数解读", 1),
        ("independence", True, "独立性检验", 2),
    )
    summary = None
    for key, is_test, title, table_index in summary_sections:
        if key not in options:
            continue
        if summary is None:
            summary = model.summary()
        res.append({
            "is_test": is_test,
            "title": title,
            "str": str(summary.tables[table_index]).replace("\n", "<br/>")
        })

    # Residual normality check: histogram with KDE and fitted normal curve.
    if "resid_normal" in options:
        sns.distplot(a=model.resid,
                     bins=10,
                     fit=stats.norm,
                     norm_hist=True,
                     hist_kws={
                         'color': 'green',
                         'edgecolor': 'black'
                     },
                     kde_kws={
                         'color': 'black',
                         'linestyle': '--',
                         'label': 'kernel density curve'
                     },
                     fit_kws={
                         'color': 'red',
                         'linestyle': ':',
                         'label': 'normal density curve'
                     })
        plt.legend()
        plt.title("残差正态性检验")
        res.append({
            "is_test": True,
            "title": "残差正态性检验",
            "base64": "{}".format(self.plot_and_output_base64_png(plt))
        })

    # P-P plot of the residuals.
    if "pp" in options:
        sm.ProbPlot(model.resid).ppplot(line='45')
        res.append({
            "is_test": True,
            "title": "残差pp图",
            "base64": "{}".format(self.plot_and_output_base64_png(plt))
        })

    # Q-Q plot of the residuals.
    if "qq" in options:
        sm.ProbPlot(model.resid).qqplot(line='q')
        res.append({
            "is_test": True,
            "title": "残差qq图",
            "base64": "{}".format(self.plot_and_output_base64_png(plt))
        })

    # Standardized residuals against predicted values (checks that the
    # residual variance is homogeneous).
    if "var" in options:
        residuals = model.resid
        plt.scatter(model.predict(),
                    (residuals - residuals.mean()) / residuals.std())
        plt.xlabel('predict value')
        plt.ylabel('standardized residual ')
        # Horizontal reference line at zero.
        plt.axhline(y=0, color='r', linewidth=2)
        res.append({
            "is_test": True,
            "title": "方差齐性检验",
            "base64": "{}".format(self.plot_and_output_base64_png(plt))
        })

    # Multicollinearity check; only meaningful with more than one feature.
    # The option is tested first so `x` is untouched when it is not needed.
    if "vif" in options and len(x.columns) > 1:
        design = sm.add_constant(x)
        vif = pd.DataFrame()
        vif['features'] = design.columns
        vif["VIF Factor"] = [
            variance_inflation_factor(design.values, i)
            for i in range(design.shape[1])
        ]
        vif = format_dataframe(vif, {"VIF Factor": ".4f"})
        res.append(
            self.transform_table_data_to_html({
                "is_test": True,
                "title": "多重共线性检验",
                "row": vif['features'].values.tolist(),
                "col": ["VIF Factor"],
                "data": [vif["VIF Factor"].values.tolist()],
                "remarks": "VIF>10,存在多重共线性,>100则变量间存在严重的多重共线性"
            }))

    # Linear correlation is shown in the data-exploration view instead.

    # Outlier detection (hat matrix, DFFITS, studentized residuals,
    # Cook's distance).
    if "outliers" in options:
        influence = model.get_influence()
        # Collect the per-observation statistics into one table.
        influence_stats = pd.concat([
            pd.Series(influence.hat_matrix_diag, name='leverage'),
            pd.Series(influence.dffits[0], name='dffits'),
            pd.Series(influence.resid_studentized_external,
                      name='resid_stu'),
            pd.Series(influence.cooks_distance[0], name='cook')
        ],
                                    axis=1)
        # Align the features with the 0..n-1 index of the statistics on a
        # copy, so the caller's DataFrame keeps its own index.
        features = x.reset_index(drop=True)
        profit_outliers = pd.concat([features, influence_stats], axis=1)
        profit_outliers = format_dataframe(
            profit_outliers, {
                "leverage": ".4f",
                "dffits": ".4f",
                "resid_stu": ".4f",
                "cook": ".4f"
            })
        res.append(
            self.transform_table_data_to_html({
                "is_test": True,
                "title": "异常值检测",
                "row": profit_outliers.index.tolist(),
                "col": profit_outliers.columns.tolist(),
                "data": profit_outliers.values.tolist(),
                "remarks": "当高杠杆值点(或帽子矩阵)大于2(p+1)/n时,则认为该样本点可能存在异常(其中p为自变量的个数,n为观测的个数);当DFFITS统计值大于2sqrt((p+1)/n)时,则认为该样本点可能存在异常;当学生化残差的绝对值大于2,则认为该样本点可能存在异常;对于cook距离来说,则没有明确的判断标准,一般来说,值越大则为异常点的可能性就越高;对于covratio值来说,如果一个样本的covratio值离数值1越远,则认为该样本越可能是异常值。"
            }))

    # Scatter plot of predicted values against true values.
    if "pred_y_contrast" in options:
        predicted = model.predict()
        plt.scatter(predicted, y)
        plt.plot([predicted.min(), predicted.max()],
                 [y.min(), y.max()],
                 'r-',
                 linewidth=3)
        plt.xlabel("predict value")
        plt.ylabel('true value')
        res.append({
            "is_test": False,
            "title": "预测值与真实值对比散点图",
            "base64": "{}".format(self.plot_and_output_base64_png(plt))
        })

    return res
def plot_player_profile(player_name, team, season):
    """Save a one-page profile image for a player.

    Reads the per-possession, advanced, team-colour and shot CSV files
    under ``./data`` and merges the two stat tables on
    ``player``/``team_id``/``season``. The figure has a radar plot in grid
    rows 1-8, a shot chart in the rows below it, and a right-hand column
    with one ``dist_plot`` panel per stat. The image is written to
    ``output/<player_name> 2019-20 Profile.png`` and the figure is closed
    afterwards, even if saving fails.

    Args:
        player_name: Player to profile; matched against the ``player``
            column of the stats and the ``PLAYER_NAME`` column of the
            shots.
        team: Team identifier, matched against ``team_id``.
        season: Season value, matched against the ``season`` column.
    """
    stats_df = pd.read_csv('./data/nba_per_poss.csv')
    teams_df = pd.read_csv('./data/nba_team_colors.csv')
    advanced_stats_df = pd.read_csv('./data/nba_advanced.csv')
    shot_df = pd.read_csv('./data/all_shots.csv')

    radar_df = advanced_stats_df[[
        'player', 'team_id', 'season', 'mp', 'per', 'obpm', 'dbpm',
        'ast_pct', 'fg3a_per_fga_pct', 'ts_pct', 'fta_per_fga_pct',
        'trb_pct', 'blk_pct', 'stl_pct', 'tov_pct', 'usg_pct'
    ]]
    dists_df = stats_df[[
        'player', 'team_id', 'season', 'fga_per_poss', 'fg2a_per_poss',
        'fg3a_per_poss', 'fta_per_poss', 'fg2_pct', 'fg3_pct', 'ft_pct',
        'pf_per_poss'
    ]]

    merged_df = radar_df.merge(dists_df, on=['player', 'team_id', 'season'])
    # Per-possession columns get the display names used as plot labels.
    merged_df = merged_df.rename(
        columns={
            'pf_per_poss': 'PF/100',
            'fga_per_poss': 'FGA/100',
            'fta_per_poss': 'FTA/100',
            'fg3a_per_poss': '3PA/100',
            'fg2a_per_poss': '2PA/100'
        })

    cols = [
        'mp',
        'per',
        'obpm',
        'dbpm',
        'ast_pct',
        'fg3a_per_fga_pct',
        'ts_pct',
        'fta_per_fga_pct',
        'trb_pct',
        'blk_pct',
        'stl_pct',
        'tov_pct',
        'usg_pct',
        '3PA/100',
        '2PA/100',
        'FTA/100',
        'fg2_pct',
        'fg3_pct',
        'ft_pct',
        'PF/100',
    ]
    pct_cols = [
        'fg2_pct',
        'fg3_pct',
        'ft_pct',
        'fg3a_per_fga_pct',
        'ts_pct',
        'fta_per_fga_pct',
    ]

    team_info = get_team_info(stats_df=stats_df, teams_df=teams_df, team=team)
    df_to_graph, player_stats_df, sub_title = format_dataframe(
        stats_df=merged_df,
        player_name=player_name,
        team=team,
        season=season,
        cols=cols,
    )

    # One grid row per stat, plus one extra row at the top.
    row_count = len(cols) + 1
    fig = plt.figure(figsize=(50, row_count * 2), constrained_layout=True)
    try:
        gs = fig.add_gridspec(row_count, 6)

        fig.suptitle(player_name,
                     fontsize=120,
                     fontweight='bold',
                     y=1.02,
                     x=0,
                     horizontalalignment='left',
                     verticalalignment='bottom')
        fig.text(x=1,
                 y=1.02,
                 s=sub_title,
                 horizontalalignment='right',
                 verticalalignment='top',
                 fontsize=60,
                 style='italic')
        # NOTE(review): '2019-20' here and in the file name, and
        # `season=2020` passed to radar_plot below, are hard-coded and
        # ignore the `season` argument; confirm the expected season format
        # before deriving them from it.
        fig.text(x=0,
                 y=1.02,
                 s=f'{team_info["Full"]}, 2019-20',
                 horizontalalignment='left',
                 verticalalignment='top',
                 fontsize=60,
                 style='italic')

        # Shot chart: grid rows 9 and below, columns 0-4.
        ax1 = fig.add_subplot(gs[9:, :5])
        ax1 = draw_court(ax=ax1, outer_lines=False)
        player_shot_df = shot_df[shot_df['PLAYER_NAME'] == player_name]
        shot_chart(shot_df=player_shot_df, ax=ax1, ylim=(-47.5, 300))

        # Radar plot: grid rows 1-8, columns 0-4.
        player_stats = merged_df[(merged_df['player'] == player_name)
                                 & (merged_df['season'] == season)
                                 & (merged_df['team_id'] == team)].squeeze()
        radar_plot(
            totals_df=merged_df,
            stats_to_graph=player_stats,
            player_name=player_name,
            team_info=team_info,
            season=2020,
            cols=[
                'fta_per_fga_pct',
                'ts_pct',
                'fg3a_per_fga_pct',
                'ast_pct',
                'usg_pct',
                'tov_pct',
                'PF/100',
                'stl_pct',
                'blk_pct',
                'trb_pct',
            ],
            pct_cols=[
                'ts_pct',
                'fg3a_per_fga_pct',
                'fta_per_fga_pct',
            ],
            inverse_cols=[
                'tov_pct',
                'PF/100',
            ],
            fig=fig,
            gs=gs[1:9, :5],
            label_font_size=48,
            tick_font_size=32,
            label_padding=60,
        )

        # Distribution panels: last column, starting at grid row 1.
        for index, col in enumerate(cols):
            ax = fig.add_subplot(gs[index + 1, 5])
            dist_plot(totals=df_to_graph,
                      stats_to_graph=player_stats_df,
                      col=col,
                      team=team_info,
                      ax=ax,
                      is_pct_col=col in pct_cols)

        plt.savefig(f'output/{player_name} 2019-20 Profile.png',
                    bbox_inches='tight',
                    pad_inches=2)
    finally:
        # Release the large figure; pyplot otherwise keeps it open, so
        # repeated calls would accumulate figures.
        plt.close(fig)