# Cumulative-return comparison of the ETE, mean-variance and CAPM portfolios.
cmap2 = sns.color_palette('Reds_r', n_colors=len(gammas)*2)

baseline_series = (
    (e_perf, 'ETE', 'steelblue'),
    (mv_perf, 'MV', 'forestgreen'),
)
for perf, name, color in baseline_series:
    ax.plot(months, cum_rets(perf), alpha=alpha, label=name, lw=lw, c=color)

# One CAPM line per gamma, skipping the first gamma and walking the rest in
# reverse order; colours are taken from the red palette by position.
for i, gamma in enumerate(reversed(gammas[1:])):
    capm_label = f'CAPM $\\gamma={gamma}$'
    ax.plot(months, cum_rets(capm[gamma]), alpha=alpha, label=capm_label,
            lw=lw, c=cmap2[i])

fig.autofmt_xdate()
plt.ylabel('Cumulative Return')
plt.xlabel('Time')
plt.legend()
plt.tight_layout()

fig_path = '../plots/portfolio_comparison'
eu.save_fig(fig_path, dpi=300)
eu.latex_figure(fig_path)

# %%
# Summary table: scaled mean return, scaled volatility and their ratio.
tbl = pd.DataFrame(
    {
        'ETE': e_perf,
        'MV': mv_perf,
        'CAPM 1': capm[1],
        'CAPM 10': capm[10],
    },
    index=months[1:],
)
# NOTE(review): the factor 4 presumably annualizes four periods per year --
# confirm against the sampling frequency of the performance series.
scale = 4
scaled_return = tbl.mean() * scale
scaled_vol = tbl.std() * np.sqrt(scale)
tbl = pd.DataFrame({'Return': scaled_return, 'Volatility': scaled_vol})
tbl['Sharpe'] = tbl['Return'] / tbl['Volatility']
def plot_temporal_vol_and_matrices(assets):
    """
    Plot a volatility time series and per-period correlation, transfer
    entropy and effective transfer entropy matrices.

    Seven consecutive one-month windows (July 2011 through January 2012)
    are analysed. The volatility figure is saved as 'temporal_vol' in
    '../figures'; the matrix grid is drawn but not saved.

    Parameters
    ----------
    assets:
        Passed straight to ``TransferEntropy(assets=...)``.
    """
    mod = TransferEntropy(assets=assets)

    # Consecutive month starts; each adjacent pair forms one (start, end)
    # analysis window.
    month_starts = [
        '7/1/2011', '8/1/2011', '9/1/2011', '10/1/2011',
        '11/1/2011', '12/1/2011', '1/1/2012', '2/1/2012',
    ]
    periods = list(zip(month_starts[:-1], month_starts[1:]))
    xlabs = [pd.to_datetime(start).strftime('%b %Y') for start, _ in periods]

    # NOTE(review): `df` is read here before anything in this function
    # assigns it, so this statement raises UnboundLocalError as written.
    # The statement that loaded the underlying series appears to be missing
    # from the file -- restore it before relying on the volatility panel.
    df = df**2
    df = df.resample('M').sum()
    df = (df.mean(axis=1) * 12)**0.5

    window_start = pd.to_datetime('6/1/2011')
    window_end = pd.to_datetime(periods[-1][-1])
    in_window = (df.index >= window_start) & (df.index <= window_end)
    df = df[in_window]

    fig, ax = plt.subplots(1, 1, figsize=(20, 3))
    ax.plot(df, c='steelblue')
    ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:.0%}'))
    ax.set_xlabel('Date')
    ax.set_ylabel('Annualized Volatility')
    eu.save_fig('temporal_vol', dir='../figures')

    # %%
    # First pass over every period: collect global extrema so that all
    # panels in a row can share one colour scale.
    te_max = ete_max = ete_min = 0
    for period in periods:
        mod.set_timeperiod(*period)

        mod.compute_transfer_entropy()
        te_max = max(np.max(mod.te.values), te_max)

        mod.compute_effective_transfer_entropy(sims=3)
        ete_max = max(np.max(mod.ete.values), ete_max)
        ete_min = min(np.min(mod.ete.values), ete_min)

    # %%
    # Second pass: one column per period, rows are correlation / TE / ETE.
    fig, axes = plt.subplots(3, len(periods), figsize=(20, 10),
                             sharex=True, sharey=True)
    for col, (period, xlab) in enumerate(zip(periods, xlabs)):
        corr_ax, te_ax, ete_ax = axes[0, col], axes[1, col], axes[2, col]
        mod.set_timeperiod(*period)

        mod.plot_corr(method='pearson', ax=corr_ax, cbar=False, labels=False)

        mod.compute_transfer_entropy()
        mod.plot_te(ax=te_ax, cbar=False, labels=False, vmin=0, vmax=te_max)

        mod.compute_effective_transfer_entropy(sims=5)
        # The ETE colour scale is capped at 75% of the TE maximum; `ete_max`
        # from the first pass is not used here.
        mod.plot_te(ax=ete_ax, te='ete', cbar=False, labels=False,
                    vmin=ete_min, vmax=0.75*te_max)
        ete_ax.set_xlabel(xlab)

    row_titles = ('Correlation', 'Transfer Entropy',
                  'Effective Transfer Entropy')
    for row, title in enumerate(row_titles):
        axes[row, 0].set_ylabel(title)

    plt.tight_layout()
    # eu.save_fig('temporal_matrices', dir='../figures')
def make_algo_comparison_tables(year=2018, fdir='../figures', save=True):
    """
    Create the MAE colour-table figures for every stat group and print the
    train / cross-validation / test MAE table as LaTeX.

    Parameters
    ----------
    year: int, default 2018
        Not used by this function; kept so existing callers keep working.
    fdir: str, default '../figures'
        Directory handed to ``eu.save_fig`` and ``eu.latex_figure``.
    save: bool, default True
        If True, every figure is saved and LaTeX figure code is emitted for
        the essential train/CV pair. If False, figures are displayed
        instead.
    """
    def finish_figure(fid):
        """Save the current figure as `fid`, or display it when not saving."""
        if save:
            eu.save_fig(fid, dir=fdir)
        else:
            # NOTE(review): the non-save branch had no body in the original
            # file; displaying the figure is the assumed intent -- confirm.
            plt.show()

    # %%
    ess_train = CompareModels('essential', period='weekly', subset='train')
    ess_cv = CompareModels(stats='essential', period='weekly', subset='cv')
    ess_test = CompareModels(stats='essential', period='weekly', subset='test')
    ess_thresh = CompareModels(
        'essential_below_thresh', period='weekly', subset='train')
    non_ess = CompareModels('non_essential', period='weekly', subset='train')

    # %%
    # Essential stats: side-by-side train / CV figure.
    fig, axes = plt.subplots(1, 2, figsize=(18, 12))
    ess_train.plot_color_table(ax=axes[0], fontsize=12, prec=2)
    ess_cv.plot_color_table(ax=axes[1], fontsize=12, prec=2)
    plt.tight_layout()
    finish_figure('essential_MAE_color_table')

    # Essential stats: separate train and CV figures.
    ess_train.plot_color_table(figsize=(9, 12), fontsize=12, prec=2)
    finish_figure('essential_train_MAE_table')

    ess_cv.plot_color_table(figsize=(9, 12), fontsize=12, prec=2)
    finish_figure('essential_cv_MAE_table')
    if save:
        cap = 'Mean Absolute Error (MAE) values for all essential stats '\
            'and studied algorithms.'
        eu.latex_figure(
            fids=['essential_train_MAE_table', 'essential_cv_MAE_table'],
            dir=fdir,
            caption=cap,
            subcaptions=['Training MAE.', 'Cross-Validation MAE.'],
            width=0.98,
            )

    # Essential stats below threshold, then non-essential stats.
    ess_thresh.plot_color_table(figsize=(9, 12), fontsize=12)
    finish_figure('essential_thresh_MAE_table')

    non_ess.plot_color_table(figsize=(9, 12), fontsize=12, prec=3)
    finish_figure('nonessential_MAE_table')

    # %%
    # Make train/cv/test table and print to LaTeX.
    with open('../data/.models/weekly/optimal_models.json', 'r') as fid:
        opt_models = json.load(fid)

    def get_set_mae_list(subset_comparison, models=opt_models):
        """Return, per row of `mae_df`, the MAE of that row's chosen model."""
        return [
            row[models[row['Position']][row['Stat']]]
            for _, row in subset_comparison.mae_df.iterrows()
        ]

    df = ess_train.mae_df

    # Display names for the simple aggregate methods; any other algorithm
    # name is printed unchanged.
    fmt_methods = {
        'MEAN': 'Mean',
        'MEDIAN': 'Median',
        'FLOOR': 'Min',
        'CEIL': 'Max',
        }
    # The lookup must use `opt_models`: `models` exists only as a parameter
    # of `get_set_mae_list` and is not defined in this scope.
    algos = []
    for _, row in df.iterrows():
        algo = opt_models[row['Position']][row['Stat']]
        algos.append(fmt_methods.get(algo, algo))

    # Show each position once; repeats become '{}' in the Position column.
    pos, seen = [], set()
    for p in df['Position'].values:
        pos.append('{}' if p in seen else p)
        seen.add(p)

    table = pd.DataFrame({
        'Position': pos,
        'Stat': df['Stat'],
        'Projection Function': algos,
        'Train': get_set_mae_list(ess_train),
        'CV': get_set_mae_list(ess_cv),
        'Test': get_set_mae_list(ess_test),
        })
    cap = 'MAE values for training, cross-validation, and test sets with ' \
        'selected optimal projection function for each stat.'
    eu.latex_print(table, hide_index=True, prec=2, col_fmt='lllcrrr',
                   caption=cap)
def _missing_data_summary(stats):
    """
    Collect tick labels, source counts and NaN fractions for `stats`.

    Parameters
    ----------
    stats: dict
        Maps a position name to the list of stat names to inspect.

    Returns
    -------
    x_ticks: list of str
        '<pos> - <stat>' label for every stat, in iteration order.
    n_sources: np.ndarray
        Number of projection columns for every stat.
    pct_nan: np.ndarray
        Fraction of missing cells among the projection columns.
    """
    # Columns that are not projection sources.
    ignored_cols = ['Player', 'Team', 'Pos', 'Week', 'STATS']
    x_ticks, n_sources, pct_nan = [], [], []
    for pos, pos_stats in stats.items():
        proj = TrainProjections(pos)
        proj.load_data(weeks=range(1, 18), impute_method=False)
        for stat in pos_stats:
            df = proj._stats_df[stat]
            keep_cols = [c for c in list(df) if c not in ignored_cols]
            # Reduce on the raw boolean array so the result does not depend
            # on how pandas handles `axis=None` reductions.
            is_nan = df[keep_cols].isnull().to_numpy()
            n_sources.append(is_nan.shape[1])
            pct_nan.append(is_nan.sum() / is_nan.size)
            x_ticks.append(f'{pos} - {stat}')
    return (x_ticks,
            np.array(n_sources, dtype=float),
            np.array(pct_nan, dtype=float))


def _plot_missing_data_bars(stats, integer_yticks=False):
    """
    Draw the sources / missing-data bar chart for `stats`.

    A blue bar shows the number of sources per stat; a red bar overlaid on
    it has height ``n_sources * pct_nan`` and is annotated with the missing
    percentage. With `integer_yticks` the y ticks are placed on every
    integer up to the largest source count.
    """
    x_ticks, n_sources, pct_nan = _missing_data_summary(stats)
    n_missing = n_sources * pct_nan
    x = np.arange(len(x_ticks))

    fig, ax = plt.subplots(1, 1, figsize=[12, 6])
    ax.bar(x, n_sources, color='steelblue', alpha=0.7)
    ax.bar(x, n_missing, color='firebrick', alpha=0.9)
    for xt, yt, s in zip(x, n_missing, pct_nan):
        ax.text(xt, yt+0.05, f'{s:.1%}', color='firebrick', ha='center',
                fontsize=7)

    ax.set_ylabel('Number of Sources')
    ax.set_xlabel('Projected Stat')
    if integer_yticks:
        plt.yticks(np.arange(max(n_sources)+1))
    plt.xticks(x, x_ticks, rotation=45, ha='right')
    ax.xaxis.grid(False)
    return fig, ax


def plot_missing_data(fdir='../figures', save=True):
    """
    Plot # of sources and % data missing for each essential stat, then the
    same chart for the nonessential stats.

    Parameters
    ----------
    fdir: str, default '../figures'
        Directory handed to ``eu.save_fig`` and ``eu.latex_figure``.
    save: bool, default True
        If True, each figure is saved and its LaTeX figure code emitted.
    """
    essential = {
        'QB': ['Pass Yds', 'Pass TD', 'Pass Int', 'Rush Yds', 'Rush TD'],
        'RB': ['Rush Yds', 'Rush TD', 'Receptions', 'Rec Yds', 'Rec TD'],
        'WR': ['Rush Yds', 'Rush TD', 'Receptions', 'Rec Yds', 'Rec TD'],
        'TE': ['Receptions', 'Rec Yds', 'Rec TD'],
        'DST': ['PA', 'YdA', 'TD', 'Sack', 'Int', 'Fum Rec'],
        }
    _plot_missing_data_bars(essential)

    # Save figure.
    if save:
        fid = 'missing_data'
        eu.save_fig(fid, dir=fdir)
        cap = 'Number of sources collected for each essential stat (blue). Red '
        cap += 'indicates percentage of missing data for each respective stat.'
        eu.latex_figure(fid, dir=fdir, width=0.95, caption=cap)

    # Nonessential stats.
    nonessential = {
        'QB': ['Receptions', 'Rec Yds', 'Rec TD', '2PT'],
        'RB': ['Pass Yds', 'Pass TD', 'Pass Int', '2PT'],
        'WR': ['Pass Yds', 'Pass TD', 'Pass Int', '2PT'],
        'TE': ['Pass Yds', 'Pass TD', 'Pass Int', 'Rush Yds', 'Rush TD',
               '2PT'],
        'DST': ['Saf', 'Blk'],
        }
    _plot_missing_data_bars(nonessential, integer_yticks=True)

    # Save figure.
    if save:
        fid = 'nonessential_missing_data'
        eu.save_fig(fid, dir=fdir)
        cap = 'Number of sources collected for each nonessential stat (blue). '
        cap += 'Red indicates percentage of missing data for each '
        cap += 'respective stat.'
        eu.latex_figure(fid, dir=fdir, width=0.95, caption=cap)
def plot_stat_hists(fdir='../figures', save=True):
    """
    Plot all essential stat histograms for the appendix and an example
    histograms figure for the main body of the paper.

    Parameters
    ----------
    fdir: str, default '../figures'
        Directory handed to ``eu.save_fig`` and ``eu.latex_figure``.
    save: bool, default True
        If True, figures are saved and LaTeX figure code is printed. If
        False, figures are displayed instead.
    """
    def finish_figure(fid):
        """Save the current figure as `fid`, or display it when not saving."""
        if save:
            eu.save_fig(fid, dir=fdir)
        else:
            # NOTE(review): the non-save branch had no body in the original
            # file; displaying the figure is the assumed intent -- confirm.
            plt.show()

    def plot_hist_column(pairs, threshold, figsize):
        """Stack one projection histogram per (projections, stat) pair."""
        # squeeze=False keeps `axes` an array even for a single row, so
        # `.flat` works when only one stat is plotted.
        fig, axes = plt.subplots(len(pairs), 1, figsize=figsize,
                                 squeeze=False)
        for (proj, stat), ax in zip(pairs, axes.flat):
            proj.plot_projection_hist(
                stat, bins=None, threshold=threshold, ax=ax)
        plt.tight_layout()

    # Load data.
    positions = 'QB RB WR TE DST'.split()
    stats = {pos: TrainProjections(pos) for pos in positions}
    for pos in positions:
        stats[pos].load_data(weeks=range(1, 18))

    # Plot all no-threshold histograms for the appendix, then all
    # thresholded ones.
    no_thresh_fids = {pos: f'no_threshold_hist_{pos}' for pos in positions}
    thresh_fids = {pos: f'threshold_hist_{pos}' for pos in positions}
    for threshold, fid_by_pos in ((False, no_thresh_fids),
                                  (True, thresh_fids)):
        for pos in positions:
            pairs = [(stats[pos], stat)
                     for stat in stats[pos].essential_stats]
            plot_hist_column(pairs, threshold, figsize=[6, 2.5*len(pairs)])
            finish_figure(fid_by_pos[pos])

    # Create LaTeX code to plot figures.
    if save:
        for pos in positions:
            cap = 'Essential stat raw histograms and thresholded histograms '
            cap += f'for {pos}.'
            eu.latex_figure(
                fids=[no_thresh_fids[pos], thresh_fids[pos]],
                dir=fdir,
                subcaptions=['Raw histogram with threshold (red).',
                             'Histogram above threshold.'],
                caption=cap,
                width=0.9,
                )
            print()
            # Raw string: '\p' is not a valid escape sequence.
            print(r'\pagebreak')

    # Example stats: raw histograms first, thresholded histograms second.
    # NOTE(review): fids[1] stores the thresholded example figure even
    # though its name starts with 'no_theshold'; the names are kept as-is
    # because they are file names that other documents may reference.
    fids = ['no_theshold_example_hists', 'no_theshold_example_hists_RB']
    example_pairs = [(stats['QB'], 'Pass Yds'), (stats['RB'], 'Rush Yds')]
    for threshold, fid in zip((False, True), fids):
        plot_hist_column(example_pairs, threshold, figsize=[6, 9])
        finish_figure(fid)

    # Create LaTeX code to plot example stats figure.
    if save:
        print('\n\n\n')
        cap = 'Example raw and thresholded histograms for QB passing yards '
        cap += 'and RB rushing yards.'
        eu.latex_figure(
            fids=fids,
            dir=fdir,
            subcaptions=['Raw histogram with threshold (red).',
                         'Histogram above threshold.'],
            caption=cap,
            width=0.9,
            )
ax.set_xlabel('Projected Points') plt.ylim(n_players+2, -2) plt.tight_layout() # %% for i in range(1, 6): plot_weekly_pos_projections('TE', i) # %% pos = 'TE' week = 17 plot_weekly_pos_projections(pos, week, figsize=(12, 8)) # eu.save_fig(f'{pos}_{week}', dir=fdir) # %% # %% class CompareModels: """ Load data for model comparison and compare MAE values across all models for given stat. Parameters ---------- stats: {'essential', 'essential_below_thresh', 'non_essential'} Stats to compare. period: {'weekly', 'season'} Season or weekly models. subset: {'train', 'cv', 'test'}
def main():
    """
    Compare imputation methods on every stat of the selected stat group and
    save heatmaps of the column-normalized MAE and RMSE.

    For each stat the projection columns are reduced to complete rows, and
    ``simulate_imputing`` is run `n_sims` times per method at the NaN
    fraction observed in the unreduced data. A method that raises
    ValueError for a stat is reported and recorded as NaN, so it cannot be
    mistaken for a zero-error result.
    """
    n_sims = 50
    essential_stats = False
    impute_methods = [
        # 'BiScaler',
        'IterativeImpute',
        # 'IterativeSVD',
        'KNN',
        # 'MatrixFactorization',
        'Mean',
        'Median',
        # 'NuclearNorm',
        'SoftImpute',
        ]

    if essential_stats:
        impute_stats = {
            'QB': ['Pass Yds', 'Pass TD', 'Pass Int', 'Rush Yds', 'Rush TD'],
            'RB': ['Rush Yds', 'Rush TD', 'Receptions', 'Rec Yds', 'Rec TD'],
            'WR': ['Rush Yds', 'Rush TD', 'Receptions', 'Rec Yds', 'Rec TD'],
            'TE': ['Receptions', 'Rec Yds', 'Rec TD'],
            'DST': ['PA', 'YdA', 'TD', 'Sack', 'Int', 'Fum Rec'],
            }
        fid_mae = '../figures/impute_MAE'
        fid_rmse = '../figures/impute_RMSE'
    else:
        impute_stats = {
            'QB': ['Receptions', 'Rec Yds', 'Rec TD', '2PT'],
            'RB': ['Pass Yds', 'Pass TD', 'Pass Int', '2PT'],
            'WR': ['Pass Yds', 'Pass TD', 'Pass Int', '2PT'],
            'TE': ['Pass Yds', 'Pass TD', 'Pass Int', 'Rush Yds', 'Rush TD',
                   '2PT'],
            'DST': ['Saf', 'Blk'],
            }
        fid_mae = '../figures/nonessential_impute_MAE'
        fid_rmse = '../figures/nonessential_impute_RMSE'

    # Columns that are not projection sources.
    ignored_cols = ['Player', 'Team', 'Pos', 'Week', 'STATS']

    x_ticks = []
    n_stats = sum(len(stats) for stats in impute_stats.values())
    # Start from NaN so a method that fails for a stat stays visibly
    # missing instead of showing up as a perfect score of 0.
    mae = np.full([len(impute_methods), n_stats], np.nan)
    rmse = np.full([len(impute_methods), n_stats], np.nan)

    j = 0
    with tqdm(total=n_stats) as pbar:
        for pos, pos_stats in impute_stats.items():
            proj = TrainProjections(pos)
            proj.load_data(weeks=range(1, 18), impute_method=False)
            for stat in pos_stats:
                df = proj._stats_df[stat]

                # Subset DataFrame to include only projection columns.
                impute_cols = [c for c in list(df) if c not in ignored_cols]
                proj_df = df[impute_cols].copy()

                # Fraction of NaNs for the stat, measured before incomplete
                # rows are dropped.
                is_nan = proj_df.isnull().to_numpy()
                nan_pct = is_nan.sum() / is_nan.size

                # Remove rows with NaNs.
                proj_df = proj_df.dropna(how='any', axis=0)
                print(f'\n{pos} - {stat}\n')

                # Get average RMSE and MAE for each imputation method.
                for i, method in enumerate(impute_methods):
                    try:
                        rmse[i, j], mae[i, j] = simulate_imputing(
                            proj_df.copy(), method, n_sims, nan_pct)
                    except ValueError as err:
                        print(f'{method} failed for {pos} - {stat}: {err}')

                x_ticks.append(f'{pos} - {stat}')
                j += 1
                pbar.update(1)

    # %%
    # Normalize each stat (column) across methods before plotting.
    mae_df = pd.DataFrame(mae, columns=x_ticks, index=impute_methods)
    mae_df = (mae_df - mae_df.mean()) / mae_df.std()
    plot_heatmap(mae_df, cbar_label='Normalized MAE')
    eu.save_fig(fid_mae)

    # %%
    rmse_df = pd.DataFrame(rmse, columns=x_ticks, index=impute_methods)
    rmse_df = (rmse_df - rmse_df.mean()) / rmse_df.std()
    plot_heatmap(rmse_df, cbar_label='Normalized RMSE')
    eu.save_fig(fid_rmse)