# Overlay the performance curves of every portfolio on the existing axes.
# NOTE(review): `fig`, `ax`, `months`, `alpha`, `lw`, `gammas`, `capm`,
# `e_perf`, `mv_perf` and `cum_rets` are defined above this fragment.

# Reversed 'Reds' palette with twice as many colours as there are gammas;
# the CAPM loop below only consumes the first len(gammas) - 1 entries.
cmap2 = sns.color_palette('Reds_r', n_colors=len(gammas)*2)
# Curve labelled 'ETE'.
ax.plot(months, cum_rets(e_perf), alpha=alpha,
    label='ETE', lw=lw, c='steelblue')

# Curve labelled 'MV'.
ax.plot(months, cum_rets(mv_perf), alpha=alpha,
    label='MV', lw=lw, c='forestgreen')
# One CAPM curve per gamma, skipping gammas[0] and iterating in reverse
# order so that the i-th palette colour goes to the i-th gamma from the end.
for i, gamma in enumerate(reversed(gammas[1:])):
    ax.plot(months, cum_rets(capm[gamma]), alpha=alpha,
        label=f'CAPM $\\gamma={gamma}$', lw=lw, c=cmap2[i])

# Final figure cosmetics, then save, display and emit the LaTeX snippet.
fig.autofmt_xdate()
plt.ylabel('Cumulative Return')
plt.xlabel('Time')
plt.legend()
plt.tight_layout()
# Save before plt.show(): the figure is written while it is still current.
eu.save_fig(f'../plots/portfolio_comparison', dpi=300)
plt.show()
eu.latex_figure(f'../plots/portfolio_comparison')
# %%
# Per-period performance of each portfolio, one column per strategy.
tbl = pd.DataFrame(
    {
        'ETE': e_perf,
        'MV': mv_perf,
        'CAPM 1': capm[1],
        'CAPM 10': capm[10],
    },
    index=months[1:],
)

# Summary table indexed by strategy: mean scaled by 4 and standard
# deviation scaled by sqrt(4).
# NOTE(review): the factor 4 presumably annualizes quarterly data -- confirm.
tbl = pd.DataFrame({
    'Return': tbl.mean() * 4,
    'Volatility': tbl.std() * np.sqrt(4),
})

# Ratio of return to volatility (no risk-free rate is subtracted).
tbl['Sharpe'] = tbl['Return'] / tbl['Volatility']
Ejemplo n.º 2
0
def plot_temporal_vol_and_matrices(assets):
    """
    Plot a volatility time series and a grid of per-period matrices.

    Two figures are produced:

    1. A line plot labelled 'Annualized Volatility', saved as
       'temporal_vol' in '../figures' and then shown.
    2. A 3 x 7 grid with one column per monthly window (Jul 2011 to
       Jan 2012) and rows for correlation, transfer entropy and effective
       transfer entropy. This figure is shown but not saved.

    Parameters
    ----------
    assets:
        Forwarded unchanged to ``TransferEntropy(assets=...)``.
    """
    model = TransferEntropy(assets=assets)

    # Consecutive (start, end) windows built from shared month boundaries.
    boundaries = [
        '7/1/2011', '8/1/2011', '9/1/2011', '10/1/2011',
        '11/1/2011', '12/1/2011', '1/1/2012', '2/1/2012',
    ]
    periods = list(zip(boundaries[:-1], boundaries[1:]))
    xlabs = [pd.to_datetime(start).strftime('%b %Y') for start, _ in periods]

    # Square the data, sum within each month, average across columns,
    # multiply by 12 and take the square root.
    # NOTE(review): presumably `model.data` holds asset returns -- confirm.
    monthly_sq = (model.data ** 2).resample('M').sum()
    vol = (monthly_sq.mean(axis=1) * 12) ** 0.5
    first_date = pd.to_datetime('6/1/2011')
    last_date = pd.to_datetime(periods[-1][-1])
    vol = vol[(vol.index >= first_date) & (vol.index <= last_date)]

    _, vol_ax = plt.subplots(1, 1, figsize=(20, 3))
    vol_ax.plot(vol, c='steelblue')
    vol_ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:.0%}'))
    vol_ax.set_xlabel('Date')
    vol_ax.set_ylabel('Annualized Volatility')
    eu.save_fig('temporal_vol', dir='../figures')
    plt.show()

    # %%

    # First pass: find the extreme values over all periods so that every
    # panel in a row can share one colour scale.
    te_max = ete_max = ete_min = 0
    for period in periods:
        model.set_timeperiod(*period)
        model.compute_transfer_entropy()
        te_max = max(np.max(model.te.values), te_max)
        model.compute_effective_transfer_entropy(sims=3)
        ete_values = model.ete.values
        # NOTE(review): ete_max is tracked but never used below.
        ete_max = max(np.max(ete_values), ete_max)
        ete_min = min(np.min(ete_values), ete_min)

    # %%
    # Second pass: draw one column of three matrices per period.
    _, axes = plt.subplots(3, len(periods), figsize=(20, 10),
                           sharex=True, sharey=True)

    for col, period in enumerate(periods):
        corr_ax, te_ax, ete_ax = axes[0, col], axes[1, col], axes[2, col]

        model.set_timeperiod(*period)
        model.plot_corr(method='pearson', ax=corr_ax,
                        cbar=False, labels=False)

        model.compute_transfer_entropy()
        model.plot_te(ax=te_ax, cbar=False, labels=False,
                      vmin=0, vmax=te_max)

        # NOTE(review): the ETE panel's upper limit is 0.75 * te_max rather
        # than ete_max, and sims=5 here differs from sims=3 in the scan
        # above -- confirm both are intentional.
        model.compute_effective_transfer_entropy(sims=5)
        model.plot_te(ax=ete_ax, te='ete', cbar=False, labels=False,
                      vmin=ete_min, vmax=0.75*te_max)

        ete_ax.set_xlabel(xlabs[col])

    for row, label in enumerate(
            ['Correlation', 'Transfer Entropy', 'Effective Transfer Entropy']):
        axes[row, 0].set_ylabel(label)
    plt.tight_layout()
    # Saving of this figure is currently disabled:
    # eu.save_fig('temporal_matrices', dir='../figures')
    plt.show()
Ejemplo n.º 3
0
def make_algo_comparison_tables(year=2018, fdir='../figures', save=True):
    """
    Create the MAE colour tables and the train/cv/test LaTeX table.

    Parameters
    ----------
    year: int
        Currently unused; kept so existing callers keep working.
    fdir: str
        Directory passed to ``eu.save_fig`` / ``eu.latex_figure``.
    save: bool
        If True each figure is saved with ``eu.save_fig``; otherwise it
        is displayed with ``plt.show()``.
    """
    # %%
    ess_train = CompareModels('essential', period='weekly', subset='train')
    ess_cv = CompareModels(stats='essential', period='weekly', subset='cv')
    ess_test = CompareModels(stats='essential', period='weekly', subset='test')
    ess_thresh = CompareModels(
        'essential_below_thresh', period='weekly', subset='train')
    non_ess = CompareModels('non_essential', period='weekly', subset='train')

    def save_or_show(fid):
        """Save the current figure as `fid` when saving, else display it."""
        if save:
            eu.save_fig(fid, dir=fdir)
        else:
            plt.show()

    # %%
    # Essential stats.

    # Train and cross-validation tables side by side.
    _, axes = plt.subplots(1, 2, figsize=(18, 12))
    ess_train.plot_color_table(ax=axes[0], fontsize=12, prec=2)
    ess_cv.plot_color_table(ax=axes[1], fontsize=12, prec=2)
    plt.tight_layout()
    save_or_show('essential_MAE_color_table')

    ess_train.plot_color_table(figsize=(9, 12), fontsize=12, prec=2)
    save_or_show('essential_train_MAE_table')

    ess_cv.plot_color_table(figsize=(9, 12), fontsize=12, prec=2)
    save_or_show('essential_cv_MAE_table')
    if save:
        # Both stand-alone tables are combined into one LaTeX figure.
        cap = 'Mean Absolute Error (MAE) values for all essential stats '\
            'and studied algorithms.'
        eu.latex_figure(
            fids=['essential_train_MAE_table', 'essential_cv_MAE_table'],
            dir=fdir,
            caption=cap,
            subcaptions=['Training MAE.', 'Cross-Validation MAE.'],
            width=0.98,
            )

    ess_thresh.plot_color_table(figsize=(9, 12), fontsize=12)
    save_or_show('essential_thresh_MAE_table')

    non_ess.plot_color_table(figsize=(9, 12), fontsize=12, prec=3)
    save_or_show('nonessential_MAE_table')

    # %%
    # Make train/cv/test table and print to LaTeX.

    with open('../data/.models/weekly/optimal_models.json', 'r') as fid:
        opt_models = json.load(fid)

    def optimal_algo(row):
        """Return the selected model name for the row's position and stat."""
        return opt_models[row['Position']][row['Stat']]

    def get_set_mae_list(subset_comparison):
        """Return, per row of `mae_df`, the MAE of the selected model."""
        return [row[optimal_algo(row)]
                for _, row in subset_comparison.mae_df.iterrows()]

    df = ess_train.mae_df
    # Display names for the simple aggregate methods; any other model
    # name is shown unchanged.
    fmt_methods = {
        'MEAN': 'Mean',
        'MEDIAN': 'Median',
        'FLOOR': 'Min',
        'CEIL': 'Max',
        }
    # Fixed: this previously read from the undefined name `models`.
    algos = []
    for _, row in df.iterrows():
        algo = optimal_algo(row)
        algos.append(fmt_methods.get(algo, algo))

    # Only the first occurrence of each position is printed; repeats are
    # replaced by '{}' in the table.
    pos = []
    for p in df['Position'].values:
        pos.append('{}' if p in pos else p)

    table = pd.DataFrame({
        'Position': pos,
        'Stat': df['Stat'],
        'Projection Function': algos,
        'Train': get_set_mae_list(ess_train),
        'CV': get_set_mae_list(ess_cv),
        'Test': get_set_mae_list(ess_test),
        })
    cap = 'MAE values for training, cross-validation, and test sets with ' \
        'selected optimal projection function for each stat.'
    eu.latex_print(table, hide_index=True, prec=2, col_fmt='lllcrrr',
                   caption=cap)
Ejemplo n.º 4
0
def _missing_data_summary(stats):
    """
    Count projection sources and the fraction of NaNs for each stat.

    Parameters
    ----------
    stats: dict
        Maps a position name to the list of stat names to inspect.

    Returns
    -------
    x_ticks: list of str
        One '<pos> - <stat>' label per stat, in iteration order.
    n_sources: np.ndarray
        Number of projection columns per stat.
    pct_nan: np.ndarray
        Fraction of missing cells among those columns per stat.
    """
    n_stats = sum(len(s) for s in stats.values())
    n_sources = np.zeros(n_stats)
    pct_nan = np.zeros(n_stats)
    x_ticks = []
    i = 0
    for pos, pos_stats in stats.items():
        proj = TrainProjections(pos)
        proj.load_data(weeks=range(1, 18), impute_method=False)
        for stat in pos_stats:
            df = proj._stats_df[stat]

            # Subset DataFrame to include only projection columns.
            ignored_cols = ['Player', 'Team', 'Pos', 'Week', 'STATS']
            keep_cols = [c for c in list(df) if c not in ignored_cols]
            proj_df = df[keep_cols].copy()

            # Find fraction of NaNs for the stat and # of sources.
            n, m = proj_df.shape
            pct_nan[i] = np.sum(np.sum(proj_df.isnull())) / (n * m)
            n_sources[i] = m
            x_ticks.append(f'{pos} - {stat}')
            i += 1
    return x_ticks, n_sources, pct_nan


def _plot_missing_data_bars(x_ticks, n_sources, pct_nan, integer_yticks=False):
    """
    Draw the sources bar chart with the missing share overlaid in red.

    With ``integer_yticks=True`` the y axis gets one tick per whole
    number from 0 to ``max(n_sources)``.
    """
    fig, ax = plt.subplots(1, 1, figsize=[12, 6])
    x = np.arange(len(x_ticks))
    missing = n_sources * pct_nan
    ax.bar(x, n_sources, color='steelblue', alpha=0.7)
    ax.bar(x, missing, color='firebrick', alpha=0.9)
    # Print the missing percentage just above each red bar.
    for xt, yt, s in zip(x, missing, pct_nan):
        ax.text(xt, yt+0.05, f'{s:.1%}', color='firebrick', ha='center',
                fontsize=7)
    ax.set_ylabel('Number of Sources')
    ax.set_xlabel('Projected Stat')
    if integer_yticks:
        plt.yticks(np.arange(max(n_sources)+1))
    plt.xticks(x, x_ticks, rotation=45, ha='right')
    ax.xaxis.grid(False)


def plot_missing_data(fdir='../figures', save=True):
    """
    Plot # of sources and % data missing for each stat.

    Two figures are created, one for the essential stats and one for the
    nonessential stats.

    Parameters
    ----------
    fdir: str
        Directory passed to ``eu.save_fig`` / ``eu.latex_figure``.
    save: bool
        If True each figure is saved and its LaTeX snippet is emitted.
        If False the figures are only created; this function does not
        call ``plt.show()``.
    """
    essential_stats = {
        'QB': ['Pass Yds', 'Pass TD', 'Pass Int', 'Rush Yds', 'Rush TD'],
        'RB': ['Rush Yds', 'Rush TD', 'Receptions', 'Rec Yds', 'Rec TD'],
        'WR': ['Rush Yds', 'Rush TD', 'Receptions', 'Rec Yds', 'Rec TD'],
        'TE': ['Receptions', 'Rec Yds', 'Rec TD'],
        'DST': ['PA', 'YdA', 'TD', 'Sack', 'Int', 'Fum Rec'],
        }
    nonessential_stats = {
        'QB': ['Receptions', 'Rec Yds', 'Rec TD', '2PT'],
        'RB': ['Pass Yds', 'Pass TD', 'Pass Int', '2PT'],
        'WR': ['Pass Yds', 'Pass TD', 'Pass Int', '2PT'],
        'TE': ['Pass Yds', 'Pass TD', 'Pass Int', 'Rush Yds', 'Rush TD', '2PT'],
        'DST': ['Saf', 'Blk'],
        }

    # Essential stats.
    _plot_missing_data_bars(*_missing_data_summary(essential_stats))
    if save:
        fid = 'missing_data'
        eu.save_fig(fid, dir=fdir)
        cap = 'Number of sources collected for each essential stat (blue). Red '
        cap += 'indicates percentage of missing data for each respective stat.'
        eu.latex_figure(fid, dir=fdir, width=0.95, caption=cap)

    # Nonessential stats (this figure also forces whole-number y ticks).
    _plot_missing_data_bars(*_missing_data_summary(nonessential_stats),
                            integer_yticks=True)
    if save:
        fid = 'nonessential_missing_data'
        eu.save_fig(fid, dir=fdir)
        cap = 'Number of sources collected for each nonessential stat (blue). '
        cap += 'Red  indicates percentage of missing data for each '
        cap += 'respective stat.'
        eu.latex_figure(fid, dir=fdir, width=0.95, caption=cap)
Ejemplo n.º 5
0
def plot_stat_hists(fdir='../figures', save=True):
    """
    Plot all essential stat histograms for the appendix and
    an example histograms figure for the main body of the paper.

    Parameters
    ----------
    fdir: str
        Directory passed to ``eu.save_fig`` / ``eu.latex_figure``.
    save: bool
        If True every figure is saved and the LaTeX figure code is
        emitted; otherwise each figure is displayed with ``plt.show()``.
    """
    # Load data.
    positions = 'QB RB WR TE DST'.split()
    stats = {pos: TrainProjections(pos) for pos in positions}
    for pos in positions:
        stats[pos].load_data(weeks=range(1, 18))

    def plot_hist_column(pairs, threshold, figsize, fid):
        """Stack one histogram per (pos, stat) pair, then save or show."""
        # squeeze=False keeps `axes` an array even for a single histogram,
        # so `.flat` is always available.
        fig, axes = plt.subplots(len(pairs), 1, figsize=figsize,
                                 squeeze=False)
        for (pos, stat), ax in zip(pairs, axes.flat):
            stats[pos].plot_projection_hist(
                stat, bins=None, threshold=threshold, ax=ax)
        plt.tight_layout()
        if save:
            eu.save_fig(fid, dir=fdir)
        else:
            plt.show()

    def plot_position(pos, threshold, fid):
        """Plot every essential stat histogram of one position."""
        pairs = [(pos, stat) for stat in stats[pos].essential_stats]
        plot_hist_column(pairs, threshold, [6, 2.5*len(pairs)], fid)

    # Plot all no threshold histograms for appendix.
    no_thresh_fids = {pos: f'no_threshold_hist_{pos}' for pos in positions}
    for pos in positions:
        plot_position(pos, False, no_thresh_fids[pos])

    # Plot all threshold histograms for appendix.
    thresh_fids = {pos: f'threshold_hist_{pos}' for pos in positions}
    for pos in positions:
        plot_position(pos, True, thresh_fids[pos])

    subcaptions = ['Raw histogram with threshold (red).',
                   'Histogram above threshold.']

    # Create LaTeX code to plot figures.
    if save:
        for pos in positions:
            cap = 'Essential stat raw histograms and thresholded histograms '
            cap += f'for {pos}.'

            eu.latex_figure(
                fids=[no_thresh_fids[pos], thresh_fids[pos]],
                dir=fdir,
                subcaptions=subcaptions,
                caption=cap,
                width=0.9,
                )
            print()
            # Raw string: '\p' is not a valid escape sequence.
            print(r'\pagebreak')

    # Example stats: raw histograms go to fids[0], thresholded to fids[1].
    # NOTE(review): both file names say 'no_theshold' (sic) and the
    # thresholded figure carries the '_RB' suffix; the names are kept as
    # they are because other documents may reference the saved files.
    fids = ['no_theshold_example_hists', 'no_theshold_example_hists_RB']
    example_pairs = [('QB', 'Pass Yds'), ('RB', 'Rush Yds')]

    # Plot raw histograms for example stats.
    plot_hist_column(example_pairs, False, [6, 9], fids[0])

    # Plot thresholded histograms for example stats.
    plot_hist_column(example_pairs, True, [6, 9], fids[1])

    # Create LaTeX code to plot example stats figure.
    if save:
        print('\n\n\n')
        cap = 'Example raw and thresholded histograms for QB passing yards '
        cap += 'and RB rushing yards.'
        eu.latex_figure(
            fids=fids,
            dir=fdir,
            subcaptions=subcaptions,
            caption=cap,
            width=0.9,
            )
Ejemplo n.º 6
0
    ax.set_xlabel('Projected Points')
    plt.ylim(n_players+2, -2)
    plt.tight_layout()
    
# %%
# Preview the weekly TE projection plot for weeks 1 through 5.
for i in range(1, 6):
    plot_weekly_pos_projections('TE', i)
    plt.show()

# %%
# Render one position/week at a larger size and save it as '<pos>_<week>'.
pos = 'TE'
week = 17

plot_weekly_pos_projections(pos, week, figsize=(12, 8))
# plt.show()
# NOTE(review): `fdir` is not defined in the visible part of this file;
# it must be set elsewhere before this cell runs.
eu.save_fig(f'{pos}_{week}', dir=fdir)
# %%

# %%
class CompareModels:
    """
    Load data for model comparison and compare MAE values across all
    models for given stat.
    
    Parameters
    ----------
    stats: {'essential', 'essential_below_thresh', 'non_essential'}
        Stats to compare.
    period: {'weekly', 'season'}
        Season or weekly models.
    subset: {'train', 'cv', 'test'}
def main(n_sims=50, essential_stats=False):
    """
    Compare imputation methods per stat and plot normalized error heatmaps.

    For every (position, stat) pair the projection columns are extracted,
    rows containing NaNs are dropped, and each imputation method is scored
    with ``simulate_imputing`` using the stat's original NaN fraction.
    The resulting MAE and RMSE values are standardized per stat (column)
    and drawn as heatmaps, which are saved and shown.

    Parameters
    ----------
    n_sims: int
        Passed to ``simulate_imputing`` for each method and stat.
    essential_stats: bool
        If True evaluate the essential stats, otherwise the nonessential
        ones. The output file names change accordingly.
    """
    impute_methods = [
        # 'BiScaler',
        'IterativeImpute',
        # 'IterativeSVD',
        'KNN',
        # 'MatrixFactorization',
        'Mean',
        'Median',
        # 'NuclearNorm',
        'SoftImpute',
    ]

    if essential_stats:
        impute_stats = {
            'QB': ['Pass Yds', 'Pass TD', 'Pass Int', 'Rush Yds', 'Rush TD'],
            'RB': ['Rush Yds', 'Rush TD', 'Receptions', 'Rec Yds', 'Rec TD'],
            'WR': ['Rush Yds', 'Rush TD', 'Receptions', 'Rec Yds', 'Rec TD'],
            'TE': ['Receptions', 'Rec Yds', 'Rec TD'],
            'DST': ['PA', 'YdA', 'TD', 'Sack', 'Int', 'Fum Rec'],
        }
        fid_MAE = '../figures/impute_MAE'
        fid_RMSE = '../figures/impute_RMSE'
    else:
        impute_stats = {
            'QB': ['Receptions', 'Rec Yds', 'Rec TD', '2PT'],
            'RB': ['Pass Yds', 'Pass TD', 'Pass Int', '2PT'],
            'WR': ['Pass Yds', 'Pass TD', 'Pass Int', '2PT'],
            'TE':
            ['Pass Yds', 'Pass TD', 'Pass Int', 'Rush Yds', 'Rush TD', '2PT'],
            'DST': ['Saf', 'Blk'],
        }
        fid_MAE = '../figures/nonessential_impute_MAE'
        fid_RMSE = '../figures/nonessential_impute_RMSE'

    # Rows are imputation methods, columns are (position, stat) pairs.
    j = 0
    x_ticks = []
    n_stats = sum(len(stats) for stats in impute_stats.values())
    mae = np.zeros([len(impute_methods), n_stats])
    rmse = np.zeros([len(impute_methods), n_stats])
    with tqdm(total=n_stats) as pbar:
        for pos, pos_stats in impute_stats.items():
            proj = TrainProjections(pos)
            proj.load_data(weeks=range(1, 18), impute_method=False)
            for stat in pos_stats:
                df = proj._stats_df[stat]

                # Subset DataFrame to include only projection columns.
                ignored_cols = ['Player', 'Team', 'Pos', 'Week', 'STATS']
                impute_cols = [c for c in list(df) if c not in ignored_cols]
                proj_df = df[impute_cols].copy()

                # Find fraction of NaNs for the stat.
                n, m = proj_df.shape
                nan_pct = np.sum(np.sum(proj_df.isnull())) / (n * m)

                # Remove rows with NaNs.
                proj_df.dropna(how='any', axis=0, inplace=True)

                print(f'\n{pos} - {stat}\n')
                # Get average RMSE and MAE for each imputation method.
                for i, method in enumerate(impute_methods):
                    try:
                        rmse[i, j], mae[i, j] = simulate_imputing(
                            proj_df.copy(), method, n_sims, nan_pct)
                    except ValueError as err:
                        # A failed method must not keep the initial 0,
                        # which would read as a perfect score once the
                        # columns are normalized. NaN marks it as missing
                        # and is skipped by the pandas mean/std below.
                        rmse[i, j] = mae[i, j] = np.nan
                        print(f'Skipping {method} for {pos} - {stat}: {err}')
                x_ticks.append(f'{pos} - {stat}')
                j += 1
                pbar.update(1)

    # %%
    # Standardize each stat's column so methods are comparable across stats.
    mae_df = pd.DataFrame(mae, columns=x_ticks, index=impute_methods)
    mae_df = (mae_df - mae_df.mean()) / mae_df.std()
    plot_heatmap(mae_df, cbar_label='Normalized MAE')
    eu.save_fig(fid_MAE)
    plt.show()

    # %%
    rmse_df = pd.DataFrame(rmse, columns=x_ticks, index=impute_methods)
    rmse_df = (rmse_df - rmse_df.mean()) / rmse_df.std()
    plot_heatmap(rmse_df, cbar_label='Normalized RMSE')
    eu.save_fig(fid_RMSE)
    plt.show()