def make_return_dist_fig(sim_lookup, predictions, pick_K=100, n_bins=200, n_boots=5000):
    """Plot a bootstrap estimate of the annualized return distribution.

    Each of the first ``pick_K`` predicted returns is assigned to a percentile
    bin of ``sim_lookup['pred']``. For every bootstrap replicate one row of
    ``sim_lookup`` is drawn at random from the block of rows matching each
    bin, and the replicate's return is ``sum(net_ret) / sum(weights)`` over
    the drawn rows. The annualized replicate returns are drawn as a KDE curve.

    Parameters
    ----------
    sim_lookup : table with 'net_ret', 'weights' and 'pred' columns
        NOTE(review): the block indexing below presumably relies on the rows
        being sorted by descending 'pred' -- confirm against the caller.
    predictions : table or mapping with a 'returns' entry
        Divided by 100 before binning (presumably percent -> fraction; TODO
        confirm).
    pick_K : int
        Number of leading predictions that make up the portfolio. If fewer
        predictions are supplied, all of them are used.
    n_bins : int
        Number of percentile edges computed from ``sim_lookup['pred']``.
    n_boots : int
        Number of bootstrap replicates.

    Returns
    -------
    The matplotlib figure holding the density plot.
    """
    sim_net = sim_lookup['net_ret'].values
    sim_weights = sim_lookup['weights'].values

    # Percentile edges in descending order, so bin 0 has the highest edge.
    bin_locs = np.linspace(0, 100, n_bins)[::-1]
    bins = np.percentile(sim_lookup['pred'].values, bin_locs)

    sim_samps_per_bin = len(sim_lookup)/float(n_bins)
    pred_bins = np.digitize(predictions['returns'] / 100., bins)
    # digitize returns len(bins) for values below the smallest edge; without
    # the clamp those predictions would index past the end of sim_net.
    pred_bins = np.minimum(pred_bins, n_bins - 1)[:pick_K]

    # Block offset of each pick plus a random position inside the block.
    # The draw is sized from the picks actually available so that fewer than
    # pick_K predictions still broadcast.
    boot_samps = sim_samps_per_bin*pred_bins + np.random.randint(0, sim_samps_per_bin, size=(n_boots, len(pred_bins)))
    boot_samps = boot_samps.astype(int)
    sim_returns = np.sum(sim_net[boot_samps], axis=1) / np.sum(sim_weights[boot_samps], axis=1)
    sim_returns = LCM.annualize_returns(sim_returns)

    fig,ax=plt.subplots(figsize=(5.0,4.0))
    # hist=False: only the KDE curve is drawn.
    sns.distplot(sim_returns,bins=100, hist=False, rug=False,
                 ax=ax, kde_kws={'color':'k','lw':3})
    plt.xlabel('Annual returns (%)',fontsize=14)
    plt.ylabel('Probability',fontsize=14)
    plt.title('Estimated portfolio returns', fontsize=18)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.margins(.01, .01)
    plt.tight_layout()
    return fig
def make_return_dist_fig(sim_lookup,
                         predictions,
                         pick_K=100,
                         n_bins=200,
                         n_boots=5000):
    """Plot a bootstrap estimate of the annualized return distribution.

    The first ``pick_K`` predicted returns are binned against percentile
    edges of ``sim_lookup['pred']``. Each bootstrap replicate draws one row
    of ``sim_lookup`` per pick from the block of rows associated with that
    pick's bin, and its return is ``sum(net_ret) / sum(weights)`` over the
    drawn rows. The annualized replicate returns are shown as a KDE curve.

    Parameters
    ----------
    sim_lookup : table with 'net_ret', 'weights' and 'pred' columns
        NOTE(review): the block indexing presumably assumes rows sorted by
        descending 'pred' -- confirm against the caller.
    predictions : table or mapping with a 'returns' entry
        Divided by 100 before binning (presumably percent -> fraction; TODO
        confirm).
    pick_K : int
        Number of leading predictions in the portfolio; when fewer are
        supplied, all of them are used.
    n_bins : int
        Number of percentile edges taken from ``sim_lookup['pred']``.
    n_boots : int
        Number of bootstrap replicates.

    Returns
    -------
    The matplotlib figure holding the density plot.
    """
    sim_net = sim_lookup['net_ret'].values
    sim_weights = sim_lookup['weights'].values

    # Descending percentile edges: bin 0 has the highest edge.
    bin_locs = np.linspace(0, 100, n_bins)[::-1]
    bins = np.percentile(sim_lookup['pred'].values, bin_locs)

    sim_samps_per_bin = len(sim_lookup) / float(n_bins)
    pred_bins = np.digitize(predictions['returns'] / 100., bins)
    # Values below the smallest edge come back as len(bins); clamp them to
    # the last bin so the row indices stay inside sim_net / sim_weights.
    pred_bins = np.minimum(pred_bins, n_bins - 1)[:pick_K]

    # Row index = start of the pick's block + random offset within the block.
    # The draw uses the number of picks actually available, so supplying
    # fewer than pick_K predictions no longer breaks broadcasting.
    boot_samps = sim_samps_per_bin * pred_bins + np.random.randint(
        0, sim_samps_per_bin, size=(n_boots, len(pred_bins)))
    boot_samps = boot_samps.astype(int)
    sim_returns = np.sum(sim_net[boot_samps], axis=1) / np.sum(
        sim_weights[boot_samps], axis=1)
    sim_returns = LCM.annualize_returns(sim_returns)

    fig, ax = plt.subplots(figsize=(5.0, 4.0))
    # hist=False: only the KDE curve is drawn.
    sns.distplot(sim_returns,
                 bins=100,
                 hist=False,
                 rug=False,
                 ax=ax,
                 kde_kws={
                     'color': 'k',
                     'lw': 3
                 })
    plt.xlabel('Annual returns (%)', fontsize=14)
    plt.ylabel('Probability', fontsize=14)
    plt.title('Estimated portfolio returns', fontsize=18)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.margins(.01, .01)
    plt.tight_layout()
    return fig
  
     returns[name].append(LCM.pick_K_returns( 
                             test_pred, net_returns[test], prnc_weights[test],
                            pick_K_list, n_boots=100, sub_marg=False))
                            
     grade_returns[name].append(LCM.pick_K_returns_by_grade(
                             test_pred, net_returns[test], prnc_weights[test],
                            LD.iloc[test][grade_group], grade_pick_K))
                            
     grade_makeup[name][cnt,:] = LCM.get_choice_grade_makeup(test_pred, LD.iloc[test][grade_group], 
                                                             unique_grades, grade_pick_K)     
 cnt += 1

# Annualize portfolio returns, and convert them into numpy arrays as needed
# rel_returns collects, per model name, the annualized returns minus the
# annualized marg_returns.
rel_returns = {}
marg_returns = LCM.annualize_returns(np.array(marg_returns))
# Only `name` is used inside the loop; `model` is unpacked but not read.
for name, model in model_set:
    returns[name] = LCM.annualize_returns(np.array(returns[name]))
    # marg_returns gets two trailing length-1 axes so it broadcasts against
    # returns[name] (presumably a 3-D array at this point -- TODO confirm).
    # This must run before the reshape below collapses the leading axes.
    rel_returns[name] = returns[name] - marg_returns[:,np.newaxis,np.newaxis]  
    # Collapse to 2-D with one column per entry of pick_K_list.
    returns[name] = returns[name].reshape(-1, len(pick_K_list))
    rel_returns[name] = rel_returns[name].reshape(-1, len(pick_K_list))
    
    # Same treatment for the per-grade returns: annualize, then collapse to
    # 2-D with one column per entry of unique_grades.
    grade_returns[name] = LCM.annualize_returns(np.array(grade_returns[name]))
    grade_returns[name] = grade_returns[name].reshape(-1, len(unique_grades))


#%% PLOT RELATIVE FEATURE IMPORTANCES FOR FULL RF MODEL
# Transposed so that the row statistics below run across the separate
# entries of RF_feature_imp (presumably one per fit -- TODO confirm).
feature_imp_df = pd.DataFrame(RF_feature_imp).T
# Scale every column by its own maximum.
feature_imp_df = feature_imp_df / feature_imp_df.apply(max)
# Both statistics are evaluated on the scaled columns before either is
# attached; computing 'sem' after 'avg' was added as a column counted the
# row mean as an extra observation in the standard error.
feature_imp_df = feature_imp_df.assign(
    avg=feature_imp_df.mean(axis=1),
    sem=feature_imp_df.sem(axis=1))
Example #4
0
                               n_boots=100,
                               sub_marg=False))

        grade_returns[name].append(
            LCM.pick_K_returns_by_grade(test_pred, net_returns[test],
                                        prnc_weights[test],
                                        LD.iloc[test][grade_group],
                                        grade_pick_K))

        grade_makeup[name][cnt, :] = LCM.get_choice_grade_makeup(
            test_pred, LD.iloc[test][grade_group], unique_grades, grade_pick_K)
    cnt += 1

# Annualize portfolio returns, and convert them into numpy arrays as needed
# rel_returns collects, per model name, the annualized returns minus the
# annualized marg_returns.
rel_returns = {}
marg_returns = LCM.annualize_returns(np.array(marg_returns))
# Only `name` is used inside the loop; `model` is unpacked but not read.
for name, model in model_set:
    returns[name] = LCM.annualize_returns(np.array(returns[name]))
    # marg_returns gets two trailing length-1 axes so it broadcasts against
    # returns[name] (presumably a 3-D array at this point -- TODO confirm).
    # This must run before the reshape below collapses the leading axes.
    rel_returns[name] = returns[name] - marg_returns[:, np.newaxis, np.newaxis]
    # Collapse to 2-D with one column per entry of pick_K_list.
    returns[name] = returns[name].reshape(-1, len(pick_K_list))
    rel_returns[name] = rel_returns[name].reshape(-1, len(pick_K_list))

    # Same treatment for the per-grade returns: annualize, then collapse to
    # 2-D with one column per entry of unique_grades.
    grade_returns[name] = LCM.annualize_returns(np.array(grade_returns[name]))
    grade_returns[name] = grade_returns[name].reshape(-1, len(unique_grades))

#%% PLOT RELATIVE FEATURE IMPORTANCES FOR FULL RF MODEL
# Transposed so that the row statistics below run across the separate
# entries of RF_feature_imp (presumably one per fit -- TODO confirm).
feature_imp_df = pd.DataFrame(RF_feature_imp).T
# Scale every column by its own maximum.
feature_imp_df = feature_imp_df / feature_imp_df.apply(max)
# Both statistics are evaluated on the scaled columns before either is
# attached; computing 'sem' after 'avg' was added as a column counted the
# row mean as an extra observation in the standard error.
feature_imp_df = feature_imp_df.assign(
    avg=feature_imp_df.mean(axis=1),
    sem=feature_imp_df.sem(axis=1))
# Order rows by ascending average scaled importance.
feature_imp_df.sort_values(by='avg', inplace=True)