def make_return_dist_fig(sim_lookup, predictions, pick_K=100, n_bins=200,
                         n_boots=5000):
    """Plot a bootstrapped distribution of annualized portfolio returns.

    The first ``pick_K`` predictions are each assigned to a percentile bin of
    the simulated predictions. For every bootstrap draw, one row of the
    simulation table is sampled per pick from that pick's bin, and the
    portfolio return is the sum of the sampled net returns divided by the sum
    of the sampled weights. The annualized returns are shown as a kernel
    density curve.

    Parameters
    ----------
    sim_lookup : table with 'net_ret', 'weights' and 'pred' columns
        Simulation lookup table (accessed via ``[...].values`` and ``len``).
        NOTE(review): the index arithmetic treats consecutive chunks of
        ``len(sim_lookup) / n_bins`` rows as one bin, so the table is
        presumably sorted by 'pred' in descending order -- confirm with the
        code that builds it.
    predictions : mapping/table with a 'returns' entry
        Predicted returns; divided by 100 before binning (presumably given in
        percent -- confirm against the caller).
    pick_K : int
        Number of leading entries of ``predictions['returns']`` to include.
    n_bins : int
        Number of percentile edges used to bin the simulated predictions.
    n_boots : int
        Number of bootstrap portfolios to draw.

    Returns
    -------
    The matplotlib figure containing the density plot.
    """
    sim_net = sim_lookup['net_ret'].values
    sim_weights = sim_lookup['weights'].values

    # Percentile edges of the simulated predictions, highest edge first.
    bin_locs = np.linspace(0, 100, n_bins)[::-1]
    bins = np.percentile(sim_lookup['pred'].values, bin_locs)
    sim_samps_per_bin = len(sim_lookup) / float(n_bins)

    # Bin index of every prediction relative to the descending edges.
    pred_bins = np.digitize(predictions['returns'] / 100., bins)
    # np.digitize returns len(bins) for values below the smallest edge, which
    # would index one bin past the end of the lookup table (IndexError).
    # Fold those predictions into the last valid bin instead.
    pred_bins = np.minimum(pred_bins, n_bins - 1)

    # Row index = start of the pick's bin + a random offset inside the bin.
    # Only the first pick_K predictions are used.
    boot_samps = sim_samps_per_bin * pred_bins[:pick_K] + np.random.randint(
        0, sim_samps_per_bin, size=(n_boots, pick_K))
    boot_samps = boot_samps.astype(int)

    # One portfolio return per bootstrap draw (row of boot_samps).
    sim_returns = (np.sum(sim_net[boot_samps], axis=1) /
                   np.sum(sim_weights[boot_samps], axis=1))
    sim_returns = LCM.annualize_returns(sim_returns)

    fig, ax = plt.subplots(figsize=(5.0, 4.0))
    sns.distplot(sim_returns, bins=100, hist=False, rug=False, ax=ax,
                 kde_kws={'color': 'k', 'lw': 3})
    plt.xlabel('Annual returns (%)', fontsize=14)
    plt.ylabel('Probability', fontsize=14)
    plt.title('Estimated portfolio returns', fontsize=18)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.margins(.01, .01)
    plt.tight_layout()
    return fig
def make_return_dist_fig(sim_lookup, predictions, pick_K=100, n_bins=200,
                         n_boots=5000):
    """Plot a bootstrapped distribution of annualized portfolio returns.

    The first ``pick_K`` predictions are each assigned to a percentile bin of
    the simulated predictions. For every bootstrap draw, one row of the
    simulation table is sampled per pick from that pick's bin, and the
    portfolio return is the sum of the sampled net returns divided by the sum
    of the sampled weights. The annualized returns are shown as a kernel
    density curve.

    Parameters
    ----------
    sim_lookup : table with 'net_ret', 'weights' and 'pred' columns
        Simulation lookup table (accessed via ``[...].values`` and ``len``).
        NOTE(review): the index arithmetic treats consecutive chunks of
        ``len(sim_lookup) / n_bins`` rows as one bin, so the table is
        presumably sorted by 'pred' in descending order -- confirm with the
        code that builds it.
    predictions : mapping/table with a 'returns' entry
        Predicted returns; divided by 100 before binning (presumably given in
        percent -- confirm against the caller).
    pick_K : int
        Number of leading entries of ``predictions['returns']`` to include.
    n_bins : int
        Number of percentile edges used to bin the simulated predictions.
    n_boots : int
        Number of bootstrap portfolios to draw.

    Returns
    -------
    The matplotlib figure containing the density plot.
    """
    sim_net = sim_lookup['net_ret'].values
    sim_weights = sim_lookup['weights'].values

    # Percentile edges of the simulated predictions, highest edge first.
    bin_locs = np.linspace(0, 100, n_bins)[::-1]
    bins = np.percentile(sim_lookup['pred'].values, bin_locs)
    sim_samps_per_bin = len(sim_lookup) / float(n_bins)

    # Bin index of every prediction relative to the descending edges.
    pred_bins = np.digitize(predictions['returns'] / 100., bins)
    # np.digitize returns len(bins) for values below the smallest edge, which
    # would index one bin past the end of the lookup table (IndexError).
    # Fold those predictions into the last valid bin instead.
    pred_bins = np.minimum(pred_bins, n_bins - 1)

    # Row index = start of the pick's bin + a random offset inside the bin.
    # Only the first pick_K predictions are used.
    boot_samps = sim_samps_per_bin * pred_bins[:pick_K] + np.random.randint(
        0, sim_samps_per_bin, size=(n_boots, pick_K))
    boot_samps = boot_samps.astype(int)

    # One portfolio return per bootstrap draw (row of boot_samps).
    sim_returns = (np.sum(sim_net[boot_samps], axis=1) /
                   np.sum(sim_weights[boot_samps], axis=1))
    sim_returns = LCM.annualize_returns(sim_returns)

    fig, ax = plt.subplots(figsize=(5.0, 4.0))
    sns.distplot(sim_returns, bins=100, hist=False, rug=False, ax=ax,
                 kde_kws={'color': 'k', 'lw': 3})
    plt.xlabel('Annual returns (%)', fontsize=14)
    plt.ylabel('Probability', fontsize=14)
    plt.title('Estimated portfolio returns', fontsize=18)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.margins(.01, .01)
    plt.tight_layout()
    return fig
returns[name].append(LCM.pick_K_returns( test_pred, net_returns[test], prnc_weights[test], pick_K_list, n_boots=100, sub_marg=False)) grade_returns[name].append(LCM.pick_K_returns_by_grade( test_pred, net_returns[test], prnc_weights[test], LD.iloc[test][grade_group], grade_pick_K)) grade_makeup[name][cnt,:] = LCM.get_choice_grade_makeup(test_pred, LD.iloc[test][grade_group], unique_grades, grade_pick_K) cnt += 1 # Annualize portfolio returns, and convert them into numpy arrays as needed rel_returns = {} marg_returns = LCM.annualize_returns(np.array(marg_returns)) for name, model in model_set: returns[name] = LCM.annualize_returns(np.array(returns[name])) rel_returns[name] = returns[name] - marg_returns[:,np.newaxis,np.newaxis] returns[name] = returns[name].reshape(-1, len(pick_K_list)) rel_returns[name] = rel_returns[name].reshape(-1, len(pick_K_list)) grade_returns[name] = LCM.annualize_returns(np.array(grade_returns[name])) grade_returns[name] = grade_returns[name].reshape(-1, len(unique_grades)) #%% PLOT RELATIVE FEATURE IMPORTANCES FOR FULL RF MODEL feature_imp_df = pd.DataFrame(RF_feature_imp).T feature_imp_df = feature_imp_df / feature_imp_df.apply(max) feature_imp_df['avg'] = feature_imp_df.mean(axis=1) feature_imp_df['sem'] = feature_imp_df.sem(axis=1)
n_boots=100, sub_marg=False)) grade_returns[name].append( LCM.pick_K_returns_by_grade(test_pred, net_returns[test], prnc_weights[test], LD.iloc[test][grade_group], grade_pick_K)) grade_makeup[name][cnt, :] = LCM.get_choice_grade_makeup( test_pred, LD.iloc[test][grade_group], unique_grades, grade_pick_K) cnt += 1 # Annualize portfolio returns, and convert them into numpy arrays as needed rel_returns = {} marg_returns = LCM.annualize_returns(np.array(marg_returns)) for name, model in model_set: returns[name] = LCM.annualize_returns(np.array(returns[name])) rel_returns[name] = returns[name] - marg_returns[:, np.newaxis, np.newaxis] returns[name] = returns[name].reshape(-1, len(pick_K_list)) rel_returns[name] = rel_returns[name].reshape(-1, len(pick_K_list)) grade_returns[name] = LCM.annualize_returns(np.array(grade_returns[name])) grade_returns[name] = grade_returns[name].reshape(-1, len(unique_grades)) #%% PLOT RELATIVE FEATURE IMPORTANCES FOR FULL RF MODEL feature_imp_df = pd.DataFrame(RF_feature_imp).T feature_imp_df = feature_imp_df / feature_imp_df.apply(max) feature_imp_df['avg'] = feature_imp_df.mean(axis=1) feature_imp_df['sem'] = feature_imp_df.sem(axis=1) feature_imp_df.sort_values(by='avg', inplace=True)