from collections import namedtuple

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import (MinMaxScaler, MaxAbsScaler,
                                   StandardScaler, RobustScaler)

import LCM  # project-local helper module (provides log_minmax and annualize_returns)


def make_return_dist_fig(sim_lookup, predictions, pick_K=100, n_bins=200, n_boots=5000):
    '''Bootstrap a distribution of annualized portfolio returns by sampling
    simulated loans whose predicted returns fall in the same percentile bins
    as the top pick_K model predictions.'''
    sim_net = sim_lookup['net_ret'].values
    sim_weights = sim_lookup['weights'].values

    # percentile bin edges (descending) of the simulated predicted returns
    bin_locs = np.linspace(0, 100, n_bins)[::-1]
    bins = np.percentile(sim_lookup['pred'].values, bin_locs)

    sim_samps_per_bin = len(sim_lookup) / float(n_bins)
    # find the bins of the first pick_K points in the predictions
    pred_bins = np.digitize(predictions['returns'] / 100., bins)

    # for each bootstrap replicate, draw one simulated loan from each prediction's bin
    boot_samps = sim_samps_per_bin * pred_bins[:pick_K] + \
        np.random.randint(0, int(sim_samps_per_bin), size=(n_boots, pick_K))
    boot_samps = boot_samps.astype(int)
    sim_returns = np.sum(sim_net[boot_samps], axis=1) / \
        np.sum(sim_weights[boot_samps], axis=1)
    sim_returns = LCM.annualize_returns(sim_returns)

    fig, ax = plt.subplots(figsize=(5.0, 4.0))
    # distplot is deprecated in newer seaborn; kdeplot is the modern equivalent
    sns.distplot(sim_returns, bins=100, hist=False, rug=False,
                 ax=ax, kde_kws={'color': 'k', 'lw': 3})
    plt.xlabel('Annual returns (%)', fontsize=14)
    plt.ylabel('Probability', fontsize=14)
    plt.title('Estimated portfolio returns', fontsize=18)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.margins(.01, .01)
    plt.tight_layout()
    return fig
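
#%% Usage sketch
'''Minimal, illustrative call of make_return_dist_fig. The synthetic frames
below are stand-ins for the project's real simulated-loan lookup table and
model predictions; the column names (net_ret, weights, pred, returns) come
from the function above, and the descending sort matches the bin indexing the
function assumes. Everything else here is made up for the example, and running
it still requires the project's LCM module for annualize_returns.'''
rng = np.random.RandomState(0)
n_sim = 20000  # hypothetical number of simulated loans (divisible by n_bins)

demo_sim_lookup = pd.DataFrame({
    'pred': rng.normal(0.05, 0.02, n_sim),     # model-predicted returns
    'net_ret': rng.normal(0.04, 0.10, n_sim),  # simulated net returns
    'weights': np.ones(n_sim),                 # portfolio weights
}).sort_values('pred', ascending=False).reset_index(drop=True)

demo_predictions = pd.DataFrame({
    'returns': rng.normal(5.0, 2.0, 500),      # predicted returns, in percent
}).sort_values('returns', ascending=False).reset_index(drop=True)

fig = make_return_dist_fig(demo_sim_lookup, demo_predictions,
                           pick_K=100, n_bins=200, n_boots=5000)
fig.savefig('portfolio_return_dist.png', dpi=150)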
#%%
# load the processed loan data (data_dir is assumed to be defined elsewhere in the project)
data_name = 'all_loans_proc'
LD = pd.read_csv(data_dir + data_name, parse_dates=['issue_d'])

#%% Set up list of predictors and their properties
'''Store info for each predictor as a named tuple containing the col-name within
the pandas dataframe, the full_name (human readable), and the type of normalization
to apply to that feature.'''
predictor = namedtuple('predictor', ['col_name', 'full_name', 'norm_type'])

# dict to create transformers for each specified normalization type
transformer_map = {'minMax': MinMaxScaler(),
                   'maxAbs': MaxAbsScaler(),
                   'standScal': StandardScaler(),
                   'log_minmax': LCM.log_minmax(),
                   'robScal': RobustScaler()}

predictors = [
    predictor('acc_now_delinq', 'num delinq accounts', 'maxAbs'),
    predictor('annual_inc', 'annual income', 'log_minmax'),
    predictor('collections_12_mths_ex_med', 'num recent collections', 'maxAbs'),
    predictor('cr_line_dur', 'duration cred line', 'standScal'),
    predictor('delinq_2yrs', 'num recent delinq', 'maxAbs'),
    predictor('desc_length', 'loan desc length', 'maxAbs'),
    predictor('dti', 'debt-income ratio', 'standScal'),
    predictor('emp_length', 'employment length', 'maxAbs'),
    predictor('funded_amnt', 'loan amount', 'maxAbs'),
    predictor('inq_last_6mths', 'num recent inqs', 'maxAbs'),
    predictor('int_rate', 'interest rate', 'maxAbs'),
    # ... additional predictors omitted here
]
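
#%% Example: applying the normalizations
'''Illustrative sketch (not code from the project) of how the definitions
above might be used: each listed column is pulled from LD and scaled with the
transformer that transformer_map assigns to its norm_type. The loop, the
'_norm' column suffix, and the feature_cols/X names are assumptions made for
this example; LCM.log_minmax is assumed to follow the sklearn fit/transform
interface, as its placement alongside the sklearn scalers suggests.'''
feature_cols = []
for p in predictors:
    if p.col_name not in LD.columns:
        continue  # skip any predictor missing from this data dump
    scaler = transformer_map[p.norm_type]
    # assumes the processed data has numeric, non-missing values in these columns
    values = LD[p.col_name].astype(float).values.reshape(-1, 1)
    LD[p.col_name + '_norm'] = scaler.fit_transform(values).ravel()
    feature_cols.append(p.col_name + '_norm')

X = LD[feature_cols].values  # normalized feature matrix for downstream models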