예제 #1
0
def get_latest_records(auth_key,
                       zip3_loc_data,
                       use_grades=('A', 'B', 'C', 'D', 'E', 'F')):
    """Download the LendingClub loan listing and convert it to records.

    Args:
        auth_key: value sent as the ``Authorization`` request header.
        zip3_loc_data: location lookup data forwarded to
            ``LCL.get_zip_loc`` together with each loan's ``addrZip`` to
            obtain the ``latitude`` and ``longitude`` columns.
        use_grades: container of loan grades to keep. Only membership
            (``in``) is used, so any container works. The default is an
            immutable tuple so it cannot be altered between calls.

    Returns:
        A list with one record per listed loan, as built by
        ``make_record(loan, record_map)``, restricted to records whose
        ``'grade'`` is in ``use_grades``.
        NOTE(review): ``make_record`` is defined elsewhere; records are
        presumably mappings keyed by the column names below -- confirm.
    """
    header = {'Authorization': auth_key, 'Content-Type': 'application/json'}
    api_version = 'v1'
    loan_list_url = ('https://api.lendingclub.com/api/investor/' +
                     api_version + '/loans/listing')
    payload = {'showAll': 'true'}
    resp = requests.get(loan_list_url, headers=header, params=payload)
    loans = resp.json()['loans']

    # Pairs of (output column name, function extracting that column's value
    # from one raw loan entry of the API response).
    record_map = (
        ('acc_now_delinq', lambda x: x['accNowDelinq']),
        ('annual_inc', lambda x: x['annualInc']),
        ('collections_12_mths_ex_med', lambda x: x['collections12MthsExMed']),
        ('cr_line_dur', lambda x: LCL.get_cr_line_dur(x['earliestCrLine'])),
        ('delinq_2yrs', lambda x: x['delinq2Yrs']),
        ('desc_length', lambda x: LCL.get_desc_length(x['desc'])),
        ('dti', lambda x: x['dti']),
        # convert to years from months
        ('emp_length', lambda x: LCL.get_emp_length(x['empLength'])),
        ('id', lambda x: x['id']),
        # use amount requested rather than funded amnt!
        ('loan_amnt', lambda x: x['loanAmount']),
        ('inq_last_6mths', lambda x: x['inqLast6Mths']),
        ('int_rate', lambda x: x['intRate']),
        ('mths_since_last_delinq',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastDelinq'])),
        ('mths_since_last_major_derog',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastMajorDerog'])),
        ('mths_since_last_record',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastRecord'])),
        ('num_add_desc', lambda x: LCL.get_num_descs(x['desc'])),
        ('open_acc', lambda x: x['openAcc']),
        ('pub_rec', lambda x: x['pubRec']),
        ('revol_bal', lambda x: x['revolBal']),
        ('revol_util', lambda x: x['revolUtil']),
        ('term', lambda x: x['term']),
        ('total_acc', lambda x: x['totalAcc']),
        ('tot_cur_bal', lambda x: x['totCurBal']),
        ('addr_state', lambda x: x['addrState']),
        ('home_ownership', lambda x: x['homeOwnership']),
        ('grade', lambda x: x['grade']),
        ('purpose', lambda x: x['purpose']),
        ('latitude',
         lambda x: LCL.get_zip_loc(x['addrZip'], zip3_loc_data, 'latitude')),
        ('longitude',
         lambda x: LCL.get_zip_loc(x['addrZip'], zip3_loc_data, 'longitude')),
    )

    # Build a record for every listed loan, then keep the requested grades.
    records = [make_record(loan, record_map) for loan in loans]
    return [record for record in records if record['grade'] in use_grades]
예제 #2
0
def get_latest_records(auth_key, zip3_loc_data,
                       use_grades=('A', 'B', 'C', 'D', 'E', 'F')):
    """Download the LendingClub loan listing and convert it to records.

    Args:
        auth_key: value sent as the ``Authorization`` request header.
        zip3_loc_data: location lookup data forwarded to
            ``LCL.get_zip_loc`` together with each loan's ``addrZip`` to
            obtain the ``latitude`` and ``longitude`` columns.
        use_grades: container of loan grades to keep. Only membership
            (``in``) is used, so any container works. The default is an
            immutable tuple so it cannot be altered between calls.

    Returns:
        A list with one record per listed loan, as built by
        ``make_record(loan, record_map)``, restricted to records whose
        ``'grade'`` is in ``use_grades``.
        NOTE(review): ``make_record`` is defined elsewhere; records are
        presumably mappings keyed by the column names below -- confirm.
    """
    header = {'Authorization': auth_key,
              'Content-Type': 'application/json'}
    api_version = 'v1'
    loan_list_url = ('https://api.lendingclub.com/api/investor/' +
                     api_version + '/loans/listing')
    payload = {'showAll': 'true'}
    resp = requests.get(loan_list_url, headers=header, params=payload)
    loans = resp.json()['loans']

    # Pairs of (output column name, function extracting that column's value
    # from one raw loan entry of the API response).
    record_map = (
        ('acc_now_delinq', lambda x: x['accNowDelinq']),
        ('annual_inc', lambda x: x['annualInc']),
        ('collections_12_mths_ex_med', lambda x: x['collections12MthsExMed']),
        ('cr_line_dur', lambda x: LCL.get_cr_line_dur(x['earliestCrLine'])),
        ('delinq_2yrs', lambda x: x['delinq2Yrs']),
        ('desc_length', lambda x: LCL.get_desc_length(x['desc'])),
        ('dti', lambda x: x['dti']),
        # convert to years from months
        ('emp_length', lambda x: LCL.get_emp_length(x['empLength'])),
        ('id', lambda x: x['id']),
        # use amount requested rather than funded amnt!
        ('loan_amnt', lambda x: x['loanAmount']),
        ('inq_last_6mths', lambda x: x['inqLast6Mths']),
        ('int_rate', lambda x: x['intRate']),
        ('mths_since_last_delinq',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastDelinq'])),
        ('mths_since_last_major_derog',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastMajorDerog'])),
        ('mths_since_last_record',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastRecord'])),
        ('num_add_desc', lambda x: LCL.get_num_descs(x['desc'])),
        ('open_acc', lambda x: x['openAcc']),
        ('pub_rec', lambda x: x['pubRec']),
        ('revol_bal', lambda x: x['revolBal']),
        ('revol_util', lambda x: x['revolUtil']),
        ('term', lambda x: x['term']),
        ('total_acc', lambda x: x['totalAcc']),
        ('tot_cur_bal', lambda x: x['totCurBal']),
        ('addr_state', lambda x: x['addrState']),
        ('home_ownership', lambda x: x['homeOwnership']),
        ('grade', lambda x: x['grade']),
        ('purpose', lambda x: x['purpose']),
        ('latitude',
         lambda x: LCL.get_zip_loc(x['addrZip'], zip3_loc_data, 'latitude')),
        ('longitude',
         lambda x: LCL.get_zip_loc(x['addrZip'], zip3_loc_data, 'longitude')),
    )

    # Build a record for every listed loan, then keep the requested grades.
    records = [make_record(loan, record_map) for loan in loans]
    return [record for record in records if record['grade'] in use_grades]
예제 #3
0
                  'LoanStats3b.csv': '2012-2013',
                  'LoanStats3c.csv': '2013-2014',
                  'LoanStats3d.csv': '2015'}
"""

# Filters handed to the loader: statuses, terms and grades to keep.
keep_status = [
    'Current',
    'Fully Paid',
    'Late (16-30 days)',
    'Late (31-120 days)',
    'Default',
    'Charged Off',
    'In Grace Period',
]
keep_terms = [36, 60]  # loan terms to keep (36, 60)
keep_grades = list('ABCDEF')  # loan grades to keep [A-G]

# Paths of the raw loan-stat CSV files, each prefixed with data_dir.
load_files = [
    data_dir + csv_name
    for csv_name in ('LoanStats3a.csv', 'LoanStats3b.csv',
                     'LoanStats3c.csv', 'LoanStats3d.csv')
]

LD = LCL.load_lending_data(load_files, keep_status, keep_terms, keep_grades)

print('loaded {0} loans'.format(len(LD)))

print_figs = False

#%%
# Load long/lat data for each zip-code and attach it to the loans.
zip3_data = LCL.load_location_data(data_dir, group_by='zip3')
LD['zip3'] = LD['zip3'].astype(int)
# Inner join: loan 'zip3' column against the index of the location table.
LD = pd.merge(
    left=LD,
    right=zip3_data,
    left_on='zip3',
    right_index=True,
    how='inner',
)

#%% Compute hazard functions for each loan grade and term
term_bandwidths = [4.0, 8.0]  # NAF smoothing bandwidth (one per term)
naf = NelsonAalenFitter(nelson_aalen_smoothing=False)  # init NAF model
예제 #4
0
import cairosvg
from scipy.spatial import KDTree  # for finding KNN
import scipy 
import os

# Project root; the data, figure and movie directories all hang off it.
base_dir = '/Users/james/Data_Incubator/loan-picker'
# Alternative: base_dir = os.path.dirname(os.path.realpath(__file__))
data_dir, fig_dir, movie_dir = (
    os.path.join(base_dir, sub_dir)
    for sub_dir in ('static/data/', 'static/images/', 'static/movies/')
)

# Processed loan table; 'issue_d' is parsed as a date column.
data_name = 'all_loans_proc'
LD = pd.read_csv(data_dir + data_name, parse_dates=['issue_d'])

# Location tables grouped by FIPS code and by 3-digit zip, plus the
# FIPS -> zip lookup used when extracting map paths.
fips_data = LCL.load_location_data(data_dir, group_by='fips')
zip3_data = LCL.load_location_data(data_dir, group_by='zip3')
fips_to_zip = LCL.make_fips_to_zip_dict(data_dir, group_by='zip')

#%% make a k-tree for doing nearest neighbor imputation of missing data
base_map = LCL.load_base_map(fig_dir + 'USA_Counties_text.svg', ax_xml=True)
county_paths, state_paths = LCL.get_map_paths(base_map, fips_to_zip)
title_path = base_map.findAll('text')[0]  # first <text> element of the map
map_coords = LCH.extract_fips_coords(county_paths)
ktree = KDTree(map_coords.values)  # nearest neighbor tree

#%% make sequence of decision trees and build a movie
X = LD[['longitude', 'latitude']]
y = LD['ROI']  # plot average return by area, not portfolio return

max_levels = 16
예제 #5
0
# Filters handed to the loader: statuses, terms and grades to keep.
keep_status = [
    'Current',
    'Fully Paid',
    'Late (16-30 days)',
    'Late (31-120 days)',
    'Default',
    'Charged Off',
    'In Grace Period',
]
keep_terms = [36, 60]  # loan terms to keep (36, 60)
keep_grades = list('ABCDEF')  # loan grades to keep [A-G]

# Paths of the raw loan-stat CSV files, each prefixed with data_dir.
load_files = [
    data_dir + csv_name
    for csv_name in ('LoanStats3a.csv', 'LoanStats3b.csv',
                     'LoanStats3c.csv', 'LoanStats3d.csv')
]

LD = LCL.load_lending_data(load_files, keep_status, keep_terms, keep_grades)

print('loaded {0} loans'.format(len(LD)))

print_figs = False

#%%
# Load long/lat data for each zip-code and attach it to the loans.
zip3_data = LCL.load_location_data(data_dir, group_by='zip3')
LD['zip3'] = LD['zip3'].astype(int)
# Inner join: loan 'zip3' column against the index of the location table.
LD = pd.merge(
    left=LD,
    right=zip3_data,
    left_on='zip3',
    right_index=True,
    how='inner',
)

#%% Compute hazard functions for each loan grade and term
term_bandwidths = [4.0, 8.0]  # NAF smoothing bandwidth (one per term)
naf = NelsonAalenFitter(nelson_aalen_smoothing=False)  # init NAF model
예제 #6
0
def reset_map():
    """Reload the base map, refresh the path attributes on ``app`` and
    redirect to ``/loan_mapping``."""
    print('resetting map')
    # Store the fresh map on the app before extracting its paths.
    fresh_map = LCL.load_base_map(fig_dir + map_name)
    app.base_map = fresh_map
    map_paths = LCL.get_map_paths(fresh_map, fips_to_zip)
    app.county_paths, app.state_paths = map_paths
    return redirect('/loan_mapping')
예제 #7
0
# Load lookup tables for converting zips and states to county FIPS codes for
# plotting. Each file is opened in a context manager so its handle is closed
# as soon as the table has been read.
# NOTE(review): dill.load unpickles arbitrary objects; these files must come
# from a trusted source.
with open(data_dir + 'fips_to_zip.p', "rb") as in_strm:
    fips_to_zip = dill.load(in_strm)
with open(data_dir + 'state_fips_dict.p', "rb") as in_strm:
    state_fips_dict = dill.load(in_strm)

# get lat/long coordinates for each 3-digit zip
zip3_loc_path = os.path.join(data_dir, 'zip3_loc_data.p')
with open(zip3_loc_path, 'rb') as in_strm:
    zip3_loc_data = dill.load(in_strm)

# precompute additional columns for convenience when plotting
LD['short_purpose'] = LD['purpose'].map(purpose_map)
LD['issue_year'] = LD['issue_d'].dt.year

# load base map and get state and county paths
app.base_map = LCL.load_base_map(fig_dir + map_name)
(app.county_paths, app.state_paths) = LCL.get_map_paths(app.base_map,
                                                        fips_to_zip)

# Record type describing one predictor column, plus the pickled models and
# validation data loaded through LCP.
predictor = namedtuple('predictor', ['col_name', 'full_name', 'norm_type'])
model_data = LCP.load_pickled_models()
sim_lookup = LCP.get_validation_data()

#%%
# Fetch the loans and their predictions for the grades of interest.
# load_time is the raw time.time() value (seconds since the epoch).
use_grades = ['A', 'B', 'C', 'D', 'E', 'F']
load_time = time.time()
print('Grabbing loan data at {}'.format(load_time))
predictions = LCP.get_LC_loans(auth_keys['LC_auth_key'], model_data,
                               zip3_loc_data, use_grades)
                               
#%%
@app.route('/') #redirect to index page
예제 #8
0
def reset_map():
    """Reload the base map, refresh the path attributes on ``app`` and
    redirect to ``/loan_mapping``."""
    print('resetting map')
    # Store the fresh map on the app before extracting its paths.
    fresh_map = LCL.load_base_map(fig_dir + map_name)
    app.base_map = fresh_map
    map_paths = LCL.get_map_paths(fresh_map, fips_to_zip)
    app.county_paths, app.state_paths = map_paths
    return redirect('/loan_mapping')
예제 #9
0
# Load lookup tables for converting zips and states to county FIPS codes for
# plotting. Each file is opened in a context manager so its handle is closed
# as soon as the table has been read.
# NOTE(review): dill.load unpickles arbitrary objects; these files must come
# from a trusted source.
with open(data_dir + 'fips_to_zip.p', "rb") as in_strm:
    fips_to_zip = dill.load(in_strm)
with open(data_dir + 'state_fips_dict.p', "rb") as in_strm:
    state_fips_dict = dill.load(in_strm)

# get lat/long coordinates for each 3-digit zip
zip3_loc_path = os.path.join(data_dir, 'zip3_loc_data.p')
with open(zip3_loc_path, 'rb') as in_strm:
    zip3_loc_data = dill.load(in_strm)

# precompute additional columns for convenience when plotting
LD['short_purpose'] = LD['purpose'].map(purpose_map)
LD['issue_year'] = LD['issue_d'].dt.year

# load base map and get state and county paths
app.base_map = LCL.load_base_map(fig_dir + map_name)
(app.county_paths,
 app.state_paths) = LCL.get_map_paths(app.base_map, fips_to_zip)

# Record type describing one predictor column, plus the pickled models and
# validation data loaded through LCP.
predictor = namedtuple('predictor', ['col_name', 'full_name', 'norm_type'])
model_data = LCP.load_pickled_models()
sim_lookup = LCP.get_validation_data()

#%%
# Fetch the loans and their predictions for the grades of interest.
# load_time is the raw time.time() value (seconds since the epoch).
use_grades = ['A', 'B', 'C', 'D', 'E', 'F']
load_time = time.time()
print('Grabbing loan data at {}'.format(load_time))
predictions = LCP.get_LC_loans(auth_keys['LC_auth_key'], model_data,
                               zip3_loc_data, use_grades)