def get_latest_records(auth_key, zip3_loc_data,
                       use_grades=('A', 'B', 'C', 'D', 'E', 'F')):
    """Fetch the currently listed LendingClub loans and build model records.

    Sends a GET request to the investor ``loans/listing`` endpoint (with
    ``showAll=true``), converts every entry of the response's ``'loans'``
    list into a record via ``make_record`` and keeps only the records whose
    ``'grade'`` is in ``use_grades``.

    Args:
        auth_key: value sent in the ``Authorization`` request header.
        zip3_loc_data: location lookup passed through to ``LCL.get_zip_loc``
            to resolve latitude/longitude from a loan's ``addrZip``.
        use_grades: container of loan grades to keep (only tested with
            ``in``, never modified).

    Returns:
        List of records (whatever ``make_record`` returns) restricted to the
        requested grades.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    api_version = 'v1'
    loan_list_url = ('https://api.lendingclub.com/api/investor/'
                     + api_version + '/loans/listing')
    header = {'Authorization': auth_key, 'Content-Type': 'application/json'}
    payload = {'showAll': 'true'}

    resp = requests.get(loan_list_url, headers=header, params=payload)
    # Fail loudly on an HTTP error instead of hitting an obscure
    # KeyError/JSON error when the error body has no 'loans' entry.
    resp.raise_for_status()
    loans = resp.json()['loans']

    # Pairs of (output column name, function extracting that value from one
    # raw loan dict returned by the API).
    record_map = (
        ('acc_now_delinq', lambda x: x['accNowDelinq']),
        ('annual_inc', lambda x: x['annualInc']),
        ('collections_12_mths_ex_med', lambda x: x['collections12MthsExMed']),
        ('cr_line_dur', lambda x: LCL.get_cr_line_dur(x['earliestCrLine'])),
        ('delinq_2yrs', lambda x: x['delinq2Yrs']),
        ('desc_length', lambda x: LCL.get_desc_length(x['desc'])),
        ('dti', lambda x: x['dti']),
        # convert to years from months
        ('emp_length', lambda x: LCL.get_emp_length(x['empLength'])),
        ('id', lambda x: x['id']),
        # use amount requested rather than funded amnt!
        ('loan_amnt', lambda x: x['loanAmount']),
        ('inq_last_6mths', lambda x: x['inqLast6Mths']),
        ('int_rate', lambda x: x['intRate']),
        ('mths_since_last_delinq',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastDelinq'])),
        ('mths_since_last_major_derog',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastMajorDerog'])),
        ('mths_since_last_record',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastRecord'])),
        ('num_add_desc', lambda x: LCL.get_num_descs(x['desc'])),
        ('open_acc', lambda x: x['openAcc']),
        ('pub_rec', lambda x: x['pubRec']),
        ('revol_bal', lambda x: x['revolBal']),
        ('revol_util', lambda x: x['revolUtil']),
        ('term', lambda x: x['term']),
        ('total_acc', lambda x: x['totalAcc']),
        ('tot_cur_bal', lambda x: x['totCurBal']),
        ('addr_state', lambda x: x['addrState']),
        ('home_ownership', lambda x: x['homeOwnership']),
        ('grade', lambda x: x['grade']),
        ('purpose', lambda x: x['purpose']),
        ('latitude',
         lambda x: LCL.get_zip_loc(x['addrZip'], zip3_loc_data, 'latitude')),
        ('longitude',
         lambda x: LCL.get_zip_loc(x['addrZip'], zip3_loc_data, 'longitude')),
    )

    records = [make_record(loan, record_map) for loan in loans]
    return [record for record in records if record['grade'] in use_grades]
def get_latest_records(auth_key, zip3_loc_data,
                       use_grades=('A', 'B', 'C', 'D', 'E', 'F')):
    """Fetch the currently listed LendingClub loans and build model records.

    Sends a GET request to the investor ``loans/listing`` endpoint (with
    ``showAll=true``), converts every entry of the response's ``'loans'``
    list into a record via ``make_record`` and keeps only the records whose
    ``'grade'`` is in ``use_grades``.

    Args:
        auth_key: value sent in the ``Authorization`` request header.
        zip3_loc_data: location lookup passed through to ``LCL.get_zip_loc``
            to resolve latitude/longitude from a loan's ``addrZip``.
        use_grades: container of loan grades to keep (only tested with
            ``in``, never modified).

    Returns:
        List of records (whatever ``make_record`` returns) restricted to the
        requested grades.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
    """
    api_version = 'v1'
    loan_list_url = ('https://api.lendingclub.com/api/investor/'
                     + api_version + '/loans/listing')
    header = {'Authorization': auth_key, 'Content-Type': 'application/json'}
    payload = {'showAll': 'true'}

    resp = requests.get(loan_list_url, headers=header, params=payload)
    # Fail loudly on an HTTP error instead of hitting an obscure
    # KeyError/JSON error when the error body has no 'loans' entry.
    resp.raise_for_status()
    loans = resp.json()['loans']

    # Pairs of (output column name, function extracting that value from one
    # raw loan dict returned by the API).
    record_map = (
        ('acc_now_delinq', lambda x: x['accNowDelinq']),
        ('annual_inc', lambda x: x['annualInc']),
        ('collections_12_mths_ex_med', lambda x: x['collections12MthsExMed']),
        ('cr_line_dur', lambda x: LCL.get_cr_line_dur(x['earliestCrLine'])),
        ('delinq_2yrs', lambda x: x['delinq2Yrs']),
        ('desc_length', lambda x: LCL.get_desc_length(x['desc'])),
        ('dti', lambda x: x['dti']),
        # convert to years from months
        ('emp_length', lambda x: LCL.get_emp_length(x['empLength'])),
        ('id', lambda x: x['id']),
        # use amount requested rather than funded amnt!
        ('loan_amnt', lambda x: x['loanAmount']),
        ('inq_last_6mths', lambda x: x['inqLast6Mths']),
        ('int_rate', lambda x: x['intRate']),
        ('mths_since_last_delinq',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastDelinq'])),
        ('mths_since_last_major_derog',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastMajorDerog'])),
        ('mths_since_last_record',
         lambda x: LCL.get_mnths_since(x['mthsSinceLastRecord'])),
        ('num_add_desc', lambda x: LCL.get_num_descs(x['desc'])),
        ('open_acc', lambda x: x['openAcc']),
        ('pub_rec', lambda x: x['pubRec']),
        ('revol_bal', lambda x: x['revolBal']),
        ('revol_util', lambda x: x['revolUtil']),
        ('term', lambda x: x['term']),
        ('total_acc', lambda x: x['totalAcc']),
        ('tot_cur_bal', lambda x: x['totCurBal']),
        ('addr_state', lambda x: x['addrState']),
        ('home_ownership', lambda x: x['homeOwnership']),
        ('grade', lambda x: x['grade']),
        ('purpose', lambda x: x['purpose']),
        ('latitude',
         lambda x: LCL.get_zip_loc(x['addrZip'], zip3_loc_data, 'latitude')),
        ('longitude',
         lambda x: LCL.get_zip_loc(x['addrZip'], zip3_loc_data, 'longitude')),
    )

    records = [make_record(loan, record_map) for loan in loans]
    return [record for record in records if record['grade'] in use_grades]
'LoanStats3b.csv': '2012-2013', 'LoanStats3c.csv': '2013-2014', 'LoanStats3d.csv': '2015'} """ load_files = ['LoanStats3a.csv','LoanStats3b.csv','LoanStats3c.csv','LoanStats3d.csv'] #list of loan statuses to keep keep_status = ['Current','Fully Paid','Late (16-30 days)','Late (31-120 days)', 'Default','Charged Off','In Grace Period'] keep_terms = [36, 60] #list of loan terms to keep (36,60) keep_grades = ['A','B','C','D','E','F'] #list of loan grades to keep [A-G] load_files = [data_dir + file for file in load_files] LD = LCL.load_lending_data(load_files,keep_status,keep_terms,keep_grades) print('loaded {0} loans'.format(len(LD))) print_figs = False #%% #load long/lat data for each zip-code zip3_data = LCL.load_location_data(data_dir,group_by='zip3') LD['zip3'] = LD['zip3'].astype(int) LD = pd.merge(LD, zip3_data, how='inner', left_on='zip3', right_index=True) #%% Compute hazard functions for each loan grade and term term_bandwidths = [4., 8.] #list of NAF smoothing bandwidth (for each term) naf = NelsonAalenFitter(nelson_aalen_smoothing=False) #init NAF model
import cairosvg
from scipy.spatial import KDTree  # for finding KNN
import scipy
import os

# Project root and the static sub-directories read by this script.
base_dir = '/Users/james/Data_Incubator/loan-picker'
#base_dir = os.path.dirname(os.path.realpath(__file__))
movie_dir = os.path.join(base_dir, 'static/movies/')
fig_dir = os.path.join(base_dir, 'static/images/')
data_dir = os.path.join(base_dir, 'static/data/')

# Pre-processed loan table; issue_d is parsed as a date column.
data_name = 'all_loans_proc'
LD = pd.read_csv(os.path.join(data_dir, data_name), parse_dates=['issue_d'])

# Location lookups grouped at different levels.
fips_data = LCL.load_location_data(data_dir, group_by='fips')
zip3_data = LCL.load_location_data(data_dir, group_by='zip3')
fips_to_zip = LCL.make_fips_to_zip_dict(data_dir, group_by='zip')

#%% make a k-tree for doing nearest neighbor imputation of missing data
base_map = LCL.load_base_map(
    os.path.join(fig_dir, 'USA_Counties_text.svg'), ax_xml=True)
county_paths, state_paths = LCL.get_map_paths(base_map, fips_to_zip)
title_path = base_map.findAll('text')[0]  # first <text> element of the map
map_coords = LCH.extract_fips_coords(county_paths)
ktree = KDTree(map_coords.values)  # make nearest neighbor tree

#%% make sequence of decision trees and build a movie
max_levels = 16
X = LD[['longitude', 'latitude']]
y = LD['ROI']  # plot average return by area, not portfolio return
# Filters handed to LCL.load_lending_data.
keep_terms = [36, 60]  # list of loan terms to keep (36,60)
keep_grades = ['A', 'B', 'C', 'D', 'E', 'F']  # list of loan grades to keep [A-G]
# list of loan statuses to keep
keep_status = [
    'Current',
    'Fully Paid',
    'Late (16-30 days)',
    'Late (31-120 days)',
    'Default',
    'Charged Off',
    'In Grace Period',
]

# Loan export files, prefixed with data_dir (plain string concatenation, so
# data_dir is expected to end with a path separator).
load_files = [
    data_dir + file_name
    for file_name in ('LoanStats3a.csv', 'LoanStats3b.csv',
                      'LoanStats3c.csv', 'LoanStats3d.csv')
]

LD = LCL.load_lending_data(load_files, keep_status, keep_terms, keep_grades)
print('loaded {0} loans'.format(len(LD)))
print_figs = False

#%%
# load long/lat data for each zip-code
zip3_data = LCL.load_location_data(data_dir, group_by='zip3')
# Cast zip3 to int before joining it against the index of zip3_data.
LD['zip3'] = LD['zip3'].astype(int)
LD = LD.merge(zip3_data, how='inner', left_on='zip3', right_index=True)

#%% Compute hazard functions for each loan grade and term
term_bandwidths = [4.0, 8.0]  # list of NAF smoothing bandwidth (for each term)
naf = NelsonAalenFitter(nelson_aalen_smoothing=False)  # init NAF model
def reset_map():
    """Reload the base map onto ``app`` and redirect to ``/loan_mapping``.

    Re-reads the map file ``fig_dir + map_name``, recomputes the county and
    state paths from it and stores all three on the ``app`` object.
    """
    print('resetting map')
    fresh_map = LCL.load_base_map(fig_dir + map_name)
    app.base_map = fresh_map
    county_paths, state_paths = LCL.get_map_paths(app.base_map, fips_to_zip)
    app.county_paths = county_paths
    app.state_paths = state_paths
    return redirect('/loan_mapping')
#load lookup tables for converting zips and states to county FIPS codes for plotting fips_to_zip = dill.load(open(data_dir + 'fips_to_zip.p', "rb" ) ) state_fips_dict = dill.load(open(data_dir + 'state_fips_dict.p',"rb")) #get lat/long coordinates for each 3-digit zip zip3_loc_path = os.path.join(data_dir,'zip3_loc_data.p') with open(zip3_loc_path,'rb') as in_strm: zip3_loc_data = dill.load(in_strm) # precompute additional columns for convenience when plotting LD['short_purpose'] = LD['purpose'].map(purpose_map) LD['issue_year'] = LD['issue_d'].dt.year # load base map and get state and county paths app.base_map = LCL.load_base_map(fig_dir + map_name) (app.county_paths,app.state_paths) = LCL.get_map_paths(app.base_map,fips_to_zip) predictor = namedtuple('predictor', ['col_name', 'full_name', 'norm_type']) model_data = LCP.load_pickled_models() sim_lookup = LCP.get_validation_data() #%% use_grades = ['A','B','C','D','E','F'] load_time = time.time() print('Grabbing loan data at {}'.format(load_time)) predictions = LCP.get_LC_loans(auth_keys['LC_auth_key'], model_data, zip3_loc_data, use_grades) #%% @app.route('/') #redirect to index page
def reset_map():
    """Reload the base map onto ``app`` and redirect to ``/loan_mapping``.

    Re-reads the map file ``fig_dir + map_name``, recomputes the county and
    state paths from it and stores all three on the ``app`` object.
    """
    print('resetting map')
    fresh_map = LCL.load_base_map(fig_dir + map_name)
    app.base_map = fresh_map
    county_paths, state_paths = LCL.get_map_paths(app.base_map, fips_to_zip)
    app.county_paths = county_paths
    app.state_paths = state_paths
    return redirect('/loan_mapping')
#load lookup tables for converting zips and states to county FIPS codes for plotting
# NOTE(review): dill/pickle loading can execute arbitrary code; these files
# are assumed to be trusted, locally generated artifacts.
# Use context managers so the file handles are closed right after loading.
with open(data_dir + 'fips_to_zip.p', 'rb') as in_strm:
    fips_to_zip = dill.load(in_strm)
with open(data_dir + 'state_fips_dict.p', 'rb') as in_strm:
    state_fips_dict = dill.load(in_strm)

#get lat/long coordinates for each 3-digit zip
zip3_loc_path = os.path.join(data_dir, 'zip3_loc_data.p')
with open(zip3_loc_path, 'rb') as in_strm:
    zip3_loc_data = dill.load(in_strm)

# precompute additional columns for convenience when plotting
LD['short_purpose'] = LD['purpose'].map(purpose_map)
LD['issue_year'] = LD['issue_d'].dt.year

# load base map and get state and county paths
app.base_map = LCL.load_base_map(fig_dir + map_name)
(app.county_paths, app.state_paths) = LCL.get_map_paths(app.base_map, fips_to_zip)

predictor = namedtuple('predictor', ['col_name', 'full_name', 'norm_type'])
model_data = LCP.load_pickled_models()
sim_lookup = LCP.get_validation_data()

#%%
# Fetch the current loan listing and model predictions at start-up,
# restricted to the grades below.
use_grades = ['A', 'B', 'C', 'D', 'E', 'F']
load_time = time.time()
print('Grabbing loan data at {}'.format(load_time))
predictions = LCP.get_LC_loans(auth_keys['LC_auth_key'], model_data,
                               zip3_loc_data, use_grades)