def loan_mapping(map_rendered=False):
    """Render the loan-mapping page.

    On GET, shows the empty map form. On a valid POST, computes the
    requested group-by/aggregation over the loan data, repaints the
    county-level base map in place, and saves the map's colorbar to a
    PNG so the template can display it.

    Parameters
    ----------
    map_rendered : bool
        Whether a map has been generated for this request; flipped to
        True after a successful POST so the template shows the result.
    """
    mform = map_form(request.form)
    if request.method == 'POST' and mform.validate():
        # User is posting: get form data and build the requested map.
        map_rendered = True
        # Compute desired group-by/aggregation over the loan data.
        data = LCH.compute_group_avgs(LD, mform.data['col_name'],
                                      mform.data['grouping_var'],
                                      mform.data['agg_fun'],
                                      state_fips_dict=state_fips_dict,
                                      min_counts=50)
        data.name = mform.data['col_name']
        if mform.data['agg_fun'] == 'count':
            data.name = 'counts'
        # Paint base map by county. The returned palette was previously
        # bound to an unused local (`pal`); the call is kept for its
        # side effect on app.base_map.
        LCH.paint_map(data, app.base_map, app.county_paths, fips_to_zip,
                      color='cube', name_legend_map=name_legend_map,
                      agg_fun=mform.data['agg_fun'])
        # Save the colorbar for the map as a PNG for the template.
        plt.savefig(fig_dir + 'map_cbar.png', dpi=500, format='png')
        plt.close()
    # rnum is a cache-busting random suffix for the generated images.
    return render_template('loan_mapping.html', map_form=mform,
                           svg=Markup(str(app.base_map)),
                           rnum=np.random.randint(0, 100000),
                           map_rendered=map_rendered)
def loan_mapping(map_rendered=False):
    """Serve the loan-mapping page, regenerating the county map on a
    valid POST and returning the rendered template either way."""
    form = map_form(request.form)
    is_valid_post = request.method == 'POST' and form.validate()
    if is_valid_post:
        # Valid submission: run the requested aggregation and repaint.
        map_rendered = True
        agg = form.data['agg_fun']
        grouped = LCH.compute_group_avgs(
            LD,
            form.data['col_name'],
            form.data['grouping_var'],
            agg,
            state_fips_dict=state_fips_dict,
            min_counts=50)
        grouped.name = 'counts' if agg == 'count' else form.data['col_name']
        # Recolor the county-level base map in place.
        pal = LCH.paint_map(
            grouped, app.base_map, app.county_paths, fips_to_zip,
            color='cube', name_legend_map=name_legend_map, agg_fun=agg)
        # Persist the colorbar figure so the page can embed it.
        plt.savefig(fig_dir + 'map_cbar.png', dpi=500, format='png')
        plt.close()
    return render_template(
        'loan_mapping.html',
        map_form=form,
        svg=Markup(str(app.base_map)),
        rnum=np.random.randint(0, 100000),
        map_rendered=map_rendered)
# Directory layout under the app's static folder.
data_dir = os.path.join(base_dir,'static/data/')
fig_dir = os.path.join(base_dir,'static/images/')
movie_dir = os.path.join(base_dir,'static/movies/')
data_name = 'all_loans_proc'
# Full processed loan table; issue dates parsed as datetimes.
LD = pd.read_csv(data_dir + data_name, parse_dates=['issue_d'])
# Location lookups at two geographic granularities, plus a FIPS->zip map.
fips_data = LCL.load_location_data(data_dir, group_by='fips')
zip3_data = LCL.load_location_data(data_dir, group_by='zip3')
fips_to_zip = LCL.make_fips_to_zip_dict(data_dir, group_by='zip')
#%% make a k-tree for doing nearest neighbor imputation of missing data
base_map = LCL.load_base_map(fig_dir + 'USA_Counties_text.svg', ax_xml=True)
(county_paths,state_paths) = LCL.get_map_paths(base_map,fips_to_zip)
# First <text> element of the SVG holds the map title.
title_path = base_map.findAll('text')[0]
map_coords = LCH.extract_fips_coords(county_paths)
ktree = KDTree(map_coords.values) #make nearest neighbor tree
#%% make sequence of decision trees and build a movie
X = LD[['longitude','latitude']]
y = LD['ROI'] #plot average return by area, not portfolio return
max_levels = 16
min_samples_leaf = 50
# One column of county-level predictions per tree depth (1..max_levels).
pred_arr = np.zeros((len(fips_data),max_levels))
# NOTE: xrange/py2 idiom — this file is Python-2 era code.
for n in xrange(max_levels):
    # Fixed random_state keeps the tree sequence reproducible.
    clf = tree.DecisionTreeRegressor(max_depth=n+1,
                                     min_samples_leaf=min_samples_leaf,
                                     random_state=0)
    clf.fit(X, y)
    pred_arr[:,n] = clf.predict(fips_data[['longitude','latitude']].values)
#%% generate pngs for reg-tree at each depth value
# NOTE(review): this first section is the tail of an enclosing loop over
# loan terms (it reads `term`, `idx`, `cur_data`, `lifetimes`,
# `is_observed` from that scope) -- indentation reconstructed from the
# collapsed source; confirm against the surrounding code.
# Initialize matrix of hazard functions: one row per grade, one column
# per month of the loan term (0..term).
all_hazards[term] = np.zeros((len(keep_grades), term + 1))
for gidx, grade in enumerate(keep_grades):  # fit hazard model for each grade
    grade_data = cur_data.grade == grade
    naf.fit(lifetimes[grade_data],
            event_observed=is_observed[grade_data],
            label=grade,
            timeline=np.arange(term + 1))
    all_hazards[term][gidx, :] = naf.smoothed_hazard_(term_bandwidths[idx]).squeeze()

#%% Attach measured and then model-expected return metrics to each loan.
terms = LD.term.unique()  # set of unique loan terms
for term in terms:  # for each possible loan term
    # Get the relevant set of loans for this term.
    cur_loans = LD.term == term
    cur_LD = LD[cur_loans]

    # Measured (historical) performance of each loan.
    # `.loc` replaces the deprecated (and since-removed) `.ix` indexer;
    # with a boolean mask and a column label the two are equivalent.
    (NAR, net_returns, p_csum) = LCH.get_NARs(cur_LD, term)
    LD.loc[cur_loans, 'ROI'] = NAR  # measured performance of each loan
    LD.loc[cur_loans, 'net_returns'] = net_returns  # principal-weighted avg monthly returns
    LD.loc[cur_loans, 'prnc_weight'] = p_csum  # principal weighting (cumulative sum)
    LD.loc[cur_loans, 'default_prob'] = LD.loc[cur_loans, 'is_observed'].astype(float)  # observed default indicator

    # Model-based expected performance; deliberately overwrites the
    # measured ROI/default_prob/net_returns/prnc_weight columns above.
    (exp_NAR, tot_default_prob, exp_num_pymnts, exp_net_returns, exp_csum) = \
        LCH.get_expected_NARs(cur_LD, term, all_hazards[term])
    LD.loc[cur_loans, 'ROI'] = exp_NAR
    LD.loc[cur_loans, 'default_prob'] = tot_default_prob
    LD.loc[cur_loans, 'exp_num_pymnts'] = exp_num_pymnts
    LD.loc[cur_loans, 'net_returns'] = exp_net_returns
    LD.loc[cur_loans, 'prnc_weight'] = exp_csum
    LD.loc[cur_loans, 'best_NAR'] = LCH.get_best_returns(cur_LD, term)
# NOTE(review): these first statements are the interior of an enclosing
# per-grade hazard-fitting loop (they read `cur_data`, `grade`, `gidx`,
# `term`, `idx`, `lifetimes`, `is_observed` from that scope) --
# indentation reconstructed from the collapsed source; confirm against
# the surrounding code.
grade_data = cur_data.grade == grade
naf.fit(lifetimes[grade_data],
        event_observed=is_observed[grade_data],
        label=grade,
        timeline=np.arange(term + 1))
all_hazards[term][gidx, :] = naf.smoothed_hazard_(term_bandwidths[idx]).squeeze()

#%% Attach measured and then model-expected return metrics to each loan.
terms = LD.term.unique()  # set of unique loan terms
for term in terms:  # for each possible loan term
    # Get the relevant set of loans for this term.
    cur_loans = LD.term == term
    cur_LD = LD[cur_loans]

    # Measured (historical) performance of each loan.
    # `.loc` replaces the deprecated (and since-removed) `.ix` indexer;
    # with a boolean mask and a column label the two are equivalent.
    (NAR, net_returns, p_csum) = LCH.get_NARs(cur_LD, term)
    LD.loc[cur_loans, 'ROI'] = NAR  # measured performance of each loan
    LD.loc[cur_loans, 'net_returns'] = net_returns  # principal-weighted avg monthly returns
    LD.loc[cur_loans, 'prnc_weight'] = p_csum  # principal weighting (cumulative sum)
    LD.loc[cur_loans, 'default_prob'] = LD.loc[cur_loans, 'is_observed'].astype(float)  # observed default indicator

    # Model-based expected performance; deliberately overwrites the
    # measured ROI/default_prob/net_returns/prnc_weight columns above.
    (exp_NAR, tot_default_prob, exp_num_pymnts, exp_net_returns, exp_csum) = \
        LCH.get_expected_NARs(cur_LD, term, all_hazards[term])
    LD.loc[cur_loans, 'ROI'] = exp_NAR
    LD.loc[cur_loans, 'default_prob'] = tot_default_prob
    LD.loc[cur_loans, 'exp_num_pymnts'] = exp_num_pymnts
    LD.loc[cur_loans, 'net_returns'] = exp_net_returns
    LD.loc[cur_loans, 'prnc_weight'] = exp_csum