# --- Paths and data loading -------------------------------------------------
base_dir = '/Users/james/Data_Incubator/loan-picker'
#base_dir = os.path.dirname(os.path.realpath(__file__))
data_dir = os.path.join(base_dir, 'static/data/')
fig_dir = os.path.join(base_dir, 'static/images/')
movie_dir = os.path.join(base_dir, 'static/movies/')

data_name = 'all_loans_proc'
# Parse issue dates up front so downstream .dt accessors work.
LD = pd.read_csv(data_dir + data_name, parse_dates=['issue_d'])

# Location lookup tables keyed by county FIPS code and by 3-digit zip.
fips_data = LCL.load_location_data(data_dir, group_by='fips')
zip3_data = LCL.load_location_data(data_dir, group_by='zip3')
fips_to_zip = LCL.make_fips_to_zip_dict(data_dir, group_by='zip')

#%% make a k-tree for doing nearest neighbor imputation of missing data
base_map = LCL.load_base_map(fig_dir + 'USA_Counties_text.svg', ax_xml=True)
(county_paths, state_paths) = LCL.get_map_paths(base_map, fips_to_zip)
title_path = base_map.findAll('text')[0]
map_coords = LCH.extract_fips_coords(county_paths)
ktree = KDTree(map_coords.values)  # make nearest neighbor tree

#%% make sequence of decision trees and build a movie
X = LD[['longitude', 'latitude']]
y = LD['ROI']  # plot average return by area, not portfolio return
max_levels = 16        # deepest tree to fit (one frame per depth)
min_samples_leaf = 50  # regularization: minimum loans per leaf

# One column of county-level predictions per tree depth.
pred_arr = np.zeros((len(fips_data), max_levels))
# BUGFIX: was `xrange`, which only exists in Python 2; `range` works in both.
for n in range(max_levels):
    clf = tree.DecisionTreeRegressor(max_depth=n + 1,
                                     min_samples_leaf=min_samples_leaf,
                                     random_state=0)  # fixed seed for reproducible frames
    clf.fit(X, y)
    pred_arr[:, n] = clf.predict(fips_data[['longitude', 'latitude']].values)
def reset_map():
    """Reload the base SVG map, rebuild the county/state path caches on the
    app object, and send the client back to the loan-mapping page."""
    print('resetting map')
    app.base_map = LCL.load_base_map(fig_dir + map_name)
    app.county_paths, app.state_paths = LCL.get_map_paths(app.base_map,
                                                          fips_to_zip)
    return redirect('/loan_mapping')
def reset_map():
    # NOTE(review): this is a duplicate of an identical reset_map definition
    # earlier in the file; being defined later, this one shadows the first.
    # Consider deleting one of the two.
    # Reloads the base SVG map, regenerates the county/state paths cached on
    # the app object, then redirects back to the loan-mapping page.
    print('resetting map')
    app.base_map = LCL.load_base_map(fig_dir + map_name)
    (app.county_paths, app.state_paths) = LCL.get_map_paths(app.base_map, fips_to_zip)
    return redirect('/loan_mapping')
#load lookup tables for converting zips and states to county FIPS codes for plotting fips_to_zip = dill.load(open(data_dir + 'fips_to_zip.p', "rb" ) ) state_fips_dict = dill.load(open(data_dir + 'state_fips_dict.p',"rb")) #get lat/long coordinates for each 3-digit zip zip3_loc_path = os.path.join(data_dir,'zip3_loc_data.p') with open(zip3_loc_path,'rb') as in_strm: zip3_loc_data = dill.load(in_strm) # precompute additional columns for convenience when plotting LD['short_purpose'] = LD['purpose'].map(purpose_map) LD['issue_year'] = LD['issue_d'].dt.year # load base map and get state and county paths app.base_map = LCL.load_base_map(fig_dir + map_name) (app.county_paths,app.state_paths) = LCL.get_map_paths(app.base_map,fips_to_zip) predictor = namedtuple('predictor', ['col_name', 'full_name', 'norm_type']) model_data = LCP.load_pickled_models() sim_lookup = LCP.get_validation_data() #%% use_grades = ['A','B','C','D','E','F'] load_time = time.time() print('Grabbing loan data at {}'.format(load_time)) predictions = LCP.get_LC_loans(auth_keys['LC_auth_key'], model_data, zip3_loc_data, use_grades) #%% @app.route('/') #redirect to index page def main():
# NOTE(review): this section is a near-verbatim duplicate of an earlier setup
# block in the file (same loads, same app-state assignments); the later
# assignments overwrite the earlier ones. Consider removing one copy.
fips_to_zip = dill.load(open(data_dir + 'fips_to_zip.p', "rb"))
state_fips_dict = dill.load(open(data_dir + 'state_fips_dict.p', "rb"))

#get lat/long coordinates for each 3-digit zip
zip3_loc_path = os.path.join(data_dir, 'zip3_loc_data.p')
with open(zip3_loc_path, 'rb') as in_strm:
    zip3_loc_data = dill.load(in_strm)

# precompute additional columns for convenience when plotting
LD['short_purpose'] = LD['purpose'].map(purpose_map)
LD['issue_year'] = LD['issue_d'].dt.year

# load base map and get state and county paths, cached on the Flask app object
app.base_map = LCL.load_base_map(fig_dir + map_name)
(app.county_paths, app.state_paths) = LCL.get_map_paths(app.base_map, fips_to_zip)

# record type describing one model input column (name, display name, normalization)
predictor = namedtuple('predictor', ['col_name', 'full_name', 'norm_type'])

model_data = LCP.load_pickled_models()
sim_lookup = LCP.get_validation_data()

#%%
use_grades = ['A', 'B', 'C', 'D', 'E', 'F']
load_time = time.time()
print('Grabbing loan data at {}'.format(load_time))
# fetch currently listed Lending Club loans and score them with the models
predictions = LCP.get_LC_loans(auth_keys['LC_auth_key'], model_data,
                               zip3_loc_data, use_grades)

#%%
@app.route('/')  #redirect to index page