import cairosvg
from scipy.spatial import KDTree  # nearest-neighbour lookups
import scipy
import os

# Project root is hard-coded; the commented alternative derives it from this
# file's location instead (only works when __file__ is defined).
base_dir = '/Users/james/Data_Incubator/loan-picker'
#base_dir = os.path.dirname(os.path.realpath(__file__))

# Directory strings keep their trailing slash because they are also handed
# to the LCL helpers below as-is.
data_dir = os.path.join(base_dir, 'static/data/')
fig_dir = os.path.join(base_dir, 'static/images/')
movie_dir = os.path.join(base_dir, 'static/movies/')

# Loan table; the 'issue_d' column is parsed as dates on load.
data_name = 'all_loans_proc'
LD = pd.read_csv(os.path.join(data_dir, data_name), parse_dates=['issue_d'])

# Location lookups from the project's LCL helpers, grouped by FIPS / zip3 / zip.
fips_data = LCL.load_location_data(data_dir, group_by='fips')
zip3_data = LCL.load_location_data(data_dir, group_by='zip3')
fips_to_zip = LCL.make_fips_to_zip_dict(data_dir, group_by='zip')

#%% build a KD-tree used for nearest-neighbour imputation of missing data
base_map = LCL.load_base_map(
    os.path.join(fig_dir, 'USA_Counties_text.svg'),
    ax_xml=True,
)
county_paths, state_paths = LCL.get_map_paths(base_map, fips_to_zip)
title_path = base_map.findAll('text')[0]  # first <text> element of the map
map_coords = LCH.extract_fips_coords(county_paths)
ktree = KDTree(map_coords.values)  # nearest-neighbour tree over map coordinates

#%% inputs for the sequence of decision trees that is rendered as a movie
X = LD[['longitude', 'latitude']]
y = LD['ROI']  # target is average return by area, not portfolio return

max_levels = 16