import os
import numpy as np
import pandas as pd
from scipy.spatial import KDTree  # assumed source of the KDTree used below
from sklearn import tree
# LCL and LCH are the project's own data-loading and map-helper modules,
# assumed (like base_dir) to be defined/imported earlier in the full script

data_dir = os.path.join(base_dir,'static/data/')
fig_dir = os.path.join(base_dir,'static/images/')
movie_dir = os.path.join(base_dir,'static/movies/')

data_name = 'all_loans_proc'
LD = pd.read_csv(os.path.join(data_dir, data_name), parse_dates=['issue_d'])

fips_data = LCL.load_location_data(data_dir, group_by='fips')
zip3_data = LCL.load_location_data(data_dir, group_by='zip3')
fips_to_zip = LCL.make_fips_to_zip_dict(data_dir, group_by='zip')
        
#%% build a k-d tree for nearest-neighbor imputation of missing data
base_map = LCL.load_base_map(fig_dir + 'USA_Counties_text.svg', ax_xml=True)
(county_paths,state_paths) = LCL.get_map_paths(base_map,fips_to_zip)
title_path = base_map.findAll('text')[0]
map_coords = LCH.extract_fips_coords(county_paths)
ktree = KDTree(map_coords.values)  # k-d tree over county map coordinates for nearest-neighbor queries
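
# Hypothetical illustration (not code from this project) of how the k-d tree
# can impute missing county-level values: a county with no data of its own
# borrows the value of its nearest neighbor in map_coords. The helper name
# nn_impute and the toy values in the usage comment are placeholders.
def nn_impute(query_xy, known_values, kdtree):
    """Return values at query points copied from their nearest neighbors in the tree."""
    _, nn_idx = kdtree.query(np.atleast_2d(query_xy), k=1)
    return known_values[nn_idx]

# example usage with made-up per-county values:
#   toy_values = np.random.rand(len(map_coords))
#   nn_impute([500.0, 300.0], toy_values, ktree)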

#%% fit a sequence of decision trees at increasing depth and build a movie
X = LD[['longitude','latitude']]
y = LD['ROI'] #plot average return by area, not portfolio return

max_levels = 16
min_samples_leaf = 50
pred_arr = np.zeros((len(fips_data),max_levels))
for n in range(max_levels):
    # fit a regression tree of depth n+1 on loan coordinates, then predict the
    # average ROI at each county centroid
    clf = tree.DecisionTreeRegressor(max_depth=n+1, min_samples_leaf=min_samples_leaf, random_state=0)
    clf.fit(X, y)
    pred_arr[:, n] = clf.predict(fips_data[['longitude', 'latitude']].values)
    
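# Hypothetical sanity-check rendering (not the project's SVG-based county-map
# figures, which presumably color the county paths loaded above): scatter the
# county centroids colored by each depth's predicted ROI and write one PNG per
# depth. Assumes matplotlib; the file-name pattern is a placeholder.
import matplotlib.pyplot as plt
for n in range(max_levels):
    fig, ax = plt.subplots(figsize=(8, 5))
    sc = ax.scatter(fips_data['longitude'], fips_data['latitude'],
                    c=pred_arr[:, n], s=8, cmap='RdYlGn')
    fig.colorbar(sc, ax=ax, label='predicted ROI')
    ax.set_title('Regression tree predictions, max_depth = %d' % (n + 1))
    fig.savefig(os.path.join(fig_dir, 'tree_depth_check_%02d.png' % (n + 1)), dpi=150)
    plt.close(fig)
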
#%% generate PNGs of the regression-tree map at each depth value