def estimate_transformation(source, target):
    '''Fit a thin plate spline that maps the source points onto the target points.

    source -> n x 2 array of source points.
    target -> n x 2 array of target points.
    return -> (value returned by TPS.fit, described by the original author
              as the bending energy; the fitted TPS object)
    '''
    transform = tps.TPS()
    bending_energy = transform.fit(source, target)
    return bending_energy, transform
ecozone_values = [] for zone in ecozones: cwd = os.getcwd() ecozone_shapefile = cwd + '/ecozone_shp/' + zone + '.shp' boolean_map = GD.get_intersect_boolean_array( ecozone_shapefile, shapefile, False, True) surface, maxmin = rf.random_forest_interpolator( latlon_station, days_dict, year, '# Days', shapefile, False, file_path_elev, idx_list, True) AvVal = GD.get_average_in_ecozone(boolean_map, surface) ecozone_values.append(AvVal) elif interpolator == 'TPSS': grd_size, maxmin = tps.TPS(latlon_station, days_dict, year, '# Days', shapefile, False, None, True, True) try: inBoreal = GD.is_station_in_boreal( latlon_station, days_dict, boreal_shapefile) filtered_dict = { k: v for k, v in days_dict.items() if k in inBoreal } num_stations = len(filtered_dict.keys( )) #Number clusters= # stations / 3, /5, /10 cluster_num1 = int(round(num_stations / 3)) cluster_num2 = int(round(num_stations / 5)) cluster_num3 = int(round(num_stations / 10)) cluster_num,MAE,stdev_stations = tps.select_block_size_tps(10,'clusters',latlon_station,days_dict,grd_size,shapefile,\
def run_comparison(var_name, input_date, interpolation_types, rep, loc_dictionary,
                   cvar_dictionary, file_path_elev, elev_array, idx_list,
                   phi_input=None, calc_phi=True,
                   kernels={'temp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
                            'rh': ['307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'],
                            'pcp': ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
                            'wind': ['316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)']}):
    '''Execute the shuffle-split cross-validation for the given interpolation types
    and build the surface with the best-scoring one.

    Parameters
    ----------
    var_name : string
        name of the weather variable; used to index both interpolation_types
        and kernels
    input_date : string
        date passed through to the interpolators
    interpolation_types : mapping
        indexed by var_name to obtain the list of method names to compare;
        supported names are 'IDW2', 'IDW3', 'IDW4', 'IDEW2', 'IDEW3', 'IDEW4',
        'TPS', 'GPR' and 'RF'
    rep : int
        number of replications, forwarded to the IDW and IDEW cross-validation
    loc_dictionary : dictionary
        station locations, forwarded to every interpolator
    cvar_dictionary : dictionary
        station values, forwarded to every interpolator
    file_path_elev : string
        elevation lookup file, forwarded to the IDEW, RF and GPR routines
    elev_array : ndarray
        elevation array, forwarded to the IDEW, RF and GPR cross-validation
    idx_list : int or list
        forwarded to the elevation-aware routines together with file_path_elev
    phi_input : float
        forwarded to tps.TPS, default is None
    calc_phi : bool
        forwarded to tps.TPS
    kernels : dictionary
        kernel specification per weather variable, used for GPR only

    Returns
    ----------
    tuple
        (best_method, choix_surf, maxmin): the name of the method with the
        lowest cross-validation score, and the two values returned by that
        method's interpolator

    Raises
    ----------
    ValueError
        if no interpolation method was supplied for var_name

    Notes
    ----------
    The study-area ``shapefile`` is read from module scope; it is not a parameter.
    An unsupported method name prints a message and terminates via sys.exit().
    '''
    supported = ('IDW2', 'IDW3', 'IDW4', 'IDEW2', 'IDEW3', 'IDEW4', 'TPS',
                 'GPR', 'RF')
    idw_methods = ('IDW2', 'IDW3', 'IDW4')
    idew_methods = ('IDEW2', 'IDEW3', 'IDEW4')

    MAE_dict = {}
    for method in interpolation_types[var_name]:
        if method not in supported:
            print(
                'The method %s is not currently a supported interpolation type.'
                % (method))
            sys.exit()

        if method in idw_methods:
            # The trailing digit of the method name is the IDW power.
            MAE = idw.shuffle_split(loc_dictionary, cvar_dictionary, shapefile,
                                    int(method[-1]), rep, False, res=10000)
        elif method in idew_methods:
            MAE = idew.shuffle_split_IDEW(loc_dictionary, cvar_dictionary,
                                          shapefile, file_path_elev, elev_array,
                                          idx_list, int(method[-1]), rep,
                                          res=10000)
        elif method == 'TPS':
            # NOTE(review): TPS/RF/GPR pass a fixed 10 where IDW/IDEW pass
            # `rep` - kept as in the original, confirm whether intended.
            MAE = tps.shuffle_split_tps(loc_dictionary, cvar_dictionary,
                                        shapefile, 10, res=10000)
        elif method == 'RF':
            MAE = rf.shuffle_split_rf(loc_dictionary, cvar_dictionary,
                                      shapefile, file_path_elev, elev_array,
                                      idx_list, 10, res=10000)
        else:  # 'GPR'
            MAE = gpr.shuffle_split_gpr(loc_dictionary, cvar_dictionary,
                                        shapefile, file_path_elev, elev_array,
                                        idx_list, kernels[var_name], 10,
                                        res=10000)
        MAE_dict[method] = MAE

    if not MAE_dict:
        raise ValueError(
            'No interpolation methods were provided for %s.' % (var_name))

    best_method = min(MAE_dict, key=MAE_dict.get)
    print('The best method for %s is: %s' % (var_name, best_method))

    # Build the surface with the winning method. The loop variable `method`
    # must not be used here: after the loop it only holds the last name listed.
    if best_method in idw_methods:
        choix_surf, maxmin = idw.IDW(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, int(best_method[-1]), False,
            res=10000)  # Expand_area is not supported yet
    elif best_method in idew_methods:
        # IDEW also returns an elevation array, which is not needed here.
        choix_surf, maxmin, _ = idew.IDEW(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list, int(best_method[-1]), False,
            res=10000)  # Expand_area is not supported yet
    elif best_method == 'TPS':
        choix_surf, maxmin = tps.TPS(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     phi_input, False, calc_phi, res=10000)
    elif best_method == 'RF':
        choix_surf, maxmin = rf.random_forest_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list, False, res=10000)
    else:  # 'GPR'
        choix_surf, maxmin = gpr.GPR_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list, False, kernels[var_name], 0,
            False, False, res=10000)

    return best_method, choix_surf, maxmin
def run_comparison(
        var_name,
        input_date,
        interpolation_types,
        rep,
        loc_dictionary,
        cvar_dictionary,
        file_path_elev,
        elev_array,
        idx_list,
        phi_input=None,
        calc_phi=True,
        kernels={
            'temp':
            ['316**2 * Matern(length_scale=[5e+05, 5e+05, 6.01e+03], nu=0.5)'],
            'rh': [
                '307**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'
            ],
            'pcp':
            ['316**2 * Matern(length_scale=[5e+05, 5e+05, 4.67e+05], nu=0.5)'],
            'wind': [
                '316**2 * Matern(length_scale=[5e+05, 6.62e+04, 1.07e+04], nu=0.5)'
            ]
        }):
    '''Execute the shuffle-split cross-validation for the given interpolation types
    and build the surface with the best-scoring one.

    Parameters
    ----------
    var_name : string
        name of weather variable you are interpolating
    input_date : string
        date of weather data (day of fire season)
    interpolation_types : list
        list of interpolation types to consider; supported names are 'IDW2',
        'IDW3', 'IDW4', 'IDEW2', 'IDEW3', 'IDEW4', 'TPS', 'GPR' and 'RF'
    rep : int
        number of replications to run (forwarded to the IDW and IDEW
        cross-validation)
    loc_dictionary : dictionary
        dictionary of station locations
    cvar_dictionary : dictionary
        dictionary containing the weather data for each station available
    file_path_elev : string
        path to the elevation lookup file
    elev_array : ndarray
        array for elevation, create using IDEW interpolation (this is a trick to speed up code)
    idx_list : int
        position of the elevation column in the lookup file
    phi_input : float
        smoothing parameter for the thin plate spline, if 0 no smoothing, default is None (it is calculated)
    calc_phi : bool
        whether to calculate phi in the function, if True, phi can = None
    kernels : dictionary
        the kernels for each weather variable for gaussian process regression

    Returns
    ----------
    tuple
        (best_method, choix_surf, maxmin): the selected interpolation type
        name, and the two values returned by that method's interpolator

    Raises
    ----------
    ValueError
        if interpolation_types is empty

    Notes
    ----------
    The study-area ``shapefile`` is read from module scope; it is not a parameter.
    An unsupported method name prints a message and terminates via sys.exit().
    '''
    supported = ('IDW2', 'IDW3', 'IDW4', 'IDEW2', 'IDEW3', 'IDEW4', 'TPS',
                 'GPR', 'RF')
    idw_methods = ('IDW2', 'IDW3', 'IDW4')
    idew_methods = ('IDEW2', 'IDEW3', 'IDEW4')

    MAE_dict = {}
    for method in interpolation_types:
        if method not in supported:
            print(
                'The method %s is not currently a supported interpolation type.'
                % (method))
            sys.exit()

        if method in idw_methods:
            # The trailing digit of the method name is the IDW power.
            MAE = idw.shuffle_split(loc_dictionary, cvar_dictionary, shapefile,
                                    int(method[-1]), rep, False)
        elif method in idew_methods:
            MAE = idew.shuffle_split_IDEW(loc_dictionary, cvar_dictionary,
                                          shapefile, file_path_elev, elev_array,
                                          idx_list, int(method[-1]), rep)
        elif method == 'TPS':
            # NOTE(review): TPS/RF/GPR pass a fixed 10 where IDW/IDEW pass
            # `rep` - kept as in the original, confirm whether intended.
            MAE = tps.shuffle_split_tps(loc_dictionary, cvar_dictionary,
                                        shapefile, 10)
        elif method == 'RF':
            MAE = rf.shuffle_split_rf(loc_dictionary, cvar_dictionary,
                                      shapefile, file_path_elev, elev_array,
                                      idx_list, 10)
        else:  # 'GPR'
            MAE = gpr.shuffle_split_gpr(loc_dictionary, cvar_dictionary,
                                        shapefile, file_path_elev, elev_array,
                                        idx_list, kernels[var_name], 10)
        MAE_dict[method] = MAE

    if not MAE_dict:
        raise ValueError(
            'No interpolation methods were provided for %s.' % (var_name))

    best_method = min(MAE_dict, key=MAE_dict.get)
    print('The best method for %s is: %s' % (var_name, best_method))

    # Build the surface with the winning method. The loop variable `method`
    # must not be used here: after the loop it only holds the last name listed.
    if best_method in idw_methods:
        choix_surf, maxmin = idw.IDW(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, int(best_method[-1]),
            False)  # Expand_area is not supported yet
    elif best_method in idew_methods:
        # IDEW also returns an elevation array, which is not needed here.
        choix_surf, maxmin, _ = idew.IDEW(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list,
            int(best_method[-1]))  # Expand_area is not supported yet
    elif best_method == 'TPS':
        choix_surf, maxmin = tps.TPS(loc_dictionary, cvar_dictionary,
                                     input_date, 'Variable', shapefile, False,
                                     phi_input, False, calc_phi)
    elif best_method == 'RF':
        choix_surf, maxmin = rf.random_forest_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list, False)
    else:  # 'GPR'
        choix_surf, maxmin = gpr.GPR_interpolator(
            loc_dictionary, cvar_dictionary, input_date, 'Variable', shapefile,
            False, file_path_elev, idx_list, False, kernels[var_name], None,
            None, False, False)

    return best_method, choix_surf, maxmin
def stack_and_average(year1, year2, file_path_daily, file_path_hourly,
                      shapefile, file_path_elev, idx_list, method):
    '''Get the fire season duration for every year in between the two input years
    and average them. Output the average array.

    Parameters
    ----------
    year1 : int
        first year taken into account
    year2 : int
        last year taken into account
    file_path_daily : string
        path to the daily weather csv files from Environment & Climate Change Canada
    file_path_hourly : string
        path to the hourly feather files
    shapefile : string
        path to the study area shapefile
    file_path_elev : string
        path to the elevation lookup file
    idx_list : list
        column index of elevation information in the lookup file
    method : string
        type of interpolation to use to create the yearly arrays, one of: 'IDW2', 'IDW3', 'IDW4', 'TPSS', 'RF'

    Returns
    ----------
    ndarray
        - average of each pixel of all the years considered in array format

    Raises
    ----------
    ValueError
        if method is not one of the supported interpolation types
    '''
    idw_methods = ('IDW2', 'IDW3', 'IDW4')
    if method not in idw_methods and method not in ('TPSS', 'RF'):
        # Fail before any data is loaded; previously this only printed the
        # message and then crashed on the undefined start/end surfaces.
        raise ValueError(
            'Either that method does not exist or there is no support for it. You can use IDW2-4, TPSS, or RF'
        )

    list_of_arrays = []
    for year in range(int(year1), int(year2) + 1):
        print('Processing...' + str(year))
        year_str = str(year)
        days_dict, latlon_station = fwi.start_date_calendar_csv(
            file_path_daily, year_str)
        end_dict, latlon_station2 = fwi.end_date_calendar_csv(
            file_path_daily, year_str, 'oct')

        if year >= 2020:
            # From 2020 onwards the hourly stations are merged into the
            # daily ones.
            hourly_dict, latlon_stationH = fwi.start_date_add_hourly(
                file_path_hourly, year_str)
            hourly_end, latlon_stationE = fwi.end_date_add_hourly(
                file_path_hourly, year_str)

            days_dict = combine_stations(days_dict, hourly_dict)
            latlon_station = combine_stations(latlon_station, latlon_stationH)

            end_dict = combine_stations(end_dict, hourly_end)
            latlon_station2 = combine_stations(latlon_station2,
                                               latlon_stationE)

        if method in idw_methods:
            # The trailing digit of the method name is the IDW power.
            power = int(method[-1])
            start_surface, maxmin = idw.IDW(latlon_station, days_dict,
                                            year_str, 'Start', shapefile,
                                            False, power, True)
            end_surface, maxmin = idw.IDW(latlon_station2, end_dict, year_str,
                                          'End', shapefile, False, power, True)
        elif method == 'TPSS':
            # Each phi is derived from the station count of its own data
            # set: n - sqrt(2n). The original referenced an undefined
            # `num_stations` for both.
            num_stations_start = len(days_dict)
            phi_input_start = num_stations_start - math.sqrt(
                2 * num_stations_start)
            num_stations_end = len(end_dict)
            phi_input_end = num_stations_end - math.sqrt(2 * num_stations_end)
            start_surface, maxmin = tps.TPS(latlon_station, days_dict,
                                            year_str, 'Start', shapefile,
                                            False, phi_input_start, True, True)
            end_surface, maxmin = tps.TPS(latlon_station2, end_dict, year_str,
                                          'End', shapefile, False,
                                          phi_input_end, True, True)
        else:  # 'RF'
            start_surface, maxmin = rf.random_forest_interpolator(
                latlon_station, days_dict, year_str, 'Start', shapefile, False,
                file_path_elev, idx_list, True)
            end_surface, maxmin = rf.random_forest_interpolator(
                latlon_station2, end_dict, year_str, 'End', shapefile, False,
                file_path_elev, idx_list, True)

        dur_array = calc_season_duration(start_surface, end_surface, year)
        list_of_arrays.append(dur_array)

    # Stack the yearly arrays along a third (depth) axis, then average
    # across that axis to get one mean value per pixel.
    voxels = np.dstack(list_of_arrays)
    averaged_voxels = np.mean(voxels, axis=2)
    return averaged_voxels
def createTPS(self):
    '''Build and return a tps.TPS from this object's static and moving control points.'''
    make_spline = tps.TPS
    static_points = self.staticCPs
    moving_points = self.movingCPs
    return make_spline(static_points, moving_points)