def apply_bootstrap_to_data(streamflow=None, times=None,
                            period_start_month=1, period_end_month=12,
                            start_date=datetime(1970, 1, 1, 0, 0),
                            end_date=datetime(1999, 12, 31, 0, 0),
                            event_duration_days=timedelta(days=1),
                            n_samples=1, high_flow=True,
                            return_periods=None, process_pool=None, out_file=''):
    """
    Select extreme values from streamflow data and apply bootstrap resampling.

    streamflow - 2d array with shape (time, position)
    times - sequence of datetimes corresponding to the time axis of streamflow
    high_flow - if True select period maxima, otherwise period minima
    return_periods - list of return periods passed through to the bootstrap
        (defaults to an empty list; was a mutable default argument before)
    process_pool, out_file - forwarded to apply_bootstrap_to_extremes
    """
    if return_periods is None:
        return_periods = []

    # Pick the extractor once; the two branches differed only in the helper called.
    select_extremes = data_select.get_period_maxima if high_flow \
        else data_select.get_period_minima

    positions = list(range(streamflow.shape[1]))
    all_extremes = []
    for pos in positions:
        data = select_extremes(streamflow[:, pos], times,
                               start_date=start_date,
                               end_date=end_date,
                               start_month=period_start_month,
                               end_month=period_end_month,
                               event_duration=event_duration_days)
        all_extremes.append(list(data.values()))

    # axes order after transpose: (time, position)
    all_extremes = np.array(all_extremes).transpose()
    print('all_extremes.shape = ', all_extremes.shape)
    print('n_samples = ', n_samples)

    apply_bootstrap_to_extremes(all_extremes,
                                n_samples=n_samples,
                                out_file=out_file,
                                return_periods=return_periods,
                                process_pool=process_pool,
                                positions=positions)
def optimize_stationary_for_period_and_all_cells(
        data_file='data/streamflows/hydrosheds_euler9/aex_discharge_1970_01_01_00_00.nc',
        paramfile='gev_params_stationary',
        high_flow=True, start_month=1, end_month=12,
        start_date=datetime(1970, 1, 1, 0, 0),
        end_date=datetime(1999, 12, 31, 0, 0),
        event_duration=timedelta(days=1)):
    """
    Fit stationary GEV parameters for all cells over the given period.

    Results are cached in paramfile (pickle); if the file exists the cached
    parameter set is returned instead of re-optimizing.

    data_file - NetCDF file with streamflow data, shape (time, position)
    high_flow - if True fit to period maxima, otherwise to period minima
    Returns the parameter set produced by
    optimize_stationary_for_period_and_all_cells_using_data.
    """
    print(paramfile)

    # Check whether optimization is required (cached result on disk).
    if os.path.isfile(paramfile):
        print('already optimized, if you want to reoptimize delete %s' % paramfile)
        # Pickle files must be opened in binary mode under Python 3.
        with open(paramfile, 'rb') as f:
            return pickle.load(f)

    # Get streamflow data.
    streamflow, times, xs, ys = data_select.get_data_from_file(path=data_file)

    select_extremes = data_select.get_period_maxima if high_flow \
        else data_select.get_period_minima

    data = []
    for pos in range(streamflow.shape[1]):
        data1 = select_extremes(streamflow[:, pos], times,
                                start_date=start_date,
                                end_date=end_date,
                                start_month=start_month,
                                end_month=end_month,
                                event_duration=event_duration)
        data.append(list(data1.values()))

    # axes order after transpose: (time, position)
    data = np.array(data).transpose()

    pars_set = optimize_stationary_for_period_and_all_cells_using_data(
        data=data, high_flow=high_flow)

    # Binary mode ('wb') is required for pickle; 'w' raised TypeError on Python 3.
    with open(paramfile, 'wb') as f:
        pickle.dump(pars_set, f)

    return pars_set
def get_extremes_list(data_path="", member_ids=None, high_flow=True,
                      start_date=None, end_date=None,
                      event_duration=timedelta(days=1),
                      period_start_month=1, period_end_month=12):
    """
    Build one 2d array of extremes per member data file.

    Files in data_path are selected when their name prefix (the part before
    the first '_') is one of member_ids. Each returned array has the
    shape (time, cell_index).

    Returns (all_extremes, i_indices, j_indices) where i_indices/j_indices
    come from the last file read.
    """
    # Keep only files belonging to the requested ensemble members.
    file_paths = [os.path.join(data_path, the_name)
                  for the_name in os.listdir(data_path)
                  if the_name.split('_')[0] in member_ids]

    all_extremes = []
    i_indices = None
    j_indices = None
    for the_path in file_paths:
        streamflow, times, i_indices, j_indices = \
            data_select.get_data_from_file(the_path)

        domain_extremes = []
        for pos in range(len(i_indices)):
            if high_flow:
                extremes = data_select.get_period_maxima(
                    streamflows=streamflow[:, pos], times=times,
                    start_date=start_date, end_date=end_date,
                    event_duration=event_duration,
                    start_month=period_start_month,
                    end_month=period_end_month)
            else:
                extremes = data_select.get_period_minima(
                    streamflows=streamflow[:, pos], times=times,
                    start_date=start_date, end_date=end_date,
                    event_duration=event_duration,
                    start_month=period_start_month,
                    end_month=period_end_month)
            domain_extremes.append(list(extremes.values()))

        # axes order after transpose: (time, cell_index)
        all_extremes.append(np.transpose(np.array(domain_extremes)))

    return all_extremes, i_indices, j_indices
def apply_bootstrap_to_all_members_merged(file_paths=None, high_flow=True,
                                          n_samples=10, out_file='',
                                          process_pool=None,
                                          start_date=None, end_date=None,
                                          start_month=None, end_month=None,
                                          duration_days=None,
                                          return_periods=None):
    """
    Pool extremes from all member files and bootstrap them together.

    file_paths - list of paths to member streamflow files
    duration_days - timedelta object (minimum event duration)
    high_flow - if True select period maxima, otherwise period minima
    Skips silently when out_file already exists.
    """
    if os.path.isfile(out_file):
        print("{0} already exists, skipping ".format(out_file))
        return

    select_extremes = data_select.get_period_maxima if high_flow \
        else data_select.get_period_minima

    # Select and merge data from all members; all_extremes[pos] accumulates
    # the extreme values of cell `pos` across every member file.
    all_extremes = []
    streamflow = None
    for the_path in file_paths:
        print(the_path)
        streamflow, times, i_indices, j_indices = \
            data_select.get_data_from_file(the_path)

        if not len(all_extremes):
            all_extremes = [[] for _ in range(streamflow.shape[1])]

        for pos in range(streamflow.shape[1]):
            data1 = select_extremes(streamflow[:, pos], times,
                                    start_date=start_date,
                                    end_date=end_date,
                                    start_month=start_month,
                                    end_month=end_month,
                                    event_duration=duration_days)
            all_extremes[pos].extend(list(data1.values()))

    # axes order: (time, position)
    all_extremes = np.array(all_extremes).transpose()

    # The per-member count must be an integer: '/' produced a float on
    # Python 3, which would break downstream index arithmetic.
    n_values_per_member = all_extremes.shape[0] // len(file_paths)

    bootstrap.apply_bootstrap_to_extremes(all_extremes,
                                          n_samples=n_samples,
                                          out_file=out_file,
                                          process_pool=process_pool,
                                          return_periods=return_periods,
                                          positions=range(streamflow.shape[1]),
                                          high_flow=high_flow,
                                          restrict_indices_to_member=True,
                                          n_values_per_member=n_values_per_member)
    print("n_indices per member = ", n_values_per_member)
def gev_fit_all_members(
    high_flow=True,
    member_ids=None,
    data_folder="",
    file_name_pattern="",
    start_date=None,
    end_date=None,
    start_month=1,
    end_month=12,
    duration_days=timedelta(days=1),
):
    """
    GEV fit using extreme data pooled from all ensemble members.

    data_folder - path to the folder with input data (streamflow)
    file_name_pattern - format string; '{0}' is replaced by the member id
    member_ids - list of member id strings (defaults to empty; was a
        mutable default argument before)
    start_month, end_month - months delimiting the period of interest

    The fitted parameter set is cached in a pickle file whose name is
    derived from high_flow and the member ids; delete it to reoptimize.
    """
    if member_ids is None:
        member_ids = []

    param_file = "high" if high_flow else "low"
    # 'id' shadowed the builtin; renamed.
    for member_id in member_ids:
        param_file += "_" + member_id

    if os.path.isfile(param_file):
        print("delete {0}, to reoptimize".format(param_file))
        # Pickle files must be opened in binary mode under Python 3.
        with open(param_file, "rb") as f:
            return pickle.load(f)

    # Select data from every member and pool it per cell.
    path_pattern = os.path.join(data_folder, file_name_pattern)
    all_extremes = []
    for member_id in member_ids:
        print(member_id)
        the_path = path_pattern.format(member_id)
        streamflow, times, i_indices, j_indices = \
            data_select.get_data_from_file(the_path)

        if not len(all_extremes):
            all_extremes = [[] for _ in range(streamflow.shape[1])]

        for pos in range(streamflow.shape[1]):
            if high_flow:
                data1 = data_select.get_period_maxima(
                    streamflow[:, pos],
                    times,
                    start_date=start_date,
                    end_date=end_date,
                    start_month=start_month,
                    end_month=end_month,
                    event_duration=duration_days,
                )
            else:
                data1 = data_select.get_period_minima(
                    streamflow[:, pos],
                    times,
                    start_date=start_date,
                    end_date=end_date,
                    start_month=start_month,
                    end_month=end_month,
                    event_duration=duration_days,
                )
            all_extremes[pos].extend(list(data1.values()))

    # axes order: (time, position)
    all_extremes = np.array(all_extremes).transpose()

    # The old check 'np.any(all_extremes is None)' compared the array OBJECT
    # to None (always False). Check element-wise for missing values instead;
    # raise explicitly so the check survives python -O.
    if any(v is None for v in all_extremes.flat):
        raise AssertionError("all_extremes = " + str(all_extremes))

    # Optimize.
    print(all_extremes.shape)
    # 547 is the expected number of grid cells for this domain
    # (project-specific sanity check carried over from the original).
    assert all_extremes.shape[1] == 547, "all_extremes.shape[1] != 547"
    param_set = gevfit.optimize_stationary_for_period_and_all_cells_using_data(
        data=all_extremes, high_flow=high_flow)

    # Close the file deterministically (the original leaked the handle).
    with open(param_file, "wb") as f:
        pickle.dump(param_set, f)
    return param_set