# Imports inferred from usage in this module (assumption: RAPIDDataset comes
# from RAPIDpy; csv_to_list and CreateInflowFileFromECMWFRunoff are
# project-local helpers assumed to be importable).
import csv
import os
from datetime import datetime

import netCDF4 as nc
import numpy as np

from RAPIDpy import RAPIDDataset


def generate_return_periods(qout_file, return_period_file,
                            storm_duration_days=7):
    """
    Generate return periods from a RAPID Qout file.
    """
    # get ERA Interim data analyzed
    with RAPIDDataset(qout_file) as qout_nc_file:
        print("Setting up Return Periods File ...")
        return_period_nc = nc.Dataset(return_period_file, 'w')

        return_period_nc.createDimension('rivid', qout_nc_file.size_river_id)

        timeSeries_var = return_period_nc.createVariable('rivid', 'i4',
                                                         ('rivid',))
        timeSeries_var.long_name = (
            'Unique NHDPlus COMID identifier for each river reach feature')

        max_flow_var = return_period_nc.createVariable('max_flow', 'f8',
                                                       ('rivid',))
        return_period_20_var = return_period_nc.createVariable(
            'return_period_20', 'f8', ('rivid',))
        return_period_10_var = return_period_nc.createVariable(
            'return_period_10', 'f8', ('rivid',))
        return_period_2_var = return_period_nc.createVariable(
            'return_period_2', 'f8', ('rivid',))

        lat_var = return_period_nc.createVariable('lat', 'f8', ('rivid',),
                                                  fill_value=-9999.0)
        lat_var.long_name = 'latitude'
        lat_var.standard_name = 'latitude'
        lat_var.units = 'degrees_north'
        lat_var.axis = 'Y'

        lon_var = return_period_nc.createVariable('lon', 'f8', ('rivid',),
                                                  fill_value=-9999.0)
        lon_var.long_name = 'longitude'
        lon_var.standard_name = 'longitude'
        lon_var.units = 'degrees_east'
        lon_var.axis = 'X'

        return_period_nc.variables['lat'][:] = \
            qout_nc_file.qout_nc.variables['lat'][:]
        return_period_nc.variables['lon'][:] = \
            qout_nc_file.qout_nc.variables['lon'][:]

        river_id_list = qout_nc_file.get_river_id_array()
        return_period_nc.variables['rivid'][:] = river_id_list

        print("Extracting Data and Generating Return Periods ...")
        time_array = qout_nc_file.get_time_array()
        num_years = int(
            (datetime.utcfromtimestamp(time_array[-1]) -
             datetime.utcfromtimestamp(time_array[0])).days / 365.2425)
        time_steps_per_day = (24 * 3600) / float(
            (datetime.utcfromtimestamp(time_array[1]) -
             datetime.utcfromtimestamp(time_array[0])).seconds)
        # group time steps so each flow "event" spans one storm duration
        step = max(1, int(time_steps_per_day * storm_duration_days))

        for comid_index, comid in enumerate(river_id_list):
            filtered_flow_data = qout_nc_file.get_daily_qout_index(
                comid_index, steps_per_group=step, mode="max")
            # largest flows in descending order (drops the smallest values)
            sorted_flow_data = np.sort(filtered_flow_data)[:num_years:-1]

            # Weibull plotting position: rank index = (num_years + 1) / T
            rp_index_20 = int((num_years + 1) / 20.0)
            rp_index_10 = int((num_years + 1) / 10.0)
            rp_index_2 = int((num_years + 1) / 2.0)

            max_flow_var[comid_index] = sorted_flow_data[0]
            return_period_20_var[comid_index] = sorted_flow_data[rp_index_20]
            return_period_10_var[comid_index] = sorted_flow_data[rp_index_10]
            return_period_2_var[comid_index] = sorted_flow_data[rp_index_2]

        return_period_nc.close()
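
def _generate_return_periods_example():
    """
    Usage sketch (illustrative only, not part of the original module): both
    paths are hypothetical placeholders; the input is assumed to be a
    multi-year historical RAPID Qout NetCDF.
    """
    generate_return_periods(
        qout_file="/rapid_io/output/Qout_erai_1980_2014.nc",      # hypothetical
        return_period_file="/rapid_io/output/return_periods.nc",  # hypothetical
        storm_duration_days=7)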
def compute_init_flows_from_past_forecast(self, forecasted_streamflow_files):
    """
    Compute initial flows from the past ECMWF forecast ensemble.
    """
    if forecasted_streamflow_files:
        # get list of COMIDs
        print("Computing initial flows from the past ECMWF forecast ensemble ...")
        with RAPIDDataset(forecasted_streamflow_files[0]) as qout_nc:
            comid_index_list, reordered_comid_list, ignored_comid_list = \
                qout_nc.get_subset_riverid_index_list(self.stream_id_array)

        print("Extracting data ...")
        reach_prediciton_array = np.zeros(
            (len(self.stream_id_array), len(forecasted_streamflow_files), 1))
        # get information from datasets
        for file_index, forecasted_streamflow_file in enumerate(
                forecasted_streamflow_files):
            try:
                ensemble_index = int(
                    os.path.basename(forecasted_streamflow_file)
                    .split(".")[0].split("_")[-1])
                try:
                    # get hydrograph data from the ECMWF ensemble
                    with RAPIDDataset(forecasted_streamflow_file) \
                            as predicted_qout_nc:
                        time_length = predicted_qout_nc.size_time
                        if not predicted_qout_nc.is_time_variable_valid():
                            # data is raw RAPID output
                            data_values_2d_array = \
                                predicted_qout_nc.get_qout_index(
                                    comid_index_list, time_index=1)
                        else:
                            # the data is CF compliant and has time=0
                            # added to the output
                            if ensemble_index == 52:
                                if time_length == 125:
                                    data_values_2d_array = \
                                        predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=12)
                                else:
                                    data_values_2d_array = \
                                        predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=2)
                            else:
                                if time_length == 85:
                                    data_values_2d_array = \
                                        predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=4)
                                else:
                                    data_values_2d_array = \
                                        predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=2)
                except Exception:
                    print("Invalid ECMWF forecast file {0}".format(
                        forecasted_streamflow_file))
                    continue

                # organize the data
                for comid_index, comid in enumerate(reordered_comid_list):
                    reach_prediciton_array[comid_index][file_index] = \
                        data_values_2d_array[comid_index]
            except Exception as e:
                print(e)

        print("Analyzing data ...")
        for index in range(len(self.stream_segments)):
            try:
                # find where the COMID is in the netCDF file
                data_index = np.where(
                    reordered_comid_list ==
                    self.stream_segments[index].stream_id)[0][0]
                self.stream_segments[index].init_flow = \
                    np.mean(reach_prediciton_array[data_index])
            except Exception:
                # stream id not found in list; adding zero init flow
                self.stream_segments[index].init_flow = 0
                continue

        print("Initialization Complete!")
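
# Usage sketch (illustrative only): this method is assumed to live on a
# stream-network initializer object that carries `stream_id_array` and
# `stream_segments`; the class name, constructor, and file pattern below
# are hypothetical, not from the source.
#
#   from glob import glob
#   network = StreamNetworkInitializer(...)   # hypothetical class
#   past_files = sorted(glob("Qout_*.nc"))    # one file per ensemble member
#   network.compute_init_flows_from_past_forecast(past_files)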
def generate_warning_points(ecmwf_prediction_folder, return_period_file,
                            out_directory, threshold):
    """
    Create warning points from return periods and ECMWF prediction data.
    """
    # get list of prediction files
    prediction_files = sorted(
        [os.path.join(ecmwf_prediction_folder, f)
         for f in os.listdir(ecmwf_prediction_folder)
         if not os.path.isdir(os.path.join(ecmwf_prediction_folder, f))
         and f.lower().endswith('.nc')])

    # get the COMIDs in the ECMWF files
    with RAPIDDataset(prediction_files[0]) as qout_nc:
        prediction_rivids = qout_nc.get_river_id_array()
        comid_list_length = qout_nc.size_river_id
        size_time = qout_nc.size_time

        first_half_size = 40  # run 6-hr resolution for all
        if qout_nc.is_time_variable_valid():
            if size_time == 41 or size_time == 61:
                # run at full or 6-hr resolution for high res
                # and 6-hr for low res
                first_half_size = 41
            elif size_time == 85 or size_time == 125:
                # run at full resolution for all
                first_half_size = 65

        forecast_date_timestep = os.path.basename(ecmwf_prediction_folder)
        forecast_start_date = datetime.strptime(forecast_date_timestep[:11],
                                                "%Y%m%d.%H")
        time_array = qout_nc.get_time_array(
            datetime_simulation_start=forecast_start_date,
            simulation_time_step_seconds=6 * 3600,
            return_datetime=True)

    current_day = forecast_start_date
    daily_time_index_array = [0]
    for idx, var_time in enumerate(time_array):
        if current_day.day != var_time.day:
            daily_time_index_array.append(idx)
            current_day = var_time

    print("Extracting Forecast Data ...")
    # get information from datasets
    reach_prediciton_array_first_half = np.zeros(
        (comid_list_length, len(prediction_files), first_half_size))
    reach_prediciton_array_second_half = np.zeros(
        (comid_list_length, len(prediction_files), 20))
    for file_index, prediction_file in enumerate(prediction_files):
        data_values_2d_array = []
        try:
            ensemble_index = int(
                os.path.basename(prediction_file)[:-3].split("_")[-1])
            # get hydrograph data from the ECMWF ensemble
            with RAPIDDataset(prediction_file) as qout_nc:
                data_values_2d_array = qout_nc.get_qout()
        except Exception as e:
            print(e)

        # add data to the main arrays, ordered by the ERA Interim rivids
        if len(data_values_2d_array) > 0:
            for comid_index, comid in enumerate(prediction_rivids):
                if ensemble_index < 52:
                    reach_prediciton_array_first_half[comid_index][file_index] = \
                        data_values_2d_array[comid_index][:first_half_size]
                    reach_prediciton_array_second_half[comid_index][file_index] = \
                        data_values_2d_array[comid_index][first_half_size:]
                if ensemble_index == 52:
                    if first_half_size == 65:
                        # convert to 3hr-6hr
                        streamflow_1hr = \
                            data_values_2d_array[comid_index][:90:3]
                        # get the time series of 3 hr/6 hr data
                        streamflow_3hr_6hr = \
                            data_values_2d_array[comid_index][90:]
                        # concatenate all time series
                        reach_prediciton_array_first_half[comid_index][file_index] = \
                            np.concatenate([streamflow_1hr,
                                            streamflow_3hr_6hr])
                    elif len(data_values_2d_array[comid_index]) == 125:
                        # convert to 6hr
                        streamflow_1hr = \
                            data_values_2d_array[comid_index][:90:6]
                        # calculate time series of 6 hr data from 3 hr data
                        streamflow_3hr = \
                            data_values_2d_array[comid_index][90:109:2]
                        # get the time series of 6 hr data
                        streamflow_6hr = \
                            data_values_2d_array[comid_index][109:]
                        # concatenate all time series
                        reach_prediciton_array_first_half[comid_index][file_index] = \
                            np.concatenate([streamflow_1hr, streamflow_3hr,
                                            streamflow_6hr])
                    else:
                        reach_prediciton_array_first_half[comid_index][file_index] = \
                            data_values_2d_array[comid_index][:]
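
def _resample_hres_demo():
    """
    Illustrative sketch (not part of the original module): shows how the
    slicing above flattens the mixed-resolution high-res series (125 values:
    90 x 1-hourly, 19 x 3-hourly, 16 x 6-hourly) onto a uniform 6-hour step.
    """
    q = np.arange(125, dtype=float)       # synthetic hydrograph
    q_6hr = np.concatenate([q[:90:6],     # 1-hourly -> 6-hourly (15 values)
                            q[90:109:2],  # 3-hourly -> 6-hourly (10 values)
                            q[109:]])     # already 6-hourly (16 values)
    assert q_6hr.size == 41               # matches first_half_size == 41
    return q_6hr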
def append_streamflow_from_rapid_output(self, rapid_output_file,
                                        date_peak_search_start=None,
                                        date_peak_search_end=None):
    """
    Generate StreamFlow raster.
    Create AutoRAPID INPUT from a single RAPID output.
    """
    print("Appending streamflow for:", self.stream_info_file)
    # get information from datasets
    # get list of stream ids
    stream_info_table = csv_to_list(self.stream_info_file, ", ")[1:]
    # columns: DEM_1D_Index Row Col StreamID StreamDirection
    streamid_list_full = np.array([row[3] for row in stream_info_table],
                                  dtype=np.int32)
    streamid_list_unique = np.unique(streamid_list_full)

    temp_stream_info_file = "{0}_temp.txt".format(
        os.path.splitext(self.stream_info_file)[0])

    print("Analyzing data and appending to list ...")
    with open(temp_stream_info_file, 'w', newline='') as outfile:
        writer = csv.writer(outfile, delimiter=" ")
        writer.writerow(["DEM_1D_Index", "Row", "Col", "StreamID",
                         "StreamDirection", "Slope", "Flow"])

        with RAPIDDataset(rapid_output_file) as data_nc:
            time_range = data_nc.get_time_index_range(
                date_search_start=date_peak_search_start,
                date_search_end=date_peak_search_end)

            # perform operation in chunks:
            # 5 years of 3-hr data (8/day) with 4,000 COMIDs at a time
            max_chunk_size = 8 * 365 * 5 * 4000
            if time_range is not None:
                time_length = len(time_range)
            else:
                time_length = data_nc.size_time

            streamid_list_length = len(streamid_list_unique)
            if streamid_list_length <= 0:
                raise IndexError("Invalid stream info file {0}."
                                 " No stream IDs found ..."
                                 .format(self.stream_info_file))

            step_size = min(max_chunk_size // time_length,
                            streamid_list_length)
            for list_index_start in range(0, streamid_list_length, step_size):
                list_index_end = min(list_index_start + step_size,
                                     streamid_list_length)
                print("River ID subset range {0} to {1} of {2} ..."
                      .format(list_index_start, list_index_end,
                              streamid_list_length))
                print("Extracting data ...")
                valid_stream_indices, valid_stream_ids, missing_stream_ids = \
                    data_nc.get_subset_riverid_index_list(
                        streamid_list_unique[list_index_start:list_index_end])
                streamflow_array = data_nc.get_qout_index(
                    valid_stream_indices, time_index_array=time_range)

                print("Calculating peakflow and writing to file ...")
                for streamid_index, streamid in enumerate(valid_stream_ids):
                    # find where the stream id is in the lookup grid id table
                    peak_flow = max(streamflow_array[streamid_index])
                    raster_index_list = \
                        np.where(streamid_list_full == streamid)[0]
                    for raster_index in raster_index_list:
                        writer.writerow(
                            stream_info_table[raster_index][:6] + [peak_flow])

                for missing_streamid in missing_stream_ids:
                    # set flow to zero for missing stream ids
                    raster_index_list = \
                        np.where(streamid_list_full == missing_streamid)[0]
                    for raster_index in raster_index_list:
                        writer.writerow(
                            stream_info_table[raster_index][:6] + [0])

    os.remove(self.stream_info_file)
    os.rename(temp_stream_info_file, self.stream_info_file)
    print("Appending streamflow complete for:", self.stream_info_file)
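
# Usage sketch (illustrative only): this method is assumed to live on an
# AutoRAPID input-preparation object that carries `stream_info_file`; the
# class name, path, and dates below are hypothetical, not from the source.
#
#   from datetime import datetime
#   prep = AutoRoutePrepare(...)                 # hypothetical class
#   prep.append_streamflow_from_rapid_output(
#       rapid_output_file="Qout_simulation.nc",  # hypothetical path
#       date_peak_search_start=datetime(2014, 1, 1),
#       date_peak_search_end=datetime(2014, 12, 31))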
def append_streamflow_from_ecmwf_rapid_output(self, prediction_folder,
                                              method_x, method_y):
    """
    Generate StreamFlow raster.
    Create AutoRAPID INPUT from ECMWF predictions.

    method_x = the first axis - it produces the max, min, mean,
               mean_plus_std, mean_minus_std hydrograph data
               for the 52 ensembles
    method_y = the second axis - it calculates the max, min, mean,
               mean_plus_std, mean_minus_std value from method_x
    """
    print("Generating Streamflow Raster ...")
    # get list of stream ids
    stream_info_table = csv_to_list(self.stream_info_file, ", ")[1:]
    # columns: DEM_1D_Index Row Col StreamID StreamDirection
    streamid_list_full = np.array([row[3] for row in stream_info_table],
                                  dtype=np.int32)
    streamid_list_unique = np.unique(streamid_list_full)
    if len(streamid_list_unique) == 0:
        raise Exception("ERROR: No stream id values found "
                        "in stream info file.")

    # get list of prediction files
    prediction_files = sorted(
        [os.path.join(prediction_folder, f)
         for f in os.listdir(prediction_folder)
         if not os.path.isdir(os.path.join(prediction_folder, f))
         and f.lower().endswith('.nc')],
        reverse=True)

    print("Finding streamid indices ...")
    with RAPIDDataset(prediction_files[0]) as data_nc:
        reordered_streamid_index_list = \
            data_nc.get_subset_riverid_index_list(streamid_list_unique)[0]

        first_half_size = 40
        if data_nc.size_time == 41 or data_nc.size_time == 61:
            first_half_size = 41
        elif data_nc.size_time == 85 or data_nc.size_time == 125:
            # run at full resolution for all
            first_half_size = 65

    print("Extracting Data ...")
    reach_prediciton_array_first_half = np.zeros(
        (len(streamid_list_unique), len(prediction_files), first_half_size))
    reach_prediciton_array_second_half = np.zeros(
        (len(streamid_list_unique), len(prediction_files), 20))
    # get information from datasets
    for file_index, prediction_file in enumerate(prediction_files):
        data_values_2d_array = []
        try:
            ensemble_index = int(
                os.path.basename(prediction_file)[:-3].split("_")[-1])
            # get hydrograph data from the ECMWF ensemble
            with RAPIDDataset(prediction_file) as data_nc:
                data_values_2d_array = data_nc.get_qout_index(
                    reordered_streamid_index_list)

                # add data to main arrays, ordered by the ERA Interim COMIDs
                if len(data_values_2d_array) > 0:
                    for comid_index in range(len(streamid_list_unique)):
                        if ensemble_index < 52:
                            reach_prediciton_array_first_half[comid_index][file_index] = \
                                data_values_2d_array[comid_index][:first_half_size]
                            reach_prediciton_array_second_half[comid_index][file_index] = \
                                data_values_2d_array[comid_index][first_half_size:]
                        if ensemble_index == 52:
                            if first_half_size == 65:
                                # convert to 3hr-6hr
                                streamflow_1hr = \
                                    data_values_2d_array[comid_index][:90:3]
                                # get the time series of 3 hr/6 hr data
                                streamflow_3hr_6hr = \
                                    data_values_2d_array[comid_index][90:]
                                # concatenate all time series
                                reach_prediciton_array_first_half[comid_index][file_index] = \
                                    np.concatenate([streamflow_1hr,
                                                    streamflow_3hr_6hr])
                            elif data_nc.size_time == 125:
                                # convert to 6hr
                                streamflow_1hr = \
                                    data_values_2d_array[comid_index][:90:6]
                                # calculate 6 hr time series from the 3 hr data
                                streamflow_3hr = \
                                    data_values_2d_array[comid_index][90:109:2]
                                # get the time series of 6 hr data
                                streamflow_6hr = \
                                    data_values_2d_array[comid_index][109:]
                                # concatenate all time series
                                reach_prediciton_array_first_half[comid_index][file_index] = \
                                    np.concatenate([streamflow_1hr,
                                                    streamflow_3hr,
                                                    streamflow_6hr])
                            else:
                                reach_prediciton_array_first_half[comid_index][file_index] = \
                                    data_values_2d_array[comid_index][:]
        except Exception as e:
            print(e)

    print("Analyzing data and writing output ...")
    temp_stream_info_file = "{0}_temp.txt".format(
        os.path.splitext(self.stream_info_file)[0])
    with open(temp_stream_info_file, 'w', newline='') as outfile:
        writer = csv.writer(outfile, delimiter=" ")
        writer.writerow(["DEM_1D_Index", "Row", "Col", "StreamID",
                         "StreamDirection", "Slope", "Flow"])

        for streamid_index, streamid in enumerate(streamid_list_unique):
            # perform analysis on datasets
            all_data_first = reach_prediciton_array_first_half[streamid_index]
            all_data_second = reach_prediciton_array_second_half[streamid_index]

            series = []
            if "mean" in method_x:
                # get mean across the ensembles
                mean_data_first = np.mean(all_data_first, axis=0)
                mean_data_second = np.mean(all_data_second, axis=0)
                series = np.concatenate([mean_data_first, mean_data_second])
                if "std" in method_x:
                    # get standard deviation across the ensembles
                    std_dev_first = np.std(all_data_first, axis=0)
                    std_dev_second = np.std(all_data_second, axis=0)
                    std_dev = np.concatenate([std_dev_first, std_dev_second])
                    if method_x == "mean_plus_std":
                        # mean plus std
                        series += std_dev
                    elif method_x == "mean_minus_std":
                        # mean minus std
                        series -= std_dev
            elif method_x == "max":
                # get max across the ensembles
                max_data_first = np.amax(all_data_first, axis=0)
                max_data_second = np.amax(all_data_second, axis=0)
                series = np.concatenate([max_data_first, max_data_second])
            elif method_x == "min":
                # get min across the ensembles
                min_data_first = np.amin(all_data_first, axis=0)
                min_data_second = np.amin(all_data_second, axis=0)
                series = np.concatenate([min_data_first, min_data_second])

            data_val = 0
            if "mean" in method_y:
                # get mean over the time series
                data_val = np.mean(series)
                if "std" in method_y:
                    # get standard deviation over the time series
                    std_dev = np.std(series)
                    if method_y == "mean_plus_std":
                        # mean plus std
                        data_val += std_dev
                    elif method_y == "mean_minus_std":
                        # mean minus std
                        data_val -= std_dev
            elif method_y == "max":
                # get max of the time series
                data_val = np.amax(series)
            elif method_y == "min":
                # get min of the time series
                data_val = np.amin(series)

            # find where the stream ids are in the lookup grid id table
            raster_index_list = np.where(streamid_list_full == streamid)[0]
            for raster_index in raster_index_list:
                writer.writerow(stream_info_table[raster_index][:6] +
                                [data_val])

    os.remove(self.stream_info_file)
    os.rename(temp_stream_info_file, self.stream_info_file)
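
def _two_axis_reduction_demo():
    """
    Illustrative sketch (not part of the original module): reproduces the
    method_x/method_y semantics above on a synthetic ensemble. method_x
    collapses the ensemble axis into a single hydrograph; method_y collapses
    that hydrograph into the single value written to the Flow column.
    """
    ensembles = np.random.rand(52, 60)  # 52 ensembles x 60 time steps

    # method_x = "mean_plus_std": per-time-step mean plus std over ensembles
    series = ensembles.mean(axis=0) + ensembles.std(axis=0)

    # method_y = "max": single peak value from the combined hydrograph
    data_val = np.amax(series)
    return data_val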
def compute_init_flows_from_past_forecast(self, forecasted_streamflow_files):
    """
    Compute initial flows from the past ECMWF forecast ensemble.
    """
    if forecasted_streamflow_files:
        # get list of COMIDs
        print("Computing initial flows from the past ECMWF forecast ensemble ...")
        with RAPIDDataset(forecasted_streamflow_files[0]) as qout_nc:
            comid_index_list, reordered_comid_list, ignored_comid_list = \
                qout_nc.get_subset_riverid_index_list(self.stream_id_array)

        print("Extracting data ...")
        reach_prediciton_array = np.zeros(
            (len(self.stream_id_array), len(forecasted_streamflow_files), 1))
        # get information from datasets
        for file_index, forecasted_streamflow_file in enumerate(
                forecasted_streamflow_files):
            try:
                ensemble_index = int(
                    os.path.basename(forecasted_streamflow_file)
                    .split(".")[0].split("_")[-1])
                try:
                    # get hydrograph data from the ECMWF ensemble
                    with RAPIDDataset(forecasted_streamflow_file) \
                            as predicted_qout_nc:
                        if not predicted_qout_nc.is_time_variable_valid():
                            # data is raw RAPID output
                            data_values_2d_array = \
                                predicted_qout_nc.get_qout_index(
                                    comid_index_list, time_index=1)
                        else:
                            # The data is CF compliant and has time=0 added
                            # to the output (20190115 CJB).
                            # time_index is the number of steps that make up
                            # a 12-hour increment in the data: HRES (#52) is
                            # 1-hourly to start with, so 12 x 1 hr = 12 hr;
                            # if HRES isn't 1-hourly it is 6-hourly, so
                            # 2 x 6 hr = 12 hr. ENS is 3-hourly, so
                            # 4 x 3 hr = 12 hr; otherwise it is 6-hourly:
                            # 2 x 6 hr = 12 hr again.
                            # Use dataIdentify() to detect the time interval
                            # instead of relying on the time length
                            # (20190115 CJB).
                            RAPIDinflowECMWF_tool = \
                                CreateInflowFileFromECMWFRunoff()
                            time_interval = RAPIDinflowECMWF_tool.dataIdentify(
                                forecasted_streamflow_file)
                            print(time_interval)
                            if ensemble_index == 52:
                                if time_interval in ('HRES1', 'HRES13',
                                                     'HRES136'):
                                    data_values_2d_array = \
                                        predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=12)
                                else:
                                    # otherwise it is ENS6
                                    data_values_2d_array = \
                                        predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=2)
                            else:
                                if time_interval in ('ENS3', 'ENS36'):
                                    data_values_2d_array = \
                                        predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=4)
                                else:
                                    # otherwise it is ENS6
                                    data_values_2d_array = \
                                        predicted_qout_nc.get_qout_index(
                                            comid_index_list, time_index=2)
                except Exception:
                    print("Invalid ECMWF forecast file {0}".format(
                        forecasted_streamflow_file))
                    continue

                # organize the data
                for comid_index, comid in enumerate(reordered_comid_list):
                    reach_prediciton_array[comid_index][file_index] = \
                        data_values_2d_array[comid_index]
            except Exception as e:
                print(e)

        print("Analyzing data ...")
        for index in range(len(self.stream_segments)):
            try:
                # find where the COMID is in the netCDF file
                data_index = np.where(
                    reordered_comid_list ==
                    self.stream_segments[index].stream_id)[0][0]
                self.stream_segments[index].init_flow = \
                    np.mean(reach_prediciton_array[data_index])
            except Exception:
                # stream id not found in list; adding zero init flow
                self.stream_segments[index].init_flow = 0
                continue

        print("Initialization Complete!")
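
def _init_flow_time_index_demo():
    """
    Illustrative sketch (not part of the original module): the time_index
    values above all target the value ~12 hours into the forecast, so each
    is just 12 divided by the output step in hours. The interval-name to
    step-hours mapping below is inferred from the comments above, not from
    a documented API.
    """
    step_hours = {'HRES1': 1, 'HRES13': 1, 'HRES136': 1,  # 1-hourly start
                  'ENS3': 3, 'ENS36': 3,                  # 3-hourly start
                  'ENS6': 6}                              # 6-hourly
    time_index = {interval: 12 // hours
                  for interval, hours in step_hours.items()}
    # -> HRES*: 12, ENS3/ENS36: 4, ENS6: 2
    return time_index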