def find_hash_id_of_nearest_rainfall_station(curw_obs_pool, curw_sim_pool, lat, lon):
    """Return the curw_sim hash id of the MME rainfall series of the obs
    station nearest to (lat, lon).

    Looks up the nearest weather station via the ``getNearestWeatherStation``
    stored procedure in curw_obs, builds its MDPA grid id and resolves that to
    a curw_sim timeseries id.

    :param curw_obs_pool: curw_obs connection pool
    :param curw_sim_pool: curw_sim connection pool
    :param lat: latitude of the point of interest
    :param lon: longitude of the point of interest
    :return: timeseries hash id, or None if the lookup failed (the exception
        is printed, not raised)
    """
    obs_connection = curw_obs_pool.connection()
    Sim_Ts = Timeseries(pool=curw_sim_pool)
    try:
        with obs_connection.cursor() as cursor0:
            cursor0.callproc('getNearestWeatherStation', (lat, lon))
            obs_station = cursor0.fetchone()
            obs_id = obs_station['id']
            obs_station_name = obs_station['name']
            # e.g. rainfall_100057_Naula_MDPA
            grid_id = 'rainfall_{}_{}_MDPA'.format(obs_id, obs_station_name)
            return Sim_Ts.get_timeseries_id(grid_id=grid_id, method='MME')
    except Exception:
        traceback.print_exc()
        return None
    finally:
        # fix: the pooled connection was previously never released
        obs_connection.close()
def prepare_mike_rf_input(start, end, coefficients):
    """Build the MIKE rainfall input dataframe for the given period.

    Pulls the 5-min hybrid rainfall timeseries from curw_sim for every
    distinct ``curw_obs_id`` in ``coefficients``, resamples them to 15 min,
    cleans negatives and NaNs, then produces one column per catchment
    ``name`` as the coefficient-weighted sum of its station columns.

    NOTE(review): a second ``prepare_mike_rf_input(start, end)`` is defined
    later in this module; the later definition wins at import time — confirm
    which one is meant to be public.

    :param start: range start accepted by ``pd.date_range``
    :param end: range end
    :param coefficients: dataframe with columns ``curw_obs_id``, ``name``,
        ``coefficient``
    :return: dataframe indexed by time, one column per catchment name, or
        None if an exception occurred (it is printed, not raised)
    """
    pool = None  # fix: bind before try so `finally` cannot hit NameError
    try:
        #### process station-based hybrid timeseries ####
        distinct_obs_ids = coefficients.curw_obs_id.unique()
        hybrid_ts_df = pd.DataFrame()
        hybrid_ts_df['time'] = pd.date_range(start=start, end=end, freq='5min')
        pool = get_Pool(host=con_params.CURW_SIM_HOST, port=con_params.CURW_SIM_PORT,
                        user=con_params.CURW_SIM_USERNAME, password=con_params.CURW_SIM_PASSWORD,
                        db=con_params.CURW_SIM_DATABASE)
        TS = Timeseries(pool)
        obs_id_hash_id_mapping = get_all_obs_rain_hashids_from_curw_sim(pool)
        for obs_id in distinct_obs_ids:
            # taking data from curw_sim database (data prepared based on active stations for hechms)
            ts = TS.get_timeseries(id_=obs_id_hash_id_mapping.get(str(obs_id)),
                                   start_date=start, end_date=end)
            ts.insert(0, ['time', obs_id])
            ts_df = list_of_lists_to_df_first_row_as_columns(ts)
            ts_df[obs_id] = ts_df[obs_id].astype('float64')
            hybrid_ts_df = pd.merge(hybrid_ts_df, ts_df, how="left", on='time')
        hybrid_ts_df.set_index('time', inplace=True)
        # 5-min accumulations -> 15-min accumulations
        hybrid_ts_df = hybrid_ts_df.resample('15min', label='right', closed='right').sum()
        hybrid_ts_df = replace_negative_numbers_with_nan(hybrid_ts_df)
        hybrid_ts_df = replace_nan_with_row_average(hybrid_ts_df)

        #### process mike input: weighted sum of station columns per catchment ####
        distinct_names = coefficients.name.unique()
        mike_input = pd.DataFrame()
        mike_input_initialized = False
        for name in distinct_names:
            catchment_coefficients = coefficients[coefficients.name == name]
            catchment = pd.DataFrame()
            catchment_initialized = False
            for index, row in catchment_coefficients.iterrows():
                weighted = (hybrid_ts_df[row['curw_obs_id']] * row['coefficient']).to_frame(
                    name=row['curw_obs_id'])
                if not catchment_initialized:
                    catchment = weighted
                    catchment_initialized = True
                else:
                    catchment = pd.merge(catchment, weighted, how="left", on='time')
            if not mike_input_initialized:
                mike_input[name] = catchment.sum(axis=1)
                mike_input_initialized = True
            else:
                mike_input = pd.merge(mike_input, (catchment.sum(axis=1)).to_frame(name=name),
                                      how="left", on='time')
        # fix: DataFrame.round returns a new frame; the result was previously discarded
        mike_input = mike_input.round(1)
        return mike_input
    except Exception:
        traceback.print_exc()
    finally:
        if pool is not None:
            destroy_Pool(pool)
def prepare_mike_rf_input(start, end):
    """Build the MIKE rainfall input dataframe from the configured stations.

    Reads the MIKE rainfall station list, pulls each station's 5-min
    timeseries from curw_sim, resamples to 15 min, fills gaps from up to two
    nearest active observation stations, then averages any remaining gaps
    across the row. Columns are renamed from station ids to station names.

    :param start: range start accepted by ``pd.date_range``
    :param end: range end
    :return: dataframe indexed by time, one column per station name, or None
        if an exception occurred (it is printed, not raised)
    """
    # fix: bind before try so `finally` cannot hit NameError when pool
    # creation fails part-way through
    pool = None
    obs_pool = None
    try:
        # rows: [hash_id, station_id, station_name, latitude, longitude]
        mike_obs_stations = read_csv(
            os.path.join(ROOT_DIRECTORY, 'inputs', 'params', 'mike_rainfall_stations.csv'))
        selected_obs_ids = []
        for station in mike_obs_stations:  # fix: loop var previously shadowed builtin `list`
            selected_obs_ids.append(str(station[1]))

        #### process station-based hybrid timeseries ####
        hybrid_ts_df = pd.DataFrame()
        hybrid_ts_df['time'] = pd.date_range(start=start, end=end, freq='5min')
        pool = get_Pool(host=con_params.CURW_SIM_HOST, port=con_params.CURW_SIM_PORT,
                        user=con_params.CURW_SIM_USERNAME, password=con_params.CURW_SIM_PASSWORD,
                        db=con_params.CURW_SIM_DATABASE)
        obs_pool = get_Pool(host=con_params.CURW_OBS_HOST, port=con_params.CURW_OBS_PORT,
                            user=con_params.CURW_OBS_USERNAME, password=con_params.CURW_OBS_PASSWORD,
                            db=con_params.CURW_OBS_DATABASE)
        TS = Timeseries(pool)
        obs_id_hash_id_mapping = get_all_obs_rain_hashids_from_curw_sim(pool)
        obs_stations = extract_active_curw_obs_rainfall_stations(
            curw_obs_pool=obs_pool, start_time=start, end_time=end)
        obs_obs_mapping = find_nearest_obs_stations_for_mike_rainfall_stations(
            mike_obs_stations=mike_obs_stations, obs_stations=obs_stations)
        for obs_id in selected_obs_ids:
            if obs_id in obs_id_hash_id_mapping.keys():
                # taking data from curw_sim database (data prepared based on active stations for hechms)
                ts = TS.get_timeseries(id_=obs_id_hash_id_mapping.get(obs_id),
                                       start_date=start, end_date=end)
            else:
                ts = []
            ts.insert(0, ['time', obs_id])
            ts_df = list_of_lists_to_df_first_row_as_columns(ts)
            ts_df[obs_id] = ts_df[obs_id].astype('float64')
            hybrid_ts_df = pd.merge(hybrid_ts_df, ts_df, how="left", on='time')
        hybrid_ts_df.set_index('time', inplace=True)
        hybrid_ts_df = hybrid_ts_df.resample('15min', label='right', closed='right').sum()
        mike_input = replace_negative_numbers_with_nan(hybrid_ts_df)

        # fill gaps from (up to) the two nearest active obs stations
        for col in mike_input.columns:
            if len(obs_obs_mapping[col]) > 0:
                print(col, obs_obs_mapping[col][0])
                mike_input[col] = mike_input[col].fillna(mike_input[obs_obs_mapping[col][0]])
            if len(obs_obs_mapping[col]) > 1:
                print(col, obs_obs_mapping[col][1])
                mike_input[col] = mike_input[col].fillna(mike_input[obs_obs_mapping[col][1]])
        mike_input = replace_nan_with_row_average(mike_input)
        mike_input = mike_input.round(1)

        # rename station-id columns to station names
        station_name_dict = {}
        for i in range(len(mike_obs_stations)):
            station_name_dict[str(mike_obs_stations[i][1])] = mike_obs_stations[i][2]
        for col in mike_input.columns:
            mike_input = mike_input.rename(columns={col: station_name_dict.get(col)})
        return mike_input
    except Exception:
        traceback.print_exc()
    finally:
        if pool is not None:
            destroy_Pool(pool)
        if obs_pool is not None:
            destroy_Pool(obs_pool)
def update_rainfall_obs(target_model, method, timestep, start_time, end_time):
    """Update curw_sim rainfall observations for the given target model.

    For every active observation station within [start_time, end_time],
    ensures a curw_sim run entry exists (creating one if needed), refreshes
    its grid id, extracts the station's observed rainfall at the requested
    timestep and replaces the stored timeseries with it.

    :param target_model: target model name stored in the run metadata
    :param method: value interpolation method stored in the run metadata
    :param timestep: observation timestep in minutes (5 or 15; any other
        value leaves the stored timeseries untouched)
    :param start_time: 'YYYY-MM-DD HH:MM:SS' start of the extraction window
    :param end_time: 'YYYY-MM-DD HH:MM:SS' end of the extraction window
    :return: None; errors are logged and swallowed
    """
    obs_start = datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')
    # fix: bind before try so `finally` cannot hit NameError when pool
    # creation fails part-way through
    curw_obs_pool = None
    curw_sim_pool = None
    curw_obs_connection = None
    try:
        # Connect to the databases
        curw_obs_pool = get_Pool(host=con_params.CURW_OBS_HOST, user=con_params.CURW_OBS_USERNAME,
                                 password=con_params.CURW_OBS_PASSWORD, port=con_params.CURW_OBS_PORT,
                                 db=con_params.CURW_OBS_DATABASE)
        curw_obs_connection = curw_obs_pool.connection()
        curw_sim_pool = get_Pool(host=con_params.CURW_SIM_HOST, user=con_params.CURW_SIM_USERNAME,
                                 password=con_params.CURW_SIM_PASSWORD, port=con_params.CURW_SIM_PORT,
                                 db=con_params.CURW_SIM_DATABASE)
        TS = Timeseries(pool=curw_sim_pool)
        # rows: [hash_id, station_id, station_name, latitude, longitude];
        # [1:] skips the first row — presumably a header; confirm against
        # extract_active_curw_obs_rainfall_stations
        active_obs_stations = extract_active_curw_obs_rainfall_stations(
            start_time=start_time, end_time=end_time)[1:]
        # keys: obs station id, value: [hash id, name, latitude, longitude]
        obs_stations_dict = {}
        for obs_index in range(len(active_obs_stations)):
            obs_stations_dict[active_obs_stations[obs_index][1]] = [
                active_obs_stations[obs_index][0],
                active_obs_stations[obs_index][2],
                active_obs_stations[obs_index][3],
                active_obs_stations[obs_index][4]
            ]
        for obs_id in obs_stations_dict.keys():
            meta_data = {
                'latitude': float('%.6f' % float(obs_stations_dict.get(obs_id)[2])),
                'longitude': float('%.6f' % float(obs_stations_dict.get(obs_id)[3])),
                'model': target_model,
                'method': method,
                'grid_id': 'rainfall_{}_{}'.format(obs_id, obs_stations_dict.get(obs_id)[1])
            }
            tms_id = TS.get_timeseries_id_if_exists(meta_data=meta_data)
            if tms_id is None:
                tms_id = TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                TS.insert_run(meta_data=meta_data)
            # keep the grid id in sync even for pre-existing run entries
            TS.update_grid_id(id_=tms_id, grid_id=meta_data['grid_id'])
            obs_hash_id = obs_stations_dict.get(obs_id)[0]
            obs_timeseries = []
            if timestep == 5:
                ts = extract_obs_rain_5_min_ts(connection=curw_obs_connection, start_time=obs_start,
                                               end_time=end_time, id=obs_hash_id)
                if ts is not None and len(ts) > 1:
                    obs_timeseries.extend(
                        process_5_min_ts(newly_extracted_timeseries=ts,
                                         expected_start=obs_start)[1:])
            elif timestep == 15:
                ts = extract_obs_rain_15_min_ts(connection=curw_obs_connection, start_time=obs_start,
                                                end_time=end_time, id=obs_hash_id)
                if ts is not None and len(ts) > 1:
                    obs_timeseries.extend(
                        process_15_min_ts(newly_extracted_timeseries=ts,
                                          expected_start=obs_start)[1:])
            if obs_timeseries is not None and len(obs_timeseries) > 0:
                TS.replace_data(timeseries=obs_timeseries, tms_id=tms_id)
    except Exception:
        traceback.print_exc()
        logger.error("Exception occurred while updating obs rainfalls in curw_sim.")
    finally:
        if curw_obs_connection is not None:
            curw_obs_connection.close()
        if curw_sim_pool is not None:
            destroy_Pool(pool=curw_sim_pool)
        if curw_obs_pool is not None:
            destroy_Pool(pool=curw_obs_pool)
def update_rainfall_from_file(curw_sim_pool, flo2d_grid_polygon_map, stations_dict, rainfall_df,
                              flo2d_model, method, grid_interpolation, timestep,
                              start_time=None, end_time=None):
    """Update curw_sim rainfall for flo2d cells from an in-memory dataframe.

    For every flo2d grid cell, ensures a curw_sim run entry exists and
    upserts the cell's rainfall timeseries: cells mapped to a polygon take
    the timeseries of the matching station; unmapped cells take the mean of
    all stations per timestamp.

    :param curw_sim_pool: curw_sim connection pool (NOTE: destroyed on exit —
        the caller must not reuse it afterwards)
    :param flo2d_grid_polygon_map: rows [Grid_ID, X(longitude), Y(latitude),
        matching_polygon (optional 4th element)]
    :param stations_dict: polygon -> [longitude, latitude]
    :param rainfall_df: dataframe with columns time, latitude, longitude, value
    :param flo2d_model: flo2d model name (part of the grid id)
    :param method: value interpolation method stored in run metadata
    :param grid_interpolation: grid interpolation method (part of the grid id)
    :param timestep: output timeseries timestep (unused in the body; kept for
        interface compatibility)
    :param start_time: unused; kept for interface compatibility
    :param end_time: unused; kept for interface compatibility
    :return: None; errors are logged and swallowed
    """
    try:
        TS = Sim_Timeseries(pool=curw_sim_pool)
        # loop-invariant fallback (mean of all stations per timestamp);
        # computed at most once instead of per unmapped cell
        fallback_ts = None
        for grid in flo2d_grid_polygon_map:
            lat = grid[2]
            lon = grid[1]
            cell_id = grid[0]
            meta_data = {
                'latitude': float('%.6f' % float(lat)),
                'longitude': float('%.6f' % float(lon)),
                'model': flo2d_model,
                'method': method,
                'grid_id': '{}_{}_{}'.format(flo2d_model, grid_interpolation,
                                             (str(cell_id)).zfill(10))
            }
            if len(grid) > 3:
                # cell is mapped to a polygon: use that station's timeseries
                polygon = grid[3]
                poly_lat = stations_dict.get(polygon)[1]
                poly_lon = stations_dict.get(polygon)[0]
                processed_ts = rainfall_df.loc[
                    (rainfall_df['latitude'] == poly_lat) &
                    (rainfall_df['longitude'] == poly_lon)][['time', 'value']].values.tolist()
            else:
                if fallback_ts is None:
                    fallback_ts = rainfall_df.groupby('time').mean().round(
                        3)['value'].reset_index().values.tolist()
                processed_ts = list(fallback_ts)
            tms_id = TS.get_timeseries_id(grid_id=meta_data.get('grid_id'),
                                          method=meta_data.get('method'))
            if tms_id is None:
                tms_id = TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                TS.insert_run(meta_data=meta_data)
            print("grid_id:", meta_data['grid_id'])
            print(processed_ts)
            if processed_ts is not None and len(processed_ts) > 0:
                TS.insert_data(timeseries=processed_ts, tms_id=tms_id, upsert=True)
    except Exception:
        traceback.print_exc()
        logger.error("Exception occurred while updating obs rainfalls in curw_sim.")
    finally:
        destroy_Pool(pool=curw_sim_pool)
        logger.info("Process finished")
def update_rainfall_obs(flo2d_model, method, grid_interpolation, timestep, start_time, end_time):
    """Update curw_sim rainfall observations for flo2d models.

    For every flo2d grid cell, ensures a curw_sim run entry exists, then
    builds an observed rainfall timeseries from up to three nearby
    observation stations (a primary plus two fall-backs used to fill gaps),
    normalizes it into a continuous series and upserts it.

    :param flo2d_model: flo2d model (selects the grid csv; part of grid ids)
    :param method: value interpolation method stored in run metadata
    :param grid_interpolation: grid interpolation method (part of the grid id)
    :param timestep: output timeseries timestep in minutes (5 or 15)
    :param start_time: 'YYYY-MM-DD HH:MM:SS' start of the window
    :param end_time: 'YYYY-MM-DD HH:MM:SS' end of the window
    :return: None; errors are logged and swallowed
    """
    obs_start = datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S')
    try:
        # Connect to the database
        curw_obs_pool = get_Pool(host=con_params.CURW_OBS_HOST,
                                 user=con_params.CURW_OBS_USERNAME,
                                 password=con_params.CURW_OBS_PASSWORD,
                                 port=con_params.CURW_OBS_PORT,
                                 db=con_params.CURW_OBS_DATABASE)
        curw_obs_connection = curw_obs_pool.connection()
        curw_sim_pool = get_Pool(host=con_params.CURW_SIM_HOST,
                                 user=con_params.CURW_SIM_USERNAME,
                                 password=con_params.CURW_SIM_PASSWORD,
                                 port=con_params.CURW_SIM_PORT,
                                 db=con_params.CURW_SIM_DATABASE)
        TS = Sim_Timeseries(pool=curw_sim_pool)
        # rows: [hash_id, station_id, station_name, latitude, longitude];
        # [1:] skips the first row — presumably a header; confirm with helper
        # active_obs_stations = read_csv(os.path.join(ROOT_DIR,'grids/obs_stations/rainfall/curw_active_rainfall_obs_stations.csv'))
        active_obs_stations = extract_active_curw_obs_rainfall_stations(
            start_time=start_time, end_time=end_time)[1:]
        flo2d_grids = read_csv(
            os.path.join(ROOT_DIR, 'grids/flo2d/{}m.csv'.format(
                flo2d_model)))  # [Grid_ ID, X(longitude), Y(latitude)]

        stations_dict_for_obs = {}  # keys: obs station id , value: hash id
        for obs_index in range(len(active_obs_stations)):
            stations_dict_for_obs[active_obs_stations[obs_index]
                                  [1]] = active_obs_stations[obs_index][0]

        # flo2d_obs_mapping = get_flo2d_cells_to_obs_grid_mappings(pool=curw_sim_pool, grid_interpolation=grid_interpolation, flo2d_model=flo2d_model)
        flo2d_obs_mapping = find_nearest_obs_stations_for_flo2d_stations(
            flo2d_stations_csv=os.path.join(
                ROOT_DIR, 'grids/flo2d/{}m.csv'.format(flo2d_model)),
            obs_stations=active_obs_stations,
            flo2d_model=flo2d_model)

        for flo2d_index in range(len(flo2d_grids)):
            lat = flo2d_grids[flo2d_index][2]
            lon = flo2d_grids[flo2d_index][1]
            cell_id = flo2d_grids[flo2d_index][0]
            meta_data = {
                'latitude': float('%.6f' % float(lat)),
                'longitude': float('%.6f' % float(lon)),
                'model': flo2d_model,
                'method': method,
                'grid_id': '{}_{}_{}'.format(flo2d_model, grid_interpolation,
                                             (str(cell_id)).zfill(10))
            }
            tms_id = TS.get_timeseries_id(grid_id=meta_data.get('grid_id'),
                                          method=meta_data.get('method'))
            if tms_id is None:
                # first time this cell is seen: create the run entry
                tms_id = TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                TS.insert_run(meta_data=meta_data)
            print("grid_id:", cell_id)
            print("grid map:", flo2d_obs_mapping.get(cell_id))
            # up to three mapped stations; str(-1) marks "no station"
            obs1_station_id = flo2d_obs_mapping.get(cell_id)[0]
            obs2_station_id = flo2d_obs_mapping.get(cell_id)[1]
            obs3_station_id = flo2d_obs_mapping.get(cell_id)[2]
            obs_timeseries = []
            if timestep == 5:
                if obs1_station_id != str(-1):
                    obs1_hash_id = stations_dict_for_obs.get(obs1_station_id)
                    ts = extract_obs_rain_5_min_ts(
                        connection=curw_obs_connection,
                        start_time=obs_start,
                        id=obs1_hash_id,
                        end_time=end_time)
                    if ts is not None and len(ts) > 1:
                        obs_timeseries.extend(
                            process_5_min_ts(newly_extracted_timeseries=ts,
                                             expected_start=obs_start)[1:])
                        # obs_start = ts[-1][0]
                if obs2_station_id != str(-1):
                    obs2_hash_id = stations_dict_for_obs.get(obs2_station_id)
                    ts2 = extract_obs_rain_5_min_ts(
                        connection=curw_obs_connection,
                        start_time=obs_start,
                        id=obs2_hash_id,
                        end_time=end_time)
                    if ts2 is not None and len(ts2) > 1:
                        # fill gaps left by station 1, then append the tail
                        obs_timeseries = fill_missing_values(
                            newly_extracted_timeseries=ts2,
                            OBS_TS=obs_timeseries)
                        if obs_timeseries is not None and len(
                                obs_timeseries) > 0:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_5_min_ts(
                                newly_extracted_timeseries=ts2,
                                expected_start=expected_start)[1:])
                        # obs_start = ts2[-1][0]
                if obs3_station_id != str(-1):
                    obs3_hash_id = stations_dict_for_obs.get(obs3_station_id)
                    ts3 = extract_obs_rain_5_min_ts(
                        connection=curw_obs_connection,
                        start_time=obs_start,
                        id=obs3_hash_id,
                        end_time=end_time)
                    # NOTE(review): unlike the obs2 branch this also requires
                    # a non-empty obs_timeseries — confirm intended
                    if ts3 is not None and len(ts3) > 1 and len(
                            obs_timeseries) > 0:
                        obs_timeseries = fill_missing_values(
                            newly_extracted_timeseries=ts3,
                            OBS_TS=obs_timeseries)
                        if obs_timeseries is not None:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_5_min_ts(
                                newly_extracted_timeseries=ts3,
                                expected_start=expected_start)[1:])
            elif timestep == 15:
                # same three-station merge, at 15-minute resolution
                if obs1_station_id != str(-1):
                    obs1_hash_id = stations_dict_for_obs.get(obs1_station_id)
                    ts = extract_obs_rain_15_min_ts(
                        connection=curw_obs_connection,
                        start_time=obs_start,
                        id=obs1_hash_id,
                        end_time=end_time)
                    if ts is not None and len(ts) > 1:
                        obs_timeseries.extend(
                            process_15_min_ts(newly_extracted_timeseries=ts,
                                              expected_start=obs_start)[1:])
                        # obs_start = ts[-1][0]
                if obs2_station_id != str(-1):
                    obs2_hash_id = stations_dict_for_obs.get(obs2_station_id)
                    ts2 = extract_obs_rain_15_min_ts(
                        connection=curw_obs_connection,
                        start_time=obs_start,
                        id=obs2_hash_id,
                        end_time=end_time)
                    if ts2 is not None and len(ts2) > 1:
                        obs_timeseries = fill_missing_values(
                            newly_extracted_timeseries=ts2,
                            OBS_TS=obs_timeseries)
                        if obs_timeseries is not None and len(
                                obs_timeseries) > 0:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_15_min_ts(
                                newly_extracted_timeseries=ts2,
                                expected_start=expected_start)[1:])
                        # obs_start = ts2[-1][0]
                if obs3_station_id != str(-1):
                    obs3_hash_id = stations_dict_for_obs.get(obs3_station_id)
                    ts3 = extract_obs_rain_15_min_ts(
                        connection=curw_obs_connection,
                        start_time=obs_start,
                        id=obs3_hash_id,
                        end_time=end_time)
                    if ts3 is not None and len(ts3) > 1 and len(
                            obs_timeseries) > 0:
                        obs_timeseries = fill_missing_values(
                            newly_extracted_timeseries=ts3,
                            OBS_TS=obs_timeseries)
                        if obs_timeseries is not None:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_15_min_ts(
                                newly_extracted_timeseries=ts3,
                                expected_start=expected_start)[1:])
            # replace -99999 readings with zero rainfall
            for i in range(len(obs_timeseries)):
                if obs_timeseries[i][1] == -99999:
                    obs_timeseries[i][1] = 0
            print("### obs timeseries length ###", len(obs_timeseries))
            # pad with a zero at end_time so the series covers the full window
            if obs_timeseries is not None and len(
                    obs_timeseries) > 0 and obs_timeseries[-1][0] != end_time:
                obs_timeseries.append(
                    [datetime.strptime(end_time, DATE_TIME_FORMAT), 0])
            final_ts = process_continuous_ts(original_ts=obs_timeseries,
                                             expected_start=datetime.strptime(
                                                 start_time, DATE_TIME_FORMAT),
                                             filling_value=0,
                                             timestep=timestep)
            if final_ts is not None and len(final_ts) > 0:
                TS.insert_data(timeseries=final_ts, tms_id=tms_id, upsert=True)
                # NOTE(review): passes final_ts[-1][1] (looks like the value
                # column) as obs_end rather than the timestamp — confirm
                TS.update_latest_obs(id_=tms_id, obs_end=(final_ts[-1][1]))
    except Exception as e:
        traceback.print_exc()
        logger.error(
            "Exception occurred while updating obs rainfalls in curw_sim.")
    finally:
        # NOTE(review): these names are unbound if pool creation failed above,
        # which would raise NameError here and mask the original error
        curw_obs_connection.close()
        destroy_Pool(pool=curw_sim_pool)
        destroy_Pool(pool=curw_obs_pool)
        logger.info("Process finished")
# NOTE(review): script fragment — the `except`/`finally` matching this `try`
# lies outside the visible chunk of this file.
try:
    pool = get_Pool(host=con_params.CURW_SIM_HOST,
                    port=con_params.CURW_SIM_PORT,
                    user=con_params.CURW_SIM_USERNAME,
                    password=con_params.CURW_SIM_PASSWORD,
                    db=con_params.CURW_SIM_DATABASE)
    method = MethodEnum.getAbbreviation(MethodEnum.MME)
    run_table = "run"
    data_table = "data"
    # cut-off timestamp: 51 days before now, minute-aligned
    end = (datetime.now() - timedelta(days=51)).strftime("%Y-%m-%d %H:%M:00")
    hash_ids = flush_common.get_curw_sim_hash_ids(pool=pool,
                                                  run_table=run_table,
                                                  model=HecHMS,
                                                  method=method,
                                                  obs_end_start=None,
                                                  obs_end_end=None,
                                                  grid_id=None)
    TS = flush_common.Timeseries(pool=pool,
                                 run_table=run_table,
                                 data_table=data_table)
    TS_rain = Timeseries(pool)
    ###########################################################################
    # Delete run entries without any observed data within last 50 days period #
    ###########################################################################
    count = 0
    # NOTE(review): `id` shadows the builtin, and removing from `hash_ids`
    # while iterating skips the element that follows each removal — confirm
    # every stale id is really meant to be visited
    for id in hash_ids:
        obs_end = TS_rain.get_obs_end(id)
        if (obs_end is None) or (obs_end.strftime("%Y-%m-%d %H:%M:%S") < end):
            count += 1
            TS.delete_all_by_hash_id(id)
            print(count, id)
            hash_ids.remove(id)
    #####################################################################################################
    # delete a specific timeseries defined by a given hash id from data table for specified time period #
def update_rainfall_obs(curw_obs_pool, curw_sim_pool, flo2d_model, method, grid_interpolation,
                        timestep, start_time, end_time):
    """Update curw_sim rainfall observations for flo2d models.

    Builds a dataframe of observed 5-min rainfall for all active stations,
    optionally resamples it to 15 min, and for every flo2d grid cell stores a
    'final' series assembled from up to three mapped stations (later ones
    fill gaps of earlier ones). Cells without mapped stations get zeros.

    :param curw_obs_pool: curw_obs connection pool (destroyed on exit)
    :param curw_sim_pool: curw_sim connection pool (destroyed on exit)
    :param flo2d_model: flo2d model (selects the grid csv; part of grid ids)
    :param method: value interpolation method stored in run metadata
    :param grid_interpolation: grid interpolation method (part of the grid id)
    :param timestep: output timestep in minutes; 15 triggers resampling
    :param start_time: 'YYYY-MM-DD HH:MM:SS' window start
    :param end_time: 'YYYY-MM-DD HH:MM:SS' window end
    :return: None; errors are logged and swallowed
    """
    curw_obs_connection = None  # fix: guard `finally` against NameError
    try:
        curw_obs_connection = curw_obs_pool.connection()
        # rows: [hash_id, station_id, station_name, latitude, longitude];
        # [1:] skips the first row — presumably a header; confirm with helper
        active_obs_stations = extract_active_curw_obs_rainfall_stations(
            curw_obs_pool=curw_obs_pool, start_time=start_time, end_time=end_time)[1:]
        flo2d_grids = read_csv(os.path.join(
            ROOT_DIR, 'grids/flo2d/{}m.csv'.format(flo2d_model)))  # [Grid_ ID, X(longitude), Y(latitude)]

        stations_dict_for_obs = {}  # keys: obs station id, value: hash id
        for obs_index in range(len(active_obs_stations)):
            stations_dict_for_obs[active_obs_stations[obs_index][1]] = active_obs_stations[obs_index][0]

        flo2d_obs_mapping = find_nearest_obs_stations_for_flo2d_stations(
            flo2d_stations_csv=os.path.join(ROOT_DIR, 'grids/flo2d/{}m.csv'.format(flo2d_model)),
            obs_stations=active_obs_stations, flo2d_model=flo2d_model)

        # retrieve observed timeseries: one column per station id
        obs_df = pd.DataFrame()
        obs_df['time'] = pd.date_range(start=start_time, end=end_time, freq='5min')
        for obs_id in stations_dict_for_obs.keys():
            ts = extract_obs_rain_5_min_ts(connection=curw_obs_connection, start_time=start_time,
                                           id=stations_dict_for_obs.get(obs_id), end_time=end_time)
            ts.insert(0, ['time', obs_id])
            ts_df = list_of_lists_to_df_first_row_as_columns(ts)
            ts_df[obs_id] = ts_df[obs_id].astype('float64')
            obs_df = pd.merge(obs_df, ts_df, how="left", on='time')
        obs_df.set_index('time', inplace=True)
        obs_df['0'] = 0  # all-zero fallback column for unmapped cells
        if timestep == 15:
            obs_df = obs_df.resample('15min', label='right', closed='right').sum()

        TS = Sim_Timeseries(pool=curw_sim_pool)
        for flo2d_index in range(len(flo2d_grids)):
            lat = flo2d_grids[flo2d_index][2]
            lon = flo2d_grids[flo2d_index][1]
            cell_id = flo2d_grids[flo2d_index][0]
            meta_data = {
                'latitude': float('%.6f' % float(lat)),
                'longitude': float('%.6f' % float(lon)),
                'model': flo2d_model,
                'method': method,
                'grid_id': '{}_{}_{}'.format(flo2d_model, grid_interpolation,
                                             (str(cell_id)).zfill(10))
            }
            tms_id = TS.get_timeseries_id(grid_id=meta_data.get('grid_id'),
                                          method=meta_data.get('method'))
            if tms_id is None:
                tms_id = TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                TS.insert_run(meta_data=meta_data)
            print(datetime.now().strftime(DATE_TIME_FORMAT))
            print("grid_id:", cell_id)
            obs_station_ids = flo2d_obs_mapping.get(cell_id)
            if len(obs_station_ids) == 1:
                # fix: obs_df[one-element-list] is a DataFrame and has no
                # .to_frame(); select the single column (a Series) instead
                obs_ts_df = obs_df[obs_station_ids[0]].to_frame(name='final')
            elif len(obs_station_ids) == 2:
                # fix: .copy() so the fillna assignments act on a real frame,
                # not a slice of obs_df (SettingWithCopy)
                obs_ts_df = obs_df[obs_station_ids].copy()
                obs_ts_df[obs_station_ids[0]] = obs_ts_df[obs_station_ids[0]].fillna(
                    obs_ts_df[obs_station_ids[1]])
                obs_ts_df['final'] = obs_ts_df[obs_station_ids[0]]
            elif len(obs_station_ids) == 3:
                obs_ts_df = obs_df[obs_station_ids].copy()
                obs_ts_df[obs_station_ids[1]] = obs_ts_df[obs_station_ids[1]].fillna(
                    obs_ts_df[obs_station_ids[2]])
                obs_ts_df[obs_station_ids[0]] = obs_ts_df[obs_station_ids[0]].fillna(
                    obs_ts_df[obs_station_ids[1]])
                obs_ts_df['final'] = obs_ts_df[obs_station_ids[0]]
            else:
                obs_ts_df = obs_df['0'].to_frame(name='final')
            final_ts_df = obs_ts_df['final'].reset_index()
            final_ts_df['time'] = final_ts_df['time'].dt.strftime(DATE_TIME_FORMAT)
            final_ts = final_ts_df.values.tolist()
            if final_ts is not None and len(final_ts) > 0:
                TS.replace_data(timeseries=final_ts, tms_id=tms_id)
                # NOTE(review): final_ts rows are [time, value]; passing
                # final_ts[-1][1] (the value) as obs_end matches the sibling
                # functions in this file but looks like it should be the
                # timestamp final_ts[-1][0] — confirm before changing
                TS.update_latest_obs(id_=tms_id, obs_end=(final_ts[-1][1]))
    except Exception:
        traceback.print_exc()
        logger.error("Exception occurred while updating obs rainfalls in curw_sim.")
    finally:
        if curw_obs_connection is not None:
            curw_obs_connection.close()
        # caller-supplied pools are intentionally destroyed here, matching
        # the original behavior — callers must not reuse them afterwards
        destroy_Pool(pool=curw_sim_pool)
        destroy_Pool(pool=curw_obs_pool)
        logger.info("Process finished")
def prepare_rain(curw_sim_pool, rain_file_path, curw_sim_hash_id, start_time, end_time, target_model):
    """Write a normalized cumulative-rainfall input file for the target model.

    Fetches the 5-min rainfall series identified by ``curw_sim_hash_id``,
    resamples it to the model's timestep, converts it into normalized
    cumulative fractions of the total rainfall and writes the 'R' records
    (plus header lines) to ``rain_file_path``.

    :param curw_sim_pool: curw_sim connection pool
    :param rain_file_path: destination file path
    :param curw_sim_hash_id: hash id of the rainfall timeseries
    :param start_time: 'YYYY-MM-DD HH:MM:SS' window start
    :param end_time: 'YYYY-MM-DD HH:MM:SS' window end
    :param target_model: flo2d variant; flo2d_150/flo2d_150_v2 use a 15-min
        timestep, everything else 5 min
    """
    # retrieve the observed timeseries and align it onto a 5-min time grid
    frame = pd.DataFrame()
    frame['time'] = pd.date_range(start=start_time, end=end_time, freq='5min')
    ts_store = Timeseries(curw_sim_pool)
    raw_ts = ts_store.get_timeseries(id_=curw_sim_hash_id, start_date=start_time, end_date=end_time)
    raw_ts.insert(0, ['time', 'value'])
    value_df = list_of_lists_to_df_first_row_as_columns(raw_ts)
    value_df['value'] = value_df['value'].astype('float64')
    frame = pd.merge(frame, value_df, how="left", on='time')
    frame.set_index('time', inplace=True)
    frame = frame.dropna()

    # model-specific timestep: only the 150m variants run at 15 minutes
    timestep = 15 if target_model in ("flo2d_150", "flo2d_150_v2") else 5
    if timestep == 15:
        frame = frame.resample('15min', label='right', closed='right').sum()
    frame = replace_negative_numbers_with_nan(frame)
    timeseries = frame['value'].reset_index().values.tolist()

    start_time = datetime.strptime(start_time, DATE_TIME_FORMAT)

    # running total and per-step cumulative rainfall
    total_rain = 0
    cumulative_timeseries = []
    for _, step_value in timeseries:
        total_rain += float(step_value)
        cumulative_timeseries.append(total_rain)

    # one 'R' record per step: hours since start + cumulative fraction
    rain_dat = []
    for idx, (timestamp, _) in enumerate(timeseries):
        time_col = '%.3f' % ((timestamp - start_time).total_seconds() / 3600)
        if total_rain > 0:
            rain_col = '%.3f' % (cumulative_timeseries[idx] / total_rain)
        else:
            rain_col = '%.3f' % (0)
        rain_dat.append("R " + time_col.ljust(14) + rain_col + " ")

    # prepend the header lines (total rainfall, then the leading record)
    rain_dat.insert(0, " {} 5 0 0 ".format('%.3f' % total_rain))
    rain_dat.insert(0, " 0 0 ")
    write_to_file(rain_file_path, rain_dat)
def update_rainfall_fcsts(flo2d_model, method, grid_interpolation, model_list, timestep):
    """Update rainfall forecasts for flo2d models.

    For every flo2d grid cell, fetches the latest forecast from each entry
    in ``model_list`` (starting after the cell's last observation when one
    exists), scales each by its coefficient, sums them per timestamp, clamps
    negatives to zero and upserts the result into curw_sim.

    :param flo2d_model: flo2d model (selects the grid csv; part of grid ids)
    :param method: value interpolation method stored in run metadata
    :param grid_interpolation: grid interpolation method (part of the grid id)
    :param model_list: list of forecast models used to calculate the rainfall.
        NOTE(review): the loop reads four fields per row —
        [model, version, sim_tag, coefficient] — although the original
        docstring example showed only [model, version] pairs, e.g.
        [["WRF_E", "v4"],["WRF_SE", "v4"]]; confirm the expected row shape.
    :param timestep: output timeseries timestep (5 or 15 minutes)
    :return: None; errors are logged and swallowed
    """
    try:
        # Connect to the database
        curw_sim_pool = get_Pool(host=CURW_SIM_HOST,
                                 user=CURW_SIM_USERNAME,
                                 password=CURW_SIM_PASSWORD,
                                 port=CURW_SIM_PORT,
                                 db=CURW_SIM_DATABASE)
        curw_fcst_pool = get_Pool(host=CURW_FCST_HOST,
                                  user=CURW_FCST_USERNAME,
                                  password=CURW_FCST_PASSWORD,
                                  port=CURW_FCST_PORT,
                                  db=CURW_FCST_DATABASE)
        Sim_TS = Sim_Timeseries(pool=curw_sim_pool)
        Fcst_TS = Fcst_Timeseries(pool=curw_fcst_pool)
        flo2d_grids = read_csv('grids/flo2d/{}m.csv'.format(
            flo2d_model))  # [Grid_ ID, X(longitude), Y(latitude)]
        flo2d_wrf_mapping = get_flo2d_cells_to_wrf_grid_mappings(
            pool=curw_sim_pool,
            grid_interpolation=grid_interpolation,
            flo2d_model=flo2d_model)
        for flo2d_index in range(len(flo2d_grids)):  # len(flo2d_grids)
            lat = flo2d_grids[flo2d_index][2]
            lon = flo2d_grids[flo2d_index][1]
            cell_id = flo2d_grids[flo2d_index][0]
            meta_data = {
                'latitude': float('%.6f' % float(lat)),
                'longitude': float('%.6f' % float(lon)),
                'model': flo2d_model,
                'method': method,
                'grid_id': '{}_{}_{}'.format(flo2d_model, grid_interpolation,
                                             (str(cell_id)).zfill(10))
            }
            tms_id = Sim_TS.get_timeseries_id(grid_id=meta_data.get('grid_id'),
                                              method=meta_data.get('method'))
            if tms_id is None:
                # first time this cell is seen: create the run entry
                tms_id = Sim_TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                Sim_TS.insert_run(meta_data=meta_data)
            # last stored observation; when present, forecasts start after it
            obs_end = Sim_TS.get_obs_end(id_=tms_id)
            fcst_timeseries = []
            for i in range(len(model_list)):
                source_id = get_source_id(pool=curw_fcst_pool,
                                          model=model_list[i][0],
                                          version=model_list[i][1])
                sim_tag = model_list[i][2]
                coefficient = model_list[i][3]
                temp_timeseries = []
                if timestep == 5:
                    # forecasts are stored at 15-min resolution; expand to 5 min
                    if obs_end is not None:
                        temp_timeseries = convert_15_min_ts_to_5_mins_ts(
                            newly_extracted_timeseries=Fcst_TS.
                            get_latest_timeseries(sim_tag=sim_tag,
                                                  station_id=flo2d_wrf_mapping.
                                                  get(meta_data['grid_id']),
                                                  start=obs_end,
                                                  source_id=source_id,
                                                  variable_id=1,
                                                  unit_id=1))
                    else:
                        temp_timeseries = convert_15_min_ts_to_5_mins_ts(
                            newly_extracted_timeseries=Fcst_TS.
                            get_latest_timeseries(sim_tag=sim_tag,
                                                  station_id=flo2d_wrf_mapping.
                                                  get(meta_data['grid_id']),
                                                  source_id=source_id,
                                                  variable_id=1,
                                                  unit_id=1))
                elif timestep == 15:
                    if obs_end is not None:
                        temp_timeseries = Fcst_TS.get_latest_timeseries(
                            sim_tag=sim_tag,
                            station_id=flo2d_wrf_mapping.get(
                                meta_data['grid_id']),
                            start=obs_end,
                            source_id=source_id,
                            variable_id=1,
                            unit_id=1)
                    else:
                        temp_timeseries = Fcst_TS.get_latest_timeseries(
                            sim_tag=sim_tag,
                            station_id=flo2d_wrf_mapping.get(
                                meta_data['grid_id']),
                            source_id=source_id,
                            variable_id=1,
                            unit_id=1)
                # scale this model's contribution by its coefficient
                if coefficient != 1:
                    for j in range(len(temp_timeseries)):
                        temp_timeseries[j][1] = float(
                            temp_timeseries[j][1]) * coefficient
                if i == 0:
                    fcst_timeseries = temp_timeseries
                else:
                    fcst_timeseries = append_value_for_timestamp(
                        existing_ts=fcst_timeseries, new_ts=temp_timeseries)
            # collapse per-model values into one value per timestamp
            sum_timeseries = summed_timeseries(fcst_timeseries)
            # clamp negative rainfall to zero
            for i in range(len(sum_timeseries)):
                if float(sum_timeseries[i][1]) < 0:
                    sum_timeseries[i][1] = 0
            if sum_timeseries is not None and len(sum_timeseries) > 0:
                Sim_TS.insert_data(timeseries=sum_timeseries,
                                   tms_id=tms_id,
                                   upsert=True)
    except Exception as e:
        traceback.print_exc()
        logger.error(
            "Exception occurred while updating fcst rainfalls in curw_sim.")
    finally:
        destroy_Pool(curw_sim_pool)
        destroy_Pool(curw_fcst_pool)
def update_rainfall_obs(flo2d_model, method, grid_interpolation, timestep):
    """
    Update rainfall observations for flo2d models.

    For every cell of the flo2d grid, merges observed rainfall from up to
    three nearby obs stations (in priority order), starting 10 days back or
    from one hour before the last stored observation, sanitizes missing-value
    markers (-99999 -> 0) and upserts the merged series into curw_sim.

    :param flo2d_model: flo2d model
    :param method: value interpolation method
    :param grid_interpolation: grid interpolation method
    :param timestep: output timeseries timestep in minutes (5 or 15; any
        other value leaves the series empty)
    :return:
    """
    now = datetime.now()
    # default extraction window: last 10 days, truncated to the hour
    OBS_START_STRING = (now - timedelta(days=10)).strftime('%Y-%m-%d %H:00:00')
    OBS_START = datetime.strptime(OBS_START_STRING, '%Y-%m-%d %H:%M:%S')

    try:
        # Connect to the database
        curw_obs_pool = get_Pool(host=CURW_OBS_HOST, user=CURW_OBS_USERNAME,
                                 password=CURW_OBS_PASSWORD, port=CURW_OBS_PORT,
                                 db=CURW_OBS_DATABASE)
        curw_obs_connection = curw_obs_pool.connection()

        curw_sim_pool = get_Pool(host=CURW_SIM_HOST, user=CURW_SIM_USERNAME,
                                 password=CURW_SIM_PASSWORD, port=CURW_SIM_PORT,
                                 db=CURW_SIM_DATABASE)

        TS = Sim_Timeseries(pool=curw_sim_pool)

        # [hash_id, station_id, station_name, latitude, longitude]
        active_obs_stations = read_csv(
            'grids/obs_stations/rainfall/curw_active_rainfall_obs_stations.csv'
        )
        flo2d_grids = read_csv('grids/flo2d/{}m.csv'.format(
            flo2d_model))  # [Grid_ ID, X(longitude), Y(latitude)]

        stations_dict_for_obs = {}  # keys: obs station id , value: hash id
        for obs_index in range(len(active_obs_stations)):
            stations_dict_for_obs[active_obs_stations[obs_index]
                                  [1]] = active_obs_stations[obs_index][0]

        # maps each flo2d grid_id to its three candidate obs station ids
        flo2d_obs_mapping = get_flo2d_cells_to_obs_grid_mappings(
            pool=curw_sim_pool, grid_interpolation=grid_interpolation,
            flo2d_model=flo2d_model)

        for flo2d_index in range(len(flo2d_grids)):
            obs_start = OBS_START
            lat = flo2d_grids[flo2d_index][2]
            lon = flo2d_grids[flo2d_index][1]
            cell_id = flo2d_grids[flo2d_index][0]

            meta_data = {
                'latitude': float('%.6f' % float(lat)),
                'longitude': float('%.6f' % float(lon)),
                'model': flo2d_model,
                'method': method,
                'grid_id': '{}_{}_{}'.format(flo2d_model, grid_interpolation,
                                             (str(cell_id)).zfill(10))
            }

            tms_id = TS.get_timeseries_id(grid_id=meta_data.get('grid_id'),
                                          method=meta_data.get('method'))
            if tms_id is None:
                # first time this cell is seen: register a new run entry
                tms_id = TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                TS.insert_run(meta_data=meta_data)

            obs_end = TS.get_obs_end(id_=tms_id)
            if obs_end is not None:
                # re-extract from an hour before the last stored observation
                # so late-arriving corrections are picked up (upsert below)
                obs_start = obs_end - timedelta(hours=1)

            # three candidate stations in priority order; -1 means "no station"
            obs1_station_id = str(flo2d_obs_mapping.get(meta_data['grid_id'])[0])
            obs2_station_id = str(flo2d_obs_mapping.get(meta_data['grid_id'])[1])
            obs3_station_id = str(flo2d_obs_mapping.get(meta_data['grid_id'])[2])

            obs_timeseries = []

            if timestep == 5:
                if obs1_station_id != str(-1):
                    obs1_hash_id = stations_dict_for_obs.get(obs1_station_id)
                    ts = extract_obs_rain_5_min_ts(connection=curw_obs_connection,
                                                   start_time=obs_start,
                                                   id=obs1_hash_id)
                    if ts is not None and len(ts) > 1:
                        # [1:] skips the expected_start anchor row
                        obs_timeseries.extend(
                            process_5_min_ts(newly_extracted_timeseries=ts,
                                             expected_start=obs_start)[1:])
                if obs2_station_id != str(-1):
                    obs2_hash_id = stations_dict_for_obs.get(obs2_station_id)
                    ts2 = extract_obs_rain_5_min_ts(connection=curw_obs_connection,
                                                    start_time=obs_start,
                                                    id=obs2_hash_id)
                    if ts2 is not None and len(ts2) > 1:
                        # fill gaps left by station 1, then append the tail
                        obs_timeseries = fill_missing_values(
                            newly_extracted_timeseries=ts2, OBS_TS=obs_timeseries)
                        if obs_timeseries is not None and len(obs_timeseries) > 0:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_5_min_ts(newly_extracted_timeseries=ts2,
                                             expected_start=expected_start)[1:])
                if obs3_station_id != str(-1):
                    obs3_hash_id = stations_dict_for_obs.get(obs3_station_id)
                    ts3 = extract_obs_rain_5_min_ts(connection=curw_obs_connection,
                                                    start_time=obs_start,
                                                    id=obs3_hash_id)
                    if ts3 is not None and len(ts3) > 1 and len(obs_timeseries) > 0:
                        obs_timeseries = fill_missing_values(
                            newly_extracted_timeseries=ts3, OBS_TS=obs_timeseries)
                        if obs_timeseries is not None:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_5_min_ts(newly_extracted_timeseries=ts3,
                                             expected_start=expected_start)[1:])
            elif timestep == 15:
                if obs1_station_id != str(-1):
                    obs1_hash_id = stations_dict_for_obs.get(obs1_station_id)
                    ts = extract_obs_rain_15_min_ts(connection=curw_obs_connection,
                                                    start_time=obs_start,
                                                    id=obs1_hash_id)
                    if ts is not None and len(ts) > 1:
                        obs_timeseries.extend(
                            process_15_min_ts(newly_extracted_timeseries=ts,
                                              expected_start=obs_start)[1:])
                if obs2_station_id != str(-1):
                    obs2_hash_id = stations_dict_for_obs.get(obs2_station_id)
                    ts2 = extract_obs_rain_15_min_ts(connection=curw_obs_connection,
                                                     start_time=obs_start,
                                                     id=obs2_hash_id)
                    if ts2 is not None and len(ts2) > 1:
                        obs_timeseries = fill_missing_values(
                            newly_extracted_timeseries=ts2, OBS_TS=obs_timeseries)
                        if obs_timeseries is not None and len(obs_timeseries) > 0:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_15_min_ts(newly_extracted_timeseries=ts2,
                                              expected_start=expected_start)[1:])
                if obs3_station_id != str(-1):
                    obs3_hash_id = stations_dict_for_obs.get(obs3_station_id)
                    ts3 = extract_obs_rain_15_min_ts(connection=curw_obs_connection,
                                                     start_time=obs_start,
                                                     id=obs3_hash_id)
                    if ts3 is not None and len(ts3) > 1 and len(obs_timeseries) > 0:
                        obs_timeseries = fill_missing_values(
                            newly_extracted_timeseries=ts3, OBS_TS=obs_timeseries)
                        if obs_timeseries is not None:
                            expected_start = obs_timeseries[-1][0]
                        else:
                            expected_start = obs_start
                        obs_timeseries.extend(
                            process_15_min_ts(newly_extracted_timeseries=ts3,
                                              expected_start=expected_start)[1:])

            # -99999 is the obs db missing-value marker; treat as no rain
            for i in range(len(obs_timeseries)):
                if obs_timeseries[i][1] == -99999:
                    obs_timeseries[i][1] = 0

            if obs_timeseries is not None and len(obs_timeseries) > 0:
                TS.insert_data(timeseries=obs_timeseries, tms_id=tms_id,
                               upsert=True)
                # BUG FIX: was obs_timeseries[-1][1] (the rainfall VALUE).
                # obs_end must be the latest TIMESTAMP (index 0): get_obs_end's
                # result is used as a datetime above (obs_end - timedelta(...))
                # and as `start=obs_end` in the forecast updaters.
                TS.update_latest_obs(id_=tms_id,
                                     obs_end=(obs_timeseries[-1][0]))

    except Exception as e:
        traceback.print_exc()
        logger.error(
            "Exception occurred while updating obs rainfalls in curw_sim.")
    finally:
        curw_obs_connection.close()
        destroy_Pool(pool=curw_sim_pool)
        destroy_Pool(pool=curw_obs_pool)
        logger.info("Process finished")
def update_rainfall_fcsts(target_model, method, grid_interpolation, model_list, timestep):
    """
    Update rainfall forecasts for flo2d models, keyed by active obs station
    rather than flo2d grid cell.

    NOTE(review): this re-defines update_rainfall_fcsts — at import time this
    later definition shadows the flo2d-cell variant defined earlier in the file.

    :param target_model: target model for which input ins prepared
    :param method: value interpolation method
    :param grid_interpolation: grid interpolation method
    :param model_list: list of forecast models used to calculate the rainfall.
        Each entry is read as [model, version, sim_tag, coefficient]
        (indices 0-3 below), e.g.:
        [["WRF_E", "4.0", "evening_18hrs", 1], ["WRF_SE", "v4", "evening_18hrs", 1],
         ["WRF_Ensemble", "4.0", "MME", 1]]
    :param timestep: output timeseries timestep in minutes (5 or 15; any
        other value leaves the per-model series empty)
    :return:
    """
    try:
        # Connect to the database
        curw_sim_pool = get_Pool(host=CURW_SIM_HOST, user=CURW_SIM_USERNAME,
                                 password=CURW_SIM_PASSWORD, port=CURW_SIM_PORT,
                                 db=CURW_SIM_DATABASE)
        curw_fcst_pool = get_Pool(host=CURW_FCST_HOST, user=CURW_FCST_USERNAME,
                                  password=CURW_FCST_PASSWORD, port=CURW_FCST_PORT,
                                  db=CURW_FCST_DATABASE)

        Sim_TS = Sim_Timeseries(pool=curw_sim_pool)
        Fcst_TS = Fcst_Timeseries(pool=curw_fcst_pool)

        # [hash_id, station_id, station_name, latitude, longitude]
        active_obs_stations = read_csv(
            'grids/obs_stations/rainfall/curw_active_rainfall_obs_stations.csv'
        )
        obs_stations_dict = {
        }  # keys: obs station id , value: [name, latitude, longitude]
        for obs_index in range(len(active_obs_stations)):
            obs_stations_dict[active_obs_stations[obs_index][1]] = [
                active_obs_stations[obs_index][2],
                active_obs_stations[obs_index][3],
                active_obs_stations[obs_index][4]
            ]

        # maps each obs-station grid_id to its nearest wrf d03 station
        obs_d03_mapping = get_obs_to_d03_grid_mappings_for_rainfall(
            pool=curw_sim_pool, grid_interpolation=grid_interpolation)

        for obs_id in obs_stations_dict.keys():
            meta_data = {
                'latitude':
                float('%.6f' % float(obs_stations_dict.get(obs_id)[1])),
                'longitude':
                float('%.6f' % float(obs_stations_dict.get(obs_id)[2])),
                'model': target_model,
                'method': method,
                # e.g. rainfall_100057_Naula_MDPA
                'grid_id':
                'rainfall_{}_{}_{}'.format(obs_id,
                                           obs_stations_dict.get(obs_id)[0],
                                           grid_interpolation)
            }

            tms_id = Sim_TS.get_timeseries_id_if_exists(meta_data=meta_data)
            if tms_id is None:
                # first time this station is seen: register a new run entry
                tms_id = Sim_TS.generate_timeseries_id(meta_data=meta_data)
                meta_data['id'] = tms_id
                Sim_TS.insert_run(meta_data=meta_data)

            # latest observed timestamp already stored for this series (may be None)
            obs_end = Sim_TS.get_obs_end(id_=tms_id)

            fcst_timeseries = []
            for i in range(len(model_list)):
                source_id = get_source_id(pool=curw_fcst_pool,
                                          model=model_list[i][0],
                                          version=model_list[i][1])
                sim_tag = model_list[i][2]
                coefficient = model_list[i][3]

                temp_timeseries = []

                if timestep == 5:
                    # forecasts are stored at 15-min resolution; interpolate down to 5 min
                    if obs_end is not None:
                        temp_timeseries = convert_15_min_ts_to_5_mins_ts(
                            newly_extracted_timeseries=Fcst_TS.get_latest_timeseries(
                                sim_tag=sim_tag,
                                station_id=obs_d03_mapping.get(meta_data['grid_id'])[0],
                                start=obs_end, source_id=source_id,
                                variable_id=1, unit_id=1))
                    else:
                        temp_timeseries = convert_15_min_ts_to_5_mins_ts(
                            newly_extracted_timeseries=Fcst_TS.get_latest_timeseries(
                                sim_tag=sim_tag,
                                station_id=obs_d03_mapping.get(meta_data['grid_id'])[0],
                                source_id=source_id, variable_id=1, unit_id=1))
                elif timestep == 15:
                    if obs_end is not None:
                        temp_timeseries = Fcst_TS.get_latest_timeseries(
                            sim_tag=sim_tag,
                            station_id=obs_d03_mapping.get(meta_data['grid_id'])[0],
                            start=obs_end, source_id=source_id,
                            variable_id=1, unit_id=1)
                    else:
                        temp_timeseries = Fcst_TS.get_latest_timeseries(
                            sim_tag=sim_tag,
                            station_id=obs_d03_mapping.get(meta_data['grid_id'])[0],
                            source_id=source_id, variable_id=1, unit_id=1)

                # apply per-model weighting
                if coefficient != 1:
                    for j in range(len(temp_timeseries)):
                        temp_timeseries[j][1] = float(temp_timeseries[j][1]) * coefficient

                if i == 0:
                    fcst_timeseries = temp_timeseries
                else:
                    # align by timestamp, collecting one value per model
                    fcst_timeseries = append_value_for_timestamp(
                        existing_ts=fcst_timeseries, new_ts=temp_timeseries)

            # collapse per-model values into a single summed series
            sum_timeseries = summed_timeseries(fcst_timeseries)

            # rainfall cannot be negative; clamp interpolation artifacts to 0
            for i in range(len(sum_timeseries)):
                if float(sum_timeseries[i][1]) < 0:
                    sum_timeseries[i][1] = 0

            if sum_timeseries is not None and len(sum_timeseries) > 0:
                Sim_TS.insert_data(timeseries=sum_timeseries, tms_id=tms_id,
                                   upsert=True)

    except Exception as e:
        traceback.print_exc()
        logger.error(
            "Exception occurred while updating fcst rainfalls in curw_sim.")
    finally:
        # NOTE(review): if pool creation itself raised, these names may be
        # unbound here — confirm against get_Pool's failure mode
        destroy_Pool(curw_sim_pool)
        destroy_Pool(curw_fcst_pool)