def __init__(self, leadTimeLabel, leadTimeValue, countryCodeISO3, model):
    """Wire up the pipeline components for one country / lead time / model.

    Depending on ``model`` ('glofas' or 'rainfall'), the matching data
    sources are fetched and the corresponding processing helpers built.
    """
    self.leadTimeLabel = leadTimeLabel
    self.leadTimeValue = leadTimeValue

    # Shared database access plus the country's admin-area geometries.
    self.db = DatabaseManager(leadTimeLabel, countryCodeISO3)
    self.admin_area_gdf = self.db.downloadGeoDataFromDb(
        'IBF-app', 'adminArea', countryCodeISO3=countryCodeISO3)

    if model == 'glofas':
        # River-discharge (GLOFAS) based flood pipeline.
        self.glofas_stations = self.db.apiGetRequest(
            'glofasStations', countryCodeISO3=countryCodeISO3)
        self.district_mapping = self.db.apiGetRequest(
            'adminAreas/station-mapping', countryCodeISO3=countryCodeISO3)
        self.glofasData = GlofasData(
            leadTimeLabel, leadTimeValue, countryCodeISO3,
            self.glofas_stations, self.district_mapping)
        self.floodExtent = FloodExtent(
            leadTimeLabel, leadTimeValue, countryCodeISO3,
            self.district_mapping, self.admin_area_gdf)
        self.exposure = Exposure(
            leadTimeLabel, countryCodeISO3,
            self.admin_area_gdf, self.district_mapping)
    elif model == 'rainfall':
        # Rainfall-trigger based pipeline.
        self.rainfall_triggers = self.db.apiGetRequest(
            'rainfallTriggers', countryCodeISO3=countryCodeISO3)
        self.rainfallData = RainfallData(
            leadTimeLabel, leadTimeValue, countryCodeISO3,
            self.admin_area_gdf, self.rainfall_triggers)
        self.exposure = Exposure(
            leadTimeLabel, countryCodeISO3, self.admin_area_gdf)
def __init__(self, leadTimeLabel, leadTimeValue, countryCodeISO3,
             glofas_stations, district_mapping):
    """Store run settings and derive every input/output file path."""
    self.db = DatabaseManager(leadTimeLabel, countryCodeISO3)
    self.leadTimeLabel = leadTimeLabel
    self.leadTimeValue = leadTimeValue
    self.countryCodeISO3 = countryCodeISO3

    # Locations read/written by the extraction and trigger steps.
    self.inputPath = f'{PIPELINE_DATA}input/glofas/'
    self.triggerPerDay = (
        f'{PIPELINE_OUTPUT}triggers_rp_per_station/'
        f'trigger_per_day_{countryCodeISO3}.json')
    self.extractedGlofasPath = (
        f'{PIPELINE_OUTPUT}glofas_extraction/'
        f'glofas_forecast_{self.leadTimeLabel}_{countryCodeISO3}.json')
    self.triggersPerStationPath = (
        f'{PIPELINE_OUTPUT}triggers_rp_per_station/'
        f'triggers_rp_{self.leadTimeLabel}_{countryCodeISO3}.json')

    # Static station metadata and station -> district mapping.
    self.GLOFAS_STATIONS = glofas_stations
    self.DISTRICT_MAPPING = district_mapping
    self.current_date = CURRENT_DATE.strftime('%Y%m%d')
class Forecast:
    """Orchestrates one forecast run (one lead time, country and model).

    Fetches the country's admin areas, converts them to a GeoDataFrame
    and instantiates the model-specific processing helpers.
    """

    def __init__(self, leadTimeLabel, leadTimeValue, countryCodeISO3, model):
        self.leadTimeLabel = leadTimeLabel
        self.leadTimeValue = leadTimeValue
        self.db = DatabaseManager(leadTimeLabel, countryCodeISO3)

        # Reshape the raw API records into GeoJSON-like features:
        # GeoDataFrame.from_features expects 'geometry' and 'properties'.
        admin_area_json = self.db.apiGetRequest(
            'admin-areas/raw', countryCodeISO3=countryCodeISO3)
        for area in admin_area_json:
            area['geometry'] = area['geom']
            area['properties'] = {
                'placeCode': area['placeCode'],
                'name': area['name'],
            }
        self.admin_area_gdf = geopandas.GeoDataFrame.from_features(
            admin_area_json)

        if model == 'glofas':
            self.glofas_stations = self.db.apiGetRequest(
                'glofas-stations', countryCodeISO3=countryCodeISO3)
            # NOTE(review): the mapping is fetched from 'admin-areas/raw',
            # the same endpoint as the admin areas above — confirm this
            # endpoint really returns the station->district mapping.
            self.district_mapping = self.db.apiGetRequest(
                'admin-areas/raw', countryCodeISO3=countryCodeISO3)
            self.glofasData = GlofasData(
                leadTimeLabel, leadTimeValue, countryCodeISO3,
                self.glofas_stations, self.district_mapping)
            self.floodExtent = FloodExtent(
                leadTimeLabel, leadTimeValue, countryCodeISO3,
                self.district_mapping, self.admin_area_gdf)
            self.exposure = Exposure(
                leadTimeLabel, countryCodeISO3,
                self.admin_area_gdf, self.district_mapping)
        if model == 'rainfall':
            self.rainfall_triggers = self.db.apiGetRequest(
                'rainfall-triggers', countryCodeISO3=countryCodeISO3)
            self.rainfallData = RainfallData(
                leadTimeLabel, leadTimeValue, countryCodeISO3,
                self.admin_area_gdf, self.rainfall_triggers)
            self.exposure = Exposure(
                leadTimeLabel, countryCodeISO3, self.admin_area_gdf)
class GlofasData:
    """Downloads, extracts and post-processes GLOFAS river-discharge
    forecasts for one country and one lead time.

    Workflow (see process()): clean the input directory, download the
    daily tarball from the datalake, extract per-station 51-ensemble
    forecasts from the NetCDF files (or generate mock data), then derive
    trigger and return-period values per station.
    """

    def __init__(self, leadTimeLabel, leadTimeValue, countryCodeISO3,
                 glofas_stations, district_mapping):
        """Store run settings and derive the input/output file paths."""
        self.db = DatabaseManager(leadTimeLabel, countryCodeISO3)
        self.leadTimeLabel = leadTimeLabel
        self.leadTimeValue = leadTimeValue
        self.countryCodeISO3 = countryCodeISO3
        self.inputPath = PIPELINE_DATA + 'input/glofas/'
        self.triggerPerDay = PIPELINE_OUTPUT + \
            'triggers_rp_per_station/trigger_per_day_' + countryCodeISO3 + '.json'
        self.extractedGlofasPath = PIPELINE_OUTPUT + \
            'glofas_extraction/glofas_forecast_' + \
            self.leadTimeLabel + '_' + countryCodeISO3 + '.json'
        self.triggersPerStationPath = PIPELINE_OUTPUT + \
            'triggers_rp_per_station/triggers_rp_' + \
            self.leadTimeLabel + '_' + countryCodeISO3 + '.json'
        self.GLOFAS_STATIONS = glofas_stations
        self.DISTRICT_MAPPING = district_mapping
        self.current_date = CURRENT_DATE.strftime('%Y%m%d')

    def process(self):
        """Run the full GLOFAS step: download (unless mocked), extract,
        then compute triggers."""
        # Read the mock flag once; it cannot change mid-run.
        mock = SETTINGS_SECRET[self.countryCodeISO3]['mock']
        if not mock:
            self.removeOldGlofasData()
            self.download()
            self.extractGlofasData()
        else:
            self.extractMockData()
        self.findTrigger()

    def removeOldGlofasData(self):
        """Delete every leftover file in the GLOFAS input directory."""
        for leftover in os.listdir(self.inputPath):
            os.remove(os.path.join(self.inputPath, leftover))

    def download(self):
        """Download GLOFAS data, retrying every 10 minutes for up to 12 hours.

        Raises:
            ValueError: if no dataset could be retrieved within the window.
        """
        downloadDone = False
        timeToTryDownload = 43200  # total retry window: 12 hours
        timeToRetry = 600          # wait between attempts: 10 minutes
        start = time.time()
        end = start + timeToTryDownload
        while not downloadDone and time.time() < end:
            try:
                self.getGlofasData()
                downloadDone = True
            except Exception as exception:
                error = 'Download data failed. Trying again in {} minutes.\n{}'.format(
                    timeToRetry // 60, exception)
                print(error)
                logger.info(error)
                time.sleep(timeToRetry)
        if not downloadDone:
            raise ValueError('GLofas download failed for ' +
                             str(timeToTryDownload / 3600) +
                             ' hours, no new dataset was found')

    def getGlofasData(self):
        """Fetch today's GLOFAS tarball from the datalake and unpack it.

        Raises:
            ValueError: if the datalake responds with an HTTP error status.
        """
        filename = GLOFAS_FILENAME + '_' + self.current_date + '00.tar.gz'
        path = 'glofas/' + filename
        glofasDataFile = self.db.getDataFromDatalake(path)
        if glofasDataFile.status_code >= 400:
            raise ValueError()
        # 'with' guarantees the archive handle is closed even on error
        # (the original leaked the file object from open(...).write(...)).
        with open(self.inputPath + filename, 'wb') as archive:
            archive.write(glofasDataFile.content)
        # NOTE(review): extractall() on an untrusted archive is vulnerable
        # to path traversal; the datalake is assumed trusted here.
        with tarfile.open(self.inputPath + filename, "r:gz") as tar:
            tar.extractall(self.inputPath)

    def extractGlofasData(self):
        """Extract per-station ensemble forecasts from the downloaded
        NetCDF files and write the station and trigger-per-day JSONs."""
        print('\nExtracting Glofas (FTP) Data\n')

        files = [
            f for f in listdir(self.inputPath)
            if isfile(join(self.inputPath, f)) and f.endswith('.nc')
        ]

        # Index both frames by station code so the `in` membership tests
        # below match against the station codes (index labels).
        df_thresholds = pd.read_json(json.dumps(self.GLOFAS_STATIONS))
        df_thresholds = df_thresholds.set_index("stationCode", drop=False)
        df_district_mapping = pd.read_json(json.dumps(self.DISTRICT_MAPPING))
        df_district_mapping = df_district_mapping.set_index("glofasStation",
                                                            drop=False)

        stations = []
        trigger_per_day = {
            '1-day': False,
            '2-day': False,
            '3-day': False,
            '4-day': False,
            '5-day': False,
            '6-day': False,
            '7-day': False,
        }

        for i in range(0, len(files)):
            logging.info("Extracting glofas data from %s", i)
            Filename = os.path.join(self.inputPath, files[i])

            # Skip old stations > need to be removed from FTP
            if 'G5230_Na_ZambiaRedcross' in Filename or 'G5196_Uganda_Gauge' in Filename:
                continue

            station = {}
            station['code'] = files[i].split('_')[2]
            data = xr.open_dataset(Filename)

            # Get threshold for this specific station
            if station['code'] in df_thresholds['stationCode'] and \
                    station['code'] in df_district_mapping['glofasStation']:
                print(Filename)
                threshold = df_thresholds[df_thresholds['stationCode'] ==
                                          station['code']][TRIGGER_LEVEL][0]

                # First time index of the dataset; renamed from 'time' to
                # stop shadowing the `time` module used by download().
                time_index = 0
                for step in range(1, 8):
                    # Loop through 51 ensembles, get forecast and compare
                    # to threshold.
                    ensemble_options = 51
                    count = 0
                    dis_sum = 0
                    for ensemble in range(0, ensemble_options):
                        discharge = data['dis'].sel(
                            ensemble=ensemble,
                            step=step).values[time_index][0]
                        if discharge >= threshold:
                            count = count + 1
                        dis_sum = dis_sum + discharge
                    prob = count / ensemble_options
                    dis_avg = dis_sum / ensemble_options
                    station['fc'] = dis_avg
                    station['fc_prob'] = prob
                    station['fc_trigger'] = 1 if prob > TRIGGER_LEVELS['minimum'] else 0
                    if station['fc_trigger'] == 1:
                        trigger_per_day[str(step) + '-day'] = True
                    if step == self.leadTimeValue:
                        stations.append(station)
                    # Start a fresh record for the next lead-time step.
                    station = {}
                    station['code'] = files[i].split('_')[2]
            data.close()

        # Add a 'no_station' placeholder record.
        for station_code in ['no_station']:
            station = {}
            station['code'] = station_code
            station['fc'] = 0
            station['fc_prob'] = 0
            station['fc_trigger'] = 0
            stations.append(station)

        with open(self.extractedGlofasPath, 'w') as fp:
            json.dump(stations, fp)
            print('Extracted Glofas data - File saved')
        with open(self.triggerPerDay, 'w') as fp:
            json.dump([trigger_per_day], fp)
            print('Extracted Glofas data - Trigger per day File saved')

    def extractMockData(self):
        """Generate mock per-station forecasts instead of reading NetCDF
        files; mirrors the structure of extractGlofasData()."""
        print('\nExtracting Glofas (mock) Data\n')

        # Load input data
        df_thresholds = pd.read_json(json.dumps(self.GLOFAS_STATIONS))
        df_thresholds = df_thresholds.set_index("stationCode", drop=False)
        df_district_mapping = pd.read_json(json.dumps(self.DISTRICT_MAPPING))
        df_district_mapping = df_district_mapping.set_index("glofasStation",
                                                            drop=False)

        # Set up variables to fill
        stations = []
        trigger_per_day = {
            '1-day': False,
            '2-day': False,
            '3-day': False,
            '4-day': False,
            '5-day': False,
            '6-day': False,
            '7-day': False,
        }

        for index, row in df_thresholds.iterrows():
            station = {}
            station['code'] = row['stationCode']

            if station['code'] in df_district_mapping['glofasStation'] \
                    and station['code'] != 'no_station':
                print(station['code'])
                threshold = df_thresholds[df_thresholds['stationCode'] ==
                                          station['code']][TRIGGER_LEVEL][0]

                for step in range(1, 8):
                    # Loop through 51 ensembles, get forecast and compare
                    # to threshold.
                    ensemble_options = 51
                    count = 0
                    dis_sum = 0
                    # BUGFIX: was range(1, ensemble_options), which looped
                    # over only 50 ensembles while still dividing by 51;
                    # now consistent with extractGlofasData().
                    for ensemble in range(0, ensemble_options):
                        # MOCK OVERWRITE DEPENDING ON COUNTRY SETTING
                        if SETTINGS_SECRET[self.countryCodeISO3]['if_mock_trigger']:
                            if step < 5:  # Only dummy trigger for 5-day and above
                                discharge = 0
                            elif station['code'] == 'G5220':  # UGA dummy flood station 1
                                discharge = 600
                            elif station['code'] == 'G1067':  # ETH dummy flood station 1
                                discharge = 1000
                            elif station['code'] == 'G1904':  # ETH dummy flood station 2
                                discharge = 2000
                            elif station['code'] == 'G5194':  # KEN dummy flood station
                                discharge = 2000
                            elif station['code'] == 'G1361':  # ZMB dummy flood station 1
                                discharge = 8000
                            elif station['code'] == 'G1328':  # ZMB dummy flood station 2
                                discharge = 9000
                            elif station['code'] == 'G1319':  # ZMB dummy flood station 3
                                discharge = 1400
                            else:
                                discharge = 0
                        else:
                            discharge = 0

                        if discharge >= threshold:
                            count = count + 1
                        dis_sum = dis_sum + discharge

                    prob = count / ensemble_options
                    dis_avg = dis_sum / ensemble_options
                    station['fc'] = dis_avg
                    station['fc_prob'] = prob
                    station['fc_trigger'] = 1 if prob > TRIGGER_LEVELS['minimum'] else 0
                    if station['fc_trigger'] == 1:
                        trigger_per_day[str(step) + '-day'] = True
                    if step == self.leadTimeValue:
                        stations.append(station)
                    # Start a fresh record for the next lead-time step.
                    station = {}
                    station['code'] = row['stationCode']

        # Add a 'no_station' placeholder record.
        for station_code in ['no_station']:
            station = {}
            station['code'] = station_code
            station['fc'] = 0
            station['fc_prob'] = 0
            station['fc_trigger'] = 0
            stations.append(station)

        with open(self.extractedGlofasPath, 'w') as fp:
            json.dump(stations, fp)
            print('Extracted Glofas data - File saved')
        with open(self.triggerPerDay, 'w') as fp:
            json.dump([trigger_per_day], fp)
            print('Extracted Glofas data - Trigger per day File saved')

    def findTrigger(self):
        """Merge thresholds with extracted forecasts and assign return
        periods; writes the result to self.triggersPerStationPath."""
        logging.info("Started processing glofas data: " + self.leadTimeLabel)

        # Load (static) threshold values per station
        df_thresholds = pd.read_json(json.dumps(self.GLOFAS_STATIONS))
        df_thresholds = df_thresholds.set_index("stationCode", drop=False)
        df_thresholds.sort_index(inplace=True)

        # Load extracted Glofas discharge levels per station
        with open(self.extractedGlofasPath) as json_data:
            d = json.load(json_data)
        df_discharge = pd.DataFrame(d)
        df_discharge.index = df_discharge['code']
        df_discharge.sort_index(inplace=True)

        # Merge the two datasets on station code (both indexes)
        df = pd.merge(df_thresholds, df_discharge,
                      left_index=True, right_index=True)
        del df['lat']
        del df['lon']

        # Determine trigger + return period per water station
        for index, row in df.iterrows():
            fc = float(row['fc'])
            trigger = int(row['fc_trigger'])
            if trigger == 1:
                if self.countryCodeISO3 == 'ZMB':
                    # ZMB distinguishes 10- vs 20-year flood extents.
                    if fc >= row['threshold20Year']:
                        return_period_flood_extent = 20
                    else:
                        return_period_flood_extent = 10
                else:
                    return_period_flood_extent = 25
            else:
                return_period_flood_extent = None

            if fc >= row['threshold20Year']:
                return_period = 20
            elif fc >= row['threshold10Year']:
                return_period = 10
            elif fc >= row['threshold5Year']:
                return_period = 5
            elif fc >= row['threshold2Year']:
                return_period = 2
            else:
                return_period = None

            df.at[index, 'fc_rp_flood_extent'] = return_period_flood_extent
            df.at[index, 'fc_rp'] = return_period

        out = df.to_json(orient='records')
        with open(self.triggersPerStationPath, 'w') as fp:
            fp.write(out)
        print('Processed Glofas data - File saved')