def generate_warnings(self):
    """
    Generates and yields a series of marine weather warnings for a given
    marine weather area. Warnings are returned as Elasticsearch bulk API
    upsert actions, with a single document for the marine weather region
    in GeoJSON to match the Elasticsearch index mappings.

    :returns: Generator of Elasticsearch actions to upsert the marine
              weather warnings.
    """
    warnings = self.root.findall('warnings/')

    # Base GeoJSON feature for the whole marine area; individual warning
    # locations are appended to the warnings_<lang> property list below.
    feature = {'type': 'Feature', 'geometry': {}, 'properties': {}}
    feature['geometry'] = self.area['geometry']
    feature['properties']['area_{}'.format(
        self.language)] = self.area['name']
    feature['properties']['region_{}'.format(
        self.language)] = self.area['region']
    feature['properties']['sub_region_{}'.format(
        self.language)] = self.area['subRegion']
    feature['properties']['warnings_{}'.format(self.language)] = []

    if warnings:
        for elem in warnings:
            datetimes = self.create_datetime_dict(
                elem.findall('event/' 'dateTime'))
            # hoist the repeated <event> lookup out of the dict literal
            # (the original called elem.find('event') four times)
            event = elem.find('event')
            location = {
                'location_{}'.format(self.language): elem.attrib['name'],
                'issued_datetime_utc_{}'.format(self.language):
                    strftime_rfc3339(datetimes['utc']),
                'issued_datetime_local_{}'.format(self.language):
                    strftime_rfc3339(datetimes['local']),
                'event_type_{}'.format(self.language):
                    event.attrib['type'],
                'event_category_{}'.format(self.language):
                    event.attrib['category'],
                'event_name_{}'.format(self.language):
                    event.attrib['name'],
                'event_status_{}'.format(self.language):
                    event.attrib['status'],
            }
            feature['properties']['warnings_{}'.format(
                self.language)].append(location)

    self.items.append(feature)

    # document id is the filename prefix before the first underscore
    action = {
        '_id': self.filepath.stem.split('_')[0],
        '_index': 'marine_weather_warnings',
        '_op_type': 'update',
        'doc': feature,
        'doc_as_upsert': True,
    }

    yield action
def generate_geojson_features(self):
    """
    Generates and yields a series of storm forecasts, one for each feature
    in <self.filepath>. Observations are returned as Elasticsearch bulk
    API upsert actions, with documents in GeoJSON to match the
    Elasticsearch index mappings.

    :returns: Generator of Elasticsearch actions to upsert the storm
              forecasts
    """
    driver = ogr.GetDriverByName('ESRI Shapefile')
    filepath = str(self.filepath.resolve())
    data = driver.Open(filepath, 0)  # 0 = read-only
    lyr = data.GetLayer(0)
    file_datetime_str = strftime_rfc3339(self.date_)

    for feature in lyr:
        feature_json = feature.ExportToJson(as_object=True)
        feature_json['properties']['active'] = True
        feature_json['properties']['filename'] = self.filepath.stem
        feature_json['properties']['filedate'] = file_datetime_str  # noqa

        # TODO: Remove once upstream data is patched
        # clean rad consecutive coordinates in geometry (temporary fix)
        if self.storm_variable == 'rad':
            feature_json['geometry'][
                'coordinates'] = self.clean_consecutive_coordinates(
                    feature_json['geometry']['coordinates'])

        # reformat pts ADVDATE (e.g. "210901/0300") as RFC3339
        if self.storm_variable == 'pts':
            feature_json['properties']['ADVDATE'] = strftime_rfc3339(
                datetime.strptime(
                    feature_json['properties']['ADVDATE'],
                    '%y%m%d/%H%M'
                )
            )

        self.items.append(feature_json)

        action = {
            '_id': '{}-{}-{}-{}-{}'.format(self.storm_name,
                                           self.storm_variable,
                                           file_datetime_str,
                                           self.fh,
                                           feature_json['id']),
            '_index': INDEX_NAME.format(self.storm_variable),
            '_op_type': 'update',
            'doc': feature_json,
            'doc_as_upsert': True
        }

        yield action

    # dereference OGR handles so GDAL closes the shapefile once the
    # generator is exhausted (the OGR Python bindings close the dataset
    # when the last reference is dropped; there is no context manager)
    lyr = None
    data = None
def generate_extended_forecasts(self):
    """
    Generates and yields a series of marine weather extended forecasts for
    a given marine weather area. Each extended forecast is returned as
    Elasticsearch bulk API upsert actions, with documents in GeoJSON to
    match the Elasticsearch index mappings.

    :returns: Generator of Elasticsearch actions to upsert the marine
              weather extended forecast.
    """
    extended_forecasts = self.root.findall('extendedForecast/')

    # Base GeoJSON feature for the marine area; per-location forecasts
    # are appended to the extended_forecasts_<lang> property list below.
    feature = {'type': 'Feature', 'geometry': {}, 'properties': {}}
    feature['geometry'] = self.area['geometry']
    feature['properties']['area_{}'.format(self.language)] = self.area[
        'name'
    ]
    feature['properties']['region_{}'.format(self.language)] = self.area[
        'region'
    ]
    feature['properties'][
        'sub_region_{}'.format(self.language)
    ] = self.area['subRegion']
    feature['properties'][
        'extended_forecasts_{}'.format(self.language)
    ] = []

    if extended_forecasts:
        datetimes = self.create_datetime_dict(
            [
                element
                for element in extended_forecasts
                if element.tag == 'dateTime'
            ]
        )
        feature['properties']['issued_datetime_utc'] = strftime_rfc3339(
            datetimes['utc']
        )
        feature['properties']['issued_datetime_local'] = strftime_rfc3339(
            datetimes['local']
        )

        locations = [
            element
            for element in extended_forecasts
            if element.tag == 'location'
        ]
        for location in locations:
            # hoist the repeated XML lookups out of the dict literal.
            # NB: findall() always returns a list (never None), so the
            # previous "is not None" filter inside the comprehension was
            # a no-op and has been dropped — behavior is unchanged.
            forecast_periods = location.findall('weatherCondition/')
            status_statement = location.find('statusStatement')
            location_entry = {
                # fall back to the area name when the <location> element
                # has no name attribute
                'location_{}'.format(self.language): location.attrib.get(
                    'name', self.area['name']
                ),
                'forecast_periods_{}'.format(self.language): [
                    {
                        'forecast_period_{}'.format(
                            self.language
                        ): forecast_period.attrib['name'],
                        'forecast_{}'.format(
                            self.language
                        ): forecast_period.text,
                    }
                    for forecast_period in forecast_periods
                ],
                'status_statement_{}'.format(self.language):
                    status_statement.text
                    if status_statement is not None
                    else None,
            }
            feature['properties'][
                'extended_forecasts_{}'.format(self.language)
            ].append(location_entry)

    self.items.append(feature)

    # document id is the filename prefix before the first underscore
    action = {
        '_id': self.filepath.stem.split('_')[0],
        '_index': 'marine_weather_extended-forecasts',
        '_op_type': 'update',
        'doc': feature,
        'doc_as_upsert': True,
    }

    yield action
def generate_daily_snow_extremes(self):
    """
    Queries stations data from the db, and reformats data so it can be
    inserted into Elasticsearch.

    Returns a generator of dictionaries that represent upsert actions
    into Elasticsearch's bulk API.

    :returns: generator of bulk API upsert actions.
    """
    try:
        # join daily snowfall record and extreme tables on virtual
        # climate ID + month + day
        self.cur.execute(
            (
                "SELECT t1.*, t2.* "
                "FROM ARKEON2DWH.RECORD_VIRTUAL_SNOWFALL t1 "
                "JOIN ARKEON2DWH.EXTREME_VIRTUAL_SNOWFALL t2 "
                "ON t1.VIRTUAL_CLIMATE_ID = t2.VIRTUAL_CLIMATE_ID "
                "AND t1.LOCAL_MONTH = t2.LOCAL_MONTH "
                "AND t1.LOCAL_DAY = t2.LOCAL_DAY "
            )
        )
    except Exception as err:
        LOGGER.error(
            'Could not fetch records from oracle due to: {}.'.format(
                str(err)
            )
        )

    # cache of per-virtual-station metadata so get_stations_info() is
    # only called once per station
    stations_dict = {}

    for row in self.cur:
        insert_dict = dict(zip([x[0] for x in self.cur.description], row))

        # format LAST_UPDATED as RFC3339 when present and non-null
        # (directly, instead of scanning every key for one known column)
        if insert_dict.get('LAST_UPDATED') is not None:
            insert_dict['LAST_UPDATED'] = strftime_rfc3339(
                insert_dict['LAST_UPDATED']
            )

        virtual_climate_id = insert_dict['VIRTUAL_CLIMATE_ID']

        es_id = '{}-{}-{}'.format(
            insert_dict['VIRTUAL_CLIMATE_ID'],
            insert_dict['LOCAL_MONTH'],
            insert_dict['LOCAL_DAY'],
        )

        # retrieve and cache station record begin/end info on first use
        if virtual_climate_id not in stations_dict:
            stations_dict[virtual_climate_id] = self.get_stations_info(
                'DAILY TOTAL SNOWFALL', virtual_climate_id
            )

        station_info = stations_dict[virtual_climate_id]
        insert_dict['RECORD_BEGIN'] = station_info['record_begin']
        insert_dict['RECORD_END'] = station_info['record_end']
        insert_dict['CLIMATE_IDENTIFIER'] = station_info[
            'climate_identifier'
        ]
        insert_dict['ENG_STN_NAME'] = station_info['eng_stn_name']
        insert_dict['FRE_STN_NAME'] = station_info['fre_stn_name']
        insert_dict['PROVINCE_CODE'] = station_info['province_code']

        # cleanup unwanted fields retained from SQL join
        # NOTE(review): ENG_STN_NAME / FRE_STN_NAME / CLIMATE_IDENTIFIER
        # are assigned above and then deleted here, and LAST_UPDATED is
        # formatted above then dropped — confirm this set-then-delete is
        # intentional (preserved as-is from the original).
        fields_to_delete = [
            'LOCAL_TIME',
            'VIRTUAL_MEAS_DISPLAY_CODE',
            'ENG_STN_NAME',
            'FRE_STN_NAME',
            'CLIMATE_IDENTIFIER',
            'LAST_UPDATED',
        ]
        for field in fields_to_delete:
            insert_dict.pop(field)

        # set properties.IDENTIFIER
        insert_dict['IDENTIFIER'] = es_id

        wrapper = {
            'id': es_id,
            'type': 'Feature',
            'properties': insert_dict,
            'geometry': {
                'type': 'Point',
                'coordinates': station_info['coords'],
            },
        }

        action = {
            '_id': es_id,
            '_index': 'ltce_snow_extremes',
            '_op_type': 'update',
            'doc': wrapper,
            'doc_as_upsert': True,
        }

        yield action
def generate_daily_temp_extremes(self):
    """
    Queries stations data from the db, and reformats data so it can be
    inserted into Elasticsearch.

    Returns a generator of dictionaries that represent upsert actions
    into Elasticsearch's bulk API.

    :returns: generator of bulk API upsert actions.
    """
    try:
        # join the record high/low and extreme high/low tables for both
        # daily MIN and MAX temperature (t1..t8), matched on virtual
        # climate ID + local month + local day
        self.cur.execute(
            (
                "SELECT t1.*, t2.*, t3.*, t4.*, t5.*, t6.*, t7.*, t8.* "
                "FROM ARKEON2DWH.RECORD_HIGH_VIRTUAL_MAX_TEMP t1 "
                "JOIN ARKEON2DWH.RECORD_LOW_VIRTUAL_MAX_TEMP t2 "
                "ON t1.VIRTUAL_CLIMATE_ID = t2.VIRTUAL_CLIMATE_ID "
                "AND t1.LOCAL_MONTH = t2.LOCAL_MONTH "
                "AND t1.LOCAL_DAY = t2.LOCAL_DAY "
                "JOIN ARKEON2DWH.RECORD_LOW_VIRTUAL_MIN_TEMP t3 "
                "ON t1.VIRTUAL_CLIMATE_ID = t3.VIRTUAL_CLIMATE_ID "
                "AND t1.LOCAL_MONTH = t3.LOCAL_MONTH "
                "AND t1.LOCAL_DAY = t3.LOCAL_DAY "
                "JOIN ARKEON2DWH.RECORD_HIGH_VIRTUAL_MIN_TEMP t4 "
                "ON t1.VIRTUAL_CLIMATE_ID = t4.VIRTUAL_CLIMATE_ID "
                "AND t1.LOCAL_MONTH = t4.LOCAL_MONTH "
                "AND t1.LOCAL_DAY = t4.LOCAL_DAY "
                "JOIN ARKEON2DWH.EXTREME_HIGH_VIRTUAL_MAX_TEMP t5 "
                "ON t1.VIRTUAL_CLIMATE_ID = t5.VIRTUAL_CLIMATE_ID "
                "AND t1.LOCAL_MONTH = t5.LOCAL_MONTH "
                "AND t1.LOCAL_DAY = t5.LOCAL_DAY "
                "JOIN ARKEON2DWH.EXTREME_LOW_VIRTUAL_MAX_TEMP t6 "
                "ON t1.VIRTUAL_CLIMATE_ID = t6.VIRTUAL_CLIMATE_ID "
                "AND t1.LOCAL_MONTH = t6.LOCAL_MONTH "
                "AND t1.LOCAL_DAY = t6.LOCAL_DAY "
                "JOIN ARKEON2DWH.EXTREME_HIGH_VIRTUAL_MIN_TEMP t7 "
                "ON t1.VIRTUAL_CLIMATE_ID = t7.VIRTUAL_CLIMATE_ID "
                "AND t1.LOCAL_MONTH = t7.LOCAL_MONTH "
                "AND t1.LOCAL_DAY = t7.LOCAL_DAY "
                "JOIN ARKEON2DWH.EXTREME_LOW_VIRTUAL_MIN_TEMP t8 "
                "ON t1.VIRTUAL_CLIMATE_ID = t8.VIRTUAL_CLIMATE_ID "
                "AND t1.LOCAL_MONTH = t8.LOCAL_MONTH "
                "AND t1.LOCAL_DAY = t8.LOCAL_DAY "
            )
        )
    except Exception as err:
        # NOTE(review): on failure this only logs; the loop below will
        # then iterate whatever state the cursor is in — confirm intended
        LOGGER.error(
            'Could not fetch records from oracle due to: {}.'.format(
                str(err)
            )
        )

    # dictionary to cache per-station metadata so get_stations_info() is
    # called at most twice (MIN + MAX) per virtual station
    stations_dict = {}

    for row in self.cur:
        # map column names from the cursor description onto the row
        insert_dict = dict(zip([x[0] for x in self.cur.description], row))

        # format the LAST_UPDATED timestamp as RFC3339 when non-null
        for key in insert_dict:
            if key in ['LAST_UPDATED']:
                insert_dict[key] = (
                    strftime_rfc3339(insert_dict[key])
                    if insert_dict[key] is not None
                    else insert_dict[key]
                )

        virtual_climate_id = insert_dict['VIRTUAL_CLIMATE_ID']

        # document id: <virtual_climate_id>-<month>-<day>
        es_id = '{}-{}-{}'.format(
            insert_dict['VIRTUAL_CLIMATE_ID'],
            insert_dict["LOCAL_MONTH"],
            insert_dict["LOCAL_DAY"],
        )

        # check if we have station IDs record begin and end. If not
        # retrieve the information and store in stations_dict
        if virtual_climate_id not in stations_dict:
            stations_dict[virtual_climate_id] = {}
            stations_dict[virtual_climate_id][
                'MIN'
            ] = self.get_stations_info(
                'DAILY MINIMUM TEMPERATURE', virtual_climate_id
            )
            stations_dict[virtual_climate_id][
                'MAX'
            ] = self.get_stations_info(
                'DAILY MAXIMUM TEMPERATURE', virtual_climate_id
            )

        # check if TEMPERATURE MIN/MAX for most recent threaded station
        # have same climate identifier value
        min_climate_identifier = stations_dict[virtual_climate_id]['MIN'][
            'climate_identifier'
        ]
        max_climate_identifier = stations_dict[virtual_climate_id]['MAX'][
            'climate_identifier'
        ]
        if min_climate_identifier == max_climate_identifier:
            # identifiers agree: take station metadata from the MAX thread
            insert_dict['CLIMATE_IDENTIFIER'] = stations_dict[
                virtual_climate_id
            ]['MAX']['climate_identifier']
            insert_dict['ENG_STN_NAME'] = stations_dict[
                virtual_climate_id
            ]['MAX']['eng_stn_name']
            insert_dict['FRE_STN_NAME'] = stations_dict[
                virtual_climate_id
            ]['MAX']['fre_stn_name']
            insert_dict['PROVINCE_CODE'] = stations_dict[
                virtual_climate_id
            ]['MAX']['province_code']
        else:
            # mismatched threads: log and skip this row entirely
            LOGGER.error(
                f'Currently threaded station climate identifier value '
                f'does not match between DAILY MINIMUM TEMPERATURE'
                f'({min_climate_identifier}) and DAILY MAXIMUM '
                f'TEMPERATURE({max_climate_identifier}) station threads '
                f'for virtual climate ID {virtual_climate_id}.'
            )
            continue

        # set new fields
        for level in ['MIN', 'MAX']:
            # set new insert_dict keys
            insert_dict[
                '{}_TEMP_RECORD_BEGIN'.format(level)
            ] = stations_dict[virtual_climate_id][level]['record_begin']
            insert_dict[
                '{}_TEMP_RECORD_END'.format(level)
            ] = stations_dict[virtual_climate_id][level]['record_end']

        # cleanup unwanted fields retained from SQL join
        fields_to_delete = [
            'LOCAL_TIME',
            'VIRTUAL_MEAS_DISPLAY_CODE',
            'ENG_STN_NAME',
            'FRE_STN_NAME',
            'CLIMATE_IDENTIFIER',
        ]
        for field in fields_to_delete:
            insert_dict.pop(field)

        # set properties.IDENTIFIER
        insert_dict['IDENTIFIER'] = es_id

        # GeoJSON wrapper; geometry taken from the MAX temperature thread
        wrapper = {
            'id': es_id,
            'type': 'Feature',
            'properties': insert_dict,
            'geometry': {
                'type': 'Point',
                'coordinates': stations_dict[virtual_climate_id]['MAX'][
                    'coords'
                ],
            },
        }

        # Elasticsearch bulk API upsert action
        action = {
            '_id': es_id,
            '_index': 'ltce_temp_extremes',
            '_op_type': 'update',
            'doc': wrapper,
            'doc_as_upsert': True,
        }

        yield action
def generate_stations(self):
    """
    Queries stations data from the db, and reformats data so it can be
    inserted into Elasticsearch.

    Returns a generator of dictionaries that represent upsert actions
    into Elasticsearch's bulk API.

    :returns: generator of bulk API upsert actions.
    """
    try:
        self.cur.execute(
            (
                "SELECT ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.*,"
                "ARKEON2DWH.STATION_INFORMATION.ENG_STN_NAME,"
                "ARKEON2DWH.STATION_INFORMATION.FRE_STN_NAME,"
                "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.LAT,"
                "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.LON,"
                "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.PROVINCECODE "
                "FROM ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW "
                "LEFT JOIN ARKEON2DWH.STATION_INFORMATION "
                "ON ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.STN_ID = "
                "ARKEON2DWH.STATION_INFORMATION.STN_ID "
                "LEFT JOIN ARKEON2DWH.WXO_CITY_INFORMATION_MVW "
                "ON ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.WXO_CITY_CODE = "
                "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.CITYCODE "
                "WHERE "
                "ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.ELEMENT_NAME_E IN "
                "('DAILY MINIMUM TEMPERATURE', 'DAILY MAXIMUM TEMPERATURE',"  # noqa
                "'DAILY TOTAL PRECIPITATION', 'DAILY TOTAL SNOWFALL')"
            )
        )
    except Exception as err:
        LOGGER.error(
            'Could not fetch records from oracle due to: {}.'.format(
                str(err)
            )
        )

    for row in self.cur:
        # pair column names from the cursor description with row values
        record = dict(zip([col[0] for col in self.cur.description], row))

        # convert the station date bounds to RFC3339 strings, leaving
        # null dates untouched
        for date_key in ('START_DATE', 'END_DATE'):
            if record.get(date_key) is not None:
                record[date_key] = strftime_rfc3339(record[date_key])

        # stable document id built from the identifying columns
        doc_id = slugify(
            '{}-{}-{}-{}-{}'.format(
                record['VIRTUAL_CLIMATE_ID'],
                record['ELEMENT_NAME_E'],
                record['CLIMATE_IDENTIFIER'],
                record['START_DATE'],
                record['END_DATE'],
            )
        )

        coordinates = [
            float(record['LON']),
            float(record['LAT']),
        ]

        # rename PROVINCECODE field to PROVINCE_CODE
        record['PROVINCE_CODE'] = record['PROVINCECODE']

        # drop columns retained from the SQL join that are not wanted in
        # the Elasticsearch document
        for unwanted in (
            'STN_ID',
            'ENG_PROV_NAME',
            'FRE_PROV_NAME',
            'REGION_CODE',
            'CRITERIA',
            'NOTES',
            'VIRTUAL_STN_INFO_UPDATE_ID',
            'CURRENT_FLAG',
            'LON',
            'LAT',
            'PROVINCECODE',
        ):
            record.pop(unwanted)

        # set properties.IDENTIFIER
        record['IDENTIFIER'] = doc_id

        # Elasticsearch bulk API upsert action wrapping the GeoJSON doc
        yield {
            '_id': doc_id,
            '_index': 'ltce_stations',
            '_op_type': 'update',
            'doc': {
                'id': doc_id,
                'type': 'Feature',
                'properties': record,
                'geometry': {
                    'type': 'Point',
                    'coordinates': coordinates,
                },
            },
            'doc_as_upsert': True,
        }
def get_stations_info(self, element_name, station_id):
    """
    Queries LTCE station data for a given element name (DAILY MINIMUM
    TEMPERATURE, DAILY MAXIMUM TEMPERATURE, etc.), and virtual station
    ID.

    Returns the earliest start date of all returned stations and the end
    date, climate identifier, and coordinates of the most recently
    threaded station.

    :param element_name: `str` of element name
    :param station_id: `str` of virtual climate station id
    :return: `dict` of stations information
    """
    # exact-match both the virtual climate ID and element name against
    # the ltce_stations index (keyword sub-fields via ".raw")
    query = {
        "query": {
            "bool": {
                "filter": {
                    "bool": {
                        "must": [
                            {
                                "term": {
                                    "properties.VIRTUAL_CLIMATE_ID.raw": station_id  # noqa
                                }
                            },
                            {
                                "term": {
                                    "properties.ELEMENT_NAME_E.raw": element_name  # noqa
                                }
                            },
                        ]
                    }
                }
            }
        }
    }

    # only fetch the fields needed to build the summary dict below
    results = self.conn.Elasticsearch.search(
        body=query,
        index='ltce_stations',
        _source=[
            'properties.CLIMATE_IDENTIFIER',
            'properties.ENG_STN_NAME',
            'properties.FRE_STN_NAME',
            'properties.START_DATE',
            'properties.END_DATE',
            'properties.PROVINCE_CODE',
            'geometry.coordinates',
        ],
    )

    results = [result['_source'] for result in results['hits']['hits']]

    # NOTE(review): if the search returns no hits, both of these stay
    # None and the dict construction below raises — confirm callers
    # only pass station IDs known to exist in the index
    oldest_station = None
    most_recent_station = None

    for index, station in enumerate(results):
        # retrieve station start and end date
        dates = (
            station['properties']['START_DATE'],
            station['properties']['END_DATE'],
        )

        # convert station dates to datetime objects (in place on the
        # station dict, and into start_date/end_date locals)
        (
            station['properties']['START_DATE'],
            station['properties']['END_DATE'],
        ) = (start_date, end_date) = [
            datetime.strptime(date, DATETIME_RFC3339_FMT)
            if date is not None
            else None
            for date in dates
        ]

        # assign first station as oldest and most recent
        if index == 0:
            oldest_station = station
            most_recent_station = station
            continue

        # then compare all remaining stations and replace as necessary
        # NOTE(review): if either START_DATE is None this `<` comparison
        # raises TypeError — presumably START_DATE is always populated;
        # verify against the index data
        if start_date < oldest_station['properties']['START_DATE']:
            oldest_station = station

        # a None END_DATE is treated as "still open" (most recent); once
        # most_recent_station itself has a None END_DATE, it is never
        # replaced
        if most_recent_station['properties']['END_DATE'] is not None and (
            end_date is None
            or end_date > most_recent_station['properties']['END_DATE']
        ):
            most_recent_station = station

    # NOTE(review): record_begin is not None-guarded the way record_end
    # is — strftime_rfc3339(None) would fail if START_DATE can be null
    stations_info = {
        'record_begin': strftime_rfc3339(
            oldest_station['properties']['START_DATE']
        ),
        'record_end': strftime_rfc3339(
            most_recent_station['properties']['END_DATE']
        )
        if most_recent_station['properties']['END_DATE']
        else None,
        'climate_identifier': most_recent_station['properties'][
            'CLIMATE_IDENTIFIER'
        ],
        'eng_stn_name': most_recent_station['properties']['ENG_STN_NAME'],
        'fre_stn_name': most_recent_station['properties']['FRE_STN_NAME'],
        'coords': [
            most_recent_station['geometry']['coordinates'][0],
            most_recent_station['geometry']['coordinates'][1],
        ],
        'province_code': most_recent_station['properties'][
            'PROVINCE_CODE'
        ],
    }

    return stations_info