    def generate_warnings(self):
        """
        Generates and yields a series of marine weather warnings
        for a given marine weather area. Warnings are returned
        as Elasticsearch bulk API upsert actions, with a single
        document for the marine weather region in GeoJSON to match the
        Elasticsearch index mappings.
        :returns: Generator of Elasticsearch actions to upsert the marine
                  weather warnings.
        """
        warnings = self.root.findall('warnings/')

        feature = {'type': 'Feature', 'geometry': {}, 'properties': {}}

        feature['geometry'] = self.area['geometry']

        feature['properties']['area_{}'.format(
            self.language)] = self.area['name']
        feature['properties']['region_{}'.format(
            self.language)] = self.area['region']
        feature['properties']['sub_region_{}'.format(
            self.language)] = self.area['subRegion']
        feature['properties']['warnings_{}'.format(self.language)] = []

        if warnings:
            for elem in warnings:
                datetimes = self.create_datetime_dict(
                    elem.findall('event/dateTime')
                )
                event = elem.find('event')
                location = {
                    'location_{}'.format(self.language): elem.attrib['name'],
                    'issued_datetime_utc_{}'.format(self.language):
                        strftime_rfc3339(datetimes['utc']),
                    'issued_datetime_local_{}'.format(self.language):
                        strftime_rfc3339(datetimes['local']),
                    'event_type_{}'.format(self.language):
                        event.attrib['type'],
                    'event_category_{}'.format(self.language):
                        event.attrib['category'],
                    'event_name_{}'.format(self.language):
                        event.attrib['name'],
                    'event_status_{}'.format(self.language):
                        event.attrib['status'],
                }
                feature['properties']['warnings_{}'.format(
                    self.language)].append(location)

        self.items.append(feature)

        action = {
            '_id': self.filepath.stem.split('_')[0],
            '_index': 'marine_weather_warnings',
            '_op_type': 'update',
            'doc': feature,
            'doc_as_upsert': True,
        }

        yield action
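
    # A minimal consumption sketch (assumed names, not part of the original
    # module): the generator above yields bulk API actions, which a caller
    # would typically drain with the official Elasticsearch bulk helper:
    #
    #     from elasticsearch import Elasticsearch, helpers
    #
    #     es_client = Elasticsearch()
    #     helpers.bulk(es_client, loader.generate_warnings())
    #
    # where `loader` is assumed to be an instance of this loader class.
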
    def generate_geojson_features(self):
        """
        Generates and yields a series of storm forecasts,
        one for each feature in <self.filepath>. Forecasts are returned as
        Elasticsearch bulk API upsert actions, with documents in GeoJSON to
        match the Elasticsearch index mappings.
        :returns: Generator of Elasticsearch actions to upsert the storm
                  forecasts
        """
        driver = ogr.GetDriverByName('ESRI Shapefile')
        filepath = str(self.filepath.resolve())
        data = driver.Open(filepath, 0)
        lyr = data.GetLayer(0)
        file_datetime_str = strftime_rfc3339(self.date_)

        for feature in lyr:
            feature_json = feature.ExportToJson(as_object=True)
            feature_json['properties']['active'] = True
            feature_json['properties']['filename'] = self.filepath.stem
            feature_json['properties']['filedate'] = file_datetime_str

            # TODO: remove once upstream data is patched
            # remove consecutive duplicate coordinates in 'rad' geometries
            # (temporary fix)
            if self.storm_variable == 'rad':
                feature_json['geometry'][
                    'coordinates'] = self.clean_consecutive_coordinates(
                        feature_json['geometry']['coordinates'])

            # reformat pts ADVDATE as RFC 3339
            if self.storm_variable == 'pts':
                feature_json['properties']['ADVDATE'] = \
                    strftime_rfc3339(
                        datetime.strptime(
                            feature_json['properties']['ADVDATE'],
                            '%y%m%d/%H%M'
                        )
                    )

            self.items.append(feature_json)

            action = {
                '_id': '{}-{}-{}-{}-{}'.format(
                    self.storm_name, self.storm_variable,
                    file_datetime_str, self.fh, feature_json['id']
                ),
                '_index': INDEX_NAME.format(self.storm_variable),
                '_op_type': 'update',
                'doc': feature_json,
                'doc_as_upsert': True,
            }

            yield action
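
    # `clean_consecutive_coordinates` is referenced above but not shown in
    # this excerpt. A plausible sketch, assuming it strips consecutive
    # duplicate points from each polygon ring (the implementation below is
    # an assumption, not the original helper):
    #
    #     def clean_consecutive_coordinates(self, coordinates):
    #         """Remove consecutive duplicate points from polygon rings."""
    #         return [
    #             [point for i, point in enumerate(ring)
    #              if i == 0 or point != ring[i - 1]]
    #             for ring in coordinates
    #         ]
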
    def generate_extended_forecasts(self):
        """
        Generates and yields a series of marine weather extended forecasts
        for a given marine weather area. Each extended forecast is returned
        as Elasticsearch bulk API upsert actions, with documents in GeoJSON to
        match the Elasticsearch index mappings.
        :returns: Generator of Elasticsearch actions to upsert the marine
                  weather extended forecast.
        """
        extended_forecasts = self.root.findall('extendedForecast/')
        feature = {'type': 'Feature', 'geometry': {}, 'properties': {}}

        feature['geometry'] = self.area['geometry']
        feature['properties']['area_{}'.format(self.language)] = self.area[
            'name'
        ]
        feature['properties']['region_{}'.format(self.language)] = self.area[
            'region'
        ]
        feature['properties'][
            'sub_region_{}'.format(self.language)
        ] = self.area['subRegion']
        feature['properties'][
            'extended_forecasts_{}'.format(self.language)
        ] = []

        if extended_forecasts:
            datetimes = self.create_datetime_dict(
                [
                    element
                    for element in extended_forecasts
                    if element.tag == 'dateTime'
                ]
            )
            feature['properties']['issued_datetime_utc'] = strftime_rfc3339(
                datetimes['utc']
            )
            feature['properties']['issued_datetime_local'] = strftime_rfc3339(
                datetimes['local']
            )

            locations = [
                element
                for element in extended_forecasts
                if element.tag == 'location'
            ]
            for location_elem in locations:
                location = {
                    'location_{}'.format(self.language):
                        location_elem.attrib['name']
                        if 'name' in location_elem.attrib
                        else self.area['name'],
                    'forecast_periods_{}'.format(self.language): [
                        {
                            'forecast_period_{}'.format(
                                self.language
                            ): forecast_period.attrib['name'],
                            'forecast_{}'.format(
                                self.language
                            ): forecast_period.text,
                        }
                        # findall() always returns a list (possibly empty),
                        # so no None check is needed here
                        for forecast_period in location_elem.findall(
                            'weatherCondition/'
                        )
                    ],
                    'status_statement_{}'.format(self.language): (
                        location_elem.find('statusStatement').text
                        if location_elem.find('statusStatement') is not None
                        else None
                    ),
                }
                feature['properties'][
                    'extended_forecasts_{}'.format(self.language)
                ].append(location)

        self.items.append(feature)

        action = {
            '_id': self.filepath.stem.split('_')[0],
            '_index': 'marine_weather_extended-forecasts',
            '_op_type': 'update',
            'doc': feature,
            'doc_as_upsert': True,
        }

        yield action
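
    # `create_datetime_dict` is used by several generators here but is not
    # part of this excerpt. A hedged sketch, assuming each <dateTime>
    # element carries a zone attribute and a <timeStamp> child (the tag
    # names and format string are assumptions):
    #
    #     def create_datetime_dict(self, datetime_elements):
    #         """Map dateTime elements to UTC and local datetime objects."""
    #         datetimes = {}
    #         for element in datetime_elements:
    #             key = 'utc' if element.attrib.get('zone') == 'UTC' \
    #                 else 'local'
    #             datetimes[key] = datetime.strptime(
    #                 element.find('timeStamp').text, '%Y%m%d%H%M%S'
    #             )
    #         return datetimes
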
    def generate_daily_snow_extremes(self):
        """
        Queries stations data from the db, and reformats
        data so it can be inserted into Elasticsearch.

        Returns a generator of dictionaries that represent upsert actions
        into Elasticsearch's bulk API.

        :param cur: oracle cursor to perform queries against.
        :returns: generator of bulk API upsert actions.
        """

        try:
            self.cur.execute(
                (
                    "SELECT t1.*, t2.* "
                    "FROM ARKEON2DWH.RECORD_VIRTUAL_SNOWFALL t1 "
                    "JOIN ARKEON2DWH.EXTREME_VIRTUAL_SNOWFALL t2 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t2.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t2.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t2.LOCAL_DAY "
                )
            )
        except Exception as err:
            LOGGER.error(
                'Could not fetch records from oracle due to: {}.'.format(
                    str(err)
                )
            )
            return

        stations_dict = {}

        for row in self.cur:
            insert_dict = dict(zip([x[0] for x in self.cur.description], row))

            # format LAST_UPDATED as RFC 3339 if present
            if insert_dict.get('LAST_UPDATED') is not None:
                insert_dict['LAST_UPDATED'] = strftime_rfc3339(
                    insert_dict['LAST_UPDATED']
                )

            virtual_climate_id = insert_dict['VIRTUAL_CLIMATE_ID']
            es_id = '{}-{}-{}'.format(
                insert_dict['VIRTUAL_CLIMATE_ID'],
                insert_dict['LOCAL_MONTH'],
                insert_dict['LOCAL_DAY'],
            )

            # check if we already have this station's record begin and end
            # dates; if not, retrieve and cache them
            if virtual_climate_id not in stations_dict:
                stations_dict[virtual_climate_id] = self.get_stations_info(
                    'DAILY TOTAL SNOWFALL', virtual_climate_id
                )

            insert_dict['RECORD_BEGIN'] = stations_dict[virtual_climate_id][
                'record_begin'
            ]
            insert_dict['RECORD_END'] = stations_dict[virtual_climate_id][
                'record_end'
            ]

            insert_dict['CLIMATE_IDENTIFIER'] = stations_dict[
                virtual_climate_id
            ]['climate_identifier']
            insert_dict['ENG_STN_NAME'] = stations_dict[virtual_climate_id][
                'eng_stn_name'
            ]
            insert_dict['FRE_STN_NAME'] = stations_dict[virtual_climate_id][
                'fre_stn_name'
            ]

            insert_dict['PROVINCE_CODE'] = stations_dict[virtual_climate_id][
                'province_code'
            ]

            # cleanup unwanted fields retained from SQL join
            fields_to_delete = [
                'LOCAL_TIME',
                'VIRTUAL_MEAS_DISPLAY_CODE',
                'ENG_STN_NAME',
                'FRE_STN_NAME',
                'CLIMATE_IDENTIFIER',
                'LAST_UPDATED',
            ]
            for field in fields_to_delete:
                insert_dict.pop(field)

            # set properties.IDENTIFIER
            insert_dict['IDENTIFIER'] = es_id

            wrapper = {
                'id': es_id,
                'type': 'Feature',
                'properties': insert_dict,
                'geometry': {
                    'type': 'Point',
                    'coordinates': stations_dict[virtual_climate_id]['coords'],
                },
            }

            action = {
                '_id': es_id,
                '_index': 'ltce_snow_extremes',
                '_op_type': 'update',
                'doc': wrapper,
                'doc_as_upsert': True,
            }

            yield action
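
    # `strftime_rfc3339` is imported from elsewhere in the package. A
    # minimal sketch of the assumed behaviour (the format constant below is
    # an assumption, though it is consistent with the DATETIME_RFC3339_FMT
    # constant used in get_stations_info):
    #
    #     DATETIME_RFC3339_FMT = '%Y-%m-%dT%H:%M:%SZ'
    #
    #     def strftime_rfc3339(datetime_obj):
    #         """Format a datetime object as an RFC 3339 timestamp."""
    #         return datetime_obj.strftime(DATETIME_RFC3339_FMT)
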
    def generate_daily_temp_extremes(self):
        """
        Queries stations data from the db, and reformats
        data so it can be inserted into Elasticsearch.

        Returns a generator of dictionaries that represent upsert actions
        into Elasticsearch's bulk API.

        :param cur: oracle cursor to perform queries against.
        :returns: generator of bulk API upsert actions.
        """

        try:
            self.cur.execute(
                (
                    "SELECT t1.*, t2.*, t3.*, t4.*, t5.*, t6.*, t7.*, t8.* "
                    "FROM ARKEON2DWH.RECORD_HIGH_VIRTUAL_MAX_TEMP t1 "
                    "JOIN ARKEON2DWH.RECORD_LOW_VIRTUAL_MAX_TEMP t2 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t2.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t2.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t2.LOCAL_DAY "
                    "JOIN ARKEON2DWH.RECORD_LOW_VIRTUAL_MIN_TEMP t3 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t3.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t3.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t3.LOCAL_DAY "
                    "JOIN ARKEON2DWH.RECORD_HIGH_VIRTUAL_MIN_TEMP t4 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t4.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t4.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t4.LOCAL_DAY "
                    "JOIN ARKEON2DWH.EXTREME_HIGH_VIRTUAL_MAX_TEMP t5 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t5.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t5.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t5.LOCAL_DAY "
                    "JOIN ARKEON2DWH.EXTREME_LOW_VIRTUAL_MAX_TEMP t6 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t6.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t6.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t6.LOCAL_DAY "
                    "JOIN ARKEON2DWH.EXTREME_HIGH_VIRTUAL_MIN_TEMP t7 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t7.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t7.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t7.LOCAL_DAY "
                    "JOIN ARKEON2DWH.EXTREME_LOW_VIRTUAL_MIN_TEMP t8 "
                    "ON t1.VIRTUAL_CLIMATE_ID = t8.VIRTUAL_CLIMATE_ID "
                    "AND t1.LOCAL_MONTH = t8.LOCAL_MONTH "
                    "AND t1.LOCAL_DAY = t8.LOCAL_DAY "
                )
            )
        except Exception as err:
            LOGGER.error(
                'Could not fetch records from oracle due to: {}.'.format(
                    str(err)
                )
            )
            return

        # dictionary to cache station information once retrieved
        stations_dict = {}
        for row in self.cur:
            insert_dict = dict(zip([x[0] for x in self.cur.description], row))

            # format LAST_UPDATED as RFC 3339 if present
            if insert_dict.get('LAST_UPDATED') is not None:
                insert_dict['LAST_UPDATED'] = strftime_rfc3339(
                    insert_dict['LAST_UPDATED']
                )

            virtual_climate_id = insert_dict['VIRTUAL_CLIMATE_ID']
            es_id = '{}-{}-{}'.format(
                insert_dict['VIRTUAL_CLIMATE_ID'],
                insert_dict['LOCAL_MONTH'],
                insert_dict['LOCAL_DAY'],
            )

            # check if we already have this station's record begin and end
            # dates; if not, retrieve the information and store it in
            # stations_dict
            if virtual_climate_id not in stations_dict:
                stations_dict[virtual_climate_id] = {}
                stations_dict[virtual_climate_id][
                    'MIN'
                ] = self.get_stations_info(
                    'DAILY MINIMUM TEMPERATURE', virtual_climate_id
                )
                stations_dict[virtual_climate_id][
                    'MAX'
                ] = self.get_stations_info(
                    'DAILY MAXIMUM TEMPERATURE', virtual_climate_id
                )

            # check if TEMPERATURE MIN/MAX for the most recently threaded
            # station have the same climate identifier value
            min_climate_identifier = stations_dict[virtual_climate_id]['MIN'][
                'climate_identifier'
            ]
            max_climate_identifier = stations_dict[virtual_climate_id]['MAX'][
                'climate_identifier'
            ]

            if min_climate_identifier == max_climate_identifier:
                insert_dict['CLIMATE_IDENTIFIER'] = stations_dict[
                    virtual_climate_id
                ]['MAX']['climate_identifier']
                insert_dict['ENG_STN_NAME'] = stations_dict[
                    virtual_climate_id
                ]['MAX']['eng_stn_name']
                insert_dict['FRE_STN_NAME'] = stations_dict[
                    virtual_climate_id
                ]['MAX']['fre_stn_name']
                insert_dict['PROVINCE_CODE'] = stations_dict[
                    virtual_climate_id
                ]['MAX']['province_code']

            else:
                LOGGER.error(
                    f'Currently threaded station climate identifier value '
                    f'does not match between DAILY MINIMUM TEMPERATURE '
                    f'({min_climate_identifier}) and DAILY MAXIMUM '
                    f'TEMPERATURE ({max_climate_identifier}) station threads '
                    f'for virtual climate ID {virtual_climate_id}.'
                )
                continue

            # set new fields
            for level in ['MIN', 'MAX']:
                # set new insert_dict keys
                insert_dict[
                    '{}_TEMP_RECORD_BEGIN'.format(level)
                ] = stations_dict[virtual_climate_id][level]['record_begin']
                insert_dict[
                    '{}_TEMP_RECORD_END'.format(level)
                ] = stations_dict[virtual_climate_id][level]['record_end']

            # cleanup unwanted fields retained from SQL join
            fields_to_delete = [
                'LOCAL_TIME',
                'VIRTUAL_MEAS_DISPLAY_CODE',
                'ENG_STN_NAME',
                'FRE_STN_NAME',
                'CLIMATE_IDENTIFIER',
            ]
            for field in fields_to_delete:
                insert_dict.pop(field)

            # set properties.IDENTIFIER
            insert_dict['IDENTIFIER'] = es_id

            wrapper = {
                'id': es_id,
                'type': 'Feature',
                'properties': insert_dict,
                'geometry': {
                    'type': 'Point',
                    'coordinates': stations_dict[virtual_climate_id]['MAX'][
                        'coords'
                    ],
                },
            }

            action = {
                '_id': es_id,
                '_index': 'ltce_temp_extremes',
                '_op_type': 'update',
                'doc': wrapper,
                'doc_as_upsert': True,
            }

            yield action
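
    # As with the other generators, a consumer might stream these actions
    # rather than buffering them all. A sketch with assumed names
    # (`es_client` and `loader` are not defined in this excerpt):
    #
    #     from elasticsearch import helpers
    #
    #     for ok, response in helpers.streaming_bulk(
    #         es_client,
    #         loader.generate_daily_temp_extremes(),
    #         raise_on_error=False,
    #     ):
    #         if not ok:
    #             LOGGER.warning('Failed to index: {}'.format(response))
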
    def generate_stations(self):
        """
        Queries stations data from the db, and reformats
        data so it can be inserted into Elasticsearch.

        Returns a generator of dictionaries that represent upsert actions
        into Elasticsearch's bulk API.

        :param cur: oracle cursor to perform queries against.
        :returns: generator of bulk API upsert actions.
        """

        try:
            self.cur.execute(
                (
                    "SELECT ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.*,"
                    "ARKEON2DWH.STATION_INFORMATION.ENG_STN_NAME,"
                    "ARKEON2DWH.STATION_INFORMATION.FRE_STN_NAME,"
                    "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.LAT,"
                    "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.LON,"
                    "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.PROVINCECODE "
                    "FROM ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW "
                    "LEFT JOIN ARKEON2DWH.STATION_INFORMATION "
                    "ON ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.STN_ID = "
                    "ARKEON2DWH.STATION_INFORMATION.STN_ID "
                    "LEFT JOIN ARKEON2DWH.WXO_CITY_INFORMATION_MVW "
                    "ON ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.WXO_CITY_CODE = "
                    "ARKEON2DWH.WXO_CITY_INFORMATION_MVW.CITYCODE "
                    "WHERE "
                    "ARKEON2DWH.VIRTUAL_STATION_INFO_F_MVW.ELEMENT_NAME_E IN "
                    "('DAILY MINIMUM TEMPERATURE', 'DAILY MAXIMUM TEMPERATURE',"  # noqa
                    "'DAILY TOTAL PRECIPITATION', 'DAILY TOTAL SNOWFALL')"
                )
            )
        except Exception as err:
            LOGGER.error(
                'Could not fetch records from oracle due to: {}.'.format(
                    str(err)
                )
            )
            return

        for row in self.cur:
            insert_dict = dict(zip([x[0] for x in self.cur.description], row))
            # format date fields as RFC 3339 if present
            for key in ('START_DATE', 'END_DATE'):
                if insert_dict.get(key) is not None:
                    insert_dict[key] = strftime_rfc3339(insert_dict[key])

            es_id = slugify(
                '{}-{}-{}-{}-{}'.format(
                    insert_dict['VIRTUAL_CLIMATE_ID'],
                    insert_dict['ELEMENT_NAME_E'],
                    insert_dict['CLIMATE_IDENTIFIER'],
                    insert_dict['START_DATE'],
                    insert_dict['END_DATE'],
                )
            )

            coords = [
                float(insert_dict['LON']),
                float(insert_dict['LAT']),
            ]

            # rename PROVINCECODE field to PROVINCE_CODE
            insert_dict['PROVINCE_CODE'] = insert_dict['PROVINCECODE']

            # cleanup unwanted fields retained from SQL join
            fields_to_delete = [
                'STN_ID',
                'ENG_PROV_NAME',
                'FRE_PROV_NAME',
                'REGION_CODE',
                'CRITERIA',
                'NOTES',
                'VIRTUAL_STN_INFO_UPDATE_ID',
                'CURRENT_FLAG',
                'LON',
                'LAT',
                'PROVINCECODE',
            ]
            for field in fields_to_delete:
                insert_dict.pop(field)

            # set properties.IDENTIFIER
            insert_dict['IDENTIFIER'] = es_id

            wrapper = {
                'id': es_id,
                'type': 'Feature',
                'properties': insert_dict,
                'geometry': {'type': 'Point', 'coordinates': coords},
            }

            action = {
                '_id': es_id,
                '_index': 'ltce_stations',
                '_op_type': 'update',
                'doc': wrapper,
                'doc_as_upsert': True,
            }

            yield action
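
    # `slugify` is assumed to come from the python-slugify package; it
    # normalizes the composite ID into a lowercase, hyphen-separated slug
    # that is safe to use as an Elasticsearch document ID. A hypothetical
    # example:
    #
    #     from slugify import slugify
    #
    #     slugify('123-DAILY TOTAL SNOWFALL')
    #     # -> '123-daily-total-snowfall'
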
    def get_stations_info(self, element_name, station_id):
        """
        Queries LTCE station data for a given element name (DAILY MINIMUM
        TEMPERATURE, DAILY MAXIMUM TEMPERATURE, etc.), and virtual station ID.
        Returns the ealiest start date of all returned stations and the end
        date climate identifier, and coordinates of the most recently threaded
        station.
        :param element_name: `str` of element name
        :param station_id: `str` of virtual climate station id
        :return: `dict` of stations information
        """
        query = {
            "query": {
                "bool": {
                    "filter": {
                        "bool": {
                            "must": [
                                {
                                    "term": {
                                        "properties.VIRTUAL_CLIMATE_ID.raw": station_id  # noqa
                                    }
                                },
                                {
                                    "term": {
                                        "properties.ELEMENT_NAME_E.raw": element_name  # noqa
                                    }
                                },
                            ]
                        }
                    }
                }
            }
        }

        results = self.conn.Elasticsearch.search(
            body=query,
            index='ltce_stations',
            _source=[
                'properties.CLIMATE_IDENTIFIER',
                'properties.ENG_STN_NAME',
                'properties.FRE_STN_NAME',
                'properties.START_DATE',
                'properties.END_DATE',
                'properties.PROVINCE_CODE',
                'geometry.coordinates',
            ],
        )

        results = [result['_source'] for result in results['hits']['hits']]

        oldest_station = None
        most_recent_station = None

        for index, station in enumerate(results):
            # retrieve station start and end date
            dates = (
                station['properties']['START_DATE'],
                station['properties']['END_DATE'],
            )

            # convert station dates to datetime objects
            (
                station['properties']['START_DATE'],
                station['properties']['END_DATE'],
            ) = (start_date, end_date) = [
                datetime.strptime(date, DATETIME_RFC3339_FMT)
                if date is not None
                else None
                for date in dates
            ]

            # assign first station as oldest and most recent
            if index == 0:
                oldest_station = station
                most_recent_station = station
                continue

            # then compare all remaining stations and replace as necessary
            if start_date < oldest_station['properties']['START_DATE']:
                oldest_station = station
            if most_recent_station['properties']['END_DATE'] is not None and (
                end_date is None
                or end_date > most_recent_station['properties']['END_DATE']
            ):
                most_recent_station = station

        stations_info = {
            'record_begin': strftime_rfc3339(
                oldest_station['properties']['START_DATE']
            ),
            'record_end': strftime_rfc3339(
                most_recent_station['properties']['END_DATE']
            )
            if most_recent_station['properties']['END_DATE']
            else None,
            'climate_identifier': most_recent_station['properties'][
                'CLIMATE_IDENTIFIER'
            ],
            'eng_stn_name': most_recent_station['properties']['ENG_STN_NAME'],
            'fre_stn_name': most_recent_station['properties']['FRE_STN_NAME'],
            'coords': [
                most_recent_station['geometry']['coordinates'][0],
                most_recent_station['geometry']['coordinates'][1],
            ],
            'province_code': most_recent_station['properties'][
                'PROVINCE_CODE'
            ],
        }

        return stations_info
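
    # The `term` filters in get_stations_info target `.raw` sub-fields,
    # which implies the ltce_stations index maps these properties as text
    # with a keyword sub-field, along these lines (a sketch, not the actual
    # mapping):
    #
    #     'VIRTUAL_CLIMATE_ID': {
    #         'type': 'text',
    #         'fields': {'raw': {'type': 'keyword'}},
    #     }
    #
    # Filtering on the keyword sub-field gives exact matches, avoiding the
    # analyzer-dependent behaviour of querying the analyzed text field.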