Example #1
async def _get_most_recent_model_run(model: ModelEnum,
                                     data_type: ObjectTypeEnum) -> datetime:
    """ Get the most recent model run date - if none exists, return None """
    # NOTE: This is a nasty, slow, brute force way of doing it!
    async with get_client() as (client, bucket):

        async def get_most_recent(result, depth):
            # recursively descend the prefix hierarchy, taking the last (most recent) prefix at each level.
            if 'CommonPrefixes' in result:
                last_object = result['CommonPrefixes'][-1]
                object_name = last_object['Prefix']
            else:
                return None
            if depth == 3:
                return object_name
            return await get_most_recent(
                await client.list_objects_v2(Bucket=bucket,
                                             Prefix=object_name,
                                             Delimiter='/'), depth + 1)

        format_string = 'kml' if data_type == ObjectTypeEnum.KML else 'json'
        most_recent = await get_most_recent(
            await client.list_objects_v2(
                Bucket=bucket,
                Prefix=f'c-haines-polygons/{format_string}/{model}/',
                Delimiter='/'), 0)

        if most_recent is None:
            raise NoModelRunFound(f'no model run found for {model}')

        logger.info('most recent model run: %s', most_recent)

        return extract_model_run_timestamp_from_path(most_recent)
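
The recursion above descends four levels below the model prefix (year/month/day/hour), so the prefix it returns encodes the model run timestamp. The helper extract_model_run_timestamp_from_path isn't shown in these examples; a minimal sketch of what it could look like, assuming that path layout:

from datetime import datetime, timezone

def extract_model_run_timestamp_from_path(path: str) -> datetime:
    """ Hypothetical sketch: parse a prefix such as
    'c-haines-polygons/json/GDPS/2021/3/8/12/' into a datetime. """
    # Drop the trailing '/' left by the delimiter, then split into segments.
    parts = path.strip('/').split('/')
    # The last four segments are year, month, day and model run hour.
    year, month, day, hour = (int(part) for part in parts[-4:])
    return datetime(year, month, day, hour, tzinfo=timezone.utc)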
Example #2
async def save_as_kml_to_s3(json_filename: str,
                            source_projection: str,
                            prediction_model: ModelEnum,
                            model_run_timestamp: datetime,
                            prediction_timestamp: datetime):
    """ Given a geojson file, generate KML and store to S3 """
    target_kml_path = generate_full_object_store_path(
        prediction_model, model_run_timestamp, prediction_timestamp, ObjectTypeEnum.KML)
    async with get_client() as (client, bucket):
        # let's save some time and check whether the file already exists.
        # it's super important we do this, since there are many c-haines cronjobs running in dev, all
        # pointing to the same s3 bucket.
        if await object_exists(client, bucket, target_kml_path):
            logger.info('kml (%s) already exists - skipping', target_kml_path)
            return

        # generate the kml file
        with io.StringIO() as sio:
            severity_geojson_to_kml(json_filename, source_projection, sio,
                                    prediction_model, model_run_timestamp, prediction_timestamp)
            # smash it into binary
            sio.seek(0)
            bio = io.BytesIO(sio.read().encode('utf8'))
            # go back to start
            bio.seek(0)
            # save it to s3
            logger.info('uploading %s', target_kml_path)
            await client.put_object(Bucket=bucket, Key=target_kml_path, Body=bio)
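
The object_exists helper isn't defined in these examples. A minimal sketch, assuming an aiobotocore-style async client, is a head_object call that treats a 404 as "not found":

from botocore.exceptions import ClientError

async def object_exists(client, bucket: str, key: str) -> bool:
    """ Hypothetical sketch: return True if the object already exists. """
    try:
        await client.head_object(Bucket=bucket, Key=key)
        return True
    except ClientError as error:
        # head_object raises a ClientError with a 404 code when the key is absent.
        if error.response['Error']['Code'] == '404':
            return False
        raise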
Example #3
async def save_as_geojson_to_s3(source_json_filename: str,
                                source_projection: str,
                                prediction_model: ModelEnum,
                                model_run_timestamp: datetime,
                                prediction_timestamp: datetime):
    """ Given a geojson file, ensure it's in the correct projection and then store to S3 """
    target_path = generate_full_object_store_path(
        prediction_model, model_run_timestamp, prediction_timestamp, ObjectTypeEnum.GEOJSON)
    # let's save some time and check whether the file already exists.
    # it's super important we do this, since there are many c-haines cronjobs running in dev, all
    # pointing to the same s3 bucket.
    async with get_client() as (client, bucket):
        if await object_exists(client, bucket, target_path):
            logger.info('json (%s) already exists - skipping', target_path)
            return

        # re-project the geojson file from whatever it was, to WGS84.
        re_projected_data = re_project_and_classify_geojson(source_json_filename, source_projection)

        with io.StringIO() as sio:
            json.dump(re_projected_data, sio)
            # smash it into binary
            sio.seek(0)
            bio = io.BytesIO(sio.read().encode('utf8'))
            # go back to start
            bio.seek(0)
            # smash it into the object store.
            logger.info('uploading %s', target_path)
            await client.put_object(Bucket=bucket, Key=target_path, Body=bio)
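
Every example opens with get_client, which isn't shown. A plausible sketch, assuming aiobotocore and hypothetical config keys for the endpoint, credentials and bucket:

from contextlib import asynccontextmanager
from aiobotocore.session import get_session

@asynccontextmanager
async def get_client():
    """ Hypothetical sketch: yield an async S3 client and the bucket name. """
    session = get_session()
    async with session.create_client(
            's3',
            endpoint_url=config.get('OBJECT_STORE_SERVER'),  # hypothetical config key
            aws_access_key_id=config.get('OBJECT_STORE_USER_ID'),  # hypothetical config key
            aws_secret_access_key=config.get('OBJECT_STORE_SECRET')) as client:  # hypothetical config key
        yield client, config.get('OBJECT_STORE_BUCKET')  # hypothetical config key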
Example #4
async def fetch_model_runs(model_run_timestamp: datetime):
    """ Fetch recent model runs."""
    # NOTE: This is a horribly inefficient way of listing model runs - we're making 6 calls
    # (2 days x 3 models) just to list model runs.
    result = CHainesModelRuns(model_runs=[])
    # Get an async S3 client.
    async with get_client() as (client, bucket):
        # Create tasks for listing all the model runs.
        tasks = []
        # Iterate over the date of interest and the day before. If you only look at today, you may
        # have an empty list until the latest model runs come in, so better to also list data from
        # the day before.
        for date in [
                model_run_timestamp, model_run_timestamp - timedelta(days=1)
        ]:
            # We're interested in all the model runs.
            for model in ['GDPS', 'RDPS', 'HRDPS']:
                # Construct a prefix to search for in S3 (basically path matching).
                prefix = f'c-haines-polygons/json/{model}/{date.year}/{date.month}/{date.day}/'
                logger.info(prefix)
                # Create the task to go and fetch the listing from S3.
                tasks.append(
                    asyncio.create_task(
                        client.list_objects_v2(Bucket=bucket, Prefix=prefix)))

        # Run all the tasks at once. (Basically listing folder contents on S3.)
        model_run_prediction_results = await asyncio.gather(*tasks)
        # Iterate through results.
        for prediction_result in model_run_prediction_results:
            # S3 data comes back as a dictionary with "Contents"
            if 'Contents' in prediction_result:
                model_run_predictions = None
                prev_model_run_timestamp = None
                # Iterate through all the contents.
                for prediction in prediction_result['Contents']:
                    # The path is stored in the "Key" field. We infer the model, model run timestamp and
                    # prediction timestamp from the path.
                    model, run_timestamp, prediction_timestamp = extract_model_run_prediction_from_path(
                        prediction['Key'])
                    # Check for new model runs to add to our list.
                    if prev_model_run_timestamp != run_timestamp:
                        # New model run? Make it and add it to the list.
                        prev_model_run_timestamp = run_timestamp
                        model_run_predictions = CHainesModelRunPredictions(
                            model=WeatherPredictionModel(name=model,
                                                         abbrev=model),
                            model_run_timestamp=run_timestamp,
                            prediction_timestamps=[
                                prediction_timestamp,
                            ])
                        result.model_runs.append(model_run_predictions)
                    else:
                        # Already have a model run, just at the prediction
                        model_run_predictions.prediction_timestamps.append(
                            prediction_timestamp)

    # Sort everything by model run timestamp.
    result.model_runs.sort(key=lambda model_run: model_run.model_run_timestamp,
                           reverse=True)
    return result
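
The extract_model_run_prediction_from_path helper is also not defined here. Given the key layout used above (c-haines-polygons/json/{model}/{year}/{month}/{day}/{run_hour}/{filename}), a hypothetical sketch - assuming the filename stem is an ISO formatted prediction timestamp - might be:

from datetime import datetime, timezone

def extract_model_run_prediction_from_path(key: str):
    """ Hypothetical sketch: infer (model, model run timestamp,
    prediction timestamp) from an object store key. """
    parts = key.split('/')
    model = parts[2]
    # Segments 3..6 are year, month, day and model run hour.
    year, month, day, run_hour = (int(part) for part in parts[3:7])
    model_run_timestamp = datetime(year, month, day, run_hour, tzinfo=timezone.utc)
    # Assumption: the filename (minus extension) is an ISO formatted timestamp.
    prediction_timestamp = datetime.fromisoformat(parts[7].split('.')[0])
    return model, model_run_timestamp, prediction_timestamp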
Example #5
async def fetch_model_run_kml_streamer(
        model: ModelEnum, model_run_timestamp: datetime) -> AsyncIterator[str]:
    """ Yield model run XML.
    Yielding allows the streaming response to start while the kml is being constructed.
    The KML we're making is essentially a list of network links for each prediction.
    """
    # We need to pass the API's url in, so that the KML knows where to ask for network links.
    uri = config.get('BASE_URI')
    # Start serving up the kml.
    yield get_kml_header()
    # Serve up the "look_at" which tells google earth when and where to take you.
    yield get_look_at(model, model_run_timestamp)
    # Serve up model folder and model run folder.
    yield f"<name>{model} {model_run_timestamp}</name>\n"
    yield '<Folder>'  # Open model run folder.
    yield f'<name>{model} {model_run_timestamp} model run</name>\n'

    # Get an async S3 client.
    async with get_client() as (client, bucket):
        # Construct model run path - so we can list contents that match that path.
        model_run_path = generate_object_store_model_run_path(
            model, model_run_timestamp, ObjectTypeEnum.KML)
        # List all files in folder (e.g. list all the prediction kml files).
        predictions = await client.list_objects_v2(Bucket=bucket,
                                                   Prefix=model_run_path)
        # File listing is in the "Contents" entry.
        if 'Contents' in predictions:
            # Iterate through each entry.
            for prediction in predictions['Contents']:
                # Filename is in the "Key" entry.
                object_name = prediction['Key']
                # Infer timestamp from filename.
                prediction_timestamp = object_name.split('/')[-1].split('.')[0]
                # Construct params for URL.
                kml_params = {
                    'model_run_timestamp': model_run_timestamp,
                    'prediction_timestamp': prediction_timestamp,
                    'response_format': 'KML'
                }
                # Create url (remembering to escape & for xml)
                kml_url = urljoin(uri, f'/api/c-haines/{model}/prediction') + \
                    '?' + urlencode(kml_params).replace('&', '&amp;')
                yield '<NetworkLink>\n'
                yield '<visibility>1</visibility>\n'
                yield f'<name>{prediction_timestamp}</name>\n'
                yield '<Link>\n'
                yield f'<href>{kml_url}</href>\n'
                yield '</Link>\n'
                yield '</NetworkLink>\n'

        yield '</Folder>'  # Close model run folder.
        # Close the KML document.
        yield '</Document>\n'
        yield '</kml>\n'
        logger.info('kml complete')
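
As a usage sketch (assuming FastAPI, which the RedirectResponse in Example #7 suggests), the generator can be handed straight to a streaming response:

from fastapi.responses import StreamingResponse

async def get_model_run_kml(model: ModelEnum, model_run_timestamp: datetime):
    """ Hypothetical sketch: stream the model run KML back to the client. """
    return StreamingResponse(
        fetch_model_run_kml_streamer(model, model_run_timestamp),
        media_type='application/vnd.google-earth.kml+xml')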
Example #6
async def main():
    """ Entry point for generating C-Haines severity index polygons. """
    async with get_client() as (client, bucket):
        models = (
            (ModelEnum.GDPS, ProjectionEnum.LATLON_15X_15),
            (ModelEnum.RDPS, ProjectionEnum.REGIONAL_PS),
            (ModelEnum.HRDPS, ProjectionEnum.HIGH_RES_CONTINENTAL),
        )
        for model, projection in models:
            logger.info('Generating C-Haines Severity Index for %s', model)
            generator = CHainesSeverityGenerator(model, projection, client,
                                                 bucket)
            await generator.generate()
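
A minimal way to run this entry point as a script:

import asyncio

if __name__ == '__main__':
    asyncio.run(main())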
Example #7
async def get_c_haines_model_run_prediction(
        model: ModelEnum,
        model_run_timestamp: datetime,
        prediction_timestamp: datetime,
        response_format: ObjectTypeEnum = ObjectTypeEnum.GEOJSON):
    """ Return geojson/kml polygons for c-haines """
    logger.info(
        '/c-haines/%s/prediction?model_run_timestamp=%s&prediction_timestamp=%s&response_format=%s',
        model, model_run_timestamp, prediction_timestamp, response_format)

    async with get_client() as (client, bucket):
        key = generate_full_object_store_path(model, model_run_timestamp,
                                              prediction_timestamp,
                                              response_format)
        response = await client.generate_presigned_url('get_object',
                                                       Params={
                                                           'Bucket': bucket,
                                                           'Key': key
                                                       })
        return RedirectResponse(url=response)
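
botocore presigns URLs for 3600 seconds by default; if a different lifetime is wanted, the ExpiresIn parameter can be passed explicitly. As a sketch, the call above could become (300 seconds is an arbitrary choice):

        response = await client.generate_presigned_url('get_object',
                                                       Params={
                                                           'Bucket': bucket,
                                                           'Key': key
                                                       },
                                                       ExpiresIn=300)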