async def _get_most_recent_model_run(model: ModelEnum, data_type: ObjectTypeEnum) -> datetime:
    """ Get the most recent model run date - if none exists, raise NoModelRunFound. """
    # NOTE: This is a nasty, slow, brute force way of doing it!
    async with get_client() as (client, bucket):

        async def get_most_recent(result, depth):
            # use a reducer to iterate through the list of objects, returning the last one.
            if 'CommonPrefixes' in result:
                last_object = result['CommonPrefixes'][-1]
                object_name = last_object['Prefix']
            else:
                return None
            if depth == 3:
                return object_name
            return await get_most_recent(
                await client.list_objects_v2(Bucket=bucket, Prefix=object_name, Delimiter='/'),
                depth + 1)

        format_string = 'kml' if data_type == ObjectTypeEnum.KML else 'json'
        most_recent = await get_most_recent(
            await client.list_objects_v2(
                Bucket=bucket,
                Prefix=f'c-haines-polygons/{format_string}/{model}/',
                Delimiter='/'),
            0)
        if most_recent is None:
            raise NoModelRunFound(f'no model run found for {model}')
        logger.info('most recent model run: %s', most_recent)
        return extract_model_run_timestamp_from_path(most_recent)
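
# A minimal usage sketch (illustrative only, not part of the module): the function
# raises NoModelRunFound rather than returning None, so callers are expected to handle
# the exception. The helper name below is hypothetical.
async def _example_latest_kml_run(model: ModelEnum):
    """ Illustrative only: return the latest KML model run timestamp, or None. """
    try:
        return await _get_most_recent_model_run(model, ObjectTypeEnum.KML)
    except NoModelRunFound:
        logger.info('no model run found for %s', model)
        return None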
async def save_as_kml_to_s3(json_filename: str,
                            source_projection,
                            prediction_model: ModelEnum,
                            model_run_timestamp: datetime,
                            prediction_timestamp: datetime):
    """ Given a geojson file, generate KML and store to S3 """
    target_kml_path = generate_full_object_store_path(
        prediction_model, model_run_timestamp, prediction_timestamp, ObjectTypeEnum.KML)
    async with get_client() as (client, bucket):
        # let's save some time, and check if the file already exists.
        # it's super important we do this, since there are many c-haines cronjobs running in dev, all
        # pointing to the same s3 bucket.
        if await object_exists(client, bucket, target_kml_path):
            logger.info('kml (%s) already exists - skipping', target_kml_path)
            return
        # generate the kml file
        with io.StringIO() as sio:
            severity_geojson_to_kml(json_filename, source_projection, sio,
                                    prediction_model, model_run_timestamp, prediction_timestamp)
            # smash it into binary
            sio.seek(0)
            bio = io.BytesIO(sio.read().encode('utf8'))
            # go back to start
            bio.seek(0)
            # save it to s3
            logger.info('uploading %s', target_kml_path)
            await client.put_object(Bucket=bucket, Key=target_kml_path, Body=bio)
async def save_as_geojson_to_s3(source_json_filename: str,
                                source_projection: str,
                                prediction_model: ModelEnum,
                                model_run_timestamp: datetime,
                                prediction_timestamp: datetime):
    """ Given a geojson file, ensure it's in the correct projection and then store to S3 """
    target_path = generate_full_object_store_path(
        prediction_model, model_run_timestamp, prediction_timestamp, ObjectTypeEnum.GEOJSON)
    # let's save some time, and check if the file already exists.
    # it's super important we do this, since there are many c-haines cronjobs running in dev, all
    # pointing to the same s3 bucket.
    async with get_client() as (client, bucket):
        if await object_exists(client, bucket, target_path):
            logger.info('json (%s) already exists - skipping', target_path)
            return
        # re-project the geojson file from whatever it was, to WGS84.
        re_projected_data = re_project_and_classify_geojson(source_json_filename, source_projection)
        with io.StringIO() as sio:
            json.dump(re_projected_data, sio)
            # smash it into binary
            sio.seek(0)
            bio = io.BytesIO(sio.read().encode('utf8'))
            # go back to start
            bio.seek(0)
            # smash it into the object store.
            logger.info('uploading %s', target_path)
            await client.put_object(Bucket=bucket, Key=target_path, Body=bio)
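
# The StringIO -> BytesIO dance above is repeated in both save functions; a small helper
# like the one below could factor it out. This is a sketch only - the name
# _text_to_bytes_io is not part of the module.
def _text_to_bytes_io(sio: io.StringIO) -> io.BytesIO:
    """ Illustrative only: encode a text buffer as utf8 binary, rewound for upload. """
    sio.seek(0)
    bio = io.BytesIO(sio.read().encode('utf8'))
    bio.seek(0)
    return bio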
async def fetch_model_runs(model_run_timestamp: datetime):
    """ Fetch recent model runs. """
    # NOTE: This is a horribly inefficient way of listing model runs - we're making 6 calls just to
    # list model runs.
    result = CHainesModelRuns(model_runs=[])
    # Get an async S3 client.
    async with get_client() as (client, bucket):
        # Create tasks for listing all the model runs.
        tasks = []
        # Iterate over the date of interest and the day before. If you only look at today, you may
        # have an empty list until the latest model runs come in, so it's better to also list data
        # from the day before.
        for date in [model_run_timestamp, model_run_timestamp - timedelta(days=1)]:
            # We're interested in all the model runs.
            for model in ['GDPS', 'RDPS', 'HRDPS']:
                # Construct a prefix to search for in S3 (basically path matching).
                prefix = f'c-haines-polygons/json/{model}/{date.year}/{date.month}/{date.day}/'
                logger.info(prefix)
                # Create the task to go and fetch the listing from S3.
                tasks.append(asyncio.create_task(
                    client.list_objects_v2(Bucket=bucket, Prefix=prefix)))
        # Run all the tasks at once. (Basically listing folder contents on S3.)
        model_run_prediction_results = await asyncio.gather(*tasks)

        # Iterate through results.
        for prediction_result in model_run_prediction_results:
            # S3 data comes back as a dictionary with "Contents".
            if 'Contents' in prediction_result:
                model_run_predictions = None
                prev_model_run_timestamp = None
                # Iterate through all the contents.
                for prediction in prediction_result['Contents']:
                    # The path is stored in the "Key" field. We infer the model, model run timestamp
                    # and prediction timestamp from the path.
                    model, model_run_timestamp, prediction_timestamp = \
                        extract_model_run_prediction_from_path(prediction['Key'])
                    # Check for new model runs to add to our list.
                    if prev_model_run_timestamp != model_run_timestamp:
                        # New model run? Make it and add it to the list.
                        prev_model_run_timestamp = model_run_timestamp
                        model_run_predictions = CHainesModelRunPredictions(
                            model=WeatherPredictionModel(name=model, abbrev=model),
                            model_run_timestamp=model_run_timestamp,
                            prediction_timestamps=[prediction_timestamp, ])
                        result.model_runs.append(model_run_predictions)
                    else:
                        # Already have a model run, just add the prediction.
                        model_run_predictions.prediction_timestamps.append(prediction_timestamp)
    # Sort everything by model run timestamp.
    result.model_runs.sort(key=lambda model_run: model_run.model_run_timestamp, reverse=True)
    return result
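
# A caveat worth noting: list_objects_v2 returns at most 1000 keys per call. The day
# folders here are small, so that's fine in practice, but if a prefix could ever hold
# more, a paginator would be the safer approach. A minimal sketch, assuming an
# aiobotocore-style S3 client; the helper name is hypothetical.
async def _list_all_keys(client, bucket: str, prefix: str) -> list:
    """ Illustrative only: collect every key under a prefix, following pagination. """
    keys = []
    paginator = client.get_paginator('list_objects_v2')
    async for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        keys.extend(obj['Key'] for obj in page.get('Contents', []))
    return keys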
async def fetch_model_run_kml_streamer(
        model: ModelEnum, model_run_timestamp: datetime) -> AsyncGenerator[str, None]:
    """ Yield model run XML.
    Yielding allows the streaming response to start while the kml is being constructed.
    The KML we're making is essentially a list of network links for each prediction.
    """
    # We need to pass the API's url in, so that the KML knows where to ask for network links.
    uri = config.get('BASE_URI')
    # Start serving up the kml.
    yield get_kml_header()
    # Serve up the "look_at" which tells google earth when and where to take you.
    yield get_look_at(model, model_run_timestamp)
    # Serve up model folder and model run folder.
    yield f'<name>{model} {model_run_timestamp}</name>\n'
    yield '<Folder>'  # Open model run folder.
    yield f'<name>{model} {model_run_timestamp} model run</name>\n'
    # Get an async S3 client.
    async with get_client() as (client, bucket):
        # Construct model run path - so we can list contents that match that path.
        model_run_path = generate_object_store_model_run_path(
            model, model_run_timestamp, ObjectTypeEnum.KML)
        # List all files in the folder (e.g. list all the prediction kml files).
        predictions = await client.list_objects_v2(Bucket=bucket, Prefix=model_run_path)
        # The file listing is in the "Contents" entry.
        if 'Contents' in predictions:
            # Iterate through each entry.
            for prediction in predictions['Contents']:
                # The filename is in the "Key" entry.
                object_name = prediction['Key']
                # Infer the timestamp from the filename.
                prediction_timestamp = object_name.split('/')[-1].split('.')[0]
                # Construct params for the URL.
                kml_params = {
                    'model_run_timestamp': model_run_timestamp,
                    'prediction_timestamp': prediction_timestamp,
                    'response_format': 'KML'
                }
                # Create the url (remembering to escape & for xml).
                kml_url = urljoin(uri, f'/api/c-haines/{model}/prediction') + \
                    '?' + urlencode(kml_params).replace('&', '&amp;')
                yield '<NetworkLink>\n'
                yield '<visibility>1</visibility>\n'
                yield f'<name>{prediction_timestamp}</name>\n'
                yield '<Link>\n'
                yield f'<href>{kml_url}</href>\n'
                yield '</Link>\n'
                yield '</NetworkLink>\n'
    yield '</Folder>'  # Close model run folder.
    # Close the KML document.
    yield '</Document>\n'
    yield '</kml>\n'
    logger.info('kml complete')
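
# A minimal sketch of how the streamer might be wired to a streaming HTTP response,
# assuming a FastAPI `app` object; the route path below is hypothetical - the real
# route isn't shown in this section.
from fastapi.responses import StreamingResponse

@app.get('/api/c-haines/{model}/model-run.kml')
async def get_model_run_kml(model: ModelEnum, model_run_timestamp: datetime):
    """ Illustrative only: stream the KML to the client as it is generated. """
    return StreamingResponse(
        fetch_model_run_kml_streamer(model, model_run_timestamp),
        media_type='application/vnd.google-earth.kml+xml')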
async def main():
    """ Entry point for generating C-Haines severity index polygons. """
    async with get_client() as (client, bucket):
        models = (
            (ModelEnum.GDPS, ProjectionEnum.LATLON_15X_15),
            (ModelEnum.RDPS, ProjectionEnum.REGIONAL_PS),
            (ModelEnum.HRDPS, ProjectionEnum.HIGH_RES_CONTINENTAL),
        )
        for model, projection in models:
            logger.info('Generating C-Haines Severity Index for %s', model)
            generator = CHainesSeverityGenerator(model, projection, client, bucket)
            await generator.generate()
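
# main() is a coroutine, so it needs an event loop to run. A minimal sketch of the
# usual entry point guard, assuming no loop is already running:
if __name__ == '__main__':
    asyncio.run(main())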
async def get_c_haines_model_run_prediction(
        model: ModelEnum,
        model_run_timestamp: datetime,
        prediction_timestamp: datetime,
        response_format: ObjectTypeEnum = ObjectTypeEnum.GEOJSON):
    """ Return geojson/kml polygons for c-haines """
    logger.info(
        '/c-haines/%s/prediction?model_run_timestamp=%s&prediction_timestamp=%s&response_format=%s',
        model, model_run_timestamp, prediction_timestamp, response_format)
    async with get_client() as (client, bucket):
        key = generate_full_object_store_path(
            model, model_run_timestamp, prediction_timestamp, response_format)
        response = await client.generate_presigned_url(
            'get_object', Params={'Bucket': bucket, 'Key': key})
        return RedirectResponse(url=response)