def create_scene(owner, prefix, landsat_id, config, datasource):
    """Create an ingested COG scene for a Landsat scene hosted in GCS.

    Args:
        owner (str): ID of the user that will own the new scene
        prefix (str): local scratch directory for intermediate files
        landsat_id (str): Landsat scene identifier
        config: band/processing configuration for this Landsat product
        datasource (str): ID of the datasource for the new scene

    Returns:
        Scene

    Raises:
        Exception: if the scene's MTL metadata is not yet available in GCS
    """
    logger.info('Creating scene for landsat id {}'.format(landsat_id))
    gcs_prefix = io.gcs_path_for_landsat_id(landsat_id)
    metadata_resp = requests.get(io.make_path_for_mtl(gcs_prefix, landsat_id))
    if metadata_resp.status_code == 404:
        logger.error('Landsat scene %s is not available yet in GCS', landsat_id)
        # BUG FIX: Exception does not perform logging-style '%s' substitution
        # on extra arguments -- the id must be formatted into the message.
        raise Exception('Could not find landsat scene {}'.format(landsat_id))
    filter_metadata = extract_metadata(metadata_resp.content)
    (filename, cog_fname) = process_to_cog(prefix, gcs_prefix, landsat_id, config)
    s3_location = upload_file(owner, filename, cog_fname)
    logger.info('Creating image')
    ingest_location = 's3://{}/{}'.format(data_bucket, urllib.quote(s3_location))
    scene = Scene(
        'PRIVATE', [], datasource, {}, landsat_id,
        'SUCCESS', 'SUCCESS', 'INGESTED',
        [io.make_path_for_mtl(gcs_prefix, landsat_id)],
        ingestLocation=ingest_location,
        cloudCover=filter_metadata['cloud_cover'],
        acquisitionDate=filter_metadata['acquisition_date'],
        sceneType='COG',
        owner=owner)
    image = create_geotiff_image(
        filename,
        ingest_location,
        filename=cog_fname,
        owner=owner,
        scene=scene.id,
        band_create_function=lambda x: config.bands.values())
    scene.images = [image]
    return scene
def create_scene(owner, prefix, landsat_id, config, datasource):
    """Create an ingested COG scene for a Landsat scene hosted in GCS.

    Args:
        owner (str): ID of the user that will own the new scene
        prefix (str): local scratch directory for intermediate files
        landsat_id (str): Landsat scene identifier
        config: band/processing configuration for this Landsat product
        datasource (str): ID of the datasource for the new scene

    Returns:
        Scene

    Raises:
        Exception: if the scene's MTL metadata is not yet available in GCS
    """
    logger.info('Creating scene for landsat id {}'.format(landsat_id))
    gcs_prefix = io.gcs_path_for_landsat_id(landsat_id)
    metadata_resp = requests.get(io.make_path_for_mtl(gcs_prefix, landsat_id))
    if metadata_resp.status_code == 404:
        logger.error('Landsat scene %s is not available yet in GCS', landsat_id)
        # BUG FIX: Exception does not perform logging-style '%s' substitution
        # on extra arguments -- the id must be formatted into the message.
        raise Exception('Could not find landsat scene {}'.format(landsat_id))
    filter_metadata = extract_metadata(metadata_resp.content)
    (filename, cog_fname) = process_to_cog(prefix, gcs_prefix, landsat_id, config)
    s3_location = upload_file(owner, filename, cog_fname)
    logger.info('Creating image')
    ingest_location = 's3://{}/{}'.format(data_bucket, urllib.quote(s3_location))
    scene = Scene(
        'PRIVATE', [], datasource, {}, landsat_id,
        'SUCCESS', 'SUCCESS', 'INGESTED',
        [io.make_path_for_mtl(gcs_prefix, landsat_id)],
        ingestLocation=ingest_location,
        cloudCover=filter_metadata['cloud_cover'],
        acquisitionDate=filter_metadata['acquisition_date'],
        sceneType='COG',
        owner=owner)
    image = create_geotiff_image(
        filename,
        ingest_location,
        filename=cog_fname,
        owner=owner,
        scene=scene.id,
        band_create_function=lambda x: config.bands.values())
    scene.images = [image]
    return scene
def create_scene(hdf_url, temp_directory, user_id, datasource):
    """Create a MODIS scene

    Args:
        hdf_url (str): URL for MODIS scene to download
        temp_directory (str): directory to use as scratch space when creating scene
        user_id (str): ID of owner for new MODIS scene
        datasource (str): ID of datasource for new MODIS scene
    """
    modis_config = modis_configs[datasource]

    # Granule names look like <product>.A<YYYYDDD>.<...>.hdf -- the second
    # dotted part (minus its leading letter) is the julian acquisition date.
    granule_parts = os.path.basename(hdf_url).split('.')
    acquired = datetime.strptime(granule_parts[1][1:], '%Y%j')
    scene_name = '.'.join(granule_parts[:-1])
    scene_id = str(uuid.uuid4())
    scene = Scene(0, Visibility.PRIVATE, [], datasource, {}, scene_name,
                  JobStatus.SUCCESS, JobStatus.SUCCESS, IngestStatus.INGESTED,
                  [], owner=user_id, id=scene_id,
                  acquisitionDate=acquired.isoformat() + 'Z', cloudCover=0)

    # Separate scratch subdirectories for the raw HDF and the derived tiffs.
    hdf_directory = os.path.join(temp_directory, 'hdf')
    os.mkdir(hdf_directory)
    hdf_filepath = download_hdf(hdf_url, hdf_directory)
    tiff_directory = os.path.join(temp_directory, 'tiffs')
    os.mkdir(tiff_directory)

    tifs = create_geotiffs(hdf_filepath, tiff_directory)
    s3_uris = upload_tifs(tifs, user_id, scene.id)

    band_fn = partial(get_image_band, modis_config=modis_config)
    scene.images = [
        create_geotiff_image(local_path, remote_path, scene=scene.id,
                             owner=user_id, band_create_function=band_fn)
        for local_path, remote_path in zip(tifs, s3_uris)
    ]
    scene.ingestLocation = s3_uris[0]
    scene.sceneType = 'COG'
    return scene
def create_landsat8_scenes(csv_row):
    """Returns scenes that can be created via API given a path to tiles for
    Landsat 8

    Args:
        csv_row (dict): value returned by a call to DictReader.next on the
            tiles csv; consumed destructively (several keys are popped)

    Returns:
        List[Scene]: empty when the scene is not yet mirrored to AWS
    """
    scene_id = str(uuid.uuid4())
    landsat_id = csv_row.pop('sceneID')
    tileFootprint, dataFootprint = create_footprints(csv_row)
    landsat_path = get_landsat_path(landsat_id)
    # Hoist the URL prefix that was previously rebuilt three times.
    landsat_url = aws_landsat_base + landsat_path
    if not s3_obj_exists(landsat_url + 'index.html'):
        # logger.warn is a deprecated alias for logger.warning.
        logger.warning(
            'AWS and USGS are not always in sync. Try again in several hours.\n'
            'If you believe this message is in error, check %s manually.',
            landsat_url)
        return []

    timestamp = csv_row.pop('acquisitionDate') + 'T00:00:00.000Z'
    cloud_cover = float(csv_row.pop('cloudCoverFull'))
    sun_elevation = float(csv_row.pop('sunElevation'))
    sun_azimuth = float(csv_row.pop('sunAzimuth'))
    bands_15m = create_bands('15m')
    bands_30m = create_bands('30m')
    tags = ['Landsat 8', 'GeoTIFF']
    scene_metadata = filter_empty_keys(csv_row)

    # Landsat 8 provides a panchromatic band at 15m resolution and all
    # other bands at 30m resolution
    scene = Scene(
        organization,
        0,
        Visibility.PUBLIC,
        tags,
        datasource_id,
        scene_metadata,
        'L8 {}'.format(landsat_path),  # name
        JobStatus.SUCCESS,
        JobStatus.SUCCESS,
        IngestStatus.NOTINGESTED,
        id=scene_id,
        acquisitionDate=timestamp,
        cloudCover=cloud_cover,
        sunAzimuth=sun_azimuth,
        sunElevation=sun_elevation,
        tileFootprint=tileFootprint,
        dataFootprint=dataFootprint,
        metadataFiles=[landsat_url + landsat_id + '_MTL.txt'],
        thumbnails=create_thumbnails(scene_id, landsat_id),
        images=(create_images(scene_id, landsat_id, 15, bands_15m) +
                create_images(scene_id, landsat_id, 30, bands_30m)))
    return [scene]
def create_ingest_definition_op(*args, **kwargs):
    """Create ingest definition and upload to S3.

    Reads the target scene id from the triggering dag_run's conf, validates
    that the scene is still awaiting ingest and is a Landsat 8 scene, marks
    it INGESTING, then builds and uploads the ingest definition, pushing its
    URI and id to XCom for downstream tasks.

    Raises:
        Exception: if the scene is not TOBEINGESTED or belongs to a
            different datasource
    """
    logger.info('Beginning to create ingest definition...')
    xcom_client = kwargs['task_instance']
    conf = kwargs['dag_run'].conf
    scene_dict = conf.get('scene')
    xcom_client.xcom_push(key='ingest_scene_id', value=scene_dict['id'])
    scene = Scene.from_id(scene_dict['id'])

    if scene.ingestStatus != IngestStatus.TOBEINGESTED:
        raise Exception('Scene is no longer waiting to be ingested, error error')

    if scene.datasource != datasource_id:
        # BUG FIX: Exception does not apply logging-style '%s' substitution to
        # extra arguments, so the scene id must be formatted into the message.
        raise Exception(
            'Unable to import scene {}, only able to import Landsat 8 scenes'
            .format(scene.id))

    scene.ingestStatus = IngestStatus.INGESTING
    logger.info('Updating scene status to ingesting')
    scene.update()
    logger.info('Successfully updated scene status')

    logger.info('Creating ingest definition')
    ingest_definition = create_landsat8_ingest(scene)
    ingest_definition.put_in_s3()
    logger.info('Successfully created and pushed ingest definition')

    # Store values for later tasks
    xcom_client.xcom_push(key='ingest_def_uri', value=ingest_definition.s3_uri)
    xcom_client.xcom_push(key='ingest_def_id', value=ingest_definition.id)
def create_sentinel2_scenes(tile_path):
    """Returns scenes that can be created via API given a path to tiles

    Args:
        tile_path (str): path to tile directory (e.g. tiles/54/M/XB/2016/9/25/0)

    Returns:
        List[Scene]
    """
    scene_id = str(uuid.uuid4())
    logger.info('Starting scene creation for sentinel 2 scene: %s', tile_path)

    tileinfo = get_tileinfo('{path}/tileInfo.json'.format(path=tile_path))

    # Sentinel-2 ships bands at three resolutions; collect images for each.
    images = []
    for resolution in (10, 20, 60):
        images += create_images(scene_id, tileinfo, resolution)

    tileFootprint, dataFootprint = create_footprints(tileinfo)
    thumbnails = create_thumbnails(scene_id, tile_path)
    tags = ['Sentinel-2', 'JPEG2000']

    aws_base = 'https://' + bucket.name + '.s3.amazonaws.com'
    metadataFiles = [
        os.path.join(aws_base, tile_path, basename)
        for basename in ('tileInfo.json', 'metadata.xml', 'productInfo.json')
    ]

    # Copy the interesting tileInfo fields verbatim into scene metadata.
    metadata_keys = ('path', 'timestamp', 'utmZone', 'latitudeBand',
                     'gridSquare', 'dataCoveragePercentage',
                     'cloudyPixelPercentage', 'productName', 'productPath')
    scene_metadata = {key: tileinfo[key] for key in metadata_keys}

    scene = Scene(
        organization,
        0,
        Visibility.PUBLIC,
        tags,
        datasource_id,
        scene_metadata,
        'S2 {}'.format(tile_path),  # name
        JobStatus.SUCCESS if thumbnails else JobStatus.FAILURE,
        JobStatus.SUCCESS if dataFootprint else JobStatus.FAILURE,
        IngestStatus.NOTINGESTED,
        id=scene_id,
        acquisitionDate=tileinfo['timestamp'],
        cloudCover=tileinfo['cloudyPixelPercentage'],
        tileFootprint=tileFootprint,
        dataFootprint=dataFootprint,
        metadataFiles=metadataFiles,
        thumbnails=thumbnails,
        images=images)
    return [scene]
def set_ingest_status_failure_op(*args, **kwargs):
    """Set ingest status on failure"""
    task_instance = kwargs['task_instance']
    failed_scene_id = task_instance.xcom_pull(key='ingest_scene_id',
                                              task_ids=None)
    logger.info("Setting scene (%s) ingested status to failed", failed_scene_id)
    failed_scene = Scene.from_id(failed_scene_id)
    failed_scene.ingestStatus = IngestStatus.FAILED
    failed_scene.update()
    logger.info("Finished setting scene (%s) ingest status (%s)",
                failed_scene_id, IngestStatus.FAILED)
def create_scene(owner, prefix, landsat_id, config, datasource):
    """Fetch a Landsat scene's bands from GCS, stack/convert them to a COG,
    upload it to S3, and return the resulting ingested Scene.

    Args:
        owner (str): ID of the user that will own the new scene
        prefix (str): local scratch directory for intermediate files
        landsat_id (str): Landsat scene identifier
        config: band/processing configuration for this Landsat product
        datasource (str): ID of the datasource for the new scene

    Returns:
        Scene
    """
    logger.info('Creating scene for landsat id {}'.format(landsat_id))
    gcs_prefix = io.gcs_path_for_landsat_id(landsat_id)

    logger.info('Fetching all bands')
    for band_name in config.bands.keys():
        fetch_band(prefix, gcs_prefix, band_name, landsat_id)

    mtl_response = requests.get(io.make_path_for_mtl(gcs_prefix, landsat_id))
    filter_metadata = extract_metadata(mtl_response.content)

    cog_fname = '{}_COG.tif'.format(landsat_id)
    stacked_fname = '{}_STACKED.tif'.format(landsat_id)
    cog_path = os.path.join(prefix, cog_fname)
    stacked_path = os.path.join(prefix, stacked_fname)
    convert_to_cog(prefix, stacked_path, cog_path, config, landsat_id)
    s3_location = upload_file(owner, cog_path, cog_fname)

    logger.info('Creating image')
    ingest_location = 's3://{}/{}'.format(data_bucket,
                                          urllib.quote(s3_location))
    scene = Scene(
        0, 'PRIVATE', [], datasource, {}, landsat_id,
        'SUCCESS', 'SUCCESS', 'INGESTED',
        [io.make_path_for_mtl(gcs_prefix, landsat_id)],
        ingestLocation=ingest_location,
        cloudCover=filter_metadata['cloud_cover'],
        acquisitionDate=filter_metadata['acquisition_date'],
        sceneType='COG',
        owner=owner)
    image = create_geotiff_image(
        cog_path,
        ingest_location,
        filename=cog_fname,
        owner=owner,
        scene=scene.id,
        band_create_function=lambda _unused: config.bands.values())
    scene.images = [image]
    return scene
def copy_scene(scene):
    """Create a copy of this scene with images converted to tifs in s3

    Args:
        scene (Scene): scene to copy images from

    Return:
        Scene: the copied scene with substitute images
    """
    # Round-trip through dict form to deep-copy the scene fields.
    duplicate = Scene.from_dict(scene.to_dict())
    replacement_images = []
    for original_image in scene.images:
        replacement_images.append(make_tif_image_copy(original_image))
    duplicate.images = replacement_images
    return duplicate
def create_scene(hdf_url, temp_directory, user_id, datasource):
    """Create a MODIS scene

    Args:
        hdf_url (str): URL for MODIS scene to download
        temp_directory (str): directory to use as scratch space when creating scene
        user_id (str): ID of owner for new MODIS scene
        datasource (str): ID of datasource for new MODIS scene
    """
    config = modis_configs[datasource]

    granule_parts = os.path.basename(hdf_url).split('.')
    acquisition_datetime = datetime.strptime(granule_parts[1][1:], '%Y%j')
    name = '.'.join(granule_parts[:-1])
    # Renamed from `id` to avoid shadowing the builtin.
    scene_id = str(uuid.uuid4())
    scene = Scene(Visibility.PRIVATE, [], datasource, {}, name,
                  JobStatus.SUCCESS, JobStatus.SUCCESS, IngestStatus.INGESTED,
                  [], owner=user_id, id=scene_id,
                  acquisitionDate=acquisition_datetime.isoformat() + 'Z',
                  cloudCover=0)

    hdf_filepath = download_hdf(hdf_url, temp_directory)
    tifs = create_geotiffs(hdf_filepath, temp_directory)
    s3_uris = upload_tifs(tifs, user_id, scene.id)

    images = []
    get_band_func = partial(get_image_band, modis_config=config)
    for local_path, remote_path in zip(tifs, s3_uris):
        # BUG FIX: previously every image was created against s3_uris[0],
        # pointing all images at the first tif's URI; use the per-tif
        # remote_path from the zip instead.
        image = create_geotiff_image(local_path,
                                     urllib.unquote(remote_path),
                                     scene=scene.id,
                                     owner=user_id,
                                     band_create_function=get_band_func)
        images.append(image)

    scene.images = images
    scene.ingestLocation = s3_uris[0]
    scene.sceneType = 'COG'
    return scene
def set_ingest_status_success_op(*args, **kwargs):
    """Set scene ingest status on success"""
    task_instance = kwargs['task_instance']
    ingested_scene_id = task_instance.xcom_pull(key='ingest_scene_id',
                                                task_ids=None)
    logger.info("Setting scene (%s) ingested status to success",
                ingested_scene_id)
    ingested_scene = Scene.from_id(ingested_scene_id)
    ingested_scene.ingestStatus = IngestStatus.INGESTED
    # Ingested layers live under the tile server bucket's layers/ prefix.
    ingested_scene.ingestLocation = 's3://{}/layers'.format(
        os.getenv('TILE_SERVER_BUCKET'))
    ingested_scene.update()
    logger.info("Finished setting scene (%s) ingest status (%s)",
                ingested_scene_id, IngestStatus.INGESTED)
def create_planet_scene(planet_feature, datasource, organization_id, planet_key,
                        ingest_status=IngestStatus.TOBEINGESTED,
                        visibility=Visibility.PRIVATE, tags=None, owner=None,
                        sceneType="AVRO"):
    """Create a Raster Foundry scene from Planet scenes

    Args:
        planet_key (str): API auth key for planet API
        planet_feature (dict): json response from planet API client
        datasource (str): UUID of the datasource this scene belongs to
        organization_id (str): UUID of the organization that owns the scene
        ingest_status (str): initial ingest status for the created scene
        visibility (Visibility): visibility for created scene
        tags (str[]): list of tags to attach to the created scene
        owner (str): user ID of the user who owns the scene
        sceneType (str): scene type for the created scene

    Returns:
        Scene
    """
    # BUG FIX: avoid a shared mutable default argument ([]).
    tags = [] if tags is None else tags
    props = planet_feature['properties']
    name = planet_feature['id']
    acquisitionDate = props['acquired']
    cloudCover = props['cloud_cover']
    ingestSizeBytes = 0  # TODO

    scene_kwargs = {
        'sunAzimuth': props['sun_azimuth'],
        'sunElevation': props['sun_elevation'],
        'cloudCover': cloudCover,
        'acquisitionDate': acquisitionDate,
        'id': str(uuid.uuid4()),
        'thumbnails': None,
        'tileFootprint': Footprint(organization_id,
                                   bbox_from_planet_feature(planet_feature)),
        'dataFootprint': Footprint(organization_id,
                                   [planet_feature['geometry']['coordinates']])
    }

    images = [
        create_geotiff_image(organization_id,
                             planet_feature['added_props']['localPath'],
                             planet_feature['added_props']['s3Location'],
                             scene=scene_kwargs['id'],
                             visibility=visibility,
                             owner=owner)
    ]

    scene = Scene(organization_id, ingestSizeBytes, visibility, tags,
                  datasource, props, name, JobStatus.QUEUED, JobStatus.QUEUED,
                  ingest_status, [], owner=owner, images=images,
                  sceneType=sceneType, **scene_kwargs)

    thumbnail_url = planet_feature['_links']['thumbnail']
    scene.thumbnails = [
        get_planet_thumbnail(organization_id, thumbnail_url, planet_key,
                             scene.id)
    ]
    return scene
def create_geotiff_scene(tif_path, datasource, acquisitionDate=None,
                         cloudCover=0, visibility=Visibility.PRIVATE,
                         tags=None, sceneMetadata=None, name=None,
                         thumbnailStatus=JobStatus.QUEUED,
                         boundaryStatus=JobStatus.QUEUED,
                         ingestStatus=IngestStatus.TOBEINGESTED,
                         metadataFiles=None, owner=None, sceneType="COG",
                         **kwargs):
    """Returns scenes that can be created via API given a local path to a
    geotiff.

    Does not create Images because those require a Source URI, which this
    doesn't know about. Use create_geotiff_scene from a higher level of code
    that knows about where things are located remotely and then add those
    Images to the Scene that this function returns.

    Tries to extract metadata from the GeoTIFF where possible, but can also
    accept parameter overrides. Order of preference is as follows:
        1) Kwargs
        2) GeoTiff Value / Dynamic value (e.g. azimuth calculated from
           capture location / time)
        3) Default values

    Args:
        tif_path (str): Local path to GeoTIFF file to use.
        datasource (str): Name describing the source of the data
        **kwargs: Any remaining keyword arguments will override the values
            being passed to the Scene constructor. If present, they take
            precedence over extracted and default values.

    Returns:
        List[Scene]
    """
    # BUG FIX: avoid shared mutable default arguments ([]).
    tags = [] if tags is None else tags
    metadataFiles = [] if metadataFiles is None else metadataFiles

    logger.info('Generating Scene from %s', tif_path)

    # Start with default values
    sceneMetadata = sceneMetadata if sceneMetadata else get_geotiff_metadata(
        tif_path)
    name = name if name else get_geotiff_name(tif_path)

    sceneKwargs = {
        'sunAzimuth': None,  # TODO: Calculate from acquisitionDate and tif center.
        'sunElevation': None,  # TODO: Same
        'cloudCover': cloudCover,
        'acquisitionDate': acquisitionDate,
        'id': str(uuid.uuid4()),
        'thumbnails': None
    }
    # Override defaults with kwargs
    sceneKwargs.update(kwargs)

    # Construct Scene
    scene = Scene(visibility, tags, datasource, sceneMetadata, name,
                  thumbnailStatus, boundaryStatus, ingestStatus, metadataFiles,
                  owner=owner, sceneType=sceneType, **sceneKwargs)

    return scene
def create_planet_scene(planet_feature, datasource, planet_key,
                        visibility=Visibility.PRIVATE, tags=None, owner=None):
    """Create a Raster Foundry scene from Planet scenes

    Args:
        planet_key (str): API auth key for planet API
        planet_feature (dict): json response from planet API client
        datasource (str): UUID of the datasource this scene belongs to
        visibility (Visibility): visibility for created scene
        tags (str[]): list of tags to attach to the created scene
        owner (str): user ID of the user who owns the scene

    Returns:
        Scene
    """
    # BUG FIX: avoid a shared mutable default argument ([]).
    tags = [] if tags is None else tags
    props = planet_feature['properties']
    name = planet_feature['id']

    scene_kwargs = {
        'sunAzimuth': props['sun_azimuth'],
        'sunElevation': props['sun_elevation'],
        'cloudCover': props['cloud_cover'],
        'acquisitionDate': props['acquired'],
        'id': str(uuid.uuid4()),
        'thumbnails': None,
        # Percent-encode pipes, which are not valid in URIs.
        'ingestLocation': planet_feature['added_props']['s3Location'].replace(
            '|', '%7C'
        )
    }

    images = [create_geotiff_image(
        planet_feature['added_props']['localPath'],
        planet_feature['added_props']['s3Location'],
        scene=scene_kwargs['id'],
        visibility=visibility,
        owner=owner
    )]

    scene = Scene(
        visibility, tags, datasource, props, name,
        JobStatus.QUEUED, JobStatus.QUEUED, 'INGESTED', [],
        owner=owner, images=images, sceneType='COG', **scene_kwargs
    )
    return scene
def setUp(self):
    """Load the scene fixture from data/scene.json next to this test file."""
    data_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
    with open(os.path.join(data_dir, 'scene.json')) as scene_file:
        self.scene = Scene.from_dict(json.load(scene_file))