def generate_scenes(self):
    """Build a Scene for every HDF url attached to this factory.

    Returns:
        list of Scene
    """
    created = []
    for hdf_url in self.hdf_urls:
        # Each scene gets its own scratch directory, removed on exit
        with get_tempdir() as workdir:
            created.append(
                create_scene(hdf_url, workdir, self.owner, self.datasource))
    return created
def generate_scenes(self):
    """Create a Scene and associated Image for each GeoTiff in self.s3_path

    Returns:
        Generator of Scenes
    """
    s3 = boto3.resource('s3')
    for infile in self.files:
        bucket_name, key = s3_bucket_and_key_from_url(infile)
        filename = os.path.basename(key)
        logger.info('Downloading %s => %s', infile, filename)
        bucket = s3.Bucket(bucket_name)
        # All scratch files live in a per-scene temp directory that is
        # removed when the with-block exits.
        with get_tempdir() as workdir:
            local_path = os.path.join(workdir, filename)
            bucket.download_file(key, local_path)
            cog_path = convert_to_cog(workdir, filename)
            scene_name = os.path.splitext(filename)[0]
            scene = self.create_geotiff_scene(local_path, scene_name)
            uploaded = upload_tifs([cog_path], self.owner, scene.id)
            scene.ingestLocation = uploaded[0]
            image = self.create_geotiff_image(
                local_path, urllib.unquote(scene.ingestLocation), scene,
                cog_path)
            scene.thumbnails = []
            scene.images = [image]
            yield scene
def create_cog(image_locations, scene):
    """Fetch imagery, merge it into a single COG, and upload it for a scene.

    Args:
        image_locations: iterable of (source, filename) pairs to fetch
        scene: scene the resulting COG belongs to
    """
    with get_tempdir() as scratch:
        local_paths = []
        for _, fname in image_locations:
            local_paths.append(os.path.join(scratch, fname))
        cog.fetch_imagery(image_locations, scratch)
        warped = cog.warp_tifs(local_paths, scratch)
        merged = cog.merge_tifs(warped, scratch)
        cog.add_overviews(merged)
        final_cog = cog.convert_to_cog(merged, scratch)
        updated = upload_tif(final_cog, scene)
        updated.update()
def test_temp_dir_cleanup(self):
    """Test that temporary directory is cleaned up even if error thrown"""
    try:
        with get_tempdir() as temp_dir:
            check_directory = temp_dir
            raise Exception('Dummy Exception')
    # Catch Exception, not bare except: a bare except would also swallow
    # KeyboardInterrupt/SystemExit, which should never be silenced in a test.
    except Exception:
        pass
    self.assertFalse(
        os.path.isdir(check_directory),
        'Directory {} should have been deleted'.format(check_directory))
def generate_scenes(self):
    """Create a Scene and associated Image for each GeoTiff in self.s3_path

    Returns:
        Generator of Scenes
    """
    s3 = boto3.resource('s3')
    for infile in self.files:
        # We can't use the temp file as a context manager because it'll be
        # opened/closed multiple times and by default is deleted when it's
        # closed. So we use try/finally to ensure that it gets cleaned up.
        local_tif = tempfile.NamedTemporaryFile(delete=False, suffix='.tif')
        # NamedTemporaryFile hands back an open handle, but we only need the
        # path. Close it right away so we don't leak one file descriptor per
        # input file (delete=False keeps the file on disk).
        local_tif.close()
        try:
            bucket_name, key = s3_bucket_and_key_from_url(infile)
            bucket = s3.Bucket(bucket_name)
            # Log before the transfer so the message describes what is about
            # to happen instead of what already finished.
            logger.info('Downloading %s => %s', infile, local_tif.name)
            bucket.download_file(key, local_tif.name)
            # We need to override the autodetected filename because we're
            # loading into temp files which don't preserve the file name
            # that is on S3.
            filename = os.path.basename(key)
            scene = self.create_geotiff_scene(
                local_tif.name, os.path.splitext(filename)[0])
            if is_tif_too_large(local_tif.name):
                with get_tempdir() as tempdir:
                    split_files = split_tif(local_tif.name, tempdir)
                    target_key = 'user-uploads/{USER}/{UPLOAD}/'.format(
                        USER=self.owner, UPLOAD=self._upload.id)
                    keys_and_filepaths = upload_split_files(
                        target_key, os.getenv('DATA_BUCKET'), split_files)
                    images = [
                        self.create_geotiff_image(
                            filepath, s3_url(bucket_name, s3_key), scene,
                            os.path.basename(s3_key))
                        for (s3_key, filepath) in keys_and_filepaths
                    ]
            else:
                images = [
                    self.create_geotiff_image(
                        local_tif.name, infile, scene, filename)
                ]
            # TODO: thumbnails aren't currently created in a way that matches
            # serialization in the API
            scene.thumbnails = create_thumbnails(local_tif.name, scene.id)
            scene.images = images
        finally:
            os.remove(local_tif.name)
        yield scene
def generate_scenes(self):
    # If this upload is not associated with a project, set the scene's
    # ingest status to TOBEINGESTED so that scene creation will kick off
    # an ingest. Otherwise, set the status to NOTINGESTED, so that the status
    # will be updated when the scene is added to this upload's project
    for planet_id in set(self.planet_ids):
        logger.info('Preparing to copy planet asset to s3: %s', planet_id)
        # prefix is a scratch directory cleaned up when the with-block exits
        with get_tempdir() as prefix:
            planet_feature, _ = self.copy_asset_to_s3(prefix, planet_id)
            planet_key = self.client.auth.value
            yield create_planet_scene(
                planet_feature,
                self.datasource,
                planet_key,
                self.visibility,
                self.tags,
                self.owner
            )
def generate_scenes(self):
    """Create a Scene for every Landsat id on this upload.

    Returns:
        list of Scene
    """
    # Map the sensor id embedded in the Landsat id to its band config.
    # An unknown sensor raises KeyError, same as the inline lookup would.
    sensor_configs = {
        'M': MultiSpectralScannerConfig,
        'T': ThematicMapperConfig,
        'E': EnhancedThematicMapperConfig,
    }
    results = []
    for landsat_id in self.upload.files:
        metadata = io.base_metadata_for_landsat_id(landsat_id)
        config = sensor_configs[metadata['sensor_id']]
        with get_tempdir() as scratch_dir:
            results.append(
                create_scene(self.upload.owner, scratch_dir, landsat_id,
                             config, self.upload.datasource))
    return results
def test_create_tif_mask(self):
    """Test that creating a tif mask works properly"""
    with get_tempdir() as temp_dir:
        mask_path = create_tif_mask(temp_dir, self.landsat8_tif)
        produced = os.listdir(temp_dir)
        self.assertEqual(len(produced), 1, 'Should have created a mask tif')
        with rasterio.open(mask_path) as src:
            band = src.read(1)
        self.assertEqual(
            band.size, 260832,
            'Size of band is {} instead of {}'.format(band.size, 260832))
        non_zero_pixels = np.sum(band)
        self.assertEqual(
            non_zero_pixels, 117687,
            'Number of pixels is {} should be {}'.format(
                non_zero_pixels, 117687))
def extract_footprints(organization_id, tif_path):
    """Performs all actions to extract polygon from a kayak scene

    Args:
        organization_id (str): organization footprints belong to
        tif_path (str): path to tif to extract polygons from

    Returns:
        tuple of (tile Footprint, data Footprint)
    """
    logger.info('Beginning process to extract footprint for image:%s', tif_path)
    with get_tempdir() as temp_dir:
        # mkstemp returns an OPEN file descriptor alongside the path;
        # close it immediately so it isn't leaked (gdal_translate writes
        # to the path, not the fd).
        fd, resampled_tif_path = tempfile.mkstemp(suffix='.TIF', dir=temp_dir)
        os.close(fd)
        with rasterio.open(tif_path) as src:
            y, x = src.shape
            aspect = y / float(x)
            x_size = 512
            y_size = int(512 * aspect)
        # Resample to a max width of 512
        cmd = [
            'gdal_translate', tif_path, resampled_tif_path,
            '-outsize', str(x_size), str(y_size),
        ]
        logger.info('Running GDAL command: %s', ' '.join(cmd))
        subprocess.check_call(cmd)
        tile_mask_tif_path, data_mask_tif_path = create_tif_mask(
            temp_dir, resampled_tif_path)
        data_footprint = extract_polygon(data_mask_tif_path)
        tile_footprint = extract_polygon(tile_mask_tif_path)
        return (Footprint(organization_id, tile_footprint),
                Footprint(organization_id, data_footprint))
def export(export_id):
    """Perform export configured by user

    Args:
        export_id (str): ID of export job to process
    """
    logger.info('Creating Export Definition')
    final_status = 'EXPORTED'
    try:
        export_uri = create_export_definition(export_id)
        logger.info('Retrieving Export Definition %s', export_uri)
        export_definition = get_export_definition(export_uri)
        with get_tempdir() as local_dir:
            logger.info('Created Working Directory %s', local_dir)
            logger.info('Rewriting Export Definition')
            local_path = write_export_definition(export_definition, local_dir)
            logger.info('Rewrote export definition to %s', local_path)
            logger.info('Preparing to Run Export')
            run_export('file://' + local_path, export_id)
            logger.info('Post Processing Tiffs')
            merged_tiff_path = post_process_exports(export_definition, local_dir)
            logger.info('Uploading Processed Tiffs')
            upload_processed_tif(merged_tiff_path, export_definition)
    except subprocess.CalledProcessError as e:
        logger.error('Output from failed command: %s', e.output)
        final_status = 'FAILED'
        # Bare raise preserves the original traceback; `raise e` would
        # re-raise from this frame and obscure where the failure happened.
        raise
    except Exception as e:
        logger.error('Wrapper error: %s', e)
        final_status = 'FAILED'
        raise
    finally:
        # The max number of retries is currently hardcoded in batch.tf
        # in the deployment repo. Please make sure that both areas are updated if
        # this needs to be changed to a configurable variable
        if final_status == 'EXPORTED' or int(RETRY) >= 3:
            logger.info('Sending email notifications for export %s on try: %s',
                        export_id, RETRY)
            update_export_status(export_id, final_status)
        else:
            logger.info('Export failed, on try %s/3', RETRY)
def extract_footprints(organization_id, tif_path):
    """Performs all actions to extract polygon from a kayak scene

    Args:
        organization_id (str): organization footprints belong to
        tif_path (str): path to tif to extract polygons from

    Returns:
        tuple of (tile Footprint, data Footprint)
    """
    logger.info('Beginning process to extract footprint for image:%s', tif_path)
    with get_tempdir() as scratch_dir:
        tile_mask, data_mask = create_tif_mask(scratch_dir, tif_path)
        tile_polygon = extract_polygon(tile_mask)
        data_polygon = extract_polygon(data_mask)
        tile_fp = Footprint(organization_id, tile_polygon)
        data_fp = Footprint(organization_id, data_polygon)
        return (tile_fp, data_fp)
def extract_footprints(tif_path):
    """Performs all actions to extract polygon from a kayak scene

    Args:
        tif_path (str): path to tif to extract polygons from

    Returns:
        tuple of (tile Footprint, data Footprint)
    """
    logger.info('Beginning process to extract footprint for image:%s', tif_path)
    with get_tempdir() as temp_dir:
        # mkstemp returns an OPEN file descriptor alongside each path; close
        # them immediately so three fds aren't leaked per call (the gdal
        # subprocesses write to the paths, not the fds).
        fd, resampled_tif_path = tempfile.mkstemp(suffix='.TIF', dir=temp_dir)
        os.close(fd)
        fd, warped_path = tempfile.mkstemp(suffix='.TIF', dir=temp_dir)
        os.close(fd)
        fd, geojson_path = tempfile.mkstemp(suffix='.GEOJSON', dir=temp_dir)
        os.close(fd)
        with rasterio.open(tif_path) as src:
            y, x = src.shape
            aspect = y / float(x)
            x_size = 512
            y_size = int(512 * aspect)
        # Resample to a max width of 512 before warping/polygonizing
        resample_cmd = [
            'gdal_translate', tif_path, resampled_tif_path,
            '-outsize', str(x_size), str(y_size)
        ]
        warp_cmd = [
            'gdalwarp', '-co', 'compress=LZW', '-dstnodata', '0',
            '-dstalpha', '-t_srs', 'epsg:4326',
            resampled_tif_path, warped_path
        ]
        polygonize_cmd = [
            'gdal_polygonize.py', '-b', 'mask',
            warped_path, '-f', 'GEOJSON', geojson_path
        ]
        subprocess.check_call(resample_cmd)
        subprocess.check_call(warp_cmd)
        subprocess.check_call(polygonize_cmd)
        with open(geojson_path, 'r+') as fh:
            geojson = json.load(fh)
        # DN == 255 marks valid-data polygons produced by gdal_polygonize
        data_footprint = [
            feature['geometry']['coordinates']
            for feature in geojson['features']
            if feature['properties']['DN'] == 255
        ]
        xs = []
        ys = []
        for area in data_footprint:
            xst, yst = zip(*area[0])
            xs += xst
            ys += yst
        xmin = min(xs)
        xmax = max(xs)
        ymin = min(ys)
        ymax = max(ys)
        # Tile footprint is the axis-aligned bounding box of the data polygons
        tile_footprint = [[[[xmin, ymin], [xmax, ymin], [xmax, ymax],
                            [xmin, ymax], [xmin, ymin]]]]
        return (Footprint(tile_footprint), Footprint(data_footprint))
def process_jp2000(scene_id, jp2_source):
    """Converts a Jpeg 2000 file to a tif

    Args:
        scene_id (str): scene the image is associated with
        jp2_source (str): url to a jpeg 2000 file

    Return:
        str: s3 url to the converted tif
    """
    with get_tempdir() as temp_dir:
        s3client = boto3.client('s3')
        in_bucket, in_key = geotiff_io.s3_bucket_and_key_from_url(jp2_source)
        in_bucket = in_bucket.replace(r'.s3.amazonaws.com', '')
        fname_part = os.path.split(in_key)[-1]
        out_bucket = os.getenv('DATA_BUCKET')
        out_key = os.path.join('sentinel-2-tifs', scene_id,
                               fname_part.replace('.jp2', '.tif'))
        jp2_fname = os.path.join(temp_dir, fname_part)
        temp_tif_fname = jp2_fname.replace('.jp2', '-temp.tif')
        tif_fname = jp2_fname.replace('.jp2', '.tif')
        # Explicitly setting nbits is necessary because geotrellis only likes
        # powers of 2, and for some reason the value on the jpeg 2000 files
        # after translation is 15
        temp_translate_cmd = [
            'gdal_translate',
            '-a_nodata', '0',  # set 0 to nodata value
            '-co', 'NBITS=16',  # explicitly set nbits = 16
            '-co', 'COMPRESS=LZW',
            '-co', 'TILED=YES',
            jp2_fname,
            temp_tif_fname
        ]
        warp_cmd = [
            'gdalwarp',
            '-co', 'COMPRESS=LZW',
            '-co', 'TILED=YES',
            '-t_srs', 'epsg:3857',
            temp_tif_fname,
            tif_fname
        ]
        dst_url = geotiff_io.s3_url(out_bucket, out_key)
        # Download the original jp2000 file
        logger.info('Downloading JPEG2000 file locally (%s/%s => %s)',
                    in_bucket, in_key, jp2_fname)
        with open(jp2_fname, 'wb') as src:
            body = s3client.get_object(Bucket=in_bucket, Key=in_key)['Body']
            src.write(body.read())
        logger.info('Running translate command to convert to TIF')
        # Translate the original file and add 0 as a nodata value
        subprocess.check_call(temp_translate_cmd)
        logger.info('Running warp command to convert to web mercator')
        subprocess.check_call(warp_cmd)
        # Upload the converted tif
        logger.info('Uploading TIF to S3 (%s => %s/%s)', tif_fname,
                    out_bucket, out_key)
        # Open in binary mode: the TIF is binary data, and text mode ('r')
        # would raise decode errors on Python 3 / corrupt the upload.
        with open(tif_fname, 'rb') as dst:
            s3client.put_object(Bucket=out_bucket, Key=out_key, Body=dst)
        # Return the s3 url to the converted image
        return dst_url