def process_jp2000(scene_id, jp2_source):
    """Convert a JPEG 2000 file to a GeoTIFF and upload it to S3.

    Skips the conversion entirely if the output object already exists.

    Args:
        scene_id (str): scene the image is associated with
        jp2_source (str): url to a jpeg 2000 file

    Returns:
        str: s3 url to the converted tif
    """
    s3client = boto3.client('s3')
    in_bucket, in_key = geotiff_io.s3_bucket_and_key_from_url(jp2_source)
    # The parsed host may include the S3 endpoint suffix; strip it so only
    # the bare bucket name is passed to boto3.
    in_bucket = in_bucket.replace(r'.s3.amazonaws.com', '')
    fname_part = os.path.split(in_key)[-1]
    out_bucket = os.getenv('DATA_BUCKET')
    out_key = os.path.join('sentinel-2-tifs', scene_id,
                           fname_part.replace('.jp2', '.tif'))
    jp2_fname = os.path.join('/tmp', fname_part)
    tif_fname = jp2_fname.replace('.jp2', '.tif')
    # Explicitly setting nbits is necessary because geotrellis only likes
    # powers of 2, and for some reason the value on the jpeg 2000 files
    # after translation is 15
    cmd = [
        'gdal_translate',
        '-a_nodata', '0',   # set 0 to nodata value
        '-co', 'NBITS=16',  # explicitly set nbits = 16
        jp2_fname, tif_fname
    ]
    dst_url = geotiff_io.s3_url(out_bucket, out_key)

    # Check if the object is already there; head_object raises ClientError
    # when the key does not exist.
    try:
        s3client.head_object(Bucket=out_bucket, Key=out_key)
        processed = True
    except ClientError:
        processed = False

    # If the object is already there, we've converted this scene before
    if not processed:
        # Download the original jp2000 file
        with open(jp2_fname, 'wb') as src:
            body = s3client.get_object(Bucket=in_bucket, Key=in_key)['Body']
            src.write(body.read())
        # Translate the original file and add 0 as a nodata value
        subprocess.check_call(cmd)
        # Upload the converted tif. Must be opened in binary mode ('rb'):
        # TIFs are binary, and a text-mode handle would fail to decode /
        # corrupt the uploaded body.
        with open(tif_fname, 'rb') as dst:
            s3client.put_object(Bucket=out_bucket, Key=out_key, Body=dst)
    # Return the s3 url to the converted image
    return dst_url
def import_geotiffs(*args, **kwargs):
    """Find geotiffs which match the bucket and prefix and kick off imports.

    Reads its configuration from the Airflow ``dag_run`` conf supplied in
    ``kwargs``: tilepaths, organization, datasource, capture_date and bucket.
    """
    # Use the module logger (not the root logger via logging.info) so this
    # block is consistent with the rest of the file.
    logger.info("Finding geotiff scenes...")
    conf = kwargs['dag_run'].conf
    tilepaths = conf.get('tilepaths')
    organization = conf.get('organization')
    datasource = conf.get('datasource')
    capture_date = conf.get('capture_date')
    bucket_name = conf.get('bucket')

    factory = GeoTiffS3SceneFactory(organization, Visibility.PRIVATE,
                                    datasource, capture_date, bucket_name, '')
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    for path in tilepaths:
        local_tif = tempfile.NamedTemporaryFile(delete=False)
        # Close the handle right away -- we only need a stable path for
        # download_file to write into; otherwise a descriptor leaks per tile.
        local_tif.close()
        try:
            bucket.download_file(path, local_tif.name)
            # We need to override the autodetected filename because we're
            # loading into temp files which don't preserve the file name
            # that is on S3.
            filename = os.path.basename(path)
            scene = factory.create_geotiff_scene(
                local_tif.name, os.path.splitext(filename)[0])
            image = factory.create_geotiff_image(
                local_tif.name, s3_url(bucket.name, path), scene.id, filename)
            scene.thumbnails = create_thumbnails(local_tif.name, scene.id,
                                                 organization)
            scene.images = [image]
            scene.create()
        finally:
            # Always clean up the temp file, even if scene creation fails.
            os.remove(local_tif.name)
    logger.info('Finished importing scenes')
def process_jp2000(scene_id, jp2_source):
    """Convert a JPEG 2000 file to a web-mercator GeoTIFF and upload it to S3.

    Args:
        scene_id (str): scene the image is associated with
        jp2_source (str): url to a jpeg 2000 file

    Returns:
        str: s3 url to the converted tif
    """
    with get_tempdir() as temp_dir:
        s3client = boto3.client('s3')
        in_bucket, in_key = geotiff_io.s3_bucket_and_key_from_url(jp2_source)
        # The parsed host may include the S3 endpoint suffix; strip it so
        # only the bare bucket name is passed to boto3.
        in_bucket = in_bucket.replace(r'.s3.amazonaws.com', '')
        fname_part = os.path.split(in_key)[-1]
        out_bucket = os.getenv('DATA_BUCKET')
        out_key = os.path.join('sentinel-2-tifs', scene_id,
                               fname_part.replace('.jp2', '.tif'))
        jp2_fname = os.path.join(temp_dir, fname_part)
        temp_tif_fname = jp2_fname.replace('.jp2', '-temp.tif')
        tif_fname = jp2_fname.replace('.jp2', '.tif')
        # Explicitly setting nbits is necessary because geotrellis only likes
        # powers of 2, and for some reason the value on the jpeg 2000 files
        # after translation is 15
        temp_translate_cmd = [
            'gdal_translate',
            '-a_nodata', '0',   # set 0 to nodata value
            '-co', 'NBITS=16',  # explicitly set nbits = 16
            '-co', 'COMPRESS=LZW',
            '-co', 'TILED=YES',
            jp2_fname, temp_tif_fname
        ]
        warp_cmd = [
            'gdalwarp',
            '-co', 'COMPRESS=LZW',
            '-co', 'TILED=YES',
            '-t_srs', 'epsg:3857',  # reproject to web mercator
            temp_tif_fname, tif_fname
        ]
        dst_url = geotiff_io.s3_url(out_bucket, out_key)

        # Download the original jp2000 file
        logger.info('Downloading JPEG2000 file locally (%s/%s => %s)',
                    in_bucket, in_key, jp2_fname)
        with open(jp2_fname, 'wb') as src:
            body = s3client.get_object(Bucket=in_bucket, Key=in_key)['Body']
            src.write(body.read())

        logger.info('Running translate command to convert to TIF')
        # Translate the original file and add 0 as a nodata value
        subprocess.check_call(temp_translate_cmd)

        logger.info('Running warp command to convert to web mercator')
        subprocess.check_call(warp_cmd)

        # Upload the converted tif. Must be opened in binary mode ('rb'):
        # TIFs are binary, and a text-mode handle would fail to decode /
        # corrupt the uploaded body.
        logger.info('Uploading TIF to S3 (%s => %s/%s)',
                    tif_fname, out_bucket, out_key)
        with open(tif_fname, 'rb') as dst:
            s3client.put_object(Bucket=out_bucket, Key=out_key, Body=dst)

        # Return the s3 url to the converted image
        return dst_url