def process_jp2000(scene_id, jp2_source):
    """Converts a Jpeg 2000 file to a tif

    Downloads the JPEG 2000 from S3, runs gdal_translate to produce a
    16-bit GeoTIFF with 0 as the nodata value, and uploads the result.
    Skips all work if the destination object already exists.

    Args:
        scene_id (str): scene the image is associated with
        jp2_source (str): url to a jpeg 2000 file

    Return:
        str: s3 url to the converted tif
    """

    s3client = boto3.client('s3')
    in_bucket, in_key = geotiff_io.s3_bucket_and_key_from_url(jp2_source)
    # Strip the virtual-hosted-style suffix so boto3 gets a bare bucket name
    in_bucket = in_bucket.replace(r'.s3.amazonaws.com', '')
    fname_part = os.path.split(in_key)[-1]
    out_bucket = os.getenv('DATA_BUCKET')
    out_key = os.path.join('sentinel-2-tifs', scene_id,
                           fname_part.replace('.jp2', '.tif'))
    jp2_fname = os.path.join('/tmp', fname_part)
    tif_fname = jp2_fname.replace('.jp2', '.tif')
    # Explicitly setting nbits is necessary because geotrellis only likes
    # powers of 2, and for some reason the value on the jpeg 2000 files
    # after translation is 15
    cmd = [
        'gdal_translate',
        '-a_nodata',
        '0',  # set 0 to nodata value
        '-co',
        'NBITS=16',  # explicitly set nbits = 16
        jp2_fname,
        tif_fname
    ]

    dst_url = geotiff_io.s3_url(out_bucket, out_key)

    # check if the object is already there; head_object raises ClientError
    # when the key is missing (or inaccessible), which we treat as
    # "not processed yet"
    try:
        s3client.head_object(Bucket=out_bucket, Key=out_key)
        processed = True
    except ClientError:
        processed = False

    # If the object is already there, we've converted this scene
    # before
    if not processed:
        try:
            # Download the original jp2000 file
            with open(jp2_fname, 'wb') as src:
                body = s3client.get_object(Bucket=in_bucket, Key=in_key)['Body']
                src.write(body.read())

            # Translate the original file and add 0 as a nodata value
            subprocess.check_call(cmd)

            # Upload the converted tif. Binary mode ('rb') is required:
            # text mode would try to decode the TIFF bytes and fail.
            with open(tif_fname, 'rb') as dst:
                s3client.put_object(Bucket=out_bucket, Key=out_key, Body=dst)
        finally:
            # Remove the temp files so repeated invocations don't fill /tmp
            for tmp_fname in (jp2_fname, tif_fname):
                if os.path.exists(tmp_fname):
                    os.remove(tmp_fname)

    # Return the s3 url to the converted image
    return dst_url
# Example #2
def import_geotiffs(*args, **kwargs):
    """Find geotiffs which match the bucket and prefix and kick off imports

    Reads ``tilepaths``, ``organization``, ``datasource``, ``capture_date``
    and ``bucket`` from the Airflow ``dag_run`` conf, downloads each tile
    locally, and creates a scene (with image + thumbnails) for it.
    """

    # Use the module-level logger (not the root logger) for consistency
    # with the rest of this file.
    logger.info("Finding geotiff scenes...")
    conf = kwargs['dag_run'].conf

    tilepaths = conf.get('tilepaths')
    organization = conf.get('organization')
    datasource = conf.get('datasource')
    capture_date = conf.get('capture_date')
    bucket_name = conf.get('bucket')

    factory = GeoTiffS3SceneFactory(organization, Visibility.PRIVATE,
                                    datasource, capture_date, bucket_name, '')

    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)

    for path in tilepaths:
        local_tif = tempfile.NamedTemporaryFile(delete=False)
        # Close the handle right away: we only need the path, and leaving
        # it open leaks a file descriptor per tile (and prevents re-opening
        # the file on some platforms).
        local_tif.close()
        try:
            bucket.download_file(path, local_tif.name)
            # We need to override the autodetected filename because we're loading into temp
            # files which don't preserve the file name that is on S3.
            filename = os.path.basename(path)
            scene = factory.create_geotiff_scene(local_tif.name,
                                                 os.path.splitext(filename)[0])
            image = factory.create_geotiff_image(local_tif.name,
                                                 s3_url(bucket.name, path),
                                                 scene.id, filename)

            scene.thumbnails = create_thumbnails(local_tif.name, scene.id,
                                                 organization)
            scene.images = [image]
            scene.create()
        finally:
            # Always clean up the downloaded tile, even if scene creation fails
            os.remove(local_tif.name)

    logger.info('Finished importing scenes')
def process_jp2000(scene_id, jp2_source):
    """Converts a Jpeg 2000 file to a tif

    Downloads the JPEG 2000 from S3, translates it to a tiled, LZW-compressed
    16-bit GeoTIFF with 0 as nodata, warps it to web mercator (EPSG:3857),
    and uploads the result. All intermediate files live in a temp dir that is
    cleaned up when the ``get_tempdir`` context exits.

    Args:
        scene_id (str): scene the image is associated with
        jp2_source (str): url to a jpeg 2000 file

    Return:
        str: s3 url to the converted tif
    """

    with get_tempdir() as temp_dir:

        s3client = boto3.client('s3')
        in_bucket, in_key = geotiff_io.s3_bucket_and_key_from_url(jp2_source)
        # Strip the virtual-hosted-style suffix so boto3 gets a bare bucket name
        in_bucket = in_bucket.replace(r'.s3.amazonaws.com', '')
        fname_part = os.path.split(in_key)[-1]
        out_bucket = os.getenv('DATA_BUCKET')
        out_key = os.path.join('sentinel-2-tifs', scene_id,
                               fname_part.replace('.jp2', '.tif'))
        jp2_fname = os.path.join(temp_dir, fname_part)
        temp_tif_fname = jp2_fname.replace('.jp2', '-temp.tif')
        tif_fname = jp2_fname.replace('.jp2', '.tif')

        # Explicitly setting nbits is necessary because geotrellis only likes
        # powers of 2, and for some reason the value on the jpeg 2000 files
        # after translation is 15
        temp_translate_cmd = [
            'gdal_translate',
            '-a_nodata',
            '0',  # set 0 to nodata value
            '-co',
            'NBITS=16',  # explicitly set nbits = 16
            '-co',
            'COMPRESS=LZW',
            '-co',
            'TILED=YES',
            jp2_fname,
            temp_tif_fname
        ]

        # Reproject the translated tif to web mercator for serving
        warp_cmd = [
            'gdalwarp', '-co', 'COMPRESS=LZW', '-co', 'TILED=YES', '-t_srs',
            'epsg:3857', temp_tif_fname, tif_fname
        ]

        dst_url = geotiff_io.s3_url(out_bucket, out_key)

        # Download the original jp2000 file
        logger.info('Downloading JPEG2000 file locally (%s/%s => %s)',
                    in_bucket, in_key, jp2_fname)
        with open(jp2_fname, 'wb') as src:
            body = s3client.get_object(Bucket=in_bucket, Key=in_key)['Body']
            src.write(body.read())

        logger.info('Running translate command to convert to TIF')
        # Translate the original file and add 0 as a nodata value
        subprocess.check_call(temp_translate_cmd)
        logger.info('Running warp command to convert to web mercator')
        subprocess.check_call(warp_cmd)

        # Upload the converted tif. Binary mode ('rb') is required: text
        # mode would try to decode the TIFF bytes and fail.
        logger.info('Uploading TIF to S3 (%s => %s/%s)', tif_fname, out_bucket,
                    out_key)
        with open(tif_fname, 'rb') as dst:
            s3client.put_object(Bucket=out_bucket, Key=out_key, Body=dst)

        # Return the s3 url to the converted image
        return dst_url