Example 1
    def generate_scenes(self):
        scenes = []
        for hdf_url in self.hdf_urls:
            with get_tempdir() as temp_dir:
                scene = create_scene(hdf_url, temp_dir, self.owner,
                                     self.datasource)
                scenes.append(scene)
        return scenes
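Every snippet on this page uses get_tempdir as a context manager that yields a scratch directory and removes it when the block exits, even on error (Example 4 below tests exactly that guarantee). The helper itself is not shown on this page; the following is a minimal sketch of a plausible implementation, assuming it simply wraps tempfile.mkdtemp and shutil.rmtree:

import shutil
import tempfile
from contextlib import contextmanager

@contextmanager
def get_tempdir():
    # Hypothetical implementation: yield a scratch directory and remove it
    # on exit, whether or not the body of the with block raised
    temp_dir = tempfile.mkdtemp()
    try:
        yield temp_dir
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)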
Example 2
    def generate_scenes(self):
        """Create a Scene and associated Image for each GeoTiff in self.s3_path
        Returns:
            Generator of Scenes
        """
        s3 = boto3.resource('s3')
        for infile in self.files:
            # Each file is downloaded into its own temporary directory, which
            # get_tempdir deletes automatically when the with block exits.
            bucket_name, key = s3_bucket_and_key_from_url(infile)
            filename = os.path.basename(key)
            logger.info('Downloading %s => %s', infile, filename)
            bucket = s3.Bucket(bucket_name)
            with get_tempdir() as tempdir:
                tmp_fname = os.path.join(tempdir, filename)
                bucket.download_file(key, tmp_fname)
                cog_path = convert_to_cog(tempdir, filename)
                scene = self.create_geotiff_scene(
                    tmp_fname,
                    os.path.splitext(filename)[0])
                scene.ingestLocation = upload_tifs([cog_path], self.owner,
                                                   scene.id)[0]
                images = [
                    self.create_geotiff_image(
                        tmp_fname, urllib.unquote(scene.ingestLocation), scene,
                        cog_path)
                ]

            scene.thumbnails = []
            scene.images = images
            yield scene
Example 3
def create_cog(image_locations, scene):
    """Fetch a scene's imagery, merge it into a single COG, and upload it

    Args:
        image_locations (list): (location, filename) pairs for the imagery
        scene (Scene): scene that the merged COG belongs to
    """
    with get_tempdir() as local_dir:
        dsts = [os.path.join(local_dir, fname) for _, fname in image_locations]
        cog.fetch_imagery(image_locations, local_dir)
        warped_paths = cog.warp_tifs(dsts, local_dir)
        merged_tif = cog.merge_tifs(warped_paths, local_dir)
        cog.add_overviews(merged_tif)
        cog_path = cog.convert_to_cog(merged_tif, local_dir)
        updated_scene = upload_tif(cog_path, scene)
        updated_scene.update()
Example 4
    def test_temp_dir_cleanup(self):
        """Test that temporary directory is cleaned up even if error thrown"""

        try:
            with get_tempdir() as temp_dir:
                check_directory = temp_dir
                raise Exception('Dummy Exception')
        except Exception:
            pass

        self.assertFalse(
            os.path.isdir(check_directory),
            'Directory {} should have been deleted'.format(check_directory))
Example 5
    def generate_scenes(self):
        """Create a Scene and associated Image for each GeoTiff in self.s3_path
        Returns:
            Generator of Scenes
        """
        s3 = boto3.resource('s3')
        for infile in self.files:
            # We can't use the temp file as a context manager because it'll be opened/closed multiple
            # times and by default is deleted when it's closed. So we use try/finally to ensure that
            # it gets cleaned up.
            local_tif = tempfile.NamedTemporaryFile(delete=False,
                                                    suffix='.tif')
            try:
                bucket_name, key = s3_bucket_and_key_from_url(infile)
                bucket = s3.Bucket(bucket_name)
                logger.info('Downloading %s => %s', infile, local_tif.name)
                bucket.download_file(key, local_tif.name)
                # We need to override the autodetected filename because we're loading into temp
                # files which don't preserve the file name that is on S3.
                filename = os.path.basename(key)
                scene = self.create_geotiff_scene(
                    local_tif.name,
                    os.path.splitext(filename)[0])

                if is_tif_too_large(local_tif.name):
                    with get_tempdir() as tempdir:
                        split_files = split_tif(local_tif.name, tempdir)
                        target_key = 'user-uploads/{USER}/{UPLOAD}/'.format(
                            USER=self.owner, UPLOAD=self._upload.id)
                        keys_and_filepaths = upload_split_files(
                            target_key, os.getenv('DATA_BUCKET'), split_files)

                        images = [
                            self.create_geotiff_image(
                                filepath, s3_url(bucket_name, s3_key), scene,
                                os.path.basename(s3_key))
                            for (s3_key, filepath) in keys_and_filepaths
                        ]
                else:
                    images = [
                        self.create_geotiff_image(local_tif.name, infile,
                                                  scene, filename)
                    ]

                # TODO: thumbnails aren't currently created in a way that matches serialization
                # in the API
                scene.thumbnails = create_thumbnails(local_tif.name, scene.id)
                scene.images = images
            finally:
                os.remove(local_tif.name)
            yield scene
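The comment at the top of this example explains the pattern: NamedTemporaryFile(delete=False) plus try/finally, because downstream tools reopen the file by path and the default delete=True would remove it as soon as our handle closes. A minimal sketch of the pattern in isolation; process_by_name is a hypothetical stand-in for anything (GDAL, rasterio) that reopens the path:

import os
import tempfile

def process_by_name(path):
    # Hypothetical stand-in for a tool that reopens the file by path
    with open(path, 'rb') as fh:
        return len(fh.read())

local_tif = tempfile.NamedTemporaryFile(delete=False, suffix='.tif')
try:
    local_tif.close()                # our handle closes; the file persists
    process_by_name(local_tif.name)  # safe: delete=False keeps it on disk
finally:
    os.remove(local_tif.name)        # explicit cleanup, even on error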
Example 6
    def generate_scenes(self):
        # If this upload is not associated with a project, set the scene's
        # ingest status to TOBEINGESTED so that scene creation will kick off
        # an ingest. Otherwise, set the status to NOTINGESTED, so that the
        # status will be updated when the scene is added to this upload's
        # project.
        for planet_id in set(self.planet_ids):
            logger.info('Preparing to copy planet asset to s3: %s', planet_id)
            with get_tempdir() as prefix:
                planet_feature, temp_tif_file = self.copy_asset_to_s3(
                    prefix, planet_id)
                planet_key = self.client.auth.value
                planet_scene = create_planet_scene(
                    planet_feature, self.datasource, planet_key,
                    self.visibility, self.tags, self.owner
                )
                yield planet_scene
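The ingest-status choice described in the leading comment is not visible in this snippet; it presumably happens inside create_planet_scene. Under that reading, the decision reduces to a sketch like the one below, where the projectId field name is an assumption:

def choose_ingest_status(upload):
    # Hypothetical sketch of the selection the comment describes;
    # 'projectId' is an assumed field name
    if upload.projectId is not None:
        # The status flips when the scene is added to the upload's project
        return 'NOTINGESTED'
    # No project: scene creation should kick off an ingest
    return 'TOBEINGESTED'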
Example 7
    def generate_scenes(self):
        scenes = []
        for landsat_id in self.upload.files:
            path_meta = io.base_metadata_for_landsat_id(landsat_id)
            sensor = path_meta['sensor_id']
            config = {
                'M': MultiSpectralScannerConfig,
                'T': ThematicMapperConfig,
                'E': EnhancedThematicMapperConfig
            }[sensor]
            with get_tempdir() as temp_dir:
                scene = create_scene(self.upload.owner, temp_dir, landsat_id,
                                     config, self.upload.datasource)
                scenes.append(scene)
        return scenes
Example 8
    def test_create_tif_mask(self):
        """Test that creating a tif mask works properly"""
        with get_tempdir() as temp_dir:
            new_mask_tif = create_tif_mask(temp_dir, self.landsat8_tif)
            files = os.listdir(temp_dir)
            self.assertEqual(len(files), 1, 'Should have created a mask tif')

            with rasterio.open(new_mask_tif) as src:
                band = src.read(1)
                self.assertEqual(
                    band.size, 260832,
                    'Size of band is {} instead of {}'.format(
                        band.size, 260832))
                non_zero_pixels = np.sum(band)
                self.assertEqual(
                    non_zero_pixels, 117687,
                    'Number of non-zero pixels is {}, should be {}'.format(
                        non_zero_pixels, 117687))
Example 9
def extract_footprints(organization_id, tif_path):
    """Performs all actions to extract polygon from a kayak scene

    Args:
        organization_id (str): organization footprints belong to
        tif_path (str): path to tif to extract polygons from

    Returns:
        tuple: (tile Footprint, data Footprint)
    """
    logger.info('Beginning process to extract footprint for image: %s',
                tif_path)
    with get_tempdir() as temp_dir:

        _, resampled_tif_path = tempfile.mkstemp(suffix='.TIF', dir=temp_dir)

        with rasterio.open(tif_path) as src:
            y, x = src.shape
            aspect = y / float(x)
        x_size = 512
        y_size = int(512 * aspect)

        # Resample to a max width of 512
        cmd = [
            'gdal_translate',
            tif_path,
            resampled_tif_path,
            '-outsize',
            str(x_size),
            str(y_size),
        ]
        logger.info('Running GDAL command: %s', ' '.join(cmd))

        subprocess.check_call(cmd)

        tile_mask_tif_path, data_mask_tif_path = create_tif_mask(
            temp_dir, resampled_tif_path)
        data_footprint = extract_polygon(data_mask_tif_path)
        tile_footprint = extract_polygon(tile_mask_tif_path)

        return (Footprint(organization_id, tile_footprint),
                Footprint(organization_id, data_footprint))
Example 10
def export(export_id):
    """Perform export configured by user

    Args:
        export_id (str): ID of export job to process
    """
    logger.info('Creating Export Definition')
    final_status = 'EXPORTED'
    try:
        export_uri = create_export_definition(export_id)
        logger.info('Retrieving Export Definition %s', export_uri)
        export_definition = get_export_definition(export_uri)
        with get_tempdir() as local_dir:
            logger.info('Created Working Directory %s', local_dir)
            logger.info('Rewriting Export Definition')
            local_path = write_export_definition(export_definition, local_dir)
            logger.info('Rewrote export definition to %s', local_path)
            logger.info('Preparing to Run Export')
            run_export('file://' + local_path, export_id)
            logger.info('Post Processing Tiffs')
            merged_tiff_path = post_process_exports(export_definition,
                                                    local_dir)
            logger.info('Uploading Processed Tiffs')
            upload_processed_tif(merged_tiff_path, export_definition)
    except subprocess.CalledProcessError as e:
        logger.error('Output from failed command: %s', e.output)
        final_status = 'FAILED'
        raise
    except Exception as e:
        logger.error('Wrapper error: %s', e)
        final_status = 'FAILED'
        raise
    finally:
        # The max number of retries is currently hardcoded in batch.tf
        # in the deployment repo. Please make sure that both areas are updated if
        # this needs to be changed to a configurable variable
        if final_status == 'EXPORTED' or int(RETRY) >= 3:
            logger.info('Sending email notifications for export %s on try: %s',
                        export_id, RETRY)
            update_export_status(export_id, final_status)
        else:
            logger.info('Export failed, on try %s/3', RETRY)
Example 11
def extract_footprints(organization_id, tif_path):
    """Performs all actions to extract polygon from a kayak scene

    Args:
        organization_id (str): organization footprints belong to
        tif_path (str): path to tif to extract polygons from

    Returns:
        tuple: (tile Footprint, data Footprint)
    """
    logger.info('Beginning process to extract footprint for image: %s',
                tif_path)

    with get_tempdir() as temp_dir:
        tile_mask_tif_path, data_mask_tif_path = create_tif_mask(
            temp_dir, tif_path)
        data_footprint = extract_polygon(data_mask_tif_path)
        tile_footprint = extract_polygon(tile_mask_tif_path)
        return (Footprint(organization_id, tile_footprint),
                Footprint(organization_id, data_footprint))
Example 12
def extract_footprints(tif_path):
    """Performs all actions to extract polygon from a kayak scene

    Args:
        tif_path (str): path to tif to extract polygons from

    Returns:
        tuple: (tile Footprint, data Footprint)
    """
    logger.info('Beginning process to extract footprint for image: %s',
                tif_path)
    with get_tempdir() as temp_dir:

        _, resampled_tif_path = tempfile.mkstemp(suffix='.TIF', dir=temp_dir)
        _, warped_path = tempfile.mkstemp(suffix='.TIF', dir=temp_dir)
        _, geojson_path = tempfile.mkstemp(suffix='.GEOJSON', dir=temp_dir)

        with rasterio.open(tif_path) as src:
            y, x = src.shape
            aspect = y / float(x)
            x_size = 512
            y_size = int(512 * aspect)

        resample_cmd = [
            'gdal_translate', tif_path, resampled_tif_path, '-outsize',
            str(x_size),
            str(y_size)
        ]
        warp_cmd = [
            'gdalwarp', '-co', 'compress=LZW', '-dstnodata', '0', '-dstalpha',
            '-t_srs', 'epsg:4326', resampled_tif_path, warped_path
        ]
        polygonize_cmd = [
            'gdal_polygonize.py', '-b', 'mask', warped_path, '-f', 'GEOJSON',
            geojson_path
        ]

        subprocess.check_call(resample_cmd)
        subprocess.check_call(warp_cmd)
        subprocess.check_call(polygonize_cmd)
        with open(geojson_path, 'r+') as fh:
            geojson = json.load(fh)

        data_footprint = [
            feature['geometry']['coordinates']
            for feature in geojson['features']
            if feature['properties']['DN'] == 255
        ]

        xs = []
        ys = []

        for area in data_footprint:
            xst, yst = zip(*area[0])
            xs += xst
            ys += yst

        xmin = min(xs)
        xmax = max(xs)
        ymin = min(ys)
        ymax = max(ys)

        tile_footprint = [[[[xmin, ymin], [xmax, ymin], [xmax, ymax],
                            [xmin, ymax], [xmin, ymin]]]]

        return (Footprint(tile_footprint), Footprint(data_footprint))
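The tile footprint here is just the axis-aligned bounding box of the data footprint. The zip(*area[0]) idiom transposes a ring of (x, y) pairs into parallel x and y sequences; a tiny illustration:

ring = [(0, 0), (2, 0), (2, 1), (0, 1), (0, 0)]  # closed exterior ring
xs, ys = zip(*ring)
# xs == (0, 2, 2, 0, 0), ys == (0, 0, 1, 1, 0); min/max of each give the
# corners of the bounding box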
Example 13
def process_jp2000(scene_id, jp2_source):
    """Converts a Jpeg 2000 file to a tif

    Args:
        scene_id (str): scene the image is associated with
        jp2_source (str): url to a jpeg 2000 file

    Returns:
        str: s3 url to the converted tif
    """

    with get_tempdir() as temp_dir:

        s3client = boto3.client('s3')
        in_bucket, in_key = geotiff_io.s3_bucket_and_key_from_url(jp2_source)
        in_bucket = in_bucket.replace(r'.s3.amazonaws.com', '')
        fname_part = os.path.split(in_key)[-1]
        out_bucket = os.getenv('DATA_BUCKET')
        out_key = os.path.join('sentinel-2-tifs', scene_id,
                               fname_part.replace('.jp2', '.tif'))
        jp2_fname = os.path.join(temp_dir, fname_part)
        temp_tif_fname = jp2_fname.replace('.jp2', '-temp.tif')
        tif_fname = jp2_fname.replace('.jp2', '.tif')

        # Explicitly setting nbits is necessary because geotrellis only likes
        # powers of 2, and for some reason the value on the jpeg 2000 files
        # after translation is 15
        temp_translate_cmd = [
            'gdal_translate',
            '-a_nodata',
            '0',  # set 0 as the nodata value
            '-co',
            'NBITS=16',  # explicitly set nbits = 16
            '-co',
            'COMPRESS=LZW',
            '-co',
            'TILED=YES',
            jp2_fname,
            temp_tif_fname
        ]

        warp_cmd = [
            'gdalwarp', '-co', 'COMPRESS=LZW', '-co', 'TILED=YES', '-t_srs',
            'epsg:3857', temp_tif_fname, tif_fname
        ]

        dst_url = geotiff_io.s3_url(out_bucket, out_key)

        # Download the original jp2000 file
        logger.info('Downloading JPEG2000 file locally (%s/%s => %s)',
                    in_bucket, in_key, jp2_fname)
        with open(jp2_fname, 'wb') as src:
            body = s3client.get_object(Bucket=in_bucket, Key=in_key)['Body']
            src.write(body.read())

        logger.info('Running translate command to convert to TIF')
        # Translate the original file and add 0 as a nodata value
        subprocess.check_call(temp_translate_cmd)
        logger.info('Running warp command to convert to web mercator')
        subprocess.check_call(warp_cmd)

        # Upload the converted tif
        logger.info('Uploading TIF to S3 (%s => %s/%s)', tif_fname, out_bucket,
                    out_key)
        with open(tif_fname, 'rb') as dst:
            s3client.put_object(Bucket=out_bucket, Key=out_key, Body=dst)

        # Return the s3 url to the converted image
        return dst_url