Example #1
def landsat_filenames(original_file):
    '''Create a fake Landsat data structure on disk using artificial image data'''
    tmpdir = tempfile.mkdtemp()
    image_name = 'L1_IGNORE_AAABBB_DATE'
    mtl_name = image_name + '_MTL.txt'
    mtl_path = os.path.join(tmpdir, mtl_name)
    zip_path = os.path.join(tmpdir, image_name + '.zip')
    # Not really a valid MTL file, but it is all we need: only one band in the image
    with open(mtl_path, 'w') as f:
        f.write('SPACECRAFT_ID = LANDSAT_1\n')
        f.write('SUN_ELEVATION = 5.8\n')
        f.write('FILE_NAME_BAND_1 = 1.tiff\n')
        f.write('RADIANCE_MULT_BAND_1 = 2.0\n')
        f.write('RADIANCE_ADD_BAND_1 = 2.0\n')
        f.write('REFLECTANCE_MULT_BAND_1 = 2.0\n')
        f.write('REFLECTANCE_ADD_BAND_1 = 2.0\n')
        f.write('K1_CONSTANT_BAND_1 = 2.0\n')
        f.write('K2_CONSTANT_BAND_1 = 2.0\n')

    image_path = os.path.join(tmpdir, '1.tiff')
    tiff.TiffImage(original_file[0]).save(image_path)

    z = zipfile.ZipFile(zip_path, mode='x')
    z.write(image_path, arcname='1.tiff')
    z.write(mtl_path, arcname=mtl_name)
    z.close()

    label_path = os.path.join(tmpdir, image_name + '_label.tiff')
    tiff.TiffImage(original_file[1]).save(label_path)

    yield (zip_path, label_path)

    shutil.rmtree(tmpdir)
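These generator functions follow the pytest fixture pattern: everything before the `yield` is setup, the yielded tuple is handed to the test, and the cleanup after the `yield` runs once the test finishes. A minimal usage sketch, assuming pytest and a hypothetical `incoming_images` fixture that supplies the two source image paths:

import os
import pytest

@pytest.fixture(scope='function')
def landsat_files(incoming_images):
    # Delegate setup and teardown to the generator above.
    yield from landsat_filenames(incoming_images)

def test_landsat_fixture(landsat_files):
    zip_path, label_path = landsat_files
    assert os.path.exists(zip_path)
    assert os.path.exists(label_path)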
Example #2
def worldview_filenames(original_file):
    '''Create a fake WorldView-2 data structure on disk using artificial image data'''
    tmpdir = tempfile.mkdtemp()
    image_name = 'WV02N42_939570W073_2520792013040400000000MS00_GU004003002'
    imd_name = '19MAY13164205-M2AS-503204071020_01_P003.IMD'
    zip_path = os.path.join(tmpdir, image_name + '.zip')
    label_path = os.path.join(tmpdir, image_name + '_label.tiff')
    image_dir = os.path.join(tmpdir, 'image')
    image_path = os.path.join(image_dir, image_name + '.tif')
    vendor_dir = os.path.join(image_dir, 'vendor_metadata')
    imd_path = os.path.join(vendor_dir, imd_name)
    os.mkdir(image_dir)
    os.mkdir(vendor_dir)
    # Not really a valid IMD file, but it is all we need: only one band in the image
    with open(imd_path, 'w') as f:
        f.write('absCalFactor = 9.295654e-03\n')
        f.write('effectiveBandwidth = 4.730000e-02\n')

    tiff.TiffImage(original_file[0]).save(image_path)
    tiff.TiffImage(original_file[1]).save(label_path)

    z = zipfile.ZipFile(zip_path, mode='x')
    z.write(image_path, arcname=image_name + '.tif')
    z.write(imd_path, arcname=os.path.join('vendor_metadata', imd_name))
    z.close()

    yield (zip_path, label_path)

    shutil.rmtree(tmpdir)
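The fake IMD file only needs the `absCalFactor` and `effectiveBandwidth` lines because those are the calibration fields a WorldView reader parses from the vendor metadata. A minimal sketch of reading such `key = value` lines back out (a hypothetical parser, not the project's actual reader):

def parse_imd(path):
    '''Collect key = value pairs from an IMD-style text file.'''
    values = {}
    with open(path, 'r') as f:
        for line in f:
            key, sep, value = line.partition('=')
            if sep:  # keep only lines that contain '='
                values[key.strip()] = value.strip()
    return values

# parse_imd(imd_path)['absCalFactor']  ->  '9.295654e-03'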
Example #3
def lookup_georef_nodataMask(basename,
                             useNodataMask=True,
                             smartAssignHighLow=False):
    '''
    Look up the raster metadata and nodata pixels of the image corresponding to a basename from a
    different dir. Used to add georeferencing to "dumb" tiff outputs from SR. Returns the
    georeferencing (and optionally the nodata mask) for the corresponding SR image.

    smartAssignHighLow (deprecated) refers to guessing whether a pixel with value nodata_value that
    is not in the LR nodata mask should be assigned a high or a low value, based on the median value
    of the LR image. If smartAssignHighLow=True, the third return value is a boolean array indicating
    whether or not the original image value was greater than the median. Now deprecated because I
    found a much simpler way of accomplishing this task (check values before converting to uint8).
    '''
    georef_file = glob(os.path.join(cropped_roi_dir8, '**', basename + '.tif'),
                       recursive=True)
    georef_file = [file for file in georef_file if 'cog' not in file]  # sloppy fix to exclude the new COG directory
    georef_ex_img = tiff.TiffImage(georef_file)
    metadata = georef_ex_img.metadata()
    geotransform_out = list(metadata['geotransform'])  # recast to list so I can edit

    ## Modify to make 10x higher res
    geotransform_out[1] /= upscale_ratio
    geotransform_out[5] /= upscale_ratio
    metadata['geotransform'] = tuple(geotransform_out)

    ## Get height and width to use for assert statement at beginning
    h = georef_ex_img.height()
    w = georef_ex_img.width()
    if not useNodataMask:  # Don't take nodata values from original LR image
        return metadata, h, w
    else:
        ## Load metadata
        ex_img = georef_ex_img.read()  # example corresponding LR image
        nodata_value = int(
            georef_ex_img.nodata_value())  # in contrast to env var nodata_val
        ex_img_mask = np.any(ex_img == nodata_value, axis=2)  # negative mask

        ## Resample the mask to match the SR grid: bilinear interpolation (scipy zoom) over the
        ## float mask, then keep values greater than noDataCutoff (0.2 or 0.5)
        ex_img_mask_rs = zoom(ex_img_mask.astype('float32'), upscale_ratio,
                              order=1) > noDataCutoff

        ## Save mem
        if not smartAssignHighLow:
            del ex_img

        if smartAssignHighLow:  # I could save processing by only doing one resampling and then masking afterwards...oh well...one needs nearest neighbor, the other should have cubic
            ## Resample image
            ex_img_rs = zoom(ex_img, [upscale_ratio, upscale_ratio, 1],
                             order=1)  # again use bilinear for speed

            ## Compute if SR val >= median LR val
            quantile = np.quantile(ex_img[~ex_img_mask], highLowCutoff)
            highLow = ex_img_rs >= quantile
            return metadata, ex_img_mask_rs, highLow

        else:
            return metadata, ex_img_mask_rs
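The geotransform edit above relies on the GDAL affine convention `(origin_x, pixel_width, rot, origin_y, rot, pixel_height)`: dividing entries 1 and 5 by `upscale_ratio` shrinks the pixel size while keeping the origin fixed, which is exactly what the upscaled SR raster needs. A worked sketch with a hypothetical 30 m transform and `upscale_ratio = 10`:

upscale_ratio = 10
gt = [500000.0, 30.0, 0.0, 4600000.0, 0.0, -30.0]  # hypothetical 30 m Landsat-style transform
gt[1] /= upscale_ratio  # pixel width:   30 m -> 3 m
gt[5] /= upscale_ratio  # pixel height: -30 m -> -3 m
print(tuple(gt))  # (500000.0, 3.0, 0.0, 4600000.0, 0.0, -3.0)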
Example #4
def main(argsIn):

    SUPPORTED_IMAGE_TYPES = ['worldview', 'sentinel1']

    try:

        usage = "usage: unpack_inputs [options]"
        parser = argparse.ArgumentParser(usage=usage)

        parser.add_argument("--input-folder",
                            dest="input_folder",
                            required=True,
                            help="Folder containing the input image files.")

        parser.add_argument("--output-folder",
                            dest="output_folder",
                            required=True,
                            help="Unpack images to this folder.")

        parser.add_argument("--image-type",
                            dest="image_type",
                            default='worldview',
                            help="Type of image files: " +
                            ', '.join(SUPPORTED_IMAGE_TYPES))

        parser.add_argument("--image-ext",
                            dest="image_extension",
                            default='.zip',
                            help="Extension for image files.")

        parser.add_argument("--delete-inputs",
                            action="store_true",
                            dest="delete_inputs",
                            default=False,
                            help="Delete input files after unpacking.")

        parser.add_argument("--image-limit",
                            dest="image_limit",
                            default=None,
                            type=int,
                            help="Stop after unpacking this many images.")

        options = parser.parse_args(argsIn)

    except argparse.ArgumentError:
        print(usage)
        return -1

    if options.image_type not in SUPPORTED_IMAGE_TYPES:
        print('Input image type is not supported!')
        return -1

    # Recursively find image files, obtaining the full path for each file.
    input_image_list = [
        os.path.join(root, name)
        for root, dirs, files in os.walk(options.input_folder)
        for name in files if name.endswith(options.image_extension)
    ]

    print('Found ' + str(len(input_image_list)) + ' image files.')

    # Try to load each file and record the ones that fail
    failed_files = []
    count = 0
    for image_path in input_image_list:

        try:

            if count % 10 == 0:
                print('Progress = ' + str(count) + ' out of ' +
                      str(len(input_image_list)))

            if options.image_limit and (count >= options.image_limit):
                print('Stopping because we hit the image limit.')
                break
            count += 1

            # Mirror the input folder structure in the output folder
            image_name = os.path.basename(os.path.splitext(image_path)[0])
            image_folder = os.path.dirname(image_path)
            relative_path = os.path.relpath(image_folder, options.input_folder)
            this_output_folder = os.path.join(options.output_folder,
                                              relative_path, image_name)

            # TODO: Synch up the unpack functions
            tif_path = None
            if not os.path.exists(this_output_folder):
                print('Unpacking input file: ' + image_path)
                if options.image_type == 'worldview':
                    tif_path = worldview.unpack_wv_to_folder(
                        image_path, this_output_folder)[0]
                else:  # sentinel1
                    tif_path = sentinel1.unpack_s1_to_folder(
                        image_path, this_output_folder)

            else:  # The folder was already unpacked (at least partially)
                if options.image_type == 'worldview':
                    tif_path = worldview.get_files_from_unpack_folder(
                        this_output_folder)[0]
                else:  # sentinel1
                    tif_path = sentinel1.unpack_s1_to_folder(
                        image_path, this_output_folder)

            # Make sure the unpacked image loads properly
            test_image = tiff.TiffImage(tif_path)  #pylint: disable=W0612

            if options.delete_inputs:
                print('Deleting input file: ' + image_path)
                os.remove(image_path)

        except Exception as e:  #pylint: disable=W0703
            failed_files.append(image_path)
            print('For file: ' + image_path + '\ncaught exception: ' + str(e))
            traceback.print_exc(file=sys.stdout)

    if failed_files:
        print('The following files failed: ')
        for f in failed_files:
            print(f)
    else:
        print('No files failed to unpack!')

    return 0
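Since the function takes `argsIn` explicitly, it can be wired to the command line with the usual entry-point boilerplate; the folder paths below are hypothetical:

if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))

# e.g. from the shell (paths hypothetical):
#   python unpack_inputs.py --input-folder /data/wv_zips \
#       --output-folder /data/unpacked --image-type worldview --image-limit 5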
Example #5
def combine_chunks(scale=None):
    '''
    Parses the input chip directory to find common file basenames, then reconstructs one full ROI
    image for each unique basename. Uses env vars kernel and kernel_scale. kernel can be None,
    'gaussian', or 'rect', where rect is boolean with zeros along the edge. Doesn't overwrite output.

    Optional 'scale' param gives the Gaussian kernel std [default: env var kernel_scale].

    If testing, look for '# testing-change'. Then change env var sr_dir to point to the chip dir,
    and set chip_shape = sr_chip_shape.
    '''
    ## Avoid overriding var kernel_scale unless specified in the function call (allows me to run
    ## multiple times for different scales)
    if scale is None:
        scale = kernel_scale  # read from env var, unless specified

    ## I/O
    chip_image_list = glob(os.path.join(sr_dir, '*.png'))  # cropped_u8_chip_dir for testing # HERE change to tiff if needed # testing-change
    # combined_dir = os.path.join(sr_dir, 'combined_scale_'+str(scale)) ## variables- SR ## Place here, not in env22.py, because value can be overwritten by fx argument
    combined_dir = f'/mnt/gcs/sr/v2/{upscale_ratio}x/overlap_{overlap_shape[0]}/batch{batch}/combined_scale_{kernel_scale}/'
    os.makedirs(combined_dir, exist_ok=True)

    ## Since I now have uneven filename length, I have to parse basename using number of '_' characters, not by length.
    chip_basenames = []
    for name in chip_image_list:
        name_base = os.path.basename(name)
        sep_idx = [i.start() for i in re.finditer('_', name_base)]  # separator indexes
        chip_basenames.append(name_base[:sep_idx[-2]])  # works for both mosaic and orig paths
    unq_chip_basenames = np.unique(chip_basenames)  # find basenames for orig ROIs, e.g. LC08_L2SP_012031_20201129_20210316_02_T1_SR_B534_C
    print(f'Unique basenames:\n{unq_chip_basenames}')

    ## Loop
    for i, basename in enumerate(unq_chip_basenames):  # [:1] HERE change for testing: 1:2 is Lincoln, :1 is Cumberland, 2:3 is Redberry, 3:4 is St. Denis

        ## Parse basenames and stitching indexes
        combined_out_name = str(basename) + '_sr_' + str(sr_res) + 'm.tif'
        combined_out_pth = os.path.join(combined_dir, combined_out_name)  # path to write the reconstructed image

        ## Skip if commented out or not present in file_paths.yml
        ID = str(basename)[:40]  # hopefully counting characters from the left works too
        if ID + '.tif' not in mosaic_names:  # if this is the case, then ID may be an actual landsat ID
            if ID not in file_inputs['bases'].keys():  # ID must be commented out
                print(f'Skipping ID {ID + ".tif"} bc it is not in the input file yml list.')
                continue
            else:  # ID is an actual landsat ID
                pass

        ## Prevent overwriting (search all batch dirs; comment out to either skip or overwrite):
        if os.path.exists(combined_out_pth) or len(
                glob(os.path.join(
                    f'/mnt/gcs/sr/v[0-9]/10x/overlap_{overlap_shape[0]}/batch[0-9]/combined_scale_{kernel_scale}',
                    combined_out_name),
                     recursive=True)) > 0:
            ## Skip outputs that exist
            print(f'Skipping output {i} bc it exists somewhere: {basename}')
            continue

            ## Overwrite outputs (alternative, unreachable while the continue above is active)
            # print(f'Overwriting output ({i}) : {basename}')
        else:
            print(f'Combining chunks from image {i}: {basename}')
        chips = glob(os.path.join(sr_dir, basename + '*.png'))  # use cropped_u8_chip_dir for testing on non-SR # testing-change
        chip_origins_y = [
            int(os.path.basename(name).split('_')[-2][1:5]) for name in chips
        ]
        chip_origins_x = [
            int(os.path.basename(name).split('_')[-2][6:10]) for name in chips
        ]
        y_max = max(chip_origins_y)
        x_max = max(chip_origins_x)

        ## Assert correct number of tiles (Doesn't make sense now that I'm throwing out blank chips)
        metadata, georef_h, georef_w = lookup_georef_nodataMask(
            basename, useNodataMask=False)
        reconstructed_yDim = y_max * upscale_ratio + sr_chip_shape[0]
        reconstructed_xDim = x_max * upscale_ratio + sr_chip_shape[0]
        georef_sr_h = georef_h * upscale_ratio
        georef_sr_w = georef_w * upscale_ratio

        if (georef_sr_h, georef_sr_w) != (reconstructed_yDim,
                                          reconstructed_xDim):
            print(
                f'Error [EDK]: georeferenced mask {georef_sr_h, georef_sr_w} has a different x-y shape than the array {reconstructed_yDim, reconstructed_xDim}!'
            )
            print(f'\tBasename: {str(basename)}')
            print(f'\tBase dir: {sr_dir}')
            print('Skipping this file.')
            continue

        ## Init output arrays: arrays beginning with C have the dims of the final image (large); those beginning with K have the dims of the kernel (much smaller)
        C = np.zeros((reconstructed_yDim, reconstructed_xDim, 3),
                     dtype='float32')  # "combined" array with dimensions parsed from max chip indexes; float for division; accumulates chips multiplied by K (kernel) # testing-change

        ## Create 3-D kernels with the chip shape, and full-size sum arrays with the ROI image shape.
        if kernel == 'gaussian':
            K = np.repeat(gkern(sr_chip_shape[0], scale)[:, :, np.newaxis], 3,
                          axis=2).astype('float32')  # good default: scale = float(sr_chip_shape[0])/4.8, i.e. std of 48/4.8 = 10 # testing-change
        elif kernel == 'rect':
            K = np.zeros((sr_chip_shape[0], sr_chip_shape[1], 3),
                         dtype='float32')  # keep base kernel [but add ability to set some sides == 1] # testing-change
            K[kernRectBounds[0]:-kernRectBounds[0],
              kernRectBounds[1]:-kernRectBounds[1], :] = 1
        elif kernel is None:
            K = np.ones((sr_chip_shape[0], sr_chip_shape[1], 3),
                        dtype='float32')  # testing-change
        else:  # replace, not add chips
            raise RuntimeError('EDK: Undefined kernel')
        CKS = C.copy()  # "C kernel sum"; init as float for division

        ## Loop over chips
        for j, chip_pth in enumerate(chips):  # chips[:30]

            ## Progress bar
            progress_bar('', j / len(chips),
                         f'Combining chips: ({j}/{len(chips)})')

            ## Create delta tiffimage, load and read()
            chip_image = tiff.TiffImage(chip_pth)
            chip = chip_image.read()  # + np.random.randint(0, 30) # TESTING: rand adds noise for testing; beware overflow for uint8

            ## Assert dtype, nodata val and dims (verify hard-coded assumptions against env variables)
            if True:  # j == 0:
                assert chip.dtype == 'uint8', "EDK: Check dtype."
                assert chip.shape[:2] == sr_chip_shape, "EDK: chip shape."  # testing-change
                assert chip_image.nodata_value() == nodata_val, "EDK: Check no-data value."  # [STILL NEED TO TEST THIS AFTER COMMENTING OUT] TODO: temporarily commented out

            ## Save metadata for output if first chip
            if j == 0:
                metadata_combined = chip_image.metadata()  # re-initialize from a pristine copy

            ## Add chip to the output array C, weighted by the kernel, and accumulate the kernel weights in CKS
            y0 = chip_origins_y[j] * upscale_ratio
            x0 = chip_origins_x[j] * upscale_ratio
            C[y0:y0 + sr_chip_shape[0], x0:x0 + sr_chip_shape[1], :] += chip * K
            CKS[y0:y0 + sr_chip_shape[0], x0:x0 + sr_chip_shape[1], :] += K

        ## Division for weighted average # TODO: add a small value in case CKS has any zeros (for division)
        combined_out = C / CKS  # weighted image divided by sum of kernels (note that CKS can equal 0 in regions of image fill, and potentially due to underflow error)
        del C, CKS  # save mem

        ## Make room for the nodata value (don't trust any zeros in the SR image, only the resampled mask from the og image)
        combined_out[combined_out > 254] = 254  # avoid ambiguity with over/underflow once converted to uint8
        combined_out = combined_out.astype('uint8')
        combined_out += 1  # int shift to free up the zero value for nodata

        ## Check if I need to look up georeferencing (works around https://github.com/nasa/delta/issues/148)
        if metadata_combined['projection'] == '':
            metadata, nodataMask = lookup_georef_nodataMask(basename)  # note: this function returns two values by default, more or fewer depending on the arguments. See the lookup_georef_nodataMask def.
        else:
            _, nodataMask = lookup_georef_nodataMask(basename)
            metadata = metadata_combined

            ## Raise an error bc the code snippet to rescale the image affine transform needs to be copied here. Should be implemented as a function to avoid re-writing code.
            raise RuntimeError('[EDK] Caution: using metadata from first image chip.')

        ## Apply the upscaled nodata mask from the LR image
        assert nodataMask.shape[:2] == combined_out.shape[:2], \
            f'[EDK]: mask {nodataMask.shape[:2]} has a different x-y shape than the array {combined_out.shape[:2]}!'
        combined_out[nodataMask] = nodata_val  # appears soft-coded, but nodata_val=0 is really hard-coded, because all uint8 pixel values were shifted up by 1

        ## Write out to combined_dir
        tiff.write_tiff(combined_out_pth,
                        combined_out,
                        nodata=nodata_val,
                        metadata=metadata)
        print(f'Combined {j+1} chunks\n\n')

        ## Check/update the projection if none exists
        from osgeo import gdal
        ds = gdal.Open(combined_out_pth, gdal.GA_Update)  # update mode so SetProjection persists
        if ds.GetProjection() == '':
            ds.SetProjection(metadata["projection"])  # CRS.from_string(metadata["projection"]).to_wkt()
            ds.FlushCache()  # close file/buffer
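combine_chunks calls a gkern helper that is not shown here. Per the comments, it should produce a 2-D Gaussian weight window of the chip size so overlapping chips blend smoothly toward their edges. A minimal sketch under that assumption (the project's own helper may differ):

import numpy as np
from scipy.signal.windows import gaussian

def gkern(size, std):
    '''Return a size x size 2-D Gaussian window with the given std (assumed signature).'''
    g1d = gaussian(size, std=std)  # 1-D Gaussian window
    g2d = np.outer(g1d, g1d)       # separable product gives the 2-D window
    return g2d / g2d.sum()         # normalize (the CKS division makes scaling optional)

Because every chip is multiplied by K and the running kernel sum CKS is divided out at the end, each output pixel is a convex combination of the chips covering it, with Gaussian weights favoring chip centers over seams.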