Esempio n. 1
0
def run_geocode(inps):
    """geocode all input files"""
    start_time = time.time()

    # feed the largest file for resample object initiation
    ind_max = np.argmax([os.path.getsize(i) for i in inps.file])

    # prepare geometry for geocoding
    kwargs = dict(interp_method=inps.interpMethod,
                  fill_value=inps.fillValue,
                  nprocs=inps.nprocs,
                  max_memory=inps.maxMemory,
                  software=inps.software,
                  print_msg=True)
    if inps.latFile and inps.lonFile:
        kwargs['lat_file'] = inps.latFile
        kwargs['lon_file'] = inps.lonFile
    res_obj = resample(lut_file=inps.lookupFile,
                       src_file=inps.file[ind_max],
                       SNWE=inps.SNWE,
                       lalo_step=inps.laloStep,
                       **kwargs)
    res_obj.open()
    res_obj.prepare()

    # resample input files one by one
    for infile in inps.file:
        print('-' * 50 + '\nresampling file: {}'.format(infile))
        atr = readfile.read_attribute(infile, datasetName=inps.dset)
        outfile = auto_output_filename(infile, inps)

        # update_mode
        if inps.updateMode:
            print('update mode: ON')
            if ut.run_or_skip(outfile, in_file=[infile,
                                                inps.lookupFile]) == 'skip':
                continue

        ## prepare output
        # update metadata
        if inps.radar2geo:
            atr = attr.update_attribute4radar2geo(atr, res_obj=res_obj)
        else:
            atr = attr.update_attribute4geo2radar(atr, res_obj=res_obj)

        # instantiate output file
        file_is_hdf5 = os.path.splitext(outfile)[1] in ['.h5', '.he5']
        if file_is_hdf5:
            compression = readfile.get_hdf5_compression(infile)
            writefile.layout_hdf5(outfile,
                                  metadata=atr,
                                  ref_file=infile,
                                  compression=compression)
        else:
            dsDict = dict()

        ## run
        dsNames = readfile.get_dataset_list(infile, datasetName=inps.dset)
        maxDigit = max([len(i) for i in dsNames])
        for dsName in dsNames:

            if not file_is_hdf5:
                dsDict[dsName] = np.zeros((res_obj.length, res_obj.width))

            # loop for block-by-block IO
            for i in range(res_obj.num_box):
                src_box = res_obj.src_box_list[i]
                dest_box = res_obj.dest_box_list[i]

                # read
                print('-' * 50 +
                      '\nreading {d:<{w}} in block {b} from {f} ...'.format(
                          d=dsName,
                          w=maxDigit,
                          b=src_box,
                          f=os.path.basename(infile)))

                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=src_box,
                                     print_msg=False)[0]

                # resample
                data = res_obj.run_resample(src_data=data, box_ind=i)

                # write / save block data
                if data.ndim == 3:
                    block = [
                        0, data.shape[0], dest_box[1], dest_box[3],
                        dest_box[0], dest_box[2]
                    ]
                else:
                    block = [
                        dest_box[1], dest_box[3], dest_box[0], dest_box[2]
                    ]

                if file_is_hdf5:
                    print('write data in block {} to file: {}'.format(
                        block, outfile))
                    writefile.write_hdf5_block(outfile,
                                               data=data,
                                               datasetName=dsName,
                                               block=block,
                                               print_msg=False)
                else:
                    dsDict[dsName][block[0]:block[1], block[2]:block[3]] = data

            # for binary file: ensure same data type
            if not file_is_hdf5:
                dsDict[dsName] = np.array(dsDict[dsName], dtype=data.dtype)

        # write binary file
        if not file_is_hdf5:
            atr['BANDS'] = len(dsDict.keys())
            writefile.write(dsDict,
                            out_file=outfile,
                            metadata=atr,
                            ref_file=infile)

            # create ISCE XML and GDAL VRT file if using ISCE lookup table file
            if inps.latFile and inps.lonFile:
                writefile.write_isce_xml(atr, fname=outfile)

    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s))
    return outfile
Esempio n. 2
0
def subset_file(fname, subset_dict_input, out_file=None):
    """Subset file with
    Inputs:
        fname        : str, path/name of file
        out_file     : str, path/name of output file
        subset_dict : dict, subsut parameter, including the following items:
                      subset_x   : list of 2 int,   subset in x direction,   default=None
                      subset_y   : list of 2 int,   subset in y direction,   default=None
                      subset_lat : list of 2 float, subset in lat direction, default=None
                      subset_lon : list of 2 float, subset in lon direction, default=None
                      fill_value : float, optional. filled value for area outside of data coverage. default=None
                                   None/not-existed to subset within data coverage only.
                      tight  : bool, tight subset or not, for lookup table file, i.e. geomap*.trans
    Outputs:
        out_file :  str, path/name of output file; 
                   out_file = 'subset_'+fname, if fname is in current directory;
                   out_file = fname, if fname is not in the current directory.
    """

    # Input File Info
    atr = readfile.read_attribute(fname)
    width = int(atr['WIDTH'])
    length = int(atr['LENGTH'])
    k = atr['FILE_TYPE']
    print('subset ' + k + ' file: ' + fname + ' ...')

    subset_dict = subset_dict_input.copy()
    # Read Subset Inputs into 4-tuple box in pixel and geo coord
    pix_box, geo_box = subset_input_dict2box(subset_dict, atr)

    coord = ut.coordinate(atr)
    # if fill_value exists and not None, subset data and fill assigned value for area out of its coverage.
    # otherwise, re-check subset to make sure it's within data coverage and initialize the matrix with np.nan
    outfill = False
    if 'fill_value' in subset_dict.keys() and subset_dict['fill_value']:
        outfill = True
    else:
        outfill = False
    if not outfill:
        pix_box = coord.check_box_within_data_coverage(pix_box)
        subset_dict['fill_value'] = np.nan

    geo_box = coord.box_pixel2geo(pix_box)
    data_box = (0, 0, width, length)
    print('data   range in (x0,y0,x1,y1): {}'.format(data_box))
    print('subset range in (x0,y0,x1,y1): {}'.format(pix_box))
    print('data   range in (W, N, E, S): {}'.format(
        coord.box_pixel2geo(data_box)))
    print('subset range in (W, N, E, S): {}'.format(geo_box))

    if pix_box == data_box:
        print('Subset range == data coverage, no need to subset. Skip.')
        return fname

    # Calculate Subset/Overlap Index
    pix_box4data, pix_box4subset = get_box_overlap_index(data_box, pix_box)

    ###########################  Data Read and Write  ######################
    # Output File Name
    if not out_file:
        if os.getcwd() == os.path.dirname(os.path.abspath(fname)):
            if 'tight' in subset_dict.keys() and subset_dict['tight']:
                out_file = '{}_tight{}'.format(
                    os.path.splitext(fname)[0],
                    os.path.splitext(fname)[1])
            else:
                out_file = 'sub_' + os.path.basename(fname)
        else:
            out_file = os.path.basename(fname)
    print('writing >>> ' + out_file)

    # update metadata
    atr = attr.update_attribute4subset(atr, pix_box)

    # subset datasets one by one
    dsNames = readfile.get_dataset_list(fname)
    maxDigit = max([len(i) for i in dsNames])

    ext = os.path.splitext(out_file)[1]
    if ext in ['.h5', '.he5']:
        # initiate the output file
        writefile.layout_hdf5(out_file, metadata=atr, ref_file=fname)

        # subset dataset one-by-one
        for dsName in dsNames:
            with h5py.File(fname, 'r') as fi:
                ds = fi[dsName]
                ds_shape = ds.shape
                ds_ndim = ds.ndim
                print('cropping {d} in {b} from {f} ...'.format(
                    d=dsName, b=pix_box4data, f=os.path.basename(fname)))

                if ds_ndim == 2:
                    # read
                    data = ds[pix_box4data[1]:pix_box4data[3],
                              pix_box4data[0]:pix_box4data[2]]

                    # crop
                    data_out = np.ones(
                        (pix_box[3] - pix_box[1], pix_box[2] - pix_box[0]),
                        data.dtype) * subset_dict['fill_value']
                    data_out[pix_box4subset[1]:pix_box4subset[3],
                             pix_box4subset[0]:pix_box4subset[2]] = data
                    data_out = np.array(data_out, dtype=data.dtype)

                    # write
                    block = [0, int(atr['LENGTH']), 0, int(atr['WIDTH'])]
                    writefile.write_hdf5_block(out_file,
                                               data=data_out,
                                               datasetName=dsName,
                                               block=block,
                                               print_msg=True)

                if ds_ndim == 3:
                    prog_bar = ptime.progressBar(maxValue=ds_shape[0])
                    for i in range(ds_shape[0]):
                        # read
                        data = ds[i, pix_box4data[1]:pix_box4data[3],
                                  pix_box4data[0]:pix_box4data[2]]

                        # crop
                        data_out = np.ones(
                            (1, pix_box[3] - pix_box[1],
                             pix_box[2] - pix_box[0]),
                            data.dtype) * subset_dict['fill_value']
                        data_out[:, pix_box4subset[1]:pix_box4subset[3],
                                 pix_box4subset[0]:pix_box4subset[2]] = data

                        # write
                        block = [
                            i, i + 1, 0,
                            int(atr['LENGTH']), 0,
                            int(atr['WIDTH'])
                        ]
                        writefile.write_hdf5_block(out_file,
                                                   data=data_out,
                                                   datasetName=dsName,
                                                   block=block,
                                                   print_msg=False)

                        prog_bar.update(i + 1,
                                        suffix='{}/{}'.format(
                                            i + 1, ds_shape[0]))
                    prog_bar.close()
                    print('finished writing to file: {}'.format(out_file))

    else:
        # IO for binary files
        dsDict = dict()
        for dsName in dsNames:
            dsDict[dsName] = subset_dataset(
                fname,
                dsName,
                pix_box,
                pix_box4data,
                pix_box4subset,
                fill_value=subset_dict['fill_value'])
        writefile.write(dsDict,
                        out_file=out_file,
                        metadata=atr,
                        ref_file=fname)

        # write extra metadata files for ISCE data files
        if os.path.isfile(fname + '.xml') or os.path.isfile(fname +
                                                            '.aux.xml'):
            # write ISCE XML file
            dtype_gdal = readfile.NUMPY2GDAL_DATATYPE[atr['DATA_TYPE']]
            dtype_isce = readfile.GDAL2ISCE_DATATYPE[dtype_gdal]
            writefile.write_isce_xml(out_file,
                                     width=int(atr['WIDTH']),
                                     length=int(atr['LENGTH']),
                                     bands=len(dsDict.keys()),
                                     data_type=dtype_isce,
                                     scheme=atr['scheme'],
                                     image_type=atr['FILE_TYPE'])
            print(f'write file: {out_file}.xml')

            # write GDAL VRT file
            if os.path.isfile(fname + '.vrt'):
                from isceobj.Util.ImageUtil import ImageLib as IML
                img = IML.loadImage(out_file)[0]
                img.renderVRT()
                print(f'write file: {out_file}.vrt')

    return out_file
Esempio n. 3
0
def multilook_file(infile,
                   lks_y,
                   lks_x,
                   outfile=None,
                   method='average',
                   margin=[0, 0, 0, 0],
                   max_memory=4):
    """ Multilook input file
    Parameters: infile - str, path of input file to be multilooked.
                lks_y  - int, number of looks in y / row direction.
                lks_x  - int, number of looks in x / column direction.
                margin - list of 4 int, number of pixels to be skipped during multilooking.
                         useful for offset product, where the marginal pixels are ignored during
                         cross correlation matching.
                outfile - str, path of output file
    Returns:    outfile - str, path of output file
    """
    lks_y = int(lks_y)
    lks_x = int(lks_x)

    # input file info
    atr = readfile.read_attribute(infile)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    k = atr['FILE_TYPE']
    print('multilooking {} {} file: {}'.format(atr['PROCESSOR'], k, infile))
    print('number of looks in y / azimuth direction: %d' % lks_y)
    print('number of looks in x / range   direction: %d' % lks_x)
    print('multilook method: {}'.format(method))

    # margin --> box
    if margin is not [0, 0, 0, 0]:  # top, bottom, left, right
        box = (margin[2], margin[0], width - margin[3], length - margin[1])
        print(
            'number of pixels to skip in top/bottom/left/right boundaries: {}'.
            format(margin))
    else:
        box = (0, 0, width, length)

    # output file name
    ext = os.path.splitext(infile)[1]
    if not outfile:
        if os.getcwd() == os.path.dirname(os.path.abspath(infile)):
            outfile = os.path.splitext(infile)[0] + '_' + str(
                lks_y) + 'alks_' + str(lks_x) + 'rlks' + ext
        else:
            outfile = os.path.basename(infile)

    # update metadata
    atr = attr.update_attribute4multilook(atr, lks_y, lks_x, box=box)

    if ext in ['.h5', '.he5']:
        writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile)

    # read source data and multilooking
    dsNames = readfile.get_dataset_list(infile)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('multilooking {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(infile)))

        # split in Y/row direction for IO for HDF5 only
        if ext in ['.h5', '.he5']:
            # calc step size with memory usage up to 4 GB
            with h5py.File(infile, 'r') as f:
                ds = f[dsName]
                ds_size = np.prod(ds.shape) * 4
            num_step = int(np.ceil(ds_size * 4 / (max_memory * 1024**3)))
            row_step = int(np.rint(length / num_step / 10) * 10)
            row_step = max(row_step, 10)

        else:
            row_step = box[3] - box[1]

        num_step = int(np.ceil((box[3] - box[1]) / (row_step * lks_y)))
        for i in range(num_step):
            r0 = box[1] + row_step * lks_y * i
            r1 = box[1] + row_step * lks_y * (i + 1)
            r1 = min(r1, box[3])
            # IO box
            box_i = (box[0], r0, box[2], r1)
            box_o = (int((box[0] - box[0]) / lks_x), int(
                (r0 - box[1]) / lks_y), int(
                    (box[2] - box[0]) / lks_x), int((r1 - box[1]) / lks_y))
            print('box: {}'.format(box_o))

            # read / multilook
            if method == 'nearest':
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     xstep=lks_x,
                                     ystep=lks_y,
                                     print_msg=False)[0]

            else:
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     print_msg=False)[0]

                data = multilook_data(data, lks_y, lks_x)

            # output block
            if data.ndim == 3:
                block = [
                    0, data.shape[0], box_o[1], box_o[3], box_o[0], box_o[2]
                ]
            else:
                block = [box_o[1], box_o[3], box_o[0], box_o[2]]

            # write
            if ext in ['.h5', '.he5']:
                writefile.write_hdf5_block(outfile,
                                           data=data,
                                           datasetName=dsName,
                                           block=block,
                                           print_msg=False)
            else:
                dsDict[dsName] = data

    # for binary file with 2 bands, always use BIL scheme
    if (len(dsDict.keys()) == 2
            and os.path.splitext(infile)[1] not in ['.h5', '.he5']
            and atr.get('scheme', 'BIL').upper() != 'BIL'):
        print('the input binary file has 2 bands with band interleave as: {}'.
              format(atr['scheme']))
        print(
            'for the output binary file, change the band interleave to BIL as default.'
        )
        atr['scheme'] = 'BIL'

    if ext not in ['.h5', '.he5']:
        writefile.write(dsDict,
                        out_file=outfile,
                        metadata=atr,
                        ref_file=infile)

        # write extra metadata files for ISCE data files
        if os.path.isfile(infile + '.xml') or os.path.isfile(infile +
                                                             '.aux.xml'):
            # write ISCE XML file
            dtype_gdal = readfile.NUMPY2GDAL_DATATYPE[atr['DATA_TYPE']]
            dtype_isce = readfile.GDAL2ISCE_DATATYPE[dtype_gdal]
            writefile.write_isce_xml(outfile,
                                     width=int(atr['WIDTH']),
                                     length=int(atr['LENGTH']),
                                     bands=len(dsDict.keys()),
                                     data_type=dtype_isce,
                                     scheme=atr['scheme'],
                                     image_type=atr['FILE_TYPE'])
            print(f'write file: {outfile}.xml')

            # write GDAL VRT file
            if os.path.isfile(infile + '.vrt'):
                from isceobj.Util.ImageUtil import ImageLib as IML
                img = IML.loadImage(outfile)[0]
                img.renderVRT()
                print(f'write file: {outfile}.vrt')

    return outfile