Example #1
import numpy as np
from spectral.io import envi

from isofit.core.common import envi_header


def remap(inputfile, labels, outputfile, flag, chunksize):
    """Remap a reference spectrum file onto a label image, writing the result chunk by chunk."""

    ref_file = inputfile
    lbl_file = labels
    out_file = outputfile
    nchunk = chunksize

    ref_img = envi.open(envi_header(ref_file), ref_file)
    ref_meta = ref_img.metadata
    ref_mm = ref_img.open_memmap(interleave='source', writable=False)
    ref = np.array(ref_mm[:, :])

    lbl_img = envi.open(envi_header(lbl_file), lbl_file)
    lbl_meta = lbl_img.metadata
    labels = lbl_img.read_band(0)

    nl = int(lbl_meta['lines'])
    ns = int(lbl_meta['samples'])
    nb = int(ref_meta['bands'])

    out_meta = {k: v for k, v in ref_meta.items()}

    out_meta["samples"] = ns
    out_meta["bands"] = nb
    out_meta["lines"] = nl
    out_meta['data type'] = ref_meta['data type']
    out_meta["interleave"] = "bil"

    out_img = envi.create_image(envi_header(out_file),
                                metadata=out_meta,
                                ext='',
                                force=True)
    out_mm = out_img.open_memmap(interleave='source', writable=True)

    # Iterate through image "chunks," restoring as we go
    for lstart in np.arange(0, nl, nchunk):
        print(lstart)
        del out_mm
        out_mm = out_img.open_memmap(interleave='source', writable=True)

        # Determine where this chunk of lines ends
        lend = min(lstart + nchunk, nl)

        lbl = labels[lstart:lend, :]
        out = flag * np.ones((lbl.shape[0], nb, lbl.shape[1]))
        for row in range(lbl.shape[0]):
            for col in range(lbl.shape[1]):
                out[row, :, col] = np.squeeze(ref[int(lbl[row, col]), :])

        out_mm[lstart:lend, :, :] = out
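
A minimal invocation sketch (the file paths here are hypothetical; flag is the fill value used to initialize each output chunk):

# Hypothetical driver call for remap (paths are placeholders)
remap(inputfile='reference_spectra',     # ENVI file of per-label spectra
      labels='segmentation_labels',      # ENVI label map; band 0 is read
      outputfile='remapped_cube',        # BIL-interleave ENVI output
      flag=-9999.0,                      # fill value for output buffers
      chunksize=256)                     # lines processed per iteration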
Example #2
def instrument_model(config):
    """Build a flat-field and per-column covariance instrument model from a radiance cube."""

    hdr_template = """ENVI
    samples = {samples}
    lines   = {lines}
    bands   = 1
    header offset = 0
    file type = ENVI Standard
    data type = 4
    interleave = bsq
    byte order = 0
    """

    config = json_load_ascii(config, shell_replace=True)
    configdir, configfile = split(abspath(config))

    infile = expand_path(configdir, config['input_radiance_file'])
    outfile = expand_path(configdir, config['output_model_file'])
    flatfile = expand_path(configdir, config['output_flatfield_file'])
    uniformity_thresh = float(config['uniformity_threshold'])

    infile_hdr = envi_header(infile)
    img = envi.open(infile_hdr, infile)
    inmm = img.open_memmap(interleave='bil', writable=False)
    X = np.array(inmm[:, :, :], dtype=np.float32)
    nr, nb, nc = X.shape

    FF, Xhoriz, Xhorizp, use_ff = _flat_field(X, uniformity_thresh)
    np.array(FF, dtype=np.float32).tofile(flatfile)
    with open(envi_header(flatfile), 'w') as fout:
        fout.write(hdr_template.format(lines=nb, samples=nc))

    C, Xvert, Xvertp, use_C = _column_covariances(X, uniformity_thresh)
    cshape = (C.shape[0], C.shape[1]**2)
    out = np.array(C, dtype=np.float32).reshape(cshape)
    mdict = {
        'columns': out.shape[0],
        'bands': out.shape[1],
        'covariances': out,
        'Xvert': Xvert,
        'Xhoriz': Xhoriz,
        'Xvertp': Xvertp,
        'Xhorizp': Xhorizp,
        'use_ff': use_ff,
        'use_C': use_C
    }
    scipy.io.savemat(outfile, mdict)
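
The configuration argument is a path to a JSON file; a minimal sketch of its contents, inferred from the keys read above (paths and threshold are hypothetical):

import json

config = {
    'input_radiance_file': './input/radiance_cube',        # hypothetical path
    'output_model_file': './output/instrument_model.mat',
    'output_flatfield_file': './output/flatfield',
    'uniformity_threshold': '0.01',
}
with open('instrument_model_config.json', 'w') as f:
    json.dump(config, f, indent=2)
# instrument_model('instrument_model_config.json') would then run end to end.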
Example #3
    def flush_buffers(self):
        """Write to file, and refresh the memory map object."""

        if self.format == 'ENVI':
            if self.write:
                for row, frame in self.frames.items():
                    valid = np.logical_not(np.isnan(frame[:, 0]))
                    self.memmap[row, valid, :] = frame[valid, :]
            self.frames = OrderedDict()
            del self.file
            self.file = envi.open(envi_header(self.fname), self.fname)
            self.open_map_with_retries()
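
The buffer uses NaN in a frame's first channel to mark rows that were never written; a standalone numpy sketch of the same masking logic:

import numpy as np

frame = np.full((5, 3), np.nan)     # 5 spectra x 3 bands, all unwritten
frame[2, :] = [0.1, 0.2, 0.3]       # one valid spectrum
valid = np.logical_not(np.isnan(frame[:, 0]))
# valid -> [False, False, True, False, False]; only row 2 would be flushed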
Example #4
import pathlib
import shutil

import numpy as np
import spectral as sp
from scipy.interpolate import interp1d

from isofit.core.common import envi_header


def sample_calibration_uncertainty(input_file: pathlib.Path,
                                   output_file: pathlib.Path,
                                   cov_l: np.ndarray,
                                   cov_wl: np.ndarray,
                                   rad_wl: np.ndarray,
                                   bias_scale=1.0):
    """Apply one random draw of spectrally correlated calibration error to a radiance image."""
    input_file_hdr = envi_header(str(input_file))
    output_file_hdr = envi_header(str(output_file))
    shutil.copy(input_file, output_file)
    shutil.copy(input_file_hdr, output_file_hdr)

    img = sp.open_image(str(output_file_hdr))
    img_m = img.open_memmap(writable=True)

    # Here, we assume that the calibration bias is constant across the entire
    # image (i.e., the same bias is added to all pixels).
    z = np.random.normal(size=cov_l.shape[0], scale=bias_scale)
    Az = 1.0 + cov_l @ z
    # Resample the added noise vector to match the wavelengths of the target
    # image.
    Az_resampled = interp1d(cov_wl, Az, fill_value="extrapolate")(rad_wl)
    img_m *= Az_resampled
    return output_file
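
Because cov_l multiplies a standard-normal vector, it acts as a square-root factor of the calibration covariance, so a Cholesky factor is a natural input. A sketch with synthetic arrays (all values and paths hypothetical):

import pathlib

import numpy as np

wl = np.linspace(400, 2500, 50)                  # covariance wavelength grid
cov = 1e-4 * np.exp(-np.abs(wl[:, None] - wl[None, :]) / 500.0)
cov_l = np.linalg.cholesky(cov)                  # square-root factor
rad_wl = np.linspace(380, 2510, 425)             # target image wavelengths
out = sample_calibration_uncertainty(
    pathlib.Path('radiance'), pathlib.Path('radiance_perturbed'),
    cov_l=cov_l, cov_wl=wl, rad_wl=rad_wl, bias_scale=1.0)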
Example #5
import logging

import numpy as np
from spectral.io import envi

from isofit.core.common import envi_header


def extract_chunk(lstart: int,
                  lend: int,
                  in_file: str,
                  labels: np.ndarray,
                  flag: float,
                  logfile=None,
                  loglevel='INFO'):
    """
    Extract a small chunk of the image

    Args:
        lstart: line to start extraction at
        lend: line to end extraction at
        in_file: file to read image from
        labels: labels to use for data read
        flag: nodata value of image
        logfile: logging file name
        loglevel: logging level

    Returns:
        out_index: array of output indices (based on labels)
        out_data: array of output data
    """

    logging.basicConfig(format='%(levelname)s:%(message)s',
                        level=loglevel,
                        filename=logfile)
    logging.info(f'{lstart}: starting')

    in_img = envi.open(envi_header(in_file))
    img_mm = in_img.open_memmap(interleave='bip', writable=False)

    # Which labels will we extract? ignore zero index
    active = labels[lstart:lend, :]
    active = active[active >= 1]
    active = np.unique(active)
    logging.debug(f'{lstart}: found {len(active)} unique labels')
    if len(active) == 0:
        return None, None

    # Handle labels extending outside our chunk by expanding margins
    cs = lend - lstart
    boundary_min = max(lstart - cs, 0)
    boundary_max = min(lend + cs, labels.shape[0])

    active_area = np.zeros((boundary_max - boundary_min, labels.shape[1]))
    for i in active:
        active_area[labels[boundary_min:boundary_max, :] == i] = True
    active_locs = np.where(active_area)

    lstart_adjust = min(active_locs[0]) + boundary_min
    lend_adjust = max(active_locs[0]) + boundary_min + 1

    cstart_adjust = min(active_locs[1])
    cend_adjust = max(active_locs[1]) + 1

    logging.debug(
        f'{lstart} area subset: {lstart_adjust}, {lend_adjust} :::: {cstart_adjust}, {cend_adjust}'
    )

    chunk_lbl = np.array(labels[lstart_adjust:lend_adjust,
                                cstart_adjust:cend_adjust])
    chunk_inp = np.array(img_mm[lstart_adjust:lend_adjust,
                                cstart_adjust:cend_adjust, :])

    out_data = np.zeros((len(active), img_mm.shape[-1])) + flag

    logging.debug(f'{lstart}: running extraction from local array')
    for _lab, lab in enumerate(active):
        out_data[_lab, :] = 0
        locs = np.where(chunk_lbl == lab)
        for row, col in zip(locs[0], locs[1]):
            out_data[_lab, :] += np.squeeze(chunk_inp[row, col, :])
        out_data[_lab, :] /= float(len(locs[0]))

    # Build the global label list with a leading zero entry so that
    # searchsorted maps label values onto output row indices
    unique_labels = np.unique(labels)
    unique_labels = unique_labels[unique_labels >= 1]
    unique_labels = np.hstack([np.zeros(1), unique_labels])

    match_idx = np.searchsorted(unique_labels, active)

    out_data[np.logical_not(np.isfinite(out_data))] = flag
    logging.debug(f'{lstart}: complete')

    return match_idx, out_data
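
A sketch of driving extract_chunk directly; the label image is read the same way as in Example #8 below (paths are placeholders):

from spectral.io import envi

from isofit.core.common import envi_header

lbl_img = envi.open(envi_header('segmentation_labels'), 'segmentation_labels')
labels = lbl_img.read_band(0)
idx, data = extract_chunk(0, 100, 'radiance_cube', labels, flag=-9999.0)
if idx is not None:
    print(f'{len(idx)} segment means extracted')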
Example #6
import matplotlib.pyplot as plt
from isofit.core.common import envi_header

assert len(sys.argv) > 1, "Please specify a JSON config file."

configfile = sys.argv[1]
with open(configfile, "r") as f:
    config = json.load(f)

outdir = Path(config["outdir"])

reflfiles = list(outdir.glob("**/estimated-reflectance"))
assert len(reflfiles) > 0, f"No reflectance files found in directory {outdir}"

true_refl_file = Path(config["reflectance_file"]).expanduser()
true_reflectance = sp.open_image(envi_header(str(true_refl_file)))
true_waves = np.array(true_reflectance.metadata["wavelength"], dtype=float)
true_refl_m = true_reflectance.open_memmap()

windows = config["isofit"]["implementation"]["inversion"]["windows"]

def parse_dir(ddir):
    grps = {"directory": [str(ddir)]}
    for key in ["atm", "noise", "prior", "inversion"]:
        pat = f".*{key}_(.+?)" + r"(__|/|\Z)"
        match = re.match(pat, str(ddir))
        if match is not None:
            match = match.group(1)
        grps[key] = [match]
    for key in ["szen", "ozen", "zen",
                "saz", "oaz", "az",
Example #7
def _run_chunk(start_line: int, stop_line: int, reference_radiance_file: str,
               reference_atm_file: str, reference_locations_file: str,
               input_radiance_file: str, input_locations_file: str,
               segmentation_file: str, isofit_config: dict,
               output_reflectance_file: str, output_uncertainty_file: str,
               radiance_factors: np.ndarray, nneighbors: int,
               nodata_value: float) -> None:
    """
    Args:
        start_line: line to start empirical line run at
        stop_line:  line to stop empirical line run at
        reference_radiance_file: source file for radiance (interpolation built from this)
        reference_atm_file:  source file for atmosphere coefficients (interpolation built from this)
        reference_locations_file:  source file for file locations (lon, lat, elev), (interpolation built from this)
        input_radiance_file: input radiance file (interpolate over this)
        input_locations_file: input location file (interpolate over this)
        segmentation_file: input file noting the per-pixel segmentation used
        isofit_config: dictionary-style isofit configuration
        output_reflectance_file: location to write output reflectance to
        output_uncertainty_file: location to write output uncertainty to
        radiance_factors: radiance adjustment factors
        nneighbors: number of neighbors to use for interpolation
        nodata_value: nodata value of input and output

    Returns:
        None

    """

    # Load reference images
    reference_radiance_img = envi.open(envi_header(reference_radiance_file),
                                       reference_radiance_file)
    reference_atm_img = envi.open(envi_header(reference_atm_file),
                                  reference_atm_file)
    reference_locations_img = envi.open(envi_header(reference_locations_file),
                                        reference_locations_file)

    n_reference_lines, n_radiance_bands, n_reference_columns = [
        int(reference_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]

    # Load input images
    input_radiance_img = envi.open(envi_header(input_radiance_file),
                                   input_radiance_file)
    n_input_lines, n_input_bands, n_input_samples = [
        int(input_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    wl = np.array(
        [float(w) for w in input_radiance_img.metadata['wavelength']])

    input_locations_img = envi.open(envi_header(input_locations_file),
                                    input_locations_file)
    n_location_bands = int(input_locations_img.metadata['bands'])

    # Load output images
    output_reflectance_img = envi.open(envi_header(output_reflectance_file),
                                       output_reflectance_file)
    output_uncertainty_img = envi.open(envi_header(output_uncertainty_file),
                                       output_uncertainty_file)
    n_output_reflectance_bands = int(output_reflectance_img.metadata['bands'])
    n_output_uncertainty_bands = int(output_uncertainty_img.metadata['bands'])

    # Load reference data
    reference_locations_mm = reference_locations_img.open_memmap(
        interleave='source', writable=False)
    reference_locations = np.array(reference_locations_mm[:, :, :]).reshape(
        (n_reference_lines, n_location_bands))

    reference_radiance_mm = reference_radiance_img.open_memmap(
        interleave='source', writable=False)
    reference_radiance = np.array(reference_radiance_mm[:, :, :]).reshape(
        (n_reference_lines, n_radiance_bands))

    reference_atm_mm = reference_atm_img.open_memmap(interleave='source',
                                                     writable=False)
    reference_atm = np.array(reference_atm_mm[:, :, :]).reshape(
        (n_reference_lines, n_radiance_bands * 5))
    rhoatm = reference_atm[:, :n_radiance_bands]
    sphalb = reference_atm[:, n_radiance_bands:(n_radiance_bands * 2)]
    transm = reference_atm[:, (n_radiance_bands * 2):(n_radiance_bands * 3)]
    solirr = reference_atm[:, (n_radiance_bands * 3):(n_radiance_bands * 4)]
    coszen = reference_atm[:, (n_radiance_bands * 4):(n_radiance_bands * 5)]

    # Load segmentation data
    if segmentation_file:
        segmentation_img = envi.open(envi_header(segmentation_file),
                                     segmentation_file)
        segmentation_img = segmentation_img.read_band(0)
    else:
        segmentation_img = None

    # Prepare instrument model, if available
    if isofit_config is not None:
        config = configs.create_new_config(isofit_config)
        instrument = Instrument(config)
        logging.info('Loading instrument')
    else:
        instrument = None

    # Load radiance factors
    if radiance_factors is None:
        radiance_adjustment = np.ones(n_radiance_bands, )
    else:
        radiance_adjustment = np.loadtxt(radiance_factors)

    # PCA coefficients
    rdn_pca = PCA(n_components=2)
    reference_pca = rdn_pca.fit_transform(reference_radiance *
                                          radiance_adjustment)

    # Create the tree to find nearest neighbor segments.
    # Assume (heuristically) that, for distance purposes, 1 m vertically is
    # comparable to 10 m horizontally, and that there are 100 km per degree
    # of latitude.  This is all approximate, of course.  Elevation appears in
    # the third element; the first two are latitude/longitude coordinates.
    # The fourth and fifth elements are "spectral distance" determined by the
    # top principal component coefficients
    loc_scaling = np.array([1e5, 1e5, 10, 100, 100])
    scaled_ref_loc = np.concatenate(
        (reference_locations, reference_pca), axis=1) * loc_scaling
    tree = KDTree(scaled_ref_loc)

    # Fit GP parameters on transmissivity of an H2O feature, in the
    # first 400 datapoints
    use = np.arange(min(len(rhoatm), 400))
    h2oband = np.argmin(abs(wl - 940))
    scale = (500, 500, 500, 500, 500)
    bounds = ((100, 2000), (100, 2000), (100, 2000), (100, 2000), (100, 2000))
    kernel = (RBF(length_scale=scale, length_scale_bounds=bounds) +
              WhiteKernel(noise_level=0.01, noise_level_bounds=(1e-10, 0.1)))
    gp = GaussianProcessRegressor(kernel=kernel, alpha=0.0, normalize_y=True)
    gp = gp.fit(scaled_ref_loc[use, :], transm[use, h2oband])
    kernel = gp.kernel_

    # Iterate through image.  Each segment has its own GP, stored in a
    # hash table indexed by location in the segmentation map
    hash_table = {}

    for row in np.arange(start_line, stop_line):

        # Load inline input data
        input_radiance_mm = input_radiance_img.open_memmap(interleave='source',
                                                           writable=False)
        input_radiance = np.array(input_radiance_mm[row, :, :])
        if input_radiance_img.metadata['interleave'] == 'bil':
            input_radiance = input_radiance.transpose((1, 0))
        input_radiance = input_radiance * radiance_adjustment

        input_locations_mm = input_locations_img.open_memmap(
            interleave='source', writable=False)
        input_locations = np.array(input_locations_mm[row, :, :])
        if input_locations_img.metadata['interleave'] == 'bil':
            input_locations = input_locations.transpose((1, 0))

        output_reflectance_row = np.zeros(input_radiance.shape) + nodata_value
        output_uncertainty_row = np.zeros(input_radiance.shape) + nodata_value

        nspectra, start = 0, time.time()
        for col in np.arange(n_input_samples):

            # Get radiance, pca coordinates, physical location for this datum
            my_rdn = input_radiance[col, :]
            my_pca = rdn_pca.transform(my_rdn[np.newaxis, :])
            my_loc = np.r_[input_locations[col, :], my_pca[0, :]] * loc_scaling

            if np.all(np.isclose(my_rdn, nodata_value)):
                output_reflectance_row[col, :] = nodata_value
                output_uncertainty_row[col, :] = nodata_value
                continue

            # Retrieve or build the GP
            gp_rhoatm, gp_sphalb, gp_transm, irr = None, None, None, None
            hash_idx = segmentation_img[row, col]
            if hash_idx in hash_table:
                gp_rhoatm, gp_sphalb, gp_transm, irr = hash_table[hash_idx]
            else:

                # There is no GP for this segment, so we build one from
                # the atmospheric coefficients from closest neighbors
                dists, nn = tree.query(my_loc, nneighbors)
                neighbor_rhoatm = rhoatm[nn, :]
                neighbor_transm = transm[nn, :]
                neighbor_sphalb = sphalb[nn, :]
                neighbor_coszen = coszen[nn, :]
                neighbor_solirr = solirr[nn, :]
                neighbor_locs = scaled_ref_loc[nn, :]

                # Create a new GP using the optimized parameters as a fixed kernel
                gp_rhoatm = GaussianProcessRegressor(kernel=kernel,
                                                     alpha=0.0,
                                                     normalize_y=True,
                                                     optimizer=None)
                gp_rhoatm.fit(neighbor_locs, neighbor_rhoatm)
                gp_sphalb = GaussianProcessRegressor(kernel=kernel,
                                                     alpha=0.0,
                                                     normalize_y=True,
                                                     optimizer=None)
                gp_sphalb.fit(neighbor_locs, neighbor_sphalb)
                gp_transm = GaussianProcessRegressor(kernel=kernel,
                                                     alpha=0.0,
                                                     normalize_y=True,
                                                     optimizer=None)
                gp_transm.fit(neighbor_locs, neighbor_transm)
                irr = solirr[1, :] * coszen[1, :]
                irr[irr < 1e-8] = 1e-8

                hash_table[hash_idx] = (gp_rhoatm, gp_sphalb, gp_transm, irr)

            my_rhoatm = gp_rhoatm.predict(my_loc[np.newaxis, :])
            my_sphalb = gp_sphalb.predict(my_loc[np.newaxis, :])
            my_transm = gp_transm.predict(my_loc[np.newaxis, :])
            my_rho = (my_rdn * np.pi) / irr
            my_rfl = 1.0 / (my_transm / (my_rho - my_rhoatm) + my_sphalb)
            output_reflectance_row[col, :] = my_rfl

            # Calculate uncertainties.  Sy approximation rather than Seps for
            # speed, for now... but we do take into account instrument
            # radiometric uncertainties
            #output_uncertainty_row[col, :] = np.zeros()
            #if instrument is None:
            #else:
            #    Sy = instrument.Sy(x, geom=None)
            #    calunc = instrument.bval[:instrument.n_chan]
            #    output_uncertainty_row[col, :] = np.sqrt(
            #        np.diag(Sy) + pow(calunc * x, 2)) * bhat[:, 1]
            # if loglevel == 'DEBUG':
            #    plot_example(xv, yv, bhat)

            nspectra = nspectra + 1

        elapsed = float(time.time() - start)
        logging.info('row {}/{}, ({}/{} local), {} spectra per second'.format(
            row, n_input_lines, int(row - start_line),
            int(stop_line - start_line), round(float(nspectra) / elapsed, 2)))

        del input_locations_mm
        del input_radiance_mm

        output_reflectance_row = output_reflectance_row.transpose((1, 0))
        output_uncertainty_row = output_uncertainty_row.transpose((1, 0))
        shp = output_reflectance_row.shape
        output_reflectance_row = output_reflectance_row.reshape(
            (1, shp[0], shp[1]))
        shp = output_uncertainty_row.shape
        output_uncertainty_row = output_uncertainty_row.reshape(
            (1, shp[0], shp[1]))

        _write_bil_chunk(
            output_reflectance_row, output_reflectance_file, row,
            (n_input_lines, n_output_reflectance_bands, n_input_samples))
        _write_bil_chunk(
            output_uncertainty_row, output_uncertainty_file, row,
            (n_input_lines, n_output_uncertainty_bands, n_input_samples))
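
The 5-D nearest-neighbor metric mixes geography with spectral shape; a compact sketch of the scaling heuristic in isolation (assuming scipy.spatial.KDTree, consistent with the 1-D query calls above):

import numpy as np
from scipy.spatial import KDTree

# columns: lon, lat, elevation, PCA-1, PCA-2
loc_scaling = np.array([1e5, 1e5, 10, 100, 100])
ref = np.random.rand(500, 5)                  # synthetic reference locations
tree = KDTree(ref * loc_scaling)
dists, nn = tree.query(np.random.rand(5) * loc_scaling, k=15)
# nn indexes the 15 reference segments closest in the scaled space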
Example #8
def extractions(inputfile,
                labels,
                output,
                chunksize,
                flag,
                n_cores: int = 1,
                ray_address: str = None,
                ray_redis_password: str = None,
                ray_temp_dir: str = None,
                ray_ip_head=None,
                logfile: str = None,
                loglevel: str = 'INFO'):
    """..."""

    in_file = inputfile
    lbl_file = labels
    out_file = output
    nchunk = chunksize

    dtm = {'4': np.float32, '5': np.float64}

    # Open input data, get dimensions
    in_img = envi.open(envi_header(in_file), in_file)
    meta = in_img.metadata

    nl, nb, ns = [int(meta[n]) for n in ('lines', 'bands', 'samples')]
    img_mm = in_img.open_memmap(interleave='bip', writable=False)

    lbl_img = envi.open(envi_header(lbl_file), lbl_file)
    labels = lbl_img.read_band(0)
    un_labels = np.unique(labels).tolist()
    if 0 not in un_labels:
        un_labels.insert(0, 0)
    nout = len(un_labels)

    # Start up a ray instance for parallel work
    rayargs = {
        'ignore_reinit_error': True,
        'local_mode': n_cores == 1,
        "address": ray_address,
        "_redis_password": ray_redis_password
    }

    if rayargs['local_mode']:
        rayargs['_temp_dir'] = ray_temp_dir
        # Used to run on a VPN
        ray.services.get_node_ip_address = lambda: '127.0.0.1'

    # We can only set the num_cpus if running on a single-node
    if ray_ip_head is None and ray_redis_password is None:
        rayargs['num_cpus'] = n_cores

    ray.init(**rayargs)
    atexit.register(ray.shutdown)

    labelid = ray.put(labels)
    jobs = []
    for lstart in np.arange(0, nl, nchunk):
        lend = min(lstart + nchunk, nl)
        jobs.append(
            extract_chunk.remote(lstart,
                                 lend,
                                 in_file,
                                 labelid,
                                 flag,
                                 logfile=logfile,
                                 loglevel=loglevel))

    # Collect results
    rreturn = [ray.get(jid) for jid in jobs]

    # Iterate through image "chunks," segmenting as we go
    out = np.zeros((nout, nb, 1))
    for idx, ret in rreturn:
        if ret is not None:
            out[idx, :, 0] = ret
    del rreturn
    ray.shutdown()

    meta["lines"] = str(nout)
    meta["bands"] = str(nb)
    meta["samples"] = '1'
    meta["interleave"] = "bil"

    out_img = envi.create_image(envi_header(out_file),
                                metadata=meta,
                                ext='',
                                force=True)
    del out_img
    if dtm[meta['data type']] == np.float32:
        dtype = 'float32'
    else:
        dtype = 'float64'

    write_bil_chunk(out, out_file, 0, out.shape, dtype=dtype)
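
A hypothetical single-core run; the output is a (labels x bands x 1) BIL file holding one averaged spectrum per segment:

extractions(inputfile='radiance_cube',         # placeholder paths
            labels='segmentation_labels',
            output='segment_means',
            chunksize=256,
            flag=-9999.0,
            n_cores=1)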
Example #9
def surface_model(config_path: str,
                  wavelength_path: str = None,
                  output_path: str = None) -> None:
    """The surface model tool contains everything you need to build basic
    multicomponent (i.e. colleciton of Gaussian) surface priors for the
    multicomponent surface model.

    Args:
        config_path: path to a JSON formatted surface model configuration
        wavelength_path: optional path to a three-column wavelength file, 
           overriding the configuration file settings
        output_path: optional path to the destination .mat file, overriding
           the configuration file settings
    Returns:
        None
    """

    # Load configuration JSON into a local dictionary
    configdir, _ = os.path.split(os.path.abspath(config_path))
    config = json_load_ascii(config_path, shell_replace=True)

    # Determine top level parameters
    for q in ['output_model_file', 'sources', 'normalize', 'wavelength_file']:
        if q not in config:
            raise ValueError("Missing parameter: %s" % q)
    if wavelength_path is not None:
        wavelength_file = wavelength_path
    else:
        wavelength_file = expand_path(configdir, config['wavelength_file'])
    if output_path is not None:
        outfile = output_path
    else:
        outfile = expand_path(configdir, config['output_model_file'])
    normalize = config['normalize']
    reference_windows = config['reference_windows']

    # load wavelengths file, and change units to nm if needed
    q = np.loadtxt(wavelength_file)
    if q.shape[1] > 2:
        q = q[:, 1:]
    if q[0, 0] < 100:
        q = q * 1000.0
    wl = q[:, 0]
    nchan = len(wl)

    # build global reference windows
    refwl = []
    for wi, window in enumerate(reference_windows):
        active_wl = np.logical_and(wl >= window[0], wl < window[1])
        refwl.extend(wl[active_wl])
    normind = np.array([np.argmin(abs(wl - w)) for w in refwl])
    refwl = np.array(refwl, dtype=float)

    # create basic model template
    model = {
        'normalize': normalize,
        'wl': wl,
        'means': [],
        'covs': [],
        'attribute_means': [],
        'attribute_covs': [],
        'attributes': [],
        'refwl': refwl
    }

    # each "source" (i.e. spectral library) is treated separately
    for si, source_config in enumerate(config['sources']):

        # Determine source parameters
        for q in ['input_spectrum_files', 'windows', 'n_components']:
            if q not in source_config:
                raise ValueError('Source %i is missing a parameter: %s' %
                                 (si, q))

        # Determine whether we should synthesize our own mixtures
        if 'mixtures' in source_config:
            mixtures = source_config['mixtures']
        elif 'mixtures' in config:
            mixtures = config['mixtures']
        else:
            mixtures = 0

        # open input files associated with this source
        infiles = [
            expand_path(configdir, fi)
            for fi in source_config['input_spectrum_files']
        ]

        # associate attributes, if they exist. These will not be used
        # in the retrieval, but can be used in post-analysis
        if 'input_attribute_files' in source_config:
            infiles_attributes = [
                expand_path(configdir, fi)
                for fi in source_config['input_attribute_files']
            ]
            if len(infiles_attributes) != len(infiles):
                raise IndexError('spectrum / attribute file mismatch')
        else:
            infiles_attributes = [
                None for fi in source_config['input_spectrum_files']
            ]

        ncomp = int(source_config['n_components'])
        windows = source_config['windows']

        # load spectra
        spectra, attributes = [], []
        for infile, attribute_file in zip(infiles, infiles_attributes):

            rfl = envi.open(envi_header(infile), infile)
            nl, nb, ns = [
                int(rfl.metadata[n]) for n in ('lines', 'bands', 'samples')
            ]
            swl = np.array([float(f) for f in rfl.metadata['wavelength']])

            # Maybe convert to nanometers
            if swl[0] < 100:
                swl = swl * 1000.0

            # Load library and adjust interleave, if needed
            rfl_mm = rfl.open_memmap(interleave='bip', writable=False)
            x = np.array(rfl_mm[:, :, :])
            x = x.reshape(nl * ns, nb)

            # import spectra and resample
            for x1 in x:
                p = scipy.interpolate.interp1d(swl,
                                               x1,
                                               kind='linear',
                                               bounds_error=False,
                                               fill_value='extrapolate')
                spectra.append(p(wl))

            # Load attributes
            if attribute_file is not None:

                attr = envi.open(envi_header(attribute_file), attribute_file)
                nla, nba, nsa = [
                    int(attr.metadata[n])
                    for n in ('lines', 'bands', 'samples')
                ]

                # Load library and adjust interleave, if needed
                attr_mm = attr.open_memmap(interleave='bip', writable=False)
                x = np.array(attr_mm[:, :, :])
                x = x.reshape(nla * nsa, nba)
                model['attributes'] = attr.metadata['band names']

                # import spectra and resample
                for x1 in x:
                    attributes.append(x1)

        if len(attributes) > 0 and len(attributes) != len(spectra):
            raise IndexError('Mismatch in number of spectra vs. attributes')

        # calculate mixtures, if needed
        if len(attributes) > 0 and mixtures > 0:
            raise ValueError('Synthetic mixtures w/ attributes is not advised')

        n = float(len(spectra))
        nmix = int(n * mixtures)
        for mi in range(nmix):
            s1, m1 = spectra[int(np.random.rand() * n)], np.random.rand()
            s2, m2 = spectra[int(np.random.rand() * n)], 1.0 - m1
            spectra.append(m1 * s1 + m2 * s2)

        # Lists to arrays
        spectra = np.array(spectra)
        attributes = np.array(attributes)

        # Flag bad data
        use = np.all(np.isfinite(spectra), axis=1)
        spectra = spectra[use, :]
        if len(attributes) > 0:
            attributes = attributes[use, :]

        # Accumulate total list of window indices
        window_idx = -np.ones((nchan), dtype=int)
        for wi, win in enumerate(windows):
            active_wl = np.logical_and(wl >= win['interval'][0],
                                       wl < win['interval'][1])
            window_idx[active_wl] = wi

        # Two step model generation.  First step is k-means clustering.
        # This is more "stable" than Expectation Maximization with an
        # unconstrained covariance matrix
        kmeans = KMeans(init='k-means++', n_clusters=ncomp, n_init=10)
        kmeans.fit(spectra)
        Z = kmeans.predict(spectra)

        # Build a combined dataset of attributes and spectra
        if len(attributes) > 0:
            spectra_attr = np.concatenate((spectra, attributes), axis=1)

        # Now fit the full covariance for each component
        for ci in range(ncomp):

            m = np.mean(spectra[Z == ci, :], axis=0)
            C = np.cov(spectra[Z == ci, :], rowvar=False)
            if len(attributes) > 0:
                m_attr = np.mean(spectra_attr[Z == ci, :], axis=0)
                C_attr = np.cov(spectra_attr[Z == ci, :], rowvar=False)

            for i in range(nchan):
                window = windows[window_idx[i]]

                # Each spectral interval, or window, is constructed
                # using one of several rules.  We can draw the covariance
                # directly from the data...
                if window['correlation'] == 'EM':
                    C[i, i] = C[i, i] + float(window['regularizer'])

                # Alternatively, we can use a band diagonal form,
                # a Gaussian process that promotes local smoothness.
                elif window['correlation'] == 'GP':
                    width = float(window['gp_width'])
                    magnitude = float(window['gp_magnitude'])
                    kernel = scipy.stats.norm.pdf((wl - wl[i]) / width)
                    kernel = kernel / kernel.sum() * magnitude
                    C[i, :] = kernel
                    C[:, i] = kernel
                    C[i, i] = C[i, i] + float(window['regularizer'])

                # To minimize bias, leave the channels independent
                # and uncorrelated
                elif window['correlation'] == 'decorrelated':
                    cii = C[i, i]
                    C[:, i] = 0
                    C[i, :] = 0
                    C[i, i] = cii + float(window['regularizer'])

                else:
                    raise ValueError('I do not recognize the method ' +
                                     window['correlation'])

            # Normalize the component spectrum if desired
            if normalize == 'Euclidean':
                z = np.sqrt(np.sum(pow(m[normind], 2)))
            elif normalize == 'RMS':
                z = np.sqrt(np.mean(pow(m[normind], 2)))
            elif normalize == 'None':
                z = 1.0
            else:
                raise ValueError('Unrecognized normalization: %s\n' %
                                 normalize)
            m = m / z
            C = C / (z**2)

            model['means'].append(m)
            model['covs'].append(C)

            if len(attributes) > 0:
                model['attribute_means'].append(m_attr)
                model['attribute_covs'].append(C_attr)

    model['means'] = np.array(model['means'])
    model['covs'] = np.array(model['covs'])
    model['attribute_means'] = np.array(model['attribute_means'])
    model['attribute_covs'] = np.array(model['attribute_covs'])

    scipy.io.savemat(outfile, model)
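
A minimal configuration sketch for surface_model, with key names taken from the checks above (paths, intervals, and regularizer values are hypothetical):

import json

config = {
    'output_model_file': './surface.mat',
    'wavelength_file': './wavelengths.txt',
    'normalize': 'Euclidean',               # or 'RMS' / 'None'
    'reference_windows': [[400, 1300], [1450, 1700], [2100, 2450]],
    'sources': [{
        'input_spectrum_files': ['./reflectance_library'],
        'n_components': 8,
        'windows': [
            {'interval': [300, 1300], 'regularizer': 1e-6,
             'correlation': 'EM'},
            {'interval': [1300, 2550], 'regularizer': 1e-6,
             'correlation': 'decorrelated'},
        ],
    }],
}
with open('surface_config.json', 'w') as f:
    json.dump(config, f, indent=2)
# surface_model('surface_config.json') would then build and save the priors.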
Example #10
def interpolate_atmosphere(reference_radiance_file: str,
                           reference_atm_file: str,
                           reference_locations_file: str,
                           segmentation_file: str,
                           input_radiance_file: str,
                           input_locations_file: str,
                           output_reflectance_file: str,
                           output_uncertainty_file: str,
                           nneighbors: int = 15,
                           nodata_value: float = -9999.0,
                           level: str = 'INFO',
                           radiance_factors: np.ndarray = None,
                           isofit_config: dict = None,
                           n_cores: int = -1) -> None:
    """
    Perform a Gaussian process interpolation of atmospheric parameters.  It relies on precalculated
    atmospheric coefficients at a subset of spatial locations stored in a file.  The file has 
    each coefficient defined for every radiance channel, appearing in the order: (1) atmospheric
    path reflectance; (2) spherical sky albedo; (3) total diffuse and direct transmittance of the 
    two-part downwelling and upwelling path; (4) extraterrestrial solar irradiance; (5) cosine of solar
    zenith angle.
    Args:
        reference_radiance_file: source file for radiance (interpolation built from this)
        reference_atm_file:  source file for atmospheric coefficients (interpolation from this)
        reference_locations_file:  source file for file locations (lon, lat, elev), (interpolation from this)
        segmentation_file: input file noting the per-pixel segmentation used
        input_radiance_file: input radiance file (interpolate over this)
        input_locations_file: input location file (interpolate over this)
        output_reflectance_file: location to write output reflectance
        output_uncertainty_file: location to write output uncertainty

        nneighbors: number of neighbors to use for interpolation
        nodata_value: nodata value of input and output
        level: logging level
        radiance_factors: radiance adjustment factors
        isofit_config: dictionary-style isofit configuration
        n_cores: number of cores to run on
    Returns:
        None
    """

    loglevel = level

    logging.basicConfig(format='%(message)s', level=loglevel)

    # Open input data to check that band formatting is correct
    # Load reference set radiance
    reference_radiance_img = envi.open(envi_header(reference_radiance_file),
                                       reference_radiance_file)
    n_reference_lines, n_radiance_bands, n_reference_columns = [
        int(reference_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if n_reference_columns != 1:
        raise IndexError("Reference data should be a single-column list")

    # Load reference set atmospheric coefficients
    reference_atm_img = envi.open(envi_header(reference_atm_file),
                                  reference_atm_file)
    nrefa, nba, srefa = [
        int(reference_atm_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if nrefa != n_reference_lines or srefa != n_reference_columns:
        raise IndexError("Reference file dimension mismatch (atmosphere)")
    if nba != (n_radiance_bands * 5):
        raise IndexError(
            "Reference atmosphere file has incorrect dimensioning")

    # Load reference set locations
    reference_locations_img = envi.open(envi_header(reference_locations_file),
                                        reference_locations_file)
    nrefl, lb, ls = [
        int(reference_locations_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if nrefl != n_reference_lines or lb != 3:
        raise IndexError("Reference file dimension mismatch (locations)")

    input_radiance_img = envi.open(envi_header(input_radiance_file),
                                   input_radiance_file)
    n_input_lines, n_input_bands, n_input_samples = [
        int(input_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if n_radiance_bands != n_input_bands:
        msg = 'Number of channels mismatch: input (%i) vs. reference (%i)'
        raise IndexError(msg % (n_input_bands, n_radiance_bands))

    input_locations_img = envi.open(envi_header(input_locations_file),
                                    input_locations_file)
    nll, nlb, nls = [
        int(input_locations_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if nll != n_input_lines or nlb != 3 or nls != n_input_samples:
        raise IndexError('Input location dimension mismatch')

    # Create output files
    output_metadata = input_radiance_img.metadata
    output_metadata['interleave'] = 'bil'
    output_reflectance_img = envi.create_image(
        envi_header(output_reflectance_file),
        ext='',
        metadata=output_metadata,
        force=True)

    output_uncertainty_img = envi.create_image(
        envi_header(output_uncertainty_file),
        ext='',
        metadata=output_metadata,
        force=True)

    # Now cleanup inputs and outputs, we'll write dynamically above
    del output_reflectance_img, output_uncertainty_img
    del reference_atm_img, reference_locations_img, input_radiance_img, input_locations_img

    # Determine the number of cores to use
    if n_cores == -1:
        n_cores = multiprocessing.cpu_count()
    n_cores = min(n_cores, n_input_lines)

    # Break data into sections
    line_sections = np.linspace(0, n_input_lines, num=n_cores + 1, dtype=int)

    # Set up our pool
    pool = multiprocessing.Pool(processes=n_cores)
    start_time = time.time()
    logging.info(
        'Beginning atmospheric interpolation inversions using {} cores'.format(
            n_cores))

    # Run the pool (or run serially)
    results = []
    for l in range(len(line_sections) - 1):
        args = (
            line_sections[l],
            line_sections[l + 1],
            reference_radiance_file,
            reference_atm_file,
            reference_locations_file,
            input_radiance_file,
            input_locations_file,
            segmentation_file,
            isofit_config,
            output_reflectance_file,
            output_uncertainty_file,
            radiance_factors,
            nneighbors,
            nodata_value,
        )
        if n_cores != 1:
            results.append(pool.apply_async(_run_chunk, args))
        else:
            _run_chunk(*args)

    pool.close()
    pool.join()

    total_time = time.time() - start_time
    logging.info(
        'Parallel empirical line inversions complete.  {} s total, {} spectra/s, {} spectra/s/core'
        .format(total_time, line_sections[-1] * n_input_samples / total_time,
                line_sections[-1] * n_input_samples / total_time / n_cores))
Example #11
def empirical_line(reference_radiance_file: str,
                   reference_reflectance_file: str,
                   reference_uncertainty_file: str,
                   reference_locations_file: str,
                   segmentation_file: str,
                   input_radiance_file: str,
                   input_locations_file: str,
                   output_reflectance_file: str,
                   output_uncertainty_file: str,
                   nneighbors: int = 400,
                   nodata_value: float = -9999.0,
                   level: str = 'INFO',
                   logfile: str = None,
                   radiance_factors: np.ndarray = None,
                   isofit_config: str = None,
                   n_cores: int = -1) -> None:
    """
    Perform an empirical line interpolation for reflectance and uncertainty extrapolation
    Args:
        reference_radiance_file: source file for radiance (interpolation built from this)
        reference_reflectance_file:  source file for reflectance (interpolation built from this)
        reference_uncertainty_file:  source file for uncertainty (interpolation built from this)
        reference_locations_file:  source file for file locations (lon, lat, elev), (interpolation built from this)
        segmentation_file: input file noting the per-pixel segmentation used
        input_radiance_file: input radiance file (interpolate over this)
        input_locations_file: input location file (interpolate over this)
        output_reflectance_file: location to write output reflectance to
        output_uncertainty_file: location to write output uncertainty to

        nneighbors: number of neighbors to use for interpolation
        nodata_value: nodata value of input and output
        level: logging level
        logfile: logging file
        radiance_factors: radiance adjustment factors
        isofit_config: path to isofit configuration JSON file
        n_cores: number of cores to run on
    Returns:
        None
    """

    loglevel = level

    logging.basicConfig(format='%(message)s', level=loglevel, filename=logfile)

    # Open input data to check that band formatting is correct
    # Load reference set radiance
    reference_radiance_img = envi.open(envi_header(reference_radiance_file),
                                       reference_radiance_file)
    n_reference_lines, n_radiance_bands, n_reference_columns = [
        int(reference_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if n_reference_columns != 1:
        raise IndexError("Reference data should be a single-column list")

    # Load reference set reflectance
    reference_reflectance_img = envi.open(
        envi_header(reference_reflectance_file), reference_reflectance_file)
    nrefr, nbr, srefr = [
        int(reference_reflectance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if nrefr != n_reference_lines or nbr != n_radiance_bands or srefr != n_reference_columns:
        raise IndexError("Reference file dimension mismatch (reflectance)")

    # Load reference set uncertainty, assuming reflectance uncertainty is
    # recorded in the first n_radiance_bands channels of data
    reference_uncertainty_img = envi.open(
        envi_header(reference_uncertainty_file), reference_uncertainty_file)
    nrefu, ns, srefu = [
        int(reference_uncertainty_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if nrefu != n_reference_lines or ns < n_radiance_bands or srefu != n_reference_columns:
        raise IndexError("Reference file dimension mismatch (uncertainty)")

    # Load reference set locations
    reference_locations_img = envi.open(envi_header(reference_locations_file),
                                        reference_locations_file)
    nrefl, lb, ls = [
        int(reference_locations_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if nrefl != n_reference_lines or lb != 3:
        raise IndexError("Reference file dimension mismatch (locations)")

    input_radiance_img = envi.open(envi_header(input_radiance_file),
                                   input_radiance_file)
    n_input_lines, n_input_bands, n_input_samples = [
        int(input_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if n_radiance_bands != n_input_bands:
        msg = 'Number of channels mismatch: input (%i) vs. reference (%i)'
        raise IndexError(msg % (n_input_bands, n_radiance_bands))

    input_locations_img = envi.open(envi_header(input_locations_file),
                                    input_locations_file)
    nll, nlb, nls = [
        int(input_locations_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if nll != n_input_lines or nlb != 3 or nls != n_input_samples:
        raise IndexError('Input location dimension mismatch')

    # Create output files
    output_metadata = input_radiance_img.metadata
    output_metadata['interleave'] = 'bil'
    output_reflectance_img = envi.create_image(
        envi_header(output_reflectance_file),
        ext='',
        metadata=output_metadata,
        force=True)

    output_uncertainty_img = envi.create_image(
        envi_header(output_uncertainty_file),
        ext='',
        metadata=output_metadata,
        force=True)

    # Now cleanup inputs and outputs, we'll write dynamically above
    del output_reflectance_img, output_uncertainty_img
    del reference_reflectance_img, reference_uncertainty_img, reference_locations_img, input_radiance_img, input_locations_img

    # Initialize ray cluster
    start_time = time.time()
    if isofit_config is not None:
        iconfig = configs.create_new_config(isofit_config)
    else:
        # If none, create a temporary config to get default ray parameters
        iconfig = configs.Config({})
    if n_cores == -1:
        n_cores = iconfig.implementation.n_cores
    rayargs = {
        'ignore_reinit_error': iconfig.implementation.ray_ignore_reinit_error,
        'local_mode': n_cores == 1,
        "address": iconfig.implementation.ip_head,
        '_temp_dir': iconfig.implementation.ray_temp_dir,
        "_redis_password": iconfig.implementation.redis_password
    }

    # We can only set the num_cpus if running on a single-node
    if iconfig.implementation.ip_head is None and iconfig.implementation.redis_password is None:
        rayargs['num_cpus'] = n_cores

    ray.init(**rayargs)
    atexit.register(ray.shutdown)

    n_ray_cores = ray.available_resources()["CPU"]
    n_cores = min(n_ray_cores, n_input_lines)

    logging.info(
        'Beginning empirical line inversions using {} cores'.format(n_cores))

    # Break data into sections
    line_sections = np.linspace(0,
                                n_input_lines,
                                num=int(n_cores + 1),
                                dtype=int)

    start_time = time.time()

    # Run the pool (or run serially)
    results = []
    for l in range(len(line_sections) - 1):
        args = (line_sections[l], line_sections[l + 1],
                reference_radiance_file, reference_reflectance_file,
                reference_uncertainty_file, reference_locations_file,
                input_radiance_file, input_locations_file, segmentation_file,
                isofit_config, output_reflectance_file,
                output_uncertainty_file, radiance_factors, nneighbors,
                nodata_value, level, logfile)
        results.append(_run_chunk.remote(*args))

    _ = ray.get(results)

    total_time = time.time() - start_time
    logging.info(
        'Parallel empirical line inversions complete.  {} s total, {} spectra/s, {} spectra/s/core'
        .format(total_time, line_sections[-1] * n_input_samples / total_time,
                line_sections[-1] * n_input_samples / total_time / n_cores))
Example #12
def _run_chunk(start_line: int, stop_line: int, reference_radiance_file: str,
               reference_reflectance_file: str,
               reference_uncertainty_file: str, reference_locations_file: str,
               input_radiance_file: str, input_locations_file: str,
               segmentation_file: str, isofit_config: str,
               output_reflectance_file: str, output_uncertainty_file: str,
               radiance_factors: np.ndarray, nneighbors: int,
               nodata_value: float, loglevel: str, logfile: str) -> None:
    """
    Args:
        start_line: line to start empirical line run at
        stop_line:  line to stop empirical line run at
        reference_radiance_file: source file for radiance (interpolation built from this)
        reference_reflectance_file:  source file for reflectance (interpolation built from this)
        reference_uncertainty_file:  source file for uncertainty (interpolation built from this)
        reference_locations_file:  source file for file locations (lon, lat, elev), (interpolation built from this)
        input_radiance_file: input radiance file (interpolate over this)
        input_locations_file: input location file (interpolate over this)
        segmentation_file: input file noting the per-pixel segmentation used
        isofit_config: path to isofit configuration JSON file
        output_reflectance_file: location to write output reflectance to
        output_uncertainty_file: location to write output uncertainty to
        radiance_factors: radiance adjustment factors
        nneighbors: number of neighbors to use for interpolation
        nodata_value: nodata value of input and output
        loglevel: logging level
        logfile: logging file

    Returns:
        None

    """

    logging.basicConfig(format='%(message)s', level=loglevel, filename=logfile)

    # Load reference images
    reference_radiance_img = envi.open(envi_header(reference_radiance_file),
                                       reference_radiance_file)
    reference_reflectance_img = envi.open(
        envi_header(reference_reflectance_file), reference_reflectance_file)
    reference_uncertainty_img = envi.open(
        envi_header(reference_uncertainty_file), reference_uncertainty_file)
    reference_locations_img = envi.open(envi_header(reference_locations_file),
                                        reference_locations_file)

    n_reference_lines, n_radiance_bands, n_reference_columns = [
        int(reference_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    n_reference_uncertainty_bands = int(
        reference_uncertainty_img.metadata['bands'])

    # Load input images
    input_radiance_img = envi.open(envi_header(input_radiance_file),
                                   input_radiance_file)
    n_input_lines, n_input_bands, n_input_samples = [
        int(input_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]

    input_locations_img = envi.open(envi_header(input_locations_file),
                                    input_locations_file)
    n_location_bands = int(input_locations_img.metadata['bands'])

    # Load output images
    output_reflectance_img = envi.open(envi_header(output_reflectance_file),
                                       output_reflectance_file)
    output_uncertainty_img = envi.open(envi_header(output_uncertainty_file),
                                       output_uncertainty_file)
    n_output_reflectance_bands = int(output_reflectance_img.metadata['bands'])
    n_output_uncertainty_bands = int(output_uncertainty_img.metadata['bands'])

    # Load reference data
    reference_locations_mm = reference_locations_img.open_memmap(
        interleave='bip', writable=False)
    reference_locations = np.array(reference_locations_mm[:, :, :]).reshape(
        (n_reference_lines, n_location_bands))

    reference_radiance_mm = reference_radiance_img.open_memmap(
        interleave='bip', writable=False)
    reference_radiance = np.array(reference_radiance_mm[:, :, :]).reshape(
        (n_reference_lines, n_radiance_bands))

    reference_reflectance_mm = reference_reflectance_img.open_memmap(
        interleave='bip', writable=False)
    reference_reflectance = np.array(
        reference_reflectance_mm[:, :, :]).reshape(
            (n_reference_lines, n_radiance_bands))

    reference_uncertainty_mm = reference_uncertainty_img.open_memmap(
        interleave='bip', writable=False)
    reference_uncertainty = np.array(
        reference_uncertainty_mm[:, :, :]).reshape(
            (n_reference_lines, n_reference_uncertainty_bands))
    reference_uncertainty = reference_uncertainty[:, :n_radiance_bands].reshape(
        (n_reference_lines, n_radiance_bands))

    # Load segmentation data
    if segmentation_file:
        segmentation_img = envi.open(envi_header(segmentation_file),
                                     segmentation_file)
        segmentation_img = segmentation_img.read_band(0)
    else:
        segmentation_img = None

    # Prepare instrument model, if available
    if isofit_config is not None:
        config = configs.create_new_config(isofit_config)
        instrument = Instrument(config)
        logging.info('Loading instrument')

        # Make sure the instrument is configured for single-pixel noise (no averaging)
        instrument.integrations = 1
    else:
        instrument = None

    # Load radiance factors
    if radiance_factors is None:
        radiance_adjustment = np.ones(n_radiance_bands, )
    else:
        radiance_adjustment = np.loadtxt(radiance_factors)

    # Load Tree
    loc_scaling = np.array([1e5, 1e5, 0.1])
    scaled_ref_loc = reference_locations * loc_scaling
    tree = KDTree(scaled_ref_loc)
    # Assume (heuristically) that, for distance purposes, 1 m vertically is
    # comparable to 10 m horizontally, and that there are 100 km per degree
    # of latitude.  This is all approximate, of course.  Elevation appears in
    # the third element; the first two are latitude/longitude coordinates.

    # Iterate through image
    hash_table = {}

    for row in np.arange(start_line, stop_line):

        # Load inline input data
        input_radiance_mm = input_radiance_img.open_memmap(interleave='bip',
                                                           writable=False)
        input_radiance = np.array(input_radiance_mm[row, :, :])
        input_radiance = input_radiance * radiance_adjustment

        input_locations_mm = input_locations_img.open_memmap(interleave='bip',
                                                             writable=False)
        input_locations = np.array(input_locations_mm[row, :, :])

        output_reflectance_row = np.zeros(input_radiance.shape) + nodata_value
        output_uncertainty_row = np.zeros(input_radiance.shape) + nodata_value

        nspectra, start = 0, time.time()
        for col in np.arange(n_input_samples):

            x = input_radiance[col, :]
            if np.all(np.isclose(x, nodata_value)):
                output_reflectance_row[col, :] = nodata_value
                output_uncertainty_row[col, :] = nodata_value
                continue

            bhat = None
            if segmentation_img is not None:
                hash_idx = segmentation_img[row, col]
                if hash_idx in hash_table:
                    bhat, bmarg, bcov = hash_table[hash_idx]
                else:
                    loc = reference_locations[
                        np.array(hash_idx, dtype=int), :] * loc_scaling
            else:
                loc = input_locations[col, :] * loc_scaling

            if bhat is None:
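                # No cached fit for this location: query the KD-tree for the
                # nearest reference spectra and fit a per-band linear model
                # relating radiance to reflectance.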
                dists, nn = tree.query(loc, nneighbors)
                xv = reference_radiance[nn, :]
                yv = reference_reflectance[nn, :]
                uv = reference_uncertainty[nn, :]
                bhat = np.zeros((n_radiance_bands, 2))
                bmarg = np.zeros((n_radiance_bands, 2))
                bcov = np.zeros((n_radiance_bands, 2, 2))

                for i in np.arange(n_radiance_bands):
                    use = yv[:, i] > 0
                    n = sum(use)
                    X = np.concatenate((np.ones((n, 1)), xv[use, i:i + 1]),
                                       axis=1)
                    # Identity weights; inverse-uncertainty weighting
                    # (1 / uv[use, i]) is disabled for now.
                    W = np.diag(np.ones(n))
                    y = yv[use, i:i + 1]
                    try:
                        bhat[i, :] = (inv(X.T @ W @ X) @ X.T @ W @ y).T
                        bcov[i, :, :] = inv(X.T @ W @ X)
                    except Exception:
                        # Singular fit (e.g., too few valid samples): flag
                        # this band with a zero model.
                        bhat[i, :] = 0
                        bcov[i, :, :] = 0
                    bmarg[i, :] = np.diag(bcov[i, :, :])

            if (segmentation_img is not None) and not (hash_idx in hash_table):
                hash_table[hash_idx] = bhat, bmarg, bcov

            A = np.array((np.ones(n_radiance_bands), x))
            output_reflectance_row[col, :] = (np.multiply(bhat.T,
                                                          A).sum(axis=0))

            # Calculate uncertainties.  Sy approximation rather than Seps for
            # speed, for now... but we do take into account instrument
            # radiometric uncertainties
            if instrument is None:
                output_uncertainty_row[col, :] = np.sqrt(
                    np.multiply(bmarg.T, A).sum(axis=0))
            else:
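                # Instrument model available: combine the radiance noise
                # covariance (Sy) with radiometric calibration uncertainty.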
                Sy = instrument.Sy(x, geom=None)
                calunc = instrument.bval[:instrument.n_chan]
                output_uncertainty_row[col, :] = np.sqrt(
                    np.diag(Sy) + pow(calunc * x, 2)) * bhat[:, 1]
            # if loglevel == 'DEBUG':
            #    plot_example(xv, yv, bhat)

            nspectra = nspectra + 1

        elapsed = float(time.time() - start)
        logging.info('row {}/{}, ({}/{} local), {} spectra per second'.format(
            row, n_input_lines, int(row - start_line),
            int(stop_line - start_line), round(float(nspectra) / elapsed, 2)))

        del input_locations_mm
        del input_radiance_mm

        output_reflectance_row = output_reflectance_row.transpose((1, 0))
        output_uncertainty_row = output_uncertainty_row.transpose((1, 0))
        shp = output_reflectance_row.shape
        output_reflectance_row = output_reflectance_row.reshape(
            (1, shp[0], shp[1]))
        shp = output_uncertainty_row.shape
        output_uncertainty_row = output_uncertainty_row.reshape(
            (1, shp[0], shp[1]))

        write_bil_chunk(
            output_reflectance_row, output_reflectance_file, row,
            (n_input_lines, n_output_reflectance_bands, n_input_samples))
        write_bil_chunk(
            output_uncertainty_row, output_uncertainty_file, row,
            (n_input_lines, n_output_uncertainty_bands, n_input_samples))
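The inner loop above fits, for each band, an ordinary least-squares line relating radiance to reflectance over the nearest reference spectra. A minimal self-contained sketch of that per-band fit, using hypothetical toy arrays in place of the neighbor values xv and yv:

import numpy as np
from numpy.linalg import inv

# Toy stand-ins for one band's neighbor radiances (xv) and reflectances (yv);
# in the code above these come from the KD-tree query.
xv = np.array([1.0, 2.0, 3.0, 4.0])
yv = 0.1 + 0.05 * xv + np.random.normal(0, 1e-3, xv.shape)

# Design matrix with an intercept column: reflectance ~ b0 + b1 * radiance
X = np.column_stack((np.ones_like(xv), xv))
b = inv(X.T @ X) @ X.T @ yv   # [intercept, slope]
bcov = inv(X.T @ X)           # parameter covariance (unit noise assumed)

# Apply the fit to a new radiance value
predicted_reflectance = b[0] + b[1] * 2.5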
Beispiel #13
0
    def __init__(self,
                 fname,
                 write=False,
                 n_rows=None,
                 n_cols=None,
                 n_bands=None,
                 interleave=None,
                 dtype=np.float32,
                 wavelengths=None,
                 fwhm=None,
                 band_names=None,
                 bad_bands='[]',
                 zrange='{0.0, 1.0}',
                 flag=-9999.0,
                 ztitles='{Wavelength (nm), Magnitude}',
                 map_info='{}'):
        """."""

        self.frames = OrderedDict()
        self.write = write
        self.fname = os.path.abspath(fname)
        self.wl = wavelengths
        self.band_names = band_names
        self.fwhm = fwhm
        self.flag = flag
        self.n_rows = n_rows
        self.n_cols = n_cols
        self.n_bands = n_bands

        if self.fname.endswith('.txt'):

            # The .txt suffix implies a space-separated ASCII text file of
            # one or more data columns.  This is cheap to load and store, so
            # we do not defer read/write operations.
            logging.debug('Inferred ASCII file format for %s' % self.fname)
            self.format = 'ASCII'
            if not self.write:
                self.data, self.wl = load_spectrum(self.fname)
                self.n_rows, self.n_cols, self.map_info = 1, 1, '{}'
                if self.wl is not None:
                    self.n_bands = len(self.wl)
                else:
                    self.n_bands = None
                self.meta = {}

        elif self.fname.endswith('.mat'):

            # The .mat suffix implies a matlab-style file, i.e. a dictionary
            # of 2D arrays and other matlab-like objects. This is typically
            # only used for specific output products associated with single
            # spectrum retrievals; there is no read option.
            logging.debug('Inferred MATLAB file format for %s' % self.fname)
            self.format = 'MATLAB'
            if not self.write:
                logging.error('Unsupported MATLAB file in input block')
                raise IOError('MATLAB format in input block not supported')

        else:

            # Otherwise we assume it is an ENVI-format file, which is
            # basically just a binary data cube with a detached human-
            # readable ASCII header describing dimensions, interleave, and
            # metadata.  We buffer this data in self.frames, reading and
            # writing individual rows of the cube on-demand.
            logging.debug('Inferred ENVI file format for %s' % self.fname)
            self.format = 'ENVI'

            if not self.write:

                # If we are an input file, the header must preexist.
                if not os.path.exists(envi_header(self.fname)):
                    logging.error('Could not find %s' %
                                  (envi_header(self.fname)))
                    raise IOError('Could not find %s' %
                                  (envi_header(self.fname)))

                # open file and copy metadata
                self.file = envi.open(envi_header(self.fname), fname)
                self.meta = self.file.metadata.copy()

                self.n_rows = int(self.meta['lines'])
                self.n_cols = int(self.meta['samples'])
                self.n_bands = int(self.meta['bands'])
                if 'data ignore value' in self.meta:
                    self.flag = float(self.meta['data ignore value'])
                else:
                    self.flag = -9999.0

            else:

                # If we are an output file, we may have to build the header
                # from scratch.  Hopefully the caller has supplied the
                # necessary metadata details.
                meta = {
                    'lines': n_rows,
                    'samples': n_cols,
                    'bands': n_bands,
                    'byte order': 0,
                    'header offset': 0,
                    'map info': map_info,
                    'file type': 'ENVI Standard',
                    'sensor type': 'unknown',
                    'interleave': interleave,
                    'data type': typemap[dtype],
                    'wavelength units': 'nm',
                    'z plot range': zrange,
                    'z plot titles': ztitles,
                    'fwhm': fwhm,
                    'bbl': bad_bands,
                    'band names': band_names,
                    'wavelength': self.wl
                }

                for k, v in meta.items():
                    if v is None:
                        logging.error('Must specify %s' % (k))
                        raise IOError('Must specify %s' % (k))

                if not os.path.isfile(envi_header(fname)):
                    self.file = envi.create_image(envi_header(fname),
                                                  meta,
                                                  ext='',
                                                  force=True)
                else:
                    self.file = envi.open(envi_header(fname))

            self.open_map_with_retries()
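Creating an ENVI output cube with the spectral library follows the same pattern as the metadata block above. A minimal sketch, with hypothetical file names and dimensions:

import numpy as np
from spectral.io import envi

# Hypothetical dimensions for a small output cube
meta = {
    'lines': 10, 'samples': 20, 'bands': 3,
    'header offset': 0, 'byte order': 0,
    'file type': 'ENVI Standard',
    'interleave': 'bil',
    'data type': 4,  # ENVI code 4 = 32-bit float
}
img = envi.create_image('example.hdr', meta, ext='', force=True)
mm = img.open_memmap(interleave='source', writable=True)
mm[:, :, :] = np.zeros((10, 3, 20), dtype=np.float32)  # BIL: (lines, bands, samples)
del mm  # flush the memory map to disk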
Beispiel #14
0
def do_inverse(isofit_inv: dict, radfile: pathlib.Path,
               est_refl_file: pathlib.Path, est_state_file: pathlib.Path,
               atm_coef_file: pathlib.Path, post_unc_file: pathlib.Path,
               overwrite: bool, use_empirical_line: bool):
    if use_empirical_line:
        # Segment first, then run on segmented file
        SEGMENTATION_SIZE = 40
        CHUNKSIZE = 256
        lbl_working_path = radfile.parent / str(radfile).replace(
            "toa-radiance", "segmentation")
        # NOTE: pathlib's with_suffix() requires a suffix starting with '.',
        # so build the "-subs" (subset) paths by string concatenation.
        rdn_subs_path = pathlib.Path(f"{radfile}-subs")
        rfl_subs_path = pathlib.Path(f"{est_refl_file}-subs")
        state_subs_path = pathlib.Path(f"{est_state_file}-subs")
        atm_subs_path = pathlib.Path(f"{atm_coef_file}-subs")
        unc_subs_path = pathlib.Path(f"{post_unc_file}-subs")
        isofit_inv["input"]["measured_radiance_file"] = str(rdn_subs_path)
        isofit_inv["output"] = {
            "estimated_reflectance_file": str(rfl_subs_path),
            "estimated_state_file": str(state_subs_path),
            "atmospheric_coefficients_file": str(atm_subs_path),
            "posterior_uncertainty_file": str(unc_subs_path)
        }
        if (not overwrite and lbl_working_path.exists()
                and rdn_subs_path.exists()):
            logger.info(
                "Skipping segmentation and extraction because files exist.")
        else:
            logger.info(
                "Fixing any radiance values slightly less than zero...")
            rad_img = sp.open_image(envi_header(str(radfile)))
            rad_m = rad_img.open_memmap(writable=True)
            nearzero = np.logical_and(rad_m < 0, rad_m > -2)
            rad_m[nearzero] = 0.0001
            del rad_m
            del rad_img
            logger.info("Segmenting...")
            segment(spectra=(str(radfile), str(lbl_working_path)),
                    flag=-9999,
                    npca=5,
                    segsize=SEGMENTATION_SIZE,
                    nchunk=CHUNKSIZE)
            logger.info("Extracting...")
            extractions(inputfile=str(radfile),
                        labels=str(lbl_working_path),
                        output=str(rdn_subs_path),
                        chunksize=CHUNKSIZE,
                        flag=-9999)

    else:
        # Run Isofit directly
        isofit_inv["input"]["measured_radiance_file"] = str(radfile)
        isofit_inv["output"] = {
            "estimated_reflectance_file": str(est_refl_file),
            "estimated_state_file": str(est_state_file),
            "atmospheric_coefficients_file": str(atm_coef_file),
            "posterior_uncertainty_file": str(post_unc_file)
        }

    if not overwrite and pathlib.Path(
            isofit_inv["output"]["estimated_reflectance_file"]).exists():
        logger.info("Skipping inversion because output file exists.")
    else:
        invfile = radfile.parent / (
            str(radfile).replace("toa-radiance", "inverse") + ".json")
        with open(invfile, "w") as f:
            json.dump(isofit_inv, f, indent=2)
        Isofit(invfile).run()

    if use_empirical_line:
        if not overwrite and est_refl_file.exists():
            logger.info("Skipping empirical line because output exists.")
        else:
            logger.info("Applying empirical line...")
            empirical_line(reference_radiance_file=str(rdn_subs_path),
                           reference_reflectance_file=str(rfl_subs_path),
                           reference_uncertainty_file=str(unc_subs_path),
                           reference_locations_file=None,
                           segmentation_file=str(lbl_working_path),
                           input_radiance_file=str(radfile),
                           input_locations_file=None,
                           output_reflectance_file=str(est_refl_file),
                           output_uncertainty_file=str(post_unc_file),
                           isofit_config=str(invfile))
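A hedged usage sketch of do_inverse (all paths hypothetical; a real call needs a complete Isofit inversion configuration dict rather than the placeholder shown here):

import pathlib

outdir = pathlib.Path("out")
do_inverse(isofit_inv={"input": {}, "implementation": {"mode": "inversion"}},
           radfile=outdir / "toa-radiance",
           est_refl_file=outdir / "estimated-reflectance",
           est_state_file=outdir / "estimated-state",
           atm_coef_file=outdir / "atmospheric-coefficients",
           post_unc_file=outdir / "posterior-uncertainty",
           overwrite=False,
           use_empirical_line=False)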
Beispiel #15
0
def do_hypertrace(isofit_config,
                  wavelength_file,
                  reflectance_file,
                  rtm_template_file,
                  lutdir,
                  outdir,
                  surface_file="./data/prior.mat",
                  noisefile=None,
                  snr=300,
                  aod=0.1,
                  h2o=1.0,
                  atmosphere_type="ATM_MIDLAT_WINTER",
                  atm_aod_h2o=None,
                  solar_zenith=0,
                  observer_zenith=0,
                  solar_azimuth=0,
                  observer_azimuth=0,
                  observer_altitude_km=99.9,
                  dayofyear=200,
                  latitude=34.15,
                  longitude=-118.14,
                  localtime=10.0,
                  elevation_km=0.01,
                  inversion_mode="inversion",
                  use_empirical_line=False,
                  calibration_uncertainty_file=None,
                  n_calibration_draws=1,
                  calibration_scale=1,
                  create_lut=True,
                  overwrite=False):
    """One iteration of the hypertrace workflow.

    Required arguments:
        `isofit_config`: dict of isofit configuration options

        `wavelength_file`: Path to ASCII space delimited table containing two
        columns, wavelength and full width half max (FWHM); both in nanometers.

        `reflectance_file`: Path to input reflectance file. Note that this has
        to be an ENVI-formatted binary reflectance file, and this path is to the
        associated header file (`.hdr`), not the image file itself (following
        the convention of the `spectral` Python library, which will be used to
        read this file).

        `rtm_template_file`: Path to the atmospheric RTM template. For LibRadtran,
        note that this is slightly different from the Isofit template in that
        the Isofit fields are surrounded by two sets of `{{` while a few
        additional options related to geometry are surrounded by just `{` (this
        is because Hypertrace does an initial pass at formatting the files).

        `lutdir`: Directory where look-up tables will be stored. Will be created
        if missing.

        `outdir`: Directory where outputs will be stored. Will be created if
        missing.

    Keyword arguments:
      surface_file: Matlab (`.mat`) file containing a multicomponent surface
      prior. See Isofit documentation for details.

      noisefile: Parametric instrument noise file. See Isofit documentation for
      details. Default = `None`

      snr: Instrument signal-to-noise ratio. Ignored if `noisefile` is present.
      Default = 300

      aod: True aerosol optical depth. Default = 0.1

      h2o: True water vapor content. Default = 1.0

      atmosphere_type: LibRadtran or Modtran atmosphere type. See RTM
      manuals for details. Default = `ATM_MIDLAT_WINTER`

      atm_aod_h2o: A list containing three elements: The atmosphere type, AOD,
      and H2O. This provides a way to iterate over specific known atmospheres
      that are combinations of the three previous variables. If this is set, it
      overrides the three previous arguments. Default = `None`

      solar_zenith, observer_zenith: Solar and observer zenith angles,
      respectively (0 = directly overhead, 90 = horizon). These are in degrees
      off nadir. Default = 0 for both. (Note that off-nadir angles make
      LibRadtran run _much_ more slowly, so be prepared if you need to generate
      those LUTs). (Note: For `modtran` and `modtran_simulator`, `solar_zenith`
      is calculated from the `gmtime` and location, so this parameter is ignored.)

      solar_azimuth, observer_azimuth: Solar and observer azimuth angles,
      respectively, in degrees. Observer azimuth is the sensor _position_ (so
      180 degrees off from view direction) relative to N, rotating
      counterclockwise; i.e., 0 = Sensor in N, looking S; 90 = Sensor in W,
      looking E (this follows the LibRadtran convention). Default = 0 for both.
      Note: For `modtran` and `modtran_simulator`, `observer_azimuth` is used as
      `to_sensor_azimuth`; i.e., the *relative* azimuth of the sensor. The true
      solar azimuth is calculated from lat/lon and time, so `solar_azimuth` is ignored.

      observer_altitude_km: Sensor altitude in km. Must be less than 100. Default = 99.9.
      (`modtran` and `modtran_simulator` only)

      dayofyear: Day of year (1-366) of the observation. Default = 200
      (`modtran` and `modtran_simulator` only)

      latitude, longitude: Decimal degree coordinates of observation. Default =
      34.15, -118.14 (Pasadena, CA).
      (`modtran` and `modtran_simulator` only)

      localtime: Local time, in decimal hours (0-24). Default = 10.0
      (`modtran` and `modtran_simulator` only)

      elevation_km: Target elevation above sea level, in km. Default = 0.01
      (`modtran` and `modtran_simulator` only)

      inversion_mode: Inversion algorithm to use. Must be "inversion"
      (default) for standard optimal estimation, "mcmc_inversion" for MCMC, or
      "simple" to run the optimal estimation code capped at a single function
      evaluation (max_nfev = 1).

      use_empirical_line: (boolean, default = `False`) If `True`, perform
      atmospheric correction on a segmented image and then resample using the
      empirical line method. If `False`, run Isofit pixel-by-pixel.

      calibration_uncertainty_file: Matlab (`.mat`) file containing a
      `Covariance` matrix and associated `wavelengths` describing radiometric
      calibration uncertainty. If set, the workflow draws
      `n_calibration_draws` perturbed radiance files (each scaled by
      `calibration_scale`) and inverts each one. Default = `None`

      create_lut: (boolean, default = `True`) If `True`, build (or reuse) the
      atmospheric look-up tables. If `False`, skip LUT construction.

      overwrite: (boolean, default = `False`) If `False` (default), skip steps
      where output files already exist. If `True`, run the full workflow
      regardless of existing files.
    """

    outdir = mkabs(outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    assert observer_altitude_km < 100, "Isofit 6S does not support altitude >= 100km"

    isofit_common = copy.deepcopy(isofit_config)
    # NOTE: All of these settings are *not* copied, but referenced. So these
    # changes propagate to the `forward_settings` object below.
    forward_settings = isofit_common["forward_model"]
    instrument_settings = forward_settings["instrument"]
    # NOTE: This also propagates to the radiative transfer engine
    instrument_settings["wavelength_file"] = str(mkabs(wavelength_file))
    surface_settings = forward_settings["surface"]
    surface_settings["surface_file"] = str(mkabs(surface_file))
    if noisefile is not None:
        noisetag = f"noise_{pathlib.Path(noisefile).stem}"
        if "SNR" in instrument_settings:
            instrument_settings.pop("SNR")
        instrument_settings["parametric_noise_file"] = str(mkabs(noisefile))
        if "integrations" not in instrument_settings:
            instrument_settings["integrations"] = 1
    elif snr is not None:
        noisetag = f"snr_{snr}"
        instrument_settings["SNR"] = snr

    priortag = f"prior_{pathlib.Path(surface_file).stem}__" +\
        f"inversion_{inversion_mode}"

    if atm_aod_h2o is not None:
        atmosphere_type = atm_aod_h2o[0]
        aod = atm_aod_h2o[1]
        h2o = atm_aod_h2o[2]

    atmtag = f"aod_{aod:.3f}__h2o_{h2o:.3f}"
    if calibration_uncertainty_file is not None:
        caltag = f"cal_{pathlib.Path(calibration_uncertainty_file).stem}__" +\
                f"draw_{n_calibration_draws}__" +\
                f"scale_{calibration_scale}"
    else:
        caltag = "cal_NONE__draw_0__scale_0"

    if create_lut:
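        # Build (or reuse) the atmospheric look-up tables for the configured
        # RTM engine.  NOTE: `lrttag` and `lutdir2` are only defined inside
        # this block but are referenced below when assembling the output
        # directory, so create_lut=False requires defining them elsewhere.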
        lutdir = mkabs(lutdir)
        lutdir.mkdir(parents=True, exist_ok=True)
        vswir_conf = forward_settings["radiative_transfer"][
            "radiative_transfer_engines"]["vswir"]
        atmospheric_rtm = vswir_conf["engine_name"]

        if atmospheric_rtm == "libradtran":
            lrttag = f"atm_{atmosphere_type}__" +\
                f"szen_{solar_zenith:.2f}__" +\
                f"ozen_{observer_zenith:.2f}__" +\
                f"saz_{solar_azimuth:.2f}__" +\
                f"oaz_{observer_azimuth:.2f}"
            lutdir2 = lutdir / lrttag
            lutdir2.mkdir(parents=True, exist_ok=True)
            lrtfile = lutdir2 / "lrt-template.inp"
            with open(rtm_template_file, "r") as f:
                fs = f.read()
            with open(lrtfile, "w") as f:
                f.write(fs.format(
                    atmosphere=atmosphere_type,
                    solar_azimuth=solar_azimuth,
                    solar_zenith=solar_zenith,
                    cos_observer_zenith=np.cos(observer_zenith * np.pi / 180.0),
                    observer_azimuth=observer_azimuth))
            with open(lutdir2 / "prescribed_geom", "w") as f:
                f.write(f"99:99:99   {solar_zenith}  {solar_azimuth}")

        elif atmospheric_rtm in ("modtran", "sRTMnet"):
            loctag = f"atm_{atmosphere_type}__" +\
                f"alt_{observer_altitude_km:.2f}__" +\
                f"doy_{dayofyear:.0f}__" +\
                f"lat_{latitude:.3f}__lon_{longitude:.3f}"
            angtag = f"az_{observer_azimuth:.2f}__" +\
                f"zen_{180 - observer_zenith:.2f}__" +\
                f"time_{localtime:.2f}__" +\
                f"elev_{elevation_km:.2f}"
            lrttag = loctag + "/" + angtag
            lutdir2 = lutdir / lrttag
            lutdir2.mkdir(parents=True, exist_ok=True)
            lrtfile = lutdir2 / "modtran-template-h2o.json"
            mt_params = {
                "atmosphere_type": atmosphere_type,
                "fid": "hypertrace",
                "altitude_km": observer_altitude_km,
                "dayofyear": dayofyear,
                "latitude": latitude,
                "longitude": longitude,
                "to_sensor_azimuth": observer_azimuth,
                "to_sensor_zenith": 180 - observer_zenith,
                "gmtime": localtime,
                "elevation_km": elevation_km,
                "output_file": lrtfile,
                "ihaze_type": "AER_NONE"
            }
            write_modtran_template(**mt_params)
            mt_params["ihaze_type"] = "AER_RURAL"
            mt_params["output_file"] = lutdir2 / "modtran-template.json"
            write_modtran_template(**mt_params)

            vswir_conf["modtran_template_path"] = str(mt_params["output_file"])
            if atmospheric_rtm == "sRTMnet":
                vswir_conf["interpolator_base_path"] = str(
                    lutdir2 / "sRTMnet_interpolator")
                # These need to be absolute file paths
                for path in [
                        "emulator_aux_file", "emulator_file",
                        "earth_sun_distance_file", "irradiance_file"
                ]:
                    vswir_conf[path] = str(mkabs(vswir_conf[path]))

        else:
            raise ValueError(f"Invalid atmospheric rtm {atmospheric_rtm}")

        vswir_conf["lut_path"] = str(lutdir2)
        vswir_conf["template_file"] = str(lrtfile)

    outdir2 = outdir / lrttag / noisetag / priortag / atmtag / caltag
    outdir2.mkdir(parents=True, exist_ok=True)

    # Observation file, which describes the geometry
    # Angles follow LibRadtran conventions
    obsfile = outdir2 / "obs.txt"
    geomvec = [
        -999,  # path length; not used
        observer_azimuth,  # Degrees 0-360; 0 = Sensor in N, looking S; 90 = Sensor in W, looking E
        observer_zenith,  # Degrees 0-90; 0 = directly overhead, 90 = horizon
        solar_azimuth,  # Degrees 0-360; 0 = Sun in S; 90 = Sun in W.
        solar_zenith,  # Same units as observer zenith
        180.0 - abs(observer_zenith),  # MODTRAN OBSZEN -- t
        observer_azimuth - solar_azimuth + 180.0,  # MODTRAN relative azimuth
        observer_azimuth,  # MODTRAN azimuth
        np.cos(observer_zenith * np.pi /
               180.0)  # LibRadtran cos(observer zenith)
    ]
    np.savetxt(obsfile, np.array([geomvec]))

    isofit_common["input"] = {"obs_file": str(obsfile)}

    isofit_fwd = copy.deepcopy(isofit_common)
    isofit_fwd["input"]["reflectance_file"] = str(mkabs(reflectance_file))
    isofit_fwd["implementation"]["mode"] = "simulation"
    isofit_fwd["implementation"]["inversion"]["simulation_mode"] = True
    fwd_surface = isofit_fwd["forward_model"]["surface"]
    fwd_surface["surface_category"] = "surface"

    # Check that prior and wavelength file have the same dimensions
    prior = loadmat(mkabs(surface_file))
    prior_wl = prior["wl"][0]
    prior_nwl = len(prior_wl)
    file_wl = np.loadtxt(wavelength_file)
    file_nwl = file_wl.shape[0]
    assert prior_nwl == file_nwl, \
        f"Mismatch between wavelength file ({file_nwl}) " +\
        f"and prior ({prior_nwl})."

    fwd_surface["wavelength_file"] = str(wavelength_file)

    radfile = outdir2 / "toa-radiance"
    isofit_fwd["output"] = {"simulated_measurement_file": str(radfile)}
    fwd_state = isofit_fwd["forward_model"]["radiative_transfer"][
        "statevector"]
    fwd_state["AOT550"]["init"] = aod
    fwd_state["H2OSTR"]["init"] = h2o

    # Also set the LUT grid to only target state. We don't want to interpolate
    # over the LUT for our forward simulations!
    fwd_lut = isofit_fwd["forward_model"]["radiative_transfer"]["lut_grid"]
    fwd_lut["AOT550"] = [aod]
    fwd_lut["H2OSTR"] = [h2o]
    # Also have to create a one-off LUT directory for the forward run, to avoid
    # using an (incorrect) previously cached one.
    fwd_lutdir = outdir2 / "fwd_lut"
    fwd_lutdir.mkdir(parents=True, exist_ok=True)
    fwd_vswir = (isofit_fwd["forward_model"]["radiative_transfer"]
                 ["radiative_transfer_engines"]["vswir"])
    fwd_vswir["lut_path"] = str(fwd_lutdir)
    fwd_vswir["interpolator_base_path"] = str(fwd_lutdir)

    if radfile.exists() and not overwrite:
        logger.info("Skipping forward simulation because file exists.")
    else:
        fwdfile = outdir2 / "forward.json"
        with open(fwdfile, "w") as f:
            json.dump(isofit_fwd, f, indent=2)
        logger.info("Starting forward simulation.")
        Isofit(fwdfile).run()
        logger.info("Forward simulation complete.")

    isofit_inv = copy.deepcopy(isofit_common)
    if inversion_mode == "simple":
        # Special case! Use the optimal estimation code, but set `max_nfev` to 1.
        inversion_mode = "inversion"
        imp_inv = isofit_inv["implementation"]["inversion"]
        if "least_squares_params" not in imp_inv:
            imp_inv["least_squares_params"] = {}
        imp_inv["least_squares_params"]["max_nfev"] = 1
    isofit_inv["implementation"]["mode"] = inversion_mode
    isofit_inv["input"]["measured_radiance_file"] = str(radfile)
    est_refl_file = outdir2 / "estimated-reflectance"

    # Inverse mode outputs
    est_state_file = outdir2 / "estimated-state"
    atm_coef_file = outdir2 / "atmospheric-coefficients"
    post_unc_file = outdir2 / "posterior-uncertainty"
    isofit_inv["output"] = {
        "estimated_reflectance_file": str(est_refl_file),
        "estimated_state_file": str(est_state_file),
        "atmospheric_coefficients_file": str(atm_coef_file),
        "posterior_uncertainty_file": str(post_unc_file)
    }

    # Run the workflow
    if calibration_uncertainty_file is not None:
        # Apply calibration uncertainty here
        calmat = loadmat(calibration_uncertainty_file)
        cov = calmat["Covariance"]
        cov_l = np.linalg.cholesky(cov)
        cov_wl = np.squeeze(calmat["wavelengths"])
        rad_img = sp.open_image(envi_header(str(radfile)))
        rad_wl = rad_img.bands.centers
        del rad_img
        for ical in range(n_calibration_draws):
            icalp1 = ical + 1
            radfile_cal = f"{str(radfile)}-{icalp1:02d}"
            reflfile_cal = f"{str(est_refl_file)}-{icalp1:02d}"
            statefile_cal = f"{str(est_state_file)}-{icalp1:02d}"
            atmfile_cal = f"{str(atm_coef_file)}-{icalp1:02d}"
            uncfile_cal = f"{str(post_unc_file)}-{icalp1:02d}"
            if pathlib.Path(reflfile_cal).exists() and not overwrite:
                logger.info("Skipping calibration %d/%d because output exists",
                            icalp1, n_calibration_draws)
                continue
            logger.info("Applying calibration uncertainty (%d/%d)", icalp1,
                        n_calibration_draws)
            sample_calibration_uncertainty(radfile,
                                           radfile_cal,
                                           cov_l,
                                           cov_wl,
                                           rad_wl,
                                           bias_scale=calibration_scale)
            logger.info("Starting inversion (calibration %d/%d)", icalp1,
                        n_calibration_draws)
            do_inverse(copy.deepcopy(isofit_inv),
                       radfile_cal,
                       reflfile_cal,
                       statefile_cal,
                       atmfile_cal,
                       uncfile_cal,
                       overwrite=overwrite,
                       use_empirical_line=use_empirical_line)
            logger.info("Inversion complete (calibration %d/%d)", icalp1,
                        n_calibration_draws)

    else:
        if est_refl_file.exists() and not overwrite:
            logger.info("Skipping inversion because output exists.")
        else:
            logger.info("Starting inversion.")
            do_inverse(copy.deepcopy(isofit_inv),
                       radfile,
                       est_refl_file,
                       est_state_file,
                       atm_coef_file,
                       post_unc_file,
                       overwrite=overwrite,
                       use_empirical_line=use_empirical_line)
            logger.info("Inversion complete.")
    logger.info("Workflow complete!")
Beispiel #16
0
def segment_chunk(lstart,
                  lend,
                  in_file,
                  nodata_value,
                  npca,
                  segsize,
                  logfile=None,
                  loglevel='INFO'):
    """
    Segment a small chunk of the image

    Args:
        lstart: starting position in image file
        lend:  stopping position in image file
        in_file: file path to segment
        nodata_value: value to ignore
        npca:  number of pca components to use
        segsize: mean segmentation size
        logfile: logging file name
        loglevel: logging level

    Returns:
        lstart: starting position in image file
        lend: stopping position in image file
        labels: labeled image chunk

    """
    logging.basicConfig(format='%(levelname)s:%(message)s',
                        level=loglevel,
                        filename=logfile)

    logging.info(f'{lstart}: starting')

    in_img = envi.open(envi_header(in_file), in_file)
    meta = in_img.metadata
    nl, nb, ns = [int(meta[n]) for n in ('lines', 'bands', 'samples')]
    img_mm = in_img.open_memmap(interleave='bip', writable=False)

    # Do quick single-band screen before reading all bands
    use = np.logical_not(
        np.isclose(np.array(img_mm[lstart:lend, :, 0]), nodata_value))
    if np.sum(use) == 0:
        logging.info(f'{lstart}: no non null data present, returning early')
        return lstart, lend, np.zeros((use.shape[0], ns))

    x = np.array(img_mm[lstart:lend, :, :]).astype(np.float32)
    nc = x.shape[0]
    x = x.reshape((nc * ns, nb))
    logging.debug(f'{lstart}: read and reshaped data')

    # Excluding bad locations, calculate top PCA coefficients
    use = np.all(abs(x - nodata_value) > 1e-6, axis=1)

    # If this chunk is empty, return immediately
    if np.sum(use) == 0:
        logging.info(f'{lstart}: no non null data present, returning early')
        return lstart, lend, np.zeros((nc, ns))

    mu = x[use, :].mean(axis=0)
    C = np.cov(x[use, :], rowvar=False)
    v, d = scipy.linalg.eigh(C)

    # Determine segmentation compactness scaling based on eigenvalues
    # Override with a floor value to prevent zeros
    cmpct = scipy.linalg.norm(np.sqrt(v[-npca:]))
    if cmpct < 1e-6:
        cmpct = 10.0
        logging.info(f'{lstart}: compactness override: {cmpct}')

    # Project, redimension as an image with "npca" channels, and segment
    x_pca_subset = (x[use, :] - mu) @ d[:, -npca:]
    del x, mu, d
    x_pca = np.zeros((nc, ns, npca))
    x_pca[use.reshape(nc, ns), :] = x_pca_subset
    del x_pca_subset

    seg_in_chunk = int(sum(use) / float(segsize))

    logging.debug(f'{lstart}: starting slic')
    labels = slic(x_pca,
                  n_segments=seg_in_chunk,
                  compactness=cmpct,
                  max_iter=10,
                  sigma=0,
                  multichannel=True,
                  enforce_connectivity=True,
                  min_size_factor=0.5,
                  max_size_factor=3,
                  mask=use.reshape(nc, ns))

    # Reindex the subscene labels and place them into the larger scene
    labels = labels.reshape([nc * ns])
    labels[np.logical_not(use)] = 0
    labels = labels.reshape([nc, ns])

    logging.info(f'{lstart}: completing')
    return lstart, lend, labels
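The PCA used for segmentation is an eigendecomposition of the band covariance followed by projection onto the leading eigenvectors. The same computation in isolation, on random stand-in data:

import numpy as np
import scipy.linalg

npca = 3
x = np.random.randn(500, 20).astype(np.float32)  # 500 spectra, 20 bands

mu = x.mean(axis=0)
C = np.cov(x, rowvar=False)
v, d = scipy.linalg.eigh(C)  # eigenvalues ascending; eigenvectors in columns

# Project onto the top npca components (the last columns of d)
x_pca = (x - mu) @ d[:, -npca:]
print(x_pca.shape)  # (500, 3)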
Beispiel #17
0
def segment(spectra: tuple,
            nodata_value: float,
            npca: int,
            segsize: int,
            nchunk: int,
            n_cores: int = 1,
            ray_address: str = None,
            ray_redis_password: str = None,
            ray_temp_dir=None,
            ray_ip_head=None,
            logfile=None,
            loglevel='INFO'):
    """
    Segment an image using SLIC on a PCA.

    Args:
        spectra: tuple of filepaths of image to segment and (optionally) output label file
        nodata_value: data to ignore in radiance image
        npca: number of pca components to use
        segsize: mean segmentation size
        nchunk: size of each image chunk
        n_cores: number of cores to use
        ray_address: ray address to connect to (for multinode implementation)
        ray_redis_password: ray password to use (for multinode implementation)
        ray_temp_dir: ray temp directory to reference
        ray_ip_head: ray ip head to reference (for multinode use)
        logfile: logging file to output to
        loglevel: logging level to use

    """

    logging.basicConfig(format='%(levelname)s:%(message)s',
                        level=loglevel,
                        filename=logfile)

    # Accept either an (input, label) tuple or a single input file path
    if isinstance(spectra, tuple) and len(spectra) > 1:
        in_file, lbl_file = spectra[0], spectra[1]
    else:
        in_file = spectra[0] if isinstance(spectra, tuple) else spectra
        lbl_file = str(in_file) + '_lbl'

    # Open input data, get dimensions
    in_img = envi.open(envi_header(in_file), in_file)
    meta = in_img.metadata
    nl, nb, ns = [int(meta[n]) for n in ('lines', 'bands', 'samples')]

    # Start up a ray instance for parallel work
    rayargs = {
        'ignore_reinit_error': True,
        'local_mode': n_cores == 1,
        "address": ray_address,
        '_temp_dir': ray_temp_dir,
        "_redis_password": ray_redis_password
    }

    # We can only set the num_cpus if running on a single-node
    if ray_ip_head is None and ray_redis_password is None:
        rayargs['num_cpus'] = n_cores

    ray.init(**rayargs)
    atexit.register(ray.shutdown)

    # Iterate through image "chunks," segmenting as we go
    all_labels = np.zeros((nl, ns), dtype=np.int64)
    jobs = []

    # Enforce a minimum chunk size to prevent singularities downstream
    # This could eventually be made a user-tunable parameter but this
    # value should work in all cases
    min_lines_per_chunk = 10
    for lstart in np.arange(0, nl - min_lines_per_chunk, nchunk):

        # Extend any chunk that falls within a small margin of the
        # end of the flightline
        lend = min(lstart + nchunk, nl)
        if lend > (nl - min_lines_per_chunk):
            lend = nl

        # Extract data
        jobs.append(
            segment_chunk.remote(lstart,
                                 lend,
                                 in_file,
                                 nodata_value,
                                 npca,
                                 segsize,
                                 logfile=logfile,
                                 loglevel=loglevel))

    # Collect results, making sure each chunk is distinct, and enforce an order
    next_label = 1
    rreturn = [ray.get(jid) for jid in jobs]
    for lstart, lend, ret in rreturn:
        if ret is not None:
            logging.debug(f'Collecting chunk: {lstart}')
            chunk_label = ret.copy()
            unique_chunk_labels = np.unique(chunk_label[chunk_label != 0])
            ordered_chunk_labels = np.zeros(chunk_label.shape)
            for lbl in unique_chunk_labels:
                ordered_chunk_labels[chunk_label == lbl] = next_label
                next_label += 1
            all_labels[lstart:lend, ...] = ordered_chunk_labels
    del rreturn
    ray.shutdown()

    # Final file I/O
    logging.debug('Writing output')
    lbl_meta = {
        "samples": str(ns),
        "lines": str(nl),
        "bands": "1",
        "header offset": "0",
        "file type": "ENVI Standard",
        "data type": "4",
        "interleave": "bil"
    }
    lbl_img = envi.create_image(envi_header(lbl_file),
                                lbl_meta,
                                ext='',
                                force=True)
    lbl_mm = lbl_img.open_memmap(interleave='source', writable=True)
    lbl_mm[:, :] = np.array(all_labels, dtype=np.float32).reshape((nl, 1, ns))
    del lbl_mm
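The chunk-collection loop above makes labels globally unique by remapping each chunk's nonzero labels onto an increasing counter. The same reindexing in isolation, with a hypothetical chunk:

import numpy as np

chunk = np.array([[0, 2, 2],
                  [5, 5, 0]])  # 0 = masked / nodata

next_label = 7  # next free global label
out = np.zeros_like(chunk)
for lbl in np.unique(chunk[chunk != 0]):
    out[chunk == lbl] = next_label
    next_label += 1
# out now uses labels 7 and 8; zeros (nodata) are preserved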