def remap(inputfile, labels, outputfile, flag, chunksize):
    """Expand a per-label reference table back onto the label image grid.

    Every output pixel receives the reference row selected by that pixel's
    label value. The result is written as a BIL ENVI image, one block of
    ``chunksize`` lines at a time.

    Args:
        inputfile: path of the ENVI reference file, indexed by label value
            along its first axis
        labels: path of the ENVI label image (band 0 is read)
        outputfile: path of the ENVI image to create
        flag: value each output block is initialised with before the
            per-pixel assignment
        chunksize: number of image lines processed per block
    """
    source_img = envi.open(envi_header(inputfile), inputfile)
    source_meta = source_img.metadata
    source_mm = source_img.open_memmap(interleave='source', writable=False)
    table = np.array(source_mm[:, :])

    label_img = envi.open(envi_header(labels), labels)
    label_meta = label_img.metadata
    label_map = label_img.read_band(0)

    n_lines = int(label_meta['lines'])
    n_samples = int(label_meta['samples'])
    n_bands = int(source_meta['bands'])

    # Output header: reference metadata with the label image's spatial size
    out_meta = dict(source_meta.items())
    out_meta["samples"] = n_samples
    out_meta["bands"] = n_bands
    out_meta["lines"] = n_lines
    out_meta['data type'] = source_meta['data type']
    out_meta["interleave"] = "bil"

    out_img = envi.create_image(envi_header(outputfile),
                                metadata=out_meta,
                                ext='',
                                force=True)
    out_mm = out_img.open_memmap(interleave='source', writable=True)

    # Walk the image block by block, restoring as we go
    for first_line in np.arange(0, n_lines, chunksize):
        print(first_line)

        # Drop the previous map object and open a fresh one for this block
        del out_mm
        out_mm = out_img.open_memmap(interleave='source', writable=True)

        last_line = min(first_line + chunksize, n_lines)
        block_labels = label_map[first_line:last_line, :]
        block_rows, block_cols = block_labels.shape[0], block_labels.shape[1]

        block = flag * np.ones((block_rows, n_bands, block_cols))
        for r, c in np.ndindex(block_rows, block_cols):
            block[r, :, c] = np.squeeze(table[int(block_labels[r, c]), :])

        out_mm[first_line:last_line, :, :] = block
def instrument_model(config):
    """Build an instrument model and flat field from a radiance cube.

    Reads the JSON configuration, loads the input radiance image, estimates
    a flat field and per-column covariances, then writes:

    * the flat field as raw float32 data plus a minimal ENVI header, and
    * a MATLAB ``.mat`` file holding the covariances and diagnostics.

    Args:
        config: path to a JSON configuration file. The keys read are
            ``input_radiance_file``, ``output_model_file``,
            ``output_flatfield_file`` and ``uniformity_threshold``; file
            entries are expanded relative to the configuration's directory.

    Returns:
        None
    """
    # One "key = value" field per line, starting with the ENVI magic word
    hdr_template = """ENVI
samples = {samples}
lines = {lines}
bands = 1
header offset = 0
file type = ENVI Standard
data type = 4
interleave = bsq
byte order = 0
"""

    # Resolve the directory from the *path* before parsing: the previous
    # code parsed first and then handed the resulting dict to abspath().
    config_path = config
    configdir, _ = split(abspath(config_path))
    config = json_load_ascii(config_path, shell_replace=True)

    infile = expand_path(configdir, config['input_radiance_file'])
    outfile = expand_path(configdir, config['output_model_file'])
    flatfile = expand_path(configdir, config['output_flatfield_file'])
    uniformity_thresh = float(config['uniformity_threshold'])

    # Load the full cube in BIL order as float32
    img = envi.open(envi_header(infile), infile)
    inmm = img.open_memmap(interleave='bil', writable=False)
    X = np.array(inmm[:, :, :], dtype=np.float32)
    _, nb, nc = X.shape

    # Flat field: raw float32 dump plus a single-band header (bands x columns)
    FF, Xhoriz, Xhorizp, use_ff = _flat_field(X, uniformity_thresh)
    np.array(FF, dtype=np.float32).tofile(flatfile)
    with open(envi_header(flatfile), 'w') as fout:
        fout.write(hdr_template.format(lines=nb, samples=nc))

    # Column covariances, flattened to one row per column for storage
    C, Xvert, Xvertp, use_C = _column_covariances(X, uniformity_thresh)
    cshape = (C.shape[0], C.shape[1]**2)
    out = np.array(C, dtype=np.float32).reshape(cshape)

    mdict = {
        'columns': out.shape[0],
        'bands': out.shape[1],
        'covariances': out,
        'Xvert': Xvert,
        'Xhoriz': Xhoriz,
        'Xvertp': Xvertp,
        'Xhorizp': Xhorizp,
        'use_ff': use_ff,
        'use_C': use_C
    }
    scipy.io.savemat(outfile, mdict)
def flush_buffers(self):
    """Write to file, and refresh the memory map object."""
    # Only ENVI-backed files have buffered frames and a memory map
    if self.format != 'ENVI':
        return

    if self.write:
        for line_idx, cached in self.frames.items():
            # Rows whose first channel is NaN are left untouched on disk
            is_missing = np.isnan(cached[:, 0])
            keep = np.logical_not(is_missing)
            self.memmap[line_idx, keep, :] = cached[keep, :]

    # Start with an empty cache, then reopen the file and its memory map
    self.frames = OrderedDict()
    del self.file
    self.file = envi.open(envi_header(self.fname), self.fname)
    self.open_map_with_retries()
def sample_calibration_uncertainty(input_file: pathlib.Path,
                                   output_file: pathlib.Path,
                                   cov_l: np.ndarray,
                                   cov_wl: np.ndarray,
                                   rad_wl: np.ndarray,
                                   bias_scale=1.0):
    """Copy an image and scale it by one random calibration-bias draw.

    The input image and its header are copied to ``output_file``; the copy
    is then multiplied in place by a per-wavelength factor sampled from
    ``cov_l``.

    Args:
        input_file: image to copy
        output_file: destination of the perturbed copy
        cov_l: matrix applied to the standard-normal sample
        cov_wl: wavelengths at which ``cov_l`` is defined
        rad_wl: wavelengths of the image, onto which the factor is resampled
        bias_scale: standard deviation of the normal sample

    Returns:
        ``output_file``
    """
    source_hdr = envi_header(str(input_file))
    target_hdr = envi_header(str(output_file))

    # Work on a copy so the input image stays untouched
    shutil.copy(input_file, output_file)
    shutil.copy(source_hdr, target_hdr)

    image = sp.open_image(str(target_hdr))
    pixels = image.open_memmap(writable=True)

    # Here, we assume that the calibration bias is constant across the entire
    # image (i.e., the same bias is added to all pixels).
    draw = np.random.normal(size=cov_l.shape[0], scale=bias_scale)
    factor = 1.0 + cov_l @ draw

    # Resample the added noise vector to match the wavelengths of the target
    # image.
    resampler = interp1d(cov_wl, factor, fill_value="extrapolate")
    factor_on_image_grid = resampler(rad_wl)

    pixels *= factor_on_image_grid
    return output_file
def extract_chunk(lstart: int,
                  lend: int,
                  in_file: str,
                  labels: np.array,
                  flag: float,
                  logfile=None,
                  loglevel='INFO'):
    """
    Extract a small chunk of the image

    For every label >= 1 that occurs in lines ``lstart:lend`` the mean
    spectrum of its pixels is computed. Pixels are searched for within one
    extra chunk length above and below the chunk.

    Args:
        lstart: line to start extraction at
        lend: line to end extraction at
        in_file: file to read image from
        labels: labels to use for data read
        flag: nodata value of image
        logfile: logging file name
        loglevel: logging level

    Returns:
        out_index: array of output indices (based on labels)
        out_data: array of output data

        Both are None when the chunk contains no label >= 1.
    """
    logging.basicConfig(format='%(levelname)s:%(message)s',
                        level=loglevel,
                        filename=logfile)
    logging.info(f'{lstart}: starting')

    in_img = envi.open(envi_header(in_file), in_file)
    img_mm = in_img.open_memmap(interleave='bip', writable=False)

    # Which labels will we extract? ignore zero index
    active = labels[lstart:lend, :]
    active = np.unique(active[active >= 1])
    logging.debug(f'{lstart}: found {len(active)} unique labels')
    if len(active) == 0:
        return None, None

    # Handle labels extending outside our chunk by expanding margins
    cs = lend - lstart
    boundary_min = max(lstart - cs, 0)
    boundary_max = min(lend + cs, labels.shape[0])

    # Single membership test instead of one full-area comparison per label
    active_area = np.isin(labels[boundary_min:boundary_max, :], active)
    active_rows, active_cols = np.where(active_area)

    # Tight bounding box around every pixel belonging to an active label
    lstart_adjust = active_rows.min() + boundary_min
    lend_adjust = active_rows.max() + boundary_min + 1
    cstart_adjust = active_cols.min()
    cend_adjust = active_cols.max() + 1
    logging.debug(
        f'{lstart} area subset: {lstart_adjust}, {lend_adjust} :::: {cstart_adjust}, {cend_adjust}'
    )

    chunk_lbl = np.array(labels[lstart_adjust:lend_adjust,
                                cstart_adjust:cend_adjust])
    chunk_inp = np.array(img_mm[lstart_adjust:lend_adjust,
                                cstart_adjust:cend_adjust, :])

    out_data = np.full((len(active), img_mm.shape[-1]), flag,
                       dtype=np.float64)

    logging.debug(f'{lstart}: running extraction from local array')
    for out_row, lab in enumerate(active):
        # Boolean mask over (line, sample) selects an (npixels, nbands) array
        member_spectra = chunk_inp[chunk_lbl == lab]
        out_data[out_row, :] = member_spectra.mean(axis=0, dtype=np.float64)

    # Output index = position among the image's sorted positive labels, with
    # slot 0 reserved for the zero label
    unique_labels = np.unique(labels)
    unique_labels = unique_labels[unique_labels >= 1]
    unique_labels = np.hstack([np.zeros(1), unique_labels])
    match_idx = np.searchsorted(unique_labels, active)

    out_data[np.logical_not(np.isfinite(out_data))] = flag
    logging.debug(f'{lstart}: complete')
    return match_idx, out_data
import matplotlib.pyplot as plt from isofit.core.common import envi_header assert len(sys.argv) > 1, "Please specify a JSON config file." configfile = sys.argv[1] with open(configfile, "r") as f: config = json.load(f) outdir = Path(config["outdir"]) reflfiles = list(outdir.glob("**/estimated-reflectance")) assert len(reflfiles) > 0, f"No reflectance files found in directory {outdir}" true_refl_file = Path(config["reflectance_file"]).expanduser() true_reflectance = sp.open_image(envi_header(str(true_refl_file))) true_waves = np.array(true_reflectance.metadata["wavelength"], dtype=float) true_refl_m = true_reflectance.open_memmap() windows = config["isofit"]["implementation"]["inversion"]["windows"] def parse_dir(ddir): grps = {"directory": [str(ddir)]} for key in ["atm", "noise", "prior", "inversion"]: pat = f".*{key}_(.+?)" + r"(__|/|\Z)" match = re.match(pat, str(ddir)) if match is not None: match = match.group(1) grps[key] = [match] for key in ["szen", "ozen", "zen", "saz", "oaz", "az",
def _run_chunk(start_line: int, stop_line: int, reference_radiance_file: str,
               reference_atm_file: str, reference_locations_file: str,
               input_radiance_file: str, input_locations_file: str,
               segmentation_file: str, isofit_config: dict,
               output_reflectance_file: str, output_uncertainty_file: str,
               radiance_factors: np.array, nneighbors: int,
               nodata_value: float) -> None:
    """
    Interpolate atmospheric coefficients over a range of image lines and
    write the resulting reflectance, one line at a time.

    Args:
        start_line: line to start empirical line run at
        stop_line:  line to stop empirical line run at
        reference_radiance_file:  source file for radiance (interpolation built from this)
        reference_atm_file:  source file for atmosphere coefficients (interpolation built from this)
        reference_locations_file:  source file for file locations (lon, lat, elev), (interpolation built from this)
        input_radiance_file: input radiance file (interpolate over this)
        input_locations_file: input location file (interpolate over this)
        segmentation_file: input file noting the per-pixel segmentation used
        isofit_config: dictionary-style isofit configuration
        output_reflectance_file: location to write output reflectance to
        output_uncertainty_file: location to write output uncertainty to
        radiance_factors: radiance adjustment factors
        nneighbors: number of neighbors to use for interpolation
        nodata_value: nodata value of input and output

    Returns:
        None
    """
    # Load reference images
    reference_radiance_img = envi.open(envi_header(reference_radiance_file),
                                       reference_radiance_file)
    reference_atm_img = envi.open(envi_header(reference_atm_file),
                                  reference_atm_file)
    reference_locations_img = envi.open(envi_header(reference_locations_file),
                                        reference_locations_file)

    n_reference_lines, n_radiance_bands, _ = [
        int(reference_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]

    # Load input images
    input_radiance_img = envi.open(envi_header(input_radiance_file),
                                   input_radiance_file)
    n_input_lines, _, n_input_samples = [
        int(input_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    wl = np.array(
        [float(w) for w in input_radiance_img.metadata['wavelength']])

    input_locations_img = envi.open(envi_header(input_locations_file),
                                    input_locations_file)
    n_location_bands = int(input_locations_img.metadata['bands'])

    # Load output images
    output_reflectance_img = envi.open(envi_header(output_reflectance_file),
                                       output_reflectance_file)
    output_uncertainty_img = envi.open(envi_header(output_uncertainty_file),
                                       output_uncertainty_file)
    n_output_reflectance_bands = int(output_reflectance_img.metadata['bands'])
    n_output_uncertainty_bands = int(output_uncertainty_img.metadata['bands'])

    # Load reference data
    reference_locations_mm = reference_locations_img.open_memmap(
        interleave='source', writable=False)
    reference_locations = np.array(reference_locations_mm[:, :, :]).reshape(
        (n_reference_lines, n_location_bands))

    reference_radiance_mm = reference_radiance_img.open_memmap(
        interleave='source', writable=False)
    reference_radiance = np.array(reference_radiance_mm[:, :, :]).reshape(
        (n_reference_lines, n_radiance_bands))

    reference_atm_mm = reference_atm_img.open_memmap(interleave='source',
                                                     writable=False)
    reference_atm = np.array(reference_atm_mm[:, :, :]).reshape(
        (n_reference_lines, n_radiance_bands * 5))

    # The atmosphere file stacks five coefficient blocks of n_radiance_bands
    rhoatm = reference_atm[:, :n_radiance_bands]
    sphalb = reference_atm[:, n_radiance_bands:(n_radiance_bands * 2)]
    transm = reference_atm[:, (n_radiance_bands * 2):(n_radiance_bands * 3)]
    solirr = reference_atm[:, (n_radiance_bands * 3):(n_radiance_bands * 4)]
    coszen = reference_atm[:, (n_radiance_bands * 4):(n_radiance_bands * 5)]

    # Load segmentation data
    # NOTE(review): the pixel loop below indexes the segmentation map
    # unconditionally, so a missing segmentation file fails on the first
    # valid pixel - confirm whether that case should be supported.
    if segmentation_file:
        segmentation_img = envi.open(envi_header(segmentation_file),
                                     segmentation_file)
        segmentation_img = segmentation_img.read_band(0)
    else:
        segmentation_img = None

    # Prepare instrument model, if available
    if isofit_config is not None:
        config = configs.create_new_config(isofit_config)
        instrument = Instrument(config)
        logging.info('Loading instrument')
    else:
        instrument = None

    # Load radiance factors
    if radiance_factors is None:
        radiance_adjustment = np.ones(n_radiance_bands, )
    else:
        radiance_adjustment = np.loadtxt(radiance_factors)

    # PCA coefficients
    rdn_pca = PCA(n_components=2)
    reference_pca = rdn_pca.fit_transform(reference_radiance *
                                          radiance_adjustment)

    # Create the tree to find nearest neighbor segments.
    # Assume (heuristically) that, for distance purposes, 1 m vertically is
    # comparable to 10 m horizontally, and that there are 100 km per latitude
    # degree.  This is all approximate of course.  Elevation appears in the
    # Third element, and the first two are latitude/longitude coordinates
    # The fourth and fifth elements are "spectral distance" determined by the
    # top principal component coefficients
    loc_scaling = np.array([1e5, 1e5, 10, 100, 100])
    scaled_ref_loc = np.concatenate(
        (reference_locations, reference_pca), axis=1) * loc_scaling
    tree = KDTree(scaled_ref_loc)

    # Fit GP parameters on transmissivity of an H2O feature, in the
    # first 400 datapoints
    use = np.arange(min(len(rhoatm), 400))
    h2oband = np.argmin(abs(wl - 940))
    scale = (500, 500, 500, 500, 500)
    bounds = ((100, 2000), (100, 2000), (100, 2000), (100, 2000),
              (100, 2000))
    kernel = RBF(length_scale=scale, length_scale_bounds=bounds) +\
        WhiteKernel(noise_level=0.01, noise_level_bounds=(1e-10, 0.1))
    gp = GaussianProcessRegressor(kernel=kernel,
                                  alpha=0.0,
                                  normalize_y=True)
    gp = gp.fit(scaled_ref_loc[use, :], transm[use, h2oband])
    kernel = gp.kernel_

    # Iterate through image.  Each segment has its own GP, stored in a
    # hash table indexed by location in the segmentation map
    hash_table = {}

    for row in np.arange(start_line, stop_line):

        # Load inline input data
        input_radiance_mm = input_radiance_img.open_memmap(
            interleave='source', writable=False)
        raw_radiance = np.array(input_radiance_mm[row, :, :])
        if input_radiance_img.metadata['interleave'] == 'bil':
            raw_radiance = raw_radiance.transpose((1, 0))

        # Detect nodata on the raw values: once the adjustment factors are
        # applied, a nodata spectrum no longer equals nodata_value
        is_nodata = np.all(np.isclose(raw_radiance, nodata_value), axis=1)
        input_radiance = raw_radiance * radiance_adjustment

        input_locations_mm = input_locations_img.open_memmap(
            interleave='source', writable=False)
        input_locations = np.array(input_locations_mm[row, :, :])
        if input_locations_img.metadata['interleave'] == 'bil':
            input_locations = input_locations.transpose((1, 0))

        # Both row buffers start out entirely filled with nodata
        output_reflectance_row = np.zeros(input_radiance.shape) + nodata_value
        output_uncertainty_row = np.zeros(input_radiance.shape) + nodata_value

        nspectra, start = 0, time.time()
        for col in np.arange(n_input_samples):

            # Nodata pixels keep the nodata fill in both output rows
            if is_nodata[col]:
                continue

            # Get radiance, pca coordinates, physical location for this datum
            my_rdn = input_radiance[col, :]
            my_pca = rdn_pca.transform(my_rdn[np.newaxis, :])
            my_loc = np.r_[input_locations[col, :], my_pca[0, :]] * loc_scaling

            # Retrieve or build the GP
            hash_idx = segmentation_img[row, col]
            if hash_idx in hash_table:
                gp_rhoatm, gp_sphalb, gp_transm, irr = hash_table[hash_idx]
            else:
                # There is no GP for this segment, so we build one from
                # the atmospheric coefficients from closest neighbors
                _, nn = tree.query(my_loc, nneighbors)
                neighbor_rhoatm = rhoatm[nn, :]
                neighbor_transm = transm[nn, :]
                neighbor_sphalb = sphalb[nn, :]
                neighbor_locs = scaled_ref_loc[nn, :]

                # Create a new GP using the optimized parameters as a fixed kernel
                gp_rhoatm = GaussianProcessRegressor(kernel=kernel,
                                                     alpha=0.0,
                                                     normalize_y=True,
                                                     optimizer=None)
                gp_rhoatm.fit(neighbor_locs, neighbor_rhoatm)
                gp_sphalb = GaussianProcessRegressor(kernel=kernel,
                                                     alpha=0.0,
                                                     normalize_y=True,
                                                     optimizer=None)
                gp_sphalb.fit(neighbor_locs, neighbor_sphalb)
                gp_transm = GaussianProcessRegressor(kernel=kernel,
                                                     alpha=0.0,
                                                     normalize_y=True,
                                                     optimizer=None)
                gp_transm.fit(neighbor_locs, neighbor_transm)

                # NOTE(review): irradiance is taken from reference row 1
                # for every segment rather than from the neighbours -
                # confirm this is intended.
                irr = solirr[1, :] * coszen[1, :]
                irr[irr < 1e-8] = 1e-8

                hash_table[hash_idx] = (gp_rhoatm, gp_sphalb, gp_transm, irr)

            my_rhoatm = gp_rhoatm.predict(my_loc[np.newaxis, :])
            my_sphalb = gp_sphalb.predict(my_loc[np.newaxis, :])
            my_transm = gp_transm.predict(my_loc[np.newaxis, :])
            my_rho = (my_rdn * np.pi) / irr
            my_rfl = 1.0 / (my_transm / (my_rho - my_rhoatm) + my_sphalb)
            output_reflectance_row[col, :] = my_rfl

            # NOTE: uncertainties are not computed here; the uncertainty row
            # is written with its nodata fill. The instrument model loaded
            # above is not used in this function.

            nspectra = nspectra + 1

        elapsed = float(time.time() - start)
        # A row without valid pixels can finish within the timer resolution
        rate = round(float(nspectra) / elapsed, 2) if elapsed > 0 else 0.0
        logging.info('row {}/{}, ({}/{} local), {} spectra per second'.format(
            row, n_input_lines, int(row - start_line),
            int(stop_line - start_line), rate))

        del input_locations_mm
        del input_radiance_mm

        # Reorder each row buffer to (1, bands, samples) for BIL output
        output_reflectance_row = output_reflectance_row.transpose((1, 0))
        output_uncertainty_row = output_uncertainty_row.transpose((1, 0))
        shp = output_reflectance_row.shape
        output_reflectance_row = output_reflectance_row.reshape(
            (1, shp[0], shp[1]))
        shp = output_uncertainty_row.shape
        output_uncertainty_row = output_uncertainty_row.reshape(
            (1, shp[0], shp[1]))

        _write_bil_chunk(
            output_reflectance_row, output_reflectance_file, row,
            (n_input_lines, n_output_reflectance_bands, n_input_samples))
        _write_bil_chunk(
            output_uncertainty_row, output_uncertainty_file, row,
            (n_input_lines, n_output_uncertainty_bands, n_input_samples))
def extractions(inputfile,
                labels,
                output,
                chunksize,
                flag,
                n_cores: int = 1,
                ray_address: str = None,
                ray_redis_password: str = None,
                ray_temp_dir: str = None,
                ray_ip_head=None,
                logfile: str = None,
                loglevel: str = 'INFO'):
    """Extract one spectrum per label and write them as a one-column image.

    The label image is split into blocks of ``chunksize`` lines, each
    handed to ``extract_chunk`` as a ray task. The returned spectra are
    collected into an array with one line per unique label (label 0 is
    always given a slot) and written in BIL order.

    Args:
        inputfile: ENVI image to extract spectra from
        labels: ENVI label image (band 0 is read)
        output: ENVI image to create
        chunksize: number of lines per extraction task
        flag: nodata value passed on to ``extract_chunk``
        n_cores: number of cores; 1 runs ray in local mode
        ray_address: address passed to ``ray.init``
        ray_redis_password: redis password passed to ``ray.init``
        ray_temp_dir: ray temporary directory (applied in local mode only)
        ray_ip_head: ray head node; when it and the redis password are both
            None, ``num_cpus`` is set from ``n_cores``
        logfile: logging file name passed to the tasks
        loglevel: logging level passed to the tasks

    Raises:
        KeyError: if the input header's data type is neither '4' nor '5'
    """
    in_file = inputfile
    lbl_file = labels
    out_file = output
    nchunk = chunksize

    dtm = {'4': np.float32, '5': np.float64}

    # Open input data, get dimensions
    in_img = envi.open(envi_header(in_file), in_file)
    meta = in_img.metadata
    nl, nb, _ = [int(meta[n]) for n in ('lines', 'bands', 'samples')]

    lbl_img = envi.open(envi_header(lbl_file), lbl_file)
    labels = lbl_img.read_band(0)
    un_labels = np.unique(labels).tolist()
    if 0 not in un_labels:
        un_labels.insert(0, 0)
    nout = len(un_labels)

    # Start up a ray instance for parallel work
    local_mode = n_cores == 1
    rayargs = {
        'ignore_reinit_error': True,
        'local_mode': local_mode,
        "address": ray_address,
        "_redis_password": ray_redis_password
    }
    if local_mode:
        rayargs['_temp_dir'] = ray_temp_dir
        # Used to run on a VPN
        ray.services.get_node_ip_address = lambda: '127.0.0.1'

    # We can only set the num_cpus if running on a single-node
    if ray_ip_head is None and ray_redis_password is None:
        rayargs['num_cpus'] = n_cores

    ray.init(**rayargs)
    atexit.register(ray.shutdown)

    # Share the label array once instead of shipping it with every task
    labelid = ray.put(labels)
    jobs = [
        extract_chunk.remote(lstart,
                             min(lstart + nchunk, nl),
                             in_file,
                             labelid,
                             flag,
                             logfile=logfile,
                             loglevel=loglevel)
        for lstart in np.arange(0, nl, nchunk)
    ]

    # Collect results
    rreturn = [ray.get(jid) for jid in jobs]

    # Place each chunk's spectra at its label indices; chunks without any
    # label return (None, None) and are skipped
    out = np.zeros((nout, nb, 1))
    for idx, ret in rreturn:
        if ret is not None:
            out[idx, :, 0] = ret
    del rreturn
    ray.shutdown()

    meta["lines"] = str(nout)
    meta["bands"] = str(nb)
    meta["samples"] = '1'
    meta["interleave"] = "bil"

    # Creating the image writes the header; the data is written below
    out_img = envi.create_image(envi_header(out_file),
                                metadata=meta,
                                ext='',
                                force=True)
    del out_img

    if dtm[meta['data type']] == np.float32:
        out_dtype = 'float32'
    else:
        out_dtype = 'float64'
    write_bil_chunk(out, out_file, 0, out.shape, dtype=out_dtype)
def surface_model(config_path: str,
                  wavelength_path: str = None,
                  output_path: str = None) -> None:
    """The surface model tool contains everything you need to build basic
    multicomponent (i.e. collection of Gaussian) surface priors for the
    multicomponent surface model.

    Args:
        config_path: path to a JSON formatted surface model configuration
        wavelength_path: optional path to a three-column wavelength file,
           overriding the configuration file settings
        output_path: optional path to the destination .mat file, overriding
           the configuration file settings

    Returns:
        None

    Raises:
        ValueError: if a required top-level or per-source parameter is
            missing, if synthetic mixtures are requested together with
            attributes, or if a correlation or normalization method is
            not recognized
        IndexError: if spectrum and attribute inputs do not match in number
    """
    # Load configuration JSON into a local dictionary
    configdir, _ = os.path.split(os.path.abspath(config_path))
    config = json_load_ascii(config_path, shell_replace=True)

    # Determine top level parameters
    for required in [
            'output_model_file', 'sources', 'normalize', 'wavelength_file'
    ]:
        if required not in config:
            raise ValueError("Missing parameter: %s" % required)

    if wavelength_path is not None:
        wavelength_file = wavelength_path
    else:
        wavelength_file = expand_path(configdir, config['wavelength_file'])

    if output_path is not None:
        outfile = output_path
    else:
        outfile = expand_path(configdir, config['output_model_file'])

    normalize = config['normalize']
    reference_windows = config['reference_windows']

    # load wavelengths file, and change units to nm if needed
    wl_table = np.loadtxt(wavelength_file)
    if wl_table.shape[1] > 2:
        wl_table = wl_table[:, 1:]
    if wl_table[0, 0] < 100:
        wl_table = wl_table * 1000.0
    wl = wl_table[:, 0]
    nchan = len(wl)

    # build global reference windows
    refwl = []
    for window in reference_windows:
        active_wl = np.logical_and(wl >= window[0], wl < window[1])
        refwl.extend(wl[active_wl])
    normind = np.array([np.argmin(abs(wl - w)) for w in refwl])
    refwl = np.array(refwl, dtype=float)

    # create basic model template
    model = {
        'normalize': normalize,
        'wl': wl,
        'means': [],
        'covs': [],
        'attribute_means': [],
        'attribute_covs': [],
        'attributes': [],
        'refwl': refwl
    }

    # each "source" (i.e. spectral library) is treated separately
    for si, source_config in enumerate(config['sources']):

        # Determine source parameters
        for required in ['input_spectrum_files', 'windows', 'n_components']:
            if required not in source_config:
                raise ValueError('Source %i is missing a parameter: %s' %
                                 (si, required))

        # Determine whether we should synthesize our own mixtures
        if 'mixtures' in source_config:
            mixtures = source_config['mixtures']
        elif 'mixtures' in config:
            mixtures = config['mixtures']
        else:
            mixtures = 0

        # open input files associated with this source
        infiles = [
            expand_path(configdir, fi)
            for fi in source_config['input_spectrum_files']
        ]

        # associate attributes, if they exist. These will not be used
        # in the retrieval, but can be used in post-analysis
        if 'input_attribute_files' in source_config:
            infiles_attributes = [
                expand_path(configdir, fi)
                for fi in source_config['input_attribute_files']
            ]
            if len(infiles_attributes) != len(infiles):
                raise IndexError('spectrum / attribute file mismatch')
        else:
            infiles_attributes = [
                None for fi in source_config['input_spectrum_files']
            ]

        ncomp = int(source_config['n_components'])
        windows = source_config['windows']

        # load spectra
        spectra, attributes = [], []
        for infile, attribute_file in zip(infiles, infiles_attributes):

            rfl = envi.open(envi_header(infile), infile)
            nl, nb, ns = [
                int(rfl.metadata[n]) for n in ('lines', 'bands', 'samples')
            ]
            swl = np.array([float(f) for f in rfl.metadata['wavelength']])

            # Maybe convert to nanometers
            if swl[0] < 100:
                swl = swl * 1000.0

            # Load library and adjust interleave, if needed
            rfl_mm = rfl.open_memmap(interleave='bip', writable=False)
            library = np.array(rfl_mm[:, :, :])
            library = library.reshape(nl * ns, nb)

            # import spectra and resample
            for spectrum in library:
                p = scipy.interpolate.interp1d(swl,
                                               spectrum,
                                               kind='linear',
                                               bounds_error=False,
                                               fill_value='extrapolate')
                spectra.append(p(wl))

            # Load attributes
            if attribute_file is not None:
                attr = envi.open(envi_header(attribute_file), attribute_file)
                nla, nba, nsa = [
                    int(attr.metadata[n])
                    for n in ('lines', 'bands', 'samples')
                ]

                # Load library and adjust interleave, if needed
                attr_mm = attr.open_memmap(interleave='bip', writable=False)
                attr_table = np.array(attr_mm[:, :, :])
                attr_table = attr_table.reshape(nla * nsa, nba)
                model['attributes'] = attr.metadata['band names']

                # import attributes as they are (no resampling)
                for attr_row in attr_table:
                    attributes.append(attr_row)

        if len(attributes) > 0 and len(attributes) != len(spectra):
            raise IndexError('Mismatch in number of spectra vs. attributes')

        # calculate mixtures, if needed
        if len(attributes) > 0 and mixtures > 0:
            raise ValueError('Synthetic mixtures w/ attributes is not advised')

        # The number of library spectra is fixed before mixing starts, so
        # mixtures are only ever drawn from the original spectra
        n = float(len(spectra))
        nmix = int(n * mixtures)
        for mi in range(nmix):
            s1, m1 = spectra[int(np.random.rand() * n)], np.random.rand()
            s2, m2 = spectra[int(np.random.rand() * n)], 1.0 - m1
            spectra.append(m1 * s1 + m2 * s2)

        # Lists to arrays
        spectra = np.array(spectra)
        attributes = np.array(attributes)

        # Flag bad data
        use = np.all(np.isfinite(spectra), axis=1)
        spectra = spectra[use, :]
        if len(attributes) > 0:
            attributes = attributes[use, :]

        # Accumulate total list of window indices
        # NOTE(review): channels outside every window keep index -1, which
        # selects the last window below - confirm this is intended.
        window_idx = -np.ones((nchan), dtype=int)
        for wi, win in enumerate(windows):
            active_wl = np.logical_and(wl >= win['interval'][0],
                                       wl < win['interval'][1])
            window_idx[active_wl] = wi

        # Two step model generation.  First step is k-means clustering.
        # This is more "stable" than Expectation Maximization with an
        # unconstrained covariance matrix
        kmeans = KMeans(init='k-means++', n_clusters=ncomp, n_init=10)
        kmeans.fit(spectra)
        Z = kmeans.predict(spectra)

        # Build a combined dataset of attributes and spectra
        if len(attributes) > 0:
            spectra_attr = np.concatenate((spectra, attributes), axis=1)

        # Now fit the full covariance for each component
        for ci in range(ncomp):

            members = Z == ci
            m = np.mean(spectra[members, :], axis=0)
            C = np.cov(spectra[members, :], rowvar=False)
            if len(attributes) > 0:
                m_attr = np.mean(spectra_attr[members, :], axis=0)
                C_attr = np.cov(spectra_attr[members, :], rowvar=False)

            for i in range(nchan):
                window = windows[window_idx[i]]

                # Each spectral interval, or window, is constructed
                # using one of several rules.  We can draw the covariance
                # directly from the data...
                if window['correlation'] == 'EM':
                    C[i, i] = C[i, i] + float(window['regularizer'])

                # Alternatively, we can use a band diagonal form,
                # a Gaussian process that promotes local smoothness.
                elif window['correlation'] == 'GP':
                    width = float(window['gp_width'])
                    magnitude = float(window['gp_magnitude'])
                    kernel = scipy.stats.norm.pdf((wl - wl[i]) / width)
                    kernel = kernel / kernel.sum() * magnitude
                    C[i, :] = kernel
                    C[:, i] = kernel
                    C[i, i] = C[i, i] + float(window['regularizer'])

                # To minimize bias, leave the channels independent
                # and uncorrelated
                elif window['correlation'] == 'decorrelated':
                    # Kept in its own name so the component index `ci`
                    # is not overwritten
                    variance = C[i, i]
                    C[:, i] = 0
                    C[i, :] = 0
                    C[i, i] = variance + float(window['regularizer'])

                else:
                    raise ValueError('I do not recognize the method ' +
                                     window['correlation'])

            # Normalize the component spectrum if desired
            if normalize == 'Euclidean':
                z = np.sqrt(np.sum(pow(m[normind], 2)))
            elif normalize == 'RMS':
                z = np.sqrt(np.mean(pow(m[normind], 2)))
            elif normalize == 'None':
                z = 1.0
            else:
                raise ValueError('Unrecognized normalization: %s\n' %
                                 normalize)
            m = m / z
            C = C / (z**2)

            model['means'].append(m)
            model['covs'].append(C)

            if len(attributes) > 0:
                model['attribute_means'].append(m_attr)
                model['attribute_covs'].append(C_attr)

    model['means'] = np.array(model['means'])
    model['covs'] = np.array(model['covs'])
    model['attribute_means'] = np.array(model['attribute_means'])
    model['attribute_covs'] = np.array(model['attribute_covs'])

    scipy.io.savemat(outfile, model)
def interpolate_atmosphere(reference_radiance_file: str,
                           reference_atm_file: str,
                           reference_locations_file: str,
                           segmentation_file: str,
                           input_radiance_file: str,
                           input_locations_file: str,
                           output_reflectance_file: str,
                           output_uncertainty_file: str,
                           nneighbors: int = 15,
                           nodata_value: float = -9999.0,
                           level: str = 'INFO',
                           radiance_factors: np.array = None,
                           isofit_config: dict = None,
                           n_cores: int = -1) -> None:
    """
    Perform a Gaussian process interpolation of atmospheric parameters.  It
    relies on precalculated atmospheric coefficients at a subset of spatial
    locations stored in a file.  The file has each coefficient defined for
    every radiance channel, appearing in the order: (1) atmospheric path
    reflectance; (2) spherical sky albedo; (3) total diffuse and direct
    transmittance of the two-part downwelling and upwelling path;
    (4) extraterrestrial solar irradiance; (5) cosine of solar zenith angle.

    Args:
        reference_radiance_file: source file for radiance (interpolation built from this)
        reference_atm_file:  source file for atmospheric coefficients (interpolation from this)
        reference_locations_file:  source file for file locations (lon, lat, elev), (interpolation from this)
        segmentation_file: input file noting the per-pixel segmentation used
        input_radiance_file: input radiance file (interpolate over this)
        input_locations_file: input location file (interpolate over this)
        output_reflectance_file: location to write output reflectance
        output_uncertainty_file: location to write output uncertainty
        nneighbors: number of neighbors to use for interpolation
        nodata_value: nodata value of input and output
        level: logging level
        radiance_factors: radiance adjustment factors
        isofit_config: dictionary-style isofit configuration
        n_cores: number of cores to run on (-1 uses every available core)

    Returns:
        None

    Raises:
        IndexError: if the dimensions of the input files are inconsistent
    """
    loglevel = level
    logging.basicConfig(format='%(message)s', level=loglevel)

    # Open input data to check that band formatting is correct
    # Load reference set radiance
    reference_radiance_img = envi.open(envi_header(reference_radiance_file),
                                       reference_radiance_file)
    n_reference_lines, n_radiance_bands, n_reference_columns = [
        int(reference_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if n_reference_columns != 1:
        raise IndexError("Reference data should be a single-column list")

    # Load reference set atmospheric coefficients
    reference_atm_img = envi.open(envi_header(reference_atm_file),
                                  reference_atm_file)
    nrefa, nba, srefa = [
        int(reference_atm_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if nrefa != n_reference_lines or srefa != n_reference_columns:
        raise IndexError("Reference file dimension mismatch (atmosphere)")
    if nba != (n_radiance_bands * 5):
        raise IndexError(
            "Reference atmosphere file has incorrect dimensioning")

    # Load reference set locations
    reference_locations_img = envi.open(envi_header(reference_locations_file),
                                        reference_locations_file)
    nrefl, lb, _ = [
        int(reference_locations_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if nrefl != n_reference_lines or lb != 3:
        raise IndexError("Reference file dimension mismatch (locations)")

    input_radiance_img = envi.open(envi_header(input_radiance_file),
                                   input_radiance_file)
    n_input_lines, n_input_bands, n_input_samples = [
        int(input_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if n_radiance_bands != n_input_bands:
        msg = 'Number of channels mismatch: input (%i) vs. reference (%i)'
        raise IndexError(msg % (n_input_bands, n_radiance_bands))

    input_locations_img = envi.open(envi_header(input_locations_file),
                                    input_locations_file)
    nll, nlb, nls = [
        int(input_locations_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    if nll != n_input_lines or nlb != 3 or nls != n_input_samples:
        raise IndexError('Input location dimension mismatch')

    # Create output files from a copy of the input header, so the input
    # image's metadata is not modified
    output_metadata = dict(input_radiance_img.metadata)
    output_metadata['interleave'] = 'bil'
    output_reflectance_img = envi.create_image(
        envi_header(output_reflectance_file),
        ext='',
        metadata=output_metadata,
        force=True)
    output_uncertainty_img = envi.create_image(
        envi_header(output_uncertainty_file),
        ext='',
        metadata=output_metadata,
        force=True)

    # Now cleanup inputs and outputs; the chunks reopen the files themselves
    del output_reflectance_img, output_uncertainty_img
    del reference_atm_img, reference_locations_img, input_radiance_img, input_locations_img

    # Determine the number of cores to use
    if n_cores == -1:
        n_cores = multiprocessing.cpu_count()
    n_cores = min(n_cores, n_input_lines)

    # Break data into sections
    line_sections = np.linspace(0, n_input_lines, num=n_cores + 1, dtype=int)
    chunk_args = [(
        line_sections[l],
        line_sections[l + 1],
        reference_radiance_file,
        reference_atm_file,
        reference_locations_file,
        input_radiance_file,
        input_locations_file,
        segmentation_file,
        isofit_config,
        output_reflectance_file,
        output_uncertainty_file,
        radiance_factors,
        nneighbors,
        nodata_value,
    ) for l in range(len(line_sections) - 1)]

    start_time = time.time()
    logging.info(
        'Beginning atmospheric interpolation inversions using {} cores'.format(
            n_cores))

    # Run the pool (or run serially)
    if n_cores != 1:
        # The context manager tears the pool down even if a chunk fails
        with multiprocessing.Pool(processes=n_cores) as pool:
            results = [
                pool.apply_async(_run_chunk, args) for args in chunk_args
            ]
            pool.close()
            pool.join()
            # get() re-raises any exception raised inside a worker; without
            # it a failed chunk would go unnoticed
            for result in results:
                result.get()
    else:
        for args in chunk_args:
            _run_chunk(*args)

    total_time = time.time() - start_time
    logging.info(
        'Parallel atmospheric interpolation inversions complete.  {} s total, {} spectra/s, {} spectra/s/core'
        .format(total_time, line_sections[-1] * n_input_samples / total_time,
                line_sections[-1] * n_input_samples / total_time / n_cores))
def empirical_line(reference_radiance_file: str,
                   reference_reflectance_file: str,
                   reference_uncertainty_file: str,
                   reference_locations_file: str,
                   segmentation_file: str,
                   input_radiance_file: str,
                   input_locations_file: str,
                   output_reflectance_file: str,
                   output_uncertainty_file: str,
                   nneighbors: int = 400,
                   nodata_value: float = -9999.0,
                   level: str = 'INFO',
                   logfile: str = None,
                   radiance_factors: np.array = None,
                   isofit_config: str = None,
                   n_cores: int = -1) -> None:
    """
    Perform an empirical line interpolation for reflectance and uncertainty extrapolation

    The function validates the dimensions declared in the ENVI headers of all
    inputs, creates the two output images, and then dispatches contiguous
    ranges of image lines to ``_run_chunk`` through ray.

    Args:
        reference_radiance_file: source file for radiance (interpolation built from this)
        reference_reflectance_file: source file for reflectance (interpolation built from this)
        reference_uncertainty_file: source file for uncertainty (interpolation built from this)
        reference_locations_file: source file for file locations (lon, lat, elev), (interpolation built from this)
        segmentation_file: input file noting the per-pixel segmentation used
        input_radiance_file: input radiance file (interpolate over this)
        input_locations_file: input location file (interpolate over this)
        output_reflectance_file: location to write output reflectance to
        output_uncertainty_file: location to write output uncertainty to
        nneighbors: number of neighbors to use for interpolation
        nodata_value: nodata value of input and output
        level: logging level
        logfile: logging file
        radiance_factors: radiance adjustment factors
        isofit_config: path to isofit configuration JSON file
        n_cores: number of cores to run on (-1 takes the value from the isofit
            configuration)

    Returns:
        None

    Raises:
        IndexError: if the header dimensions of the inputs are inconsistent.
    """
    loglevel = level
    logging.basicConfig(format='%(message)s', level=loglevel, filename=logfile)

    dim_keys = ('lines', 'bands', 'samples')

    # Open input data to check that band formatting is correct
    # Load reference set radiance
    reference_radiance_img = envi.open(envi_header(reference_radiance_file),
                                       reference_radiance_file)
    n_reference_lines, n_radiance_bands, n_reference_columns = [
        int(reference_radiance_img.metadata[n]) for n in dim_keys
    ]
    if n_reference_columns != 1:
        raise IndexError("Reference data should be a single-column list")

    # Load reference set reflectance
    reference_reflectance_img = envi.open(
        envi_header(reference_reflectance_file), reference_reflectance_file)
    nrefr, nbr, srefr = [
        int(reference_reflectance_img.metadata[n]) for n in dim_keys
    ]
    if (nrefr != n_reference_lines or nbr != n_radiance_bands
            or srefr != n_reference_columns):
        raise IndexError("Reference file dimension mismatch (reflectance)")

    # Load reference set uncertainty, assuming reflectance uncertainty is
    # recorded in the first n_radiance_bands channels of data
    reference_uncertainty_img = envi.open(
        envi_header(reference_uncertainty_file), reference_uncertainty_file)
    nrefu, ns, srefu = [
        int(reference_uncertainty_img.metadata[n]) for n in dim_keys
    ]
    if (nrefu != n_reference_lines or ns < n_radiance_bands
            or srefu != n_reference_columns):
        raise IndexError("Reference file dimension mismatch (uncertainty)")

    # Load reference set locations
    reference_locations_img = envi.open(envi_header(reference_locations_file),
                                        reference_locations_file)
    nrefl, lb, ls = [
        int(reference_locations_img.metadata[n]) for n in dim_keys
    ]
    if nrefl != n_reference_lines or lb != 3:
        raise IndexError("Reference file dimension mismatch (locations)")

    input_radiance_img = envi.open(envi_header(input_radiance_file),
                                   input_radiance_file)
    n_input_lines, n_input_bands, n_input_samples = [
        int(input_radiance_img.metadata[n]) for n in dim_keys
    ]
    if n_radiance_bands != n_input_bands:
        # Report the *input* band count; the reference reflectance count was
        # already verified to equal n_radiance_bands above.
        msg = 'Number of channels mismatch: input (%i) vs. reference (%i)'
        raise IndexError(msg % (n_input_bands, n_radiance_bands))

    input_locations_img = envi.open(envi_header(input_locations_file),
                                    input_locations_file)
    nll, nlb, nls = [
        int(input_locations_img.metadata[n]) for n in dim_keys
    ]
    if nll != n_input_lines or nlb != 3 or nls != n_input_samples:
        raise IndexError('Input location dimension mismatch')

    # Create output files
    output_metadata = input_radiance_img.metadata
    output_metadata['interleave'] = 'bil'
    output_reflectance_img = envi.create_image(
        envi_header(output_reflectance_file),
        ext='',
        metadata=output_metadata,
        force=True)
    output_uncertainty_img = envi.create_image(
        envi_header(output_uncertainty_file),
        ext='',
        metadata=output_metadata,
        force=True)

    # Now cleanup inputs and outputs, the workers reopen and write them
    del output_reflectance_img, output_uncertainty_img
    del reference_radiance_img, reference_reflectance_img
    del reference_uncertainty_img, reference_locations_img
    del input_radiance_img, input_locations_img

    # Initialize ray cluster
    if isofit_config is not None:
        iconfig = configs.create_new_config(isofit_config)
    else:
        # If none, create a temporary config to get default ray parameters
        iconfig = configs.Config({})
    if n_cores == -1:
        n_cores = iconfig.implementation.n_cores
    rayargs = {
        'ignore_reinit_error': iconfig.implementation.ray_ignore_reinit_error,
        'local_mode': n_cores == 1,
        "address": iconfig.implementation.ip_head,
        '_temp_dir': iconfig.implementation.ray_temp_dir,
        "_redis_password": iconfig.implementation.redis_password
    }

    # We can only set the num_cpus if running on a single-node
    if (iconfig.implementation.ip_head is None
            and iconfig.implementation.redis_password is None):
        rayargs['num_cpus'] = n_cores

    ray.init(**rayargs)
    atexit.register(ray.shutdown)

    n_ray_cores = ray.available_resources()["CPU"]
    n_cores = min(n_ray_cores, n_input_lines)

    logging.info(
        'Beginning empirical line inversions using {} cores'.format(n_cores))

    # Break data into sections (one contiguous range of lines per core)
    line_sections = np.linspace(0,
                                n_input_lines,
                                num=int(n_cores + 1),
                                dtype=int)

    # Timing covers the chunk processing only, not the ray start-up
    start_time = time.time()

    # Submit one remote task per section and wait for all of them
    results = []
    for l in range(len(line_sections) - 1):
        args = (line_sections[l], line_sections[l + 1],
                reference_radiance_file, reference_reflectance_file,
                reference_uncertainty_file, reference_locations_file,
                input_radiance_file, input_locations_file, segmentation_file,
                isofit_config, output_reflectance_file,
                output_uncertainty_file, radiance_factors, nneighbors,
                nodata_value, level, logfile)
        results.append(_run_chunk.remote(*args))

    _ = ray.get(results)

    total_time = time.time() - start_time
    logging.info(
        'Parallel empirical line inversions complete.  {} s total, {} spectra/s, {} spectra/s/core'
        .format(total_time,
                line_sections[-1] * n_input_samples / total_time,
                line_sections[-1] * n_input_samples / total_time / n_cores))
def _run_chunk(start_line: int, stop_line: int, reference_radiance_file: str,
               reference_reflectance_file: str,
               reference_uncertainty_file: str, reference_locations_file: str,
               input_radiance_file: str, input_locations_file: str,
               segmentation_file: str, isofit_config: str,
               output_reflectance_file: str, output_uncertainty_file: str,
               radiance_factors: np.array, nneighbors: int,
               nodata_value: float, loglevel: str, logfile: str) -> None:
    """
    Run the empirical line over image rows ``start_line`` .. ``stop_line - 1``.

    For every pixel a per-band linear model (offset, gain) mapping radiance to
    reflectance is fitted on the ``nneighbors`` nearest reference spectra and
    applied to the pixel radiance.  When a segmentation is supplied the fitted
    model is cached per segment label.  Each finished row is written to the
    output files with ``write_bil_chunk``.

    Args:
        start_line: line to start empirical line run at
        stop_line: line to stop empirical line run at
        reference_radiance_file: source file for radiance (interpolation built from this)
        reference_reflectance_file: source file for reflectance (interpolation built from this)
        reference_uncertainty_file: source file for uncertainty (interpolation built from this)
        reference_locations_file: source file for file locations (lon, lat, elev), (interpolation built from this)
        input_radiance_file: input radiance file (interpolate over this)
        input_locations_file: input location file (interpolate over this)
        segmentation_file: input file noting the per-pixel segmentation used
        isofit_config: path to isofit configuration JSON file
        output_reflectance_file: location to write output reflectance to
        output_uncertainty_file: location to write output uncertainty to
        radiance_factors: radiance adjustment factors
        nneighbors: number of neighbors to use for interpolation
        nodata_value: nodata value of input and output
        loglevel: logging level
        logfile: logging file

    Returns:
        None
    """
    logging.basicConfig(format='%(message)s', level=loglevel, filename=logfile)

    # Load reference images
    reference_radiance_img = envi.open(envi_header(reference_radiance_file),
                                       reference_radiance_file)
    reference_reflectance_img = envi.open(
        envi_header(reference_reflectance_file), reference_reflectance_file)
    reference_uncertainty_img = envi.open(
        envi_header(reference_uncertainty_file), reference_uncertainty_file)
    reference_locations_img = envi.open(envi_header(reference_locations_file),
                                        reference_locations_file)

    n_reference_lines, n_radiance_bands, n_reference_columns = [
        int(reference_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]
    n_reference_uncertainty_bands = int(
        reference_uncertainty_img.metadata['bands'])

    # Load input images
    input_radiance_img = envi.open(envi_header(input_radiance_file),
                                   input_radiance_file)
    n_input_lines, n_input_bands, n_input_samples = [
        int(input_radiance_img.metadata[n])
        for n in ('lines', 'bands', 'samples')
    ]

    input_locations_img = envi.open(envi_header(input_locations_file),
                                    input_locations_file)
    n_location_bands = int(input_locations_img.metadata['bands'])

    # Load output images
    output_reflectance_img = envi.open(envi_header(output_reflectance_file),
                                       output_reflectance_file)
    output_uncertainty_img = envi.open(envi_header(output_uncertainty_file),
                                       output_uncertainty_file)
    n_output_reflectance_bands = int(output_reflectance_img.metadata['bands'])
    n_output_uncertainty_bands = int(output_uncertainty_img.metadata['bands'])

    # Load reference data as 2-D (reference line, band) arrays
    reference_locations_mm = reference_locations_img.open_memmap(
        interleave='bip', writable=False)
    reference_locations = np.array(reference_locations_mm[:, :, :]).reshape(
        (n_reference_lines, n_location_bands))

    reference_radiance_mm = reference_radiance_img.open_memmap(
        interleave='bip', writable=False)
    reference_radiance = np.array(reference_radiance_mm[:, :, :]).reshape(
        (n_reference_lines, n_radiance_bands))

    reference_reflectance_mm = reference_reflectance_img.open_memmap(
        interleave='bip', writable=False)
    reference_reflectance = np.array(
        reference_reflectance_mm[:, :, :]).reshape(
            (n_reference_lines, n_radiance_bands))

    reference_uncertainty_mm = reference_uncertainty_img.open_memmap(
        interleave='bip', writable=False)
    reference_uncertainty = np.array(
        reference_uncertainty_mm[:, :, :]).reshape(
            (n_reference_lines, n_reference_uncertainty_bands))
    # Only the first n_radiance_bands channels hold reflectance uncertainty
    reference_uncertainty = reference_uncertainty[:, :n_radiance_bands].reshape(
        (n_reference_lines, n_radiance_bands))

    # Load segmentation data (first band holds the per-pixel labels)
    if segmentation_file:
        segmentation_img = envi.open(envi_header(segmentation_file),
                                     segmentation_file)
        segmentation_img = segmentation_img.read_band(0)
    else:
        segmentation_img = None

    # Prepare instrument model, if available
    if isofit_config is not None:
        config = configs.create_new_config(isofit_config)
        instrument = Instrument(config)
        logging.info('Loading instrument')

        # Make sure the instrument is configured for single-pixel noise (no averaging)
        instrument.integrations = 1
    else:
        instrument = None

    # Load radiance factors
    if radiance_factors is None:
        radiance_adjustment = np.ones(n_radiance_bands, )
    else:
        radiance_adjustment = np.loadtxt(radiance_factors)

    # Load Tree
    # Assume (heuristically) that, for distance purposes, 1 m vertically is
    # comparable to 10 m horizontally, and that there are 100 km per latitude
    # degree.  This is all approximate of course.  Elevation appears in the
    # Third element, and the first two are latitude/longitude coordinates
    loc_scaling = np.array([1e5, 1e5, 0.1])
    scaled_ref_loc = reference_locations * loc_scaling
    tree = KDTree(scaled_ref_loc)

    # Iterate through image; hash_table caches fitted models per segment label
    hash_table = {}

    for row in np.arange(start_line, stop_line):

        # Load inline input data
        input_radiance_mm = input_radiance_img.open_memmap(interleave='bip',
                                                           writable=False)
        input_radiance = np.array(input_radiance_mm[row, :, :])
        input_radiance = input_radiance * radiance_adjustment

        input_locations_mm = input_locations_img.open_memmap(interleave='bip',
                                                             writable=False)
        input_locations = np.array(input_locations_mm[row, :, :])

        output_reflectance_row = np.zeros(input_radiance.shape) + nodata_value
        output_uncertainty_row = np.zeros(input_radiance.shape) + nodata_value

        nspectra, start = 0, time.time()
        for col in np.arange(n_input_samples):

            x = input_radiance[col, :]
            if np.all(np.isclose(x, nodata_value)):
                output_reflectance_row[col, :] = nodata_value
                output_uncertainty_row[col, :] = nodata_value
                continue

            bhat = None
            if segmentation_img is not None:
                hash_idx = segmentation_img[row, col]
                if hash_idx in hash_table:
                    bhat, bmarg, bcov = hash_table[hash_idx]
                else:
                    # The segment label indexes the reference location table
                    loc = reference_locations[
                        np.array(hash_idx, dtype=int), :] * loc_scaling
            else:
                loc = input_locations[col, :] * loc_scaling

            if bhat is None:
                dists, nn = tree.query(loc, nneighbors)
                xv = reference_radiance[nn, :]
                yv = reference_reflectance[nn, :]
                # Kept for the (currently disabled) uncertainty weighting below
                uv = reference_uncertainty[nn, :]
                bhat = np.zeros((n_radiance_bands, 2))
                bmarg = np.zeros((n_radiance_bands, 2))
                bcov = np.zeros((n_radiance_bands, 2, 2))

                for i in np.arange(n_radiance_bands):
                    # Fit only on neighbours with positive reflectance
                    use = yv[:, i] > 0
                    n = int(np.count_nonzero(use))
                    X = np.concatenate((np.ones((n, 1)), xv[use, i:i + 1]),
                                       axis=1)
                    W = np.diag(np.ones(n))  # /uv[use, i])
                    y = yv[use, i:i + 1]
                    try:
                        # Invert the normal matrix once and reuse it for both
                        # the coefficients and their covariance
                        xtwx_inv = inv(X.T @ W @ X)
                        bhat[i, :] = (xtwx_inv @ X.T @ W @ y).T
                        bcov[i, :, :] = xtwx_inv
                    except (np.linalg.LinAlgError, ValueError):
                        # Singular / ill-formed system (e.g. no usable
                        # neighbours): fall back to a zero model for this band
                        bhat[i, :] = 0
                        bcov[i, :, :] = 0
                    bmarg[i, :] = np.diag(bcov[i, :, :])

            if (segmentation_img is not None) and not (hash_idx in hash_table):
                hash_table[hash_idx] = bhat, bmarg, bcov

            # Rows of A are [1, radiance]; summing bhat.T * A evaluates
            # offset + gain * radiance for every band
            A = np.array((np.ones(n_radiance_bands), x))
            output_reflectance_row[col, :] = (np.multiply(bhat.T,
                                                          A).sum(axis=0))

            # Calculate uncertainties.  Sy approximation rather than Seps for
            # speed, for now... but we do take into account instrument
            # radiometric uncertainties
            if instrument is None:
                output_uncertainty_row[col, :] = np.sqrt(
                    np.multiply(bmarg.T, A).sum(axis=0))
            else:
                Sy = instrument.Sy(x, geom=None)
                calunc = instrument.bval[:instrument.n_chan]
                output_uncertainty_row[col, :] = np.sqrt(
                    np.diag(Sy) + pow(calunc * x, 2)) * bhat[:, 1]

            nspectra = nspectra + 1

        elapsed = float(time.time() - start)
        # Guard against a zero interval on coarse clocks / all-nodata rows
        rate = round(float(nspectra) / elapsed, 2) if elapsed > 0 else 0.0
        logging.info('row {}/{}, ({}/{} local), {} spectra per second'.format(
            row, n_input_lines, int(row - start_line),
            int(stop_line - start_line), rate))

        del input_locations_mm
        del input_radiance_mm

        # Rearrange (sample, band) rows into a single BIL line: (1, band, sample)
        output_reflectance_row = output_reflectance_row.transpose((1, 0))
        output_uncertainty_row = output_uncertainty_row.transpose((1, 0))
        shp = output_reflectance_row.shape
        output_reflectance_row = output_reflectance_row.reshape(
            (1, shp[0], shp[1]))
        shp = output_uncertainty_row.shape
        output_uncertainty_row = output_uncertainty_row.reshape(
            (1, shp[0], shp[1]))

        write_bil_chunk(
            output_reflectance_row, output_reflectance_file, row,
            (n_input_lines, n_output_reflectance_bands, n_input_samples))
        write_bil_chunk(
            output_uncertainty_row, output_uncertainty_file, row,
            (n_input_lines, n_output_uncertainty_bands, n_input_samples))
def __init__(self,
             fname,
             write=False,
             n_rows=None,
             n_cols=None,
             n_bands=None,
             interleave=None,
             dtype=np.float32,
             wavelengths=None,
             fwhm=None,
             band_names=None,
             bad_bands='[]',
             zrange='{0.0, 1.0}',
             flag=-9999.0,
             ztitles='{Wavelength (nm), Magnitude}',
             map_info='{}'):
    """Open (or create) a spectrum file, inferring its format from the name.

    A ``.txt`` suffix is treated as ASCII, ``.mat`` as MATLAB (output only),
    and anything else as an ENVI image with a detached header.

    Args:
        fname: path of the file; stored as an absolute path in ``self.fname``.
        write: True for an output file, False for an input file.
        n_rows, n_cols, n_bands: cube dimensions (required for ENVI output).
        interleave: ENVI interleave (required for ENVI output).
        dtype: numpy dtype, mapped to an ENVI data type through ``typemap``.
        wavelengths, fwhm, band_names: per-band metadata (required for ENVI
            output).
        bad_bands, zrange, ztitles, map_info: header strings for ENVI output.
        flag: value stored in ``self.flag``; for ENVI input it is replaced by
            the header's 'data ignore value' when present, else -9999.0.

    Raises:
        IOError: for MATLAB input, a missing ENVI input header, or missing
            metadata for ENVI output.
    """
    self.frames = OrderedDict()
    self.write = write
    self.fname = os.path.abspath(fname)
    self.wl = wavelengths
    self.band_names = band_names
    self.fwhm = fwhm
    self.flag = flag
    self.n_rows = n_rows
    self.n_cols = n_cols
    self.n_bands = n_bands

    if self.fname.endswith('.txt'):

        # The .txt suffix implies a space-separated ASCII text file of
        # one or more data columns.  This is cheap to load and store, so
        # we do not defer read/write operations.
        logging.debug('Inferred ASCII file format for %s' % self.fname)
        self.format = 'ASCII'
        if not self.write:
            self.data, self.wl = load_spectrum(self.fname)
            self.n_rows, self.n_cols, self.map_info = 1, 1, '{}'
            if self.wl is not None:
                self.n_bands = len(self.wl)
            else:
                self.n_bands = None
            self.meta = {}

    elif self.fname.endswith('.mat'):

        # The .mat suffix implies a matlab-style file, i.e. a dictionary
        # of 2D arrays and other matlab-like objects.  This is typically
        # only used for specific output products associated with single
        # spectrum retrievals; there is no read option.
        logging.debug('Inferred MATLAB file format for %s' % self.fname)
        self.format = 'MATLAB'
        if not self.write:
            logging.error('Unsupported MATLAB file in input block')
            raise IOError('MATLAB format in input block not supported')

    else:

        # Otherwise we assume it is an ENVI-format file, which is
        # basically just a binary data cube with a detached human-
        # readable ASCII header describing dimensions, interleave, and
        # metadata.  We buffer this data in self.frames, reading and
        # writing individual rows of the cube on-demand.
        logging.debug('Inferred ENVI file format for %s' % self.fname)
        self.format = 'ENVI'

        # Always address the file through the normalised absolute path so
        # the handle does not depend on the current working directory.
        header = envi_header(self.fname)

        if not self.write:

            # If we are an input file, the header must preexist.
            if not os.path.exists(header):
                logging.error('Could not find %s' % (header))
                raise IOError('Could not find %s' % (header))

            # open file and copy metadata
            self.file = envi.open(header, self.fname)
            self.meta = self.file.metadata.copy()

            self.n_rows = int(self.meta['lines'])
            self.n_cols = int(self.meta['samples'])
            self.n_bands = int(self.meta['bands'])
            if 'data ignore value' in self.meta:
                self.flag = float(self.meta['data ignore value'])
            else:
                self.flag = -9999.0

        else:

            # If we are an output file, we may have to build the header
            # from scratch.  Hopefully the caller has supplied the
            # necessary metadata details.
            # NOTE(review): the 'file_type' key uses an underscore whereas
            # ENVI headers normally spell it 'file type' -- confirm intent.
            meta = {
                'lines': n_rows,
                'samples': n_cols,
                'bands': n_bands,
                'byte order': 0,
                'header offset': 0,
                'map info': map_info,
                'file_type': 'ENVI Standard',
                'sensor type': 'unknown',
                'interleave': interleave,
                'data type': typemap[dtype],
                'wavelength units': 'nm',
                'z plot range': zrange,
                'z plot titles': ztitles,
                'fwhm': fwhm,
                'bbl': bad_bands,
                'band names': band_names,
                'wavelength': self.wl
            }

            # Every header field must be known before the image is created
            for k, v in meta.items():
                if v is None:
                    logging.error('Must specify %s' % (k))
                    raise IOError('Must specify %s' % (k))

            # Reuse an existing output image rather than truncating it
            if not os.path.isfile(header):
                self.file = envi.create_image(header,
                                              meta,
                                              ext='',
                                              force=True)
            else:
                self.file = envi.open(header)

        # Only the ENVI branch sets self.file, so the map is opened here
        self.open_map_with_retries()
def do_inverse(isofit_inv: dict, radfile: pathlib.Path,
               est_refl_file: pathlib.Path, est_state_file: pathlib.Path,
               atm_coef_file: pathlib.Path, post_unc_file: pathlib.Path,
               overwrite: bool, use_empirical_line: bool):
    """Run the Isofit inversion, optionally through the empirical line.

    With ``use_empirical_line`` the radiance image is segmented, Isofit is run
    on the per-segment extractions (files named ``<output>-subs``), and the
    result is resampled to the full image with ``empirical_line``.  Otherwise
    Isofit is run directly on ``radfile``.

    Args:
        isofit_inv: Isofit configuration dictionary; its ``input`` and
            ``output`` entries are modified in place.
        radfile: path of the radiance image to invert.
        est_refl_file: output path for the estimated reflectance.
        est_state_file: output path for the estimated state.
        atm_coef_file: output path for the atmospheric coefficients.
        post_unc_file: output path for the posterior uncertainty.
        overwrite: if False, steps whose outputs already exist are skipped.
        use_empirical_line: select the segmented / empirical-line workflow.

    Path arguments may be given as ``str`` or ``pathlib.Path``.
    """
    # Callers sometimes pass plain strings; the code below needs Path methods
    radfile = pathlib.Path(radfile)
    est_refl_file = pathlib.Path(est_refl_file)
    est_state_file = pathlib.Path(est_state_file)
    atm_coef_file = pathlib.Path(atm_coef_file)
    post_unc_file = pathlib.Path(post_unc_file)

    # Location of the inversion configuration.  Defined up front because the
    # empirical line step needs it even when the inversion itself is skipped.
    invfile = radfile.parent / (
        str(radfile).replace("toa-radiance", "inverse") + ".json")

    if use_empirical_line:
        # Segment first, then run on segmented file
        SEGMENTATION_SIZE = 40
        CHUNKSIZE = 256
        lbl_working_path = radfile.parent / str(radfile).replace(
            "toa-radiance", "segmentation")

        def _subs(path):
            # Path.with_suffix() rejects suffixes that do not start with a
            # dot, so append the "-subs" marker to the file name instead.
            return path.with_name(path.name + "-subs")

        rdn_subs_path = _subs(radfile)
        rfl_subs_path = _subs(est_refl_file)
        state_subs_path = _subs(est_state_file)
        atm_subs_path = _subs(atm_coef_file)
        unc_subs_path = _subs(post_unc_file)
        isofit_inv["input"]["measured_radiance_file"] = str(rdn_subs_path)
        isofit_inv["output"] = {
            "estimated_reflectance_file": str(rfl_subs_path),
            "estimated_state_file": str(state_subs_path),
            "atmospheric_coefficients_file": str(atm_subs_path),
            "posterior_uncertainty_file": str(unc_subs_path)
        }
        if (not overwrite and lbl_working_path.exists()
                and rdn_subs_path.exists()):
            logger.info(
                "Skipping segmentation and extraction because files exist.")
        else:
            logger.info(
                "Fixing any radiance values slightly less than zero...")
            rad_img = sp.open_image(envi_header(str(radfile)))
            rad_m = rad_img.open_memmap(writable=True)
            nearzero = np.logical_and(rad_m < 0, rad_m > -2)
            rad_m[nearzero] = 0.0001
            # Drop the writable map and image handle before other tools
            # reopen the file
            del rad_m
            del rad_img
            logger.info("Segmenting...")
            segment(spectra=(str(radfile), str(lbl_working_path)),
                    flag=-9999,
                    npca=5,
                    segsize=SEGMENTATION_SIZE,
                    nchunk=CHUNKSIZE)
            logger.info("Extracting...")
            extractions(inputfile=str(radfile),
                        labels=str(lbl_working_path),
                        output=str(rdn_subs_path),
                        chunksize=CHUNKSIZE,
                        flag=-9999)

    else:
        # Run Isofit directly
        isofit_inv["input"]["measured_radiance_file"] = str(radfile)
        isofit_inv["output"] = {
            "estimated_reflectance_file": str(est_refl_file),
            "estimated_state_file": str(est_state_file),
            "atmospheric_coefficients_file": str(atm_coef_file),
            "posterior_uncertainty_file": str(post_unc_file)
        }

    if not overwrite and pathlib.Path(
            isofit_inv["output"]["estimated_reflectance_file"]).exists():
        logger.info("Skipping inversion because output file exists.")
    else:
        with open(invfile, "w") as f:
            json.dump(isofit_inv, f, indent=2)
        Isofit(invfile).run()

    if use_empirical_line:
        if not overwrite and est_refl_file.exists():
            logger.info("Skipping empirical line because output exists.")
        else:
            logger.info("Applying empirical line...")
            # NOTE(review): both location files are passed as None; confirm
            # that the empirical_line implementation in use accepts that.
            empirical_line(reference_radiance_file=str(rdn_subs_path),
                           reference_reflectance_file=str(rfl_subs_path),
                           reference_uncertainty_file=str(unc_subs_path),
                           reference_locations_file=None,
                           segmentation_file=str(lbl_working_path),
                           input_radiance_file=str(radfile),
                           input_locations_file=None,
                           output_reflectance_file=str(est_refl_file),
                           output_uncertainty_file=str(post_unc_file),
                           isofit_config=str(invfile))
def do_hypertrace(isofit_config,
                  wavelength_file,
                  reflectance_file,
                  rtm_template_file,
                  lutdir,
                  outdir,
                  surface_file="./data/prior.mat",
                  noisefile=None,
                  snr=300,
                  aod=0.1,
                  h2o=1.0,
                  atmosphere_type="ATM_MIDLAT_WINTER",
                  atm_aod_h2o=None,
                  solar_zenith=0,
                  observer_zenith=0,
                  solar_azimuth=0,
                  observer_azimuth=0,
                  observer_altitude_km=99.9,
                  dayofyear=200,
                  latitude=34.15,
                  longitude=-118.14,
                  localtime=10.0,
                  elevation_km=0.01,
                  inversion_mode="inversion",
                  use_empirical_line=False,
                  calibration_uncertainty_file=None,
                  n_calibration_draws=1,
                  calibration_scale=1,
                  create_lut=True,
                  overwrite=False):
    """One iteration of the hypertrace workflow.

    Required arguments:
        isofit_config: dict of isofit configuration options

        `wavelength_file`: Path to ASCII space delimited table containing two
        columns, wavelength and full width half max (FWHM); both in nanometers.

        `reflectance_file`: Path to input reflectance file. Note that this has
        to be an ENVI-formatted binary reflectance file, and this path is to the
        associated header file (`.hdr`), not the image file itself (following
        the convention of the `spectral` Python library, which will be used to
        read this file).

        rtm_template_file: Path to the atmospheric RTM template. For LibRadtran,
        note that this is slightly different from the Isofit template in that
        the Isofit fields are surrounded by two sets of `{{` while a few
        additional options related to geometry are surrounded by just `{` (this
        is because Hypertrace does an initial pass at formatting the files).

        `lutdir`: Directory where look-up tables will be stored. Will be created
        if missing.

        `outdir`: Directory where outputs will be stored. Will be created if
        missing.

    Keyword arguments:
      surface_file: Matlab (`.mat`) file containing a multicomponent surface
      prior. See Isofit documentation for details.

      noisefile: Parametric instrument noise file. See Isofit documentation for
      details. Default = `None`

      snr: Instrument signal-to-noise ratio. Ignored if `noisefile` is present.
      Default = 300

      aod: True aerosol optical depth. Default = 0.1

      h2o: True water vapor content. Default = 1.0

      atmosphere_type: LibRadtran or Modtran atmosphere type. See RTM
      manuals for details. Default = `ATM_MIDLAT_WINTER`

      atm_aod_h2o: A list containing three elements: The atmosphere type, AOD,
      and H2O. This provides a way to iterate over specific known atmospheres
      that are combinations of the three previous variables. If this is set, it
      overrides the three previous arguments. Default = `None`

      solar_zenith, observer_zenith: Solar and observer zenith angles,
      respectively (0 = directly overhead, 90 = horizon). These are in degrees
      off nadir. Default = 0 for both. (Note that off-nadir angles make
      LibRadtran run _much_ more slowly, so be prepared if you need to generate
      those LUTs). (Note: For `modtran` and `modtran_simulator`, `solar_zenith`
      is calculated from the `gmtime` and location, so this parameter is
      ignored.)

      solar_azimuth, observer_azimuth: Solar and observer azimuth angles,
      respectively, in degrees. Observer azimuth is the sensor _position_ (so
      180 degrees off from view direction) relative to N, rotating
      counterclockwise; i.e., 0 = Sensor in N, looking S; 90 = Sensor in W,
      looking E (this follows the LibRadtran convention). Default = 0 for both.
      Note: For `modtran` and `modtran_simulator`, `observer_azimuth` is used as
      `to_sensor_azimuth`; i.e., the *relative* azimuth of the sensor. The true
      solar azimuth is calculated from lat/lon and time, so `solar_azimuth` is
      ignored.

      observer_altitude_km: Sensor altitude in km. Must be less than 100.
      Default = 99.9. (`modtran` and `modtran_simulator` only)

      dayofyear: Julian date of observation. Default = 200
      (`modtran` and `modtran_simulator` only)

      latitude, longitude: Decimal degree coordinates of observation.
      Default = 34.15, -118.14 (Pasadena, CA).
      (`modtran` and `modtran_simulator` only)

      localtime: Local time, in decimal hours (0-24). Default = 10.0
      (`modtran` and `modtran_simulator` only)

      elevation_km: Target elevation above sea level, in km. Default = 0.01
      (`modtran` and `modtran_simulator` only)

      inversion_mode: Inversion algorithm to use. Must be either "inversion"
      (default) for standard optimal estimation, or "mcmc_inversion" for MCMC.
      The special value "simple" runs the optimal estimation code with
      `max_nfev` set to 1.

      use_empirical_line: (boolean, default = `False`) If `True`, perform
      atmospheric correction on a segmented image and then resample using the
      empirical line method. If `False`, run Isofit pixel-by-pixel.

      calibration_uncertainty_file: Optional `.mat` file from which the
      `Covariance` and `wavelengths` entries are read. When given, the
      inversion is repeated for `n_calibration_draws` perturbed radiance
      files, with `calibration_scale` passed on as the bias scale.

      create_lut: (boolean, default = `True`) If `True`, resolve `lutdir` to
      an absolute path and create it.

      overwrite: (boolean, default = `False`) If `False` (default), skip steps
      where output files already exist. If `True`, run the full workflow
      regardless of existing files.
    """

    outdir = mkabs(outdir)
    outdir.mkdir(parents=True, exist_ok=True)

    assert observer_altitude_km < 100, "Isofit 6S does not support altitude >= 100km"

    isofit_common = copy.deepcopy(isofit_config)
    # NOTE: All of these settings are *not* copied, but referenced. So these
    # changes propagate to the `forward_settings` object below.
    forward_settings = isofit_common["forward_model"]
    instrument_settings = forward_settings["instrument"]
    # NOTE: This also propagates to the radiative transfer engine
    instrument_settings["wavelength_file"] = str(mkabs(wavelength_file))
    surface_settings = forward_settings["surface"]
    surface_settings["surface_file"] = str(mkabs(surface_file))
    if noisefile is not None:
        noisetag = f"noise_{pathlib.Path(noisefile).stem}"
        if "SNR" in instrument_settings:
            instrument_settings.pop("SNR")
        instrument_settings["parametric_noise_file"] = str(mkabs(noisefile))
        if "integrations" not in instrument_settings:
            instrument_settings["integrations"] = 1
    elif snr is not None:
        noisetag = f"snr_{snr}"
        instrument_settings["SNR"] = snr
    else:
        # Without either, the output directory tag cannot be built
        raise ValueError("Either `noisefile` or `snr` must be specified")

    priortag = (f"prior_{pathlib.Path(surface_file).stem}__"
                f"inversion_{inversion_mode}")

    if atm_aod_h2o is not None:
        atmosphere_type = atm_aod_h2o[0]
        aod = atm_aod_h2o[1]
        h2o = atm_aod_h2o[2]

    atmtag = f"aod_{aod:.3f}__h2o_{h2o:.3f}"
    if calibration_uncertainty_file is not None:
        caltag = (f"cal_{pathlib.Path(calibration_uncertainty_file).stem}__"
                  f"draw_{n_calibration_draws}__"
                  f"scale_{calibration_scale}")
    else:
        caltag = "cal_NONE__draw_0__scale_0"

    # NOTE(review): only the creation of the LUT root is gated on
    # `create_lut`; the per-geometry directory and RTM templates below are
    # always prepared.  Confirm this is the intended scope of the flag.
    if create_lut:
        lutdir = mkabs(lutdir)
        lutdir.mkdir(parents=True, exist_ok=True)

    vswir_conf = forward_settings["radiative_transfer"][
        "radiative_transfer_engines"]["vswir"]
    atmospheric_rtm = vswir_conf["engine_name"]

    if atmospheric_rtm == "libradtran":
        lrttag = (f"atm_{atmosphere_type}__"
                  f"szen_{solar_zenith:.2f}__"
                  f"ozen_{observer_zenith:.2f}__"
                  f"saz_{solar_azimuth:.2f}__"
                  f"oaz_{observer_azimuth:.2f}")
        lutdir2 = lutdir / lrttag
        lutdir2.mkdir(parents=True, exist_ok=True)
        lrtfile = lutdir2 / "lrt-template.inp"
        with open(rtm_template_file, "r") as f:
            fs = f.read()
        # First formatting pass: fill in the geometry fields of the template
        with open(lrtfile, "w") as f:
            f.write(
                fs.format(atmosphere=atmosphere_type,
                          solar_azimuth=solar_azimuth,
                          solar_zenith=solar_zenith,
                          cos_observer_zenith=np.cos(observer_zenith * np.pi /
                                                     180.0),
                          observer_azimuth=observer_azimuth))
        with open(lutdir2 / "prescribed_geom", "w") as f:
            f.write(f"99:99:99 {solar_zenith} {solar_azimuth}")

    elif atmospheric_rtm in ("modtran", "sRTMnet"):
        loctag = (f"atm_{atmosphere_type}__"
                  f"alt_{observer_altitude_km:.2f}__"
                  f"doy_{dayofyear:.0f}__"
                  f"lat_{latitude:.3f}__lon_{longitude:.3f}")
        angtag = (f"az_{observer_azimuth:.2f}__"
                  f"zen_{180 - observer_zenith:.2f}__"
                  f"time_{localtime:.2f}__"
                  f"elev_{elevation_km:.2f}")
        lrttag = loctag + "/" + angtag
        lutdir2 = lutdir / lrttag
        lutdir2.mkdir(parents=True, exist_ok=True)
        lrtfile = lutdir2 / "modtran-template-h2o.json"
        mt_params = {
            "atmosphere_type": atmosphere_type,
            "fid": "hypertrace",
            "altitude_km": observer_altitude_km,
            "dayofyear": dayofyear,
            "latitude": latitude,
            "longitude": longitude,
            "to_sensor_azimuth": observer_azimuth,
            "to_sensor_zenith": 180 - observer_zenith,
            "gmtime": localtime,
            "elevation_km": elevation_km,
            "output_file": lrtfile,
            "ihaze_type": "AER_NONE"
        }
        # Two templates are written: one without aerosol (H2O template) and
        # one with rural aerosol (main template)
        write_modtran_template(**mt_params)
        mt_params["ihaze_type"] = "AER_RURAL"
        mt_params["output_file"] = lutdir2 / "modtran-template.json"
        write_modtran_template(**mt_params)

        vswir_conf["modtran_template_path"] = str(mt_params["output_file"])
        if atmospheric_rtm == "sRTMnet":
            vswir_conf["interpolator_base_path"] = str(
                lutdir2 / "sRTMnet_interpolator")
            # These need to be absolute file paths
            for path in [
                    "emulator_aux_file", "emulator_file",
                    "earth_sun_distance_file", "irradiance_file"
            ]:
                vswir_conf[path] = str(mkabs(vswir_conf[path]))

    else:
        raise ValueError(f"Invalid atmospheric rtm {atmospheric_rtm}")

    vswir_conf["lut_path"] = str(lutdir2)
    vswir_conf["template_file"] = str(lrtfile)

    outdir2 = outdir / lrttag / noisetag / priortag / atmtag / caltag
    outdir2.mkdir(parents=True, exist_ok=True)

    # Observation file, which describes the geometry
    # Angles follow LibRadtran conventions
    obsfile = outdir2 / "obs.txt"
    geomvec = [
        -999,  # path length; not used
        observer_azimuth,  # Degrees 0-360; 0 = Sensor in N, looking S; 90 = Sensor in W, looking E
        observer_zenith,  # Degrees 0-90; 0 = directly overhead, 90 = horizon
        solar_azimuth,  # Degrees 0-360; 0 = Sun in S; 90 = Sun in W.
        solar_zenith,  # Same units as observer zenith
        180.0 - abs(observer_zenith),  # MODTRAN OBSZEN
        observer_azimuth - solar_azimuth + 180.0,  # MODTRAN relative azimuth
        observer_azimuth,  # MODTRAN azimuth
        np.cos(observer_zenith * np.pi / 180.0)  # Libradtran cos observer zenith
    ]
    np.savetxt(obsfile, np.array([geomvec]))

    isofit_common["input"] = {"obs_file": str(obsfile)}

    isofit_fwd = copy.deepcopy(isofit_common)
    isofit_fwd["input"]["reflectance_file"] = str(mkabs(reflectance_file))
    isofit_fwd["implementation"]["mode"] = "simulation"
    isofit_fwd["implementation"]["inversion"]["simulation_mode"] = True
    fwd_surface = isofit_fwd["forward_model"]["surface"]
    fwd_surface["surface_category"] = "surface"

    # Check that prior and wavelength file have the same dimensions
    prior = loadmat(mkabs(surface_file))
    prior_wl = prior["wl"][0]
    prior_nwl = len(prior_wl)
    file_wl = np.loadtxt(wavelength_file)
    file_nwl = file_wl.shape[0]
    assert prior_nwl == file_nwl, \
        f"Mismatch between wavelength file ({file_nwl}) " +\
        f"and prior ({prior_nwl})."

    fwd_surface["wavelength_file"] = str(wavelength_file)

    radfile = outdir2 / "toa-radiance"
    isofit_fwd["output"] = {"simulated_measurement_file": str(radfile)}
    fwd_state = isofit_fwd["forward_model"]["radiative_transfer"][
        "statevector"]
    fwd_state["AOT550"]["init"] = aod
    fwd_state["H2OSTR"]["init"] = h2o

    # Also set the LUT grid to only target state. We don't want to interpolate
    # over the LUT for our forward simulations!
    fwd_lut = isofit_fwd["forward_model"]["radiative_transfer"]["lut_grid"]
    fwd_lut["AOT550"] = [aod]
    fwd_lut["H2OSTR"] = [h2o]
    # Also have to create a one-off LUT directory for the forward run, to avoid
    # using an (incorrect) previously cached one.
    fwd_lutdir = outdir2 / "fwd_lut"
    fwd_lutdir.mkdir(parents=True, exist_ok=True)
    fwd_vswir = (isofit_fwd["forward_model"]["radiative_transfer"]
                 ["radiative_transfer_engines"]["vswir"])
    fwd_vswir["lut_path"] = str(fwd_lutdir)
    fwd_vswir["interpolator_base_path"] = str(fwd_lutdir)

    if radfile.exists() and not overwrite:
        logger.info("Skipping forward simulation because file exists.")
    else:
        fwdfile = outdir2 / "forward.json"
        with open(fwdfile, "w") as f:
            json.dump(isofit_fwd, f, indent=2)
        logger.info("Starting forward simulation.")
        Isofit(fwdfile).run()
        logger.info("Forward simulation complete.")

    isofit_inv = copy.deepcopy(isofit_common)
    if inversion_mode == "simple":
        # Special case! Use the optimal estimation code, but set `max_nfev` to 1.
        inversion_mode = "inversion"
        imp_inv = isofit_inv["implementation"]["inversion"]
        if "least_squares_params" not in imp_inv:
            imp_inv["least_squares_params"] = {}
        imp_inv["least_squares_params"]["max_nfev"] = 1
    isofit_inv["implementation"]["mode"] = inversion_mode
    isofit_inv["input"]["measured_radiance_file"] = str(radfile)
    est_refl_file = outdir2 / "estimated-reflectance"

    # Inverse mode
    est_state_file = outdir2 / "estimated-state"
    atm_coef_file = outdir2 / "atmospheric-coefficients"
    post_unc_file = outdir2 / "posterior-uncertainty"
    isofit_inv["output"] = {
        "estimated_reflectance_file": str(est_refl_file),
        "estimated_state_file": str(est_state_file),
        "atmospheric_coefficients_file": str(atm_coef_file),
        "posterior_uncertainty_file": str(post_unc_file)
    }

    # Run the workflow
    if calibration_uncertainty_file is not None:
        # Apply calibration uncertainty here
        calmat = loadmat(calibration_uncertainty_file)
        cov = calmat["Covariance"]
        cov_l = np.linalg.cholesky(cov)
        cov_wl = np.squeeze(calmat["wavelengths"])
        rad_img = sp.open_image(envi_header(str(radfile)))
        rad_wl = rad_img.bands.centers
        del rad_img
        for ical in range(n_calibration_draws):
            icalp1 = ical + 1
            radfile_cal = f"{str(radfile)}-{icalp1:02d}"
            reflfile_cal = f"{str(est_refl_file)}-{icalp1:02d}"
            statefile_cal = f"{str(est_state_file)}-{icalp1:02d}"
            atmfile_cal = f"{str(atm_coef_file)}-{icalp1:02d}"
            uncfile_cal = f"{str(post_unc_file)}-{icalp1:02d}"
            if pathlib.Path(reflfile_cal).exists() and not overwrite:
                logger.info("Skipping calibration %d/%d because output exists",
                            icalp1, n_calibration_draws)
                # Move on to the next draw instead of redoing this one
                continue
            logger.info("Applying calibration uncertainty (%d/%d)", icalp1,
                        n_calibration_draws)
            sample_calibration_uncertainty(radfile,
                                           radfile_cal,
                                           cov_l,
                                           cov_wl,
                                           rad_wl,
                                           bias_scale=calibration_scale)
            logger.info("Starting inversion (calibration %d/%d)", icalp1,
                        n_calibration_draws)
            # do_inverse relies on pathlib methods, so hand it Path objects
            do_inverse(copy.deepcopy(isofit_inv),
                       pathlib.Path(radfile_cal),
                       pathlib.Path(reflfile_cal),
                       pathlib.Path(statefile_cal),
                       pathlib.Path(atmfile_cal),
                       pathlib.Path(uncfile_cal),
                       overwrite=overwrite,
                       use_empirical_line=use_empirical_line)
            logger.info("Inversion complete (calibration %d/%d)", icalp1,
                        n_calibration_draws)

    else:
        if est_refl_file.exists() and not overwrite:
            logger.info("Skipping inversion because output exists.")
        else:
            logger.info("Starting inversion.")
            do_inverse(copy.deepcopy(isofit_inv),
                       radfile,
                       est_refl_file,
                       est_state_file,
                       atm_coef_file,
                       post_unc_file,
                       overwrite=overwrite,
                       use_empirical_line=use_empirical_line)
            logger.info("Inversion complete.")
    logger.info("Workflow complete!")
def segment_chunk(lstart, lend, in_file, nodata_value, npca, segsize,
                  logfile=None, loglevel='INFO'):
    """
    Segment a small chunk of the image with SLIC on its top PCA components.

    Args:
        lstart: first line (inclusive) of the chunk in the image file
        lend: last line (exclusive) of the chunk in the image file
        in_file: file path of the ENVI image to segment
        nodata_value: value to ignore
        npca: number of pca components to use
        segsize: mean segmentation size, in pixels per segment
        logfile: logging file name
        loglevel: logging level

    Returns:
        lstart: starting position in image file (as passed in)
        lend: stopping position in image file (as passed in)
        labels: labeled image chunk of shape (lend - lstart, samples); pixels
            flagged as nodata are labeled 0. When the chunk holds no valid
            data at all, an all-zero array is returned.
    """
    logging.basicConfig(format='%(levelname)s:%(message)s',
                        level=loglevel,
                        filename=logfile)
    logging.info(f'{lstart}: starting')

    in_img = envi.open(envi_header(in_file), in_file)
    meta = in_img.metadata
    nb, ns = [int(meta[n]) for n in ('bands', 'samples')]
    img_mm = in_img.open_memmap(interleave='bip', writable=False)

    # Do quick single-band screen before reading all bands
    use = np.logical_not(
        np.isclose(np.array(img_mm[lstart:lend, :, 0]), nodata_value))
    if not use.any():
        logging.info(f'{lstart}: no non null data present, returning early')
        return lstart, lend, np.zeros((use.shape[0], ns))

    # Flatten the chunk to one row per pixel: (lines * samples, bands)
    x = np.array(img_mm[lstart:lend, :, :]).astype(np.float32)
    nc = x.shape[0]
    x = x.reshape((nc * ns, nb))
    logging.debug(f'{lstart}: read and reshaped data')

    # A pixel is usable only if every band differs from the nodata value
    use = np.all(abs(x - nodata_value) > 1e-6, axis=1)

    # If this chunk is empty, return immediately
    if not use.any():
        logging.info(f'{lstart}: no non null data present, returning early')
        return lstart, lend, np.zeros((nc, ns))

    # Materialise the valid pixels once and release the full chunk; the
    # statistics and the projection below only need the valid subset.
    valid = x[use, :]
    del x

    # Excluding bad locations, calculate top PCA coefficients
    mu = valid.mean(axis=0)
    C = np.cov(valid, rowvar=False)
    v, d = scipy.linalg.eigh(C)

    # Determine segmentation compactness scaling based on eigenvalues.
    # Eigenvalues of a rank-deficient covariance can come back slightly
    # negative; clip them so the square root cannot yield NaN, which would
    # otherwise slip past the floor check below (NaN < 1e-6 is False).
    cmpct = scipy.linalg.norm(np.sqrt(np.clip(v[-npca:], 0, None)))
    # Override with a floor value to prevent zeros
    if cmpct < 1e-6:
        cmpct = 10.0
        logging.info('Compactness override: %f', cmpct)

    # Project, redimension as an image with "npca" channels, and segment
    x_pca_subset = (valid - mu) @ d[:, -npca:]
    del valid, mu, d
    mask = use.reshape(nc, ns)
    x_pca = np.zeros((nc, ns, npca))
    x_pca[mask, :] = x_pca_subset
    del x_pca_subset

    # int() truncates toward zero, so a chunk with fewer valid pixels than
    # `segsize` would request zero segments; always ask for at least one.
    seg_in_chunk = max(1, int(np.count_nonzero(use) / float(segsize)))

    logging.debug(f'{lstart}: starting slic')
    # NOTE(review): `max_iter` and `multichannel` were renamed in newer
    # scikit-image releases -- confirm against the pinned version.
    labels = slic(x_pca,
                  n_segments=seg_in_chunk,
                  compactness=cmpct,
                  max_iter=10,
                  sigma=0,
                  multichannel=True,
                  enforce_connectivity=True,
                  min_size_factor=0.5,
                  max_size_factor=3,
                  mask=mask)

    # Reindex the subscene labels and place them into the larger scene
    labels = labels.reshape([nc * ns])
    labels[np.logical_not(use)] = 0
    labels = labels.reshape([nc, ns])

    logging.info(f'{lstart}: completing')
    return lstart, lend, labels
def segment(spectra: tuple,
            nodata_value: float,
            npca: int,
            segsize: int,
            nchunk: int,
            n_cores: int = 1,
            ray_address: str = None,
            ray_redis_password: str = None,
            ray_temp_dir=None,
            ray_ip_head=None,
            logfile=None,
            loglevel='INFO'):
    """
    Segment an image using SLIC on a PCA.

    The image is split into chunks of `nchunk` lines, each chunk is segmented
    by `segment_chunk` through ray, and the per-chunk labels are renumbered
    so that every segment in the scene has a distinct positive label (0 is
    kept for unlabeled pixels). The result is written as a single-band ENVI
    file.

    Args:
        spectra: tuple of filepaths of image to segment and (optionally)
            output label file; a bare string is treated as the image path.
            When no label path is given, '<image path>_lbl' is used.
        nodata_value: data to ignore in radiance image
        npca: number of pca components to use
        segsize: mean segmentation size
        nchunk: size of each image chunk, in lines
        n_cores: number of cores to use
        ray_address: ray address to connect to (for multinode implementation)
        ray_redis_password: ray password to use (for multinode implementation)
        ray_temp_dir: ray temp directory to reference
        ray_ip_head: ray ip head to reference (for multinode use)
        logfile: logging file to output to
        loglevel: logging level to use
    """
    logging.basicConfig(format='%(levelname)s:%(message)s',
                        level=loglevel,
                        filename=logfile)

    # Resolve the input/output paths. The default label path is derived from
    # the input *path*; concatenating onto the `spectra` tuple itself raises
    # a TypeError when only one path is supplied.
    if isinstance(spectra, str):
        in_file = spectra
        lbl_file = in_file + '_lbl'
    else:
        in_file = spectra[0]
        lbl_file = spectra[1] if len(spectra) > 1 else in_file + '_lbl'

    # Open input data, get dimensions
    in_img = envi.open(envi_header(in_file), in_file)
    meta = in_img.metadata
    nl, nb, ns = [int(meta[n]) for n in ('lines', 'bands', 'samples')]

    # Start up a ray instance for parallel work
    rayargs = {
        'ignore_reinit_error': True,
        'local_mode': n_cores == 1,
        "address": ray_address,
        '_temp_dir': ray_temp_dir,
        "_redis_password": ray_redis_password
    }

    # We can only set the num_cpus if running on a single-node
    if ray_ip_head is None and ray_redis_password is None:
        rayargs['num_cpus'] = n_cores

    ray.init(**rayargs)
    atexit.register(ray.shutdown)

    # Iterate through image "chunks," segmenting as we go
    all_labels = np.zeros((nl, ns), dtype=np.int64)
    jobs = []

    # Enforce a minimum chunk size to prevent singularities downstream
    # This could eventually be made a user-tunable parameter but this
    # value should work in all cases
    min_lines_per_chunk = 10

    # No chunk may start inside the final margin. An image that is itself no
    # taller than the margin still gets exactly one chunk (starting at 0)
    # rather than being skipped entirely.
    start_limit = max(nl - min_lines_per_chunk, 1 if nl > 0 else 0)
    for lstart in np.arange(0, start_limit, nchunk):
        # Extend any chunk that falls within a small margin of the
        # end of the flightline. The comparison is inclusive: when a chunk
        # ends exactly at the margin boundary no later chunk starts there,
        # so this chunk has to absorb the remaining lines.
        lend = min(lstart + nchunk, nl)
        if lend >= (nl - min_lines_per_chunk):
            lend = nl

        # Extract data
        # NOTE(review): `.remote` requires segment_chunk to be a ray remote
        # function; confirm it is wrapped with ray.remote where it is defined.
        jobs.append(
            segment_chunk.remote(lstart,
                                 lend,
                                 in_file,
                                 nodata_value,
                                 npca,
                                 segsize,
                                 logfile=logfile,
                                 loglevel=loglevel))

    # Collect results, making sure each chunk is distinct, and enforce an order
    next_label = 1
    rreturn = [ray.get(jid) for jid in jobs]
    for lstart, lend, ret in rreturn:
        if ret is None:
            continue
        logging.debug(f'Collecting chunk: {lstart}')

        # Map the chunk's nonzero labels, in sorted order, onto the next
        # block of consecutive global labels in one vectorised pass.
        labeled = ret != 0
        chunk_ids, inverse = np.unique(ret[labeled], return_inverse=True)
        ordered_chunk_labels = np.zeros(ret.shape, dtype=np.int64)
        ordered_chunk_labels[labeled] = inverse + next_label
        next_label += len(chunk_ids)

        all_labels[lstart:lend, ...] = ordered_chunk_labels
    del rreturn
    ray.shutdown()

    # Final file I/O
    logging.debug('Writing output')
    # Labels are stored with ENVI "data type" 4 and cast to float32 below.
    lbl_meta = {
        "samples": str(ns),
        "lines": str(nl),
        "bands": "1",
        "header offset": "0",
        "file type": "ENVI Standard",
        "data type": "4",
        "interleave": "bil"
    }
    lbl_img = envi.create_image(envi_header(lbl_file),
                                lbl_meta,
                                ext='',
                                force=True)
    lbl_mm = lbl_img.open_memmap(interleave='source', writable=True)
    lbl_mm[:, :] = np.array(all_labels, dtype=np.float32).reshape((nl, 1, ns))
    # Drop the memmap reference so the written data is released to disk
    del lbl_mm