def _untangle_raw(data, hdr_info, stack_size):
    """Corrects for the tangled raw mib format.

    Only the case for quad chip is considered here.

    Parameters
    ----------
    data : dask array
        Stack with the detector array unreshaped, e.g. for a single
        frame 512*512: (1, 262144)
    hdr_info : dict
        Info read from the header - output of the _parse_hdr function
    stack_size : int
        The number of frames in the data

    Returns
    -------
    untangled_data : dask array
        Corrected dask array object reshaped on the detector plane,
        e.g. for a single frame case as above: (1, 512, 512)
    """
    width = hdr_info["width"]
    height = hdr_info["height"]
    width_height = width * height

    if (
        hdr_info["Counter Depth (number)"] == 24
        or hdr_info["Counter Depth (number)"] == 12
    ):
        cols = 4
    elif hdr_info["Counter Depth (number)"] == 1:
        cols = 64
    elif hdr_info["Counter Depth (number)"] == 6:
        cols = 8

    data = data.reshape((stack_size * width_height))
    data = data.reshape(stack_size, height * (height // cols), cols)
    data = da.flip(data, 2)

    if hdr_info["Assembly Size"] == "2x2":
        data = data.reshape((stack_size * width_height))
        data = data.reshape(stack_size, 512 // 2, 512 * 2)

        det1 = data[:, :, 0:256]
        det2 = data[:, :, 256:512]
        det3 = data[:, :, 512 : 512 + 256]
        det4 = data[:, :, 512 + 256 :]

        det3 = da.flip(det3, 2)
        det3 = da.flip(det3, 1)

        det4 = da.flip(det4, 2)
        det4 = da.flip(det4, 1)

        untangled_data = da.concatenate(
            (da.concatenate((det1, det3), 1), da.concatenate((det2, det4), 1)), 2
        )
    return untangled_data
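
# Illustrative usage sketch (not part of the original module): calling _untangle_raw
# on a synthetic single-frame quad-chip stack. The hdr_info values below are
# assumptions chosen to match the 512x512 example in the docstring; in real use
# they would come from the _parse_hdr function.
import numpy as np
import dask.array as da

_example_hdr = {
    "width": 512,
    "height": 512,
    "Counter Depth (number)": 12,
    "Assembly Size": "2x2",
}
_example_stack = da.zeros((1, 512 * 512), dtype="uint16")
_example_untangled = _untangle_raw(_example_stack, _example_hdr, stack_size=1)
print(_example_untangled.shape)  # (1, 512, 512)
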
def flip_dim_coord(cube, coord_name):
    """Flip (reverse) dimensional coordinate of cube."""
    logger.info("Flipping dimensional coordinate %s...", coord_name)
    coord = cube.coord(coord_name, dim_coords=True)
    coord_idx = cube.coord_dims(coord)[0]
    coord.points = np.flip(coord.points)
    if coord.bounds is not None:
        coord.bounds = np.flip(coord.bounds, axis=0)
    cube.data = da.flip(cube.core_data(), axis=coord_idx)
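
# Illustrative sketch (synthetic data, no iris objects): flipping a coordinate's
# points and the data along the matching axis keeps values paired with the same
# coordinates, which is the invariant flip_dim_coord relies on.
import numpy as np
import dask.array as da

_lat = np.array([-60.0, 0.0, 60.0])
_field = da.from_array(np.arange(6.0).reshape(3, 2))

_lat_flipped = np.flip(_lat)
_field_flipped = da.flip(_field, axis=0)

# the value at lat == 60.0 is unchanged after the flip
assert _lat_flipped[0] == _lat[2]
assert (_field_flipped[0] == _field[2]).all().compute()
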
def get_image_chunk_mmap(self, im_start, buffer_number):
    # TimeStamps layout:
    # (("t_value"), ("<u4")), (("Milliseconds"), ("<u2")), (("Microseconds"), ("<u2"))
    record_dtype = [("Frame", np.uint16,
                     (self.image_dict["ImageHeight"], self.image_dict["ImageWidth"])),
                    ("TimeStamps", bytes, 8)]
    off = 8192 + buffer_number * self.image_dict["GroupingBytes"]
    top = np.memmap(self.top, dtype=record_dtype, offset=off,
                    shape=self.segment_prebuffer)
    bottom = np.memmap(self.bottom, dtype=record_dtype, offset=off,
                       shape=self.segment_prebuffer)
    # flip the top segment before concatenating it with the bottom segment
    d = da.concatenate((da.flip(top["Frame"], axis=0), bottom["Frame"]), axis=0)
    return d
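
# Illustrative sketch (hypothetical file and sizes): memory-map a structured
# record layout of frames plus timestamps, then lazily flip and stack the
# "Frame" field with dask, mirroring what get_image_chunk_mmap does.
import os
import tempfile
import numpy as np
import dask.array as da

_record_dtype = np.dtype([("Frame", np.uint16, (16, 16)), ("TimeStamps", "S8")])
_path = os.path.join(tempfile.mkdtemp(), "frames.bin")
np.zeros(4, dtype=_record_dtype).tofile(_path)

_records = np.memmap(_path, dtype=_record_dtype, mode="r", shape=4)
_frames = da.from_array(_records["Frame"])  # lazy view of the frame field only
_stacked = da.concatenate((da.flip(_frames, axis=0), _frames), axis=0)
print(_stacked.shape)  # (8, 16, 16)
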
def get_dataset(self, dataset_id, ds_info):
    """Load a dataset."""
    file_key = ds_info.get('file_key', dataset_id['name'])
    dsname = 'Grid/' + file_key
    data = self.get(dsname)
    data = data.squeeze().transpose()
    if data.ndim >= 2:
        data = data.rename({data.dims[-2]: 'y', data.dims[-1]: 'x'})
    data.data = da.flip(data.data, axis=0)

    fill = data.attrs['_FillValue']
    data = data.where(data != fill)

    for key in list(data.attrs.keys()):
        val = data.attrs[key]
        if isinstance(val, h5py.h5r.Reference):
            del data.attrs[key]
        if isinstance(val, np.ndarray):
            if isinstance(val[0][0], h5py.h5r.Reference):
                del data.attrs[key]
    return data
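
# Illustrative sketch (synthetic xarray data, assumed fill value): the same
# flip-then-mask pattern used in get_dataset, without the HDF5 file handler.
import numpy as np
import dask.array as da
import xarray as xr

_grid = xr.DataArray(
    da.from_array(np.array([[1.0, -9999.0], [3.0, 4.0]])),
    dims=("nlat", "nlon"),
    attrs={"_FillValue": -9999.0},
)
_grid = _grid.rename({_grid.dims[-2]: "y", _grid.dims[-1]: "x"})
_grid.data = da.flip(_grid.data, axis=0)                  # flip the y axis
_grid = _grid.where(_grid != -9999.0)                     # fill value -> NaN
print(_grid.values)
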
def _read_mib(fp, hdr_info, mmap_mode='r'):
    """Read a raw .mib file using memory mapping where the array is stored on disk
    and not directly loaded, but may be treated like a numpy.ndarray.

    Parameters
    ----------
    fp : str
        Filepath of .mib file to be loaded.
    hdr_info : dict
        A dictionary containing the keywords as parsed by read_hdr
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode
        (see `numpy.memmap`). The mode has no effect for pickled or
        zipped files.

    Returns
    -------
    data : dask.array.Array
        Lazy dask array wrapping the memory-mapped file contents.
    """
    reader_offset = 0
    width = hdr_info['width']
    height = hdr_info['height']
    offset = hdr_info['offset']
    data_length = hdr_info['data-length']
    data_type = hdr_info['data-type']
    endian = hdr_info['byte-order']
    record_by = hdr_info['record-by']
    depth = _get_mib_depth(hdr_info, fp)

    if data_type == 'signed':
        data_type = 'int'
    elif data_type == 'unsigned':
        data_type = 'uint'
    elif data_type == 'float':
        pass
    else:
        raise TypeError('Unknown "data-type" string.')

    # mib data always big-endian
    endian = '>'
    data_type += str(int(data_length))
    if data_type == 'uint1':
        data_type = 'uint8'
        data_type = np.dtype(data_type)
    else:
        data_type = np.dtype(data_type)
    data_type = data_type.newbyteorder(endian)

    if data_length == '1':
        hdr_multiplier = 1
    else:
        hdr_multiplier = (int(data_length) / 8)**-1
    hdr_bits = int(hdr_info['data offset'] * hdr_multiplier)

    data = np.memmap(fp, offset=reader_offset, dtype=data_type, mode=mmap_mode)
    data = da.from_array(data)

    if record_by == 'vector':  # spectral image
        size = (height, width, depth)
        try:
            data = data.reshape(size)
        # in case of incomplete frame:
        except ValueError:
            if hdr_info['raw'] == 'R64':
                data = data.reshape(depth)

    elif record_by == 'image':  # stack of images
        width_height = width * height
        size = (depth, height, width)
        # remove headers at the beginning of each frame and reshape
        if hdr_info['Assembly Size'] == '2x2':
            if hdr_info['Counter Depth (number)'] == 1:
                # RAW 1 bit data: the header bits are written as uint8 but the frames
                # are binary and need to be unpacked as such.
                data = data.reshape(-1, width_height // 8 + hdr_bits)
                data = data[:, hdr_bits:]
                data = np.unpackbits(data)
                data = data.reshape(depth, width, height)
            else:
                data = data.reshape(-1, width_height + hdr_bits)[:, -width_height:].reshape(
                    depth, width, height)
        elif hdr_info['Assembly Size'] == '1x1':
            data = data.reshape(-1, width_height + hdr_bits)[:, -width_height:].reshape(
                depth, width, height)
            data = data.reshape(depth, 256, 256)

        if hdr_info['raw'] == 'R64':
            if hdr_info['Counter Depth (number)'] == 24 or hdr_info[
                    'Counter Depth (number)'] == 12:
                COLS = 4
            if hdr_info['Counter Depth (number)'] == 1:
                COLS = 64
            if hdr_info['Counter Depth (number)'] == 6:
                COLS = 8

            data = data.reshape((depth * width_height))
            data = data.reshape(depth, height * (height // COLS), COLS)
            data = da.flip(data, 2)

            if hdr_info['Assembly Size'] == '2x2':
                data = data.reshape((depth * width_height))
                data = data.reshape(depth, 512 // 2, 512 * 2)

                det1 = data[:, :, 0:256]
                det2 = data[:, :, 256:512]
                det3 = data[:, :, 512:512 + 256]
                det4 = data[:, :, 512 + 256:]

                det3 = da.flip(det3, 2)
                det3 = da.flip(det3, 1)

                det4 = da.flip(det4, 2)
                det4 = da.flip(det4, 1)

                data = da.concatenate((da.concatenate((det1, det3), 1),
                                       da.concatenate((det2, det4), 1)), 2)

    elif record_by == 'dont-care':  # stack of images
        size = (height, width)
        data = data.reshape(size)
    return data
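
# Illustrative sketch (toy sizes, hypothetical header length): the 1-bit RAW
# branch above strips a per-frame header and then unpacks each byte into 8
# pixels with np.unpackbits. This mirrors that arithmetic on a small array.
import numpy as np

_width_height = 16 * 16          # pixels per frame
_hdr_bytes = 384                 # assumed per-frame header length in bytes
_raw = np.zeros(2 * (_width_height // 8 + _hdr_bytes), dtype=np.uint8)

_frames = _raw.reshape(-1, _width_height // 8 + _hdr_bytes)[:, _hdr_bytes:]
_frames = np.unpackbits(_frames).reshape(2, 16, 16)   # 1 bit -> 1 pixel
print(_frames.shape)  # (2, 16, 16)
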
def get_poss_bergs_fr_raster(onedem, usedask):
    # trans=onedem.attrs['transform']
    flipax = []
    # if trans[0] < 0:
    #     flipax.append(1)
    # if trans[4] < 0:
    #     flipax.append(0)
    if pd.Series(onedem.x).is_monotonic_decreasing:
        flipax.append(1)
    if pd.Series(onedem.y).is_monotonic_increasing:
        flipax.append(0)

    fjord = onedem.attrs['fjord']
    min_area = fjord_props.get_min_berg_area(fjord)
    res = onedem.attrs['res'][0]  # Note: the pixel area will be inaccurate if the resolution is not the same in x and y

    if usedask == True:
        # Daskify the iceberg segmentation process. Note that dask-image has some functionality to operate
        # directly on dask arrays (e.g. dask_image.ndfilters.sobel), which would need to be put into utils.raster.py
        # https://dask-image.readthedocs.io/en/latest/dask_image.ndfilters.html
        # However, as of yet there doesn't appear to be a way to easily implement the watershed segmentation, other than in chunks
        # print(onedem)
        # see else statement with non-dask version for descriptions of what each step is doing
        def seg_wrapper(tiles):
            return raster_ops.labeled_from_segmentation(tiles, [3, 10], resolution=res,
                                                        min_area=min_area, flipax=[])

        def filter_wrapper(tiles, elevs):
            return raster_ops.border_filtering(tiles, elevs, flipax=[])

        elev_copy = onedem.elevation.data  # should return a dask array
        for ax in flipax:
            elev_copy = da.flip(elev_copy, axis=ax)
        # print(type(elev_copy))

        elev_overlap = da.overlap.overlap(elev_copy, depth=10, boundary='nearest')
        seglabeled_overlap = da.map_overlap(seg_wrapper, elev_overlap, trim=False)  # including depth=10 here will ADD another overlap
        print("Got labeled raster of potential icebergs for an image")
        labeled_overlap = da.map_overlap(filter_wrapper, seglabeled_overlap, elev_overlap,
                                         trim=False, dtype='int32')
        labeled_arr = da.overlap.trim_overlap(labeled_overlap, depth=10)

        # re-flip the labeled_arr so it matches the orientation of the original elev data that's within the xarray
        for ax in flipax:
            labeled_arr = da.flip(labeled_arr, axis=ax)

        # import matplotlib.pyplot as plt
        # print(plt.imshow(labeled_arr))

        try:
            del elev_copy
            del elev_overlap
            del seglabeled_overlap
            del labeled_overlap
            print("deleted the intermediate steps")
        except NameError:
            pass

        # print(da.min(labeled_arr).compute())
        # print(da.max(labeled_arr).compute())

        print("about to get the list of possible bergs")
        print('Please note the transform computation is very application specific '
              '(negative y coordinates) and may need generalizing')
        print("this transform computation is particularly sensitive to axis order (y,x) "
              "because it is accessed by index number")

        poss_bergs_list = []

        '''
        # I think that by using concatenate=True, it might not actually be using dask for the computation
        def get_bergs(labeled_blocks):
            # Note: features.shapes returns a generator. However, if we try to iterate through it with a for loop, the StopIteration exception
            # is not passed up into the for loop and execution hangs when it hits the end of the for loop without completing the function
            block_bergs = list(poly[0]['coordinates'][0] for poly in rasterio.features.shapes(
                labeled_blocks.astype('int32'), transform=onedem.attrs['transform']))[:-1]
            poss_bergs_list.append(block_bergs)

        da.blockwise(get_bergs, '', labeled_arr, 'ij', meta=pd.DataFrame({'c':[]}), concatenate=True).compute()
        # print(poss_bergs_list[0])
        # print(type(poss_bergs_list))

        poss_bergs_gdf = gpd.GeoDataFrame({'geometry':[Polygon(poly) for poly in poss_bergs_list[0]]})

        # another approach could be to try and coerce the output from map_blocks into an array, but I suspect you'd still have the geospatial issue
        # https://github.com/dask/dask/issues/3590#issuecomment-464609620
        '''

        # URL: https://stackoverflow.com/questions/66232232/produce-vector-output-from-a-dask-array/66245347?noredirect=1#comment117119583_66245347
        @dask.delayed
        def get_bergs(labeled_blocks, pointer, chunk0, chunk1):
            print("running the dask delayed function")

            def getpx(chunkid, chunksz):
                amin = chunkid[0] * chunksz[0][0]
                amax = amin + chunksz[0][0]
                bmin = chunkid[1] * chunksz[1][0]
                bmax = bmin + chunksz[1][0]
                return (amin, amax, bmin, bmax)

            # order of all inputs (and outputs) should be y, x when axis order is used
            chunksz = (onedem.chunks['y'], onedem.chunks['x'])
            # rasterio_trans = rasterio.transform.guard_transform(onedem.attrs["transform"])
            # print(rasterio_trans)
            ymini, ymaxi, xmini, xmaxi = getpx((chunk0, chunk1), chunksz)
            # print(chunk0, chunk1)
            # print(xmini)
            # print(xmaxi)
            # print(ymini)
            # print(ymaxi)

            # use rasterio Windows and rioxarray to construct transform
            # https://rasterio.readthedocs.io/en/latest/topics/windowed-rw.html#window-transforms
            chwindow = rasterio.windows.Window(xmini, ymini, xmaxi - xmini, ymaxi - ymini)
            trans = onedem.rio.isel_window(chwindow).rio.transform(recalc=True)
            # print(trans)

            return list(poly[0]['coordinates'][0] for poly in rasterio.features.shapes(
                labeled_blocks.astype('int32'), transform=trans))[:-1]

        for __, obj in enumerate(labeled_arr.to_delayed()):
            for bl in obj:
                piece = dask.delayed(get_bergs)(bl, *bl.key)
                poss_bergs_list.append(piece)
                del piece

        poss_bergs_list = dask.compute(*poss_bergs_list)
        # tried working with this instead of the for loops above
        # poss_bergs_list = dask.compute([get_bergs(bl, *bl.key) for bl in obj for __, obj in enumerate(labeled_arr.to_delayed())])[0]
        # print(poss_bergs_list)

        # unnest the list of polygons returned by using dask to polygonize
        concat_list = [item for sublist in poss_bergs_list for item in sublist if len(item) != 0]
        # print(concat_list)

        poss_bergs_gdf = gpd.GeoDataFrame({'geometry': [Polygon(poly) for poly in concat_list]})

        # convert to a geodataframe, combine geometries (in case any bergs were on chunk borders), and generate new polygon list
        print(poss_bergs_gdf)
        # print(poss_bergs_gdf.geometry.plot())

        poss_berg_combined = gpd.overlay(poss_bergs_gdf, poss_bergs_gdf, how='union')
        # print(poss_berg_combined)
        # print(poss_berg_combined.geometry.plot())

        poss_bergs = [berg for berg in poss_berg_combined.geometry]
        # print(poss_bergs)
        print(len(poss_bergs))

        try:
            del labeled_arr
            del poss_bergs_list
            del concat_list
            del poss_berg_combined
        except NameError:
            pass

    else:
        print("NOT USING DASK")
        # create copy of elevation values so original dataset values are not impacted by image manipulations
        # and positive/negative coordinate systems can be ignored (note flipax=[] below)
        # something wonky is happening and when I ran this code on Pangeo I needed to NOT flip the elevation values here and then switch the bounding box y value order
        # Not entirely sure what's going on, but need to be aware of this!!
        # print("Note: check for proper orientation of results depending on compute environment. Pangeo results were upside down.")

        elev_copy = np.copy(np.flip(onedem.elevation.values, axis=flipax))  # flipax=[]
        # generate a labeled array of potential iceberg features, excluding those that are too large or small
        seglabeled_arr = raster_ops.labeled_from_segmentation(elev_copy, [3, 10], resolution=res,
                                                              min_area=min_area, flipax=[])
        print("Got labeled raster of potential icebergs for an image")
        # remove features whose borders are >50% no data values (i.e. the "iceberg" edge is really a DEM edge)
        labeled_arr = raster_ops.border_filtering(seglabeled_arr, elev_copy, flipax=[]).astype(
            seglabeled_arr.dtype)
        # apparently rasterio can't handle int64 inputs, which is what border_filtering returns

        # import matplotlib.pyplot as plt
        # print(plt.imshow(labeled_arr))

        # create iceberg polygons
        # somehow a < 1 pixel berg made it into this list... I'm doing a secondary filtering by area in the iceberg filter step for now
        poss_bergs = list(poly[0]['coordinates'][0] for poly in rasterio.features.shapes(
            labeled_arr, transform=onedem.attrs['transform']))[:-1]

        try:
            del elev_copy
            del seglabeled_arr
            del labeled_arr
        except NameError:
            pass

    return poss_bergs
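
# Illustrative sketch (assumed smoothing function, synthetic data): the
# overlap -> map_overlap/map_blocks -> trim_overlap pattern used in the dask
# branch above, shown with a simple per-block filter instead of the
# segmentation and border-filtering wrappers.
import numpy as np
import dask.array as da
from scipy import ndimage

_dem = da.from_array(np.random.random((200, 200)), chunks=(100, 100))

# add a 10-pixel halo to every chunk so block-wise operations see their neighbors
_dem_overlap = da.overlap.overlap(_dem, depth=10, boundary="nearest")
_smoothed_overlap = _dem_overlap.map_blocks(ndimage.uniform_filter, size=5)
# trim the halo back off so the result lines up with the original array
_smoothed = da.overlap.trim_overlap(_smoothed_overlap, depth=10)
print(_smoothed.shape)  # (200, 200)
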
def advection_step(self, time, output_time=False):
    """Perform forward-backward advection at a single point in time.

    This routine is responsible for creating a new ParticleSet at
    the given time, and performing the forward and backward advection
    steps in the Lagrangian transformation.

    Args:
        time (float): The point in time at which to calculate filtered data.
        output_time (Optional[bool]): Whether to include "time" as
            a numpy array in the output dictionary, for doing manual
            analysis.

    Note:
        If ``output_time`` is True, the output object will not be compatible
        with the default filtering workflow, :func:`~filter_step`!

        If ``output_dt`` has not been set on the filtering object, it will
        default to the difference between successive time steps in the
        first grid defined in the parcels FieldSet. This may be a concern
        if using data which has been sampled at different frequencies
        in the input data files.

    Returns:
        Dict[str, Tuple[int, dask.array.Array]]: A dictionary of the advection
            data, mapping variable names to a pair. The first element is
            the index of the sampled timestep in the data, and the second
            element is a lazy dask array concatenating the forward and
            backward advection data.

    """
    # seed all particles at gridpoints
    ps = self.particleset(time)
    # execute the sample-only kernel to efficiently grab the initial condition
    ps.kernel = self.sample_kernel
    ps.execute(self.sample_kernel, runtime=0, dt=self.advection_dt)

    # set up the temporary output file for the initial condition and
    # forward advection
    outfile = self._advection_cache_class(
        ps, self.output_dt, self.sample_variables, **self._advection_cache_kwargs
    )

    # now the forward advection kernel can run
    outfile.set_group("forward")
    ps.kernel = self.kernel
    ps.execute(
        self.kernel,
        runtime=self.window_size,
        dt=self.advection_dt,
        output_file=outfile,
        recovery={
            parcels.ErrorCode.ErrorOutOfBounds: _recovery_kernel_out_of_bounds
        },
    )

    # reseed particles back on the grid, then advect backwards
    # we don't need any initial condition sampling since we've already done it
    outfile.set_group("backward")
    ps = self.particleset(time)
    ps.kernel = self.kernel
    ps.execute(
        self.kernel,
        runtime=self.window_size,
        dt=-self.advection_dt,
        output_file=outfile,
        recovery={
            parcels.ErrorCode.ErrorOutOfBounds: _recovery_kernel_out_of_bounds
        },
    )

    # stitch together and filter all sample variables from the temporary
    # output data
    da_out = {}
    for v in self.sample_variables:
        # load data lazily as dask arrays, for forward and backward segments
        var_array_forward = da.from_array(
            outfile.data("forward")[v], chunks=(None, "auto")
        )[:-1, :]
        var_array_backward = da.from_array(
            outfile.data("backward")[v], chunks=(None, "auto")
        )[:-1, :]

        # get an index into the middle of the array
        time_index_data = var_array_backward.shape[0] - 1

        # construct proper sequence by concatenating data and flipping the backward segment
        # for var_array_forward, skip the initial output for both the sample-only and
        # sample-advection kernels, which have meaningless data
        var_array = da.concatenate(
            (da.flip(var_array_backward[1:, :], axis=0), var_array_forward)
        )
        da_out[v] = (time_index_data, var_array)

    if output_time:
        da_out["time"] = np.concatenate((
            outfile.data("backward").attrs["time"][1:-1][::-1],
            outfile.data("forward").attrs["time"][:-1],
        ))

    return da_out
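
# Illustrative sketch (synthetic arrays): how the backward and forward advection
# segments are stitched into a single time series with da.flip/da.concatenate,
# and where the seeding time ends up in the result.
import numpy as np
import dask.array as da

_backward = da.from_array(np.arange(12.0).reshape(4, 3))      # (time, particle)
_forward = da.from_array(np.arange(12.0, 24.0).reshape(4, 3))  # (time, particle)

_time_index = _backward.shape[0] - 1
_stitched = da.concatenate((da.flip(_backward[1:, :], axis=0), _forward))
print(_stitched.shape)                    # (7, 3)
print(_stitched[_time_index].compute())   # first forward sample, i.e. the seed time
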
def filter_step(self, time_index, time):
    """Perform forward-backward advection at a single timestep."""
    # seed all particles at gridpoints
    ps = self.particleset(time)
    # execute the sample-only kernel to efficiently grab the initial condition
    ps.kernel = self.sample_kernel
    ps.execute(self.sample_kernel, runtime=0, dt=self.advection_dt)

    # set up the temporary output file for the initial condition and
    # forward advection
    outfile = LagrangeParticleFile(ps, self.output_dt, self.sample_variables)

    # now the forward advection kernel can run
    outfile.set_group("forward")
    ps.kernel = self.kernel
    ps.execute(
        self.kernel,
        runtime=self.window_size,
        dt=self.advection_dt,
        output_file=outfile,
        recovery={
            parcels.ErrorCode.ErrorOutOfBounds: recovery_kernel_out_of_bounds
        },
    )

    # reseed particles back on the grid, then advect backwards
    # we don't need any initial condition sampling since we've already done it
    outfile.set_group("backward")
    ps = self.particleset(time)
    ps.kernel = self.kernel
    ps.execute(
        self.kernel,
        runtime=self.window_size,
        dt=-self.advection_dt,
        output_file=outfile,
        recovery={
            parcels.ErrorCode.ErrorOutOfBounds: recovery_kernel_out_of_bounds
        },
    )

    # stitch together and filter all sample variables from the temporary
    # output data
    da_out = {}
    for v in self.sample_variables:
        # load data lazily as dask arrays, for forward and backward segments
        var_array_forward = da.from_array(
            outfile.data("forward")[v], chunks=(None, "auto")
        )
        var_array_backward = da.from_array(
            outfile.data("backward")[v], chunks=(None, "auto")
        )

        # get an index into the middle of the array
        time_index_data = var_array_backward.shape[0]

        # construct proper sequence by concatenating data and flipping the backward segment
        # for var_array_forward, skip the initial output for both the sample-only and
        # sample-advection kernels, which have meaningless data
        var_array = da.concatenate(
            (da.flip(var_array_backward[1:, :], axis=0), var_array_forward)
        )

        def filter_select(x):
            return signal.filtfilt(*self.inertial_filter, x)[..., time_index_data]

        # apply scipy filter as a ufunc
        # mapping an array to scalar over the first axis, automatically vectorize execution
        # and allow rechunking (since we have a chunk boundary across the first axis)
        filtered = da.apply_gufunc(
            filter_select,
            "(i)->()",
            var_array,
            axis=0,
            output_dtypes=var_array.dtype,
            allow_rechunk=True,
        )

        da_out[v] = filtered.compute()

    return da_out
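
# Illustrative sketch (assumed Butterworth filter, synthetic data): applying
# scipy.signal.filtfilt over the time axis of a (time, particle) dask array with
# da.apply_gufunc and keeping only the central timestep, as filter_select does.
import numpy as np
import dask.array as da
from scipy import signal

_b, _a = signal.butter(4, 0.2)                    # assumed low-pass filter
_series = da.from_array(np.random.random((101, 1000)), chunks=(101, 200))
_mid = 50                                         # index of the central timestep

def _filter_select(x):
    # the gufunc convention puts the core (time) dimension last
    return signal.filtfilt(_b, _a, x)[..., _mid]

_filtered = da.apply_gufunc(
    _filter_select, "(i)->()", _series, axis=0,
    output_dtypes=_series.dtype, allow_rechunk=True,
)
print(_filtered.compute().shape)  # (1000,)
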