def compute_prefetch_keys(self, missed_key):
    """From the missed key, determine what to prefetch.

    Args:
        missed_key (string): Cached-cuboid key.

    Returns:
        (list): List of cached-cuboid keys to fetch.
    """
    key_parts = missed_key.rsplit('&', 1)
    morton_id = key_parts[1]
    coords = ndlib.MortonXYZ(int(morton_id))
    z = coords[2]

    # Prefetch the cuboid directly above (z + 1) the missed cuboid
    coords_above = coords.copy()
    coords_above[2] = z + 1
    mortonid_above = ndlib.XYZMorton(coords_above)
    key_above = '{}&{}'.format(key_parts[0], mortonid_above)

    if z - 1 < 0:
        # Already at the bottom of the z range, so there is nothing below to prefetch
        return [key_above]

    # Also prefetch the cuboid directly below (z - 1)
    coords_below = coords.copy()
    coords_below[2] = z - 1
    mortonid_below = ndlib.XYZMorton(coords_below)
    key_below = '{}&{}'.format(key_parts[0], mortonid_below)

    return [key_above, key_below]
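# --- Illustration (not part of the library) ---
# A minimal pure-Python sketch of 3D Morton (Z-order) encoding/decoding, to show
# roughly what the compiled ndlib.XYZMorton / ndlib.MortonXYZ calls above do.
# The exact bit order and bit width used by ndlib are assumptions here; this is
# a teaching sketch, not the library's implementation.

def xyz_to_morton(x, y, z, bits=21):
    """Interleave the bits of x, y, z into a single Morton index."""
    code = 0
    for i in range(bits):
        code |= ((x >> i) & 1) << (3 * i)       # x in bit positions 0, 3, 6, ...
        code |= ((y >> i) & 1) << (3 * i + 1)   # y in bit positions 1, 4, 7, ...
        code |= ((z >> i) & 1) << (3 * i + 2)   # z in bit positions 2, 5, 8, ...
    return code

def morton_to_xyz(code, bits=21):
    """De-interleave a Morton index back into [x, y, z]."""
    x = y = z = 0
    for i in range(bits):
        x |= ((code >> (3 * i)) & 1) << i
        y |= ((code >> (3 * i + 1)) & 1) << i
        z |= ((code >> (3 * i + 2)) & 1) << i
    return [x, y, z]

# Round-trip check, plus the z +/- 1 neighbors that compute_prefetch_keys targets:
assert morton_to_xyz(xyz_to_morton(3, 5, 7)) == [3, 5, 7]
assert morton_to_xyz(xyz_to_morton(3, 5, 8))[2] == 8  # cuboid "above"
assert morton_to_xyz(xyz_to_morton(3, 5, 6))[2] == 6  # cuboid "below"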
def cutout(self, resource, corner, extent, resolution, time_sample_range=None,
           filter_ids=None, iso=False, no_cache=False):
    """Extract a cube of arbitrary size. Need not be aligned to cuboid boundaries.

    corner represents the location of the cutout and extent its size. As an
    example in 1D, asking for a corner of 3 and an extent of 2 returns the
    values at 3 and 4.

    Provide a list of ids to filter the cutout contents if desired. The list
    must be convertible to a numpy array via numpy.asarray().

    Args:
        resource (spdb.project.BossResource): Data model info based on the request or target resource
        corner ((int, int, int)): the xyz location of the corner of the cutout
        extent ((int, int, int)): the xyz extents
        resolution (int): the resolution level
        time_sample_range (list(int)): a range of time samples to get [start, stop). Defaults to [0, 1) if omitted
        filter_ids (optional[list]): Defaults to None. Otherwise, a list of uint64 ids to filter the cutout by
        iso (bool): Flag indicating if you want the "isotropic" version of a cuboid, if available
        no_cache (bool): True to read directly from S3 and bypass the cache

    Returns:
        cube.Cube: The cutout data stored in a Cube instance

    Raises:
        (SpdbError):
    """
    boss_logger = BossLogger()
    boss_logger.setLevel("info")
    blog = boss_logger.logger

    if not time_sample_range:
        # If no time sample range is defined, use the default of [0, 1)
        time_sample_range = [0, 1]

    # If the cutout is below the base resolution, get a smaller cube and scale up.
    # ONLY FOR ANNOTATION CHANNELS - if data is missing at the current resolution but exists elsewhere, extrapolate.
    # resource.get_channel().base_resolution is the "base" resolution and you assume data exists there.
    # If downsampled, you don't have to worry about this.
    # Currently we don't upsample annotations when hardening the database, so there is no need to check for propagated.

    # Create namedtuple for consistency with re-sampling paths through the code
    result_tuple = namedtuple('ResampleCoords',
                              ['corner', 'extent', 'x_pixel_offset', 'y_pixel_offset'])

    # Check if you need to scale the cutout due to an off-base-resolution query and the downsample state
    channel = resource.get_channel()
    if not channel.is_image():
        # The channel is an annotation, so we can dynamically re-sample
        base_res = channel.base_resolution

        if base_res > resolution and not resource.is_downsampled():
            # Desired cutout is below base res in the hierarchy (higher res image). Must up-sample the cutout dynamically.
            # Find the effective dimensions of the up-sampled cutout
            raise SpdbError('Not Implemented',
                            'Dynamic resolution up-sampling not yet implemented.',
                            ErrorCodes.FUTURE)

            # cutout_coords = self._up_sample_cutout(resource, corner, extent, resolution)
            # [x_cube_dim, y_cube_dim, z_cube_dim] = cube_dim = CUBOIDSIZE[base_res]
            # cutout_resolution = base_res

        elif base_res < resolution and not resource.is_downsampled():
            # Currently we do not support this. We can cut out a smaller cube and up-sample
            # for the user, but do not want to deal with cutting out large regions and down-sampling.
            raise SpdbError('Not Implemented',
                            'Dynamic resolution down-sampling not yet implemented.',
                            ErrorCodes.FUTURE)

            # If the cutout is an annotation channel, above base resolution (lower res), and NOT propagated, down-sample
            # cutout_coords = self._down_sample_cutout(resource, corner, extent, resolution)
            # [x_cube_dim, y_cube_dim, z_cube_dim] = cube_dim = CUBOIDSIZE[base_res]
            # cutout_resolution = base_res
        else:
            # This is the default path when NOT dynamically scaling the resolution.
            # Get the size of the image and cube
            [x_cube_dim, y_cube_dim, z_cube_dim] = cube_dim = CUBOIDSIZE[resolution]
            cutout_resolution = resolution

            # Create namedtuple for consistency with re-sampling paths through the code
            cutout_coords = result_tuple(corner, extent, None, None)
    else:
        # The resource is an image channel, so no re-sampling is needed.
        # Get the size of the image and cube
        [x_cube_dim, y_cube_dim, z_cube_dim] = cube_dim = CUBOIDSIZE[resolution]
        cutout_resolution = resolution

        # Create namedtuple for consistency with re-sampling paths through the code
        cutout_coords = result_tuple(corner, extent, None, None)

    # Round to the nearest larger cube in all dimensions
    z_start = cutout_coords.corner[2] // z_cube_dim
    y_start = cutout_coords.corner[1] // y_cube_dim
    x_start = cutout_coords.corner[0] // x_cube_dim

    z_num_cubes = (cutout_coords.corner[2] + cutout_coords.extent[2] + z_cube_dim - 1) // z_cube_dim - z_start
    y_num_cubes = (cutout_coords.corner[1] + cutout_coords.extent[1] + y_cube_dim - 1) // y_cube_dim - y_start
    x_num_cubes = (cutout_coords.corner[0] + cutout_coords.extent[0] + x_cube_dim - 1) // x_cube_dim - x_start

    # Initialize the final output cube (before the trim operation, since full cuboids are added)
    out_cube = Cube.create_cube(resource,
                                [x_num_cubes * x_cube_dim,
                                 y_num_cubes * y_cube_dim,
                                 z_num_cubes * z_cube_dim],
                                time_sample_range)

    # Build a list of indexes to access
    # TODO: Move this for loop directly into c-lib
    list_of_idxs = []
    for z in range(z_num_cubes):
        for y in range(y_num_cubes):
            for x in range(x_num_cubes):
                morton_idx = ndlib.XYZMorton([x + x_start, y + y_start, z + z_start])
                list_of_idxs.append(morton_idx)

    # Sort the indexes in Morton order
    list_of_idxs.sort()

    # xyz offset stored for later use
    lowxyz = ndlib.MortonXYZ(list_of_idxs[0])

    # Get index of missing keys for cuboids to read
    missing_key_idx, cached_key_idx, all_keys = self.kvio.get_missing_read_cache_keys(resource,
                                                                                      cutout_resolution,
                                                                                      time_sample_range,
                                                                                      list_of_idxs,
                                                                                      iso=iso)
    # Wait for cuboids that are currently being written to finish
    start_time = datetime.now()
    dirty_keys = all_keys
    blog.debug("Waiting for {} writes to finish before the read can complete".format(len(dirty_keys)))
    while dirty_keys:
        dirty_flags = self.kvio.is_dirty(dirty_keys)
        dirty_keys_temp, clean_keys = [], []
        for key, flag in zip(dirty_keys, dirty_flags):
            (dirty_keys_temp if flag else clean_keys).append(key)
        dirty_keys = dirty_keys_temp

        if (datetime.now() - start_time).seconds > self.dirty_read_timeout:
            # Took too long! Something must have crashed
            raise SpdbError('{} second timeout reached while waiting for dirty cubes to be flushed.'.format(
                self.dirty_read_timeout),
                ErrorCodes.ASYNC_ERROR)

        # Sleep a bit so you don't kill the DB
        time.sleep(0.05)

    #
    # All dirty cubes have been flushed; reading can begin.
    #
    s3_key_idx = []
    cache_cuboids = []
    s3_cuboids = []
    zero_cuboids = []

    if no_cache:
        # If not using the cache, treat all keys as missing.
        blog.debug("Bypassing cache; loading all cuboids directly from S3")
        missing_key_idx = list(range(len(all_keys)))

    if len(missing_key_idx) > 0:
        # There are keys that are missing from the cache.
        # Get the indexes of missing keys that are in S3.
        s3_key_idx, zero_key_idx = self.objectio.cuboids_exist(all_keys, missing_key_idx)

        if len(s3_key_idx) > 0:
            if no_cache:
                temp_keys = self.objectio.cached_cuboid_to_object_keys(itemgetter(*s3_key_idx)(all_keys))

                # Get objects
                temp_cubes = self.objectio.get_objects(temp_keys)

                # Keys will be just the morton id and time sample
                keys_and_cubes = []
                for key, cube in zip(temp_keys, temp_cubes):
                    vals = key.split("&")
                    keys_and_cubes.append((int(vals[-1]), int(vals[-2]), cube))
                s3_cuboids = self.sort_cubes(resource, keys_and_cubes)
            else:
                # Load data into the cache
                blog.debug("Data missing from cache, but present in S3")
                if len(s3_key_idx) > self.read_lambda_threshold:
                    # Trigger page-in of available blocks from the object store and wait for completion
                    blog.debug("Triggering Lambda Page-in")
                    self.page_in_cubes(itemgetter(*s3_key_idx)(all_keys))
                else:
                    # Read cuboids from S3 into the cache directly.
                    # Convert cuboid-cache keys to object keys.
                    blog.debug("Paging-in Keys Directly")
                    temp_keys = self.objectio.cached_cuboid_to_object_keys(itemgetter(*s3_key_idx)(all_keys))

                    # Get objects
                    temp_cubes = self.objectio.get_objects(temp_keys)

                    # Write to the cache
                    blog.debug("put keys on direct page in: {}".format(itemgetter(*s3_key_idx)(all_keys)))
                    self.kvio.put_cubes(itemgetter(*s3_key_idx)(all_keys), temp_cubes)

        if len(zero_key_idx) > 0:
            if not no_cache:
                blog.debug("Data missing in cache, but not in S3")
            else:
                blog.debug("No data for some keys, making cuboids with zeros")

            # Keys that don't exist in the object store render as zeros
            [x_cube_dim, y_cube_dim, z_cube_dim] = CUBOIDSIZE[resolution]
            for idx in zero_key_idx:
                parts, m_id = all_keys[idx].rsplit("&", 1)
                _, t_start = parts.rsplit("&", 1)
                temp_cube = Cube.create_cube(resource,
                                             [x_cube_dim, y_cube_dim, z_cube_dim],
                                             [int(t_start), int(t_start) + 1])
                temp_cube.morton_id = int(m_id)
                temp_cube.zeros()
                zero_cuboids.append(temp_cube)

    # Get cubes from the cache database (either already there or freshly paged in)
    if not no_cache:
        # TODO: Optimize access to cache data and checking for dirty cubes
        if len(s3_key_idx) > 0:
            blog.debug("Get cubes from cache that were paged in from S3")
            blog.debug(itemgetter(*s3_key_idx)(all_keys))
            s3_cuboids = self.get_cubes(resource, itemgetter(*s3_key_idx)(all_keys))

            # Record misses that were found in S3 for possible pre-fetching
            self.cache_state.add_cache_misses(itemgetter(*s3_key_idx)(all_keys))

        # Get previously cached cubes, waiting for dirty cubes to be updated if needed
        if len(cached_key_idx) > 0:
            blog.debug("Get cubes that were already present in the cache")

            # Get the cached keys once in list form
            cached_keys_list = itemgetter(*cached_key_idx)(all_keys)
            if isinstance(cached_keys_list, str):
                cached_keys_list = [cached_keys_list]
            if isinstance(cached_keys_list, tuple):
                cached_keys_list = list(cached_keys_list)

            # Split clean and dirty keys
            dirty_flags = self.kvio.is_dirty(cached_keys_list)
            dirty_keys, clean_keys = [], []
            for key, flag in zip(cached_keys_list, dirty_flags):
                (dirty_keys if flag else clean_keys).append(key)

            # Get all the clean cubes immediately, removing them from the list of cached keys to get
            for k in clean_keys:
                cached_keys_list.remove(k)
            cache_cuboids.extend(self.get_cubes(resource, clean_keys))

            # Get the dirty ones when you can, with a timeout
            start_time = datetime.now()
            while dirty_keys:
                dirty_flags = self.kvio.is_dirty(cached_keys_list)
                dirty_keys, clean_keys = [], []
                for key, flag in zip(cached_keys_list, dirty_flags):
                    (dirty_keys if flag else clean_keys).append(key)

                if clean_keys:
                    # Some keys are ready now. Remove them from the list and get them.
                    for k in clean_keys:
                        cached_keys_list.remove(k)
                    cache_cuboids.extend(self.get_cubes(resource, clean_keys))

                if (datetime.now() - start_time).seconds > self.dirty_read_timeout:
                    # Took too long! Something must have crashed
                    raise SpdbError('{} second timeout reached while waiting for dirty cubes to be flushed.'.format(
                        self.dirty_read_timeout),
                        ErrorCodes.ASYNC_ERROR)

                # Sleep a bit so you don't kill the DB
                time.sleep(0.05)

    #
    # At this point we have all cuboids, whether or not the cache was used.
    #

    # Add all cuboids (which already have all time samples packed in) to the final cube of data
    for cube in cache_cuboids + s3_cuboids + zero_cuboids:
        # Compute the offset so the data is inserted properly
        curxyz = ndlib.MortonXYZ(cube.morton_id)
        offset = [curxyz[0] - lowxyz[0], curxyz[1] - lowxyz[1], curxyz[2] - lowxyz[2]]

        # Add it to the output cube
        out_cube.add_data(cube, offset)

    # A smaller cube was cut out due to an off-base-resolution query: up-sample and trim
    base_res = channel.base_resolution
    if not channel.is_image() and base_res > resolution and not resource.is_downsampled():
        raise SpdbError('Not Implemented',
                        'Dynamic resolution up-sampling not yet implemented.',
                        ErrorCodes.FUTURE)

        # TODO: implement dynamic re-sampling
        # out_cube.zoomData(base_res - resolution)

        # Need to trim based on the cube cutout at the new resolution
        # out_cube.trim(corner[0] % (x_cube_dim * (2 ** (base_res - resolution))) + cutout_coords.x_pixel_offset,
        #               extent[0],
        #               corner[1] % (y_cube_dim * (2 ** (base_res - resolution))) + cutout_coords.y_pixel_offset,
        #               extent[1],
        #               corner[2] % z_cube_dim,
        #               extent[2])

    # A larger cube was cut out due to an off-base-resolution query: down-sample and trim
    elif not channel.is_image() and base_res < resolution and not resource.is_downsampled():
        raise SpdbError('Not Implemented',
                        'Dynamic resolution down-sampling not yet implemented.',
                        ErrorCodes.FUTURE)

        # out_cube.downScale(resolution - base_res)
        # # Need to trim based on the cube cutout at the new resolution
        # out_cube.trim(corner[0] % (x_cube_dim * (2 ** (base_res - resolution))),
        #               extent[0],
        #               corner[1] % (y_cube_dim * (2 ** (base_res - resolution))),
        #               extent[1],
        #               corner[2] % z_cube_dim,
        #               extent[2])

    # Trim the cube if the cutout was not cuboid aligned
    elif extent[0] % x_cube_dim == 0 and \
            extent[1] % y_cube_dim == 0 and \
            extent[2] % z_cube_dim == 0 and \
            corner[0] % x_cube_dim == 0 and \
            corner[1] % y_cube_dim == 0 and \
            corner[2] % z_cube_dim == 0:
        # Cube is already the correct dimensions
        pass
    else:
        out_cube.trim(corner[0] % x_cube_dim,
                      extent[0],
                      corner[1] % y_cube_dim,
                      extent[1],
                      corner[2] % z_cube_dim,
                      extent[2])

    # Filter out ids not in the list
    if filter_ids is not None:
        try:
            out_cube.data = ndlib.filter_ctype_OMP(out_cube.data, filter_ids)
        except ValueError as ve:
            raise SpdbError('filter_ids probably not convertible to numpy uint64 array: {}'.format(ve),
                            ErrorCodes.DATATYPE_MISMATCH) from ve
        except Exception:
            raise SpdbError('unknown error filtering cutout', ErrorCodes.SPDB_ERROR)

    return out_cube
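# --- Illustration (not part of the library) ---
# A self-contained sketch of the cuboid-alignment arithmetic used in cutout()
# above: given a corner and extent along one axis, compute the starting cuboid
# index and the number of cuboids the cutout spans. The function name and the
# example cuboid width are assumptions for illustration only.

def cuboid_span(corner, extent, cube_dim):
    """Return (start_index, num_cuboids) covering voxels [corner, corner + extent)."""
    start = corner // cube_dim
    num = (corner + extent + cube_dim - 1) // cube_dim - start  # ceiling division
    return start, num

# A cutout starting at x=100 with extent 600 in 512-wide cuboids touches
# cuboids 0 and 1, so the pre-trim buffer spans 2 * 512 = 1024 voxels in x:
assert cuboid_span(100, 600, 512) == (0, 2)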
@classmethod
def from_morton(cls, morton):
    x, y, z = ndlib.MortonXYZ(morton)
    return cls(x=x, y=y, z=z)
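# --- Illustration (not part of the library) ---
# Hypothetical round-trip usage of from_morton. `Point` stands in for the
# enclosing class, which is not shown in this snippet; any class accepting
# x/y/z keyword arguments would behave the same way.
#
#   pt = Point.from_morton(ndlib.XYZMorton([3, 5, 7]))
#   assert (pt.x, pt.y, pt.z) == (3, 5, 7)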
#!/usr/bin/env python3.4
# This lambda tests that it can read from user-data and access the cache_state_db,
# then import spdb and access the compiled c_lib.
#
# {
#     "lambda-name": "test",
# }
#

print("in test_lambda")

import bossutils
import spdb
from spdb.spatialdb import state

print("finished part1 imports")
print("checking c_lib")

from spdb.c_lib import ndlib

print("finished c_lib imports.")

coords = ndlib.MortonXYZ(10)  # renamed from `id` to avoid shadowing the builtin
print("results:")
for w in coords:
    print(str(w))
print("finished part2")