def __init__(self, message, code=ErrorCodes.KEYCLOAK_EXCEPTION):
    """
    Custom HTTP error class for converting a KeyCloakError exception into a JsonResponse

    Args:
        message (str): Error message to send back to user

    Note: If called in an exception handler, expects the exception to be a KeyCloakError
    """
    ex = sys.exc_info()[1]
    self.status_code = ex.status if ex else RESP_CODES[code]

    data = {
        'status': self.status_code,
        'code': ErrorCodes.KEYCLOAK_EXCEPTION,
        'message': message
    }

    if ex:
        data.update(ex.data)

    msg = "BossKeycloakError"
    for k in data:
        msg += " - {}: {}".format(k.capitalize(), data[k])

    log = BossLogger().logger
    log.info(msg)

    super(BossKeycloakError, self).__init__(data)
def __init__(self, message, code=ErrorCodes.KEYCLOAK_EXCEPTION):
    """
    Custom HTTP error class for converting a KeyCloakError exception into a JsonResponse

    Args:
        message (str): Error message to send back to user

    Note: If called in an exception handler, expects the exception to be a KeyCloakError
    """
    ex = sys.exc_info()[1]
    self.status_code = ex.status if ex else RESP_CODES[code]

    data = {
        'status': self.status_code,
        'code': ErrorCodes.KEYCLOAK_EXCEPTION,
        'message': message
    }

    if ex:
        if isinstance(ex.data, str):
            val = json.loads(ex.data)
        else:
            val = ex.data
        data.update(val)

    msg = "BossKeycloakError"
    for k in data:
        msg += " - {}: {}".format(k.capitalize(), data[k])

    log = BossLogger().logger
    log.info(msg)

    super(BossKeycloakError, self).__init__(data)
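# --- Illustrative sketch (not part of the original source) ---------------------
# The docstring above expects this class to be constructed inside an exception
# handler, so that sys.exc_info()[1] is the in-flight KeyCloakError and __init__
# can pick up its HTTP status and response data. The client call below is an
# assumption for illustration only; only the try/except pattern is taken from the
# docstring.
#
# try:
#     with KeyCloakClient('BOSS') as kc:
#         kc.create_user(user_data)          # hypothetical Keycloak call
# except KeyCloakError:
#     # Constructed inside the handler so __init__ can read the KeyCloakError
#     return BossKeycloakError("Unable to create user in Keycloak")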
def __init__(self, message, code): """ Custom HTTP error class :param status: HTTP Status code :type status: int :param code: An optional, arbitrary, and unique code to identify where the error was generated :type code: int :param message: Message to provide feedback to the user :return: """ # Set status code self.status_code = RESP_CODES[code] # Log blog = BossLogger().logger blog.info("BossHTTPError - Status: {0} - Code: {1} - Message: {2}".format(self.status_code, code, message)) # Return data = {'status': self.status_code, 'code': code, 'message': message} super(BossHTTPError, self).__init__(data)
def __init__(self, *args):
    """
    Constructor.

    Args:
        *args: args[0] should be the error message and args[1] should be the error code (an ErrorCodes value).
    """
    # Log
    # TODO: Look into removing boss logger dependency
    if len(args) > 1:
        blog = BossLogger().logger
        blog.error("SpdbError - Message: {0} - Code: {1}".format(args[0], args[1]))
        self.message = args[0]
        self.error_code = args[1]
        return

    if len(args) == 1:
        self.message = args[0]
        self.error_code = ErrorCodes.SPDB_ERROR
        return

    self.message = 'No error message given.'
    self.error_code = ErrorCodes.SPDB_ERROR
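# --- Illustrative sketch (not part of the original source) ---------------------
# Given the constructor above, args[0] becomes .message and args[1] becomes
# .error_code; with a single argument the code defaults to ErrorCodes.SPDB_ERROR.
# The message below is an example value only.
#
# raise SpdbError('Matrix must be 4D or 3D', ErrorCodes.SPDB_ERROR)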
def post(self, request, ingest_job_id):
    """
    Signal that an ingest job is complete and should be cleaned up by POSTing to this view

    Args:
        request: Django Rest framework Request object
        ingest_job_id: Ingest job id

    Returns:

    """
    try:
        blog = BossLogger().logger
        ingest_mgmr = IngestManager()
        ingest_job = ingest_mgmr.get_ingest_job(ingest_job_id)

        if ingest_job.status == IngestJob.PREPARING:
            # If status is Preparing. Deny
            return BossHTTPError("You cannot complete a job that is still preparing. You must cancel instead.",
                                 ErrorCodes.BAD_REQUEST)

        elif ingest_job.status == IngestJob.UPLOADING:
            # Check if user is the ingest job creator or the sys admin
            if not self.is_user_or_admin(request, ingest_job):
                return BossHTTPError("Only the creator or admin can start verification of an ingest job",
                                     ErrorCodes.INGEST_NOT_CREATOR)

            # Disable verification until it is reworked and always return success for now.
            blog.info('Telling client job complete - completion/verification to be fixed later.')
            return Response(status=status.HTTP_204_NO_CONTENT)

            """
            blog.info('Verifying ingest job {}'.format(ingest_job_id))

            # Start verification process
            if not ingest_mgmr.verify_ingest_job(ingest_job):
                # Ingest not finished
                return Response(status=status.HTTP_202_ACCEPTED)
            """

            # Verification successful, fall through to the complete process.

        elif ingest_job.status == IngestJob.COMPLETE:
            # If status is already Complete, just return another 204
            return Response(status=status.HTTP_204_NO_CONTENT)

        elif ingest_job.status == IngestJob.DELETED:
            # Job had already been cancelled
            return BossHTTPError("Ingest job has already been cancelled.", ErrorCodes.BAD_REQUEST)

        elif ingest_job.status == IngestJob.FAILED:
            # Job had failed
            return BossHTTPError("Ingest job has failed during creation. You must Cancel instead.",
                                 ErrorCodes.BAD_REQUEST)

        # Complete the job.
        blog.info("Completing Ingest Job {}".format(ingest_job_id))

        # Check if user is the ingest job creator or the sys admin
        if not self.is_user_or_admin(request, ingest_job):
            return BossHTTPError("Only the creator or admin can complete an ingest job",
                                 ErrorCodes.INGEST_NOT_CREATOR)

        # TODO SH This is a quick fix to make sure the ingest-client does not run the close option.
        # The cleanup code is commented out below because it is not working correctly.
        return Response(status=status.HTTP_204_NO_CONTENT)

        # if ingest_job.ingest_type == IngestJob.TILE_INGEST:
        #     # Check if any messages remain in the ingest queue
        #     ingest_queue = ingest_mgmr.get_ingest_job_ingest_queue(ingest_job)
        #     num_messages_in_queue = int(ingest_queue.queue.attributes['ApproximateNumberOfMessages'])
        #
        #     # Kick off extra lambdas just in case
        #     if num_messages_in_queue:
        #         blog.info("{} messages remaining in Ingest Queue".format(num_messages_in_queue))
        #         ingest_mgmr.invoke_ingest_lambda(ingest_job, num_messages_in_queue)
        #
        #         # Give lambda a few seconds to fire things off
        #         time.sleep(30)
        #
        #     ingest_mgmr.cleanup_ingest_job(ingest_job, IngestJob.COMPLETE)
        #
        # elif ingest_job.ingest_type == IngestJob.VOLUMETRIC_INGEST:
        #     ingest_mgmr.cleanup_ingest_job(ingest_job, IngestJob.COMPLETE)
        #
        #     # ToDo: call cleanup method for volumetric ingests. Don't want
        #     #       to cleanup until after testing with real data.
        #     #ingest_mgmr.cleanup_ingest_job(ingest_job, IngestJob.COMPLETE)
        #
        # blog.info("Complete successful")
        # return Response(status=status.HTTP_204_NO_CONTENT)

    except BossError as err:
        return err.to_http()
    except Exception as err:
        blog.error('Caught general exception: {}'.format(err))
        return BossError("{}".format(err), ErrorCodes.BOSS_SYSTEM_ERROR).to_http()
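# --- Illustrative sketch (not part of the original source) ---------------------
# Client-side view of the endpoint above: completing an ingest job is an
# authenticated POST with the job id in the URL. The host, URL pattern, and token
# header below are assumptions for illustration; a 204 means the job is treated
# as complete, while a 400-class BossHTTPError payload explains why it cannot be
# completed (still preparing, already cancelled, failed, or caller is not the
# creator/admin).
def _example_complete_ingest_job(api_host, ingest_job_id, token):
    import requests

    url = "https://{}/latest/ingest/{}/complete".format(api_host, ingest_job_id)  # assumed URL pattern
    resp = requests.post(url, headers={"Authorization": "Token {}".format(token)})
    return resp.status_code == 204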
import json
from functools import wraps

from rest_framework.views import APIView
from rest_framework.response import Response

from bosscore.error import BossKeycloakError, BossHTTPError, ErrorCodes
from bosscore.models import BossRole
from bosscore.serializers import UserSerializer, BossRoleSerializer
from bosscore.privileges import check_role, BossPrivilegeManager

from bossutils.keycloak import KeyCloakClient, KeyCloakError
from bossutils.logger import BossLogger

LOG = BossLogger().logger

####
## Should there be a hard-coded list of valid roles, or should all methods defer
## to Keycloak to check that the role is valid? Basically, do we expect
## different applications to have their own roles?
####
VALID_ROLES = ('admin', 'user-manager', 'resource-manager')


def validate_role(arg=3, kwarg="role_name"):
    """ Validate the role / role_name function argument

    Args:
        kwarg (string): The index into the kwargs dictionary of keyword arguments

    Note: either arg or kwarg should be specified, based on the argument to check
def get(self, request, collection, experiment, channel):
    """View to provide a channel's downsample status and properties

    Args:
        request: DRF Request object
        collection (str): Unique Collection identifier, indicating which collection you want to access
        experiment (str): Experiment identifier, indicating which experiment you want to access
        channel (str): Channel identifier, indicating which channel you want to access

    Returns:

    """
    if "iso" in request.query_params:
        if request.query_params["iso"].lower() == "true":
            iso = True
        else:
            iso = False
    else:
        iso = False

    # Process request and validate
    try:
        request_args = {
            "service": "downsample",
            "collection_name": collection,
            "experiment_name": experiment,
            "channel_name": channel
        }
        req = BossRequest(request, request_args)
    except BossError as err:
        return err.to_http()

    # Convert to Resource
    resource = project.BossResourceDjango(req)

    # Get Status
    channel = resource.get_channel()
    experiment = resource.get_experiment()
    to_renderer = {"status": channel.downsample_status}

    # Check Step Function if status is in-progress and update
    if channel.downsample_status == "IN_PROGRESS":
        lookup_key = resource.get_lookup_key()
        _, exp_id, _ = lookup_key.split("&")

        # Get channel object
        channel_obj = Channel.objects.get(name=channel.name, experiment=int(exp_id))

        # Update the status from the step function
        session = bossutils.aws.get_session()
        status = bossutils.aws.sfn_status(session, channel_obj.downsample_arn)
        if status == "SUCCEEDED":
            # Change to DOWNSAMPLED
            channel_obj.downsample_status = "DOWNSAMPLED"
            channel_obj.save()
            to_renderer["status"] = "DOWNSAMPLED"

            # DP NOTE: This code should be moved to spdb when change
            #          tracking is added to automatically calculate
            #          frame extents for the user

            # DP NOTE: Clear the cache of any cubes for the channel
            #          This is to prevent serving stale data after
            #          (re)downsampling
            log = BossLogger().logger
            for pattern in ("CACHED-CUBOID&" + lookup_key + "&*", "CACHED-CUBOID&ISO&" + lookup_key + "&*"):
                log.debug("Clearing cache of {} cubes".format(pattern))
                try:
                    cache = RedisKVIO(settings.KVIO_SETTINGS)
                    pipe = cache.cache_client.pipeline()
                    for key in cache.cache_client.scan_iter(match=pattern):
                        pipe.delete(key)
                    pipe.execute()
                except Exception as ex:
                    log.exception("Problem clearing cache after downsample finished")

        elif status == "FAILED" or status == "TIMED_OUT":
            # Change status to FAILED
            channel_obj = Channel.objects.get(name=channel.name, experiment=int(exp_id))
            channel_obj.downsample_status = "FAILED"
            channel_obj.save()
            to_renderer["status"] = "FAILED"

    # Get hierarchy levels
    to_renderer["num_hierarchy_levels"] = experiment.num_hierarchy_levels

    # Gen Voxel dims
    voxel_size = {}
    voxel_dims = resource.get_downsampled_voxel_dims(iso=iso)
    for res, dims in enumerate(voxel_dims):
        voxel_size["{}".format(res)] = dims
    to_renderer["voxel_size"] = voxel_size

    # Gen Extent dims
    extent = {}
    extent_dims = resource.get_downsampled_extent_dims(iso=iso)
    for res, dims in enumerate(extent_dims):
        extent["{}".format(res)] = dims
    to_renderer["extent"] = extent

    # Get Cuboid dims
    cuboid_size = {}
    for res in range(0, experiment.num_hierarchy_levels):
        cuboid_size["{}".format(res)] = CUBOIDSIZE[res]
    to_renderer["cuboid_size"] = cuboid_size

    # Send data to renderer
    return Response(to_renderer)
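# --- Illustrative sketch (not part of the original source) ---------------------
# Shape of the payload assembled by the view above. The numeric values are
# hypothetical; what is taken from the code is the set of top-level keys and the
# fact that the per-resolution dictionaries are keyed by stringified resolution
# levels.
def _example_downsample_status_payload():
    return {
        "status": "DOWNSAMPLED",
        "num_hierarchy_levels": 3,
        "voxel_size": {"0": [4.0, 4.0, 40.0], "1": [8.0, 8.0, 40.0], "2": [16.0, 16.0, 40.0]},
        "extent": {"0": [8192, 8192, 500], "1": [4096, 4096, 500], "2": [2048, 2048, 500]},
        "cuboid_size": {"0": [512, 512, 16], "1": [512, 512, 16], "2": [512, 512, 16]},
    }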
def write_cuboid(self, resource, corner, resolution, cuboid_data, time_sample_start=0, iso=False):
    """ Write a 3D/4D volume to the key-value store. Used by API/cache in consistent mode as it reconciles writes

    If cuboid_data.ndim == 4, data in time-series format - assume t,z,y,x
    If cuboid_data.ndim == 3, data not in time-series format - assume z,y,x

    Args:
        resource (project.BossResource): Data model info based on the request or target resource
        corner ((int, int, int)): the xyz location of the corner of the cutout
        resolution (int): the resolution level
        cuboid_data (numpy.ndarray): Matrix of data to write as cuboids
        time_sample_start (int): if cuboid_data.ndim == 3, the time sample for the data
                                 if cuboid_data.ndim == 4, the time sample for cuboid_data[0, :, :, :]
        iso (bool): Flag indicating if you want to write to the "isotropic" version of a channel, if available

    Returns:
        None
    """
    boss_logger = BossLogger()
    boss_logger.setLevel("info")
    blog = boss_logger.logger

    # Check if the resource is locked
    if self.resource_locked(resource.get_lookup_key()):
        raise SpdbError('Resource Locked',
                        'The requested resource is locked due to excessive write errors. Contact support.',
                        ErrorCodes.RESOURCE_LOCKED)

    # TODO LR: This is temporary logic that will be removed after finalizing the current large data ingest.
    # Check to make sure the user is writing data at the BASE RESOLUTION or BASE RESOLUTION + 1
    channel = resource.get_channel()
    if channel.base_resolution != resolution and resolution != channel.base_resolution + 1:
        raise SpdbError('Resolution Mismatch',
                        "You can only write data to a channel's base resolution or one resolution above it. Base Resolution: {}, Request Resolution: {}".format(channel.base_resolution, resolution),
                        ErrorCodes.RESOLUTION_MISMATCH)

    # Check if time-series
    if cuboid_data.ndim == 4:
        # Time-series - coords in xyz, data in zyx so shuffle to be consistent and drop time value
        dim = cuboid_data.shape[::-1][:-1]
        time_sample_stop = time_sample_start + cuboid_data.shape[0]

    elif cuboid_data.ndim == 3:
        # Not time-series - coords in xyz, data in zyx so shuffle to be consistent
        dim = cuboid_data.shape[::-1]
        cuboid_data = np.expand_dims(cuboid_data, axis=0)
        time_sample_stop = time_sample_start + 1
    else:
        raise SpdbError('Invalid Data Shape', 'Matrix must be 4D or 3D', ErrorCodes.SPDB_ERROR)

    # Get the size of cuboids
    [x_cube_dim, y_cube_dim, z_cube_dim] = cube_dim = CUBOIDSIZE[resolution]

    # Round to the nearest larger cube in all dimensions
    [x_start, y_start, z_start] = list(map(floordiv, corner, cube_dim))

    z_num_cubes = (corner[2] + dim[2] + z_cube_dim - 1) // z_cube_dim - z_start
    y_num_cubes = (corner[1] + dim[1] + y_cube_dim - 1) // y_cube_dim - y_start
    x_num_cubes = (corner[0] + dim[0] + x_cube_dim - 1) // x_cube_dim - x_start

    [x_offset, y_offset, z_offset] = list(map(mod, corner, cube_dim))

    # Populate the data buffer
    data_buffer = np.zeros([time_sample_stop - time_sample_start] +
                           [z_num_cubes * z_cube_dim, y_num_cubes * y_cube_dim, x_num_cubes * x_cube_dim],
                           dtype=cuboid_data.dtype, order="C")

    data_buffer[:, z_offset:z_offset + dim[2],
                y_offset:y_offset + dim[1],
                x_offset:x_offset + dim[0]] = cuboid_data

    # Get keys ready
    experiment = resource.get_experiment()
    if iso is True and resolution > resource.get_isotropic_level() and experiment.hierarchy_method.lower() == "anisotropic":
        base_write_cuboid_key = "WRITE-CUBOID&ISO&{}&{}".format(resource.get_lookup_key(), resolution)
    else:
        base_write_cuboid_key = "WRITE-CUBOID&{}&{}".format(resource.get_lookup_key(), resolution)

    blog.info("Writing Cuboid - Base Key: {}".format(base_write_cuboid_key))

    # Get current cube from db, merge with new cube, write back to the to db
    # TODO: Move splitting up data and computing morton into c-lib as single method
    page_out_cnt = 0
    for z in range(z_num_cubes):
        for y in range(y_num_cubes):
            for x in range(x_num_cubes):
                # Get the morton ID for the cube
                morton_idx = ndlib.XYZMorton([x + x_start, y + y_start, z + z_start])

                # Get sub-cube
                temp_cube = Cube.create_cube(resource, [x_cube_dim, y_cube_dim, z_cube_dim],
                                             [time_sample_start, time_sample_stop])
                temp_cube.data = np.ascontiguousarray(data_buffer[:,
                                                      z * z_cube_dim:(z + 1) * z_cube_dim,
                                                      y * y_cube_dim:(y + 1) * y_cube_dim,
                                                      x * x_cube_dim:(x + 1) * x_cube_dim],
                                                      dtype=data_buffer.dtype)

                # For each time sample put cube into write-buffer and add to temp page out key
                for t in range(time_sample_start, time_sample_stop):
                    # Add cuboid to write buffer
                    write_cuboid_key = self.kvio.insert_cube_in_write_buffer(base_write_cuboid_key, t, morton_idx,
                                                                             temp_cube.to_blosc_by_time_index(t))

                    # Page Out Attempt Loop
                    temp_page_out_key = "TEMP&{}".format(uuid.uuid4().hex)

                    # Check for page out
                    if self.cache_state.in_page_out(temp_page_out_key, resource.get_lookup_key(),
                                                    resolution, morton_idx, t):
                        blog.info("Writing Cuboid - Delayed Write: {}".format(write_cuboid_key))
                        # Delay Write!
                        self.cache_state.add_to_delayed_write(write_cuboid_key,
                                                              resource.get_lookup_key(),
                                                              resolution,
                                                              morton_idx,
                                                              t,
                                                              resource.to_json())
                        # You are done. continue
                    else:
                        # Attempt to get write slot by checking page out
                        in_page_out = self.cache_state.add_to_page_out(temp_page_out_key,
                                                                       resource.get_lookup_key(),
                                                                       resolution,
                                                                       morton_idx,
                                                                       t)

                        if not in_page_out:
                            # Good to trigger lambda!
                            self.objectio.trigger_page_out({"kv_config": self.kv_config,
                                                            "state_config": self.state_conf,
                                                            "object_store_config": self.object_store_config},
                                                           write_cuboid_key,
                                                           resource)
                            page_out_cnt += 1
                            # All done. continue.
                        else:
                            # Ended up in page out during transaction. Make delayed write.
                            blog.info("Writing Cuboid - Delayed Write: {}".format(write_cuboid_key))
                            self.cache_state.add_to_delayed_write(write_cuboid_key,
                                                                  resource.get_lookup_key(),
                                                                  resolution,
                                                                  morton_idx,
                                                                  t,
                                                                  resource.to_json())
    blog.info("Triggered {} Page Out Operations".format(page_out_cnt))
def cutout(self, resource, corner, extent, resolution, time_sample_range=None, filter_ids=None, iso=False,
           access_mode="cache"):
    """Extract a cube of arbitrary size. Need not be aligned to cuboid boundaries.

    corner represents the location of the cutout and extent the size. As an example in 1D, if asking for
    a corner of 3 and extent of 2, this would be the values at 3 and 4.

    Provide a list of ids to filter the cutout contents if desired. The list must be convertible to a numpy array
    via numpy.asarray().

    Args:
        resource (spdb.project.BossResource): Data model info based on the request or target resource
        corner ((int, int, int)): the xyz location of the corner of the cutout
        extent ((int, int, int)): the xyz extents
        resolution (int): the resolution level
        time_sample_range (list(int)): a range of time samples to get [start, stop). Default is [0,1) if omitted
        filter_ids (optional[list]): Defaults to None. Otherwise, is a list of uint64 ids to filter cutout by.
        iso (bool): Flag indicating if you want to get to the "isotropic" version of a cuboid, if available
        access_mode (str): Indicates one of three possible modes.
            cache = Will use cache and check for dirty keys
            no_cache = Will skip checking the cache but check for dirty keys
            raw = Will skip checking the cache and dirty keys

    Returns:
        cube.Cube: The cutout data stored in a Cube instance

    Raises:
        (SPDBError):
    """
    boss_logger = BossLogger()
    boss_logger.setLevel("info")
    blog = boss_logger.logger

    if not time_sample_range:
        # If no time sample range is defined, use the default of [0, 1)
        time_sample_range = [0, 1]

    # if cutout is below resolution, get a smaller cube and scaleup
    # ONLY FOR ANNO CHANNELS - if data is missing on the current resolution but exists elsewhere...extrapolate
    # resource.get_channel().base_resolution is the "base" resolution and you assume data exists there.
    # If downsampled you don't have to worry about this.

    # currently we don't upsample annotations when hardening the database, so don't need to check for propagated.

    # Create namedtuple for consistency with re-sampling paths through the code
    result_tuple = namedtuple('ResampleCoords',
                              ['corner', 'extent', 'x_pixel_offset', 'y_pixel_offset'])

    # Check if you need to scale a cutout due to off-base resolution cutout and the downsample state
    channel = resource.get_channel()
    if not channel.is_image():
        # The channel is an annotation so we can dynamically re-sample
        base_res = channel.base_resolution

        if base_res > resolution and not resource.is_downsampled():
            # Desired cutout is below base res in hierarchy (higher res image). Must up-sample cutout dynamically
            # Find the effective dimensions of the up-sampled cutout
            raise SpdbError('Not Implemented',
                            'Dynamic resolution up-sampling not yet implemented.',
                            ErrorCodes.FUTURE)

            # cutout_coords = self._up_sample_cutout(resource, corner, extent, resolution)
            # [x_cube_dim, y_cube_dim, z_cube_dim] = cube_dim = CUBOIDSIZE[base_res]
            # cutout_resolution = base_res

        elif not channel.is_image() and base_res < resolution and not resource.is_downsampled():
            # Currently, let's not support this. We can cutout a smaller cube and up-sample for the user, but do not
            # want to deal with cutting out large regions and down-sampling
            raise SpdbError('Not Implemented',
                            'Dynamic resolution down-sampling not yet implemented.',
                            ErrorCodes.FUTURE)

            # If cutout is an annotation channel, above base resolution (lower res), and NOT propagated, down-sample
            # cutout_coords = self._down_sample_cutout(resource, corner, extent, resolution)
            # [x_cube_dim, y_cube_dim, z_cube_dim] = cube_dim = CUBOIDSIZE[base_res]
            # cutout_resolution = base_res
        else:
            # this is the default path when not DYNAMICALLY scaling the resolution

            # get the size of the image and cube
            [x_cube_dim, y_cube_dim, z_cube_dim] = cube_dim = CUBOIDSIZE[resolution]
            cutout_resolution = resolution

            # Create namedtuple for consistency with re-sampling paths through the code
            cutout_coords = result_tuple(corner, extent, None, None)
    else:
        # Resource is an image channel, so no re-sampling

        # get the size of the image and cube
        [x_cube_dim, y_cube_dim, z_cube_dim] = cube_dim = CUBOIDSIZE[resolution]
        cutout_resolution = resolution

        # Create namedtuple for consistency with re-sampling paths through the code
        cutout_coords = result_tuple(corner, extent, None, None)

    # Round to the nearest larger cube in all dimensions
    z_start = cutout_coords.corner[2] // z_cube_dim
    y_start = cutout_coords.corner[1] // y_cube_dim
    x_start = cutout_coords.corner[0] // x_cube_dim

    z_num_cubes = (cutout_coords.corner[2] + cutout_coords.extent[2] + z_cube_dim - 1) // z_cube_dim - z_start
    y_num_cubes = (cutout_coords.corner[1] + cutout_coords.extent[1] + y_cube_dim - 1) // y_cube_dim - y_start
    x_num_cubes = (cutout_coords.corner[0] + cutout_coords.extent[0] + x_cube_dim - 1) // x_cube_dim - x_start

    # Initialize the final output cube (before trim operation since adding full cuboids)
    out_cube = Cube.create_cube(resource,
                                [x_num_cubes * x_cube_dim, y_num_cubes * y_cube_dim, z_num_cubes * z_cube_dim],
                                time_sample_range)

    # Build a list of indexes to access
    # TODO: Move this for loop directly into c-lib
    list_of_idxs = []
    for z in range(z_num_cubes):
        for y in range(y_num_cubes):
            for x in range(x_num_cubes):
                morton_idx = ndlib.XYZMorton([x + x_start, y + y_start, z + z_start])
                list_of_idxs.append(morton_idx)

    # Sort the indexes in Morton order
    list_of_idxs.sort()

    # xyz offset stored for later use
    lowxyz = ndlib.MortonXYZ(list_of_idxs[0])

    # If the user specifies the access_mode to be raw, then the system will bypass checking for dirty keys.
    # This option is only recommended for large quickly scaling ingest jobs.
    if access_mode == "raw":
        blog.info("In access_mode {}, bypassing write check of dirty keys".format(access_mode))
        missing_key_idx = []
        cached_key_idx = []
        all_keys = self.kvio.generate_cached_cuboid_keys(resource, cutout_resolution,
                                                         list(range(*time_sample_range)), list_of_idxs, iso=iso)
    # If the user specified either no_cache or cache as the access_mode, then the system will check for dirty keys.
    else:
        # Get index of missing keys for cuboids to read
        blog.info("In access_mode {}, checking for dirty keys".format(access_mode))
        missing_key_idx, cached_key_idx, all_keys = self.kvio.get_missing_read_cache_keys(resource,
                                                                                          cutout_resolution,
                                                                                          time_sample_range,
                                                                                          list_of_idxs,
                                                                                          iso=iso)
        # Wait for cuboids that are currently being written to finish
        start_time = datetime.now()
        dirty_keys = all_keys
        blog.debug("Waiting for {} writes to finish before read can complete".format(len(dirty_keys)))
        while dirty_keys:
            dirty_flags = self.kvio.is_dirty(dirty_keys)
            dirty_keys_temp, clean_keys = [], []
            for key, flag in zip(dirty_keys, dirty_flags):
                (dirty_keys_temp if flag else clean_keys).append(key)
            dirty_keys = dirty_keys_temp

            if (datetime.now() - start_time).seconds > self.dirty_read_timeout:
                # Took too long! Something must have crashed
                raise SpdbError('{} second timeout reached while waiting for dirty cubes to be flushed.'.format(
                    self.dirty_read_timeout),
                    ErrorCodes.ASYNC_ERROR)

            # Sleep a bit so you don't kill the DB
            time.sleep(0.05)

    #
    # All dirty cubes flushed, can begin reading.
    #
    s3_key_idx = []
    cache_cuboids = []
    s3_cuboids = []
    zero_cuboids = []

    # If access_mode is either raw or no_cache, then bypass the cache and load all cuboids directly from S3
    if access_mode == "no_cache" or access_mode == "raw":
        blog.info("In access_mode {}, bypassing cache".format(access_mode))
        # If not using the cache or raw flags, then consider all keys are missing.
        missing_key_idx = [i for i in range(len(all_keys))]

    if len(missing_key_idx) > 0:
        # There are keys that are missing in the cache
        # Get index of missing keys that are in S3
        s3_key_idx, zero_key_idx = self.objectio.cuboids_exist(all_keys, missing_key_idx)

        if len(s3_key_idx) > 0:
            if access_mode == "no_cache" or access_mode == "raw":
                temp_keys = self.objectio.cached_cuboid_to_object_keys(itemgetter(*s3_key_idx)(all_keys))

                # Get objects
                temp_cubes = self.objectio.get_objects(temp_keys)

                # keys will be just the morton id and time sample.
                keys_and_cubes = []
                for key, cube in zip(temp_keys, temp_cubes):
                    vals = key.split("&")
                    keys_and_cubes.append((int(vals[-1]), int(vals[-2]), cube))
                s3_cuboids = self.sort_cubes(resource, keys_and_cubes)
            else:
                # Load data into cache.
                blog.debug("Data missing from cache, but present in S3")

                if len(s3_key_idx) > self.read_lambda_threshold:
                    # Trigger page-in of available blocks from object store and wait for completion
                    blog.debug("Triggering Lambda Page-in")
                    self.page_in_cubes(itemgetter(*s3_key_idx)(all_keys))
                else:
                    # Read cuboids from S3 into cache directly
                    # Convert cuboid-cache keys to object keys
                    blog.debug("Paging-in Keys Directly")
                    temp_keys = self.objectio.cached_cuboid_to_object_keys(itemgetter(*s3_key_idx)(all_keys))

                    # Get objects
                    temp_cubes = self.objectio.get_objects(temp_keys)

                    # write to cache
                    blog.debug("put keys on direct page in: {}".format(itemgetter(*s3_key_idx)(all_keys)))
                    self.kvio.put_cubes(itemgetter(*s3_key_idx)(all_keys), temp_cubes)

        if len(zero_key_idx) > 0:
            if access_mode == "cache":
                blog.debug("Data missing in cache, but not in S3")
            else:
                blog.debug("No data for some keys, making cuboids with zeros")

            # Keys that don't exist in object store render as zeros
            [x_cube_dim, y_cube_dim, z_cube_dim] = CUBOIDSIZE[resolution]
            for idx in zero_key_idx:
                parts, m_id = all_keys[idx].rsplit("&", 1)
                _, t_start = parts.rsplit("&", 1)
                temp_cube = Cube.create_cube(resource,
                                             [x_cube_dim, y_cube_dim, z_cube_dim],
                                             [int(t_start), int(t_start) + 1])
                temp_cube.morton_id = int(m_id)
                temp_cube.zeros()
                zero_cuboids.append(temp_cube)

    # Get cubes from the cache database (either already there or freshly paged in)
    if access_mode == "cache":
        blog.info("In access_mode {}, using cache".format(access_mode))
        # TODO: Optimize access to cache data and checking for dirty cubes
        if len(s3_key_idx) > 0:
            blog.debug("Get cubes from cache that were paged in from S3")
            blog.debug(itemgetter(*s3_key_idx)(all_keys))

            s3_cuboids = self.get_cubes(resource, itemgetter(*s3_key_idx)(all_keys))

            # Record misses that were found in S3 for possible pre-fetching
            self.cache_state.add_cache_misses(itemgetter(*s3_key_idx)(all_keys))

        # Get previously cached cubes, waiting for dirty cubes to be updated if needed
        if len(cached_key_idx) > 0:
            blog.debug("Get cubes that were already present in the cache")

            # Get the cached keys once in list form
            cached_keys_list = itemgetter(*cached_key_idx)(all_keys)
            if isinstance(cached_keys_list, str):
                cached_keys_list = [cached_keys_list]
            if isinstance(cached_keys_list, tuple):
                cached_keys_list = list(cached_keys_list)

            # Split clean and dirty keys
            dirty_flags = self.kvio.is_dirty(cached_keys_list)
            dirty_keys, clean_keys = [], []
            for key, flag in zip(cached_keys_list, dirty_flags):
                (dirty_keys if flag else clean_keys).append(key)

            # Get all the clean cubes immediately, removing them from the list of cached keys to get
            for k in clean_keys:
                cached_keys_list.remove(k)
            cache_cuboids.extend(self.get_cubes(resource, clean_keys))

            # Get the dirty ones when you can with a timeout
            start_time = datetime.now()
            while dirty_keys:
                dirty_flags = self.kvio.is_dirty(cached_keys_list)
                dirty_keys, clean_keys = [], []
                for key, flag in zip(cached_keys_list, dirty_flags):
                    (dirty_keys if flag else clean_keys).append(key)

                if clean_keys:
                    # Some keys are ready now. Remove from list and get them
                    for k in clean_keys:
                        cached_keys_list.remove(k)
                    cache_cuboids.extend(self.get_cubes(resource, clean_keys))

                if (datetime.now() - start_time).seconds > self.dirty_read_timeout:
                    # Took too long! Something must have crashed
                    raise SpdbError('{} second timeout reached while waiting for dirty cubes to be flushed.'.format(
                        self.dirty_read_timeout),
                        ErrorCodes.ASYNC_ERROR)

                # Sleep a bit so you don't kill the DB
                time.sleep(0.05)

    if access_mode != "cache" and access_mode != "no_cache" and access_mode != "raw":
        raise SpdbError('The access_mode "{}" specified is not valid'.format(access_mode),
                        ErrorCodes.SPDB_ERROR)

    #
    # At this point, have all cuboids whether or not the cache was used.
    #

    # Add all cuboids (which have all time samples packed in already) to final cube of data
    for cube in cache_cuboids + s3_cuboids + zero_cuboids:
        # Compute offset so data inserted properly
        curxyz = ndlib.MortonXYZ(cube.morton_id)
        offset = [curxyz[0] - lowxyz[0], curxyz[1] - lowxyz[1], curxyz[2] - lowxyz[2]]

        # add it to the output cube
        out_cube.add_data(cube, offset)

    # A smaller cube was cutout due to off-base resolution query: up-sample and trim
    base_res = channel.base_resolution
    if not channel.is_image() and base_res > resolution and not resource.is_downsampled():
        raise SpdbError('Not Implemented',
                        'Dynamic resolution up-sampling not yet implemented.',
                        ErrorCodes.FUTURE)

        # TODO: implement dynamic re-sampling
        # out_cube.zoomData(base_res - resolution)
        #
        # need to trim based on the cube cutout at new resolution
        # out_cube.trim(corner[0] % (x_cube_dim * (2 ** (base_res - resolution))) + cutout_coords.x_pixel_offset,
        #               extent[0],
        #               corner[1] % (y_cube_dim * (2 ** (base_res - resolution))) + cutout_coords.y_pixel_offset,
        #               extent[1],
        #               corner[2] % z_cube_dim,
        #               extent[2])

    # A larger cube was cutout due to off-base resolution query: down-sample and trim
    elif not channel.is_image() and base_res < resolution and not resource.is_downsampled():
        raise SpdbError('Not Implemented',
                        'Dynamic resolution down-sampling not yet implemented.',
                        ErrorCodes.FUTURE)

        # out_cube.downScale(resolution - base_res)
        #
        # need to trim based on the cube cutout at new resolution
        # out_cube.trim(corner[0] % (x_cube_dim * (2 ** (base_res - resolution))),
        #               extent[0],
        #               corner[1] % (y_cube_dim * (2 ** (base_res - resolution))),
        #               extent[1],
        #               corner[2] % z_cube_dim,
        #               extent[2])

    # Trim cube since cutout was not cuboid aligned
    elif extent[0] % x_cube_dim == 0 and \
            extent[1] % y_cube_dim == 0 and \
            extent[2] % z_cube_dim == 0 and \
            corner[0] % x_cube_dim == 0 and \
            corner[1] % y_cube_dim == 0 and \
            corner[2] % z_cube_dim == 0:
        # Cube is already the correct dimensions
        pass
    else:
        out_cube.trim(corner[0] % x_cube_dim,
                      extent[0],
                      corner[1] % y_cube_dim,
                      extent[1],
                      corner[2] % z_cube_dim,
                      extent[2])

    # Filter out ids not in list.
    if filter_ids is not None:
        try:
            out_cube.data = ndlib.filter_ctype_OMP(out_cube.data, filter_ids)
        except ValueError as ve:
            raise SpdbError('filter_ids probably not convertible to numpy uint64 array: {}'.format(ve),
                            ErrorCodes.DATATYPE_MISMATCH) from ve
        except:
            raise SpdbError('unknown error filtering cutout', ErrorCodes.SPDB_ERROR)

    return out_cube
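# --- Illustrative sketch (not part of the original source) ---------------------
# Worked example of the alignment math used in cutout() above: a non-aligned
# request is expanded to whole cuboids, assembled, and then trimmed back down via
# out_cube.trim(corner % cube_dim, extent, ...) per axis. The corner, extents,
# and cuboid size below are made-up numbers for illustration only.
def _example_cutout_alignment_math():
    x_cube_dim, y_cube_dim, z_cube_dim = 512, 512, 16
    corner = [700, 0, 5]     # xyz corner of the requested cutout
    extent = [100, 512, 20]  # xyz extents of the requested cutout

    # Cuboid indices spanned by the request (same rounding as cutout())
    x_start = corner[0] // x_cube_dim                                         # -> 1
    x_num = (corner[0] + extent[0] + x_cube_dim - 1) // x_cube_dim - x_start  # -> 1
    z_start = corner[2] // z_cube_dim                                         # -> 0
    z_num = (corner[2] + extent[2] + z_cube_dim - 1) // z_cube_dim - z_start  # -> 2

    # The assembled cube is cuboid aligned, so trim back to the request:
    # offset of the requested corner inside the first cuboid, then the extent.
    x_trim_offset = corner[0] % x_cube_dim   # -> 188
    z_trim_offset = corner[2] % z_cube_dim   # -> 5

    return (x_start, x_num, x_trim_offset), (z_start, z_num, z_trim_offset)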