def test_downsample_volume(self, fake_decompress, fake_s3, fake_s3_ind):
    """
    Smoke test: execute most of the code in downsample_volume() so that
    typos and other errors that only show up at runtime are caught.
    """
    # The mocked S3 bucket returns no stored data; the mocked decompressor
    # hands back a random uint64 cube instead.
    fake_s3.get.return_value = None
    fake_decompress.return_value = np.random.randint(
        0, 256, (16, 512, 512), dtype='uint64')

    downsample_args = {
        'collection_id': 1,
        'experiment_id': 2,
        'channel_id': 3,
        'annotation_channel': True,
        'data_type': 'uint64',
        's3_bucket': 'testBucket.example.com',
        's3_index': 's3index.example.com',
        'resolution': 0,
        'type': 'isotropic',
        'iso_resolution': 4,
        'aws_region': 'us-east-1',
    }

    corner = XYZ(0, 0, 0)      # volume corner to downsample
    extent = XYZ(2, 2, 2)      # number of cubes per axis
    cube_dim = XYZ(512, 512, 16)

    dsv.downsample_volume(downsample_args, corner, extent, cube_dim, True)
def upload_data(self, args):
    """ Fill the coord frame with random data.

    Only the first volume worth of cubes is uploaded (see DP HACK below);
    the downsample volume lambda reads just these cubes when passed the
    'test' argument.

    Args:
        args (dict): This should be the dict returned by get_downsample_args().
    """
    # Cuboid dimensions at resolution 0.
    x_dim, y_dim, z_dim = CUBOIDSIZE[0]

    resource = BossResourceBasic()
    resource.from_dict(self.get_image_dict())
    resolution = 0
    ts = 0
    version = 0

    # DP HACK: uploading all cubes will take longer than the actual downsample
    #          just upload the first volume worth of cubes.
    #          The downsample volume lambda will only read these cubes when
    #          passed the 'test' argument.
    bucket = S3Bucket(self.bosslet_config.session, args['s3_bucket'])
    print('Uploading test data', end='', flush=True)
    for pos in xyz_range(XYZ(0, 0, 0), XYZ(2, 2, 2)):
        key = AWSObjectStore.generate_object_key(resource, resolution, ts, pos.morton)
        # BUG FIX: the version suffix was hard-coded as "&0" even though a
        # `version` variable exists; derive the suffix from `version` so the
        # key stays in sync if the version ever changes.
        key += "&{}".format(version)  # Add the version number

        # Use a distinct name for the data cube; the original reused the
        # loop variable, shadowing the XYZ position after first use.
        cube = Cube.create_cube(resource, [x_dim, y_dim, z_dim])
        cube.random()
        bucket.put(key, cube.to_blosc())
        print('.', end='', flush=True)
    print(' Done uploading.')
def handler(args, context):
    """Lambda entry point: pull a batch of cube coordinates from the cubes
    queue and downsample each corresponding volume.
    """
    # Re-hydrate the XYZ tuples that were serialized for the lambda invoke.
    for key in ('step', 'dim'):
        args[key] = XYZ(*args[key])

    queue = boto3.resource('sqs').Queue(args['cubes_arn'])
    messages = queue.receive_messages(MaxNumberOfMessages=args['bucket_size'],
                                      WaitTimeSeconds=5)

    for message in messages:
        target = XYZ(*json.loads(message.body))
        downsample_volume(args['args'],
                          target,
                          args['step'],
                          args['dim'],
                          args['use_iso_flag'])
        # Only delete the message once the volume was processed successfully.
        message.delete()
def convert(key):
    """Replace args[key] (a plain sequence of coordinates) with an XYZ, in place."""
    coords = args[key]
    args[key] = XYZ(*coords)
def downsample_volume(args, target, step, dim, use_iso_key, index_annotations):
    """Downsample a volume into a single cube

    Download `step` cubes from S3, downsample them into a single cube, upload
    to S3 and update the S3 index for the new cube.

    Args:
        args {
            collection_id (int)
            experiment_id (int)
            channel_id (int)
            annotation_channel (bool)
            data_type (str) 'uint8' | 'uint16' | 'uint64'
            s3_bucket (URL)
            s3_index (URL)
            id_index (URL)
            resolution (int) The resolution to downsample. Creates resolution + 1
            type (str) 'isotropic' | 'anisotropic'
            iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both
        }
        target (XYZ) : Corner of volume to downsample
        step (XYZ) : Extent of the volume to downsample
        dim (XYZ) : Dimensions of a single cube
        use_iso_key (boolean) : If the BOSS keys should include an 'ISO=' flag
        index_annotations (boolean) : If the id indices should be updated for
                                      annotation channels (see final if-block)
    """
    log.debug("Downsampling {}".format(target))
    # Hard coded values
    version = 0
    t = 0
    dim_t = 1  # NOTE(review): dim_t is not referenced anywhere below

    iso = 'ISO' if use_iso_key else None

    # If anisotropic and resolution is when neariso is reached, the first
    # isotropic downsample needs to use the anisotropic data. Future isotropic
    # downsamples will use the previous isotropic data.
    parent_iso = None if args['resolution'] == args['iso_resolution'] else iso

    col_id = args['collection_id']
    exp_id = args['experiment_id']
    chan_id = args['channel_id']
    data_type = args['data_type']
    annotation_chan = args['annotation_channel']

    resolution = args['resolution']

    s3 = S3Bucket(args['s3_bucket'])
    s3_index = DynamoDBTable(args['s3_index'])
    id_index = DynamoDBTable(args['id_index'])

    # Download all of the cubes that will be downsamples
    # Working buffer holds the whole step x dim volume at once.
    volume = Buffer.zeros(dim * step, dtype=np_types[data_type], order='C')
    volume.dim = dim
    volume.cubes = step

    volume_empty = True # abort if the volume doesn't exist in S3
    for offset in xyz_range(step):
        cube = target + offset
        try:
            obj_key = HashedKey(parent_iso, col_id, exp_id, chan_id, resolution, t, cube.morton, version=version)
            data = s3.get(obj_key)
            data = blosc.decompress(data)
            # DP ???: Check to see if the buffer is all zeros?
            data = Buffer.frombuffer(data, dtype=np_types[data_type])
            data.resize(dim)

            #log.debug("Downloaded cube {}".format(cube))
            # Copy the downloaded cube into its slot of the working volume.
            volume[offset * dim:(offset + 1) * dim] = data
            volume_empty = False
        except Exception as e: # TODO: Create custom exception for S3 download
            #log.exception("Problem downloading cubes {}".format(cube))
            #log.debug("No cube at {}".format(cube))

            # Eat the error, we don't care if the cube doesn't exist
            # If the cube doesn't exist blank data will be used for downsampling
            # If all the cubes don't exist, then the downsample is finished
            # NOTE(review): this also swallows real S3/decompress failures,
            # treating them the same as a missing cube.
            pass

    if volume_empty:
        log.debug("Completely empty volume, not downsampling")
        return

    # Create downsampled cube
    new_dim = XYZ(*CUBOIDSIZE[resolution + 1])
    cube = Buffer.zeros(new_dim, dtype=np_types[data_type], order='C')
    cube.dim = new_dim
    cube.cubes = XYZ(1, 1, 1)

    downsample_cube(volume, cube, annotation_chan)

    target = target / step # scale down the output

    # Save new cube in S3
    obj_key = HashedKey(iso, col_id, exp_id, chan_id, resolution + 1, t, target.morton, version=version)
    compressed = blosc.compress(cube, typesize=(np.dtype(cube.dtype).itemsize))
    s3.put(obj_key, compressed)

    # Update indicies
    # Same key scheme, but without the version
    obj_key = HashedKey(iso, col_id, exp_id, chan_id, resolution + 1, t, target.morton)
    # Create S3 Index if it doesn't exist
    idx_key = S3IndexKey(obj_key, version)
    if not s3_index.exists(idx_key):
        ingest_job = 0 # Valid to be 0, as posting a cutout uses 0
        idx_key = S3IndexKey(obj_key,
                             version,
                             col_id,
                             '{}&{}&{}&{}'.format(exp_id, chan_id, resolution + 1, ingest_job))
        s3_index.put(idx_key)

    if annotation_chan and index_annotations:
        ids = ndlib.unique(cube)

        # Convert IDs to strings and drop any IDs that equal zero
        # NOTE(review): loop variable `id` shadows the builtin id()
        ids = [str(id) for id in ids if id != 0]

        if len(ids) > 0:
            idx_key = S3IndexKey(obj_key, version)
            s3_index.update_ids(idx_key, ids)

            # Record, for every annotation id, which object key contains it.
            for id in ids:
                idx_key = HashedKey(iso, col_id, exp_id, chan_id, resolution + 1, id)
                chan_key = IdIndexKey(idx_key, version)
                id_index.update_id(chan_key, obj_key)
def convert(args_, key):
    """Coerce args_[key] from a plain sequence into an XYZ tuple, in place."""
    raw = args_[key]
    args_[key] = XYZ(*raw)
def downsample_volume(args, target, step, dim, use_iso_key):
    """Downsample a volume into a single cube

    Download `step` cubes from S3, downsample them into a single cube, upload
    to S3 and update the S3 index for the new cube.

    Args:
        args {
            collection_id (int)
            experiment_id (int)
            channel_id (int)
            annotation_channel (bool)
            data_type (str) 'uint8' | 'uint16' | 'uint64'
            s3_bucket (URL)
            s3_index (str)
            resolution (int) The resolution to downsample. Creates resolution + 1
            type (str) 'isotropic' | 'anisotropic'
            iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both
            aws_region (str) AWS region to run in such as us-east-1
        }
        target (XYZ) : Corner of volume to downsample
        step (XYZ) : Extent of the volume to downsample
        dim (XYZ) : Dimensions of a single cube
        use_iso_key (boolean) : If the BOSS keys should include an 'ISO=' flag
    """
    log.debug("Downsampling {}".format(target))
    # Hard coded values
    version = 0
    t = 0
    dim_t = 1  # NOTE(review): dim_t is not referenced anywhere below

    iso = 'ISO' if use_iso_key else None

    # If anisotropic and resolution is when neariso is reached, the first
    # isotropic downsample needs to use the anisotropic data. Future isotropic
    # downsamples will use the previous isotropic data.
    parent_iso = None if args['resolution'] == args['iso_resolution'] else iso

    col_id = args['collection_id']
    exp_id = args['experiment_id']
    chan_id = args['channel_id']
    data_type = args['data_type']
    annotation_chan = args['annotation_channel']

    resolution = args['resolution']

    s3 = S3Bucket(args['s3_bucket'])
    s3_index = S3DynamoDBTable(args['s3_index'], args['aws_region'])

    # Download all of the cubes that will be downsamples
    # Working buffer holds the whole step x dim volume at once.
    volume = Buffer.zeros(dim * step, dtype=np_types[data_type], order='C')
    volume.dim = dim
    volume.cubes = step

    volume_empty = True # abort if the volume doesn't exist in S3
    for offset in xyz_range(step):
        if args.get('test'):
            # Enable Test Mode
            # This is where the cubes downsamples are all taken from 0/0/0
            # so that the entire frame doesn't have to be populated to test
            # the code paths that downsample cubes
            cube = offset # use target 0/0/0
        else:
            cube = target + offset

        obj_key = HashedKey(parent_iso, col_id, exp_id, chan_id, resolution, t, cube.morton, version=version)
        data = s3.get(obj_key)
        if data:
            data = blosc.decompress(data)
            # DP ???: Check to see if the buffer is all zeros?
            data = Buffer.frombuffer(data, dtype=np_types[data_type])
            data.resize(dim)

            #log.debug("Downloaded cube {}".format(cube))
            # Copy the downloaded cube into its slot of the working volume;
            # missing cubes leave their slot zero-filled.
            volume[offset * dim:(offset + 1) * dim] = data
            volume_empty = False

    if volume_empty:
        log.debug("Completely empty volume, not downsampling")
        return

    # Create downsampled cube
    new_dim = XYZ(*CUBOIDSIZE[resolution + 1])
    cube = Buffer.zeros(new_dim, dtype=np_types[data_type], order='C')
    cube.dim = new_dim
    cube.cubes = XYZ(1, 1, 1)

    downsample_cube(volume, cube, annotation_chan)

    target = target / step # scale down the output

    # Save new cube in S3
    obj_key = HashedKey(iso, col_id, exp_id, chan_id, resolution + 1, t, target.morton, version=version)
    compressed = blosc.compress(cube, typesize=(np.dtype(cube.dtype).itemsize))
    s3.put(obj_key, compressed)

    # Update indicies
    # Same key scheme, but without the version
    obj_key = HashedKey(iso, col_id, exp_id, chan_id, resolution + 1, t, target.morton)
    # Create S3 Index if it doesn't exist
    idx_key = S3IndexKey(obj_key, version)
    if not s3_index.exists(idx_key):
        ingest_job = 0 # Valid to be 0, as posting a cutout uses 0
        idx_key = S3IndexKey(obj_key,
                             version,
                             col_id,
                             '{}&{}&{}&{}'.format(exp_id, chan_id, resolution + 1, ingest_job),
                             # Replaced call to SPDB AWSObjectStore.generate_lookup_key, as SPDB master doesn't contain this call
                             # AWSObjectStore.generate_lookup_key(col_id, exp_id, chan_id, resolution + 1)
                             # NOTE(review): randrange presumably spreads the
                             # lookup key across LOOKUP_KEY_MAX_N shards -- confirm
                             '{}&{}&{}&{}&{}'.format(col_id, exp_id, chan_id, resolution + 1, randrange(LOOKUP_KEY_MAX_N)))
        s3_index.put(idx_key)
def frame(key):
    """Build an XYZ from the x/y/z entries of args selected by the key template."""
    axes = (args[key.format(axis)] for axis in ('x', 'y', 'z'))
    return XYZ(*axes)
def downsample_channel(args):
    """
    Slice the given channel into chunks of 2x2x2 or 2x2x1 cubes that are then
    sent to the downsample_volume lambda for downsampling into a 1x1x1 cube at
    resolution + 1.

    Makes use of the bossutils.multidimensional library for simplified vector
    math.

    Args:
        args {
            downsample_volume_sfn (ARN)

            collection_id (int)
            experiment_id (int)
            channel_id (int)
            annotation_channel (bool)
            data_type (str) 'uint8' | 'uint16' | 'uint64'

            s3_bucket (URL)
            s3_index (URL)
            id_index (URL)

            x_start (int)
            y_start (int)
            z_start (int)

            x_stop (int)
            y_stop (int)
            z_stop (int)

            resolution (int) The resolution to downsample. Creates resolution + 1
            resolution_max (int) The maximum resolution to generate
            res_lt_max (bool) = args['resolution'] < (args['resolution_max'] - 1)

            annotation_index_max (int) The maximum resolution to index annotation channel cubes at
                                       When annotation_index_max = N, indices will exist for res 0 - (N - 1)

            type (str) 'isotropic' | 'anisotropic'
            iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both
        }

    Returns:
        (dict): the input args, mutated in place: frame extents shrunk by
                the downsample step, resolution advanced, res_lt_max updated
    """
    #log.debug("Downsampling resolution " + str(args['resolution']))

    resolution = args['resolution']

    dim = XYZ(*CUBOIDSIZE[resolution])
    #log.debug("Cube dimensions: {}".format(dim))

    def frame(key):
        # key is a template like '{}_start'; returns the XYZ for that corner.
        return XYZ(args[key.format('x')], args[key.format('y')], args[key.format('z')])

    # Figure out variables for isotropic, anisotropic, or isotropic and anisotropic
    # downsampling. If both are happening, fanout one and then the other in series.
    configs = []
    if args['type'] == 'isotropic':
        configs.append({
            'name': 'isotropic',
            'step': XYZ(2,2,2),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })
    else:
        configs.append({
            'name': 'anisotropic',
            'step': XYZ(2,2,1),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })

        if resolution >= args['iso_resolution']: # DP TODO: Figure out how to launch aniso iso version with mutating arguments
            configs.append({
                'name': 'isotropic',
                'step': XYZ(2,2,2),
                'iso_flag': True,
                'frame_start_key': 'iso_{}_start',
                'frame_stop_key': 'iso_{}_stop',
            })

    for config in configs:
        frame_start = frame(config['frame_start_key'])
        frame_stop = frame(config['frame_stop_key'])
        step = config['step']
        use_iso_flag = config['iso_flag'] # If the resulting cube should be marked with the ISO flag
        # Annotation indexing only happens below the configured max resolution.
        index_annotations = args['resolution'] < (args['annotation_index_max'] - 1)

        # Round to the furthest full cube from the center of the data
        cubes_start = frame_start // dim
        cubes_stop = ceildiv(frame_stop, dim)

        log.debug('Downsampling {} resolution {}'.format(config['name'], resolution))
        log.debug("Frame corner: {}".format(frame_start))
        log.debug("Frame extent: {}".format(frame_stop))
        log.debug("Cubes corner: {}".format(cubes_start))
        log.debug("Cubes extent: {}".format(cubes_stop))
        log.debug("Downsample step: {}".format(step))
        log.debug("Indexing Annotations: {}".format(index_annotations))

        # Call the downsample_volume lambda to process the data
        fanout(aws.get_session(),
               args['downsample_volume_sfn'],
               make_args(args, cubes_start, cubes_stop, step, dim, use_iso_flag, index_annotations),
               max_concurrent = MAX_NUM_PROCESSES,
               rampup_delay = RAMPUP_DELAY,
               rampup_backoff = RAMPUP_BACKOFF,
               poll_delay = POLL_DELAY,
               status_delay = STATUS_DELAY)

        # Resize the coordinate frame extents as the data shrinks
        # DP NOTE: doesn't currently work correctly with non-zero frame starts
        def resize(var, size):
            start = config['frame_start_key'].format(var)
            stop = config['frame_stop_key'].format(var)
            args[start] //= size
            args[stop] = ceildiv(args[stop], size)

        resize('x', step.x)
        resize('y', step.y)
        resize('z', step.z)

    # if next iteration will split into aniso and iso downsampling, copy the coordinate frame
    if args['type'] != 'isotropic' and (resolution + 1) == args['iso_resolution']:
        def copy(var):
            args['iso_{}_start'.format(var)] = args['{}_start'.format(var)]
            args['iso_{}_stop'.format(var)] = args['{}_stop'.format(var)]

        copy('x')
        copy('y')
        copy('z')

    # Advance the loop and recalculate the conditional
    # Using max - 1 because resolution_max should not be a valid resolution
    # and res < res_max will end with res = res_max - 1, which generates res_max resolution
    args['resolution'] = resolution + 1
    args['res_lt_max'] = args['resolution'] < (args['resolution_max'] - 1)
    return args
def downsample_channel(args):
    """
    Slice the given channel into chunks of 2x2x2 or 2x2x1 cubes that are then
    sent to the downsample_volume lambda for downsampling into a 1x1x1 cube at
    resolution + 1.

    Makes use of the bossutils.multidimensional library for simplified vector
    math.

    Generators are used as much as possible (instead of lists) so that large
    lists of data are not actualized and kept in memory.

    Args:
        args {
            msg { (this holds the contents of the msg from the downsample queue)
                downsample_volume_lambda (ARN | lambda name)

                collection_id (int)
                experiment_id (int)
                channel_id (int)
                annotation_channel (bool)
                data_type (str) 'uint8' | 'uint16' | 'uint64'

                s3_bucket (URL)
                s3_index (URL)

                x_start (int)
                y_start (int)
                z_start (int)

                x_stop (int)
                y_stop (int)
                z_stop (int)

                resolution (int) The resolution to downsample. Creates resolution + 1
                resolution_max (int) The maximum resolution to generate
                res_lt_max (bool) = args['msg']['resolution'] < (args['msg']['resolution_max'] - 1)

                type (str) 'isotropic' | 'anisotropic'
                iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both

                aws_region (str) AWS region to run in such as us-east-1
            }
            job_receipt_handle (str) Used by downstream state to delete the downsample job from queue
            queue_url (str) URL of downsample queue; downstream state deletes from this queue
            sfn_arn (str) <arn of the downsample step fcn>
            db_host (str) Host of MySQL database.
        }

    Returns:
        (dict): An updated argument dictionary containing the shrunk frame,
                resolution, res_lt_max values, and lookup_key
    """

    # TODO: load downsample_volume_lambda from boss config

    #log.debug("Downsampling resolution " + str(args['msg']['resolution']))

    resolution = args['msg']['resolution']

    dim = XYZ(*CUBOIDSIZE[resolution])
    #log.debug("Cube dimensions: {}".format(dim))

    def frame(key):
        # key is a template like '{}_start'; returns the XYZ for that corner.
        return XYZ(args['msg'][key.format('x')],
                   args['msg'][key.format('y')],
                   args['msg'][key.format('z')])

    # Figure out variables for isotropic, anisotropic, or isotropic and anisotropic
    # downsampling. If both are happening, fanout one and then the other in series.
    configs = []
    if args['msg']['type'] == 'isotropic':
        configs.append({
            'name': 'isotropic',
            'step': XYZ(2, 2, 2),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })
    else:
        configs.append({
            'name': 'anisotropic',
            'step': XYZ(2, 2, 1),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })

        # if this iteration will split into aniso and iso downsampling, copy the coordinate frame
        if resolution == args['msg']['iso_resolution']:
            def copy(var):
                args['msg']['iso_{}_start'.format(var)] = args['msg']['{}_start'.format(var)]
                args['msg']['iso_{}_stop'.format(var)] = args['msg']['{}_stop'.format(var)]

            copy('x')
            copy('y')
            copy('z')

        if resolution >= args['msg']['iso_resolution']: # DP TODO: Figure out how to launch aniso iso version with mutating arguments
            configs.append({
                'name': 'isotropic',
                'step': XYZ(2, 2, 2),
                'iso_flag': True,
                'frame_start_key': 'iso_{}_start',
                'frame_stop_key': 'iso_{}_stop',
            })

    for config in configs:
        # Different ID and queue for each resolution, as it takes 60 seconds to delete a queue
        # Different ID and queue for each iso/aniso downsample incase a a DLQ message is received
        # for the previous config
        downsample_id = str(random.random())[2:] # remove the '0.' part of the number
        dlq_arn = create_queue('downsample-dlq-' + downsample_id)
        cubes_arn = create_queue('downsample-cubes-' + downsample_id)

        try:
            frame_start = frame(config['frame_start_key'])
            frame_stop = frame(config['frame_stop_key'])
            step = config['step']
            use_iso_flag = config['iso_flag'] # If the resulting cube should be marked with the ISO flag

            # Round to the furthest full cube from the center of the data
            cubes_start = frame_start // dim
            cubes_stop = ceildiv(frame_stop, dim)

            # For a non-zero start, make sure start cube aligns with a zero start downsample
            # so that the data aligns and there are no shifts with the new downsampled data
            mod = cubes_start % step
            if mod.x != 0:
                cubes_start = XYZ(cubes_start.x - 1, cubes_start.y, cubes_start.z)
            if mod.y != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y - 1, cubes_start.z)
            if mod.z != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y, cubes_start.z - 1)

            log.debug('Downsampling {} resolution {}'.format(config['name'], resolution))
            log.debug("Frame corner: {}".format(frame_start))
            log.debug("Frame extent: {}".format(frame_stop))
            log.debug("Cubes corner: {}".format(cubes_start))
            log.debug("Cubes extent: {}".format(cubes_stop))
            log.debug("Downsample step: {}".format(step))

            log.debug("Populating input cube")
            cube_count = populate_cubes(cubes_arn, cubes_start, cubes_stop, step)

            log.debug("Invoking downsample lambdas")
            # EXTRA_LAMBDAS pads the count; see launch_lambdas for retry logic.
            lambda_count = ceildiv(cube_count, BUCKET_SIZE) + EXTRA_LAMBDAS
            lambda_args = {
                'bucket_size': BUCKET_SIZE,
                'args': args['msg'],
                'step': step,
                'dim': dim,
                'use_iso_flag': use_iso_flag,
                'dlq_arn': dlq_arn,
                'cubes_arn': cubes_arn,
            }

            launch_lambdas(lambda_count,
                           args['msg']['downsample_volume_lambda'],
                           json.dumps(lambda_args).encode('UTF8'),
                           dlq_arn,
                           cubes_arn,
                           args['queue_url'],
                           args['job_receipt_handle'])

            # Resize the coordinate frame extents as the data shrinks
            # DP NOTE: doesn't currently work correctly with non-zero frame starts
            def resize(var, size):
                start = config['frame_start_key'].format(var)
                stop = config['frame_stop_key'].format(var)
                args['msg'][start] //= size
                args['msg'][stop] = ceildiv(args['msg'][stop], size)

            resize('x', step.x)
            resize('y', step.y)
            resize('z', step.z)
        finally:
            # Always tear down the per-config queues, even on failure.
            delete_queue(dlq_arn)
            delete_queue(cubes_arn)

    # Advance the loop and recalculate the conditional
    # Using max - 1 because resolution_max should not be a valid resolution
    # and res < res_max will end with res = res_max - 1, which generates res_max resolution
    args['msg']['resolution'] = resolution + 1
    args['msg']['res_lt_max'] = args['msg']['resolution'] < (args['msg']['resolution_max'] - 1)

    # Move this up one level for use by states that follow.
    args['lookup_key'] = args['msg']['lookup_key']

    return args