    def test_downsample_volume(self, fake_decompress, fake_s3, fake_s3_ind):
        """
        Just execute the majority of the code in downsample_volume() to catch
        typos and other errors that might show up at runtime.
        """
        fake_s3.get.return_value = None
        fake_decompress.return_value = np.random.randint(0,
                                                         256, (16, 512, 512),
                                                         dtype='uint64')
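        # NOTE: the mocked decompress returns one cuboid's worth of random labels; the
        # (16, 512, 512) shape is (z, y, x) order for dim = XYZ(512, 512, 16) below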

        args = dict(collection_id=1,
                    experiment_id=2,
                    channel_id=3,
                    annotation_channel=True,
                    data_type='uint64',
                    s3_bucket='testBucket.example.com',
                    s3_index='s3index.example.com',
                    resolution=0,
                    type='isotropic',
                    iso_resolution=4,
                    aws_region='us-east-1')
        target = XYZ(0, 0, 0)
        step = XYZ(2, 2, 2)
        dim = XYZ(512, 512, 16)
        use_iso_key = True

        dsv.downsample_volume(args, target, step, dim, use_iso_key)
Example 2
    def upload_data(self, args):
        """
        Fill the coord frame with random data.

        Args:
            args (dict): This should be the dict returned by get_downsample_args().
        """
        cuboid_size = CUBOIDSIZE[0]
        x_dim = cuboid_size[0]
        y_dim = cuboid_size[1]
        z_dim = cuboid_size[2]

        resource = BossResourceBasic()
        resource.from_dict(self.get_image_dict())
        resolution = 0
        ts = 0
        version = 0

        # DP HACK: uploading all cubes will take longer than the actual downsample
        #          just upload the first volume worth of cubes.
        #          The downsample volume lambda will only read these cubes when
        #          passed the 'test' argument.
        bucket = S3Bucket(self.bosslet_config.session, args['s3_bucket'])
        print('Uploading test data', end='', flush=True)
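        # xyz_range(XYZ(0,0,0), XYZ(2,2,2)) yields the 8 cubes of a single 2x2x2 downsample volume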
        for cube in xyz_range(XYZ(0,0,0), XYZ(2,2,2)):
            key = AWSObjectStore.generate_object_key(resource, resolution, ts, cube.morton)
            key += "&0" # Add the version number
            #print('morton: {}'.format(cube.morton))
            #print('key: {}'.format(key))
            #print("{} -> {} -> {}".format(cube, cube.morton, key))
            cube = Cube.create_cube(resource, [x_dim, y_dim, z_dim])
            cube.random()
            data = cube.to_blosc()
            bucket.put(key, data)
            print('.', end='', flush=True)
        print(' Done uploading.')
Example 3
def handler(args, context):
    def convert(args_, key):
        args_[key] = XYZ(*args_[key])

    convert(args, 'step')
    convert(args, 'dim')

    sqs = boto3.resource('sqs')
    cubes = sqs.Queue(args['cubes_arn'])
    msgs = cubes.receive_messages(MaxNumberOfMessages=args['bucket_size'],
                                  WaitTimeSeconds=5)

    for msg in msgs:
        downsample_volume(args['args'], XYZ(*json.loads(msg.body)),
                          args['step'], args['dim'], args['use_iso_flag'])
        msg.delete()
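
# A hedged sketch (queue URL and coordinates are illustrative only) of the message format
# handler() consumes: each SQS message body is a JSON-encoded [x, y, z] triple that
# handler() turns back into a cube corner via XYZ(*json.loads(msg.body)).
import json
import boto3

def enqueue_example_cube():
    sqs = boto3.resource('sqs')
    queue = sqs.Queue('https://sqs.us-east-1.amazonaws.com/123456789012/downsample-cubes-example')
    queue.send_message(MessageBody=json.dumps([4, 2, 0]))  # processed as XYZ(4, 2, 0)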
Example 4
def convert(key):
    args[key] = XYZ(*args[key])
Example 5
def downsample_volume(args, target, step, dim, use_iso_key, index_annotations):
    """Downsample a volume into a single cube

    Download `step` cubes from S3, downsample them into a single cube, upload
    to S3 and update the S3 index for the new cube.

    Args:
        args {
            collection_id (int)
            experiment_id (int)
            channel_id (int)
            annotation_channel (bool)
            data_type (str) 'uint8' | 'uint16' | 'uint64'

            s3_bucket (URL)
            s3_index (URL)
            id_index (URL)

            resolution (int) The resolution to downsample. Creates resolution + 1

            type (str) 'isotropic' | 'anisotropic'
            iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both
        }

        target (XYZ) : Corner of volume to downsample
        step (XYZ) : Extent of the volume to downsample
        dim (XYZ) : Dimensions of a single cube
        use_iso_key (boolean) : If the BOSS keys should include an 'ISO=' flag
    """
    log.debug("Downsampling {}".format(target))
    # Hard coded values
    version = 0
    t = 0
    dim_t = 1

    iso = 'ISO' if use_iso_key else None

    # If the channel is anisotropic and this resolution is the one where near-isotropic
    # (neariso) is reached, the first isotropic downsample needs to use the anisotropic
    # data. Future isotropic downsamples will use the previous isotropic data.
    parent_iso = None if args['resolution'] == args['iso_resolution'] else iso

    col_id = args['collection_id']
    exp_id = args['experiment_id']
    chan_id = args['channel_id']
    data_type = args['data_type']
    annotation_chan = args['annotation_channel']

    resolution = args['resolution']

    s3 = S3Bucket(args['s3_bucket'])
    s3_index = DynamoDBTable(args['s3_index'])
    id_index = DynamoDBTable(args['id_index'])

    # Download all of the cubes that will be downsampled
    volume = Buffer.zeros(dim * step, dtype=np_types[data_type], order='C')
    volume.dim = dim
    volume.cubes = step

    volume_empty = True  # abort if the volume doesn't exist in S3
    for offset in xyz_range(step):
        cube = target + offset
        try:
            obj_key = HashedKey(parent_iso,
                                col_id,
                                exp_id,
                                chan_id,
                                resolution,
                                t,
                                cube.morton,
                                version=version)
            data = s3.get(obj_key)
            data = blosc.decompress(data)

            # DP ???: Check to see if the buffer is all zeros?
            data = Buffer.frombuffer(data, dtype=np_types[data_type])
            data.resize(dim)

            #log.debug("Downloaded cube {}".format(cube))
            volume[offset * dim:(offset + 1) * dim] = data
            volume_empty = False
        except Exception as e:  # TODO: Create custom exception for S3 download
            #log.exception("Problem downloading cubes {}".format(cube))
            #log.debug("No cube at {}".format(cube))

            # Eat the error, we don't care if the cube doesn't exist
            # If the cube doesn't exist blank data will be used for downsampling
            # If all the cubes don't exist, then the downsample is finished
            pass

    if volume_empty:
        log.debug("Completely empty volume, not downsampling")
        return

    # Create downsampled cube
    new_dim = XYZ(*CUBOIDSIZE[resolution + 1])
    cube = Buffer.zeros(new_dim, dtype=np_types[data_type], order='C')
    cube.dim = new_dim
    cube.cubes = XYZ(1, 1, 1)

    downsample_cube(volume, cube, annotation_chan)

    target = target / step  # scale down the output
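    # e.g. the 2x2x2 volume whose corner cube is XYZ(4, 2, 6) becomes the single
    # cube XYZ(2, 1, 3) at resolution + 1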

    # Save new cube in S3
    obj_key = HashedKey(iso,
                        col_id,
                        exp_id,
                        chan_id,
                        resolution + 1,
                        t,
                        target.morton,
                        version=version)
    compressed = blosc.compress(cube, typesize=(np.dtype(cube.dtype).itemsize))
    s3.put(obj_key, compressed)

    # Update indices
    # Same key scheme, but without the version
    obj_key = HashedKey(iso, col_id, exp_id, chan_id, resolution + 1, t,
                        target.morton)
    # Create S3 Index if it doesn't exist
    idx_key = S3IndexKey(obj_key, version)
    if not s3_index.exists(idx_key):
        ingest_job = 0  # Valid to be 0, as posting a cutout uses 0
        idx_key = S3IndexKey(
            obj_key, version, col_id,
            '{}&{}&{}&{}'.format(exp_id, chan_id, resolution + 1, ingest_job))
        s3_index.put(idx_key)

    if annotation_chan and index_annotations:
        ids = ndlib.unique(cube)

        # Convert IDs to strings and drop any IDs that equal zero
        ids = [str(id) for id in ids if id != 0]

        if len(ids) > 0:
            idx_key = S3IndexKey(obj_key, version)
            s3_index.update_ids(idx_key, ids)

            for id in ids:
                idx_key = HashedKey(iso, col_id, exp_id, chan_id,
                                    resolution + 1, id)
                chan_key = IdIndexKey(idx_key, version)
                id_index.update_id(chan_key, obj_key)
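
# A rough, self-contained numpy illustration (not the Buffer class used above) of how the
# source volume is assembled cube by cube and then reduced to a single cube. The [::2]
# striding stands in for downsample_cube() and is only a placeholder reduction.
import numpy as np

def assemble_and_reduce_example():
    dim = np.array([16, 512, 512])            # one cuboid in (z, y, x) order
    step = np.array([2, 2, 2])                # a 2x2x2 volume of source cubes
    volume = np.zeros(dim * step, dtype='uint8')
    for z in range(step[0]):
        for y in range(step[1]):
            for x in range(step[2]):
                offset = np.array([z, y, x])
                start, stop = offset * dim, (offset + 1) * dim
                # each downloaded cube lands at volume[offset * dim:(offset + 1) * dim]
                volume[start[0]:stop[0], start[1]:stop[1], start[2]:stop[2]] = 1
    cube = volume[::2, ::2, ::2]              # placeholder for downsample_cube()
    assert cube.shape == tuple(dim)
    return cube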
Example 6
def convert(args_, key):
    args_[key] = XYZ(*args_[key])
Example 7
def downsample_volume(args, target, step, dim, use_iso_key):
    """Downsample a volume into a single cube

    Download `step` cubes from S3, downsample them into a single cube, upload
    to S3 and update the S3 index for the new cube.

    Args:
        args {
            collection_id (int)
            experiment_id (int)
            channel_id (int)
            annotation_channel (bool)
            data_type (str) 'uint8' | 'uint16' | 'uint64'

            s3_bucket (URL)
            s3_index (str)

            resolution (int) The resolution to downsample. Creates resolution + 1

            type (str) 'isotropic' | 'anisotropic'
            iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both

            aws_region (str) AWS region to run in such as us-east-1
        }

        target (XYZ) : Corner of volume to downsample
        step (XYZ) : Extent of the volume to downsample
        dim (XYZ) : Dimensions of a single cube
        use_iso_key (boolean) : If the BOSS keys should include an 'ISO=' flag
    """
    log.debug("Downsampling {}".format(target))
    # Hard coded values
    version = 0
    t = 0
    dim_t = 1

    iso = 'ISO' if use_iso_key else None

    # If the channel is anisotropic and this resolution is the one where near-isotropic
    # (neariso) is reached, the first isotropic downsample needs to use the anisotropic
    # data. Future isotropic downsamples will use the previous isotropic data.
    parent_iso = None if args['resolution'] == args['iso_resolution'] else iso

    col_id = args['collection_id']
    exp_id = args['experiment_id']
    chan_id = args['channel_id']
    data_type = args['data_type']
    annotation_chan = args['annotation_channel']

    resolution = args['resolution']

    s3 = S3Bucket(args['s3_bucket'])
    s3_index = S3DynamoDBTable(args['s3_index'], args['aws_region'])

    # Download all of the cubes that will be downsampled
    volume = Buffer.zeros(dim * step, dtype=np_types[data_type], order='C')
    volume.dim = dim
    volume.cubes = step

    volume_empty = True  # abort if the volume doesn't exist in S3
    for offset in xyz_range(step):
        if args.get('test'):
            # Enable Test Mode
            # In test mode every source cube is read from volume 0/0/0, so the entire
            # frame doesn't have to be populated to exercise the code paths that
            # downsample cubes
            cube = offset  # use target 0/0/0
        else:
            cube = target + offset

        obj_key = HashedKey(parent_iso,
                            col_id,
                            exp_id,
                            chan_id,
                            resolution,
                            t,
                            cube.morton,
                            version=version)
        data = s3.get(obj_key)
        if data:
            data = blosc.decompress(data)

            # DP ???: Check to see if the buffer is all zeros?
            data = Buffer.frombuffer(data, dtype=np_types[data_type])
            data.resize(dim)

            #log.debug("Downloaded cube {}".format(cube))
            volume[offset * dim:(offset + 1) * dim] = data
            volume_empty = False

    if volume_empty:
        log.debug("Completely empty volume, not downsampling")
        return

    # Create downsampled cube
    new_dim = XYZ(*CUBOIDSIZE[resolution + 1])
    cube = Buffer.zeros(new_dim, dtype=np_types[data_type], order='C')
    cube.dim = new_dim
    cube.cubes = XYZ(1, 1, 1)

    downsample_cube(volume, cube, annotation_chan)

    target = target / step  # scale down the output

    # Save new cube in S3
    obj_key = HashedKey(iso,
                        col_id,
                        exp_id,
                        chan_id,
                        resolution + 1,
                        t,
                        target.morton,
                        version=version)
    compressed = blosc.compress(cube, typesize=(np.dtype(cube.dtype).itemsize))
    s3.put(obj_key, compressed)

    # Update indices
    # Same key scheme, but without the version
    obj_key = HashedKey(iso, col_id, exp_id, chan_id, resolution + 1, t,
                        target.morton)
    # Create S3 Index if it doesn't exist
    idx_key = S3IndexKey(obj_key, version)
    if not s3_index.exists(idx_key):
        ingest_job = 0  # Valid to be 0, as posting a cutout uses 0
        idx_key = S3IndexKey(
            obj_key,
            version,
            col_id,
            '{}&{}&{}&{}'.format(exp_id, chan_id, resolution + 1, ingest_job),
            # Replaced call to SPDB AWSObjectStore.generate_lookup_key, as SPDB master doesn't contain this call
            # AWSObjectStore.generate_lookup_key(col_id, exp_id, chan_id, resolution + 1)
            '{}&{}&{}&{}&{}'.format(col_id, exp_id, chan_id, resolution + 1,
                                    randrange(LOOKUP_KEY_MAX_N)))
        s3_index.put(idx_key)
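
# With the test arguments from the first example (collection 1, experiment 2, channel 3,
# resolution 0 -> 1, ingest_job 0), the extra S3IndexKey attributes written above would look
# roughly like this; the trailing value of the lookup key is whatever randrange(LOOKUP_KEY_MAX_N)
# returns, 12345 is only a placeholder.
example_job_field = '{}&{}&{}&{}'.format(2, 3, 1, 0)             # exp&chan&res&ingest_job -> '2&3&1&0'
example_lookup_key = '{}&{}&{}&{}&{}'.format(1, 2, 3, 1, 12345)  # col&exp&chan&res&random -> '1&2&3&1&12345'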
Example 8
def frame(key):
    return XYZ(args[key.format('x')], args[key.format('y')], args[key.format('z')])
Example 9
def downsample_channel(args):
    """
    Slice the given channel into chunks of 2x2x2 or 2x2x1 cubes that are then
    sent to the downsample_volume lambda for downsampling into a 1x1x1 cube at
    resolution + 1.

    Makes use of the bossutils.multidimensional library for simplified vector
    math.

    Args:
        args {
            downsample_volume_sfn (ARN)

            collection_id (int)
            experiment_id (int)
            channel_id (int)
            annotation_channel (bool)
            data_type (str) 'uint8' | 'uint16' | 'uint64'

            s3_bucket (URL)
            s3_index (URL)
            id_index (URL)

            x_start (int)
            y_start (int)
            z_start (int)

            x_stop (int)
            y_stop (int)
            z_stop (int)

            resolution (int) The resolution to downsample. Creates resolution + 1
            resolution_max (int) The maximum resolution to generate
            res_lt_max (bool) = args['resolution'] < (args['resolution_max'] - 1)

            annotation_index_max (int) The maximum resolution to index annotation channel cubes at
                                       When annotation_index_max = N, indices will exist for res 0 - (N - 1)

            type (str) 'isotropic' | 'anisotropic'
            iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both
        }
    """

    #log.debug("Downsampling resolution " + str(args['resolution']))

    resolution = args['resolution']

    dim = XYZ(*CUBOIDSIZE[resolution])
    #log.debug("Cube dimensions: {}".format(dim))

    def frame(key):
        return XYZ(args[key.format('x')], args[key.format('y')], args[key.format('z')])

    # Figure out variables for isotropic, anisotropic, or isotropic and anisotropic
    # downsampling. If both are happening, fanout one and then the other in series.
    configs = []
    if args['type'] == 'isotropic':
        configs.append({
            'name': 'isotropic',
            'step': XYZ(2,2,2),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })
    else:
        configs.append({
            'name': 'anisotropic',
            'step': XYZ(2,2,1),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })

        if resolution >= args['iso_resolution']: # DP TODO: Figure out how to launch aniso iso version with mutating arguments
            configs.append({
                'name': 'isotropic',
                'step': XYZ(2,2,2),
                'iso_flag': True,
                'frame_start_key': 'iso_{}_start',
                'frame_stop_key': 'iso_{}_stop',
            })

    for config in configs:
        frame_start = frame(config['frame_start_key'])
        frame_stop = frame(config['frame_stop_key'])
        step = config['step']
        use_iso_flag = config['iso_flag'] # If the resulting cube should be marked with the ISO flag
        index_annotations = args['resolution'] < (args['annotation_index_max'] - 1)

        # Round to the furthest full cube from the center of the data
        cubes_start = frame_start // dim
        cubes_stop = ceildiv(frame_stop, dim)
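        # e.g. a frame stopping at x = 1000 with a 512-wide cube spans ceildiv(1000, 512) = 2 cubes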

        log.debug('Downsampling {} resolution {}'.format(config['name'], resolution))
        log.debug("Frame corner: {}".format(frame_start))
        log.debug("Frame extent: {}".format(frame_stop))
        log.debug("Cubes corner: {}".format(cubes_start))
        log.debug("Cubes extent: {}".format(cubes_stop))
        log.debug("Downsample step: {}".format(step))
        log.debug("Indexing Annotations: {}".format(index_annotations))

        # Call the downsample_volume lambda to process the data
        fanout(aws.get_session(),
               args['downsample_volume_sfn'],
               make_args(args, cubes_start, cubes_stop, step, dim, use_iso_flag, index_annotations),
               max_concurrent = MAX_NUM_PROCESSES,
               rampup_delay = RAMPUP_DELAY,
               rampup_backoff = RAMPUP_BACKOFF,
               poll_delay = POLL_DELAY,
               status_delay = STATUS_DELAY)

        # Resize the coordinate frame extents as the data shrinks
        # DP NOTE: doesn't currently work correctly with non-zero frame starts
        def resize(var, size):
            start = config['frame_start_key'].format(var)
            stop = config['frame_stop_key'].format(var)
            args[start] //= size
            args[stop] = ceildiv(args[stop], size)
        resize('x', step.x)
        resize('y', step.y)
        resize('z', step.z)

    # if next iteration will split into aniso and iso downsampling, copy the coordinate frame
    if args['type'] != 'isotropic' and (resolution + 1) == args['iso_resolution']:
        def copy(var):
            args['iso_{}_start'.format(var)] = args['{}_start'.format(var)]
            args['iso_{}_stop'.format(var)] = args['{}_stop'.format(var)]
        copy('x')
        copy('y')
        copy('z')

    # Advance the loop and recalculate the conditional
    # Using max - 1 because resolution_max should not be a valid resolution
    # and res < res_max will end with res = res_max - 1, which generates res_max resolution
    args['resolution'] = resolution + 1
    args['res_lt_max'] = args['resolution'] < (args['resolution_max'] - 1)
    return args
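
# ceildiv() as used above is integer division that rounds up; a minimal equivalent for
# reference (the real helper presumably comes from a BOSS utility module):
def ceildiv(a, b):
    return -(-a // b)

assert ceildiv(1000, 512) == 2
assert ceildiv(1024, 512) == 2
assert ceildiv(1025, 512) == 3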
Example 10
def downsample_channel(args):
    """
    Slice the given channel into chunks of 2x2x2 or 2x2x1 cubes that are then
    sent to the downsample_volume lambda for downsampling into a 1x1x1 cube at
    resolution + 1.

    Makes use of the bossutils.multidimensional library for simplified vector
    math.

    Generators are used as much as possible (instead of lists) so that large
    lists of data are not actualized and kept in memory.

    Args:
        args {
            msg { (this holds the contents of the msg from the downsample queue)
                downsample_volume_lambda (ARN | lambda name)

                collection_id (int)
                experiment_id (int)
                channel_id (int)
                annotation_channel (bool)
                data_type (str) 'uint8' | 'uint16' | 'uint64'

                s3_bucket (URL)
                s3_index (URL)

                x_start (int)
                y_start (int)
                z_start (int)

                x_stop (int)
                y_stop (int)
                z_stop (int)

                resolution (int) The resolution to downsample. Creates resolution + 1
                resolution_max (int) The maximum resolution to generate
                res_lt_max (bool) = args['msg']['resolution'] < (args['msg']['resolution_max'] - 1)

                type (str) 'isotropic' | 'anisotropic'
                iso_resolution (int) if resolution >= iso_resolution && type == 'anisotropic' downsample both

                aws_region (str) AWS region to run in such as us-east-1
            }
            job_receipt_handle (str) Used by downstream state to delete the downsample job from queue
            queue_url (str) URL of downsample queue; downstream state deletes from this queue
            sfn_arn (str) <arn of the downsample step fcn>
            db_host (str) Host of MySQL database.
        }

    Returns:
        (dict): An updated argument dictionary containing the shrunk frame,
                resolution, res_lt_max values, and lookup_key
    """

    # TODO: load downsample_volume_lambda from boss config

    #log.debug("Downsampling resolution " + str(args['msg']['resolution']))

    resolution = args['msg']['resolution']

    dim = XYZ(*CUBOIDSIZE[resolution])

    #log.debug("Cube dimensions: {}".format(dim))

    def frame(key):
        return XYZ(args['msg'][key.format('x')], args['msg'][key.format('y')],
                   args['msg'][key.format('z')])

    # Figure out variables for isotropic, anisotropic, or isotropic and anisotropic
    # downsampling. If both are happening, fanout one and then the other in series.
    configs = []
    if args['msg']['type'] == 'isotropic':
        configs.append({
            'name': 'isotropic',
            'step': XYZ(2, 2, 2),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })
    else:
        configs.append({
            'name': 'anisotropic',
            'step': XYZ(2, 2, 1),
            'iso_flag': False,
            'frame_start_key': '{}_start',
            'frame_stop_key': '{}_stop',
        })

        # if this iteration will split into aniso and iso downsampling, copy the coordinate frame
        if resolution == args['msg']['iso_resolution']:

            def copy(var):
                args['msg']['iso_{}_start'.format(var)] = args['msg'][
                    '{}_start'.format(var)]
                args['msg']['iso_{}_stop'.format(var)] = args['msg'][
                    '{}_stop'.format(var)]

            copy('x')
            copy('y')
            copy('z')

        # DP TODO: Figure out how to launch aniso iso version with mutating arguments
        if resolution >= args['msg']['iso_resolution']:
            configs.append({
                'name': 'isotropic',
                'step': XYZ(2, 2, 2),
                'iso_flag': True,
                'frame_start_key': 'iso_{}_start',
                'frame_stop_key': 'iso_{}_stop',
            })

    for config in configs:
        # Different ID and queue for each resolution, as it takes 60 seconds to delete a queue
        # Different ID and queue for each iso/aniso downsample in case a DLQ message is received
        #     for the previous config
        downsample_id = str(random.random())[2:]  # remove the '0.' part of the number
        dlq_arn = create_queue('downsample-dlq-' + downsample_id)
        cubes_arn = create_queue('downsample-cubes-' + downsample_id)

        try:
            frame_start = frame(config['frame_start_key'])
            frame_stop = frame(config['frame_stop_key'])
            step = config['step']
            # If the resulting cube should be marked with the ISO flag
            use_iso_flag = config['iso_flag']

            # Round to the furthest full cube from the center of the data
            cubes_start = frame_start // dim
            cubes_stop = ceildiv(frame_stop, dim)

            # For a non-zero start, make sure start cube aligns with a zero start downsample
            # so that the data aligns and there are no shifts with the new downsampled data
            mod = cubes_start % step
            if mod.x != 0:
                cubes_start = XYZ(cubes_start.x - 1, cubes_start.y,
                                  cubes_start.z)
            if mod.y != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y - 1,
                                  cubes_start.z)
            if mod.z != 0:
                cubes_start = XYZ(cubes_start.x, cubes_start.y,
                                  cubes_start.z - 1)
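            # e.g. with step (2, 2, 2) a cubes_start of (5, 4, 3) has mod (1, 0, 1) and is
            # pulled back to (4, 4, 2) so it lines up with a downsample that started at zero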

            log.debug('Downsampling {} resolution {}'.format(
                config['name'], resolution))
            log.debug("Frame corner: {}".format(frame_start))
            log.debug("Frame extent: {}".format(frame_stop))
            log.debug("Cubes corner: {}".format(cubes_start))
            log.debug("Cubes extent: {}".format(cubes_stop))
            log.debug("Downsample step: {}".format(step))

            log.debug("Populating input cube")
            cube_count = populate_cubes(cubes_arn, cubes_start, cubes_stop,
                                        step)

            log.debug("Invoking downsample lambdas")
            lambda_count = ceildiv(cube_count, BUCKET_SIZE) + EXTRA_LAMBDAS
            lambda_args = {
                'bucket_size': BUCKET_SIZE,
                'args': args['msg'],
                'step': step,
                'dim': dim,
                'use_iso_flag': use_iso_flag,
                'dlq_arn': dlq_arn,
                'cubes_arn': cubes_arn,
            }

            launch_lambdas(lambda_count,
                           args['msg']['downsample_volume_lambda'],
                           json.dumps(lambda_args).encode('UTF8'), dlq_arn,
                           cubes_arn, args['queue_url'],
                           args['job_receipt_handle'])

            # Resize the coordinate frame extents as the data shrinks
            # DP NOTE: doesn't currently work correctly with non-zero frame starts
            def resize(var, size):
                start = config['frame_start_key'].format(var)
                stop = config['frame_stop_key'].format(var)
                args['msg'][start] //= size
                args['msg'][stop] = ceildiv(args['msg'][stop], size)

            resize('x', step.x)
            resize('y', step.y)
            resize('z', step.z)
        finally:
            delete_queue(dlq_arn)
            delete_queue(cubes_arn)

    # Advance the loop and recalculate the conditional
    # Using max - 1 because resolution_max should not be a valid resolution
    # and res < res_max will end with res = res_max - 1, which generates res_max resolution
    args['msg']['resolution'] = resolution + 1
    args['msg']['res_lt_max'] = args['msg']['resolution'] < (
        args['msg']['resolution_max'] - 1)

    # Move this up one level for use by states that follow.
    args['lookup_key'] = args['msg']['lookup_key']
    return args
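
# Note how the lambda payload above connects to handler() in an earlier example: 'step' and
# 'dim' are XYZ tuples, which json.dumps() serializes as plain lists, and handler() rebuilds
# them with convert(args, 'step') / convert(args, 'dim'). A hedged round-trip sketch, using a
# namedtuple as a stand-in for the bossutils XYZ type:
import json
from collections import namedtuple

XYZ = namedtuple('XYZ', ['x', 'y', 'z'])

payload = json.dumps({'step': XYZ(2, 2, 2), 'dim': XYZ(512, 512, 16)})
decoded = json.loads(payload)
decoded['step'] = XYZ(*decoded['step'])   # what handler()'s convert() does
assert decoded['step'] == XYZ(2, 2, 2)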