예제 #1
    def compute_prefetch_keys(self, missed_key):
        """From the missed key, determine what to prefetch.

        The text after the last '&' in the key is read as a Morton id and
        decoded into XYZ coordinates. Keys are then built for the cuboid one
        step above in z (z + 1) and, unless that would be negative, one step
        below (z - 1). Everything before the last '&' is reused unchanged as
        the key prefix.

        Args:
            missed_key (string): Cached-cuboid key.

        Returns:
            (list): List of cache-cuboid keys to fetch. The key above comes
                first; the key below is only present when z - 1 >= 0.
        """
        key_parts = missed_key.rsplit('&', 1)
        key_prefix = key_parts[0]
        coords = ndlib.MortonXYZ(int(key_parts[1]))
        z = coords[2]

        def key_at(z_index):
            # Same x/y as the missed cuboid, only the z coordinate differs.
            shifted = coords.copy()
            shifted[2] = z_index
            return '{}&{}'.format(key_prefix, ndlib.XYZMorton(shifted))

        prefetch_keys = [key_at(z + 1)]
        if z - 1 >= 0:
            # There is no cuboid below z == 0, so only add it when it exists.
            prefetch_keys.append(key_at(z - 1))
        return prefetch_keys
예제 #2
    def cutout(self, resource, corner, extent, resolution,
               time_sample_range=None, filter_ids=None, iso=False,
               no_cache=False):
        """Extract a cube of arbitrary size. Need not be aligned to cuboid boundaries.

        corner represents the location of the cutout and extent the size.  As an example in 1D, if asking for
        a corner of 3 and extent of 2, this would be the values at 3 and 4.

        Provide a list of ids to filter the cutout contents if desired.  The list must be convertible to a numpy array
        via numpy.asarray().

        NOTE(review): this method was restored from a listing that had lost lines (the parameter list, docstring
        terminator, several call arguments, ``else:`` lines and loop bodies). Statements that could not be derived
        from the surviving lines are marked with ``NOTE(review)`` below and must be confirmed against the
        upstream source before relying on them.

        Args:
            resource (spdb.project.BossResource): Data model info based on the request or target resource
            corner ((int, int, int)): the xyz location of the corner of the cutout
            extent ((int, int, int)): the xyz extents
            resolution (int): the resolution level
            time_sample_range (list((int)):  a range of time samples to get [start, stop). Default is [0,1) if omitted
            filter_ids (optional[list]): Defaults to None. Otherwise, is a list of uint64 ids to filter cutout by.
            iso (bool): Flag indicating if you want to get to the "isotropic" version of a cuboid, if available
            no_cache (bool): True to read directly from S3 and bypass the cache.

        Returns:
            cube.Cube: The cutout data stored in a Cube instance

        Raises:
            SpdbError: If the request would need dynamic up- or down-sampling of an annotation channel, if dirty
                cuboids are not flushed within ``self.dirty_read_timeout`` seconds, or if id filtering fails.
        """
        # Imported locally so the polling sleeps below do not depend on a module-level import.
        import time

        blog = BossLogger().logger

        if not time_sample_range:
            # If not time sample list defined, used default of 0
            time_sample_range = [0, 1]

        # Namedtuple kept for consistency with the (not yet implemented) re-sampling paths.
        # NOTE(review): the typename was missing from the listing; any valid identifier behaves the same.
        result_tuple = namedtuple(
            'ResampleCoords',
            ['corner', 'extent', 'x_pixel_offset', 'y_pixel_offset'])

        # Annotation channels that are not downsampled would need dynamic re-sampling when the request is
        # off the base resolution. Neither direction is implemented, so reject the request up front.
        channel = resource.get_channel()
        if not channel.is_image():
            base_res = channel.base_resolution

            if base_res > resolution and not resource.is_downsampled():
                # Desired cutout is below base res in hierarchy (higher res image).
                # NOTE(review): third argument was cut off in the listing; ErrorCodes.FUTURE is assumed.
                raise SpdbError(
                    'Not Implemented',
                    'Dynamic resolution up-sampling not yet implemented.',
                    ErrorCodes.FUTURE)

            if base_res < resolution and not resource.is_downsampled():
                # We could cut out a smaller cube and up-sample for the user, but do not want to deal with
                # cutting out large regions and down-sampling.
                # NOTE(review): third argument was cut off in the listing; ErrorCodes.FUTURE is assumed.
                raise SpdbError(
                    'Not Implemented',
                    'Dynamic resolution down-sampling not yet implemented.',
                    ErrorCodes.FUTURE)

        # Default path (image channels and annotation channels that need no dynamic scaling):
        # read at the requested resolution with the caller's corner and extent.
        x_cube_dim, y_cube_dim, z_cube_dim = CUBOIDSIZE[resolution]
        cutout_resolution = resolution
        cutout_coords = result_tuple(corner, extent, None, None)

        # Round to the nearest larger cube in all dimensions
        z_start = cutout_coords.corner[2] // z_cube_dim
        y_start = cutout_coords.corner[1] // y_cube_dim
        x_start = cutout_coords.corner[0] // x_cube_dim

        z_num_cubes = (cutout_coords.corner[2] + cutout_coords.extent[2] +
                       z_cube_dim - 1) // z_cube_dim - z_start
        y_num_cubes = (cutout_coords.corner[1] + cutout_coords.extent[1] +
                       y_cube_dim - 1) // y_cube_dim - y_start
        x_num_cubes = (cutout_coords.corner[0] + cutout_coords.extent[0] +
                       x_cube_dim - 1) // x_cube_dim - x_start

        # Initialize the final output cube (before trim operation since adding full cuboids)
        out_cube = Cube.create_cube(resource, [
            x_num_cubes * x_cube_dim, y_num_cubes * y_cube_dim,
            z_num_cubes * z_cube_dim
        ], time_sample_range)

        # Build a list of indexes to access
        # TODO: Move this for loop directly into c-lib
        list_of_idxs = []
        for z in range(z_num_cubes):
            for y in range(y_num_cubes):
                for x in range(x_num_cubes):
                    morton_idx = ndlib.XYZMorton(
                        [x + x_start, y + y_start, z + z_start])
                    list_of_idxs.append(morton_idx)

        # Sort the indexes in Morton order
        list_of_idxs.sort()

        # xyz offset stored for later use
        lowxyz = ndlib.MortonXYZ(list_of_idxs[0])

        # Get index of missing keys for cuboids to read
        # NOTE(review): the argument list was cut off in the listing; these arguments and their order are
        # assumed from the values prepared above (cutout_resolution, list_of_idxs, iso) - confirm.
        missing_key_idx, cached_key_idx, all_keys = self.kvio.get_missing_read_cache_keys(
            resource, cutout_resolution, time_sample_range, list_of_idxs,
            iso=iso)

        # Wait for cuboids that are currently being written to finish
        start_time = datetime.now()
        dirty_keys = all_keys
        # NOTE(review): the format argument was cut off in the listing; the key count is assumed.
        blog.debug(
            "Waiting for {} writes to finish before read can complete".format(
                len(dirty_keys)))
        while dirty_keys:
            dirty_flags = self.kvio.is_dirty(dirty_keys)
            # Keep polling only the keys that are still flagged dirty.
            dirty_keys = [
                key for key, flag in zip(dirty_keys, dirty_flags) if flag
            ]

            if (datetime.now() - start_time).seconds > self.dirty_read_timeout:
                # Took too long! Something must have crashed
                raise SpdbError(
                    '{} second timeout reached while waiting for dirty cubes to be flushed.'
                    .format(self.dirty_read_timeout), ErrorCodes.ASYNC_ERROR)
            # Sleep a bit so you don't kill the DB
            # NOTE(review): the sleep statement was missing from the listing; the interval is assumed.
            time.sleep(0.05)

        # All dirty cubes flushed, can begin reading.

        s3_key_idx = []
        cache_cuboids = []
        s3_cuboids = []
        zero_cuboids = []

        if no_cache:
            # If not using the cache, then consider all keys are missing.
            blog.debug("Bypassing cache; loading all cuboids directly from S3")
            missing_key_idx = list(range(len(all_keys)))

        if len(missing_key_idx) > 0:
            # There are keys that are missing in the cache
            # Get index of missing keys that are in S3
            s3_key_idx, zero_key_idx = self.objectio.cuboids_exist(
                all_keys, missing_key_idx)

            if len(s3_key_idx) > 0:
                # itemgetter yields a single key for one index and a tuple of keys otherwise.
                s3_keys = itemgetter(*s3_key_idx)(all_keys)

                if no_cache:
                    temp_keys = self.objectio.cached_cuboid_to_object_keys(
                        s3_keys)

                    # Get objects
                    temp_cubes = self.objectio.get_objects(temp_keys)
                    # keys will be just the morton id and time sample
                    # (the last two '&'-separated fields of each object key).
                    keys_and_cubes = []
                    for key, cube in zip(temp_keys, temp_cubes):
                        vals = key.split("&")
                        keys_and_cubes.append(
                            (int(vals[-1]), int(vals[-2]), cube))
                    s3_cuboids = self.sort_cubes(resource, keys_and_cubes)
                else:
                    # Load data into cache.
                    blog.debug("Data missing from cache, but present in S3")

                    if len(s3_key_idx) > self.read_lambda_threshold:
                        # Trigger page-in of available blocks from object store and wait for completion
                        blog.debug("Triggering Lambda Page-in")
                        # NOTE(review): the page-in call was missing from the listing;
                        # self.page_in_cubes is assumed - confirm the method name.
                        self.page_in_cubes(s3_keys)
                    else:
                        # Read cuboids from S3 into cache directly
                        # Convert cuboid-cache keys to object keys
                        blog.debug("Paging-in Keys Directly")
                        temp_keys = self.objectio.cached_cuboid_to_object_keys(
                            s3_keys)

                        # Get objects
                        temp_cubes = self.objectio.get_objects(temp_keys)

                        # write to cache
                        blog.debug("put keys on direct page in: {}".format(
                            s3_keys))
                        # NOTE(review): only the argument tail of this call survived in the listing;
                        # self.kvio.put_cubes is assumed - confirm the method name.
                        self.kvio.put_cubes(s3_keys, temp_cubes)

            if len(zero_key_idx) > 0:
                if not no_cache:
                    blog.debug("Data missing in cache, but not in S3")
                else:
                    blog.debug(
                        "No data for some keys, making cuboids with zeros")

                # Keys that don't exist in object store render as zeros
                x_cube_dim, y_cube_dim, z_cube_dim = CUBOIDSIZE[resolution]
                for idx in zero_key_idx:
                    # Key layout relied on here: '<...>&<time sample>&<morton id>'.
                    parts, m_id = all_keys[idx].rsplit("&", 1)
                    _, t_start = parts.rsplit("&", 1)
                    temp_cube = Cube.create_cube(
                        resource, [x_cube_dim, y_cube_dim, z_cube_dim],
                        [int(t_start), int(t_start) + 1])
                    temp_cube.morton_id = int(m_id)
                    # NOTE(review): the lines after this point were missing from the listing; zero-filling
                    # via Cube.zeros() is assumed - confirm. The append is required by the merge below.
                    temp_cube.zeros()
                    zero_cuboids.append(temp_cube)

        # Get cubes from the cache database (either already there or freshly paged in)
        if not no_cache:
            # TODO: Optimize access to cache data and checking for dirty cubes
            if len(s3_key_idx) > 0:
                blog.debug("Get cubes from cache that were paged in from S3")
                paged_in_keys = itemgetter(*s3_key_idx)(all_keys)

                s3_cuboids = self.get_cubes(resource, paged_in_keys)

                # Record misses that were found in S3 for possible pre-fetching
                # NOTE(review): the recording call was missing from the listing;
                # self.cache_state.add_cache_misses is assumed - confirm.
                self.cache_state.add_cache_misses(paged_in_keys)

            # Get previously cached cubes, waiting for dirty cubes to be updated if needed
            if len(cached_key_idx) > 0:
                blog.debug("Get cubes that were already present in the cache")

                # Get the cached keys once in list form
                cached_keys_list = [all_keys[idx] for idx in cached_key_idx]

                # Split clean and dirty keys
                dirty_flags = self.kvio.is_dirty(cached_keys_list)
                pending_keys, clean_keys = [], []
                for key, flag in zip(cached_keys_list, dirty_flags):
                    (pending_keys if flag else clean_keys).append(key)

                # Get all the clean cubes immediately; only the dirty keys remain to be fetched
                cache_cuboids.extend(self.get_cubes(resource, clean_keys))

                # Get the dirty ones when you can with a timeout
                start_time = datetime.now()
                while pending_keys:
                    dirty_flags = self.kvio.is_dirty(pending_keys)
                    still_dirty, clean_keys = [], []
                    for key, flag in zip(pending_keys, dirty_flags):
                        (still_dirty if flag else clean_keys).append(key)

                    if clean_keys:
                        # Some keys are ready now. Remove from list and get them
                        cache_cuboids.extend(
                            self.get_cubes(resource, clean_keys))
                    pending_keys = still_dirty

                    if (datetime.now() -
                            start_time).seconds > self.dirty_read_timeout:
                        # Took too long! Something must have crashed
                        raise SpdbError(
                            '{} second timeout reached while waiting for dirty cubes to be flushed.'
                            .format(self.dirty_read_timeout),
                            ErrorCodes.ASYNC_ERROR)

                    # Sleep a bit so you don't kill the DB
                    # NOTE(review): the sleep statement was missing from the listing; the interval is assumed.
                    time.sleep(0.05)

        # At this point, have all cuboids whether or not the cache was used.

        # Add all cuboids (which have all time samples packed in already) to final cube of data
        for cube in cache_cuboids + s3_cuboids + zero_cuboids:
            # Compute offset so data inserted properly
            curxyz = ndlib.MortonXYZ(cube.morton_id)
            offset = [
                curxyz[0] - lowxyz[0], curxyz[1] - lowxyz[1],
                curxyz[2] - lowxyz[2]
            ]

            # add it to the output cube
            out_cube.add_data(cube, offset)

        # Requests that would need dynamic up-/down-sampling were rejected before any data was read,
        # so only the alignment trim is left to do here.
        # TODO: implement dynamic re-sampling (zoom/down-scale the assembled cube, then trim at the
        # requested resolution).
        cube_dims = (x_cube_dim, y_cube_dim, z_cube_dim)
        already_aligned = all(
            extent[axis] % cube_dims[axis] == 0
            and corner[axis] % cube_dims[axis] == 0 for axis in range(3))
        if not already_aligned:
            # Trim cube since cutout was not cuboid aligned
            out_cube.trim(corner[0] % x_cube_dim, extent[0],
                          corner[1] % y_cube_dim, extent[1],
                          corner[2] % z_cube_dim, extent[2])

        # Filter out ids not in list.
        if filter_ids is not None:
            try:
                out_cube.data = ndlib.filter_ctype_OMP(out_cube.data,
                                                       filter_ids)
            except ValueError as ve:
                raise SpdbError(
                    'filter_ids probably not convertible to numpy uint64 array: {}'
                    .format(ve), ErrorCodes.DATATYPE_MISMATCH) from ve
            except Exception as err:
                # NOTE(review): the error code was cut off in the listing; ErrorCodes.SPDB_ERROR is assumed.
                raise SpdbError('unknown error filtering cutout',
                                ErrorCodes.SPDB_ERROR) from err

        return out_cube
예제 #3
 def from_morton(cls, morton):
     """Build an instance of ``cls`` from a Morton index.

     Args:
         morton: Morton index handed to ``ndlib.MortonXYZ`` for decoding.

     Returns:
         The result of calling ``cls`` with the decoded values as the
         ``x``, ``y`` and ``z`` keyword arguments.
     """
     x_pos, y_pos, z_pos = ndlib.MortonXYZ(morton)
     return cls(x=x_pos, y=y_pos, z=z_pos)
예제 #4
#!/usr/bin/env python3.4
# This lambda tests that it can read from user-data, and access the cache_state_db
# then import spdb and access the compiled c_lib
# {
#   "lambda-name": "test",
# }
# The imports are deliberately interleaved with progress prints: each marker
# shows in the log how far start-up got before any import failure.
print("in test_lambda")
import bossutils
import spdb
from spdb.spatialdb import state
print("finished part1 imports")

print("checking c_lib")
from spdb.c_lib import ndlib
print("finished c_lib imports.")
# Call into the compiled library to prove it is actually usable, not merely
# importable. The result is not named `id`, which would shadow the builtin.
coords = ndlib.MortonXYZ(10)
for w in coords:
    # NOTE(review): the loop body was missing from the listing; printing each
    # decoded value is assumed - confirm against the upstream script.
    print(w)
print("finished part2")