def test_filtered_cutout(self):
        time_axis = [1]
        cube_dim = [self.x_dim, self.y_dim, self.z_dim]
        cube_dim_tuple = (self.x_dim, self.y_dim, self.z_dim)
        cube1 = Cube.create_cube(self.resource, cube_dim)
        cube1.data = np.ones(time_axis + [cube_dim[2], cube_dim[1], cube_dim[0]], 
            dtype='uint64')
        cube1.morton_id = 0
        corner = (0, 0, 0)

        expected = np.zeros(time_axis + [cube_dim[2], cube_dim[1], cube_dim[0]], 
            dtype='uint64')

        # Will filter by these ids.
        id1 = 55555
        id2 = 66666
        cube1.data[0][0][40][0] = id1
        cube1.data[0][0][50][0] = id2
        expected[0][0][40][0] = id1
        expected[0][0][50][0] = id2

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)
        resolution = 0
        sp.write_cuboid(self.resource, corner, resolution, cube1.data, time_sample_start=0)

        # Make sure cube written correctly.
        actual_cube = sp.cutout(self.resource, corner, cube_dim_tuple, resolution)
        np.testing.assert_array_equal(cube1.data, actual_cube.data)

        # Method under test.
        actual_filtered = sp.cutout(self.resource, corner, cube_dim_tuple, resolution, 
            filter_ids=[id1, id2])

        np.testing.assert_array_equal(expected, actual_filtered.data)
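
    # A minimal sketch (an assumption, not SpatialDB's implementation) of the
    # filtering semantics exercised above: voxels whose ids are not in
    # filter_ids are zeroed out. The helper name is hypothetical.
    @staticmethod
    def _expected_filtered_data(data, filter_ids):
        mask = np.isin(data, filter_ids)
        return np.where(mask, data, 0)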
    def test_page_in_multi_cuboids_y_dir(self):
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim * 2, self.z_dim])
        cube1.random()
        cube1.morton_id = 0

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        sp.write_cuboid(self.resource, (0, 0, 0), 0, cube1.data)

        cube2 = sp.cutout(self.resource, (0, 0, 0), (self.x_dim, self.y_dim * 2, self.z_dim), 0)

        # Make sure data is the same
        np.testing.assert_array_equal(cube1.data, cube2.data)

        # Delete everything in the cache
        sp.kvio.cache_client.flushdb()

        # Force use of lambda function.
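        # (Assumption: with the threshold at 0, any cuboid read that misses the
        # cache is paged in through the lambda path instead of a direct fetch.)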
        sp.read_lambda_threshold = 0

        # Get the data again, which should trigger lambda page in.
        cube3 = sp.cutout(self.resource, (0, 0, 0), (self.x_dim, self.y_dim * 2, self.z_dim), 0)

        # Make sure the data is the same
        np.testing.assert_array_equal(cube1.data, cube3.data)
    def test_sqs_watcher_send_message(self):
        """Inject message into queue and test that SqsWatcher kicks off a lambda and writes cuboid to s3."""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [512, 512, 16])
        cube1.random()
        cube1.morton_id = 0

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        base_write_cuboid_key = "WRITE-CUBOID&{}&{}".format(self.resource.get_lookup_key(), 0)
        morton_idx = ndlib.XYZMorton([0, 0, 0])
        t = 0
        write_cuboid_key = sp.kvio.insert_cube_in_write_buffer(base_write_cuboid_key, t, morton_idx,
                                                               cube1.to_blosc_by_time_index(t))

        # Put page out job on the queue
        sqs = boto3.client('sqs', region_name=get_region())

        msg_data = {"config": self.config_data,
                    "write_cuboid_key": write_cuboid_key,
                    "lambda-name": "s3_flush",
                    "resource": self.resource.to_dict()}

        response = sqs.send_message(QueueUrl=self.object_store_config["s3_flush_queue"],
                                    MessageBody=json.dumps(msg_data))
        assert response['ResponseMetadata']['HTTPStatusCode'] == 200

        watcher = SqsWatcher(self.lambda_data)
        #  verify_queue() needs to be run multiple times to verify that the queue is not changing;
        #  only then does it send off a lambda message.
        time.sleep(5)
        watcher.verify_queue()
        time.sleep(5)
        lambdas_invoked = watcher.verify_queue()
        if lambdas_invoked < 1:
            time.sleep(5)
            watcher.verify_queue()
        time.sleep(15)

        client = boto3.client('sqs', region_name=get_region())
        response = client.get_queue_attributes(
            QueueUrl=self.object_store_config["s3_flush_queue"],
            AttributeNames=[
                'ApproximateNumberOfMessages', 'ApproximateNumberOfMessagesNotVisible'
            ]
        )
        https_status_code = response['ResponseMetadata']['HTTPStatusCode']
        queue_count = int(response['Attributes']['ApproximateNumberOfMessages'])
        # test that the queue count is now 0
        assert queue_count == 0

        s3 = boto3.client('s3', region_name=get_region())
        objects_list = s3.list_objects(Bucket=self.object_store_config['cuboid_bucket'])
        # tests that bucket has some Contents.
        assert "Contents" in objects_list.keys()
    def test_factory(self):
        """Test the Cube factory in Cube"""

        data = get_image_dict()
        resource = BossResourceBasic(data)

        c = Cube.create_cube(resource, [30, 20, 13], [0, 15])
        assert isinstance(c, ImageCube8) is True
        assert c.cube_size == [13, 20, 30]
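        # cube_size is reported in [z, y, x] order, the reverse of the
        # [x, y, z] extents passed to create_cube.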
        assert c.is_time_series is True
        assert c.time_range == [0, 15]
    def test_get_cubes_time_multiple(self, fake_get_region):
        """Test the get_cubes method - time - multiple"""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim, self.z_dim], [0, 4])
        cube1.random()
        cube1.morton_id = 32
        cube2 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim, self.z_dim], [0, 4])
        cube2.random()
        cube2.morton_id = 33

        db = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        # populate dummy data
        keys = self.write_test_cube(db, self.resource, 0, cube1, cache=True, s3=False)
        keys.extend(self.write_test_cube(db, self.resource, 0, cube2, cache=True, s3=False))

        cube_read = db.get_cubes(self.resource, keys)

        np.testing.assert_array_equal(cube1.data, cube_read[0].data)
        np.testing.assert_array_equal(cube2.data, cube_read[1].data)
    def test_get_tight_bounding_box_single_cuboid(self):
        """
        Get the tight bounding box for an object that exists within a single cuboid.
        """
        resolution = 0
        [x_cube_dim, y_cube_dim, z_cube_dim] = CUBOIDSIZE[resolution]

        id = 33333
        id_as_str = '33333'
        # Customize the resource so it writes to its own channel and uses a
        # coord frame large enough to encompass the data written.  This is
        # important for proper loose bounding box calculations.
        data = get_anno_dict(boss_key='col1&exp1&ch50', lookup_key='1&1&50')
        data['coord_frame']['x_stop'] = 10000
        data['coord_frame']['y_stop'] = 10000
        data['coord_frame']['z_stop'] = 10000
        resource = BossResourceBasic(data)
        time_sample = 0
        version = 0
        x_rng = [0, x_cube_dim]
        y_rng = [0, y_cube_dim]
        z_rng = [0, z_cube_dim]
        t_rng = [0, 1]

        cube_dim_tuple = (self.x_dim, self.y_dim, self.z_dim)
        cube1 = Cube.create_cube(resource, [self.x_dim, self.y_dim, self.z_dim])
        cube1.zeros()
        cube1.data[0][14][500][104] = id
        cube1.data[0][15][501][105] = id
        cube1.data[0][15][502][104] = id
        cube1.data[0][14][503][105] = id

        pos1 = [10*self.x_dim, 15*self.y_dim, 2*self.z_dim]
        cube1.morton_id = XYZMorton(pos1)

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)
        sp.write_cuboid(resource, pos1, resolution, cube1.data, time_sample_start=0)

        # Make sure cube write complete and correct.
        actual_cube = sp.cutout(resource, pos1, cube_dim_tuple, resolution)
        np.testing.assert_array_equal(cube1.data, actual_cube.data)

        # Method under test.
        actual = sp.get_bounding_box(resource, resolution, id_as_str, bb_type='tight')

        expected = {
            'x_range': [pos1[0]+104, pos1[0]+106],
            'y_range': [pos1[1]+500, pos1[1]+504],
            'z_range': [pos1[2]+14, pos1[2]+16],
            't_range': t_rng
        }
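        # (The tight bounding box is half-open at the upper end: the id occupies
        # x voxels 104-105, y voxels 500-503, and z voxels 14-15 of the cuboid,
        # offset by the cuboid's corner pos1.)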

        self.assertEqual(expected, actual)
    def test_factory_no_time(self):
        """Test the Cube factory in Cube"""

        data = get_image_dict()
        data['channel']['datatype'] = 'uint16'
        resource = BossResourceBasic(data)

        c = Cube.create_cube(resource, [30, 20, 13])
        assert isinstance(c, ImageCube16) is True
        assert c.cube_size == [13, 20, 30]
        assert c.is_time_series is False
        assert c.time_range == [0, 1]
    def test_factory_no_time(self):
        """Test the Cube factory in Cube"""

        data = get_anno_dict()

        resource = BossResourceBasic(data)

        c = Cube.create_cube(resource, [30, 20, 13])
        assert isinstance(c, AnnotateCube64) is True
        assert c.cube_size == [13, 20, 30]
        assert c.is_time_series is False
        assert c.time_range == [0, 1]
    def test_write_cuboid_off_base_res(self, fake_get_region):
        """Test writing a cuboid to not the base resolution"""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim, self.z_dim])
        cube1.random()
        cube1.morton_id = 0

        db = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        # Writing at a resolution other than the base resolution should fail.
        with self.assertRaises(SpdbError):
            db.write_cuboid(self.resource, (0, 0, 0), 5, cube1.data, time_sample_start=0)
    def test_get_ids_in_region_multiple_partial_cubes(self):
        """
        Region is cuboid aligned in x, but doesn't span full cuboids in the y
        and z directions.
        """
        cube1 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim, self.z_dim])
        cube1.zeros()
        cube1.data[0][0][40][0] = 55555
        cube1.data[0][0][50][0] = 66666
        pos1 = [4*self.x_dim, 4*self.y_dim, 2*self.z_dim]
        cube1.morton_id = XYZMorton(pos1)

        cube2 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim, self.z_dim])
        cube2.zeros()
        cube2.data[0][0][40][0] = 55555
        cube2.data[0][0][50][0] = 77777
        pos2 = [5*self.x_dim, 4*self.y_dim, 2*self.z_dim]
        cube2.morton_id = XYZMorton(pos2)

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        resolution = 0
        sp.write_cuboid(self.resource, pos1, resolution, cube1.data, time_sample_start=0)
        sp.write_cuboid(self.resource, pos2, resolution, cube2.data, time_sample_start=0)

        # Not verifying writes here because get_ids_in_region() should be doing
        # cutouts due to the region not containing full cuboids.

        corner = (4*self.x_dim, 4*self.y_dim, 2*self.z_dim)
        extent = (2*self.x_dim, 60, 10)
        t_range = [0, 1]
        version = 0
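        # get_ids_in_region() returns ids as strings.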
        expected = ['55555', '66666', '77777']

        # Method under test.
        actual = sp.get_ids_in_region(
            self.resource, resolution, corner, extent, t_range, version)

        self.assertIn('ids', actual)
        self.assertCountEqual(expected, actual['ids'])
    def test_add_to_prefetch(self):
        cuboid_dims = CUBOIDSIZE[0]
        # Cuboid dimensions.
        x_dim = cuboid_dims[0]
        y_dim = cuboid_dims[1]
        z_dim = cuboid_dims[2]

        cube_above = Cube.create_cube(self.resource, [x_dim, y_dim, z_dim])
        cube_above.random()

        # Write a cuboid two cuboid-heights above the origin.
        self.sp.write_cuboid(self.resource, (0, 0, z_dim * 2), 0,
                             cube_above.data)

        cube_above.morton_id = ndlib.XYZMorton([0, 0, z_dim * 2 // z_dim])
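        # (z_dim * 2 // z_dim converts the voxel z-offset into the cuboid-grid
        # z index expected by XYZMorton.)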

        cube_above_cache_key = self.sp.kvio.generate_cached_cuboid_keys(
            self.resource, 0, [0], [cube_above.morton_id])

        # Make sure cuboid saved.
        cube_act = self.sp.cutout(self.resource, (0, 0, z_dim * 2),
                                  (x_dim, y_dim, z_dim), 0)
        np.testing.assert_array_equal(cube_above.data, cube_act.data)

        # Clear cache so we can test prefetch.
        self.sp.kvio.cache_client.flushdb()

        # Also clear cache state before running test.
        self.sp.cache_state.status_client.flushdb()

        obj_keys = self.sp.objectio.cached_cuboid_to_object_keys(
            cube_above_cache_key)

        # Place a cuboid in the prefetch queue.
        self.sp.cache_state.status_client.rpush('PRE-FETCH', obj_keys[0])

        # This is the system under test.
        self.prefetch.process()

        # Wait for cube to be prefetched.
        i = 0
        while not self.sp.kvio.cube_exists(cube_above_cache_key[0]) and i < 30:
            time.sleep(1)
            i += 1

        # Confirm cuboid now in cache.
        self.assertTrue(self.sp.kvio.cube_exists(cube_above_cache_key[0]))

        cube_act = self.sp.cutout(self.resource, (0, 0, z_dim * 2),
                                  (x_dim, y_dim, z_dim), 0)
        np.testing.assert_array_equal(cube_above.data, cube_act.data)
    def test_cutout_time_offset_single_aligned_hit(self):
        """Test the get_cubes method - w/ time - single - hit"""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim, self.z_dim], time_range=[0, 3])
        cube1.random()
        cube1.morton_id = 0

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        sp.write_cuboid(self.resource, (0, 0, 0), 0, cube1.data, time_sample_start=6)

        cube2 = sp.cutout(self.resource, (0, 0, 0), (self.x_dim, self.y_dim, self.z_dim), 0, time_sample_range=[6, 9])

        np.testing.assert_array_equal(cube1.data, cube2.data)
    def test_cutout_no_time_single_unaligned_hit(self):
        """Test the get_cubes method - no time - single - unaligned - hit"""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim, self.z_dim])
        cube1.random()
        cube1.morton_id = 0

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        sp.write_cuboid(self.resource, (600, 0, 0), 0, cube1.data)

        cube2 = sp.cutout(self.resource, (600, 0, 0), (self.x_dim, self.y_dim, 16), 0)

        np.testing.assert_array_equal(cube1.data, cube2.data)
    def test_add_to_prefetch(self):
        cuboid_dims = CUBOIDSIZE[0]
        # Cuboid dimensions.
        x_dim = cuboid_dims[0]
        y_dim = cuboid_dims[1]
        z_dim = cuboid_dims[2]

        cube_above = Cube.create_cube(self.resource, [x_dim, y_dim, z_dim])
        cube_above.random()

        # Write a cuboid two cuboid-heights above the origin.
        self.sp.write_cuboid(self.resource, (0, 0, z_dim * 2), 0, cube_above.data)

        cube_above.morton_id = ndlib.XYZMorton([0, 0, z_dim * 2 // z_dim])
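        # (z_dim * 2 // z_dim converts the voxel z-offset into the cuboid-grid
        # z index expected by XYZMorton.)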

        cube_above_cache_key = self.sp.kvio.generate_cached_cuboid_keys(
            self.resource, 0, [0], [cube_above.morton_id])

        # Make sure cuboid saved.
        cube_act = self.sp.cutout(self.resource, (0, 0, z_dim * 2), (x_dim, y_dim, z_dim), 0)
        np.testing.assert_array_equal(cube_above.data, cube_act.data)

        # Clear cache so we can test prefetch.
        self.sp.kvio.cache_client.flushdb()

        # Also clear cache state before running test.
        self.sp.cache_state.status_client.flushdb()

        obj_keys = self.sp.objectio.cached_cuboid_to_object_keys(
            cube_above_cache_key)

        # Place a cuboid in the prefetch queue.
        self.sp.cache_state.status_client.rpush('PRE-FETCH', obj_keys[0])

        # This is the system under test.
        self.prefetch.process()

        # Wait for cube to be prefetched.
        i = 0
        while not self.sp.kvio.cube_exists(cube_above_cache_key[0]) and i < 30:
            time.sleep(1)
            i += 1

        # Confirm cuboid now in cache.
        self.assertTrue(self.sp.kvio.cube_exists(cube_above_cache_key[0]))

        cube_act = self.sp.cutout(
            self.resource, (0, 0, z_dim * 2), (x_dim, y_dim, z_dim), 0)
        np.testing.assert_array_equal(cube_above.data, cube_act.data)
    def test_cutout_no_time_single_aligned_miss(self, fake_get_region):
        """Test the get_cubes method - no time - single"""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim, self.z_dim])
        cube1.random()
        cube1.morton_id = 0

        db = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        # populate dummy data
        self.write_test_cube(db, self.resource, 0, cube1, cache=False, s3=True)

        cube2 = db.cutout(self.resource, (0, 0, 0), (self.x_dim, self.y_dim, self.z_dim), 0)

        np.testing.assert_array_equal(cube1.data, cube2.data)
    def test_cutout_iso_below_fork(self):
        """Test write_cuboid and cutout methods with iso option, testing iso is equal below the res fork"""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [400, 400, 8])
        cube1.random()
        cube1.morton_id = 0

        cubez = Cube.create_cube(self.resource, [400, 400, 8])
        cubez.zeros()
        cubez.morton_id = 0

        # Write at res 0, non-iso, and verify
        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        sp.write_cuboid(self.resource, (200, 600, 3), 0, cube1.data, iso=False)

        cube2 = sp.cutout(self.resource, (200, 600, 3), (400, 400, 8), 0, iso=False)

        np.testing.assert_array_equal(cube1.data, cube2.data)

        # Get at res 0 iso, which should equal the non-iso call
        cube2 = sp.cutout(self.resource, (200, 600, 3), (400, 400, 8), 0, iso=True)

        np.testing.assert_array_equal(cube1.data, cube2.data)
    def test_get_cubes_no_time_single(self, fake_get_region):
        """Test the get_cubes method - no time - single"""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim, self.z_dim])
        cube1.random()
        cube1.morton_id = 32

        db = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        # populate dummy data
        keys = self.write_test_cube(db, self.resource, 0, cube1, cache=True, s3=False)

        cube2 = db.get_cubes(self.resource, keys)

        np.testing.assert_array_equal(cube1.data, cube2[0].data)
    def test_cutout_no_time_single_aligned_existing_hit(self):
        """Test the get_cubes method - no time - aligned - existing data - miss"""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim, self.z_dim])
        cube1.random()

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        sp.write_cuboid(self.resource, (0, 0, 0), 0, cube1.data)

        cube2 = sp.cutout(self.resource, (0, 0, 0), (self.x_dim, self.y_dim, self.z_dim), 0)

        np.testing.assert_array_equal(cube1.data, cube2.data)
        del cube1
        del cube2

        # now write to cuboid again
        cube3 = Cube.create_cube(self.resource, [self.x_dim, self.y_dim, self.z_dim])
        cube3.random()

        sp.write_cuboid(self.resource, (0, 0, 0), 0, cube3.data)

        cube4 = sp.cutout(self.resource, (0, 0, 0), (self.x_dim, self.y_dim, self.z_dim), 0)
        np.testing.assert_array_equal(cube3.data, cube4.data)
    def test_filtered_cutout_bad_id_list(self):
        time_axis = [1]
        cube_dim = [self.x_dim, self.y_dim, self.z_dim]
        cube_dim_tuple = (self.x_dim, self.y_dim, self.z_dim)
        cube1 = Cube.create_cube(self.resource, cube_dim)
        cube1.data = np.ones(time_axis + [cube_dim[2], cube_dim[1], cube_dim[0]], dtype='uint64')
        cube1.morton_id = 0
        corner = (6*self.x_dim, 6*self.y_dim, 2*self.z_dim)

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)
        resolution = 0
        sp.write_cuboid(self.resource, corner, resolution, cube1.data, 
            time_sample_start=0)

        # Method under test.
        with self.assertRaises(SpdbError):
            sp.cutout(self.resource, corner, cube_dim_tuple, resolution, 
                filter_ids=['foo', 55555])
    def test_cutout_no_time_multi_unaligned_hit(self):
        """Test the get_cubes method - no time - multi - unaligned - hit"""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [400, 400, 8])
        cube1.random()
        cube1.morton_id = 0

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        sp.write_cuboid(self.resource, (200, 600, 3), 0, cube1.data)

        cube2 = sp.cutout(self.resource, (200, 600, 3), (400, 400, 8), 0)

        np.testing.assert_array_equal(cube1.data, cube2.data)

        # do it again... should be in cache
        cube2 = sp.cutout(self.resource, (200, 600, 3), (400, 400, 8), 0)

        np.testing.assert_array_equal(cube1.data, cube2.data)
    def test_cutout_no_time_multi_unaligned_hit_iso_below(self):
        """Test write_cuboid and cutout methods - no time - multi - unaligned - hit - isotropic, below iso fork"""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [400, 400, 8])
        cube1.random()
        cube1.morton_id = 0

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        sp.write_cuboid(self.resource, (200, 600, 3), 0, cube1.data, iso=True)

        cube2 = sp.cutout(self.resource, (200, 600, 3), (400, 400, 8), 0, iso=True)

        np.testing.assert_array_equal(cube1.data, cube2.data)

        # do it again...should be in cache
        cube2 = sp.cutout(self.resource, (200, 600, 3), (400, 400, 8), 0, iso=True)

        np.testing.assert_array_equal(cube1.data, cube2.data)
    def test_set_write_locked(self):
        # Cuboid dimensions.
        xy_dim = 128
        z_dim = 16

        lookup_key = self.data['lookup_key']
        # Make sure this key isn't currently locked.
        self.dead_letter._sp.cache_state.set_project_lock(lookup_key, False)
        self.assertFalse(
            self.dead_letter._sp.cache_state.project_locked(lookup_key))

        cube1 = Cube.create_cube(self.resource, [xy_dim, xy_dim, z_dim])
        cube1.data = np.random.randint(1, 254, (1, z_dim, xy_dim, xy_dim))

        # Ensure that we are not in a page-out state by wiping the entire
        # cache state.
        self.dead_letter._sp.cache_state.status_client.flushdb()
        self.dead_letter._sp.write_cuboid(self.resource, (0, 0, 0), 0,
                                          cube1.data)

        try:
            with patch.object(
                    self.dead_letter,
                    'send_alert',
                    wraps=self.dead_letter.send_alert) as send_alert_spy:

                # Method under test.  Returns True if it found a message.
                i = 0
                while not self.dead_letter.check_queue() and i < 30:
                    time.sleep(1)
                    i += 1

                self.assertTrue(
                    self.dead_letter._sp.cache_state.project_locked(
                        lookup_key))

                # Ensure method that publishes to SNS topic called.
                send_alert_spy.assert_called_with(lookup_key, ANY)

        finally:
            # Make sure write lock is unset before terminating.
            self.dead_letter._sp.cache_state.set_project_lock(
                lookup_key, False)
    def test_delayed_write_daemon_simple(self):
        """Test handling delayed writes"""
        sp = SpatialDB(self.kvio_config,
                       self.state_config,
                       self.object_store_config)
        dwd = DelayedWriteDaemon("boss-delayedwrited-test.pid")

        # Create a single delayed write
        cube1 = Cube.create_cube(self.resource, [512, 512, 16])
        cube1.random()
        cube1.morton_id = 0
        res = 0
        time_sample = 0

        write_cuboid_base = "WRITE-CUBOID&{}&{}".format(self.resource.get_lookup_key(), 0)

        write_cuboid_key = sp.kvio.insert_cube_in_write_buffer(write_cuboid_base, res, cube1.morton_id,
                                                               cube1.to_blosc_by_time_index(time_sample))

        sp.cache_state.add_to_delayed_write(write_cuboid_key,
                                            self.resource.get_lookup_key(),
                                            res,
                                            cube1.morton_id,
                                            time_sample,
                                            self.resource.to_json())

        # Use the daemon to handle the writes
        dwd.process(sp)
        time.sleep(30)

        # Make sure they went through
        cube2 = sp.cutout(self.resource, (0, 0, 0), (512, 512, 16), 0)

        np.testing.assert_array_equal(cube1.data, cube2.data)

        # Make sure delay key got deleted
        keys = sp.cache_state.get_all_delayed_write_keys()
        assert not keys
    def test_cutout_no_time_multi_unaligned_hit_iso_above(self):
        """Test write_cuboid and cutout methods - no time - multi - unaligned - hit - isotropic, above iso fork"""
        data = self.data
        data["channel"]["base_resolution"] = 5
        resource = BossResourceBasic(data)

        # Generate random data
        cube1 = Cube.create_cube(resource, [400, 400, 8])
        cube1.random()
        cube1.morton_id = 0

        sp = SpatialDB(self.kvio_config, self.state_config, self.object_store_config)

        sp.write_cuboid(resource, (200, 600, 3), 5, cube1.data, iso=True)

        cube2 = sp.cutout(resource, (200, 600, 3), (400, 400, 8), 5, iso=True)

        np.testing.assert_array_equal(cube1.data, cube2.data)

        # do it again...should be in cache
        cube2 = sp.cutout(resource, (200, 600, 3), (400, 400, 8), 5, iso=True)

        np.testing.assert_array_equal(cube1.data, cube2.data)
    def test_set_write_locked(self):
        # Cuboid dimensions.
        xy_dim = 128
        z_dim = 16

        lookup_key = self.data['lookup_key']
        # Make sure this key isn't currently locked.
        self.dead_letter._sp.cache_state.set_project_lock(lookup_key, False)
        self.assertFalse(self.dead_letter._sp.cache_state.project_locked(lookup_key))

        cube1 = Cube.create_cube(self.resource, [xy_dim, xy_dim, z_dim])
        cube1.data = np.random.randint(1, 254, (1, z_dim, xy_dim, xy_dim))

        # Ensure that we are not in a page-out state by wiping the entire
        # cache state.
        self.dead_letter._sp.cache_state.status_client.flushdb()
        self.dead_letter._sp.write_cuboid(
            self.resource, (0, 0, 0), 0, cube1.data)


        try:
            with patch.object(self.dead_letter, 'send_alert', wraps=self.dead_letter.send_alert) as send_alert_spy:

                # Method under test.  Returns True if it found a message.
                i = 0
                while not self.dead_letter.check_queue() and i < 30:
                    time.sleep(1)
                    i += 1

                self.assertTrue(self.dead_letter._sp.cache_state.project_locked(lookup_key))

                # Ensure method that publishes to SNS topic called.
                send_alert_spy.assert_called_with(lookup_key, ANY)

        finally:
            # Make sure write lock is unset before terminating.
            self.dead_letter._sp.cache_state.set_project_lock(lookup_key, False)
    def test_delayed_write_daemon_simple(self):
        """Test handling delayed writes"""
        sp = SpatialDB(self.kvio_config, self.state_config,
                       self.object_store_config)
        dwd = DelayedWriteDaemon("boss-delayedwrited-test.pid")

        # Create a single delayed write
        cube1 = Cube.create_cube(self.resource, [512, 512, 16])
        cube1.random()
        cube1.morton_id = 0
        res = 0
        time_sample = 0

        write_cuboid_base = "WRITE-CUBOID&{}&{}".format(
            self.resource.get_lookup_key(), 0)

        write_cuboid_key = sp.kvio.insert_cube_in_write_buffer(
            write_cuboid_base, res, cube1.morton_id,
            cube1.to_blosc_by_time_index(time_sample))

        sp.cache_state.add_to_delayed_write(write_cuboid_key,
                                            self.resource.get_lookup_key(),
                                            res, cube1.morton_id, time_sample,
                                            self.resource.to_json())

        # Use the daemon to handle the writes
        dwd.process(sp)
        time.sleep(30)

        # Make sure they went through
        cube2 = sp.cutout(self.resource, (0, 0, 0), (512, 512, 16), 0)

        np.testing.assert_array_equal(cube1.data, cube2.data)

        # Make sure delay key got deleted
        keys = sp.cache_state.get_all_delayed_write_keys()
        assert not keys
    def upload_data(self, args):
        """
        Fill the coord frame with random data.

        Args:
            args (dict): This should be the dict returned by get_downsample_args().
        """
        cuboid_size = CUBOIDSIZE[0]
        x_dim = cuboid_size[0]
        y_dim = cuboid_size[1]
        z_dim = cuboid_size[2]

        resource = BossResourceBasic()
        resource.from_dict(self.get_image_dict())
        resolution = 0
        ts = 0
        version = 0

        # DP HACK: uploading all cubes will take longer than the actual downsample
        #          just upload the first volume worth of cubes.
        #          The downsample volume lambda will only read these cubes when
        #          passed the 'test' argument.
        bucket = S3Bucket(self.bosslet_config.session, args['s3_bucket'])
        print('Uploading test data', end='', flush=True)
        for cube in xyz_range(XYZ(0,0,0), XYZ(2,2,2)):
            key = AWSObjectStore.generate_object_key(resource, resolution, ts, cube.morton)
            key += "&0" # Add the version number
            #print('morton: {}'.format(cube.morton))
            #print('key: {}'.format(key))
            #print("{} -> {} -> {}".format(cube, cube.morton, key))
            cube = Cube.create_cube(resource, [x_dim, y_dim, z_dim])
            cube.random()
            data = cube.to_blosc()
            bucket.put(key, data)
            print('.', end='', flush=True)
        print(' Done uploading.')
    def upload_data(self, session, args):
        """
        Fill the coord frame with random data.

        Args:
            session: Session used to access the S3 bucket.
            args (dict): This should be the dict returned by get_downsample_args().
        """
        cuboid_size = CUBOIDSIZE[0]
        x_dim = cuboid_size[0]
        y_dim = cuboid_size[1]
        z_dim = cuboid_size[2]

        resource = BossResourceBasic()
        resource.from_dict(self.get_image_dict())
        resolution = 0
        ts = 0
        version = 0

        # DP HACK: uploading all cubes will take longer than the actual downsample
        #          just upload the first volume worth of cubes.
        #          The downsample volume lambda will only read these cubes when
        #          passed the 'test' argument.
        bucket = S3Bucket(session, args['s3_bucket'])
        print('Uploading test data', end='', flush=True)
        for cube in xyz_range(XYZ(0,0,0), XYZ(2,2,2)):
            key = AWSObjectStore.generate_object_key(resource, resolution, ts, cube.morton)
            key += "&0" # Add the version number
            #print('morton: {}'.format(cube.morton))
            #print('key: {}'.format(key))
            #print("{} -> {} -> {}".format(cube, cube.morton, key))
            cube = Cube.create_cube(resource, [x_dim, y_dim, z_dim])
            cube.random()
            data = cube.to_blosc()
            bucket.put(key, data)
            print('.', end='', flush=True)
        print(' Done uploading.')
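    # Fragment from an S3 flush handler (surrounding context not shown): merge the
    # cuboid waiting in the write buffer with the copy already stored in S3.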
    if exist_keys:  # Cuboid Exists
        # Get cuboid to flush from write buffer
        write_cuboid_bytes = sp.kvio.get_cube_from_write_buffer(write_cuboid_key)
        if write_cuboid_bytes is None:
            # Didn't get any data back.  Assume another lambda already
            # served this request.  Remove message and continue.
            print("No data returned from write buffer, ignoring and deleting message.")
            sqs_client.delete_message(
                QueueUrl=event["config"]["object_store_config"]["s3_flush_queue"],
                ReceiptHandle=rx_handle)

            # Increment run counter
            run_cnt += 1
            continue

        new_cube = Cube.create_cube(resource, cube_dim)
        new_cube.morton_id = morton
        new_cube.from_blosc(write_cuboid_bytes)

        # Get existing cuboid from S3
        existing_cube = Cube.create_cube(resource, cube_dim)
        existing_cube.morton_id = new_cube.morton_id
        existing_cube_bytes = sp.objectio.get_single_object(object_keys[0])
        existing_cube.from_blosc(existing_cube_bytes, new_cube.time_range)

        # Merge cuboids
        existing_cube.overwrite(new_cube.data, new_cube.time_range)

        # Get bytes
        cuboid_bytes = existing_cube.to_blosc()
        uncompressed_cuboid_bytes = existing_cube.data
    def test_sqs_watcher_send_message(self):
        """Inject message into queue and test that SqsWatcher kicks off a lambda and writes cuboid to s3."""
        # Generate random data
        cube1 = Cube.create_cube(self.resource, [512, 512, 16])
        cube1.random()
        cube1.morton_id = 0

        sp = SpatialDB(self.kvio_config, self.state_config,
                       self.object_store_config)

        base_write_cuboid_key = "WRITE-CUBOID&{}&{}".format(
            self.resource.get_lookup_key(), 0)
        morton_idx = ndlib.XYZMorton([0, 0, 0])
        t = 0
        write_cuboid_key = sp.kvio.insert_cube_in_write_buffer(
            base_write_cuboid_key, t, morton_idx,
            cube1.to_blosc_by_time_index(t))

        # Put page out job on the queue
        sqs = boto3.client('sqs', region_name=get_region())

        msg_data = {
            "config": self.config_data,
            "write_cuboid_key": write_cuboid_key,
            "lambda-name": "s3_flush",
            "resource": self.resource.to_dict()
        }

        response = sqs.send_message(
            QueueUrl=self.object_store_config["s3_flush_queue"],
            MessageBody=json.dumps(msg_data))
        assert response['ResponseMetadata']['HTTPStatusCode'] == 200

        watcher = SqsWatcher(self.lambda_data)
        #  verify_queue() needs to be run multiple times to verify that the queue is not changing;
        #  only then does it send off a lambda message.
        time.sleep(5)
        watcher.verify_queue()
        time.sleep(5)
        lambdas_invoked = watcher.verify_queue()
        if lambdas_invoked < 1:
            time.sleep(5)
            watcher.verify_queue()
        time.sleep(15)

        client = boto3.client('sqs', region_name=get_region())
        response = client.get_queue_attributes(
            QueueUrl=self.object_store_config["s3_flush_queue"],
            AttributeNames=[
                'ApproximateNumberOfMessages',
                'ApproximateNumberOfMessagesNotVisible'
            ])
        https_status_code = response['ResponseMetadata']['HTTPStatusCode']
        queue_count = int(
            response['Attributes']['ApproximateNumberOfMessages'])
        # test that the queue count is now 0
        assert queue_count == 0

        s3 = boto3.client('s3', region_name=get_region())
        objects_list = s3.list_objects(
            Bucket=self.object_store_config['cuboid_bucket'])
        # tests that bucket has some Contents.
        assert "Contents" in objects_list.keys()
def handler(event, context):
    # Load settings
    SETTINGS = BossSettings.load()

    # Used as a guard against trying to delete the SQS message when lambda is
    # triggered by SQS.
    sqs_triggered = 'Records' in event and len(event['Records']) > 0

    if sqs_triggered:
        # Lambda invoked by an SQS trigger.
        msg_data = json.loads(event['Records'][0]['body'])
        # Load the project info from the chunk key you are processing
        chunk_key = msg_data['chunk_key']
        proj_info = BossIngestProj.fromSupercuboidKey(chunk_key)
        proj_info.job_id = msg_data['ingest_job']
    else:
        # Standard async invoke of this lambda.

        # Load the project info from the chunk key you are processing
        proj_info = BossIngestProj.fromSupercuboidKey(event["chunk_key"])
        proj_info.job_id = event["ingest_job"]

        # Get message from SQS ingest queue, retrying for up to ~6 seconds
        rx_cnt = 0
        msg_data = None
        msg_id = None
        msg_rx_handle = None
        while rx_cnt < 6:
            ingest_queue = IngestQueue(proj_info)
            msg = [x for x in ingest_queue.receiveMessage()]
            if msg:
                msg = msg[0]
                print("MESSAGE: {}".format(msg))
                print(len(msg))
                msg_id = msg[0]
                msg_rx_handle = msg[1]
                msg_data = json.loads(msg[2])
                print("MESSAGE DATA: {}".format(msg_data))
                break
            else:
                rx_cnt += 1
                print("No message found. Try {} of 6".format(rx_cnt))
                time.sleep(1)

        if not msg_id:
            # No tiles ready to ingest.
            print("No ingest message available")
            return

        # Get the chunk key of the tiles to ingest.
        chunk_key = msg_data['chunk_key']


    tile_error_queue = TileErrorQueue(proj_info)

    print("Ingesting Chunk {}".format(chunk_key))
    tiles_in_chunk = int(chunk_key.split('&')[1])

    # Setup SPDB instance
    sp = SpatialDB(msg_data['parameters']["KVIO_SETTINGS"],
                   msg_data['parameters']["STATEIO_CONFIG"],
                   msg_data['parameters']["OBJECTIO_CONFIG"])

    # Get tile list from Tile Index Table
    tile_index_db = BossTileIndexDB(proj_info.project_name)
    # tile_index_result (dict): keys are S3 object keys of the tiles comprising the chunk.
    tile_index_result = tile_index_db.getCuboid(msg_data["chunk_key"], int(msg_data["ingest_job"]))
    if tile_index_result is None:
        # If chunk_key is gone, another lambda uploaded the cuboids and deleted the chunk_key afterwards.
        if not sqs_triggered:
            # Remove message so it's not redelivered.
            ingest_queue.deleteMessage(msg_id, msg_rx_handle)

        print("Aborting due to chunk key missing from tile index table")
        return

    # Sort the tile keys
    print("Tile Keys: {}".format(tile_index_result["tile_uploaded_map"]))
    tile_key_list = [x.rsplit("&", 2) for x in tile_index_result["tile_uploaded_map"].keys()]
    if len(tile_key_list) < tiles_in_chunk:
        print("Not a full set of 16 tiles. Assuming it has handled already, tiles: {}".format(len(tile_key_list)))
        if not sqs_triggered:
            ingest_queue.deleteMessage(msg_id, msg_rx_handle)
        return
    tile_key_list = sorted(tile_key_list, key=lambda x: int(x[1]))
    tile_key_list = ["&".join(x) for x in tile_key_list]
    print("Sorted Tile Keys: {}".format(tile_key_list))

    # Augment the Resource JSON data (pruned due to S3 metadata size limits) so it will instantiate properly
    resource_dict = msg_data['parameters']['resource']
    _, exp_name, ch_name = resource_dict["boss_key"].split("&")

    resource_dict["channel"]["name"] = ch_name
    resource_dict["channel"]["description"] = ""
    resource_dict["channel"]["sources"] = []
    resource_dict["channel"]["related"] = []
    resource_dict["channel"]["default_time_sample"] = 0
    resource_dict["channel"]["downsample_status"] = "NOT_DOWNSAMPLED"

    resource_dict["experiment"]["name"] = exp_name
    resource_dict["experiment"]["description"] = ""
    resource_dict["experiment"]["num_time_samples"] = 1
    resource_dict["experiment"]["time_step"] = None
    resource_dict["experiment"]["time_step_unit"] = None

    resource_dict["coord_frame"]["name"] = "cf"
    resource_dict["coord_frame"]["name"] = ""
    resource_dict["coord_frame"]["x_start"] = 0
    resource_dict["coord_frame"]["x_stop"] = 100000
    resource_dict["coord_frame"]["y_start"] = 0
    resource_dict["coord_frame"]["y_stop"] = 100000
    resource_dict["coord_frame"]["z_start"] = 0
    resource_dict["coord_frame"]["z_stop"] = 100000
    resource_dict["coord_frame"]["voxel_unit"] = "nanometers"

    # Setup the resource
    resource = BossResourceBasic()
    resource.from_dict(resource_dict)
    dtype = resource.get_numpy_data_type()

    # read all tiles from bucket into a slab
    tile_bucket = TileBucket(proj_info.project_name)
    data = []
    num_z_slices = 0
    for tile_key in tile_key_list:
        try:
            image_data, message_id, receipt_handle, metadata = tile_bucket.getObjectByKey(tile_key)
        except KeyError:
            print('Key: {} not found in tile bucket, assuming redelivered SQS message and aborting.'.format(
                tile_key))
            if not sqs_triggered:
                # Remove message so it's not redelivered.
                ingest_queue.deleteMessage(msg_id, msg_rx_handle)
            print("Aborting due to missing tile in bucket")
            return

        image_bytes = BytesIO(image_data)
        image_size = image_bytes.getbuffer().nbytes

        # Get tile size from metadata; needed to shape a black tile if the actual tile is corrupt.
        if 'x_size' in metadata:
            tile_size_x = metadata['x_size']
        else:
            print('MetadataMissing: x_size not in tile metadata:  using 1024.')
            tile_size_x = 1024

        if 'y_size' in metadata:
            tile_size_y = metadata['y_size']
        else:
            print('MetadataMissing: y_size not in tile metadata:  using 1024.')
            tile_size_y = 1024

        if image_size == 0:
            print('TileError: Zero length tile, using black instead: {}'.format(tile_key))
            error_msg = 'Zero length tile'
            enqueue_tile_error(tile_error_queue, tile_key, chunk_key, error_msg)
            tile_img = np.zeros((tile_size_x, tile_size_y), dtype=dtype)
        else:
            try:
                tile_img = np.asarray(Image.open(image_bytes), dtype=dtype)
            except TypeError as te:
                print('TileError: Incomplete tile, using black instead (tile_size_in_bytes, tile_key): {}, {}'
                      .format(image_size, tile_key))
                error_msg = 'Incomplete tile'
                enqueue_tile_error(tile_error_queue, tile_key, chunk_key, error_msg)
                tile_img = np.zeros((tile_size_x, tile_size_y), dtype=dtype)
            except OSError as oe:
                print('TileError: OSError, using black instead (tile_size_in_bytes, tile_key): {}, {} ErrorMessage: {}'
                      .format(image_size, tile_key, oe))
                error_msg = 'OSError: {}'.format(oe)
                enqueue_tile_error(tile_error_queue, tile_key, chunk_key, error_msg)
                tile_img = np.zeros((tile_size_x, tile_size_y), dtype=dtype)

        data.append(tile_img)
        num_z_slices += 1


    # Make a 3D array of image data. Slices are stacked along axis 0, so the shape is (z, y, x).
    chunk_data = np.array(data)
    del data
    tile_dims = chunk_data.shape

    # Break into Cube instances
    print("Tile Dims: {}".format(tile_dims))
    print("Num Z Slices: {}".format(num_z_slices))
    num_x_cuboids = int(math.ceil(tile_dims[2] / CUBOIDSIZE[proj_info.resolution][0]))
    num_y_cuboids = int(math.ceil(tile_dims[1] / CUBOIDSIZE[proj_info.resolution][1]))
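    # A chunk is one cuboid deep in z, but its tiles may span several cuboids in
    # x and y, so the slab is split into num_x_cuboids * num_y_cuboids cuboids below.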

    print("Num X Cuboids: {}".format(num_x_cuboids))
    print("Num Y Cuboids: {}".format(num_y_cuboids))

    chunk_key_parts = BossUtil.decode_chunk_key(chunk_key)
    t_index = chunk_key_parts['t_index']
    for x_idx in range(0, num_x_cuboids):
        for y_idx in range(0, num_y_cuboids):
            # TODO: check time series support
            cube = Cube.create_cube(resource, CUBOIDSIZE[proj_info.resolution])
            cube.zeros()

            # Compute Morton ID
            # TODO: verify Morton indices correct!
            print(chunk_key_parts)
            morton_x_ind = x_idx + (chunk_key_parts["x_index"] * num_x_cuboids)
            morton_y_ind = y_idx + (chunk_key_parts["y_index"] * num_y_cuboids)
            print("Morton X: {}".format(morton_x_ind))
            print("Morton Y: {}".format(morton_y_ind))
            morton_index = XYZMorton([morton_x_ind, morton_y_ind, int(chunk_key_parts['z_index'])])

            # Insert sub-region from chunk_data into cuboid
            x_start = x_idx * CUBOIDSIZE[proj_info.resolution][0]
            x_end = x_start + CUBOIDSIZE[proj_info.resolution][0]
            x_end = min(x_end, tile_dims[2])
            y_start = y_idx * CUBOIDSIZE[proj_info.resolution][1]
            y_end = y_start + CUBOIDSIZE[proj_info.resolution][1]
            y_end = min(y_end, tile_dims[1])
            z_end = CUBOIDSIZE[proj_info.resolution][2]
            # TODO: get sub-array w/o making a copy.
            print("Yrange: {}".format(y_end - y_start))
            print("Xrange: {}".format(x_end - x_start))
            print("X start: {}".format(x_start))
            print("X stop: {}".format(x_end))
            cube.data[0, 0:num_z_slices, 0:(y_end - y_start), 0:(x_end - x_start)] = chunk_data[
                0:num_z_slices, y_start:y_end, x_start:x_end]

            # Create object key
            object_key = sp.objectio.generate_object_key(resource, proj_info.resolution, t_index, morton_index)
            print("Object Key: {}".format(object_key))

            # Put object in S3
            sp.objectio.put_objects([object_key], [cube.to_blosc()])

            # Add object to index
            sp.objectio.add_cuboid_to_index(object_key, ingest_job=int(msg_data["ingest_job"]))

            # Update id indices if this is an annotation channel
            # We no longer index during ingest.
            #if resource.data['channel']['type'] == 'annotation':
            #   try:
            #       sp.objectio.update_id_indices(
            #           resource, proj_info.resolution, [object_key], [cube.data])
            #   except SpdbError as ex:
            #       sns_client = boto3.client('sns')
            #       topic_arn = msg_data['parameters']["OBJECTIO_CONFIG"]["prod_mailing_list"]
            #       msg = 'During ingest:\n{}\nCollection: {}\nExperiment: {}\n Channel: {}\n'.format(
            #           ex.message,
            #           resource.data['collection']['name'],
            #           resource.data['experiment']['name'],
            #           resource.data['channel']['name'])
            #       sns_client.publish(
            #           TopicArn=topic_arn,
            #           Subject='Object services misuse',
            #           Message=msg)

    lambda_client = boto3.client('lambda', region_name=SETTINGS.REGION_NAME)

    names = AWSNames.create_from_lambda_name(context.function_name)

    delete_tiles_data = {
        'tile_key_list': tile_key_list,
        'region': SETTINGS.REGION_NAME,
        'bucket': tile_bucket.bucket.name
    }

    # Delete tiles from tile bucket.
    lambda_client.invoke(
        FunctionName=names.delete_tile_objs_lambda,
        InvocationType='Event',
        Payload=json.dumps(delete_tiles_data).encode()
    )       

    delete_tile_entry_data = {
        'tile_index': tile_index_db.table.name,
        'region': SETTINGS.REGION_NAME,
        'chunk_key': chunk_key,
        'task_id': msg_data['ingest_job']
    }

    # Delete entry from tile index.
    lambda_client.invoke(
        FunctionName=names.delete_tile_index_entry_lambda,
        InvocationType='Event',
        Payload=json.dumps(delete_tile_entry_data).encode()
    )       

    if not sqs_triggered:
        # Delete message since it was processed successfully
        ingest_queue.deleteMessage(msg_id, msg_rx_handle)
    def test_add_to_prefetch(self):
        cuboid_dims = CUBOIDSIZE[0]
        # Cuboid dimensions.
        x_dim = cuboid_dims[0]
        y_dim = cuboid_dims[1]
        z_dim = cuboid_dims[2]

        cube = Cube.create_cube(self.resource, [x_dim, y_dim, z_dim])
        cube.random()
        cube_above = Cube.create_cube(self.resource, [x_dim, y_dim, z_dim])
        cube_above.random()
        cube_below = Cube.create_cube(self.resource, [x_dim, y_dim, z_dim])
        cube_below.random()

        # Write 3 cuboids that are stacked vertically.
        self.sp.write_cuboid(self.resource, (0, 0, 0), 0, cube_below.data)
        self.sp.write_cuboid(self.resource, (0, 0, z_dim), 0, cube.data)
        self.sp.write_cuboid(self.resource, (0, 0, z_dim * 2), 0, cube_above.data)

        cube.morton_id = ndlib.XYZMorton([0, 0, z_dim // z_dim])
        cube_below.morton_id = ndlib.XYZMorton([0, 0, 0])
        cube_above.morton_id = ndlib.XYZMorton([0, 0, z_dim * 2 // z_dim])
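        # (Dividing each voxel offset by the cuboid dimension converts it into
        # the cuboid-grid coordinate expected by XYZMorton.)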
        print('mortons: {}, {}, {}'.format(
            cube_below.morton_id, cube.morton_id, cube_above.morton_id))

        cube_below_cache_key, cube_cache_key, cube_above_cache_key = self.sp.kvio.generate_cached_cuboid_keys(
            self.resource, 0, [0],
            [cube_below.morton_id, cube.morton_id, cube_above.morton_id])

        # Make sure cuboids saved.
        cube_act = self.sp.cutout(self.resource, (0, 0, 0), (x_dim, y_dim, z_dim), 0)
        np.testing.assert_array_equal(cube_below.data, cube_act.data)
        cube_act = self.sp.cutout(self.resource, (0, 0, z_dim), (x_dim, y_dim, z_dim), 0)
        np.testing.assert_array_equal(cube.data, cube_act.data)
        cube_act = self.sp.cutout(self.resource, (0, 0, z_dim * 2), (x_dim, y_dim, z_dim), 0)
        np.testing.assert_array_equal(cube_above.data, cube_act.data)

        # Clear cache so we can get a cache miss.
        self.sp.kvio.cache_client.flushdb()

        # Also clear CACHE-MISS before running the test.
        self.sp.cache_state.status_client.flushdb()

        # Get middle cube again.  This should trigger a cache miss.
        cube_act = self.sp.cutout(self.resource, (0, 0, z_dim), (x_dim, y_dim, z_dim), 0)

        # Confirm there is a cache miss.
        misses = self.sp.cache_state.status_client.lrange('CACHE-MISS', 0, 10)
        print('misses:')
        print(misses)
        miss_actual = self.sp.cache_state.status_client.lindex('CACHE-MISS', 0)
        self.assertEqual(cube_cache_key, str(miss_actual, 'utf-8'))

        # This is the system under test.
        self.cache_miss.process()

        # Confirm PRE-FETCH has the object keys for the cube above and below.
        fetch_actual1 = self.sp.cache_state.status_client.lindex('PRE-FETCH', 0)
        fetch_actual2 = self.sp.cache_state.status_client.lindex('PRE-FETCH', 1)
        obj_keys = self.sp.objectio.cached_cuboid_to_object_keys(
            [cube_above_cache_key, cube_below_cache_key])
        self.assertEqual(obj_keys[0], str(fetch_actual1, 'utf-8'))
        self.assertEqual(obj_keys[1], str(fetch_actual2, 'utf-8'))
def handler(event, context):
    # Load settings
    SETTINGS = BossSettings.load()

    # Used as a guard against trying to delete the SQS message when lambda is
    # triggered by SQS.
    sqs_triggered = 'Records' in event and len(event['Records']) > 0

    if sqs_triggered:
        # Lambda invoked by an SQS trigger.
        msg_data = json.loads(event['Records'][0]['body'])
        # Load the project info from the chunk key you are processing
        chunk_key = msg_data['chunk_key']
        proj_info = BossIngestProj.fromSupercuboidKey(chunk_key)
        proj_info.job_id = msg_data['ingest_job']
    else:
        # Standard async invoke of this lambda.

        # Load the project info from the chunk key you are processing
        proj_info = BossIngestProj.fromSupercuboidKey(event["chunk_key"])
        proj_info.job_id = event["ingest_job"]

        # Get message from SQS ingest queue, retrying for up to ~6 seconds
        rx_cnt = 0
        msg_data = None
        msg_id = None
        msg_rx_handle = None
        while rx_cnt < 6:
            ingest_queue = IngestQueue(proj_info)
            try:
                msg = [x for x in ingest_queue.receiveMessage()]
            # StopIteration may be converted to a RuntimeError.
            except (StopIteration, RuntimeError):
                msg = None

            if msg:
                msg = msg[0]
                print("MESSAGE: {}".format(msg))
                print(len(msg))
                msg_id = msg[0]
                msg_rx_handle = msg[1]
                msg_data = json.loads(msg[2])
                print("MESSAGE DATA: {}".format(msg_data))
                break
            else:
                rx_cnt += 1
                print("No message found. Try {} of 6".format(rx_cnt))
                time.sleep(1)

        if not msg_id:
            # No tiles ready to ingest.
            print("No ingest message available")
            return

        # Get the chunk key of the tiles to ingest.
        chunk_key = msg_data['chunk_key']


    tile_error_queue = TileErrorQueue(proj_info)

    print("Ingesting Chunk {}".format(chunk_key))
    tiles_in_chunk = int(chunk_key.split('&')[1])

    # Setup SPDB instance
    sp = SpatialDB(msg_data['parameters']["KVIO_SETTINGS"],
                   msg_data['parameters']["STATEIO_CONFIG"],
                   msg_data['parameters']["OBJECTIO_CONFIG"])

    # Get tile list from Tile Index Table
    tile_index_db = BossTileIndexDB(proj_info.project_name)
    # tile_index_result (dict): keys are S3 object keys of the tiles comprising the chunk.
    tile_index_result = tile_index_db.getCuboid(msg_data["chunk_key"], int(msg_data["ingest_job"]))
    if tile_index_result is None:
        # If chunk_key is gone, another lambda uploaded the cuboids and deleted the chunk_key afterwards.
        if not sqs_triggered:
            # Remove message so it's not redelivered.
            ingest_queue.deleteMessage(msg_id, msg_rx_handle)

        print("Aborting due to chunk key missing from tile index table")
        return

    # Sort the tile keys
    print("Tile Keys: {}".format(tile_index_result["tile_uploaded_map"]))
    tile_key_list = [x.rsplit("&", 2) for x in tile_index_result["tile_uploaded_map"].keys()]
    if len(tile_key_list) < tiles_in_chunk:
        print("Not a full set of 16 tiles. Assuming it has handled already, tiles: {}".format(len(tile_key_list)))
        if not sqs_triggered:
            ingest_queue.deleteMessage(msg_id, msg_rx_handle)
        return
    tile_key_list = sorted(tile_key_list, key=lambda x: int(x[1]))
    tile_key_list = ["&".join(x) for x in tile_key_list]
    print("Sorted Tile Keys: {}".format(tile_key_list))

    # Augment the Resource JSON data (pruned due to S3 metadata size limits) so it will instantiate properly
    resource_dict = msg_data['parameters']['resource']
    _, exp_name, ch_name = resource_dict["boss_key"].split("&")

    resource_dict["channel"]["name"] = ch_name
    resource_dict["channel"]["description"] = ""
    resource_dict["channel"]["sources"] = []
    resource_dict["channel"]["related"] = []
    resource_dict["channel"]["default_time_sample"] = 0
    resource_dict["channel"]["downsample_status"] = "NOT_DOWNSAMPLED"

    resource_dict["experiment"]["name"] = exp_name
    resource_dict["experiment"]["description"] = ""
    resource_dict["experiment"]["num_time_samples"] = 1
    resource_dict["experiment"]["time_step"] = None
    resource_dict["experiment"]["time_step_unit"] = None

    resource_dict["coord_frame"]["name"] = "cf"
    resource_dict["coord_frame"]["name"] = ""
    resource_dict["coord_frame"]["x_start"] = 0
    resource_dict["coord_frame"]["x_stop"] = 100000
    resource_dict["coord_frame"]["y_start"] = 0
    resource_dict["coord_frame"]["y_stop"] = 100000
    resource_dict["coord_frame"]["z_start"] = 0
    resource_dict["coord_frame"]["z_stop"] = 100000
    resource_dict["coord_frame"]["voxel_unit"] = "nanometers"

    # Setup the resource
    resource = BossResourceBasic()
    resource.from_dict(resource_dict)
    dtype = resource.get_numpy_data_type()

    # read all tiles from bucket into a slab
    tile_bucket = TileBucket(proj_info.project_name)
    data = []
    num_z_slices = 0
    for tile_key in tile_key_list:
        try:
            image_data, message_id, receipt_handle, metadata = tile_bucket.getObjectByKey(tile_key)
        except KeyError:
            print('Key: {} not found in tile bucket, assuming redelivered SQS message and aborting.'.format(
                tile_key))
            if not sqs_triggered:
                # Remove message so it's not redelivered.
                ingest_queue.deleteMessage(msg_id, msg_rx_handle)
            print("Aborting due to missing tile in bucket")
            return

        image_bytes = BytesIO(image_data)
        image_size = image_bytes.getbuffer().nbytes

        # Get tile size from metadata; it is needed to shape a black replacement tile if the actual tile is corrupt.
        if 'x_size' in metadata:
            tile_size_x = metadata['x_size']
        else:
            print('MetadataMissing: x_size not in tile metadata; using 1024.')
            tile_size_x = 1024

        if 'y_size' in metadata:
            tile_size_y = metadata['y_size']
        else:
            print('MetadataMissing: y_size not in tile metadata; using 1024.')
            tile_size_y = 1024

        if image_size == 0:
            print('TileError: Zero length tile, using black instead: {}'.format(tile_key))
            error_msg = 'Zero length tile'
            enqueue_tile_error(tile_error_queue, tile_key, chunk_key, error_msg)
            tile_img = np.zeros((tile_size_x, tile_size_y), dtype=dtype)
        else:
            try:
                # DP NOTE: Pillow 8.3.1 caused issues when dtype was specified in np.asarray();
                # fixed by separating array instantiation from the dtype cast.
                tile_img = np.asarray(Image.open(image_bytes))
                tile_img = tile_img.astype(dtype)
            except TypeError as te:
                print('TileError: Incomplete tile, using black instead (tile_size_in_bytes, tile_key): {}, {} ErrorMessage: {}'
                      .format(image_size, tile_key, te))
                error_msg = 'Incomplete tile'
                enqueue_tile_error(tile_error_queue, tile_key, chunk_key, error_msg)
                tile_img = np.zeros((tile_size_x, tile_size_y), dtype=dtype)
            except OSError as oe:
                print('TileError: OSError, using black instead (tile_size_in_bytes, tile_key): {}, {} ErrorMessage: {}'
                      .format(image_size, tile_key, oe))
                error_msg = 'OSError: {}'.format(oe)
                enqueue_tile_error(tile_error_queue, tile_key, chunk_key, error_msg)
                tile_img = np.zeros((tile_size_x, tile_size_y), dtype=dtype)

        data.append(tile_img)
        num_z_slices += 1


    # Stack the tiles into a 3D array; at this point the axes are (z, y, x),
    # since each appended tile is a (y, x) slice.
    chunk_data = np.array(data)
    del data
    tile_dims = chunk_data.shape

    # Break into Cube instances
    print("Tile Dims: {}".format(tile_dims))
    print("Num Z Slices: {}".format(num_z_slices))
    num_x_cuboids = int(math.ceil(tile_dims[2] / CUBOIDSIZE[proj_info.resolution][0]))
    num_y_cuboids = int(math.ceil(tile_dims[1] / CUBOIDSIZE[proj_info.resolution][1]))

    print("Num X Cuboids: {}".format(num_x_cuboids))
    print("Num Y Cuboids: {}".format(num_y_cuboids))

    chunk_key_parts = BossUtil.decode_chunk_key(chunk_key)
    t_index = chunk_key_parts['t_index']
    for x_idx in range(0, num_x_cuboids):
        for y_idx in range(0, num_y_cuboids):
            # TODO: check time series support
            cube = Cube.create_cube(resource, CUBOIDSIZE[proj_info.resolution])
            cube.zeros()

            # Compute Morton ID
            # TODO: verify Morton indices correct!
            print(chunk_key_parts)
            morton_x_ind = x_idx + (chunk_key_parts["x_index"] * num_x_cuboids)
            morton_y_ind = y_idx + (chunk_key_parts["y_index"] * num_y_cuboids)
            print("Morton X: {}".format(morton_x_ind))
            print("Morton Y: {}".format(morton_y_ind))
            morton_index = XYZMorton([morton_x_ind, morton_y_ind, int(chunk_key_parts['z_index'])])
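            # Worked example for the index math above (assumed cuboid grid layout):
            # with chunk x_index=3, num_x_cuboids=2, and x_idx=1, the global cuboid
            # x index is 1 + 3*2 = 7.  XYZMorton then interleaves the bits of the
            # (x, y, z) cuboid indices into a single integer, e.g. (1, 1, 1) -> 7.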

            # Insert sub-region from chunk_data into cuboid
            x_start = x_idx * CUBOIDSIZE[proj_info.resolution][0]
            x_end = x_start + CUBOIDSIZE[proj_info.resolution][0]
            x_end = min(x_end, tile_dims[2])
            y_start = y_idx * CUBOIDSIZE[proj_info.resolution][1]
            y_end = y_start + CUBOIDSIZE[proj_info.resolution][1]
            y_end = min(y_end, tile_dims[1])
            z_end = CUBOIDSIZE[proj_info.resolution][2]
            # TODO: get sub-array w/o making a copy.
            print("Yrange: {}".format(y_end - y_start))
            print("Xrange: {}".format(x_end - x_start))
            print("X start: {}".format(x_start))
            print("X stop: {}".format(x_end))
            cube.data[0, 0:num_z_slices, 0:(y_end - y_start), 0:(x_end - x_start)] = chunk_data[0:num_z_slices,
                                                                                 y_start:y_end, x_start:x_end]

            # Create object key
            object_key = sp.objectio.generate_object_key(resource, proj_info.resolution, t_index, morton_index)
            print("Object Key: {}".format(object_key))

            # Put object in S3
            sp.objectio.put_objects([object_key], [cube.to_blosc()])
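            # to_blosc() is assumed to blosc-compress the whole cuboid (all time
            # samples) so the object written to S3 matches what the cutout read
            # path expects to decompress.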

            # Add object to index
            sp.objectio.add_cuboid_to_index(object_key, ingest_job=int(msg_data["ingest_job"]))

            # Update id indices if this is an annotation channel
            # We no longer index during ingest.
            #if resource.data['channel']['type'] == 'annotation':
            #   try:
            #       sp.objectio.update_id_indices(
            #           resource, proj_info.resolution, [object_key], [cube.data])
            #   except SpdbError as ex:
            #       sns_client = boto3.client('sns')
            #       topic_arn = msg_data['parameters']["OBJECTIO_CONFIG"]["prod_mailing_list"]
            #       msg = 'During ingest:\n{}\nCollection: {}\nExperiment: {}\n Channel: {}\n'.format(
            #           ex.message,
            #           resource.data['collection']['name'],
            #           resource.data['experiment']['name'],
            #           resource.data['channel']['name'])
            #       sns_client.publish(
            #           TopicArn=topic_arn,
            #           Subject='Object services misuse',
            #           Message=msg)

    lambda_client = boto3.client('lambda', region_name=SETTINGS.REGION_NAME)

    names = AWSNames.from_lambda(context.function_name)

    delete_tiles_data = {
        'tile_key_list': tile_key_list,
        'region': SETTINGS.REGION_NAME,
        'bucket': tile_bucket.bucket.name
    }

    # Delete tiles from tile bucket.
    lambda_client.invoke(
        FunctionName=names.delete_tile_objs.lambda_,
        InvocationType='Event',
        Payload=json.dumps(delete_tiles_data).encode()
    )       

    delete_tile_entry_data = {
        'tile_index': tile_index_db.table.name,
        'region': SETTINGS.REGION_NAME,
        'chunk_key': chunk_key,
        'task_id': msg_data['ingest_job']
    }

    # Delete entry from tile index.
    lambda_client.invoke(
        FunctionName=names.delete_tile_index_entry.lambda_,
        InvocationType='Event',
        Payload=json.dumps(delete_tile_entry_data).encode()
    )       

    if not sqs_triggered:
        # Delete message since it was processed successfully
        ingest_queue.deleteMessage(msg_id, msg_rx_handle)
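
# A minimal, hypothetical sketch of the ingest-queue message consumed above,
# inferred from the fields this handler reads (chunk_key, ingest_job, parameters);
# the key layout and the empty settings dicts are illustrative stand-ins only.
import json

example_msg_data = {
    "chunk_key": "<hash>&16&<coll>&<exp>&<chan>&0&0&0&0&0",  # hypothetical chunk key
    "ingest_job": 1,
    "parameters": {
        "KVIO_SETTINGS": {},      # cache (Redis) settings, stubbed
        "STATEIO_CONFIG": {},     # cache-state settings, stubbed
        "OBJECTIO_CONFIG": {},    # S3/DynamoDB object-store settings, stubbed
        "resource": {},           # pruned resource dict, stubbed
    },
}
example_body = json.dumps(example_msg_data)  # this JSON string is what arrives as msg[2]
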
Example #36
    print("Tile Dims: {}".format(tile_dims))
    print("Num Z Slices: {}".format(num_z_slices))
    num_x_cuboids = int(math.ceil(tile_dims[2] / CUBOIDSIZE[proj_info.resolution][0]))
    num_y_cuboids = int(math.ceil(tile_dims[1] / CUBOIDSIZE[proj_info.resolution][1]))

    print("Num X Cuboids: {}".format(num_x_cuboids))
    print("Num Y Cuboids: {}".format(num_y_cuboids))

    # Cuboid List
    cuboids = []
    chunk_key_parts = BossUtil.decode_chunk_key(chunk_key)
    t_index = chunk_key_parts['t_index']
    for x_idx in range(0, num_x_cuboids):
        for y_idx in range(0, num_y_cuboids):
            # TODO: check time series support
            cube = Cube.create_cube(resource, CUBOIDSIZE[proj_info.resolution])
            cube.zeros()

            # Compute Morton ID
            # TODO: verify Morton indices correct!
            print(chunk_key_parts)
            morton_x_ind = x_idx + (chunk_key_parts["x_index"] * num_x_cuboids)
            morton_y_ind = y_idx + (chunk_key_parts["y_index"] * num_y_cuboids)
            print("Morton X: {}".format(morton_x_ind))
            print("Morton Y: {}".format(morton_y_ind))
            morton_index = XYZMorton([morton_x_ind, morton_y_ind, int(chunk_key_parts['z_index'])])

            # Insert sub-region from chunk_data into cuboid
            x_start = x_idx * CUBOIDSIZE[proj_info.resolution][0]
            x_end = x_start + CUBOIDSIZE[proj_info.resolution][0]
            x_end = min(x_end, tile_dims[2])