Example #1
 def test_01_08_register_dataset_dependent_specific(self):
     #
     # For the case where the dependent dataset knows its source
     # (e.g. if the sources overlap), choose the specified source
     #
     self.db.set_target_dir("/tmp/foo")
     self.db.set_temp_dir("/tmp/bar")
     task1 = DummyTask(my_parameter="left")
     task2 = DummyTask(my_parameter="right")
     dependent_task = DummyTask(my_parameter="dependent")
     self.db.register_dataset_type("image", Persistence.Permanent, "uint8",
                                   "Raw image data volume")
     volume = Volume(0, 0, 0, 1024, 1024, 128)
     self.db.register_dataset(self.db.get_dataset_id(), task1, "image",
                              volume)
     volume = Volume(512, 0, 0, 1024, 1024, 128)
     self.db.register_dataset(self.db.get_dataset_id(), task2, "image",
                              volume)
     volume = Volume(512, 0, 0, 512, 1024, 128)
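     # This request volume lies inside the two sources' overlap in x
     # ([512, 1024)), so only the explicit src_task disambiguates.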
     self.db.register_dataset_dependent(self.db.get_loading_plan_id(),
                                        dependent_task,
                                        "image",
                                        volume,
                                        src_task=task2)
     self.db.compute_subvolumes()
     dependencies = self.db.get_dependencies(dependent_task)
     self.assertEqual(len(dependencies), 1)
     self.assertEqual(dependencies[0], task2.task_id)
Example #2
 def test_01_07_register_dataset_dependent_multiple(self):
     #
     # Register a dataset dependent against three tasks, only two of
     # which overlap the requested volume
     #
     self.db.set_target_dir("/tmp/foo")
     self.db.set_temp_dir("/tmp/bar")
     task1 = DummyTask(my_parameter="foo")
     task2 = DummyTask(my_parameter="bar")
     task3 = DummyTask(my_parameter="baz")
     self.db.register_dataset_type("image", Persistence.Permanent, "uint8",
                                   "Raw image data volume")
     volume = Volume(0, 0, 0, 1024, 1024, 128)
     self.db.register_dataset(self.db.get_dataset_id(), task1, "image",
                              volume)
     volume = Volume(1024, 0, 0, 1024, 1024, 128)
     self.db.register_dataset(self.db.get_dataset_id(), task2, "image",
                              volume)
     volume = Volume(2048, 0, 0, 1024, 1024, 128)
     self.db.register_dataset(self.db.get_dataset_id(), task3, "image",
                              volume)
     dependent_task = DummyTask(my_parameter="blech")
     volume = Volume(0, 0, 0, 1025, 1024, 128)
     self.db.register_dataset_dependent(self.db.get_loading_plan_id(),
                                        dependent_task, "image", volume)
     self.db.compute_subvolumes()
     dependencies = self.db.get_dependencies(dependent_task)
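     # x in [0, 1025) covers task1's [0, 1024) fully, clips a single
     # voxel off task2's [1024, 2048) and misses task3's [2048, 3072).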
     self.assertEqual(len(dependencies), 2)
     for dependency in dependencies:
         self.assertIn(dependency, (task1.task_id, task2.task_id))
Example #3
    def test_02_03_a_particular_source(self):
        #
        # If two tasks produce overlapping volumes, make sure that a
        # dependent can choose which one it reads from.
        #
        src_task1 = DummyTask(my_parameter="task1")
        src_task2 = DummyTask(my_parameter="task2")
        dependent_task = DummyTask(my_parameter="dependent")
        volume1 = Volume(0, 0, 0, 1024, 1024, 128)
        volume2 = Volume(512, 0, 0, 1024, 1024, 128)
        volume_dest = Volume(500, 0, 0, 24, 1024, 128)
        dataset_name = "neuroproof"
        self.db.register_dataset_type(dataset_name, Persistence.Permanent,
                                      "uint32")
        self.db.set_target_dir("/tmp/foo")
        self.db.set_temp_dir("tmp/bar")
        dataset_id1 = self.db.get_dataset_id()
        self.db.register_dataset(dataset_id1, src_task1, dataset_name, volume1)
        dataset_id2 = self.db.get_dataset_id()
        self.db.register_dataset(dataset_id2, src_task2, dataset_name, volume2)
        self.db.register_dataset_dependent(self.db.get_loading_plan_id(),
                                           dependent_task,
                                           dataset_name,
                                           volume_dest,
                                           src_task=src_task1)
        self.db.register_dataset_dependent(self.db.get_loading_plan_id(),
                                           dependent_task,
                                           dataset_name,
                                           volume_dest,
                                           src_task=src_task2)
        self.db.compute_subvolumes()
        result1 = self.db.get_subvolume_locations(dependent_task, dataset_name,
                                                  src_task1.task_id)
        self.assertEqual(len(result1), 1)
        location, volume = result1[0]
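        # The dependent's read location must be one of the subvolumes
        # written for dataset 1; the for/else fails if none matches.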
        expected1 = self.db.get_subvolume_locations_by_dataset_id(dataset_id1)
        for expected_location, expected_volume in expected1:
            if expected_location == location and \
               expected_volume == volume:
                break
        else:
            self.fail()

        result2 = self.db.get_subvolume_locations(dependent_task, dataset_name,
                                                  src_task2.task_id)
        self.assertEqual(len(result2), 1)
        location, volume = result2[0]
        expected2 = self.db.get_subvolume_locations_by_dataset_id(dataset_id2)
        for expected_location, expected_volume in expected2:
            if expected_location == location and \
               expected_volume == volume:
                break
        else:
            self.fail()
Example #4
 def test_03_03_32bit_read_and_write(self):
     src_task = DummyTask(my_parameter="src")
     dest_task = DummyTask(my_parameter="dest")
     dataset_name = "membrane"
     self.db.register_dataset_type(dataset_name, Persistence.Permanent,
                                   "uint32")
     tempdir = tempfile.mkdtemp()
     try:
         self.db.set_target_dir(tempdir)
         volume = Volume(44, 44, 14, 100, 200, 300)
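         # Round-trip random 32-bit data: imread should return it
         # bit-identical, with the registered dtype uint32.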
         data = np.random.RandomState(301).randint(
             0, 2**30, (volume.depth, volume.height, volume.width))
         dataset_id = self.db.get_dataset_id()
         self.db.register_dataset(dataset_id, src_task, dataset_name,
                                  volume)
         loading_plan_id = self.db.get_loading_plan_id()
         self.db.register_dataset_dependent(loading_plan_id, dest_task,
                                            dataset_name, volume)
         self.db.compute_subvolumes()
         self.db.imwrite(dataset_id, data)
         result = self.db.imread(loading_plan_id)
         np.testing.assert_array_equal(data, result)
         self.assertEqual(result.dtype, np.uint32)
     finally:
         shutil.rmtree(tempdir)
Example #5
    def test_03_02_complex_read_and_write(self):
        #
        # Test reading across datasets. The loading plan should chop
        # the read volume into several pieces and stitch them together.
        #
        src_task_1 = DummyTask(my_parameter="src1")
        src_task_2 = DummyTask(my_parameter="src2")
        dest_task_1 = DummyTask(my_parameter="dest1")
        dest_task_2 = DummyTask(my_parameter="dest2")
        volume1 = Volume(44, 44, 14, 100, 100, 100)
        volume2 = Volume(144, 44, 14, 100, 100, 100)
        dvolume = Volume(100, 64, 34, 88, 60, 60)
        r = np.random.RandomState(302)
        data1 = r.randint(0, 256,
                          (volume1.depth, volume1.height, volume1.width))
        data2 = r.randint(0, 256,
                          (volume2.depth, volume2.height, volume2.width))

        dataset_name = "membrane"
        self.db.register_dataset_type(dataset_name, Persistence.Permanent,
                                      "uint8")
        dataset_1 = self.db.get_dataset_id()
        self.db.register_dataset(dataset_1, src_task_1, dataset_name, volume1)
        dataset_2 = self.db.get_dataset_id()
        self.db.register_dataset(dataset_2, src_task_2, dataset_name, volume2)
        load_plan_1 = self.db.get_loading_plan_id()
        self.db.register_dataset_dependent(load_plan_1, dest_task_1,
                                           dataset_name, dvolume)
        load_plan_2 = self.db.get_loading_plan_id()
        self.db.register_dataset_dependent(load_plan_2, dest_task_2,
                                           dataset_name, volume1)
        tempdir = tempfile.mkdtemp()
        try:
            self.db.set_target_dir(tempdir)
            self.db.compute_subvolumes()
            self.db.imwrite(dataset_1, data1)
            self.db.imwrite(dataset_2, data2)
            result = self.db.imread(load_plan_1)
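            # dvolume spans x in [100, 188): the first 44 voxels of each
            # row come from data1 (local x 56:100), the rest from data2
            # (local x 0:44).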
            np.testing.assert_array_equal(data1[20:-20, 20:-20, 56:],
                                          result[:, :, :44])
            np.testing.assert_array_equal(data2[20:-20, 20:-20, :44],
                                          result[:, :, 44:])
            result = self.db.imread(load_plan_2)
            np.testing.assert_array_equal(data1, result)
        finally:
            shutil.rmtree(tempdir)
Example #6
 def test_01_10_get_dtype_by_dataset_id(self):
     self.db.set_target_dir("/tmp/foo")
     self.db.set_temp_dir("/tmp/bar")
     task1 = DummyTask(my_parameter="left")
     self.db.register_dataset_type("image", Persistence.Permanent, "uint8",
                                   "Raw image data volume")
     volume = Volume(0, 0, 0, 1024, 1024, 128)
     dataset_id = self.db.get_dataset_id()
     self.db.register_dataset(dataset_id, task1, "image", volume)
     self.assertEqual(self.db.get_dataset_dtype_by_dataset_id(dataset_id),
                      "uint8")
Example #7
 def test_01_03_register_dataset(self):
     task = DummyTask(my_parameter="foo")
     self.db.register_dataset_type("image", Persistence.Permanent, "uint8",
                                   "Raw image data volume")
     volume = Volume(44, 22, 14, 1024, 2048, 117)
     dataset_id = self.db.get_dataset_id()
     self.db.register_dataset(dataset_id, task, "image", volume)
     result = self.db.find_datasets_by_type_and_volume("image", volume)
     self.assertEqual(len(result), 1)
     dataset = result[0]
     self.assertEqual(dataset.task.luigi_id, task.task_id)
     self.assertEqual(dataset.volume.x0, 44)
Example #8
 def test_01_06_dont_find_dataset_of_wrong_type(self):
     task = DummyTask(my_parameter="foo")
     self.db.register_dataset_type("image", Persistence.Permanent, "uint8",
                                   "Raw image data volume")
     self.db.register_dataset_type("segmentation", Persistence.Permanent,
                                   "uint32")
     volume = Volume(44, 22, 14, 1024, 2048, 117)
     dataset_id = self.db.get_dataset_id()
     self.db.register_dataset(dataset_id, task, "image", volume)
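     # The dataset was registered as "image", so a "segmentation" query
     # over the same volume must come up empty.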
     result = self.db.find_datasets_by_type_and_volume(
         "segmentation", volume)
     self.assertEqual(len(result), 0)
Example #9
 def test_01_06_register_dataset_dependent(self):
     task = DummyTask(my_parameter="foo")
     self.db.set_target_dir("/tmp/foo")
     self.db.set_temp_dir("/tmp/bar")
     self.db.register_dataset_type("image", Persistence.Permanent, "uint8",
                                   "Raw image data volume")
     volume = Volume(44, 22, 14, 1024, 2048, 117)
     dataset_id = self.db.get_dataset_id()
     self.db.register_dataset(dataset_id, task, "image", volume)
     dependent_task = DummyTask(my_parameter="bar")
     loading_plan_id = self.db.get_loading_plan_id()
     self.db.register_dataset_dependent(loading_plan_id, dependent_task,
                                        "image", volume)
     self.db.compute_subvolumes()
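     # With no src_task given, the dependency is resolved purely by
     # volume overlap; the single registered dataset matches exactly.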
     dependencies = self.db.get_dependencies(dependent_task)
     self.assertEqual(len(dependencies), 1)
     self.assertEqual(dependencies[0], task.task_id)
Example #10
 def test_02_01_simple_compute_subvolumes(self):
     task = DummyTask(my_parameter="foo")
     self.db.register_dataset_type("image", Persistence.Permanent, "uint8",
                                   "Raw image data volume")
     self.db.set_target_dir("/tmp/foo")
     self.db.set_temp_dir("/tmp/bar")
     volume = Volume(44, 22, 14, 1024, 2048, 117)
     dataset_id = self.db.get_dataset_id()
     self.db.register_dataset(dataset_id, task, "image", volume)
     dependent_task = DummyTask(my_parameter="bar")
     load_plan_id = self.db.get_loading_plan_id()
     self.db.register_dataset_dependent(load_plan_id, dependent_task,
                                        "image", volume)
     self.db.compute_subvolumes()
     result = self.db.get_subvolume_locations(dependent_task, "image")
     self.assertEqual(len(result), 1)
     location, volume = result[0]
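     # Subvolume files land under <target_dir>/<x>/<y>/<z>/<dataset>_...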
     self.assertTrue(location.startswith("/tmp/foo/44/22/14/image_"))
     self.assertEqual(volume.x, 44)
Example #11
 def test_01_05_find_overlapping_datasets(self):
     task = DummyTask(my_parameter="foo")
     self.db.register_dataset_type("image", Persistence.Permanent, "uint8",
                                   "Raw image data volume")
     volume = Volume(44, 22, 14, 1024, 2048, 117)
     dataset_id = self.db.get_dataset_id()
     self.db.register_dataset(dataset_id, task, "image", volume)
     #
     # Check every corner case
     #
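     # Each probe volume overlaps the registered volume by exactly one
     # voxel along one axis (extents are half-open [start, start + size)).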
     for volume in (Volume(1067, 22, 14, 1024, 2048, 117),
                    Volume(44, 2069, 14, 1024, 2048, 117),
                    Volume(44, 22, 117 + 14 - 1, 1024, 2048, 117),
                    Volume(0, 22, 14, 45, 2048, 117),
                    Volume(44, 0, 14, 1024, 23, 117),
                    Volume(44, 22, 0, 1024, 2048, 15)):
         result = self.db.find_datasets_by_type_and_volume("image", volume)
         self.assertEqual(len(result), 1)
Example #12
def main():
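    # NOTE: this excerpt omits its imports; it relies on json, h5py,
    # numpy as np and rh_logger, plus the project's Volume,
    # DatasetLocation, TargetFactory and parse_args.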
    rh_logger.logger.start_process("microns-volume", "starting", [])
    #
    # Unpack the command-line arguments
    #
    args = parse_args()
    index_file = args.index_file
    dataset_name = args.dataset_name
    x = args.x
    y = args.y
    z = args.z
    width = args.width
    height = args.height
    depth = args.depth
    output_location = args.output_location
    output_dataset_name = args.output_dataset_name or dataset_name
    chunks = args.chunks
    gzip = args.gzip
    datatype = args.datatype
    #
    # Get the index file
    #
    with open(index_file) as fp:
        index = json.load(fp)
    #
    # Open the HDF5 file
    #
    with h5py.File(output_location, "a") as fd:
        #
        # Get the coordinates of the volume
        #
        x0 = x
        x1 = x + width
        y0 = y
        y1 = y + height
        z0 = z
        z1 = z + depth
        ds = None
        #
        # Loop through all datasets
        #
        tf = TargetFactory()
        for volume, location in index[dataset_name]:
            volume = Volume(**volume)
            #
            # Skip datasets completely outside the requested range
            # (extents are half-open, so touching at a boundary does
            # not count as overlap)
            #
            if volume.x >= x1 or volume.x1 <= x0:
                continue
            if volume.y >= y1 or volume.y1 <= y0:
                continue
            if volume.z >= z1 or volume.z1 <= z0:
                continue
            #
            # Get the volume target
            #
            location = DatasetLocation(**location)
            target = tf.get_volume_target(location, volume)
            #
            # Compute the cutout
            #
            vx0 = max(x0, volume.x)
            vx1 = min(x1, volume.x1)
            vy0 = max(y0, volume.y)
            vy1 = min(y1, volume.y1)
            vz0 = max(z0, volume.z)
            vz1 = min(z1, volume.z1)
            #
            # Read the cutout
            #
            cutout = target.imread_part(vx0, vy0, vz0, vx1 - vx0, vy1 - vy0,
                                        vz1 - vz0)
            if ds is None:
                #
                # Create the dataset now that we know the datatype of
                # the input.
                #
                if datatype is None:
                    datatype = cutout.dtype
                else:
                    datatype = getattr(np, datatype)
                kwds = {}
                if chunks is not None:
                    # h5py needs a concrete tuple for the chunk shape
                    kwds["chunks"] = tuple(int(c) for c in chunks.split(","))
                if gzip:
                    kwds["compression"] = "gzip"
                ds = fd.create_dataset(output_dataset_name,
                                       shape=(depth, height, width),
                                       dtype=datatype,
                                       **kwds)
            #
            # Write the cutout
            #
            ds[vz0 - z0:vz1 - z0, vy0 - y0:vy1 - y0,
               vx0 - x0:vx1 - x0] = cutout
    rh_logger.logger.end_process("exiting", rh_logger.ExitCode.success)
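
A minimal parse_args sketch consistent with the attributes main() reads above; the flag names, types, and help strings are assumptions, since only the attribute names appear in the source:

def parse_args():
    # Hypothetical CLI definition: only the dest names come from main().
    import argparse
    parser = argparse.ArgumentParser(
        description="Cut a subvolume out of indexed datasets into HDF5")
    parser.add_argument("--index-file", dest="index_file", required=True,
                        help="JSON index of (volume, location) per dataset")
    parser.add_argument("--dataset-name", dest="dataset_name", required=True)
    for name in ("x", "y", "z", "width", "height", "depth"):
        parser.add_argument("--" + name, dest=name, type=int, required=True)
    parser.add_argument("--output-location", dest="output_location",
                        required=True, help="path of the HDF5 file to write")
    parser.add_argument("--output-dataset-name", dest="output_dataset_name",
                        default=None)
    parser.add_argument("--chunks", default=None,
                        help="comma-separated chunk shape, e.g. 64,64,64")
    parser.add_argument("--gzip", action="store_true")
    parser.add_argument("--datatype", default=None,
                        help="numpy dtype name for the output, e.g. uint8")
    return parser.parse_args()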
Example #13
    def test_02_02_complex_compute_subvolumes(self):
        #
        # When computing segmentation block joins, you chop thin slices
        # from the x, y and z edges and also use the entire segmentation
        #
        src_task = DummyTask(my_parameter="foo")
        dataset_name = "neuroproof"
        self.db.register_dataset_type(dataset_name, Persistence.Permanent,
                                      "uint8", "Raw image data volume")
        self.db.set_target_dir("/tmp/foo")
        self.db.set_temp_dir("/tmp/bar")
        volume = Volume(2048, 2048, 256, 1024, 1024, 128)
        self.db.register_dataset(self.db.get_dataset_id(), src_task,
                                 dataset_name, volume)
        #
        # The destination task that uses the entire volume
        #
        dest_task = DummyTask(my_parameter="destination")
        self.db.register_dataset_dependent(self.db.get_loading_plan_id(),
                                           dest_task, dataset_name, volume)
        #
        # The six overlaps (the x-y sketch shows four of them; the
        # z0 and z1 slabs lie above and below the plane of the sketch)
        #
        # .-.-.----.-.-.
        # | | |  1 | | |
        # .-.-.----.-.-.
        # | | |    | | |
        # | |2|    |3| |
        # | | |    | | |
        # .-.-.----.-.-.
        # | | |  4 | | |
        # .-.-.----.-.-.
        #
        overlap_x0_task = DummyTask(my_parameter="overlap-x0")
        volume_x0 = Volume(2048 + 20, 2048 + 30, 256 + 30, 10, 1024 - 60,
                           128 - 60)
        self.db.register_dataset_dependent(self.db.get_loading_plan_id(),
                                           overlap_x0_task, dataset_name,
                                           volume_x0)

        overlap_x1_task = DummyTask(my_parameter="overlap-x1")
        volume_x1 = Volume(2048 + 1024 - 30, 2048 + 30, 256 + 30, 10,
                           1024 - 60, 128 - 60)
        self.db.register_dataset_dependent(self.db.get_loading_plan_id(),
                                           overlap_x1_task, dataset_name,
                                           volume_x1)

        overlap_y0_task = DummyTask(my_parameter="overlap-y0")
        volume_y0 = Volume(2048 + 30, 2048 + 20, 256 + 30, 1024 - 60, 10,
                           128 - 60)
        self.db.register_dataset_dependent(self.db.get_loading_plan_id(),
                                           overlap_y0_task, dataset_name,
                                           volume_y0)

        overlap_y1_task = DummyTask(my_parameter="overlap-y1")
        volume_y1 = Volume(2048 + 30, 2048 + 1024 - 30, 256 + 30, 1024 - 60,
                           10, 128 - 60)
        self.db.register_dataset_dependent(self.db.get_loading_plan_id(),
                                           overlap_y1_task, dataset_name,
                                           volume_y1)

        overlap_z0_task = DummyTask(my_parameter="overlap-z0")
        volume_z0 = Volume(2048 + 30, 2048 + 30, 256 + 20, 1024 - 60,
                           1024 - 60, 10)
        self.db.register_dataset_dependent(self.db.get_loading_plan_id(),
                                           overlap_z0_task, dataset_name,
                                           volume_z0)

        overlap_z1_task = DummyTask(my_parameter="overlap-z1")
        volume_z1 = Volume(2048 + 30, 2048 + 30, 256 + 128 - 30, 1024 - 60,
                           1024 - 60, 10)
        self.db.register_dataset_dependent(self.db.get_loading_plan_id(),
                                           overlap_z1_task, dataset_name,
                                           volume_z1)
        #
        # Compute the subvolumes
        #
        self.db.compute_subvolumes()
        #
        # Check the destination task
        #
        result = self.db.get_subvolume_locations(dest_task, dataset_name)
        #
        # Each axis is cut at the overlap boundaries into five intervals
        # (20 / 10 / interior / 10 / 20), so there should be
        # 5 x 5 x 5 = 125 (!!!) volumes. The curse of dimensionality.
        #
        self.assertEqual(len(result), 125)
        for x0, x1 in ((2048, 2048 + 20), (2048 + 20, 2048 + 30),
                       (2048 + 30, 2048 + 1024 - 30),
                       (2048 + 1024 - 30, 2048 + 1024 - 20),
                       (2048 + 1024 - 20, 2048 + 1024)):
            for y0, y1 in ((2048, 2048 + 20), (2048 + 20, 2048 + 30),
                           (2048 + 30, 2048 + 1024 - 30),
                           (2048 + 1024 - 30, 2048 + 1024 - 20),
                           (2048 + 1024 - 20, 2048 + 1024)):
                for z0, z1 in ((256, 256 + 20), (256 + 20, 256 + 30),
                               (256 + 30, 256 + 128 - 30),
                               (256 + 128 - 30, 256 + 128 - 20),
                               (256 + 128 - 20, 256 + 128)):
                    for location, volume in result:
                        if volume.x == x0 and volume.x1 == x1 and \
                           volume.y == y0 and volume.y1 == y1 and \
                           volume.z == z0 and volume.z1 == z1:
                            expected = "/tmp/foo/%d/%d/%d/%s_" % \
                                (x0, y0, z0, dataset_name)
                            self.assertTrue(location.startswith(expected))
                            break
                    else:
                        self.fail()
        #
        # The other six, they should only have one slice
        #
        for task, expected_volume in ((overlap_x0_task, volume_x0),
                                      (overlap_x1_task, volume_x1),
                                      (overlap_y0_task, volume_y0),
                                      (overlap_y1_task, volume_y1),
                                      (overlap_z0_task, volume_z0),
                                      (overlap_z1_task, volume_z1)):
            result = self.db.get_subvolume_locations(task, dataset_name)
            self.assertEqual(len(result), 1)
            location, volume = result[0]
            self.assertTrue(
                location.startswith("/tmp/foo/%d/%d/%d/%s_" %
                                    (expected_volume.x, expected_volume.y,
                                     expected_volume.z, dataset_name)))
            self.assertEqual(volume.x, expected_volume.x)
            self.assertEqual(volume.x1, expected_volume.x1)
            self.assertEqual(volume.y, expected_volume.y)
            self.assertEqual(volume.y1, expected_volume.y1)
            self.assertEqual(volume.z, expected_volume.z)
            self.assertEqual(volume.z1, expected_volume.z1)