Example #1
class TableImplBase(luigi.Task):
    """ table_impl base class
    """

    task_name = "table_impl"
    src_file = os.path.abspath(__file__)

    input_files = luigi.ListParameter()
    output_files = luigi.ListParameter()
    input_key = luigi.Parameter()
    resolution = luigi.ListParameter()
    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        return self.dependency

    def require_output_folders(self):
        output_folders = [
            os.path.split(out_file)[0] for out_file in self.output_files
        ]
        output_folders = list(set(output_folders))
        for out_folder in output_folders:
            os.makedirs(out_folder, exist_ok=True)

    def run_impl(self):
        # get the global config and init configs
        shebang = self.global_config_values()[0]
        self.init(shebang)

        self.require_output_folders()

        # luigi may randomly shuffle the file lists, so we need to make sure they are ordered here
        input_files = list(self.input_files)
        input_files.sort()
        output_files = list(self.output_files)
        output_files.sort()

        # load and update the task config
        task_config = self.get_task_config()
        task_config.update({
            "input_files": input_files,
            "output_files": output_files,
            "resolution": self.resolution,
            "input_key": self.input_key
        })

        block_list = list(range(len(input_files)))
        self._write_log("scheduled %i blocks to run" % len(block_list))

        # prime and run the jobs
        n_jobs = min(len(block_list), self.max_jobs)
        self.prepare_jobs(n_jobs, block_list, task_config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
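
For orientation, here is a minimal usage sketch of how a concrete Local variant of such a base task is typically scheduled, mirroring the test snippets in the later examples. TableImplLocal, its import path and all parameter values are assumptions for illustration only.

# Hypothetical usage sketch; the Local task class and all paths/values are assumed.
import luigi
from cluster_tools.utils.task_utils import DummyTask
# from cluster_tools.table_impl import TableImplLocal  # assumed import path

task = TableImplLocal(tmp_folder='./tmp',        # scratch folder for job files and logs
                      config_dir='./configs',    # folder containing global.config
                      max_jobs=4,
                      input_files=['./in/a.n5', './in/b.n5'],
                      output_files=['./out/a.csv', './out/b.csv'],
                      input_key='data',
                      resolution=[0.025, 0.01, 0.01],
                      dependency=DummyTask())
ret = luigi.build([task], local_scheduler=True)
assert ret
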
Example #2
class SkeletonizeBase(luigi.Task):
    """ Skeletonize base class
    """

    task_name = 'skeletonize'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    # input and output volumes
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        return self.dependency

    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get the input shape
        with vu.file_reader(self.input_path, 'r') as f:
            shape = f[self.input_key].shape

        # load the skeletonize config
        task_config = self.get_task_config()

        # require output dataset
        chunks = (25, 256, 256)
        chunks = tuple(min(sh, ch) for sh, ch in zip(shape, chunks))
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=shape,
                              chunks=chunks,
                              compression='gzip',
                              dtype='uint64')

        # update the config with input and output paths and keys
        task_config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'output_key': self.output_key
        })

        # prime and run the jobs
        n_jobs = 1
        self.prepare_jobs(n_jobs, None, task_config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
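
The shebang and block shape that run_impl pulls in via global_config_values() are read from a global.config file in config_dir. Below is a minimal sketch of writing one, following the pattern of Example #8 further down; the shebang path and block shape are placeholders and the import path is an assumption.

# Sketch: write the global config consumed via global_config_values().
import json
import os
# default_global_config() is available on the concrete task classes,
# e.g. WriteLocal in Example #8; the import path here is an assumption.
from cluster_tools.write import WriteLocal

global_config = WriteLocal.default_global_config()
global_config.update({
    'shebang': '#! /path/to/conda/env/bin/python',  # placeholder interpreter
    'block_shape': [32, 256, 256]
})
os.makedirs('configs', exist_ok=True)
with open('configs/global.config', 'w') as f:
    json.dump(global_config, f)
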
Example #3
    def _test_second_stage(self):
        from cluster_tools.thresholded_components.block_components import BlockComponentsLocal
        from cluster_tools.thresholded_components.merge_offsets import MergeOffsetsLocal
        from cluster_tools.utils.task_utils import DummyTask
        task1 = BlockComponentsLocal(tmp_folder=self.tmp_folder,
                                     config_dir=self.config_folder,
                                     max_jobs=8,
                                     input_path=self.input_path,
                                     input_key=self.input_key,
                                     output_path=self.output_path,
                                     output_key=self.output_key,
                                     threshold=.5,
                                     dependency=DummyTask())
        offset_path = './tmp/offsets.json'
        with z5py.File(self.input_path) as f:
            shape = f[self.input_key].shape
        task = MergeOffsetsLocal(tmp_folder=self.tmp_folder,
                                 config_dir=self.config_folder,
                                 max_jobs=8,
                                 shape=shape,
                                 save_path=offset_path,
                                 dependency=task1)
        ret = luigi.build([task], local_scheduler=True)
        self.assertTrue(ret)
        self.assertTrue(os.path.exists(offset_path))

        # checks
        # load offsets from file
        with open(offset_path) as f:
            offsets_dict = json.load(f)
            offsets = offsets_dict['offsets']
            max_offset = int(offsets_dict['n_labels']) - 1

        # load output segmentation
        with z5py.File(self.output_path) as f:
            seg = f[self.output_key][:]

        blocking = nt.blocking([0, 0, 0], list(shape), [10, 256, 256])
        for block_id in range(blocking.numberOfBlocks):
            block = blocking.getBlock(block_id)
            bb = tuple(
                slice(beg, end) for beg, end in zip(block.begin, block.end))
            segb = seg[bb]
            n_labels = len(np.unique(segb))

            # print("Checking block:", block_id)
            # print("n-labels:", n_labels)

            # number of labels from offsets
            if block_id < blocking.numberOfBlocks - 1:
                n_offsets = offsets[block_id + 1] - offsets[block_id]
            else:
                n_offsets = max_offset - offsets[block_id]
            self.assertEqual(n_labels, n_offsets)
class InsertAffinitiesBase(luigi.Task):
    """ InsertAffinities base class
    """

    task_name = 'insert_affinities'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    affinity_path = luigi.Parameter()
    affinity_key = luigi.Parameter()
    objects_path = luigi.Parameter()
    objects_key = luigi.Parameter()
    offsets = luigi.ListParameter(default=[[-1, 0, 0], [0, -1, 0], [0, 0, -1]])
    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        return self.dependency

    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        config = self.get_task_config()
        config.update({
            'affinity_path': self.affinity_path,
            'affinity_key': self.affinity_key,
            'objects_path': self.objects_path,
            'objects_key': self.objects_key,
            'offsets': self.offsets,
            'block_shape': block_shape
        })

        shape = vu.get_shape(self.affinity_path, self.affinity_key)[1:]
        chunks = vu.file_reader(
            self.affinity_path)[self.affinity_key].chunks[1:]
        assert all(bs % ch == 0 for bs, ch in zip(block_shape, chunks))

        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #5
class VCAssignmentsBase(luigi.Task):
    """ VCAssignments base class
    """

    task_name = 'vc_assignments'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    # input volumes and graph
    segmentation_path = luigi.Parameter()
    segmentation_key = luigi.Parameter()
    vc_volume_path = luigi.Parameter()
    vc_volume_key = luigi.Parameter()
    vc_expression_path = luigi.Parameter()
    med_expression_path = luigi.Parameter()
    output_path = luigi.Parameter()
    #
    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        return self.dependency

    def run_impl(self):
        # get the global config and init configs
        shebang = self.global_config_values()[0]
        self.init(shebang)

        # load the task config
        config = self.get_task_config()

        # update the config with the input and output paths and keys
        config.update({
            'segmentation_path': self.segmentation_path,
            'segmentation_key': self.segmentation_key,
            'vc_volume_path': self.vc_volume_path,
            'vc_volume_key': self.vc_volume_key,
            'vc_expression_path': self.vc_expression_path,
            'med_expression_path': self.med_expression_path,
            'output_path': self.output_path
        })

        # prime and run the job
        self.prepare_jobs(1, None, config)
        self.submit_jobs(1)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(1)
Example #6
    def test_first_stage(self):
        from cluster_tools.thresholded_components.block_components import BlockComponentsLocal
        from cluster_tools.utils.task_utils import DummyTask
        task = BlockComponentsLocal(tmp_folder=self.tmp_folder,
                                    config_dir=self.config_folder,
                                    max_jobs=8,
                                    input_path=self.input_path,
                                    input_key=self.input_key,
                                    output_path=self.output_path,
                                    output_key=self.output_key,
                                    threshold=.5,
                                    dependency=DummyTask())
        ret = luigi.build([task], local_scheduler=True)
        self.assertTrue(ret)
        self._check_result('greater', check_for_equality=False)
class SkeletonEvaluationBase(luigi.Task):
    """ SkeletonEvaluation base class
    """

    task_name = 'skeleton_evaluation'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    skeleton_path = luigi.Parameter()
    skeleton_key = luigi.Parameter()
    output_path = luigi.Parameter()
    dependency = luigi.TaskParameter(default=DummyTask())

    @staticmethod
    def default_task_config():
        config = LocalTask.default_task_config()
        # TODO do we need task specific stuff ?
        # config.update({})
        return config

    def requires(self):
        return self.dependency

    def run_impl(self):
        # get the global config and init configs
        # TODO support roi
        shebang, block_shape, _, _ = self.global_config_values()
        self.init(shebang)

        # load the skeleton_evaluation config
        # update the config with input and output paths and keys
        task_config = self.get_task_config()
        task_config.update({'input_path': self.input_path, 'input_key': self.input_key,
                            'skeleton_path': self.skeleton_path, 'skeleton_key': self.skeleton_key,
                            'output_path': self.output_path})

        # prime and run the jobs
        n_jobs = 1
        self.prepare_jobs(n_jobs, None, task_config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #8
def write_new_seg():
    path = '/g/kreshuk/pape/Work/data/group_data/arendt/sponge/data.n5'
    key_seg = 'volumes/paintera/lmc/data/s0'
    key_ass = 'volumes/paintera/lmc/fragment-segment-assignment'
    key_ass_new = 'volumes/paintera/lmc/fragment-segment-assignment-dense'

    inflate_and_save_lut(path, key_seg, key_ass, key_ass_new)

    key_out = 'volumes/segmentation/painera_merged'

    config_folder = 'configs'
    global_config = WriteLocal.default_global_config()
    global_config.update({
        'shebang':
        "#! /g/kreshuk/pape/Work/software/conda/miniconda3/envs/cluster_env37/bin/python",
        'block_shape': [32, 256, 256]
    })

    # add_failed_blocks(global_config)
    os.makedirs('configs', exist_ok=True)
    with open('configs/global.config', 'w') as f:
        json.dump(global_config, f)

    tmp_folder = './tmp'
    max_jobs = 8
    task = WriteLocal(tmp_folder=tmp_folder,
                      config_dir=config_folder,
                      max_jobs=max_jobs,
                      dependency=DummyTask(),
                      input_path=path,
                      input_key=key_seg,
                      output_path=path,
                      output_key=key_out,
                      assignment_path=path,
                      assignment_key=key_ass_new,
                      identifier='sponge')
    ret = luigi.build([task], local_scheduler=True)
    assert ret
class GradientsBase(luigi.Task):
    """ Gradients base class
    """

    task_name = 'gradients'
    src_file = os.path.abspath(__file__)
    allow_retry = True

    path_dict = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    average_gradient = luigi.BoolParameter(default=True)
    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        return self.dependency

    def _validate_paths(self):
        shape = None

        with open(self.path_dict) as f:
            path_dict = json.load(f)

        for path in sorted(path_dict):
            key = path_dict[path]
            assert os.path.exists(path)
            with vu.file_reader(path, 'r') as f:
                assert key in f
                ds = f[key]
                if shape is None:
                    shape = ds.shape
                else:
                    # TODO support multi-channel inputs and then only check that
                    # spatial shapes agree
                    assert ds.shape == shape
        return shape

    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)
        shape = self._validate_paths()

        config = self.get_task_config()
        config.update({
            'path_dict': self.path_dict,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'block_shape': block_shape,
            'average_gradient': self.average_gradient
        })

        # TODO need to adapt to multi-channel
        chunks = tuple(min(bs // 2, sh) for bs, sh in zip(block_shape, shape))

        if self.average_gradient:
            out_shape = shape
            out_chunks = chunks
        else:
            # the number of channels is the number of inputs in the path dict
            with open(self.path_dict) as f:
                n_channels = len(json.load(f))
            out_shape = (n_channels, ) + shape
            out_chunks = (1, ) + chunks

        # make output dataset
        compression = config.pop('compression', 'gzip')
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=out_shape,
                              dtype='float32',
                              compression=compression,
                              chunks=out_chunks)

        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
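
The path_dict parameter of GradientsBase points to a JSON file that maps input file paths to dataset keys, all with matching shapes (see _validate_paths above). A small sketch of creating such a file, with placeholder paths and keys:

# Sketch: the path dict maps input file paths to dataset keys; placeholders only.
import json

path_dict = {
    '/data/predictions_a.n5': 'predictions/affinities',
    '/data/predictions_b.n5': 'predictions/affinities'
}
with open('path_dict.json', 'w') as f:
    json.dump(path_dict, f)
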
class TransformixCoordinateBase(luigi.Task):
    """ TransformixCoordinate base class
    """
    task_name = 'transformix_coordinate'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()

    transformation_file = luigi.Parameter()
    elastix_directory = luigi.Parameter()

    shape = luigi.Parameter()
    resolution = luigi.Parameter(default=None)
    dependency = luigi.TaskParameter(default=DummyTask())

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({'chunks': None, 'compression': 'gzip'})
        return config

    def requires(self):
        return self.dependency

    # update the transformation file: rewrite the spacing to our resolution
    # and point initial transformation files to the copies in the tmp folder
    def update_transformation(self, in_file, out_file):
        def update_line(line, to_write, is_numeric):
            line = line.rstrip('\n')
            line = line.split()
            if is_numeric:
                line = [line[0], "%s)" % to_write]
            else:
                line = [line[0], "\"%s\")" % to_write]
            line = " ".join(line) + "\n"
            return line

        with open(in_file, 'r') as f_in, open(out_file, 'w') as f_out:
            for line in f_in:
                if line.startswith("(Spacing") and self.resolution is not None:
                    resolution_str = " ".join(map(str, self.resolution[::-1]))
                    line = update_line(line, resolution_str, True)

                elif line.startswith("(InitialTransformParametersFileName"):
                    initial_trafo_file = line.split()[-1][1:-2]
                    if initial_trafo_file == 'NoInitialTransform':
                        continue
                    new_initial_trafo_file = os.path.split(
                        initial_trafo_file)[1]
                    new_initial_trafo_file = os.path.join(
                        self.tmp_folder, 'transformations',
                        new_initial_trafo_file)
                    line = update_line(line, new_initial_trafo_file, False)

                f_out.write(line)

    def update_transformations(self):
        trafo_folder, trafo_name = os.path.split(self.transformation_file)
        trafo_files = glob(os.path.join(trafo_folder, '*.txt'))

        out_folder = os.path.join(self.tmp_folder, 'transformations')
        os.makedirs(out_folder, exist_ok=True)

        for trafo in trafo_files:
            name = os.path.split(trafo)[1]
            out = os.path.join(out_folder, name)
            self.update_transformation(trafo, out)

        new_trafo = os.path.join(out_folder, trafo_name)
        assert os.path.exists(new_trafo)
        return new_trafo

    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)
        config = self.get_task_config()

        with open_file(self.input_path, 'r') as f:
            dtype = f[self.input_key].dtype
        chunks = config['chunks']
        if chunks is None:
            chunks = block_shape
        compression = config['compression']

        with open_file(self.output_path, 'a') as f:
            f.require_dataset(self.output_key,
                              shape=self.shape,
                              chunks=tuple(chunks),
                              compression=compression,
                              dtype=dtype)

        trafo_file = self.update_transformations()
        # update the config with the paths, the transformation file and the block shape
        config.update({
            "input_path": self.input_path,
            "input_key": self.input_key,
            "output_path": self.output_path,
            "output_key": self.output_key,
            "transformation_file": trafo_file,
            "elastix_directory": self.elastix_directory,
            "tmp_folder": self.tmp_folder,
            "block_shape": block_shape
        })

        block_list = vu.blocks_in_volume(self.shape, block_shape, roi_begin,
                                         roi_end)
        self._write_log("scheduled %i blocks to run" % len(block_list))

        # prime and run the jobs
        n_jobs = min(len(block_list), self.max_jobs)
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
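
To make the transformation rewriting above concrete, here is a standalone restatement of update_line applied to a made-up elastix Spacing line; the parameter values are illustrative only.

# Standalone illustration of the line rewriting used in update_transformation.
def update_line(line, to_write, is_numeric):
    line = line.rstrip('\n')
    line = line.split()
    if is_numeric:
        line = [line[0], "%s)" % to_write]
    else:
        line = [line[0], "\"%s\")" % to_write]
    return " ".join(line) + "\n"

# the resolution is given as [z, y, x] and written in reversed (x, y, z) order
resolution = [0.025, 0.01, 0.01]
spacing_line = "(Spacing 0.4 0.32 0.32)\n"
print(update_line(spacing_line, " ".join(map(str, resolution[::-1])), True))
# -> (Spacing 0.01 0.01 0.025)
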
Example #11
class SkeletonizeBase(luigi.Task):
    """ Skeletonize base class
    """

    task_name = 'skeletonize'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    # input and output volumes
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    number_of_labels = luigi.IntParameter()
    skeleton_format = luigi.Parameter(default='n5')
    dependency = luigi.TaskParameter(default=DummyTask())

    formats = ('volume', 'swc', 'n5')  # TODO support csv

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({
            'resolution': None,
            'size_filter': 10,
            'chunk_len': 1000
        })
        return config

    def requires(self):
        return self.dependency

    def _prepare_format_volume(self, block_shape):
        assert self.max_jobs == 1, "Output-format 'volume' only supported with a single job"

        # get the input shape
        with vu.file_reader(self.input_path, 'r') as f:
            shape = f[self.input_key].shape

        # prepare output dataset
        chunks = tuple(min(bs // 2, sh) for bs, sh in zip(block_shape, shape))
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=shape,
                              chunks=chunks,
                              compression='gzip',
                              dtype='uint64')
        # return n-jobs (=1) and block_list (=None)
        return 1, None

    def _prepare_format_swc(self, config):
        # make the output directory
        os.makedirs(os.path.join(self.output_path, self.output_key),
                    exist_ok=True)
        # make the blocking
        block_len = min(self.number_of_labels, config.get('chunk_len', 1000))
        block_list = vu.blocks_in_volume((self.number_of_labels, ),
                                         (block_len, ))
        n_jobs = min(len(block_list), self.max_jobs)
        # update the config
        config.update({
            'number_of_labels': self.number_of_labels,
            'block_len': block_len
        })
        return n_jobs, block_list

    def _prepare_format_n5(self, config):
        # make the blocking
        block_len = min(self.number_of_labels, config.get('chunk_len', 1000))
        block_list = vu.blocks_in_volume((self.number_of_labels, ),
                                         (block_len, ))
        n_jobs = min(len(block_list), self.max_jobs)
        # require output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=(self.number_of_labels, ),
                              chunks=(1, ),
                              compression='gzip',
                              dtype='uint64')
        # update the config
        config.update({
            'number_of_labels': self.number_of_labels,
            'block_len': block_len
        })
        return n_jobs, block_list

    def run_impl(self):
        assert self.skeleton_format in self.formats, self.skeleton_format
        # TODO support roi
        # get the global config and init configs
        shebang, block_shape, _, _ = self.global_config_values()
        self.init(shebang)

        # load the skeletonize config
        # update the config with input and output paths and keys
        task_config = self.get_task_config()
        task_config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'skeleton_format': self.skeleton_format
        })

        if self.skeleton_format == 'volume':
            n_jobs, block_list = self._prepare_format_volume(block_shape)
        elif self.skeleton_format == 'swc':
            n_jobs, block_list = self._prepare_format_swc(task_config)
        elif self.skeleton_format == 'n5':
            n_jobs, block_list = self._prepare_format_n5(task_config)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, task_config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #12
class ScaleToBoundariesBase(luigi.Task):
    """ scale_to_boundaries base class
    """

    task_name = 'scale_to_boundaries'
    src_file = os.path.abspath(__file__)

    # input and output volumes
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    boundaries_path = luigi.Parameter()
    boundaries_key = luigi.Parameter()
    offset = luigi.IntParameter(default=0)
    dependency = luigi.TaskParameter(default=DummyTask())

    allow_retry = False

    def requires(self):
        return self.dependency

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        # Parameters:
        # erode_by - erosion of the seeds
        # channel - channel that will be used for multiscale inputs
        # dtype - output dtype
        # chunks - output chunks
        config.update({'erode_by': 12, 'channel': 0, 'dtype': 'uint64', 'chunks': None,
                       'erode_3d': True})
        return config

    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get the shape of the boundary volume
        shape = vu.get_shape(self.boundaries_path, self.boundaries_key)
        if len(shape) == 4:
            shape = shape[1:]
        assert len(shape) == 3

        # require output dataset
        config = self.get_task_config()
        dtype = config.pop('dtype')
        chunks = config.pop('chunks')
        if chunks is None:
            chunks = tuple(bs // 2 for bs in block_shape)
        assert all(bs % ch == 0 for bs, ch in zip(block_shape, chunks)),\
            "%s, %s" % (str(block_shape), str(chunks))
        self._write_log("requiring output dataset @ %s:%s" % (self.output_path, self.output_key))
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, shape=shape, chunks=tuple(chunks),
                              compression='gzip', dtype=dtype)

        # update the config with input and output paths and keys
        # as well as block shape
        config.update({'input_path': self.input_path, 'input_key': self.input_key,
                       'output_path': self.output_path, 'output_key': self.output_key,
                       'boundaries_path': self.boundaries_path, 'boundaries_key': self.boundaries_key,
                       'offset': self.offset, 'block_shape': block_shape})

        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
        self._write_log("scheduled %i blocks to run" % len(block_list))

        prefix = 'offset%i' % self.offset
        # prime and run the jobs
        n_jobs = min(len(block_list), self.max_jobs)
        self.prepare_jobs(n_jobs, block_list, config, prefix)
        self.submit_jobs(n_jobs, prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs(prefix)
        self.check_jobs(n_jobs, prefix)

    def output(self):
        prefix = 'offset%i' % self.offset
        return luigi.LocalTarget(os.path.join(self.tmp_folder,
                                              self.task_name + '_%s.log' % prefix))
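
A quick illustration of the chunk default and the divisibility check in run_impl above: when no chunks are configured they fall back to half the block shape, which always divides the block shape evenly. The values are illustrative.

# Illustration of the default chunks and the block-shape divisibility check.
block_shape = [32, 256, 256]
chunks = tuple(bs // 2 for bs in block_shape)
assert chunks == (16, 128, 128)
assert all(bs % ch == 0 for bs, ch in zip(block_shape, chunks))
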
Example #13
class WriteBase(luigi.Task):
    """
    Write node assignments for all blocks
    """
    task_name = 'write'
    src_file = os.path.abspath(__file__)

    # path and key to input and output datasets
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    # path to the node assignments
    # the key is optional, because the assignment can either be a
    # dense assignment table stored as an n5 dataset
    # or a sparse table stored as a pickled python map
    assignment_path = luigi.Parameter()
    assignment_key = luigi.Parameter(default=None)
    # the task we depend on
    dependency = luigi.TaskParameter(default=DummyTask())
    # we may have different write tasks,
    # so we need an identifier to keep them apart
    identifier = luigi.Parameter()
    offset_path = luigi.Parameter(default='')

    def requires(self):
        return self.dependency

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({'chunks': None, 'allow_empty_assignments': False})
        return config

    def clean_up_for_retry(self, block_list, prefix):
        super().clean_up_for_retry(block_list, prefix)
        # TODO remove any output of failed blocks because it might be corrupted

    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end, block_list_path\
            = self.global_config_values(with_block_list_path=True)
        self.init(shebang)

        # get shape and chunks
        with vu.file_reader(self.input_path) as f:
            ds = f[self.input_key]
            shape = ds.shape

        config = self.get_task_config()
        chunks = config.pop('chunks', None)
        if chunks is None:
            chunks = tuple(
                min(bs // 2, sh) for bs, sh in zip(block_shape, shape))

        # require output dataset
        with vu.file_reader(self.output_path) as f:
            if self.output_key in f:
                chunks = f[self.output_key].chunks
            assert all(bs % ch == 0
                       for bs, ch in zip(block_shape, chunks)), "%s, %s" % (
                           str(block_shape), str(chunks))
            f.require_dataset(self.output_key,
                              shape=shape,
                              chunks=chunks,
                              compression='gzip',
                              dtype='uint64')

        # check if input and output datasets are identical
        in_place = (self.input_path
                    == self.output_path) and (self.input_key
                                              == self.output_key)

        if self.assignment_key is None:
            assert os.path.splitext(self.assignment_path)[-1] == '.pkl',\
                "Assignments need to be pickled map if no key is given"

        # update the config with input and output paths and keys
        # as well as block shape
        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'block_shape': block_shape,
            'assignment_path': self.assignment_path,
            'assignment_key': self.assignment_key
        })
        if self.offset_path != '':
            config.update({'offset_path': self.offset_path})
        # we only add output path and key if we do not write in place
        if not in_place:
            config.update({
                'output_path': self.output_path,
                'output_key': self.output_key
            })

        # get block list and jobs
        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape,
                                             block_shape,
                                             roi_begin,
                                             roi_end,
                                             block_list_path=block_list_path)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list, self.identifier)
        self._write_log('scheduling %i blocks to be processed' %
                        len(block_list))

        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config, self.identifier)
        self.submit_jobs(n_jobs, self.identifier)

        # wait till jobs finish and check for job success
        self.wait_for_jobs(self.identifier)
        self.check_jobs(n_jobs, self.identifier)

    def output(self):
        return luigi.LocalTarget(
            os.path.join(self.tmp_folder,
                         '%s_%s.log' % (self.task_name, self.identifier)))
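
When assignment_key is not given, WriteBase expects assignment_path to be a pickled python map (see the assert above). A minimal sketch of writing such a sparse assignment table, with made-up ids:

# Sketch: sparse node assignments saved as a pickled map (ids are placeholders).
import pickle

assignments = {1: 1, 2: 1, 3: 2, 4: 2, 5: 3}  # fragment/node id -> segment id
with open('./assignments.pkl', 'wb') as f:
    pickle.dump(assignments, f)
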
Example #14
class ComputeMeshesBase(luigi.Task):
    """ ComputeMeshes base class
    """

    task_name = 'compute_meshes'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    # input and output volumes
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    morphology_path = luigi.Parameter()
    morphology_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    number_of_labels = luigi.IntParameter()
    resolution = luigi.ListParameter()
    output_format = luigi.Parameter()
    size_threshold = luigi.IntParameter(default=None)
    dependency = luigi.TaskParameter(default=DummyTask())

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({'chunk_len': 1000, 'smoothing_iterations': 0})
        return config

    def requires(self):
        return self.dependency

    def _prepare_output(self, config):
        # make the blocking
        block_len = min(self.number_of_labels, config.get('chunk_len', 1000))
        block_list = vu.blocks_in_volume((self.number_of_labels, ),
                                         (block_len, ))
        n_jobs = min(len(block_list), self.max_jobs)
        os.makedirs(self.output_path, exist_ok=True)
        # update the config
        config.update({
            'number_of_labels': self.number_of_labels,
            'block_len': block_len
        })
        return config, n_jobs, block_list

    def run_impl(self):

        # TODO support roi
        # get the global config and init configs
        shebang, block_shape, _, _ = self.global_config_values()
        self.init(shebang)

        # load the compute_meshes config
        # update the config with input and output paths and keys
        config = self.get_task_config()
        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'morphology_path': self.morphology_path,
            'morphology_key': self.morphology_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'resolution': self.resolution,
            'size_threshold': self.size_threshold
        })
        config, n_jobs, block_list = self._prepare_output(config)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Example #15
class CopyVolumeBase(luigi.Task):
    """ copy_volume base class
    """

    task_name = 'copy_volume'
    src_file = os.path.abspath(__file__)

    # input and output volumes
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    prefix = luigi.Parameter()
    dtype = luigi.Parameter(default=None)
    fit_to_roi = luigi.BoolParameter(default=False)
    effective_scale_factor = luigi.ListParameter(default=[])
    dependency = luigi.TaskParameter(default=DummyTask())

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({
            'chunks': None,
            'compression': 'gzip',
            'reduce_channels': None
        })
        return config

    def requires(self):
        return self.dependency

    def clean_up_for_retry(self, block_list):
        super().clean_up_for_retry(block_list)
        # TODO remove any output of failed blocks because it might be corrupted

    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape, dtype and make block config
        with vu.file_reader(self.input_path, 'r') as f:
            shape = f[self.input_key].shape
            ds_dtype = f[self.input_key].dtype
            ds_chunks = f[self.input_key].chunks

        # load the config
        task_config = self.get_task_config()

        # if we have a roi, we need to:
        # - scale the roi to the effective scale, if effective scale is given
        # - shrink the shape to the roi, if fit_to_roi is True
        if roi_begin is not None:
            assert len(shape) == 3, "Don't support roi for 4d yet"
            assert roi_end is not None
            if self.effective_scale_factor:
                roi_begin = [
                    int(rb // sf)
                    for rb, sf in zip(roi_begin, self.effective_scale_factor)
                ]
                roi_end = [
                    int(re // sf)
                    for re, sf in zip(roi_end, self.effective_scale_factor)
                ]

            if self.fit_to_roi:
                out_shape = tuple(roie - roib
                                  for roib, roie in zip(roi_begin, roi_end))
                # if we fit to roi, the task config needs to be updated with the roi begin,
                # because the output bounding boxes need to be offset by roi_begin
                task_config.update({'roi_begin': roi_begin})
            else:
                out_shape = shape
        else:
            out_shape = shape

        if task_config.get('reduce_channels',
                           None) is not None and len(out_shape) == 4:
            out_shape = out_shape[1:]

        compression = task_config.pop('compression', 'gzip')
        dtype = str(ds_dtype) if self.dtype is None else self.dtype

        chunks = task_config.pop('chunks', None)
        if chunks is None:
            chunks = ds_chunks
        chunks = tuple(min(chnk, osh) for chnk, osh in zip(chunks, out_shape))
        if len(chunks) == 3:
            assert all(bs % ch == 0
                       for bs, ch in zip(block_shape, chunks)), "%s, %s" % (
                           str(block_shape), str(chunks))
        else:
            assert all(bs % ch == 0 for bs, ch in zip(block_shape, chunks[1:])
                       ), "%s, %s" % (str(block_shape), str(chunks))

        # require output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=out_shape,
                              chunks=chunks,
                              compression=compression,
                              dtype=dtype)

        # update the config with input and output paths and keys
        # as well as block shape
        task_config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'block_shape': block_shape,
            'dtype': dtype
        })

        if len(shape) == 4:
            shape = shape[1:]

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)
        self._write_log("scheduled %i blocks to run" % len(block_list))

        # prime and run the jobs
        n_jobs = min(len(block_list), self.max_jobs)
        self.prepare_jobs(n_jobs, block_list, task_config, self.prefix)
        self.submit_jobs(n_jobs, self.prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs(self.prefix)
        self.check_jobs(n_jobs, self.prefix)

    def output(self):
        return luigi.LocalTarget(
            os.path.join(self.tmp_folder,
                         self.task_name + '_%s.log' % self.prefix))
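
A worked example of the roi handling in CopyVolumeBase above: with an effective scale factor of 2 per axis the roi is halved, and with fit_to_roi the output shape shrinks to the roi extent. The numbers are illustrative.

# Worked example of the roi scaling and the fit_to_roi output shape; values are illustrative.
roi_begin, roi_end = [0, 512, 512], [64, 1024, 1024]
effective_scale_factor = [2, 2, 2]
roi_begin = [int(rb // sf) for rb, sf in zip(roi_begin, effective_scale_factor)]
roi_end = [int(re // sf) for re, sf in zip(roi_end, effective_scale_factor)]
assert roi_begin == [0, 256, 256] and roi_end == [32, 512, 512]
out_shape = tuple(re - rb for rb, re in zip(roi_begin, roi_end))  # with fit_to_roi=True
assert out_shape == (32, 256, 256)
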
Example #16
class ApplyRegistrationBase(luigi.Task):
    """ ApplyRegistration base class
    """
    default_fiji = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/software/Fiji.app/ImageJ-linux64'
    default_elastix = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/software/elastix_v4.8'
    formats = ('bdv', 'tif')

    # what about cubic etc?
    interpolation_modes = {
        'linear': 'FinalLinearInterpolator',
        'nearest': 'FinalNearestNeighborInterpolator'
    }
    result_types = ('unsigned char', 'unsigned short')

    task_name = 'apply_registration'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    input_path_file = luigi.Parameter()
    output_path_file = luigi.Parameter()
    transformation_file = luigi.Parameter()
    interpolation = luigi.Parameter(default='nearest')
    output_format = luigi.Parameter(default='bdv')
    fiji_executable = luigi.Parameter(default=default_fiji)
    elastix_directory = luigi.Parameter(default=default_elastix)
    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        return self.dependency

    @staticmethod
    def default_task_config():
        config = LocalTask.default_task_config()
        config.update({'ResultImagePixelType': None})
        return config

    # update the transformation with our interpolation mode
    # and the corresponding dtype
    def update_transformation(self, in_file, out_file, res_type):

        interpolator_name = self.interpolation_modes[self.interpolation]

        def update_line(line, to_write):
            line = line.rstrip('\n')
            line = line.split()
            line = [line[0], "\"%s\")" % to_write]
            line = " ".join(line) + "\n"
            return line

        with open(in_file, 'r') as f_in, open(out_file, 'w') as f_out:
            for line in f_in:
                # change the interpolator
                if line.startswith("(ResampleInterpolator"):
                    line = update_line(line, interpolator_name)
                # change the pixel result type
                elif line.startswith(
                        "(ResultImagePixelType") and res_type is not None:
                    line = update_line(line, res_type)
                f_out.write(line)

    def update_transformations(self, res_type):
        trafo_folder, trafo_name = os.path.split(self.transformation_file)
        assert trafo_name.startswith('TransformParameters')
        trafo_files = glob(os.path.join(trafo_folder, 'TransformParameters*'))

        out_folder = os.path.join(self.tmp_folder, 'transformations')
        os.makedirs(out_folder, exist_ok=True)

        for trafo in trafo_files:
            name = os.path.split(trafo)[1]
            out = os.path.join(out_folder, name)
            self.update_transformation(trafo, out, res_type)

        new_trafo = os.path.join(out_folder, trafo_name)
        assert os.path.exists(new_trafo)
        return new_trafo

    def run_impl(self):
        # get the global config and init configs
        shebang = self.global_config_values()[0]
        self.init(shebang)

        with open(self.input_path_file) as f:
            inputs = json.load(f)
        with open(self.output_path_file) as f:
            outputs = json.load(f)

        assert len(inputs) == len(outputs), "%i, %i" % (len(inputs),
                                                        len(outputs))
        assert all(os.path.exists(inp) for inp in inputs)
        n_files = len(inputs)

        assert os.path.exists(self.transformation_file)
        assert os.path.exists(self.fiji_executable)
        assert os.path.exists(self.elastix_directory)
        assert self.output_format in self.formats
        assert self.interpolation in self.interpolation_modes

        config = self.get_task_config()
        res_type = config.pop('ResultImagePixelType', None)
        if res_type is not None:
            assert res_type in self.result_types
        trafo_file = self.update_transformations(res_type)

        # get the split of file-ids to the volume
        file_list = vu.blocks_in_volume((n_files, ), (1, ))

        # update the config with the paths and the output format
        config.update({
            "input_path_file": self.input_path_file,
            "output_path_file": self.output_path_file,
            "transformation_file": trafo_file,
            "fiji_executable": self.fiji_executable,
            "elastix_directory": self.elastix_directory,
            "tmp_folder": self.tmp_folder,
            "output_format": self.output_format
        })

        # prime and run the jobs
        n_jobs = min(self.max_jobs, n_files)
        self.prepare_jobs(n_jobs, file_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
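
ApplyRegistrationBase reads its inputs and outputs from two JSON files containing path lists of equal length (see the asserts in run_impl). A small sketch of creating them, with placeholder paths:

# Sketch: the input/output path files are JSON lists of equal length; placeholders only.
import json

inputs = ['/data/raw/image_a.tif', '/data/raw/image_b.tif']
outputs = ['/data/registered/image_a', '/data/registered/image_b']
with open('input_paths.json', 'w') as f:
    json.dump(inputs, f)
with open('output_paths.json', 'w') as f:
    json.dump(outputs, f)
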
Example #17
class DownscalingBase(luigi.Task):
    """ downscaling base class
    """

    task_name = 'downscaling'
    src_file = os.path.abspath(__file__)

    # input and output volumes
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    # the scale used to downsample the data.
    # can be a list to account for anisotropic downscaling
    scale_factor = luigi.Parameter()
    # scale prefix for unique task identifier
    scale_prefix = luigi.Parameter()
    halo = luigi.ListParameter(default=[])
    effective_scale_factor = luigi.ListParameter(default=[])
    dependency = luigi.TaskParameter(default=DummyTask())

    interpolatable_types = ('float32', 'float64', 'uint8', 'uint16')

    def requires(self):
        return self.dependency

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({
            'library': 'vigra',
            'chunks': None,
            'compression': 'gzip',
            'library_kwargs': None
        })
        return config

    def clean_up_for_retry(self, block_list):
        super().clean_up_for_retry(block_list)
        # TODO remove any output of failed blocks because it might be corrupted

    def downsample_shape(self, shape):
        if isinstance(self.scale_factor, (list, tuple)):
            new_shape = tuple(sh // sf if sh % sf == 0 else sh // sf +
                              (sf - sh % sf)
                              for sh, sf in zip(shape, self.scale_factor))
        else:
            sf = self.scale_factor
            new_shape = tuple(sh // sf if sh % sf == 0 else sh // sf +
                              (sf - sh % sf) for sh in shape)

        return new_shape

    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end, block_list_path = self.global_config_values(
            with_block_list_path=True)
        self.init(shebang)

        # get shape, dtype and make block config
        with vu.file_reader(self.input_path, 'r') as f:
            prev_shape = f[self.input_key].shape
            dtype = f[self.input_key].dtype
        assert len(prev_shape) == 3, "Only support 3d inputs"

        shape = self.downsample_shape(prev_shape)
        self._write_log('downscaling with factor %s from shape %s to %s' %
                        (str(self.scale_factor), str(prev_shape), str(shape)))

        # load the downscaling config
        task_config = self.get_task_config()

        # make sure that we have order 0 downscaling if our datatype is not interpolatable
        library = task_config.get('library', 'vigra')
        assert library == 'vigra', "Downscaling is only supported with vigra"
        if dtype not in self.interpolatable_types:
            opts = task_config.get('library_kwargs', {})
            opts = {} if opts is None else opts
            order = opts.get('order', None)
            assert order == 0,\
                "datatype %s is not interpolatable, set 'library_kwargs' = {'order': 0} to downscale it" % dtype

        # get the scale factor and check if we
        # do isotropic scaling
        scale_factor = self.scale_factor
        if isinstance(scale_factor, int):
            pass
        elif all(sf == scale_factor[0] for sf in scale_factor):
            assert len(scale_factor) == 3
            scale_factor = scale_factor[0]
        else:
            assert len(scale_factor) == 3
            # for now, we only support downscaling in-plane if the scale-factor
            # is anisotropic
            assert scale_factor[0] == 1
            assert scale_factor[1] == scale_factor[2]

        # read the output chunks
        chunks = task_config.pop('chunks', None)
        if chunks is None:
            chunks = tuple(bs // 2 for bs in block_shape)
        else:
            chunks = tuple(chunks)
            # TODO verify chunks further
            assert len(chunks) == 3, "Chunks must be 3d"
        chunks = tuple(min(ch, sh) for sh, ch in zip(shape, chunks))

        compression = task_config.pop('compression', 'gzip')
        # require output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=shape,
                              chunks=chunks,
                              compression=compression,
                              dtype=dtype)

        # update the config with input and output paths and keys
        # as well as block shape
        task_config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'block_shape': block_shape,
            'scale_factor': scale_factor,
            'halo': self.halo if self.halo else None
        })

        # if we have a roi, we need to re-sample it
        if roi_begin is not None:
            assert roi_end is not None
            effective_scale = self.effective_scale_factor if\
                self.effective_scale_factor else scale_factor
            self._write_log("downscaling roi with effective scale %s" %
                            str(effective_scale))
            self._write_log("ROI before scaling: %s to %s" %
                            (str(roi_begin), str(roi_end)))
            if isinstance(effective_scale, int):
                roi_begin = [rb // effective_scale for rb in roi_begin]
                roi_end = [
                    re // effective_scale if re is not None else sh
                    for re, sh in zip(roi_end, shape)
                ]
            else:
                roi_begin = [
                    rb // sf for rb, sf in zip(roi_begin, effective_scale)
                ]
                roi_end = [
                    re // sf if re is not None else sh
                    for re, sf, sh in zip(roi_end, effective_scale, shape)
                ]
            self._write_log("ROI after scaling: %s to %s" %
                            (str(roi_begin), str(roi_end)))

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end, block_list_path)
            self._write_log("scheduled %i blocks to run" % len(block_list))
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        # prime and run the jobs
        n_jobs = min(len(block_list), self.max_jobs)
        self.prepare_jobs(n_jobs, block_list, task_config, self.scale_prefix)
        self.submit_jobs(n_jobs, self.scale_prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs(self.scale_prefix)
        self.check_jobs(n_jobs, self.scale_prefix)

    def output(self):
        return luigi.LocalTarget(
            os.path.join(self.tmp_folder,
                         self.task_name + '_%s.log' % self.scale_prefix))
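
A tiny standalone check of the shape computation in downsample_shape: the helper below restates the list branch of the method so it can be run on its own; for a factor of 2, shapes that do not divide evenly are rounded up.

# Standalone restatement of downsample_shape for a list scale factor.
def downsample_shape(shape, scale_factor):
    return tuple(sh // sf if sh % sf == 0 else sh // sf + (sf - sh % sf)
                 for sh, sf in zip(shape, scale_factor))

assert downsample_shape((100, 511, 512), (1, 2, 2)) == (100, 256, 256)
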
Example #18
class InferenceBase(luigi.Task):
    """ Inference base class
    """

    task_name = 'inference'
    src_file = os.path.abspath(__file__)

    # input volumes and graph
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.DictParameter()
    checkpoint_path = luigi.Parameter()
    halo = luigi.ListParameter()
    mask_path = luigi.Parameter(default='')
    mask_key = luigi.Parameter(default='')
    framework = luigi.Parameter(default='pytorch')
    #
    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        return self.dependency

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({
            'dtype': 'uint8',
            'compression': 'gzip',
            'chunks': None,
            'gpu_type': '2080Ti',
            'device_mapping': None,
            'use_best': True,
            'tda_config': {},
            'prep_model': None
        })
        return config

    def clean_up_for_retry(self, block_list):
        super().clean_up_for_retry(block_list)
        # TODO remove any output of failed blocks because it might be corrupted

    def run_impl(self):
        # TODO support more frameworks
        # assert self.framework in ('pytorch', 'tensorflow', 'caffe', 'inferno')
        assert self.framework in ('pytorch', 'inferno')

        # get the global config and init configs
        self.make_dirs()
        shebang, block_shape, roi_begin, roi_end, block_list_path = self.global_config_values(
            with_block_list_path=True)
        self.init(shebang)

        # load the task config
        config = self.get_task_config()
        dtype = config.pop('dtype', 'uint8')
        compression = config.pop('compression', 'gzip')
        chunks = config.pop('chunks', None)
        assert dtype in ('uint8', 'float32')

        # get shapes and chunks
        shape = vu.get_shape(self.input_path, self.input_key)
        chunks = tuple(chunks) if chunks is not None else tuple(
            bs // 2 for bs in block_shape)
        # make sure block shape can be divided by chunks
        assert all(bs % ch == 0 for ch, bs in zip(chunks, block_shape)),\
            "%s, %s" % (str(chunks), block_shape)

        # check if we have single dataset or multi dataset output
        out_key_dict = self.output_key
        output_keys = list(out_key_dict.keys())
        channel_mapping = list(out_key_dict.values())

        # make output volumes
        with vu.file_reader(self.output_path) as f:
            for out_key, out_channels in zip(output_keys, channel_mapping):
                assert len(out_channels) == 2
                n_channels = out_channels[1] - out_channels[0]
                assert n_channels > 0
                if n_channels > 1:
                    out_shape = (n_channels, ) + shape
                    out_chunks = (1, ) + chunks
                else:
                    out_shape = shape
                    out_chunks = chunks

                f.require_dataset(out_key,
                                  shape=out_shape,
                                  chunks=out_chunks,
                                  dtype=dtype,
                                  compression=compression)

        # update the config
        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'checkpoint_path': self.checkpoint_path,
            'block_shape': block_shape,
            'halo': self.halo,
            'output_keys': output_keys,
            'channel_mapping': channel_mapping,
            'framework': self.framework
        })
        if self.mask_path != '':
            assert self.mask_key != ''
            config.update({
                'mask_path': self.mask_path,
                'mask_key': self.mask_key
            })

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape,
                                             block_shape,
                                             roi_begin,
                                             roi_end,
                                             block_list_path=block_list_path)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        n_jobs = min(len(block_list), self.max_jobs)
        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
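
The output_key dict maps each output dataset name to a [channel_begin, channel_end) range of the network prediction. A minimal, self-contained sketch of how those ranges translate into dataset shapes and chunks (all values below are hypothetical, not taken from the task above):

shape = (100, 1024, 1024)            # spatial shape of the input volume
chunks = (50, 128, 128)              # half of a hypothetical block shape
output_key = {'affinities': [0, 3], 'foreground': [3, 4]}

for out_key, (chan_begin, chan_end) in output_key.items():
    n_channels = chan_end - chan_begin
    if n_channels > 1:
        # multi-channel outputs get a leading channel axis with chunk size 1
        out_shape, out_chunks = (n_channels,) + shape, (1,) + chunks
    else:
        # single-channel outputs stay purely spatial
        out_shape, out_chunks = shape, chunks
    print(out_key, out_shape, out_chunks)
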
Example #19
0
class SkeletonizeBase(luigi.Task):
    """ Skeletonize base class
    """

    task_name = 'skeletonize'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    # input and output volumes
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    morphology_path = luigi.Parameter()
    morphology_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    number_of_labels = luigi.IntParameter()
    resolution = luigi.ListParameter()
    size_threshold = luigi.IntParameter(default=None)
    method = luigi.Parameter(default='thinning')
    dependency = luigi.TaskParameter(default=DummyTask())

    methods = get_method_names()

    # expose more skeletonization parameters once they are supported
    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({'chunk_len': 1000, 'method_kwargs': {}})
        return config

    def requires(self):
        return self.dependency

    def _prepare_output(self, config):
        # make the blocking
        block_len = min(self.number_of_labels, config.get('chunk_len', 1000))
        block_list = vu.blocks_in_volume((self.number_of_labels,),
                                         (block_len,))
        n_jobs = min(len(block_list), self.max_jobs)
        # require output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, shape=(self.number_of_labels,),
                              chunks=(1,), compression='gzip', dtype='uint64')
        # update the config
        config.update({'number_of_labels': self.number_of_labels,
                       'block_len': block_len})
        return config, n_jobs, block_list

    def run_impl(self):
        assert self.method in self.methods,\
            "Method %s is not supported, must be one of %s" % (self.method, str(self.methods))

        # TODO support roi
        # get the global config and init configs
        shebang, block_shape, _, _ = self.global_config_values()
        self.init(shebang)

        # load the skeletonize config
        # update the config with input and output paths and keys
        config = self.get_task_config()
        config.update({'input_path': self.input_path, 'input_key': self.input_key,
                       'morphology_path': self.morphology_path,
                       'morphology_key': self.morphology_key,
                       'output_path': self.output_path, 'output_key': self.output_key,
                       'resolution': self.resolution, 'size_threshold': self.size_threshold,
                       'method': self.method})
        config, n_jobs, block_list = self._prepare_output(config)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
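
The skeletons are computed per label id, so _prepare_output blocks over the 1d range of label ids rather than over the volume. A hedged, self-contained sketch of that 1d blocking (vu.blocks_in_volume is the pipeline's own helper; this only mimics the 1d case for illustration):

number_of_labels = 2500
block_len = min(number_of_labels, 1000)          # 'chunk_len' from the config
n_blocks = (number_of_labels + block_len - 1) // block_len
block_list = list(range(n_blocks))
print(block_list)  # -> [0, 1, 2]; block i covers labels [i * block_len, min((i + 1) * block_len, number_of_labels))
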
Example #20
0
class SparseLiftedNeighborhoodBase(luigi.Task):
    """ SparseLiftedNeighborhood base class
    """

    task_name = 'sparse_lifted_neighborhood'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    graph_path = luigi.Parameter()
    graph_key = luigi.Parameter()
    node_label_path = luigi.Parameter()
    node_label_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    prefix = luigi.Parameter()
    nh_graph_depth = luigi.IntParameter()
    node_ignore_label = luigi.IntParameter(default=0)
    # different modes for adding lifted edges:
    # "all": add lifted edges between all nodes that have a label
    # "same": add lifted edges only between nodes with the same label
    # "different": add lifted edges only between nodes with different labels
    mode = luigi.Parameter(default='all')
    dependency = luigi.TaskParameter(default=DummyTask())

    modes = ('all', 'same', 'different')

    def requires(self):
        return self.dependency

    def run_impl(self):
        # get the global config and init configs
        shebang = self.global_config_values()[0]
        self.init(shebang)

        assert self.mode in self.modes, "Invalid mode %s" % self.mode

        # load the task config
        config = self.get_task_config()

        # update the config with input and graph paths and keys
        # as well as block shape
        config.update({
            'graph_path': self.graph_path,
            'graph_key': self.graph_key,
            'node_label_path': self.node_label_path,
            'node_label_key': self.node_label_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'nh_graph_depth': self.nh_graph_depth,
            'node_ignore_label': self.node_ignore_label,
            'mode': self.mode
        })

        # prime and run the jobs
        self.prepare_jobs(1, None, config, self.prefix)
        self.submit_jobs(1, self.prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs(self.prefix)
        self.check_jobs(1, self.prefix)

    # part of the luigi API
    def output(self):
        return luigi.LocalTarget(
            os.path.join(self.tmp_folder,
                         self.task_name + '_%s.log' % self.prefix))
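
The mode parameter controls which node pairs in the graph neighborhood become lifted edges. A hedged numpy sketch of the three modes on candidate uv pairs (this mirrors the documented semantics, not the actual implementation of the downstream job):

import numpy as np

def filter_lifted_edges(uv_ids, node_labels, mode='all', ignore_label=0):
    labels_u, labels_v = node_labels[uv_ids[:, 0]], node_labels[uv_ids[:, 1]]
    # nodes carrying the ignore label never get lifted edges
    keep = (labels_u != ignore_label) & (labels_v != ignore_label)
    if mode == 'same':
        keep &= labels_u == labels_v
    elif mode == 'different':
        keep &= labels_u != labels_v
    return uv_ids[keep]

uv_ids = np.array([[0, 1], [1, 2], [2, 3]])
node_labels = np.array([1, 1, 2, 0])
print(filter_lifted_edges(uv_ids, node_labels, mode='different'))  # -> [[1 2]]
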
Example #21
0
class EmbeddingDistancesBase(luigi.Task):
    """ EmbeddingDistances base class
    """

    task_name = 'embedding_distances'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    path_dict = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    offsets = luigi.ListParameter(default=[[-1, 0, 0], [0, -1, 0], [0, 0, -1]])
    norm = luigi.Parameter(default='l2')
    threshold = luigi.FloatParameter(default=None)
    threshold_mode = luigi.Parameter(default='greater')
    dependency = luigi.TaskParameter(default=DummyTask())

    threshold_modes = ('greater', 'less', 'equal')
    norms = ('l2', 'cosine')

    def requires(self):
        return self.dependency

    def _validate_paths(self):
        shape = None

        with open(self.path_dict) as f:
            path_dict = json.load(f)

        for path in sorted(path_dict):
            key = path_dict[path]
            assert os.path.exists(path)
            with vu.file_reader(path, 'r') as f:
                assert key in f
                ds = f[key]
                if shape is None:
                    shape = ds.shape
                else:
                    # TODO support multi-channel inputs and then only check that
                    # spatial shapes agree
                    assert ds.shape == shape
        return shape

    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        assert self.norm in self.norms
        if self.threshold is not None:
            assert self.threshold_mode in self.threshold_modes

        shape = self._validate_paths()
        config = self.get_task_config()
        config.update({
            'path_dict': self.path_dict,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'offsets': self.offsets,
            'block_shape': block_shape,
            'norm': self.norm,
            'threshold': self.threshold,
            'threshold_mode': self.threshold_mode
        })

        n_channels = len(self.offsets)
        chunks = tuple(min(bs // 2, sh) for bs, sh in zip(block_shape, shape))

        out_shape = (n_channels, ) + shape
        out_chunks = (1, ) + chunks

        # make output dataset
        compression = config.pop('compression', 'gzip')
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=out_shape,
                              dtype='float32',
                              compression=compression,
                              chunks=out_chunks)

        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
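
Each offset produces one output channel holding the distance between a voxel's embedding and the embedding of its offset neighbor. A hedged sketch of that core computation for a single offset, using the l2 or cosine norm (boundary handling, thresholding and the blockwise layout of the real job are not shown):

import numpy as np

def embedding_distance(embeddings, offset, norm='l2'):
    # embeddings: (channels, z, y, x); offset: spatial shift, e.g. (0, -1, 0)
    shifted = np.roll(embeddings, shift=offset, axis=(1, 2, 3))
    if norm == 'l2':
        return np.linalg.norm(embeddings - shifted, axis=0)
    # cosine distance: 1 - cosine similarity
    dot = (embeddings * shifted).sum(axis=0)
    norms = np.linalg.norm(embeddings, axis=0) * np.linalg.norm(shifted, axis=0)
    return 1.0 - dot / np.maximum(norms, 1e-6)

emb = np.random.rand(16, 8, 32, 32).astype('float32')
print(embedding_distance(emb, (0, -1, 0), norm='cosine').shape)  # -> (8, 32, 32)
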
Example #22
0
class InsertAffinitiesBase(luigi.Task):
    """ InsertAffinities base class
    """

    task_name = 'insert_affinities'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    objects_path = luigi.Parameter()
    objects_key = luigi.Parameter()
    offsets = luigi.ListParameter(default=[[-1, 0, 0],
                                           [0, -1, 0],
                                           [0, 0, -1]])
    dependency = luigi.TaskParameter(default=DummyTask())

    @staticmethod
    def default_task_config():
        config = LocalTask.default_task_config()
        config.update({'erode_by': 6, 'zero_objects_list': None,
                       'chunks': None, 'dilate_by': 2, 'erode_3d': True})
        return config

    def requires(self):
        return self.dependency

    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        config = self.get_task_config()
        config.update({'input_path': self.input_path,
                       'input_key': self.input_key,
                       'output_path': self.output_path,
                       'output_key': self.output_key,
                       'objects_path': self.objects_path,
                       'objects_key': self.objects_key,
                       'offsets': self.offsets,
                       'block_shape': block_shape})

        shape = vu.get_shape(self.input_path, self.input_key)
        dtype = vu.file_reader(self.input_path, 'r')[self.input_key].dtype

        chunks = config['chunks']
        if chunks is None:
            chunks = vu.file_reader(self.input_path, 'r')[self.input_key].chunks
        assert all(bs % ch == 0 for bs, ch in zip(block_shape, chunks[1:]))
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, shape=tuple(shape), chunks=tuple(chunks),
                              dtype=dtype, compression='gzip')

        shape = shape[1:]
        block_list = vu.blocks_in_volume(shape, block_shape,
                                         roi_begin, roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
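
One plausible reading of the erode_by / dilate_by config values (the actual blockwise worker is not shown in this example): the extra objects are eroded and dilated to define an interior and a rim, and the affinity channels are overwritten accordingly. A hedged, self-contained sketch under that assumption:

import numpy as np
from scipy.ndimage import binary_erosion, binary_dilation

def insert_affinities(affs, object_mask, erode_by=6, dilate_by=2):
    # affs: (n_offsets, z, y, x) affinity map, object_mask: boolean (z, y, x)
    interior = binary_erosion(object_mask, iterations=erode_by)
    rim = binary_dilation(object_mask, iterations=dilate_by) & ~interior
    affs = affs.copy()
    affs[:, interior] = 1.0   # fully attractive inside the (eroded) objects
    affs[:, rim] = 0.0        # repulsive along the dilated object boundary
    return affs
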
Example #23
0
class MultiscaleInferenceBase(luigi.Task):
    """ MultiscaleInference base class
    """

    task_name = 'multiscale_inference'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    # input volume, output volume and inference parameter
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    input_scales = luigi.ListParameter()
    scale_factors = luigi.ListParameter()

    output_path = luigi.Parameter()
    output_key = luigi.DictParameter()
    checkpoint_path = luigi.Parameter()
    halos = luigi.ListParameter()
    multiscale_output = luigi.BoolParameter(default=False)
    mask_path = luigi.Parameter(default='')
    mask_key = luigi.Parameter(default='')
    framework = luigi.Parameter(default='pytorch')
    #
    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        return self.dependency

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({'dtype': 'uint8', 'compression': 'gzip', 'chunks': None,
                       'gpu_type': '2080Ti', 'device_mapping': None,
                       'use_best': True, 'prep_model': None, 'channel_accumulation': None})
        return config

    def run_impl(self):
        assert self.framework in ('pytorch', 'inferno')

        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the task config
        config = self.get_task_config()
        dtype = config.pop('dtype', 'uint8')
        compression = config.pop('compression', 'gzip')
        chunks = config.pop('chunks', None)
        assert dtype in ('uint8', 'float32')

        # check the input datasets
        shapes = []
        with vu.file_reader(self.input_path, 'r') as f:
            g = f[self.input_key]
            for ii, scale in enumerate(self.input_scales):
                assert scale in g
                shapes.append(g[scale].shape)
        shape = shapes[0]
        n_scales = len(self.input_scales)
        assert len(self.scale_factors) == n_scales - 1
        assert len(self.halos) == n_scales, "%i, %i" % (len(self.halos), n_scales)

        # get shapes and chunks
        chunks = tuple(chunks) if chunks is not None else tuple(bs // 2 for bs in block_shape)
        # make sure block shape can be divided by chunks
        assert all(bs % ch == 0 for ch, bs in zip(chunks, block_shape)),\
            "%s, %s" % (str(chunks), block_shape)

        # check if we have a single-dataset or multi-dataset output
        out_key_dict = self.output_key
        output_keys = list(out_key_dict.keys())
        output_params = list(out_key_dict.values())

        channel_accumulation = config.get('channel_accumulation', None)

        # TODO support different channel mapping for different scales
        # make output volumes
        with vu.file_reader(self.output_path) as f:
            for out_key, out_channels in zip(output_keys, output_params):
                assert len(out_channels) == 2
                n_channels = out_channels[1] - out_channels[0]
                assert n_channels > 0

                if self.multiscale_output:
                    for scale, this_shape in zip(self.input_scales, shapes):
                        if n_channels > 1 and channel_accumulation is None:
                            out_shape = (n_channels,) + this_shape
                            out_chunks = (1,) + chunks
                        else:
                            out_shape = this_shape
                            out_chunks = chunks

                        this_key = os.path.join(out_key, scale)
                        f.require_dataset(this_key, shape=out_shape,
                                          chunks=out_chunks, dtype=dtype,
                                          compression=compression)
                else:
                    if n_channels > 1 and channel_accumulation is None:
                        out_shape = (n_channels,) + shape
                        out_chunks = (1,) + chunks
                    else:
                        out_shape = shape
                        out_chunks = chunks

                    f.require_dataset(out_key, shape=out_shape,
                                      chunks=out_chunks, dtype=dtype, compression=compression)

        # update the config
        config.update({'input_path': self.input_path, 'input_key': self.input_key,
                       'input_scales': self.input_scales, 'scale_factors': self.scale_factors,
                       'output_path': self.output_path, 'checkpoint_path': self.checkpoint_path,
                       'block_shape': block_shape, 'halos': self.halos,
                       'output_keys': output_keys, 'channel_mapping': output_params,
                       'framework': self.framework, 'multiscale_output': self.multiscale_output})
        if self.mask_path != '':
            assert self.mask_key != ''
            config.update({'mask_path': self.mask_path, 'mask_key': self.mask_key})

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        n_jobs = min(len(block_list), self.max_jobs)
        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
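
With multiscale_output enabled, every output key becomes a group with one dataset per input scale. A self-contained sketch of the resulting dataset layout (all values hypothetical):

import os

input_scales = ['s0', 's1', 's2']
shapes = [(100, 1024, 1024), (100, 512, 512), (100, 256, 256)]
output_key = {'predictions': [0, 3]}

for out_key, (c0, c1) in output_key.items():
    n_channels = c1 - c0
    for scale, shape in zip(input_scales, shapes):
        # without channel accumulation, multi-channel outputs get a channel axis
        out_shape = (n_channels,) + shape if n_channels > 1 else shape
        print(os.path.join(out_key, scale), out_shape)
# -> predictions/s0 (3, 100, 1024, 1024), predictions/s1 (3, 100, 512, 512), ...
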
Example #24
0
class AffineBase(luigi.Task):
    """ affine base class
    """

    task_name = 'affine'
    src_file = os.path.abspath(__file__)

    # input and output volumes
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()

    # transformation parameters
    transformation = luigi.ListParameter()
    shape = luigi.ListParameter()
    order = luigi.IntParameter(default=0)

    dependency = luigi.TaskParameter(default=DummyTask())

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({
            'chunks': None,
            'compression': 'gzip',
            'fill_value': 0,
            'sigma_anti_aliasing': None
        })
        return config

    def requires(self):
        return self.dependency

    def clean_up_for_retry(self, block_list):
        super().clean_up_for_retry(block_list)
        # TODO remove any output of failed blocks because it might be corrupted

    def compute_shape(self, input_shape):
        pass

    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape, dtype and make block config
        with vu.file_reader(self.input_path, 'r') as f:
            dtype = f[self.input_key].dtype

        # load the config
        task_config = self.get_task_config()
        compression = task_config.pop('compression', 'gzip')
        chunks = task_config.pop('chunks', None)
        if chunks is None:
            chunks = block_shape

        # require output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=self.shape,
                              chunks=tuple(chunks),
                              compression=compression,
                              dtype=dtype)

        # update the config with input and output paths and keys
        # as well as block shape
        task_config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'block_shape': block_shape,
            'transformation': self.transformation,
            'shape': self.shape,
            'order': self.order
        })

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(self.shape, block_shape,
                                             roi_begin, roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)
        self._write_log("scheduled %i blocks to run" % len(block_list))

        # prime and run the jobs
        n_jobs = min(len(block_list), self.max_jobs)
        self.prepare_jobs(n_jobs, block_list, task_config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
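
The transformation parameter presumably encodes an affine matrix that is applied to the whole volume, with the given interpolation order and a fixed output shape. A hedged sketch of the underlying resampling with scipy (the blockwise logic and halo handling of the real task are not shown):

import numpy as np
from scipy.ndimage import affine_transform

volume = np.random.rand(32, 64, 64).astype('float32')
matrix = np.eye(4)
matrix[:3, 3] = [0, 5, -5]           # a pure translation as an example

# scipy expects the inverse mapping from output to input coordinates
inv = np.linalg.inv(matrix)
transformed = affine_transform(volume, inv[:3, :3], offset=inv[:3, 3],
                               output_shape=volume.shape, order=0, cval=0)
print(transformed.shape)  # -> (32, 64, 64)
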
Example #25
0
class ThresholdBase(luigi.Task):
    """ Threshold base class
    """

    task_name = 'threshold'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    threshold = luigi.FloatParameter()
    threshold_mode = luigi.Parameter(default='greater')
    channel = luigi.Parameter(default=None)
    # task that is required before running this task
    dependency = luigi.TaskParameter(DummyTask())

    threshold_modes = ('greater', 'less', 'equal')

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({'sigma_prefilter': 0})
        return config

    def requires(self):
        return self.dependency

    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end, block_list_path\
            = self.global_config_values(with_block_list_path=True)
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)

        assert self.threshold_mode in self.threshold_modes
        config = self.get_task_config()
        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'block_shape': block_shape,
            'threshold': self.threshold,
            'threshold_mode': self.threshold_mode
        })

        # get chunks
        chunks = config.pop('chunks', None)
        if chunks is None:
            chunks = tuple(bs // 2 for bs in block_shape)

        # check if we have a multi-channel volume and specify a channel
        # to apply the threshold to
        if self.channel is None:
            # if no channel is specified, we need 3d input
            assert len(shape) == 3, str(len(shape))
        else:
            # if channel is specified, we need 4d input
            assert isinstance(self.channel, (int, tuple, list))
            assert len(shape) == 4, str(len(shape))
            if isinstance(self.channel, int):
                assert shape[0] > self.channel, "%i, %i" % (shape[0],
                                                            self.channel)
            else:
                assert all(isinstance(chan, int) for chan in self.channel)
                assert shape[0] > max(
                    self.channel), "%i, %i" % (shape[0], max(self.channel))
            shape = shape[1:]
            config.update({'channel': self.channel})

        # clip chunks
        chunks = tuple(min(ch, sh) for ch, sh in zip(chunks, shape))

        # make output dataset
        compression = config.pop('compression', 'gzip')
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=shape,
                              dtype='uint8',
                              compression=compression,
                              chunks=chunks)

        block_list = vu.blocks_in_volume(shape,
                                         block_shape,
                                         roi_begin,
                                         roi_end,
                                         block_list_path=block_list_path)
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
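
A hedged sketch of the three threshold modes applied to a probability map; the blockwise job writes the result as uint8 into the dataset created above:

import numpy as np

def apply_threshold(values, threshold, mode='greater'):
    if mode == 'greater':
        mask = values > threshold
    elif mode == 'less':
        mask = values < threshold
    elif mode == 'equal':
        mask = values == threshold
    else:
        raise ValueError("Invalid threshold mode %s" % mode)
    return mask.astype('uint8')

print(apply_threshold(np.array([0.1, 0.5, 0.8]), 0.5, mode='greater'))  # -> [0 0 1]
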
Example #26
0
class MorphologyBase(luigi.Task):
    """ Morphology base class
    """

    task_name = 'morphology'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    # compute cell features or nucleus features?
    compute_cell_features = luigi.BoolParameter()

    # paths to raw data and segmentations
    # if the raw path is None, we don't compute intensity features
    raw_path = luigi.Parameter(default=None)
    # we always need the nucleus segmentation
    nucleus_segmentation_path = luigi.Parameter()
    # we only need the cell segmentation if we compute cell morphology features
    cell_segmentation_path = luigi.Parameter(default=None)
    # we only need the chromatin segmentation if we compute nucleus features
    chromatin_segmentation_path = luigi.Parameter(default=None)

    # the scale used for computation, relative to the raw scale
    scale = luigi.IntParameter(default=3)

    # the input tables paths for the default table, the
    # nucleus mapping table and the region mapping table
    in_table_path = luigi.Parameter()
    # only need the mapping paths for the nucleus features
    nucleus_mapping_path = luigi.Parameter(default=None)
    region_mapping_path = luigi.Parameter(default=None)

    # prefix for the output tables
    output_prefix = luigi.Parameter()

    # minimum and maximum sizes for objects / bounding box
    min_size = luigi.IntParameter()
    max_size = luigi.IntParameter(default=None)
    max_bb = luigi.IntParameter()

    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        return self.dependency

    def _update_config_for_cells(self, config):
        # check the relevant inputs for the cell morphology
        assert self.cell_segmentation_path is not None
        assert self.nucleus_mapping_path is not None
        assert self.region_mapping_path is not None
        config.update({'cell_segmentation_path': self.cell_segmentation_path,
                       'nucleus_segmentation_path': self.nucleus_segmentation_path,
                       'raw_path': self.raw_path,
                       'output_prefix': self.output_prefix,
                       'in_table_path': self.in_table_path,
                       'nucleus_mapping_path': self.nucleus_mapping_path,
                       'region_mapping_path': self.region_mapping_path,
                       'scale': self.scale, 'max_bb': self.max_bb,
                       'min_size': self.min_size, 'max_size': self.max_size})
        return config

    def _update_config_for_nuclei(self, config):
        # check the relevant inputs for the nucleus morphology
        assert self.chromatin_segmentation_path is not None
        assert self.raw_path is not None
        config.update({'nucleus_segmentation_path': self.nucleus_segmentation_path,
                       'chromatin_segmentation_path': self.chromatin_segmentation_path,
                       'raw_path': self.raw_path,
                       'output_prefix': self.output_prefix,
                       'in_table_path': self.in_table_path,
                       'scale': self.scale, 'max_bb': self.max_bb,
                       'min_size': self.min_size, 'max_size': self.max_size})
        return config

    def _get_number_of_labels(self):
        seg_path = self.cell_segmentation_path if self.compute_cell_features else\
            self.nucleus_segmentation_path
        is_h5 = os.path.splitext(seg_path)[1].lower() in ('.hdf', '.hdf5', '.h5')
        key = get_key(is_h5, time_point=0, setup_id=0, scale=0)
        with vu.file_reader(seg_path, 'r') as f:
            n_labels = int(f[key].attrs['maxId']) + 1
        return n_labels

    def _compute_block_len(self, number_of_labels):
        ids_per_job = int(ceil(float(number_of_labels) / self.max_jobs))
        return ids_per_job

    def run_impl(self):
        # get the global config and init configs
        shebang = self.global_config_values()[0]
        self.init(shebang)

        # load the task config
        config = self.get_task_config()

        if self.compute_cell_features:
            config = self._update_config_for_cells(config)
        else:
            config = self._update_config_for_nuclei(config)

        # TODO match block size and number of blocks
        # the block length is derived from the number of labels and max_jobs
        number_of_labels = self._get_number_of_labels()
        block_len = self._compute_block_len(number_of_labels)
        block_list = vu.blocks_in_volume([number_of_labels], [block_len])
        config.update({'block_len': block_len,
                       'compute_cell_features': self.compute_cell_features,
                       'number_of_labels': number_of_labels})

        prefix = 'cells' if self.compute_cell_features else 'nuclei'
        # prime and run the job
        n_jobs = min(len(block_list), self.max_jobs)
        self.prepare_jobs(n_jobs, block_list, config, prefix)
        self.submit_jobs(n_jobs, prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs, prefix)

    def output(self):
        prefix = 'cells' if self.compute_cell_features else 'nuclei'
        out_path = os.path.join(self.tmp_folder,
                                '%s_%s.log' % (self.task_name, prefix))
        return luigi.LocalTarget(out_path)
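
A self-contained sketch of how the label ids are distributed across jobs: _compute_block_len gives ceil(number_of_labels / max_jobs) ids per job, so each job handles one contiguous label range.

from math import ceil

number_of_labels, max_jobs = 12345, 8
block_len = int(ceil(float(number_of_labels) / max_jobs))
n_blocks = int(ceil(number_of_labels / block_len))
print(block_len, n_blocks)  # -> 1544 ids per block, 8 blocks / jobs
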
Example #27
0
class WatershedFromSeedsBase(luigi.Task):
    """ WatershedFromSeeds base class
    """

    task_name = 'watershed_from_seeds'
    src_file = os.path.abspath(__file__)

    # input and output volumes
    input_path = luigi.Parameter()
    input_key = luigi.Parameter()
    seeds_path = luigi.Parameter()
    seeds_key = luigi.Parameter()
    output_path = luigi.Parameter()
    output_key = luigi.Parameter()
    mask_path = luigi.Parameter(default='')
    mask_key = luigi.Parameter(default='')
    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        return self.dependency

    @staticmethod
    def default_task_config():
        # we use this to also get the common default config
        config = LocalTask.default_task_config()
        config.update({
            'channel_begin': 0,
            'channel_end': None,
            'agglomerate_channels': 'mean',
            'size_filter': 0
        })
        return config

    def clean_up_for_retry(self, block_list):
        # TODO remove any output of failed blocks because it might be corrupted
        super().clean_up_for_retry(block_list)

    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)
        if len(shape) == 4:
            shape = shape[1:]

        # load the watershed config
        ws_config = self.get_task_config()

        # require output dataset
        # TODO read chunks from config
        chunks = tuple(bs // 2 for bs in block_shape)
        chunks = tuple(min(ch, sh) for ch, sh in zip(chunks, shape))
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=shape,
                              chunks=chunks,
                              compression='gzip',
                              dtype='uint64')

        # update the config with input and output paths and keys
        # as well as block shape
        ws_config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'seeds_path': self.seeds_path,
            'seeds_key': self.seeds_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'block_shape': block_shape
        })
        if self.mask_path != '':
            assert self.mask_key != ''
            ws_config.update({
                'mask_path': self.mask_path,
                'mask_key': self.mask_key
            })

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        n_jobs = min(len(block_list), self.max_jobs)
        self._write_log('scheduling %i blocks to be processed' %
                        len(block_list))

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, ws_config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
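
A hedged sketch of the per-block operation this task schedules: a watershed on a boundary / height map, grown from a precomputed seed volume and optionally restricted to a mask. (The actual worker also handles channel aggregation and the size filter from the config; skimage is only used here for illustration.)

import numpy as np
from skimage.segmentation import watershed

hmap = np.random.rand(16, 64, 64).astype('float32')
seeds = np.zeros(hmap.shape, dtype='int32')
seeds[8, 16, 16], seeds[8, 48, 48] = 1, 2
mask = np.ones(hmap.shape, dtype=bool)

ws = watershed(hmap, markers=seeds, mask=mask)
print(np.unique(ws))  # -> [1 2], the two seeds fill the masked volume
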
Example #28
0
class TransformixBase(luigi.Task):
    """ Transformix base class
    """
    formats = ('bdv', 'tif')

    # what about cubic etc?
    interpolation_modes = {
        'linear': 'FinalLinearInterpolator',
        'nearest': 'FinalNearestNeighborInterpolator'
    }
    result_types = ('unsigned char', 'unsigned short')

    task_name = 'transformix'
    src_file = os.path.abspath(__file__)
    allow_retry = False

    input_path_file = luigi.Parameter()
    output_path_file = luigi.Parameter()
    transformation_file = luigi.Parameter()
    fiji_executable = luigi.Parameter()
    elastix_directory = luigi.Parameter()
    shape = luigi.ListParameter(default=None)
    resolution = luigi.ListParameter(default=None)
    interpolation = luigi.Parameter(default='nearest')
    output_format = luigi.Parameter(default='bdv')
    dependency = luigi.TaskParameter(default=DummyTask())

    def requires(self):
        return self.dependency

    @staticmethod
    def default_task_config():
        config = LocalTask.default_task_config()
        config.update({'ResultImagePixelType': None})
        return config

    # update the transformation with our interpolation mode
    # and the corresponding dtype
    def update_transformation(self, in_file, out_file, res_type):

        interpolator_name = self.interpolation_modes[self.interpolation]

        def update_line(line, to_write, is_numeric):
            line = line.rstrip('\n')
            line = line.split()
            if is_numeric:
                line = [line[0], "%s)" % to_write]
            else:
                line = [line[0], "\"%s\")" % to_write]
            line = " ".join(line) + "\n"
            return line

        with open(in_file, 'r') as f_in, open(out_file, 'w') as f_out:
            for line in f_in:
                # change the interpolator
                if line.startswith("(ResampleInterpolator"):
                    line = update_line(line, interpolator_name, False)
                # change the pixel result type
                elif line.startswith(
                        "(ResultImagePixelType") and res_type is not None:
                    line = update_line(line, res_type, False)
                elif line.startswith("(Size") and self.shape is not None:
                    shape_str = " ".join(map(str, self.shape[::-1]))
                    line = update_line(line, shape_str, True)
                elif line.startswith(
                        "(Spacing") and self.resolution is not None:
                    resolution_str = " ".join(map(str, self.resolution[::-1]))
                    line = update_line(line, resolution_str, True)
                elif line.startswith("(InitialTransformParametersFileName"):
                    initial_trafo_file = line.split()[-1][1:-2]
                    if initial_trafo_file == 'NoInitialTransform':
                        continue
                    new_initial_trafo_file = os.path.split(
                        initial_trafo_file)[1]
                    new_initial_trafo_file = os.path.join(
                        self.tmp_folder, 'transformations',
                        new_initial_trafo_file)
                    line = update_line(line, new_initial_trafo_file, False)
                f_out.write(line)

    def update_transformations(self, res_type):
        trafo_folder, trafo_name = os.path.split(self.transformation_file)
        trafo_files = glob(os.path.join(trafo_folder, '*.txt'))

        out_folder = os.path.join(self.tmp_folder, 'transformations')
        os.makedirs(out_folder, exist_ok=True)

        for trafo in trafo_files:
            name = os.path.split(trafo)[1]
            out = os.path.join(out_folder, name)
            self.update_transformation(trafo, out, res_type)

        new_trafo = os.path.join(out_folder, trafo_name)
        assert os.path.exists(new_trafo)
        return new_trafo

    def run_impl(self):
        # get the global config and init configs
        shebang = self.global_config_values()[0]
        self.init(shebang)

        with open(self.input_path_file) as f:
            inputs = json.load(f)
        with open(self.output_path_file) as f:
            outputs = json.load(f)

        assert len(inputs) == len(outputs), "%i, %i" % (len(inputs),
                                                        len(outputs))
        assert all(os.path.exists(inp) for inp in inputs), f"{inputs}"
        n_files = len(inputs)

        assert os.path.exists(self.transformation_file)
        assert os.path.exists(self.fiji_executable)
        assert os.path.exists(self.elastix_directory)
        assert self.output_format in self.formats
        assert self.interpolation in self.interpolation_modes

        config = self.get_task_config()
        res_type = config.pop('ResultImagePixelType', None)
        if res_type is not None:
            assert res_type in self.result_types
        trafo_file = self.update_transformations(res_type)

        # split the file ids into blocks (one file per block)
        file_list = vu.blocks_in_volume((n_files, ), (1, ))

        # we don't need any additional config besides the paths
        config.update({
            "input_path_file": self.input_path_file,
            "output_path_file": self.output_path_file,
            "transformation_file": trafo_file,
            "fiji_executable": self.fiji_executable,
            "elastix_directory": self.elastix_directory,
            "tmp_folder": self.tmp_folder,
            "output_format": self.output_format
        })

        # prime and run the jobs
        n_jobs = min(self.max_jobs, n_files)
        self.prepare_jobs(n_jobs, file_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
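
A self-contained illustration of the line rewriting done by update_transformation: elastix parameter lines are "(Key value)" pairs, and update_line swaps out the value while keeping the key (numeric values are written without quotes):

def update_line(line, to_write, is_numeric):
    line = line.rstrip('\n').split()
    if is_numeric:
        line = [line[0], "%s)" % to_write]
    else:
        line = [line[0], "\"%s\")" % to_write]
    return " ".join(line) + "\n"

print(update_line('(ResampleInterpolator "FinalBSplineInterpolator")\n',
                  'FinalNearestNeighborInterpolator', False))
# -> (ResampleInterpolator "FinalNearestNeighborInterpolator")
print(update_line('(Size 100 200 300)\n', '512 512 64', True))
# -> (Size 512 512 64)
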