Code example #1
def scale_to_boundaries(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read paths from the config
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    boundaries_path = config['boundaries_path']
    boundaries_key = config['boundaries_key']
    offset = config['offset']

    # additional config
    erode_by = config['erode_by']
    erode_3d = config.get('erode_3d', True)
    channel = config['channel']

    block_shape = list(config['block_shape'])
    block_list = config['block_list']

    with vu.file_reader(input_path, 'r') as fin,\
            vu.file_reader(boundaries_path, 'r') as fb,\
            vu.file_reader(output_path) as fout:

        ds_bd = fb[boundaries_key]
        ds_out = fout[output_key]

        shape = ds_out.shape
        blocking = nt.blocking([0, 0, 0], list(shape), block_shape)

        ds_in = ResizedVolume(fin[input_key], shape)

        for block_id in block_list:
            _scale_block(block_id, blocking,
                         ds_in, ds_bd, ds_out,
                         offset, erode_by, erode_3d, channel)

    # log success
    fu.log_job_success(job_id)
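For reference, a job config that satisfies the keys read by scale_to_boundaries above might look like the following sketch; all paths and values are made-up placeholders, not taken from the original project.

import json

# hypothetical job config for scale_to_boundaries; every value is a placeholder
example_config = {
    "input_path": "/tmp/fragments.n5", "input_key": "volumes/fragments",
    "output_path": "/tmp/out.n5", "output_key": "volumes/scaled",
    "boundaries_path": "/tmp/boundaries.n5", "boundaries_key": "volumes/boundaries",
    "offset": 0, "erode_by": 2, "erode_3d": True, "channel": 0,
    "block_shape": [50, 512, 512], "block_list": [0, 1, 2, 3]
}

with open("scale_to_boundaries_job_0.json", "w") as f:
    json.dump(example_config, f)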
Code example #2
def watershed(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']
    shape = list(vu.get_shape(input_path, input_key))
    if len(shape) == 4:
        shape = shape[1:]

    block_shape = list(config['block_shape'])
    block_list = config['block_list']

    # read the output config
    output_path = config['output_path']
    output_key = config['output_key']

    # get the blocking
    blocking = nt.blocking([0, 0, 0], shape, block_shape)

    # submit blocks
    with vu.file_reader(input_path,
                        'r') as f_in, vu.file_reader(output_path) as f_out:
        ds_in = f_in[input_key]
        assert ds_in.ndim in (3, 4)
        ds_out = f_out[output_key]
        assert ds_out.ndim == 3

        if 'mask_path' in config:
            mask_path = config['mask_path']
            mask_key = config['mask_key']
            mask = vu.load_mask(mask_path, mask_key, shape)
        else:
            mask = None
        for block_id in block_list:
            _ws_block(blocking, block_id, ds_in, ds_out, mask, config)

    # log success
    fu.log_job_success(job_id)
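The _ws_block helper is not shown here. Below is a minimal sketch of what such a block worker could do, assuming nifty's blocking.getBlock interface and substituting scikit-image's watershed for whatever the project actually uses; seed generation, halos, and globally unique label offsets are only hinted at.

import numpy as np
from scipy import ndimage
from skimage.segmentation import watershed


def _ws_block_sketch(blocking, block_id, ds_in, ds_out, mask, config):
    # illustrative stand-in for _ws_block, not the original implementation
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))

    # boundary map: take the first channel for 4d inputs
    if ds_in.ndim == 4:
        hmap = ds_in[(slice(0, 1),) + bb][0].astype('float32')
    else:
        hmap = ds_in[bb].astype('float32')

    block_mask = None if mask is None else mask[bb].astype('bool')

    # seeds from thresholding the smoothed boundary map (assumed config keys)
    sigma = config.get('sigma_seeds', 2.)
    smoothed = ndimage.gaussian_filter(hmap, sigma) if sigma > 0 else hmap
    seeds, _ = ndimage.label(smoothed < config.get('threshold_seeds', 0.25))

    # the real implementation would offset the seed ids per block
    # to keep labels unique across blocks
    ws = watershed(smoothed, markers=seeds, mask=block_mask)
    ds_out[bb] = ws.astype('uint64')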
Code example #3
def _merge_graph(graph_path, output_key, scale, block_list, blocking, shape,
                 n_threads):
    subgraph_key = 's%i/sub_graphs' % scale
    ndist.mergeSubgraphs(graph_path,
                         subgraphKey=subgraph_key,
                         blockIds=block_list,
                         outKey=output_key,
                         numberOfThreads=n_threads,
                         serializeToVarlen=False)
    with vu.file_reader(graph_path) as f:
        f[output_key].attrs['shape'] = shape
Code example #4
def graph_connected_components(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    problem_path = config['problem_path']
    graph_key = config['graph_key']
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']
    output_path = config['output_path']
    output_key = config['output_key']
    n_threads = config.get('n_threads', 8)

    with vu.file_reader(assignment_path, 'r') as f:
        ds_ass = f[assignment_key]
        ds_ass.n_threads = n_threads
        assignments = ds_ass[:]
        chunks = ds_ass.chunks

    graph = ndist.Graph(os.path.join(problem_path, graph_key), n_threads)
    # TODO check if we actually have an ignore label
    assignments = ndist.connectedComponentsFromNodes(graph, assignments, True)
    vigra.analysis.relabelConsecutive(assignments,
                                      out=assignments,
                                      start_label=1,
                                      keep_zeros=True)

    with vu.file_reader(output_path) as f:
        ds_out = f.require_dataset(output_key,
                                   shape=assignments.shape,
                                   chunks=chunks,
                                   compression='gzip',
                                   dtype='uint64')
        ds_out.n_threads = n_threads
        ds_out[:] = assignments

    fu.log_job_success(job_id)
Code example #5
def threshold(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']
    block_list = config['block_list']
    block_shape = config['block_shape']
    threshold = config['threshold']
    threshold_mode = config['threshold_mode']

    sigma = config.get('sigma_prefilter', 0)
    channel = config.get('channel', None)

    fu.log("Applying threshold %f with mode %s" % (threshold, threshold_mode))

    with vu.file_reader(input_path,
                        'r') as f_in, vu.file_reader(output_path) as f_out:

        ds_in = f_in[input_key]
        ds_out = f_out[output_key]

        shape = ds_in.shape
        if channel is not None:
            shape = shape[1:]
        assert len(shape) == 3

        blocking = nt.blocking([0, 0, 0], list(shape), block_shape)

        for block_id in block_list:
            _threshold_block(block_id, blocking, ds_in, ds_out, threshold,
                             threshold_mode, channel, sigma)

    fu.log_job_success(job_id)
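_threshold_block is defined elsewhere; a plausible sketch, assuming nifty's blocking.getBlock interface and the mode names 'greater' / 'less' (which are assumptions, not taken from the project), would be:

import numpy as np
from scipy.ndimage import gaussian_filter


def _threshold_block_sketch(block_id, blocking, ds_in, ds_out, threshold,
                            threshold_mode, channel, sigma):
    # illustrative stand-in for _threshold_block
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))

    # select the requested channel for multi-channel inputs
    if channel is None:
        data = ds_in[bb].astype('float32')
    else:
        data = ds_in[(slice(channel, channel + 1),) + bb][0].astype('float32')

    if sigma > 0:
        data = gaussian_filter(data, sigma)

    if threshold_mode == 'greater':
        out = data > threshold
    elif threshold_mode == 'less':
        out = data < threshold
    else:
        raise ValueError("Unknown threshold_mode %s" % threshold_mode)

    ds_out[bb] = out.astype(ds_out.dtype)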
Code example #6
def skeletonize(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']

    output_path = config['output_path']
    output_key = config['output_key']

    n_threads = config.get('threads_per_job', 1)

    # load input segmentation
    with vu.file_reader(input_path) as f_in:
        ds_in = f_in[input_key]
        ds_in.n_threads = n_threads
        seg = ds_in[:]

    # TODO size filtering ?
    # find unique ids in the segmentation
    ids = np.unique(seg)
    # if 0 in ids, discard it (ignore id)
    if ids[0] == 0:
        ids = ids[1:]

    fu.log("computing skeletons for %i ids" % len(ids))
    # FIXME this is too slow because skeletonize 3d does not lift gil
    # skel_vol = skeletonize_multi_threaded(seg, ids, n_threads)
    skel_vol = skeletonize_mp(seg, ids, n_threads)

    # write the output
    with vu.file_reader(output_path) as f_out:
        ds_out = f_out[output_key]
        ds_out.n_threads = n_threads
        ds_out[:] = skel_vol

    # log success
    fu.log_job_success(job_id)
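skeletonize_mp is not included above. The sketch below only illustrates the per-id logic with scikit-image's skeletonize_3d; it uses a thread pool for brevity, whereas the real function presumably uses processes since, as the FIXME notes, skeletonize_3d does not release the GIL.

import numpy as np
from concurrent import futures
from scipy.ndimage import find_objects
from skimage.morphology import skeletonize_3d


def skeletonize_mp_sketch(seg, ids, n_threads):
    # illustrative stand-in for skeletonize_mp
    skel_vol = np.zeros_like(seg, dtype='uint64')
    slices = find_objects(seg)

    def _skeletonize_id(seg_id):
        bb = slices[int(seg_id) - 1]
        if bb is None:
            return
        skel = skeletonize_3d(seg[bb] == seg_id) > 0
        # writing into a view of skel_vol updates the full volume
        skel_vol[bb][skel] = seg_id

    with futures.ThreadPoolExecutor(n_threads) as tp:
        list(tp.map(_skeletonize_id, ids))
    return skel_vol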
Code example #7
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end, block_list_path = self.global_config_values(
            True)
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)
        if len(shape) == 4:
            shape = shape[1:]

        # load the watershed config
        ws_config = self.get_task_config()

        # require output dataset
        # TODO read chunks from config
        chunks = tuple(bs // 2 for bs in block_shape)
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=shape,
                              chunks=chunks,
                              compression='gzip',
                              dtype='uint64')

        # update the config with input and output paths and keys
        # as well as block shape
        ws_config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'block_shape': block_shape
        })
        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape,
                                             block_shape,
                                             roi_begin,
                                             roi_end,
                                             block_list_path=block_list_path)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)
        self._write_log('scheduling %i blocks to be processed' %
                        len(block_list))
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, ws_config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Code example #8
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        if not self.blockwise:
            with vu.file_reader(self.input_path) as f_in:
                block_shape = f_in[self.input_key].shape

        # load the task config
        config = self.get_task_config()

        # TODO make the scale at which we extract features accessible
        # update the config with input and output paths and keys
        # as well as block shape
        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'labels_path': self.labels_path,
            'labels_key': self.labels_key,
            'output_path': self.output_path,
            'output_key': self.output_key,
            'block_shape': block_shape,
            'feature_list': self.feature_list
        })
        # TODO support multi-channel
        shape = vu.get_shape(self.input_path, self.input_key)

        # require the temporary output data-set
        f_out = z5py.File(self.output_path)

        f_out.require_dataset(self.output_key,
                              shape=shape,
                              compression='gzip',
                              chunks=tuple(block_shape),
                              dtype='float32')

        if self.n_retries == 0:
            # get shape and make block config
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        n_jobs = min(len(block_list), self.max_jobs)
        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Code example #9
    def _read_num_features(self, block_ids):
        n_feats = None
        with vu.file_reader(self.output_path) as f:
            for block_id in block_ids:
                block_key = os.path.join('blocks', 'block_%i' % block_id)
                block_path = os.path.join(self.output_path, block_key)
                if not os.path.exists(block_path):
                    continue
                n_feats = f[block_key].shape[1]
                break
        assert n_feats is not None, "No valid feature block found"
        return n_feats
Code example #10
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the task config
        config = self.get_task_config()

        # require output group
        with vu.file_reader(self.output_path) as f:
            f.require_group(self.blocks_prefix)

        # TODO make the scale at which we extract features accessible
        # update the config with input and output paths and keys
        # as well as block shape
        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'labels_path': self.labels_path,
            'labels_key': self.labels_key,
            'output_path': self.output_path,
            'block_shape': block_shape,
            'blocks_prefix': self.blocks_prefix,
            'graph_block_prefix': os.path.join(self.graph_path, 's0',
                                               'sub_graphs', 'block_')
        })

        if self.n_retries == 0:
            # get shape and make block config
            shape = vu.get_shape(self.input_path, self.input_key)
            if len(shape) == 4:
                shape = shape[1:]
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        n_jobs = min(len(block_list), self.max_jobs)
        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Code example #11
def embedding_distances(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    path_dict = config['path_dict']
    output_path = config['output_path']
    output_key = config['output_key']
    block_list = config['block_list']
    block_shape = config['block_shape']
    offsets = config['offsets']
    norm = config['norm']

    # TODO support thresholding
    threshold = config['threshold']
    threshold_mode = config['threshold_mode']
    assert threshold is None

    with open(path_dict) as f:
        path_dict = json.load(f)

    input_datasets = []
    for path in sorted(path_dict):
        input_datasets.append(vu.file_reader(path, 'r')[path_dict[path]])

    with vu.file_reader(output_path) as f:

        ds = f[output_key]

        shape = ds.shape[1:]
        blocking = nt.blocking([0, 0, 0], list(shape), block_shape)
        for block_id in block_list:
            _embedding_distances_block(block_id, blocking, input_datasets, ds,
                                       offsets, norm)

    fu.log_job_success(job_id)
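A hedged sketch of what _embedding_distances_block could compute, assuming each input dataset is a single 3d embedding channel, only the 'l2' norm, and no halo handling at block borders:

import numpy as np


def _embedding_distances_block_sketch(block_id, blocking, input_datasets, ds,
                                      offsets, norm):
    # illustrative stand-in: per offset, distance between the embedding at a
    # voxel and at the voxel shifted by that offset, written to one channel
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))

    # stack the single-channel inputs into an embedding array (c, z, y, x)
    embed = np.stack([np.asarray(ds_in[bb], dtype='float32')
                      for ds_in in input_datasets], axis=0)

    for chan, offset in enumerate(offsets):
        shifted = np.roll(embed, shift=[-off for off in offset], axis=(1, 2, 3))
        if norm == 'l2':
            dist = np.sqrt(np.sum((embed - shifted) ** 2, axis=0))
        else:
            raise NotImplementedError("norm %s not sketched" % norm)
        ds[(slice(chan, chan + 1),) + bb] = dist[None].astype(ds.dtype)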
Code example #12
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the task config
        config = self.get_task_config()
        shape = vu.get_shape(self.input_path, self.input_key)
        # FIXME we should be able to specify xyzc vs cyzx
        if len(shape) == 4:
            shape = shape[1:]
        assert len(shape) == 3
        block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                         roi_end)

        # update the config with input and output paths and keys
        # as well as block shape
        config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'halo': self.halo,
            'ilastik_project': self.ilastik_project,
            'ilastik_folder': self.ilastik_folder,
            'block_shape': block_shape,
            'tmp_folder': self.tmp_folder
        })
        # if the output key is not None, we have a z5 file and
        # need to require the dataset
        if self.output_key is not None:
            config.update({'output_key': self.output_key})
            chunks = tuple(bs // 2 for bs in block_shape)
            if self.n_channels > 1:
                shape = (self.n_channels, ) + shape
                chunks = (1, ) + chunks

            dtype = config.get('dtype', 'float32')
            with vu.file_reader(self.output_path) as f:
                f.require_dataset(self.output_key,
                                  shape=shape,
                                  chunks=chunks,
                                  dtype=dtype,
                                  compression='gzip')

        n_jobs = min(len(block_list), self.max_jobs)
        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Code example #13
def find_uniques(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    block_list = config['block_list']
    block_shape = config['block_shape']
    tmp_folder = config['tmp_folder']
    return_counts = config['return_counts']

    # open the input file
    with vu.file_reader(input_path, 'r') as f:
        ds = f[input_key]
        is_label_multiset = ds.attrs.get("isLabelMultiset", False)
        if is_label_multiset:
            ds = LabelMultisetWrapper(ds)

        shape = ds.shape
        blocking = nt.blocking(roiBegin=[0, 0, 0],
                               roiEnd=list(shape),
                               blockShape=list(block_shape))

        # find uniques for all blocks
        uniques = [
            uniques_in_block(block_id, blocking, ds, return_counts)
            for block_id in block_list
        ]

    if return_counts:
        unique_values = np.unique(np.concatenate([un[0] for un in uniques]))
        counts = np.zeros(int(unique_values[-1] + 1), dtype='uint64')
        for uniques_block, counts_block in uniques:
            counts[uniques_block] += counts_block.astype('uint64')
        counts = counts[counts != 0]
        assert len(counts) == len(unique_values)

        count_path = os.path.join(tmp_folder, 'counts_job_%i.npy' % job_id)
        np.save(count_path, counts)

    else:
        unique_values = np.unique(np.concatenate(uniques))

    # save the uniques for this job
    save_path = os.path.join(tmp_folder, 'find_uniques_job_%i.npy' % job_id)
    fu.log("saving results to %s" % save_path)
    np.save(save_path, unique_values)
    # log success
    fu.log_job_success(job_id)
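uniques_in_block is straightforward to reconstruct from how its results are aggregated above; a minimal sketch, assuming nifty's blocking.getBlock interface:

import numpy as np


def uniques_in_block_sketch(block_id, blocking, ds, return_counts):
    # illustrative stand-in for uniques_in_block
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))
    data = ds[bb]
    if return_counts:
        return np.unique(data, return_counts=True)
    return np.unique(data)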
Code example #14
def edge_labels(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as f:
        config = json.load(f)
    output_path = config['output_path']
    output_key = config['output_key']
    graph_path = config['graph_path']
    graph_key = config['graph_key']
    overlap_path = config['overlap_path']
    overlap_key = config['overlap_key']
    ignore_label_gt = config.get('ignore_label_gt', False)

    # load the node overlaps
    with vu.file_reader(overlap_path, 'r') as f:
        node_labels = f[overlap_key][:]

    # load the uv ids and check
    with vu.file_reader(graph_path, 'r') as f:
        uv_ids = f[graph_key]['edges'][:]

    lu = node_labels[uv_ids[:, 0]]
    lv = node_labels[uv_ids[:, 1]]
    edge_labels = (lu != lv).astype('int8')
    if ignore_label_gt:
        ignore_mask = np.logical_or(lu == 0, lv == 0)
        edge_labels[ignore_mask] = -1

    n_edges = len(edge_labels)
    chunks = (min(262144, n_edges), )
    with vu.file_reader(output_path) as f:
        f.create_dataset(output_key,
                         data=edge_labels,
                         chunks=chunks,
                         compression='gzip')

    fu.log_job_success(job_id)
Code example #15
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape, dtype and make block config
        with vu.file_reader(self.input_path, 'r') as f:
            shape = f[self.input_key].shape

        # load the skeletonize config
        task_config = self.get_task_config()

        # require output dataset
        chunks = (25, 256, 256)
        chunks = tuple(min(sh, ch) for sh, ch in zip(shape, chunks))
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,
                              shape=shape,
                              chunks=chunks,
                              compression='gzip',
                              dtype='uint64')

        # update the config with input and output paths and keys
        # as well as block shape
        task_config.update({
            'input_path': self.input_path,
            'input_key': self.input_key,
            'output_path': self.output_path,
            'output_key': self.output_key
        })

        # prime and run the jobs
        n_jobs = 1
        self.prepare_jobs(n_jobs, None, task_config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Code example #16
def upsample_skeletons(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']

    output_path = config['output_path']
    output_key = config['output_key']

    skeleton_path = config['skeleton_path']
    skeleton_key = config['skeleton_key']

    block_shape = list(config['block_shape'])
    block_list = config['block_list']
    halo = config.get('halo', None)
    pixel_pitch = config.get('pixel_pitch', None)

    # get the shapes of the segmentation and the downscaled skeletons
    with vu.file_reader(input_path, 'r') as f_in,\
            vu.file_reader(skeleton_path, 'r') as f_skel:
        shape = f_in[input_key].shape
        skel_shape = f_skel[skeleton_key].shape

    scale_factor = tuple(sh // sksh for sh, sksh in zip(shape, skel_shape))
    blocking = nt.blocking([0, 0, 0], list(shape), block_shape)

    with vu.file_reader(input_path, 'r') as f_in,\
         vu.file_reader(skeleton_path, 'r') as f_skel,\
         vu.file_reader(output_path) as f_out:

        ds_in = f_in[input_key]
        ds_skel = f_skel[skeleton_key]
        ds_out = f_out[output_key]

        for block_id in block_list:
            _upsample_block(block_id, blocking, halo,
                            ds_in, ds_out, ds_skel,
                            scale_factor, pixel_pitch)

    # log success
    fu.log_job_success(job_id)
Code example #17
    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        config = self.get_task_config()
        config.update({'input_path': self.input_path,
                       'input_key': self.input_key,
                       'output_path': self.output_path,
                       'output_key': self.output_key,
                       'objects_path': self.objects_path,
                       'objects_key': self.objects_key,
                       'offsets': self.offsets,
                       'block_shape': block_shape})

        shape = vu.get_shape(self.input_path, self.input_key)
        dtype = vu.file_reader(self.input_path, 'r')[self.input_key].dtype

        chunks = config['chunks']
        if chunks is None:
            chunks = vu.file_reader(self.input_path, 'r')[self.input_key].chunks
        assert all(bs % ch == 0 for bs, ch in zip(block_shape, chunks[1:]))
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, shape=tuple(shape), chunks=tuple(chunks),
                              dtype=dtype, compression='gzip')

        shape = shape[1:]
        block_list = vu.blocks_in_volume(shape, block_shape,
                                         roi_begin, roi_end)
        n_jobs = min(len(block_list), self.max_jobs)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Code example #18
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape, dtype and make block config
        with vu.file_reader(self.input_path, 'r') as f:
            shape = f[self.input_key].shape

        # load the upsample_skeletons config
        task_config = self.get_task_config()

        # require output dataset
        chunks = (25, 256, 256)
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, shape=shape, chunks=chunks,
                              compression='gzip', dtype='uint64')

        # update the config with input and output paths and keys
        # as well as block shape
        task_config.update({'input_path': self.input_path, 'input_key': self.input_key,
                            'skeleton_path': self.skeleton_path, 'skeleton_key': self.skeleton_key,
                            'output_path': self.output_path, 'output_key': self.output_key})

        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin, roi_end)
            self._write_log("scheduled %i blocks to run" % len(block_list))
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        n_jobs = min(len(block_list), self.max_jobs)
        self.prepare_jobs(n_jobs, block_list, task_config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Code example #19
def block_edge_features(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as f:
        config = json.load(f)

    block_list = config['block_list']
    input_path = config['input_path']
    input_key = config['input_key']
    labels_path = config['labels_path']
    labels_key = config['labels_key']
    output_path = config['output_path']
    block_shape = config['block_shape']
    graph_path = config['graph_path']
    subgraph_key = config['subgraph_key']
    output_key = config['output_key']

    # offsets for accumulation of affinity maps
    offsets = config.get('offsets', None)
    filters = config.get('filters', None)
    sigmas = config.get('sigmas', None)
    apply_in_2d = config.get('apply_in_2d', False)
    halo = config.get('halo', [0, 0, 0])
    channel_agglomeration = config.get('channel_agglomeration', 'mean')
    assert channel_agglomeration in ('mean', 'max', 'min', None)

    if filters is None:
        n_feats = _accumulate(input_path, input_key, labels_path, labels_key,
                              graph_path, subgraph_key, output_path,
                              output_key, block_list, offsets)
    else:
        assert offsets is None, "Filters and offsets are not supported"
        assert sigmas is not None, "Need sigma values"
        n_feats = _accumulate_with_filters(input_path, input_key, labels_path,
                                           labels_key, graph_path,
                                           subgraph_key, output_path,
                                           output_key, block_list, block_shape,
                                           filters, sigmas, halo, apply_in_2d,
                                           channel_agglomeration)

    # we need to serialize the number of features for job 0
    if job_id == 0:
        with vu.file_reader(output_path) as f:
            ds = f[output_key]
            ds.attrs['n_features'] = n_feats

    fu.log_job_success(job_id)
Code example #20
def find_labeling(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    with open(config_path, 'r') as f:
        config = json.load(f)
    n_jobs = config['n_jobs']
    tmp_folder = config['tmp_folder']
    n_threads = config['threads_per_job']
    assignment_path = config['assignment_path']
    assignment_key = config['assignment_key']

    def _read_input(job_id):
        return np.load(
            os.path.join(tmp_folder, 'find_uniques_job_%i.npy' % job_id))

    fu.log("read uniques")
    with futures.ThreadPoolExecutor(n_threads) as tp:
        tasks = [tp.submit(_read_input, job_id) for job_id in range(n_jobs)]
        uniques = np.concatenate([t.result() for t in tasks])

    fu.log("compute uniques")
    uniques = np.unique(uniques)

    if uniques[0] == 0:
        start_label = 0
        stop_label = len(uniques)
    else:
        start_label = 1
        stop_label = len(uniques) + 1
    fu.log("relabel to new max-id %i" % stop_label)
    new_ids = np.arange(start_label, stop_label, dtype='uint64')
    assignments = np.concatenate([uniques[:, None], new_ids[:, None]], axis=1)

    fu.log("saving results to %s/%s" % (assignment_path, assignment_key))
    with vu.file_reader(assignment_path) as f:
        chunk_size = min(int(1e6), len(assignments))
        chunks = (chunk_size, 2)
        ds = vu.force_dataset(f,
                              assignment_key,
                              shape=assignments.shape,
                              dtype='uint64',
                              compression='gzip',
                              chunks=chunks)
        ds.n_threads = n_threads
        ds[:] = assignments

    # log success
    fu.log_job_success(job_id)
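The assignments saved above pair each old id (column 0) with its new consecutive id (column 1). A hedged illustration, not part of the original code, of how they could later be applied to a segmentation block:

import numpy as np


def apply_assignments_sketch(seg_block, assignments):
    # assumes every id occurring in seg_block is present in assignments[:, 0],
    # which is sorted because it comes from np.unique
    old_ids, new_ids = assignments[:, 0], assignments[:, 1]
    idx = np.searchsorted(old_ids, seg_block.ravel())
    return new_ids[idx].reshape(seg_block.shape)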
Code example #21
    def _prepare_output(self, config):
        # make the blocking
        block_len = min(self.number_of_labels, config.get('chunk_len', 1000))
        block_list = vu.blocks_in_volume((self.number_of_labels,),
                                         (block_len,))
        n_jobs = min(len(block_list), self.max_jobs)
        # require output dataset
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, shape=(self.number_of_labels,),
                              chunks=(1,), compression='gzip', dtype='uint64')
        # update the config
        config.update({'number_of_labels': self.number_of_labels,
                       'block_len': block_len})
        return config, n_jobs, block_list
Code example #22
    def run_impl(self):
        # get the global config and init configs
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # load the watershed config
        config = self.get_task_config()

        # get the shape and write shape and ignore label to our output file
        with vu.file_reader(self.graph_path) as f:
            g = f['s0/sub_graphs']
            shape = tuple(g.attrs['shape'])
            ignore_label = g.attrs['ignore_label']

            g = f.require_group(self.output_key)
            g.attrs['ignore_label'] = ignore_label
            g.attrs['shape'] = shape

        # update the config with input and graph paths and keys
        # as well as block shape
        config.update({
            'graph_path': self.graph_path,
            'block_shape': block_shape,
            'scale': self.scale,
            'merge_complete_graph': self.merge_complete_graph,
            'output_key': self.output_key
        })

        factor = 2**self.scale
        block_shape = tuple(sh * factor for sh in block_shape)
        if self.n_retries == 0:
            block_list = vu.blocks_in_volume(shape, block_shape, roi_begin,
                                             roi_end)
        else:
            block_list = self.block_list
            self.clean_up_for_retry(block_list)

        if self.merge_complete_graph:
            n_jobs = 1
        else:
            n_jobs = min(len(block_list), self.max_jobs)
            self._initialize_datasets(shape, block_shape)

        # prime and run the jobs
        self.prepare_jobs(n_jobs, block_list, config)
        self.submit_jobs(n_jobs)

        # wait till jobs finish and check for job success
        self.wait_for_jobs()
        self.check_jobs(n_jobs)
Code example #23
def copy_volume(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']

    block_shape = list(config['block_shape'])
    block_list = config['block_list']

    # read the output config
    output_path = config['output_path']
    output_key = config['output_key']

    # check if we offset by roi
    roi_begin = config.get('roi_begin', None)

    n_threads = config.get('threads_per_job', 1)

    # submit blocks
    with vu.file_reader(input_path,
                        'r') as f_in, vu.file_reader(output_path) as f_out:
        ds_in = f_in[input_key]
        ds_in.n_threads = n_threads
        ds_out = f_out[output_key]
        ds_out.n_threads = n_threads

        shape = list(ds_in.shape)
        blocking = nt.blocking([0, 0, 0], shape, block_shape)

        _copy_blocks(ds_in, ds_out, blocking, block_list, roi_begin)

    # log success
    fu.log_job_success(job_id)
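_copy_blocks is not shown; below is a minimal sketch, assuming nifty's blocking.getBlock interface. Whether the roi offset is added to or subtracted from the output coordinates is an assumption here.

def _copy_blocks_sketch(ds_in, ds_out, blocking, block_list, roi_begin):
    # illustrative stand-in for _copy_blocks
    for block_id in block_list:
        block = blocking.getBlock(block_id)
        bb_in = tuple(slice(beg, end)
                      for beg, end in zip(block.begin, block.end))
        if roi_begin is None:
            bb_out = bb_in
        else:
            bb_out = tuple(slice(beg - off, end - off)
                           for beg, end, off in zip(block.begin, block.end,
                                                    roi_begin))
        ds_out[bb_out] = ds_in[bb_in]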
Code example #24
def check_components(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    output_path = config['output_path']
    output_key = config['output_key']

    block_shape = config['block_shape']
    chunks = config['chunks']
    n_labels = config['n_labels']

    chunks_per_block = [bs // ch for bs, ch in zip(block_shape, chunks)]
    max_chunks_per_label = np.prod(chunks_per_block)
    # TODO don't hard-code assertion to special case for [512, 512, 50], [256, 256, 25]
    assert max_chunks_per_label == 8

    n_threads = config.get('threads_per_job', 1)

    ds_in = vu.file_reader(input_path)[input_key]
    violating_ids = _check_components_impl(ds_in, max_chunks_per_label,
                                           n_threads, n_labels)

    if violating_ids.size > 0:
        fu.log("have %i violationg_ids" % violating_ids.shape[0])
        vchunks = (min(10000, violating_ids.shape[0]), 2)
        with vu.file_reader(output_path) as f:
            f.create_dataset(output_key, data=violating_ids, chunks=vchunks)
    else:
        fu.log("no violating ids")

    # log success
    fu.log_job_success(job_id)
Code example #25
def stack_predictions(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path, 'r') as f:
        config = json.load(f)

    raw_path = config['raw_path']
    raw_key = config['raw_key']
    pred_path = config['pred_path']
    pred_key = config['pred_key']

    output_path = config['output_path']
    output_key = config['output_key']

    block_shape = config['block_shape']
    block_list = config['block_list']

    with vu.file_reader(raw_path, 'r') as fr,\
        vu.file_reader(pred_path, 'r') as fp,\
        vu.file_reader(output_path) as fout:

        ds_raw = fr[raw_key]
        ds_pred = fp[pred_key]
        ds_out = fout[output_key]

        dtype = str(ds_out.dtype)

        shape = ds_raw.shape
        blocking = nt.blocking([0, 0, 0], shape, block_shape)

        for block_id in block_list:
            stack_block(block_id, blocking, ds_raw, ds_pred, ds_out, dtype)

    fu.log_job_success(job_id)
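stack_block presumably writes the raw data and the prediction channels into one stacked output volume; the channel layout (raw first, predictions after) and the plain dtype cast below are assumptions.

import numpy as np


def stack_block_sketch(block_id, blocking, ds_raw, ds_pred, ds_out, dtype):
    # illustrative stand-in for stack_block
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))

    raw = np.asarray(ds_raw[bb], dtype=dtype)
    if ds_pred.ndim == 4:
        pred = np.asarray(ds_pred[(slice(None),) + bb], dtype=dtype)
    else:
        pred = np.asarray(ds_pred[bb], dtype=dtype)[None]

    ds_out[(slice(0, 1),) + bb] = raw[None]
    ds_out[(slice(1, 1 + pred.shape[0]),) + bb] = pred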
Code example #26
def costs_from_node_labels(job_id, config_path):

    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # get the config
    with open(config_path) as f:
        config = json.load(f)

    nh_path = config['nh_path']
    nh_key = config['nh_key']
    node_label_path = config['node_label_path']
    node_label_key = config['node_label_key']
    output_path = config['output_path']
    output_key = config['output_key']
    chunk_size = config['chunk_size']

    inter_label_cost = config['inter_label_cost']
    intra_label_cost = config['intra_label_cost']

    block_list = config['block_list']

    with vu.file_reader(node_label_path, 'r') as f:
        node_labels = f[node_label_key][:]
    with vu.file_reader(nh_path) as f_in, vu.file_reader(output_path) as f_out:
        ds_in = f_in[nh_key]
        ds_out = f_out[output_key]

        n_lifted_edges = ds_in.shape[0]
        blocking = nt.blocking([0], [n_lifted_edges], [chunk_size])

        for block_id in block_list:
            _costs_for_edge_block(block_id, blocking,
                                  ds_in, ds_out, node_labels,
                                  inter_label_cost, intra_label_cost)

    fu.log_job_success(job_id)
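_costs_for_edge_block can be sketched from the parameters it receives: load one chunk of lifted uv-ids, look up the node labels of both endpoints, and assign a cost depending on whether the labels differ. Handling of unlabeled (zero) nodes is omitted here and may differ in the original.

import numpy as np


def _costs_for_edge_block_sketch(block_id, blocking, ds_in, ds_out, node_labels,
                                 inter_label_cost, intra_label_cost):
    # illustrative stand-in for _costs_for_edge_block
    block = blocking.getBlock(block_id)
    edge_slice = slice(block.begin[0], block.end[0])

    uv_ids = ds_in[edge_slice]
    lu = node_labels[uv_ids[:, 0]]
    lv = node_labels[uv_ids[:, 1]]

    costs = np.where(lu != lv, inter_label_cost,
                     intra_label_cost).astype('float32')
    ds_out[edge_slice] = costs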
Code example #27
def create_multiset(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)
    with open(config_path, 'r') as f:
        config = json.load(f)

    # read the input config
    input_path = config['input_path']
    input_key = config['input_key']

    block_shape = list(config['block_shape'])
    block_list = config['block_list']

    # read the output config
    output_path = config['output_path']
    output_key = config['output_key']
    shape = list(vu.get_shape(output_path, output_key))

    # get the blocking
    blocking = nt.blocking([0, 0, 0], shape, block_shape)

    # submit blocks
    with vu.file_reader(input_path, 'r') as f_in, vu.file_reader(output_path) as f_out:
        ds_in = f_in[input_key]
        if ds_in.attrs.get('isLabelMultiset', False):
            ds_in = LabelMultisetWrapper(ds_in)
        ds_out = f_out[output_key]

        for block_id in block_list:
            _create_multiset_block(blocking, block_id, ds_in, ds_out)

        if job_id == 0:
            max_id = ds_in.attrs['maxId']
            write_metadata(ds_out, max_id)

    # log success
    fu.log_job_success(job_id)
Code example #28
    def run_impl(self):
        # get the global config and init configs
        shebang = self.global_config_values()[0]
        self.init(shebang)

        # load the task config
        config = self.get_task_config()
        #
        with vu.file_reader(self.nh_path, 'r') as f:
            n_lifted_edges = f[self.nh_key].shape[0]

        # chunk size = 64**3
        chunk_size = min(262144, n_lifted_edges)
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key, shape=(n_lifted_edges,),
                              chunks=(chunk_size,), compression='gzip',
                              dtype='float32')

        # update the config with input and graph paths and keys
        # as well as block shape
        config.update({'nh_path': self.nh_path,
                       'nh_key': self.nh_key,
                       'node_label_path': self.node_label_path,
                       'node_label_key': self.node_label_key,
                       'output_path': self.output_path,
                       'output_key': self.output_key,
                       'chunk_size': chunk_size})

        edge_block_list = vu.blocks_in_volume([n_lifted_edges], [chunk_size])
        n_jobs = min(self.max_jobs, len(edge_block_list))
        # prime and run the jobs
        self.prepare_jobs(n_jobs, edge_block_list, config, self.prefix)
        self.submit_jobs(n_jobs, self.prefix)

        # wait till jobs finish and check for job success
        self.wait_for_jobs(self.prefix)
        self.check_jobs(n_jobs, self.prefix)
Code example #29
def filling_size_filter(job_id, config_path):
    fu.log("start processing job %i" % job_id)
    fu.log("reading config from %s" % config_path)

    # read the config
    with open(config_path) as f:
        config = json.load(f)
    input_path = config['input_path']
    input_key = config['input_key']
    hmap_path = config['hmap_path']
    hmap_key = config['hmap_key']
    output_path = config['output_path']
    output_key = config['output_key']
    block_list = config['block_list']
    block_shape = config['block_shape']
    res_path = config['res_path']

    # get the shape
    with vu.file_reader(input_path, 'r') as f:
        shape = f[input_key].shape
    blocking = nt.blocking(roiBegin=[0, 0, 0],
                           roiEnd=list(shape),
                           blockShape=list(block_shape))

    discard_ids = np.load(res_path)

    same_file = input_path == output_path
    in_place = same_file and input_key == output_key

    if in_place:
        with vu.file_reader(input_path) as f, vu.file_reader(hmap_path, 'r') as f_h:
            ds = f[input_key]
            ds_hmap = f_h[hmap_key]
            for block_id in block_list:
                apply_block(block_id, blocking, ds_hmap, ds, ds, discard_ids)
    elif same_file:
        with vu.file_reader(input_path) as f, vu.file_reader(hmap_path, 'r') as f_h:
            ds_in = f[input_key]
            ds_out = f[output_key]
            ds_hmap = f_h[hmap_key]
            for block_id in block_list:
                apply_block(block_id, blocking, ds_hmap, ds_in, ds_out, discard_ids)
    else:
        with vu.file_reader(input_path, 'r') as f_in, vu.file_reader(output_path) as f_out, vu.file_reader(hmap_path, 'r') as f_h:
            ds_in = f_in[input_key]
            ds_out = f_out[output_key]
            ds_hmap = f_h[hmap_key]
            for block_id in block_list:
                apply_block(block_id, blocking, ds_hmap, ds_in, ds_out, discard_ids)

    fu.log_job_success(job_id)
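apply_block is not shown above. One plausible reading of the task, sketched below with scikit-image's watershed, is that discarded ids are removed and the surrounding segments grow into the vacated voxels along the height map; the exact filling strategy of the original is an assumption.

import numpy as np
from skimage.segmentation import watershed


def apply_block_sketch(block_id, blocking, ds_hmap, ds_in, ds_out, discard_ids):
    # illustrative stand-in for apply_block; assumes a 3d height map
    block = blocking.getBlock(block_id)
    bb = tuple(slice(beg, end) for beg, end in zip(block.begin, block.end))

    seg = ds_in[bb]
    hmap = ds_hmap[bb].astype('float32')

    discard_mask = np.isin(seg, discard_ids)
    if discard_mask.any():
        seeds = seg.astype('int64')
        seeds[discard_mask] = 0
        # grow the remaining segments into the discarded voxels
        filled = watershed(hmap, markers=seeds, mask=seg != 0)
        seg = filled.astype(seg.dtype)
    ds_out[bb] = seg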
Code example #30
    def run_impl(self):
        shebang, block_shape, roi_begin, roi_end = self.global_config_values()
        self.init(shebang)

        # get shape and make block config
        shape = vu.get_shape(self.input_path, self.input_key)
        assert len(shape) == 4, "Need 4d input for MWS"
        n_channels = shape[0]
        shape = shape[1:]

        # TODO make optional which channels to choose
        assert len(self.offsets) == n_channels,\
            "%i, %i" % (len(self.offsets), n_channels)
        assert all(len(off) == 3 for off in self.offsets)

        config = self.get_task_config()
        config.update({'input_path': self.input_path, 'input_key': self.input_key,
                       'output_path': self.output_path, 'output_key': self.output_key,
                       'block_shape': block_shape, 'offsets': self.offsets,
                       'halo': self.halo, 'tmp_folder': self.tmp_folder})

        # check if we have a mask and add to the config if we do
        if self.mask_path != '':
            assert self.mask_key != ''
            config.update({'mask_path': self.mask_path, 'mask_key': self.mask_key})

        # get chunks
        chunks = config.pop('chunks', None)
        if chunks is None:
            chunks = tuple(bs // 2 for bs in block_shape)
        # clip chunks
        chunks = tuple(min(ch, sh) for ch, sh in zip(chunks, shape))

        # make output dataset
        compression = config.pop('compression', 'gzip')
        with vu.file_reader(self.output_path) as f:
            f.require_dataset(self.output_key,  shape=shape, dtype='uint64',
                              compression=compression, chunks=chunks)

        blocking = nt.blocking([0, 0, 0], list(shape), list(block_shape))
        block_lists = vu.make_checkerboard_block_lists(blocking, roi_begin, roi_end)

        # we need the max-block-id to write out max-label-id later
        max_block_id = max([max(bl) for bl in block_lists])
        config.update({'max_block_id': max_block_id})

        for pass_id, block_list in enumerate(block_lists):
            config['pass'] = pass_id
            self._mws_pass(block_list, config, 'pass_%i' % pass_id)