Example #1
def solve(predict_config, worker_config, data_config, graph_config,
          solve_config, num_block_workers, block_size, roi_offset, roi_size,
          context, solve_block, base_dir, experiment, train_number,
          predict_number, graph_number, solve_number, queue,
          singularity_container, mount_dirs, **kwargs):

    source_roi = daisy.Roi(daisy.Coordinate(roi_offset),
                           daisy.Coordinate(roi_size))

    solve_setup_dir = os.path.join(
        os.path.join(base_dir, experiment),
        "04_solve/setup_t{}_p{}_g{}_s{}".format(train_number, predict_number,
                                                graph_number, solve_number))

    block_write_roi = daisy.Roi((0, 0, 0), block_size)
    block_read_roi = block_write_roi.grow(context, context)
    total_roi = source_roi.grow(context, context)

    logger.info("Solving in %s", total_roi)

    daisy.run_blockwise(
        total_roi,
        block_read_roi,
        block_write_roi,
        process_function=lambda:
        start_worker(predict_config, worker_config, data_config, graph_config,
                     solve_config, queue, singularity_container, mount_dirs,
                     solve_block, solve_setup_dir),
        num_workers=num_block_workers,
        fit='shrink')

    logger.info("Finished solving, parameters id is %s", solve_number)
Example #2
def fetch(in_vol, voxel_size, roi_offset, roi_shape, out_file, out_ds,
          num_workers):

    total_roi = daisy.Roi((roi_offset), (roi_shape))

    read_roi = daisy.Roi((0, ) * 3, (4800, 1280, 1280))
    write_roi = read_roi

    logging.info('Creating out dataset...')

    raw_out = daisy.prepare_ds(out_file,
                               out_ds,
                               total_roi,
                               voxel_size,
                               dtype=np.uint8,
                               write_roi=write_roi)

    logging.info('Writing to dataset...')

    daisy.run_blockwise(total_roi,
                        read_roi,
                        write_roi,
                        process_function=lambda b: fetch_in_block(
                            b, voxel_size, in_vol, raw_out),
                        fit='shrink',
                        num_workers=num_workers)
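
fetch_in_block is defined elsewhere; a minimal sketch, assuming in_vol can be
sliced in voxel coordinates in the same axis order as the output dataset, could
look like this:

import numpy as np


def fetch_in_block(block, voxel_size, in_vol, raw_out):

    # convert the block's write ROI from world units to voxel coordinates
    roi = block.write_roi / voxel_size
    z, y, x = roi.get_begin()
    d, h, w = roi.get_shape()

    # read from the source volume (assumed to be indexable in voxels)
    data = np.asarray(in_vol[z:z + d, y:y + h, x:x + w], dtype=np.uint8)

    # write into the prepared output dataset at the block's write ROI
    raw_out[block.write_roi] = data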
def overlap():
    run_datetime = datetime.datetime.now(
        pytz.timezone('US/Eastern')).strftime('%Y%m%dT%H%M%S.%f%z')
    temp_dir = os.path.join(config.temp_path, run_datetime)
    os.makedirs(temp_dir)

    fragments = daisy.open_ds(config.fragments_zarr, config.fragments_ds)
    groundtruth = daisy.open_ds(config.groundtruth_zarr, config.groundtruth_ds)
    total_roi = daisy.Roi(offset=config.roi_offset, shape=config.roi_shape)

    start = time.time()
    daisy.run_blockwise(
        total_roi=total_roi,
        read_roi=daisy.Roi(offset=(0, 0, 0), shape=config.block_size),
        write_roi=daisy.Roi(offset=(0, 0, 0), shape=config.block_size),
        process_function=lambda block: overlap_in_block(
            block=block,
            fragments=fragments,
            groundtruth=groundtruth,
            tmp_path=temp_dir),
        fit='shrink',
        num_workers=config.num_workers,
        read_write_conflict=False,
        max_retries=1)

    logger.info(
        f"Blockwise overlapping of fragments and ground truth in {time.time() - start:.3f}s")
    logger.debug(
        f"num blocks: {np.prod(np.ceil(np.array(config.roi_shape) / np.array(config.block_size)))}")

    frag_to_gt = overlap_reduce(tmp_path=temp_dir)

    pickle.dump(frag_to_gt, open(os.path.join(
        temp_dir, 'frag_to_gt.pickle'), 'wb'))
    return frag_to_gt
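
overlap_in_block and overlap_reduce are not shown above; a sketch of the pair,
assuming each block dumps its (fragment id, ground-truth id) co-occurrence
counts into tmp_path as a pickle file, could be:

import os
import pickle

import numpy as np


def overlap_in_block(block, fragments, groundtruth, tmp_path):

    # count how often each (fragment id, ground-truth id) pair co-occurs in
    # this block and dump the counts for the reduce step
    frag = fragments.to_ndarray(block.write_roi)
    gt = groundtruth.to_ndarray(block.write_roi)
    pairs, counts = np.unique(
        np.stack([frag.ravel(), gt.ravel()]), axis=1, return_counts=True)

    with open(os.path.join(tmp_path, f'{block.block_id}.pickle'), 'wb') as f:
        pickle.dump((pairs, counts), f)


def overlap_reduce(tmp_path):

    # merge the per-block counts into fragment id -> {ground-truth id: count}
    frag_to_gt = {}
    for name in os.listdir(tmp_path):
        if not name.endswith('.pickle'):
            continue
        with open(os.path.join(tmp_path, name), 'rb') as f:
            pairs, counts = pickle.load(f)
        for (frag_id, gt_id), count in zip(pairs.T, counts):
            counts_for_frag = frag_to_gt.setdefault(int(frag_id), {})
            counts_for_frag[int(gt_id)] = \
                counts_for_frag.get(int(gt_id), 0) + int(count)
    return frag_to_gt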
Example #4
def relabel_connected_components(array_in, array_out, block_size, num_workers):
    '''Relabel connected components in an array in parallel.

    Args:

        array_in (``daisy.Array``):

            The array to relabel.

        array_out (``daisy.Array``):

            The array to write to. Should initially be empty (i.e., all zeros).

        block_size (``daisy.Coordinate``):

            The size of the blocks to relabel in, in world units.

        num_workers (``int``):

            The number of workers to use.
    '''

    write_roi = daisy.Roi((0, ) * len(block_size), block_size)
    read_roi = write_roi.grow(array_in.voxel_size, array_in.voxel_size)
    total_roi = array_in.roi.grow(array_in.voxel_size, array_in.voxel_size)

    num_voxels_in_block = (read_roi / array_in.voxel_size).size()

    with tempfile.TemporaryDirectory() as tmpdir:

        daisy.run_blockwise(
            total_roi,
            read_roi,
            write_roi,
            process_function=lambda b: find_components_in_block(
                array_in, array_out, num_voxels_in_block, b, tmpdir),
            num_workers=num_workers,
            fit='shrink')

        nodes, edges = read_cross_block_merges(tmpdir)

    components = find_components(nodes, edges)

    logger.debug("Num nodes: %s", len(nodes))
    logger.debug("Num edges: %s", len(edges))
    logger.debug("Num components: %s", len(components))

    write_roi = daisy.Roi((0, ) * len(block_size), block_size)
    read_roi = write_roi
    total_roi = array_in.roi

    daisy.run_blockwise(total_roi,
                        read_roi,
                        write_roi,
                        process_function=lambda b: relabel_in_block(
                            array_out, nodes, components, b),
                        num_workers=num_workers,
                        fit='shrink')
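
A hypothetical call of relabel_connected_components, assuming an input
segmentation stored in a zarr container (all paths and dataset names below are
made up) and a freshly prepared, zero-initialized output dataset:

import daisy
import numpy as np

array_in = daisy.open_ds('segmentation.zarr', 'volumes/labels')

# prepare_ds creates an empty (all-zero) uint64 dataset of the same extent
array_out = daisy.prepare_ds(
    'segmentation.zarr',
    'volumes/labels_relabelled',
    total_roi=array_in.roi,
    voxel_size=array_in.voxel_size,
    dtype=np.uint64)

relabel_connected_components(
    array_in,
    array_out,
    block_size=daisy.Coordinate((2000, 2000, 2000)),
    num_workers=8)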
def extract_edges_blockwise(db_host,
                            db_name,
                            sample,
                            edge_move_threshold,
                            block_size,
                            num_workers,
                            frames=None,
                            frame_context=1,
                            data_dir='../01_data',
                            use_pv_distance=False,
                            **kwargs):

    voxel_size, source_roi = get_source_roi(data_dir, sample)

    # limit to specific frames, if given
    if frames:
        begin, end = frames
        begin -= frame_context
        end += frame_context
        crop_roi = daisy.Roi((begin, None, None, None),
                             (end - begin, None, None, None))
        source_roi = source_roi.intersect(crop_roi)

    # block size in world units
    block_write_roi = daisy.Roi((0, ) * 4, daisy.Coordinate(block_size))

    pos_context = daisy.Coordinate((0, ) + (edge_move_threshold, ) * 3)
    neg_context = daisy.Coordinate((1, ) + (edge_move_threshold, ) * 3)
    logger.debug("Set neg context to %s", neg_context)

    input_roi = source_roi.grow(neg_context, pos_context)
    block_read_roi = block_write_roi.grow(neg_context, pos_context)

    print("Following ROIs in world units:")
    print("Input ROI       = %s" % input_roi)
    print("Block read  ROI = %s" % block_read_roi)
    print("Block write ROI = %s" % block_write_roi)
    print("Output ROI      = %s" % source_roi)

    print("Starting block-wise processing...")

    # process block-wise
    daisy.run_blockwise(input_roi,
                        block_read_roi,
                        block_write_roi,
                        process_function=lambda b: extract_edges_in_block(
                            db_name,
                            db_host,
                            edge_move_threshold,
                            b,
                            use_pv_distance=use_pv_distance),
                        check_function=lambda b: check_function(
                            b, 'extract_edges', db_name, db_host),
                        num_workers=num_workers,
                        processes=True,
                        read_write_conflict=False,
                        fit='shrink')
Example #6
    def prepare_for_fragments(self):
        '''Get the fragment ID for each site in site_ids.'''

        logging.info(f"Preparing evaluation for fragments in "
                     f"{self.fragments_file}...")

        if not os.path.exists(self.site_fragment_lut_directory):

            logging.info("site-fragment LUT does not exist, creating it...")

            os.makedirs(self.site_fragment_lut_directory)
            daisy.run_blockwise(self.roi,
                                daisy.Roi((0, 0, 0), (9000, 9000, 9000)),
                                daisy.Roi((0, 0, 0), (9000, 9000, 9000)),
                                lambda b: self.store_lut_in_block(b),
                                num_workers=48,
                                fit='shrink')

        else:

            logging.info(
                "site-fragment LUT already exists, skipping preparation")

        logging.info("Reading site-fragment LUTs from "
                     f"{self.site_fragment_lut_directory}...")

        lut_files = glob.glob(
            os.path.join(self.site_fragment_lut_directory, '*.npz'))

        site_fragment_lut = np.concatenate(
            [np.load(f)['site_fragment_lut'] for f in lut_files], axis=1)

        self.num_bg_sites = int(
            np.sum([np.load(f)['num_bg_sites'] for f in lut_files]))

        assert site_fragment_lut.dtype == np.uint64

        logging.info(
            f"Found {len(site_fragment_lut[0])} sites in site-fragment LUT")

        # convert to dictionary
        site_fragment_lut = {
            site: fragment
            for site, fragment in zip(site_fragment_lut[0],
                                      site_fragment_lut[1])
        }

        # create fragment ID array congruent to site_ids
        self.site_fragment_ids = np.array([
            site_fragment_lut[s] if s in site_fragment_lut else 0
            for s in self.site_ids
        ],
                                          dtype=np.uint64)
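
store_lut_in_block is defined elsewhere in this class; a sketch consistent with
the reading code above, assuming hypothetical attributes self.fragments (a
daisy.Array opened from self.fragments_file) and self.site_positions (world
coordinates congruent to self.site_ids), could look like:

    def store_lut_in_block(self, block):

        # fragment labels under this block (attribute names are assumptions)
        fragments = self.fragments.to_ndarray(block.write_roi, fill_value=0)

        site_ids = []
        fragment_ids = []
        num_bg_sites = 0

        for site_id, position in zip(self.site_ids, self.site_positions):

            position = daisy.Coordinate(position)
            if not block.write_roi.contains(position):
                continue

            # look up the fragment ID under this site
            voxel = (position - block.write_roi.get_begin()) \
                / self.fragments.voxel_size
            fragment_id = fragments[voxel]

            if fragment_id == 0:
                num_bg_sites += 1
            else:
                site_ids.append(site_id)
                fragment_ids.append(fragment_id)

        # one LUT per block; prepare_for_fragments concatenates them later
        np.savez_compressed(
            os.path.join(self.site_fragment_lut_directory,
                         str(block.block_id) + '.npz'),
            site_fragment_lut=np.array([site_ids, fragment_ids],
                                       dtype=np.uint64),
            num_bg_sites=num_bg_sites)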
Example #7
def extract_edges(
        db_host,
        db_name,
        soft_mask_container,
        soft_mask_dataset,
        roi_offset,
        roi_size,
        distance_threshold,
        block_size,
        num_block_workers,
        graph_number,
        **kwargs):

    # Define Rois:
    source_roi = daisy.Roi(roi_offset, roi_size)
    block_write_roi = daisy.Roi(
        (0,) * 3,
        daisy.Coordinate(block_size))

    pos_context = daisy.Coordinate((distance_threshold,)*3)
    neg_context = daisy.Coordinate((distance_threshold,)*3)
    logger.debug("Set pos context to %s", pos_context)
    logger.debug("Set neg context to %s", neg_context)

    input_roi = source_roi.grow(neg_context, pos_context)
    block_read_roi = block_write_roi.grow(neg_context, pos_context)

    logger.info("Following ROIs in world units:")
    logger.info("Input ROI       = %s" % input_roi)
    logger.info("Block read  ROI = %s" % block_read_roi)
    logger.info("Block write ROI = %s" % block_write_roi)
    logger.info("Output ROI      = %s" % source_roi)

    logger.info("Starting block-wise processing...")

    # process block-wise
    daisy.run_blockwise(
        input_roi,
        block_read_roi,
        block_write_roi,
        process_function=lambda b: extract_edges_in_block(
            db_name,
            db_host,
            soft_mask_container,
            soft_mask_dataset,
            distance_threshold,
            graph_number,
            b),
        num_workers=num_block_workers,
        processes=True,
        read_write_conflict=False,
        fit='shrink')
Example #8
    def test_negative_offset(self):

        logger.warning("A warning")

        total_roi = daisy.Roi(
            (-100,),
            (2369,))
        block_write_roi = daisy.Roi(
            (0,),
            (500,))
        block_read_roi = block_write_roi.grow(
            (100,),
            (100,))

        outdir = self.path_to()

        ret = daisy.run_blockwise(
            total_roi,
            block_read_roi,
            block_write_roi,
            process_function=lambda b: self.process_block(outdir, b),
            num_workers=1,
            fit='shrink')

        outfiles = glob.glob(os.path.join(outdir, '*.block'))
        block_ids = sorted([
            int(path.split('/')[-1].split('.')[0])
            for path in outfiles
        ])

        self.assertTrue(ret)
        self.assertEqual(len(block_ids), 5)
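
process_block is shared by several of these tests but is not shown; a minimal
sketch that matches how the tests collect the *.block files afterwards:

    def process_block(self, outdir, block):

        # record that this block was processed by writing a file named after
        # its block ID
        path = os.path.join(outdir, '%d.block' % block.block_id)
        with open(path, 'w') as f:
            f.write(str(block.block_id))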
Example #9
    def test_worker_failure(self):

        total_roi = daisy.Roi((0,), (100,))
        read_roi = daisy.Roi((0,), (5,))
        write_roi = daisy.Roi((0,), (3,))

        outdir = self.path_to()

        ret = daisy.run_blockwise(
            total_roi=total_roi,
            read_roi=read_roi,
            write_roi=write_roi,
            process_function=lambda: self.worker(outdir, fail=16),
            num_workers=10)

        outfiles = glob.glob(os.path.join(outdir, '*.block'))
        block_ids = sorted([
            int(path.split('/')[-1].split('.')[0])
            for path in outfiles
        ])

        self.assertFalse(ret)
        expected_block_ids = list(range(32))
        expected_block_ids.remove(16)
        self.assertEqual(block_ids, expected_block_ids)
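
Here process_function takes no block argument, so each worker acquires blocks
itself; a sketch of such a worker, assuming daisy's block-streaming Client API
and a fail parameter that makes exactly one block crash:

    def worker(self, outdir, fail=None):

        # connect to the scheduler started by run_blockwise
        client = daisy.Client()

        while True:

            block = client.acquire_block()
            if block is None:
                break

            if block.block_id == fail:
                # simulate a crashing worker on this block, so no output file
                # is written and run_blockwise reports a failure
                raise RuntimeError("intentional failure for block %d" % fail)

            path = os.path.join(outdir, '%d.block' % block.block_id)
            with open(path, 'w') as f:
                f.write(str(block.block_id))

            client.release_block(block, ret=0)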
Example #10
def parallel_lsd_agglomerate(lsds, fragments, rag_provider, lsd_extractor,
                             block_size, context, num_workers):
    '''Agglomerate fragments in parallel using only the shape descriptors.

    Args:

        lsds (`class:daisy.Array`):

            An array containing the LSDs.

        fragments (`class:daisy.Array`):

            An array containing fragments.

        rag_provider (`class:SharedRagProvider`):

            A RAG provider to read nodes from and write found edges to.

        lsd_extractor (``LsdExtractor``):

            The local shape descriptor object used to compute the difference
            between the segmentation and the target LSDs.

        block_size (``tuple`` of ``int``):

            The size of the blocks to process in parallel, in world units.

        context (``tuple`` of ``int``):

            The context to consider for agglomeration, in world units.

        num_workers (``int``):

            The number of parallel workers.

    Returns:

        True, if all tasks succeeded.
    '''

    assert fragments.data.dtype == np.uint64

    shape = lsds.shape[1:]
    context = daisy.Coordinate(context)

    total_roi = lsds.roi.grow(context, context)
    read_roi = daisy.Roi((0, ) * lsds.roi.dims(),
                         block_size).grow(context, context)
    write_roi = daisy.Roi((0, ) * lsds.roi.dims(), block_size)

    return daisy.run_blockwise(
        total_roi,
        read_roi,
        write_roi,
        lambda b: agglomerate_in_block(lsds, fragments, rag_provider,
                                       lsd_extractor, b),
        lambda b: block_done(b, rag_provider),
        num_workers=num_workers,
        read_write_conflict=False,
        fit='shrink')
Example #11
    def test_multidim(self):

        total_roi = daisy.Roi(
            (199, -100, -100, -100),
            (12, 5140, 2248, 2369))
        block_write_roi = daisy.Roi(
            (0, 0, 0, 0),
            (5, 500, 500, 500))
        block_read_roi = block_write_roi.grow(
            (1, 100, 100, 100),
            (1, 100, 100, 100))

        outdir = self.path_to()

        ret = daisy.run_blockwise(
            total_roi,
            block_read_roi,
            block_write_roi,
            process_function=lambda b: self.process_block(outdir, b),
            num_workers=8,
            processes=False,
            fit='shrink')

        outfiles = glob.glob(os.path.join(outdir, '*.block'))
        block_ids = sorted([
            int(path.split('/')[-1].split('.')[0])
            for path in outfiles
        ])

        self.assertTrue(ret)
        self.assertEqual(len(block_ids), 500)
Example #12
def downscale(in_array, out_array, factor, write_size):

    print("Downsampling by factor %s" % (factor, ))

    dims = in_array.roi.dims()
    block_roi = daisy.Roi((0, ) * dims, write_size)

    print("Processing ROI %s with blocks %s" % (out_array.roi, block_roi))

    daisy.run_blockwise(out_array.roi,
                        block_roi,
                        block_roi,
                        process_function=lambda b: downscale_block(
                            in_array, out_array, factor, b),
                        read_write_conflict=False,
                        num_workers=60,
                        max_retries=0,
                        fit='shrink')
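
downscale_block is not shown; a sketch, assuming factor is the per-dimension
subsampling step in voxels and that both arrays cover the same region in world
units (only at different voxel sizes):

def downscale_block(in_array, out_array, factor, block):

    # read the full-resolution data for this block
    data = in_array.to_ndarray(block.read_roi, fill_value=0)

    # simple subsampling; keep a possible channel dimension untouched
    slices = tuple(slice(None, None, int(f)) for f in factor)
    if data.ndim > len(factor):
        slices = (slice(None),) + slices
    data = data[slices]

    # write the downsampled data at the block's write ROI
    out_array[block.write_roi] = data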
Example #13
def solve(predict_config, worker_config, data_config, graph_config,
          solve_config, num_block_workers, block_size, roi_offset, roi_size,
          context, solve_block, graph_number, solve_number, queue,
          singularity_container, mount_dirs, **kwargs):

    source_roi = daisy.Roi(daisy.Coordinate(roi_offset),
                           daisy.Coordinate(roi_size))

    solve_setup_dir = Path("solve_setup_dir")

    block_write_roi = daisy.Roi((0, 0, 0), block_size)
    block_read_roi = block_write_roi.grow(context, context)
    total_roi = source_roi.grow(context, context)

    logger.info("Solving in %s", total_roi)

    daisy.run_blockwise(
        total_roi,
        block_read_roi,
        block_write_roi,
        process_function=lambda: start_worker(
            predict_config,
            worker_config,
            data_config,
            graph_config,
            solve_config,
            queue,
            singularity_container,
            mount_dirs,
            solve_block,
            solve_setup_dir,
        ),
        num_workers=num_block_workers,
        fit="shrink",
    )

    logger.info("Finished solving, parameters id is %s", solve_number)
Example #14
    def test_callback(self):

        total_roi = daisy.Roi((0,), (100,))
        read_roi = daisy.Roi((0,), (5,))
        write_roi = daisy.Roi((0,), (3,))

        outdir = self.path_to()

        ret = daisy.run_blockwise(
            total_roi=total_roi,
            read_roi=read_roi,
            write_roi=write_roi,
            process_function=lambda b: self.process_block(outdir, b),
            num_workers=10)

        outfiles = glob.glob(os.path.join(outdir, '*.block'))
        block_ids = sorted([
            int(path.split('/')[-1].split('.')[0])
            for path in outfiles
        ])

        self.assertTrue(ret)
        self.assertEqual(block_ids, list(range(32)))
Example #15
def _predict_affinities_daisy():

    import pathlib
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-container',
                        type=str,
                        required=True,
                        help='N5 container')
    # parser.add_argument('--input-dataset', type=str, required=True, help='3-dimensional')
    parser.add_argument('--output-container',
                        type=str,
                        required=True,
                        help='N5 container')
    parser.add_argument('--input',
                        type=str,
                        nargs=2,
                        metavar=('dataset', 'tensor'),
                        help='For example --input volumes/raw Placeholder:0',
                        required=True)
    parser.add_argument(
        '--output',
        type=str,
        action='append',
        nargs=4,
        metavar=('dataset', 'dtype', 'num_channels', 'tensor'),
        help=
        'For example --output volumes/affinities/prediction float32 3 Slice:0. num-channels<=0 means no channel axis',
        required=True)
    # parser.add_argument('--output-dataset', type=str)
    parser.add_argument('--gpus', required=True, type=int, nargs='+')
    parser.add_argument(
        '--num-workers',
        type=int,
        default=1,
        help='Number of CPU workers per GPU for parallel processing')
    parser.add_argument('--input-voxel-size',
                        nargs=3,
                        type=int,
                        default=(360, 36, 36),
                        help='zyx')
    parser.add_argument('--output-voxel-size',
                        nargs=3,
                        type=int,
                        default=(120, 108, 108),
                        help='zyx')
    parser.add_argument('--network-input-shape',
                        nargs=3,
                        type=int,
                        default=(91, 862, 862),
                        help='zyx')
    parser.add_argument('--network-output-shape',
                        nargs=3,
                        type=int,
                        default=(207, 214, 214),
                        help='zyx')
    parser.add_argument('--experiment-directory', required=True)
    parser.add_argument('--iteration', type=int, required=True)
    parser.add_argument('--weight-graph-pattern',
                        default='unet_checkpoint_%d',
                        help='Relative to experiment-directory.')
    parser.add_argument('--meta-graph-filename',
                        default='unet-inference.meta',
                        help='Relative to experiment-directory.')
    # parser.add_argument('--input-placeholder-tensor', default='Placeholder:0')
    # parser.add_argument('--output-placeholder-tensor', default='Slice:0')
    parser.add_argument('--output-compression', default='raw')
    parser.add_argument(
        '--net-io-names',
        default=None,
        required=False,
        help=
        'Look-up tensor names from json, if specified. Use specified values from --input/--output as tensor names directly, otherwise.'
    )

    args = parser.parse_args()

    input_voxel_size = np.array(args.input_voxel_size, dtype=np.float64)
    output_voxel_size = np.array(args.output_voxel_size, dtype=np.float64)

    if args.net_io_names:
        with open(args.net_io_names, 'r') as f:
            net_io_names = json.load(f)

        def tensor_name(name):
            return net_io_names[name]
    else:

        def tensor_name(name):
            return name

    experiment_directory = args.experiment_directory
    input_container = args.input_container
    output_container = pathlib.Path(args.output_container)
    output_dir = output_container.parent
    iteration = args.iteration
    network_input_shape = np.array(args.network_input_shape, dtype=np.uint64)
    network_input_shape_world = np.array(tuple(
        n * i for n, i in zip(network_input_shape, input_voxel_size)),
                                         dtype=np.float64)
    network_output_shape = np.array(args.network_output_shape, dtype=np.uint64)
    network_output_shape_world = np.array(tuple(
        n * o for n, o in zip(network_output_shape, output_voxel_size)),
                                          dtype=np.float64)
    shape_diff_world = network_input_shape_world - network_output_shape_world
    print(args.input, args.output)
    inputs = tuple((ds, tensor_name(tensor)) for ds, tensor in [args.input])
    outputs = tuple((ds, np.dtype(dtype), int(nc), tensor_name(tensor))
                    for ds, dtype, nc, tensor in args.output)

    with z5py.File(path=input_container, use_zarr_format=False, mode='r') as f:
        ds = f[inputs[0][0]]
        input_dataset_size = ds.shape
    input_dataset_size_world = np.array(tuple(
        vs * s for vs, s in zip(input_voxel_size, input_dataset_size)),
                                        dtype=np.float64)
    output_dataset_roi_world = Roi(shape=input_dataset_size_world,
                                   offset=np.array(
                                       (0, ) * len(input_dataset_size_world),
                                       dtype=np.float64))
    output_dataset_roi_world = output_dataset_roi_world.snap_to_grid(
        network_output_shape_world, mode='grow')
    output_dataset_roi = output_dataset_roi_world / tuple(output_voxel_size)

    _logger.info('input dataset size world:   %s', input_dataset_size_world)
    _logger.info('output dataset roi world:   %s', output_dataset_roi_world)
    _logger.info('output dataset roi:         %s', output_dataset_roi)
    _logger.info('output network size:        %s', network_output_shape)
    _logger.info('output network size world:  %s', network_output_shape_world)

    weight_graph = os.path.join(experiment_directory,
                                args.weight_graph_pattern % iteration)
    meta_graph = os.path.join(experiment_directory, args.meta_graph_filename)

    if not os.path.isdir(str(output_container)):
        os.makedirs(str(output_container))
    with z5py.File(str(output_container), use_zarr_format=False) as f:

        for output_dataset, dtype, num_channels, tensor in outputs:

            ds = f.require_dataset(
                name=output_dataset,
                shape=(num_channels, ) +
                tuple(int(s) for s in output_dataset_roi.get_shape())
                if num_channels > 0 else tuple(
                    int(s) for s in output_dataset_roi.get_shape()),
                dtype=dtype,
                chunks=(1, ) + tuple(int(n) for n in network_output_shape)
                if num_channels > 0 else tuple(
                    int(n) for n in network_output_shape),
                compression='raw')
            ds.attrs['resolution'] = tuple(args.output_voxel_size[::-1])
            ds.attrs['offset'] = tuple(
                output_dataset_roi_world.get_begin()[::-1])
            workflow_info = {
                'input': {
                    'container': input_container,
                    'dataset': inputs[0][0],
                    'tensor': inputs[0][1]
                },
                'output': {
                    'tensor': tensor
                },
                'network': {
                    'experiment_directory': experiment_directory,
                    'weight_graph': weight_graph,
                    'meta_graph': meta_graph,
                    'iteration': iteration
                }
            }
            ds.attrs['workflow_info'] = workflow_info

    gpus = args.gpus
    num_workers = len(gpus)

    pipeline_factory = _default_pipeline_factory(
        input_container=input_container,
        input=inputs[0],
        output_filename=str(output_container.name),
        output_dir=str(output_dir),
        outputs=tuple((ds, tensor) for ds, _, _, tensor in outputs),
        output_compression_type=args.output_compression,
        weight_graph=weight_graph,
        meta_graph=meta_graph,
        input_voxel_size=input_voxel_size,
        output_voxel_size=output_voxel_size)

    process_function = make_process_function(
        actor_id_to_gpu_mapping=lambda id: gpus[id],
        pipeline_factory=pipeline_factory,
        input_voxel_size=input_voxel_size,
        output_voxel_size=output_voxel_size,
        outputs=tuple((ds, tensor) for ds, _, _, tensor in outputs),
        num_cpu_workers=args.num_workers)

    total_roi = output_dataset_roi_world.grow(
        amount_neg=tuple(shape_diff_world / 2),
        amount_pos=tuple(shape_diff_world / 2))
    read_roi = Roi(shape=tuple(network_input_shape_world),
                   offset=tuple(-shape_diff_world / 2))
    write_roi = Roi(shape=tuple(network_output_shape_world),
                    offset=tuple(np.array((0, ) * len(input_voxel_size))))
    _logger.info('Running blockwise!')
    _logger.info('total roi:   %s', total_roi)
    _logger.info('read  roi:   %s', read_roi)
    _logger.info('write roi:   %s', write_roi)
    daisy.run_blockwise(total_roi=total_roi,
                        read_roi=read_roi,
                        write_roi=write_roi,
                        process_function=process_function,
                        num_workers=num_workers,
                        read_write_conflict=False)
Example #16
def parallel_watershed(
        affs,
        rag_provider,
        block_size,
        context,
        fragments_out,
        num_workers,
        mask=None,
        fragments_in_xy=False,
        epsilon_agglomerate=0.0,
        filter_fragments=0.0,
        replace_sections=None):
    '''Extract fragments from affinities using watershed.

    Args:

        affs (`class:daisy.Array`):

            An array containing affinities.

        rag_provider (`class:SharedRagProvider`):

            A RAG provider to write nodes for extracted fragments to. This does
            not yet add adjacency edges, for that, an agglomeration method
            should be called after this function.

        block_size (``tuple`` of ``int``):

            The size of the blocks to process in parallel in world units.

        context (``tuple`` of ``int``):

            The context to consider for fragment extraction, in world units.

        fragments_out (`class:daisy.Array`):

            An array to store fragments in. Should be of ``dtype`` ``uint64``.

        num_workers (``int``):

            The number of parallel workers.

        mask (`class:daisy.Array`):

            A dataset containing a mask. If given, fragments are only extracted
            for masked-in (==1) areas.

        fragments_in_xy (``bool``):

            Whether to extract fragments for each xy-section separately.

        epsilon_agglomerate (``float``):

            Perform an initial waterz agglomeration on the extracted fragments
            to this threshold. Skip if 0 (default).

        filter_fragments (``float``):

            Filter fragments that have an average affinity lower than this
            value.

        replace_sections (``list`` of ``int``):

            Replace fragments data with zero in given sections (useful if large
            artifacts are causing issues). List of section numbers (in voxels)

    Returns:

        True, if all tasks succeeded.
    '''

    assert fragments_out.data.dtype == np.uint64

    if context is None:
        context = daisy.Coordinate((0,)*affs.roi.dims())
    else:
        context = daisy.Coordinate(context)

    total_roi = affs.roi.grow(context, context)
    read_roi = daisy.Roi((0,)*affs.roi.dims(), block_size).grow(context, context)
    write_roi = daisy.Roi((0,)*affs.roi.dims(), block_size)

    num_voxels_in_block = (write_roi/affs.voxel_size).size()

    return daisy.run_blockwise(
        total_roi,
        read_roi,
        write_roi,
        lambda b: watershed_in_block(
            affs=affs,
            block=b,
            context=context,
            rag_provider=rag_provider,
            fragments_out=fragments_out,
            num_voxels_in_block=num_voxels_in_block,
            fragments_in_xy=fragments_in_xy,
            epsilon_agglomerate=epsilon_agglomerate,
            mask=mask,
            filter_fragments=filter_fragments),
        lambda b: block_done(b, rag_provider),
        num_workers=num_workers,
        read_write_conflict=False,
        fit='shrink')
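
A hypothetical invocation of parallel_watershed, assuming affinities already
exist in a zarr container and that fragment nodes go to daisy's MongoDB-backed
graph provider (all paths, names and parameters below are made up):

import daisy
import numpy as np

affs = daisy.open_ds('prediction.zarr', 'volumes/affs')

fragments_out = daisy.prepare_ds(
    'prediction.zarr',
    'volumes/fragments',
    total_roi=affs.roi,
    voxel_size=affs.voxel_size,
    dtype=np.uint64)

# nodes for the extracted fragments are written to this RAG provider
rag_provider = daisy.persistence.MongoDbGraphProvider(
    'fragments_db',
    host='localhost',
    mode='r+',
    position_attribute=['center_z', 'center_y', 'center_x'])

parallel_watershed(
    affs,
    rag_provider,
    block_size=(2000, 2000, 2000),
    context=(400, 400, 400),
    fragments_out=fragments_out,
    num_workers=16,
    fragments_in_xy=True)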
Example #17
    parser.add_argument('--num_workers', '-nw', type=int,
                        help="Number of processes to spawn",
                        default=1)
    parser.add_argument('--read_write_conflict', '-rwc', action='store_true',
                        help="Flag to not schedule overlapping blocks"
                        " at the same time. Default is false")
    args = parser.parse_args()

    ndims = len(args.total_roi_size)

    # define total region of interest (roi)
    total_roi_start = daisy.Coordinate((0,)*ndims)
    total_roi_size = daisy.Coordinate(args.total_roi_size)
    total_roi = daisy.Roi(total_roi_start, total_roi_size)

    # define block read and write rois
    block_read_size = daisy.Coordinate(args.block_read_size)
    block_write_size = daisy.Coordinate(args.block_write_size)
    context = (block_read_size - block_write_size) / 2
    block_read_roi = daisy.Roi(total_roi_start, block_read_size)
    block_write_roi = daisy.Roi(context, block_write_size)

    # call run_blockwise
    daisy.run_blockwise(
            total_roi,
            block_read_roi,
            block_write_roi,
            process_function=process_function,
            read_write_conflict=args.read_write_conflict,
            num_workers=args.num_workers)
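
The process_function referenced here is defined earlier in the full script and
is omitted from this excerpt; a minimal placeholder, enough to exercise the
template, could be:

import logging
import time


def process_function(block):

    # a stand-in worker: report the block and pretend to do some work
    logging.info(
        "processing block %s with read ROI %s and write ROI %s",
        block.block_id, block.read_roi, block.write_roi)
    time.sleep(0.1)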
Example #18
def predict_blockwise(base_dir, experiment, train_number, predict_number,
                      iteration, in_container_spec, in_container, in_dataset,
                      in_offset, in_size, out_container, db_name, db_host,
                      singularity_container, num_cpus, num_cache_workers,
                      num_block_workers, queue, mount_dirs, **kwargs):
    '''Run prediction in parallel blocks. Within blocks, predict in chunks.

    Args:

        experiment (``string``):

            Name of the experiment (cremi, fib19, fib25, ...).

        setup (``string``):

            Name of the setup to predict.

        iteration (``int``):

            Training iteration to predict from.

        raw_file (``string``):
        raw_dataset (``string``):
        auto_file (``string``):
        auto_dataset (``string``):

            Paths to the input autocontext datasets (affs or lsds). Can be None if not needed.

        out_file (``string``):

            Path to directory where zarr should be stored

        **Note:

            out_dataset no longer needed as input, build out_dataset from config
            outputs dictionary generated in mknet.py

        file_name (``string``):

            Name of output file

        block_size_in_chunks (``tuple`` of ``int``):

            The size of one block in chunks (not voxels!). A chunk corresponds
            to the output size of the network.

        num_workers (``int``):

            How many blocks to run in parallel.

        queue (``string``):

            Name of queue to run inference on (i.e slowpoke, gpu_rtx, gpu_any,
            gpu_tesla, gpu_tesla_large)
    '''

    predict_setup_dir = os.path.join(
        os.path.join(base_dir, experiment),
        "02_predict/setup_t{}_p{}".format(train_number, predict_number))
    train_setup_dir = os.path.join(os.path.join(base_dir, experiment),
                                   "01_train/setup_t{}".format(train_number))

    # from here on, all values are in world units (unless explicitly mentioned)
    # get ROI of source
    source = daisy.open_ds(in_container_spec, in_dataset)
    logger.info('Source dataset has shape %s, ROI %s, voxel size %s' %
                (source.shape, source.roi, source.voxel_size))

    # Read network config
    predict_net_config = os.path.join(predict_setup_dir, 'predict_net.json')
    with open(predict_net_config) as f:
        logger.info('Reading setup config from {}'.format(predict_net_config))
        net_config = json.load(f)
    outputs = net_config['outputs']

    # get chunk size and context
    net_input_size = daisy.Coordinate(
        net_config['input_shape']) * source.voxel_size
    net_output_size = daisy.Coordinate(
        net_config['output_shape']) * source.voxel_size
    context = (net_input_size - net_output_size) / 2
    logger.info('Network context: {}'.format(context))

    # get total input and output ROIs
    input_roi = source.roi.grow(context, context)
    output_roi = source.roi

    # create read and write ROI
    block_read_roi = daisy.Roi((0, 0, 0), net_input_size) - context
    block_write_roi = daisy.Roi((0, 0, 0), net_output_size)

    logger.info('Preparing output dataset...')

    for output_name, val in outputs.items():
        out_dims = val['out_dims']
        out_dtype = val['out_dtype']
        out_dataset = 'volumes/%s' % output_name

        ds = daisy.prepare_ds(out_container,
                              out_dataset,
                              output_roi,
                              source.voxel_size,
                              out_dtype,
                              write_roi=block_write_roi,
                              num_channels=out_dims,
                              compressor={
                                  'id': 'gzip',
                                  'level': 5
                              })

    logger.info('Starting block-wise processing...')

    client = pymongo.MongoClient(db_host)
    db = client[db_name]
    if 'blocks_predicted' not in db.list_collection_names():
        blocks_predicted = db['blocks_predicted']
        blocks_predicted.create_index([('block_id', pymongo.ASCENDING)],
                                      name='block_id')
    else:
        blocks_predicted = db['blocks_predicted']

    # process block-wise
    succeeded = daisy.run_blockwise(
        input_roi,
        block_read_roi,
        block_write_roi,
        process_function=lambda: predict_worker(
            train_setup_dir, predict_setup_dir, predict_number, train_number,
            experiment, iteration, in_container, in_dataset, out_container,
            db_host, db_name, queue, singularity_container, num_cpus,
            num_cache_workers, mount_dirs),
        check_function=lambda b: check_block(blocks_predicted, b),
        num_workers=num_block_workers,
        read_write_conflict=False,
        fit='overhang')

    if not succeeded:
        raise RuntimeError("Prediction failed for (at least) one block")
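
check_block is not shown; a sketch, assuming the worker logs each finished
block to the blocks_predicted collection after writing its output:

def check_block(blocks_predicted, block):

    # a block counts as done if its ID was logged to MongoDB
    return blocks_predicted.count_documents(
        {'block_id': block.block_id}) >= 1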
Example #19
    logger.info(f'Processing data to {config.out_file}/{config.out_ds_name}')

    output_dataset = daisy.prepare_ds(
            config.out_file,
            config.out_ds_name,
            total_roi=output_roi,
            voxel_size=dataset.voxel_size,
            dtype=dataset.dtype,
            write_size=block_write_roi.get_shape())

    # make task
    task = daisy.Task(
            'GaussianSmoothingTask',
            total_roi,
            block_read_roi,
            block_write_roi,
            process_function=lambda b: smooth(
                b, dataset, output_dataset, sigma=config.sigma),
            read_write_conflict=False,
            num_workers=config.num_workers,
            fit='shrink'
            )

    # run task
    ret = daisy.run_blockwise([task])

    if ret:
        logger.info("Ran all blocks successfully!")
    else:
        logger.info("Did not run all blocks successfully...")
Example #20
def parallel_aff_agglomerate(
        affs,
        fragments,
        rag_provider,
        block_size,
        context,
        merge_function,
        threshold,
        num_workers):
    '''Agglomerate fragments in parallel using ``waterz``.

    Args:

        affs (`class:daisy.Array`):

            An array containing affinities.

        fragments (`class:daisy.Array`):

            An array containing fragments.

        rag_provider (`class:SharedRagProvider`):

            A RAG provider to read nodes from and write found edges to.

        block_size (``tuple`` of ``int``):

            The size of the blocks to process in parallel, in world units.

        context (``tuple`` of ``int``):

            The context to consider for agglomeration, in world units.

        merge_function (``string``):

            The merge function to use for ``waterz``.

        threshold (``float``):

            Until which threshold to agglomerate.

        num_workers (``int``):

            The number of parallel workers.

    Returns:

        True, if all tasks succeeded.
    '''

    assert fragments.data.dtype == np.uint64

    shape = affs.shape[1:]
    context = daisy.Coordinate(context)

    total_roi = affs.roi.grow(context, context)
    read_roi = daisy.Roi((0,)*affs.roi.dims(), block_size).grow(context, context)
    write_roi = daisy.Roi((0,)*affs.roi.dims(), block_size)

    return daisy.run_blockwise(
        total_roi,
        read_roi,
        write_roi,
        lambda b: agglomerate_in_block(
            affs,
            fragments,
            rag_provider,
            b,
            merge_function,
            threshold),
        lambda b: block_done(b, rag_provider),
        num_workers=num_workers,
        read_write_conflict=False,
        fit='shrink')
def extract_fragments(experiment,
                      setup,
                      iteration,
                      affs_file,
                      affs_dataset,
                      fragments_file,
                      fragments_dataset,
                      block_size,
                      context,
                      db_host,
                      db_name,
                      num_workers,
                      fragments_in_xy,
                      queue,
                      epsilon_agglomerate=0,
                      mask_file=None,
                      mask_dataset=None,
                      filter_fragments=0,
                      replace_sections=None,
                      **kwargs):
    '''

    Extract fragments in parallel blocks. Requires that affinities have been
    predicted before.

    When running parallel inference, the worker files are located in the setup
    directory of each experiment since that is where the training was done and
    checkpoints are located. When running watershed (and agglomeration) in
    parallel, we call a worker file which can be located anywhere. By default,
    we assume there is a workers directory inside the current directory that
    contains worker scripts (e.g `workers/extract_fragments_worker.py`).

    Args:

        * following three params just used to build out file directory *

        experiment (``string``):

            Name of the experiment (fib25, hemi, zfinch, ...).

        setup (``string``):

            Name of the setup to predict (setup01, setup02, ...).

        iteration (``int``):

            Training iteration.

        affs_file (``string``):

            Path to file (zarr/n5) where predictions are stored.

        affs_dataset (``string``):

            Predictions dataset to use (e.g 'volumes/affs'). If using a scale pyramid,
            will try scale zero assuming stored in directory `s0` (e.g
            'volumes/affs/s0').

        fragments_file (``string``):

            Path to file (zarr/n5) to store fragments (supervoxels) - generally
            a good idea to store in the same place as affs.

        fragments_dataset (``string``):

            Name of dataset to write fragments (supervoxels) to (e.g
            'volumes/fragments').

        block_size (``tuple`` of ``int``):

            The size of one block in world units (must be multiple of voxel
            size).

        context (``tuple`` of ``int``):

            The context to consider for fragment extraction in world units.

        db_host (``string``):

            Host address of the MongoDB server (as passed to pymongo.MongoClient).

        db_name (``string``):

            Name of MongoDB database to use (for logging successful blocks in
            check function and writing nodes to the region adjacency graph).

        num_workers (``int``):

            How many blocks to run in parallel.

        fragments_in_xy (``bool``):

            Whether to extract fragments for each xy-section separately.

        queue (``string``):

            Name of cpu queue to use (e.g local)

        epsilon_agglomerate (``float``, optional):

            Perform an initial waterz agglomeration on the extracted fragments
            to this threshold. Skip if 0 (default).

        mask_file (``string``, optional):

            Path to file (zarr/n5) containing mask.

        mask_dataset (``string``, optional):

            Name of mask dataset. Data should be uint8 where 1 == masked in, 0
            == masked out.

        filter_fragments (``float``, optional):

            Filter fragments that have an average affinity lower than this
            value.

        replace_sections (``list`` of ``int``, optional):

            Replace fragments data with zero in given sections (useful if large
            artifacts are causing issues). List of section numbers (in voxels).

    '''

    logging.info(f"Reading affs from {affs_file}")

    try:
        affs = daisy.open_ds(affs_file, affs_dataset)
    except Exception:
        affs_dataset = affs_dataset + '/s0'
        affs = daisy.open_ds(affs_file, affs_dataset)

    network_dir = os.path.join(experiment, setup, str(iteration))

    client = pymongo.MongoClient(db_host)
    db = client[db_name]

    if 'blocks_extracted' not in db.list_collection_names():
        blocks_extracted = db['blocks_extracted']
        blocks_extracted.create_index([('block_id', pymongo.ASCENDING)],
                                      name='block_id')
    else:
        blocks_extracted = db['blocks_extracted']

    # prepare fragments dataset. By default use same roi as affinities, change
    # roi if extracting fragments in cropped region
    fragments = daisy.prepare_ds(fragments_file,
                                 fragments_dataset,
                                 affs.roi,
                                 affs.voxel_size,
                                 np.uint64,
                                 daisy.Roi((0, 0, 0), block_size),
                                 compressor={
                                     'id': 'zlib',
                                     'level': 5
                                 })

    context = daisy.Coordinate(context)
    total_roi = affs.roi.grow(context, context)

    read_roi = daisy.Roi((0, ) * affs.roi.dims(),
                         block_size).grow(context, context)
    write_roi = daisy.Roi((0, ) * affs.roi.dims(), block_size)

    #get number of voxels in block
    num_voxels_in_block = (write_roi / affs.voxel_size).size()

    #blockwise watershed
    daisy.run_blockwise(
        total_roi=total_roi,
        read_roi=read_roi,
        write_roi=write_roi,
        process_function=lambda: start_worker(
            affs_file, affs_dataset, fragments_file, fragments_dataset,
            db_host, db_name, context, fragments_in_xy, queue, network_dir,
            epsilon_agglomerate, mask_file, mask_dataset, filter_fragments,
            replace_sections, num_voxels_in_block),
        check_function=lambda b: check_block(blocks_extracted, b),
        num_workers=num_workers,
        read_write_conflict=False,
        fit='shrink')
Example #22
def agglomerate(experiment, setup, iteration, affs_file, affs_dataset,
                fragments_file, fragments_dataset, block_size, context,
                db_host, db_name, num_workers, queue, merge_function,
                **kwargs):
    '''

    Agglomerate in parallel blocks. Requires that affinities and supervoxels
    have been generated.

    Args:

        * following three params just used to build out file directory *

        experiment (``string``):

            Name of the experiment (fib25, hemi, zfinch, ...).

        setup (``string``):

            Name of the setup to predict (setup01, setup02, ...).

        iteration (``int``):

            Training iteration.

        affs_file (``string``):

            Path to file (zarr/n5) where predictions are stored.

        affs_dataset (``string``):

            Predictions dataset to use (e.g 'volumes/affs').

        fragments_file (``string``):

            Path to file (zarr/n5) where fragments (supervoxels) are stored.

        fragments_dataset (``string``):

            Name of fragments (supervoxels) dataset (e.g 'volumes/fragments').

        block_size (``tuple`` of ``int``):

            The size of one block in world units (must be multiple of voxel
            size).

        context (``tuple`` of ``int``):

            The context to consider for fragment extraction in world units.

        db_host (``string``):

            Host address of the MongoDB server (as passed to pymongo.MongoClient).

        db_name (``string``):

            Name of MongoDB database to use (for logging successful blocks in
            check function and reading nodes from + writing edges to the region
            adjacency graph).

        num_workers (``int``):

            How many blocks to run in parallel.

        merge_function (``string``):

            Symbolic name of a merge function. See dictionary in worker script
            (workers/agglomerate_worker.py).

    '''

    logging.info(f"Reading affs from {affs_file}")
    affs = daisy.open_ds(affs_file, affs_dataset, mode='r')

    network_dir = os.path.join(experiment, setup, str(iteration),
                               merge_function)

    logging.info(f"Reading fragments from {fragments_file}")
    fragments = daisy.open_ds(fragments_file, fragments_dataset, mode='r')

    client = pymongo.MongoClient(db_host)
    db = client[db_name]

    blocks_agglomerated = 'blocks_agglomerated_' + merge_function

    if blocks_agglomerated not in db.list_collection_names():
        blocks_agglomerated = db[blocks_agglomerated]
        blocks_agglomerated.create_index([('block_id', pymongo.ASCENDING)],
                                         name='block_id')
    else:
        blocks_agglomerated = db[blocks_agglomerated]

    context = daisy.Coordinate(context)
    total_roi = affs.roi.grow(context, context)

    read_roi = daisy.Roi((0, ) * affs.roi.dims(),
                         block_size).grow(context, context)
    write_roi = daisy.Roi((0, ) * affs.roi.dims(), block_size)

    daisy.run_blockwise(
        total_roi,
        read_roi,
        write_roi,
        process_function=lambda: start_worker(
            affs_file, affs_dataset, fragments_file, fragments_dataset,
            db_host, db_name, queue, merge_function, network_dir),
        check_function=lambda b: check_block(blocks_agglomerated, b),
        num_workers=num_workers,
        read_write_conflict=False,
        fit='shrink')
Example #23
output_path = '../temp/overlap_counts'
if os.path.isdir(output_path):
    shutil.rmtree(output_path)
os.makedirs(output_path)

# TODO parametrize block size
block_size = config['block_size']
total_roi = daisy.Roi(offset=config['roi_offset'], shape=config['roi_shape'])

logger.info('Start blockwise processing')
start = time.time()
daisy.run_blockwise(
    total_roi=total_roi,
    read_roi=daisy.Roi(offset=(0, 0, 0), shape=block_size),
    write_roi=daisy.Roi(offset=(0, 0, 0), shape=block_size),
    process_function=lambda block: overlap_in_block(block=block,
                                                    fragments=fragments,
                                                    groundtruth=groundtruth,
                                                    tmp_path=output_path),
    fit='shrink',
    num_workers=config['num_workers'],
    read_write_conflict=False,
    max_retries=0)

# TODO parametrize
logger.debug('num blocks: {}'.format(
    np.prod(np.ceil(np.array(config['roi_shape']) / np.array(block_size)))))

frag_to_gt = overlap_reduce(output_path)
pickle.dump(frag_to_gt, open('frag_to_gt.pickle', 'wb'))
Example #24
def predict_blockwise(config_file, iteration):
    config = {
        "solve_context": daisy.Coordinate((2, 100, 100, 100)),
        "num_workers": 16,
        "data_dir": '../01_data',
        "setups_dir": '../02_setups',
    }
    master_config = load_config(config_file)
    config.update(master_config['general'])
    config.update(master_config['predict'])
    sample = config['sample']
    data_dir = config['data_dir']
    setup = config['setup']
    # solve_context = daisy.Coordinate(master_config['solve']['context'])
    setup_dir = os.path.abspath(os.path.join(config['setups_dir'], setup))
    voxel_size, source_roi = get_source_roi(data_dir, sample)
    predict_roi = source_roi

    # limit to specific frames, if given
    if 'limit_to_roi_offset' in config or 'frames' in config:
        if 'frames' in config:
            frames = config['frames']
            logger.info("Limiting prediction to frames %s" % str(frames))
            begin, end = frames
            frames_roi = daisy.Roi((begin, None, None, None),
                                   (end - begin, None, None, None))
            predict_roi = predict_roi.intersect(frames_roi)
        if 'limit_to_roi_offset' in config:
            assert 'limit_to_roi_shape' in config,\
                    "Must specify shape and offset in config file"
            limit_to_roi = daisy.Roi(
                daisy.Coordinate(config['limit_to_roi_offset']),
                daisy.Coordinate(config['limit_to_roi_shape']))
            predict_roi = predict_roi.intersect(limit_to_roi)

        # Given frames and rois are the prediction region,
        # not the solution region
        # predict_roi = target_roi.grow(solve_context, solve_context)
        # predict_roi = predict_roi.intersect(source_roi)

    # get context and total input and output ROI
    with open(os.path.join(setup_dir, 'test_net_config.json'), 'r') as f:
        net_config = json.load(f)
    net_input_size = net_config['input_shape']
    net_output_size = net_config['output_shape_2']
    net_input_size = daisy.Coordinate(net_input_size) * voxel_size
    net_output_size = daisy.Coordinate(net_output_size) * voxel_size
    context = (net_input_size - net_output_size) / 2

    # expand predict roi to multiple of block write_roi
    predict_roi = predict_roi.snap_to_grid(net_output_size, mode='grow')

    input_roi = predict_roi.grow(context, context)
    output_roi = predict_roi

    # prepare output zarr, if necessary
    if 'output_zarr' in config:
        output_zarr = config['output_zarr']
        parent_vectors_ds = 'volumes/parent_vectors'
        cell_indicator_ds = 'volumes/cell_indicator'
        output_path = os.path.join(setup_dir, output_zarr)
        logger.debug("Preparing zarr at %s" % output_path)
        daisy.prepare_ds(output_path,
                         parent_vectors_ds,
                         output_roi,
                         voxel_size,
                         dtype=np.float32,
                         write_size=net_output_size,
                         num_channels=3)
        daisy.prepare_ds(output_path,
                         cell_indicator_ds,
                         output_roi,
                         voxel_size,
                         dtype=np.float32,
                         write_size=net_output_size,
                         num_channels=1)

    # create read and write ROI
    block_write_roi = daisy.Roi((0, 0, 0, 0), net_output_size)
    block_read_roi = block_write_roi.grow(context, context)

    logger.info("Following ROIs in world units:")
    logger.info("Input ROI       = %s" % input_roi)
    logger.info("Block read  ROI = %s" % block_read_roi)
    logger.info("Block write ROI = %s" % block_write_roi)
    logger.info("Output ROI      = %s" % output_roi)

    logger.info("Starting block-wise processing...")

    # process block-wise
    if 'db_name' in config:
        daisy.run_blockwise(
            input_roi,
            block_read_roi,
            block_write_roi,
            process_function=lambda: predict_worker(config_file, iteration),
            check_function=lambda b: check_function(b, 'predict', config[
                'db_name'], config['db_host']),
            num_workers=config['num_workers'],
            read_write_conflict=False,
            max_retries=0,
            fit='valid')
    else:
        daisy.run_blockwise(
            input_roi,
            block_read_roi,
            block_write_roi,
            process_function=lambda: predict_worker(config_file, iteration),
            num_workers=config['num_workers'],
            read_write_conflict=False,
            max_retries=0,
            fit='valid')
Example #25
def predict_blockwise(
        train_dir,
        iteration,
        in_container,
        input_roi_in_pixels,
        out_container,
        # output_roi_in_pixels,
        num_workers,
        client,
        block_size_in_chunks=(1, 1, 1),
        raw_dataset='volumes/raw',
        affs_dataset='volumes/prediction/affinities',
        net_io_names_json='net_io_names.json',
        unet_inference_meta='unet_inference.meta'):

    setup_dir = os.path.dirname(os.path.realpath(__file__))

    # TODO: change to predict graph
    with open(os.path.join(train_dir, net_io_names_json), 'r') as f:
        config = json.load(f)

    raw = ArrayKey(_raw_key)
    affs = ArrayKey(_affs_key)

    raw_source = daisy.open_ds(in_container, raw_dataset)

    # input_voxel_size = Coordinate((360, 36, 36))
    # output_voxel_size = Coordinate((120, 108, 108))
    input_voxel_size = Coordinate((120, 12, 12)) * 3
    output_voxel_size = Coordinate((40, 36, 36)) * 3
    input_shape = (91, 862, 862)
    output_shape = (209, 214, 214)

    net_input_chunk_size, net_output_chunk_size, context = get_chunk_sizes(
        input_shape, output_shape, input_voxel_size, output_voxel_size)

    # compute sizes of blocks
    block_output_size = net_output_chunk_size * block_size_in_chunks
    block_input_size = block_output_size + context + context

    input_roi = (input_roi_in_pixels * input_voxel_size).grow(context, context)
    # output_roi  = output_roi_in_pixels * output_voxel_size

    block_input_roi = Roi((0, 0, 0), block_input_size) - context
    block_output_roi = Roi((0, 0, 0), block_output_size)

    _logger.debug('input_roi_in_pixels %s', input_roi_in_pixels)
    _logger.debug('input_voxel_size    %s', input_voxel_size)
    _logger.debug('output_voxel_size   %s', output_voxel_size)
    _logger.debug('input shape         %s', input_shape)
    _logger.debug('output shape        %s', output_shape)
    _logger.debug('block_input_size    %s', block_input_size)
    _logger.debug('block_output_size   %s', block_output_size)
    _logger.debug('block_input_roi     %s', block_input_roi)
    _logger.debug('block_output_roi    %s', block_output_roi)
    _logger.debug('input_roi           %s', input_roi)
    # _logger.debug('output_roi        %s', output_roi)

    cwd = os.getcwd()

    def predict_in_block(block):

        from distributed import get_worker

        read_roi = block.read_roi
        write_roi = block.write_roi
        predict_script = '/groups/saalfeld/home/hanslovskyp/experiments/quasi-isotropic/predict/predict.py'
        cuda_visible_devices = get_worker().cuda_visible_devices
        predict_script_args = ''

        name = 'predict-%s-%s' % (write_roi.get_begin(), write_roi.get_size())
        log_file = os.path.join(cwd, '%s.log' % name)
        pythonpath = ':'.join([
            '%s/workspace-pycharm/u-net/gunpowder' % _HOME,
            '%s/workspace-pycharm/u-net/CNNectome' % _HOME,
            '/groups/saalfeld/home/papec/Work/my_projects/z5/bld/python'
        ])
        pythonpath_export_str = 'export PYTHONPATH=%s:$PYTHONPATH' % pythonpath

        daisy.call([
            'nvidia-docker', 'run', '--rm', '-u',
            str(os.getuid()), '-v', '/groups/turaga:/groups/turaga:rshared', '-v',
            '/groups/saalfeld:/groups/saalfeld:rshared', '-v',
            '/nrs/saalfeld:/nrs/saalfeld:rshared', '-w', cwd, '--name', name,
            'neptunes5thmoon/gunpowder:v0.3-pre6-dask1',
            '/bin/bash', '-c',
            '"export CUDA_VISIBLE_DEVICES=%s; %s; python -u %s %s 2>&1 > %s"' %
            (cuda_visible_devices, pythonpath_export_str, predict_script,
             predict_script_args, log_file)
        ])

    def check_block(block):
        _logger.debug("Checking if block %s is complete...", block.write_roi)
        ds = daisy.open_ds(out_container, affs_dataset)
        center_values = ds[block.write_roi.get_center()]
        s = np.sum(center_values)
        _logger.debug("Sum of center values in %s is %f", block.write_roi, s)
        return s != 0

    # TODO set client
    daisy.run_blockwise(input_roi,
                        block_input_roi,
                        block_output_roi,
                        process_function=predict_in_block,
                        check_function=check_block,
                        num_workers=num_workers,
                        processes=False,
                        read_write_conflict=False,
                        client=client)
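
get_chunk_sizes is not shown above; here is a minimal sketch of what it could compute, assuming the context is half the difference between the network's input and output size in world units (this derivation is an assumption, not the original helper):

def get_chunk_sizes(input_shape, output_shape, input_voxel_size,
                    output_voxel_size):
    # hypothetical sketch: convert network shapes from voxels to world units
    # and take half of the size difference as the context on each side
    net_input_chunk_size = Coordinate(input_shape) * input_voxel_size
    net_output_chunk_size = Coordinate(output_shape) * output_voxel_size
    context = (net_input_chunk_size - net_output_chunk_size) // 2
    return net_input_chunk_size, net_output_chunk_size, context
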
Example #26
0
            help='The output container; defaults to in_file with a .zarr extension'
            )
        ap.add_argument(
            "--out_ds_name", type=str, default=None,
            help='The name of the output dataset; defaults to in_ds_name'
            )
        ap.add_argument(
            "--chunk_shape_voxel", type=int, help='The size of a chunk in voxels',
            nargs='+', default=None
            )
        ap.add_argument(
            "--max_voxel_count", type=int, default=256*1024,
            help='If chunk_shape_voxel is not given, use this value to calculate '
            'a near isotropic chunk shape',
            )
        ap.add_argument(
            "--roi_offset", type=int, help='Offset of the ROI to convert',
            nargs='+', default=None)
        ap.add_argument(
            "--roi_shape", type=int, help='Shape of the ROI to convert',
            nargs='+', default=None)

        config = HDF2ZarrTask.parse_args(ap)
        task = HDF2ZarrTask(config)
        daisy_task = task.prepare_task()
        done = daisy.run_blockwise([daisy_task])
        if done:
            logger.info("Ran all blocks successfully!")
        else:
            logger.info("Did not run all blocks successfully...")
def extract_segmentation(fragments_file,
                         fragments_dataset,
                         edges_collection,
                         threshold,
                         block_size,
                         out_file,
                         out_dataset,
                         num_workers,
                         roi_offset=None,
                         roi_shape=None,
                         run_type=None,
                         **kwargs):
    '''

    Args:

        fragments_file (``string``):

            Path to file (zarr/n5) containing fragments (supervoxels).

        fragments_dataset (``string``):

            Name of fragments dataset (e.g. `volumes/fragments`).

        edges_collection (``string``):

            The name of the MongoDB database edges collection to use.

        threshold (``float``):

            The threshold to use for generating a segmentation.

        block_size (``tuple`` of ``int``):

            The size of one block in world units (must be multiple of voxel
            size).

        out_file (``string``):

            Path to file (zarr/n5) to write segmentation to.

        out_dataset (``string``):

            Name of segmentation dataset (e.g. `volumes/segmentation`).

        num_workers (``int``):

            How many workers to use for the blockwise relabelling of
            fragments into segments.

        roi_offset (array-like of ``int``, optional):

            The starting point (inclusive) of the ROI. Entries can be ``None``
            to indicate unboundedness.

        roi_shape (array-like of ``int``, optional):

            The shape of the ROI. Entries can be ``None`` to indicate
            unboundedness.

        run_type (``string``, optional):

            Can be used to direct luts into a subdirectory (e.g. testing,
            validation).

    '''

    # open fragments
    fragments = daisy.open_ds(fragments_file, fragments_dataset)

    total_roi = fragments.roi
    if roi_offset is not None:
        assert roi_shape is not None, "If roi_offset is set, roi_shape " \
                                      "also needs to be provided"
        total_roi = daisy.Roi(offset=roi_offset, shape=roi_shape)

    read_roi = daisy.Roi((0, ) * 3, daisy.Coordinate(block_size))
    write_roi = read_roi

    logging.info("Preparing segmentation dataset...")
    segmentation = daisy.prepare_ds(out_file,
                                    out_dataset,
                                    total_roi,
                                    voxel_size=fragments.voxel_size,
                                    dtype=np.uint64,
                                    write_roi=write_roi)

    lut_filename = f'seg_{edges_collection}_{int(threshold*100)}'

    lut_dir = os.path.join(fragments_file, 'luts', 'fragment_segment')

    if run_type:
        lut_dir = os.path.join(lut_dir, run_type)
        logging.info(f"Run type set, using luts from {run_type} data")

    lut = os.path.join(lut_dir, lut_filename + '.npz')

    assert os.path.exists(lut), f"{lut} does not exist"

    logging.info("Reading fragment-segment LUT...")

    lut = np.load(lut)['fragment_segment_lut']

    logging.info(f"Found {len(lut[0])} fragments in LUT")

    num_segments = len(np.unique(lut[1]))
    logging.info(f"Relabelling fragments to {num_segments} segments")

    daisy.run_blockwise(total_roi,
                        read_roi,
                        write_roi,
                        lambda b: segment_in_block(
                            b, fragments_file, segmentation, fragments, lut),
                        fit='shrink',
                        num_workers=num_workers)
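
segment_in_block is referenced above but not shown. Below is a minimal NumPy-based sketch of what it could do; the body is an assumption that only matches the call signature, and in practice a vectorized relabelling routine would replace the Python loop:

import numpy as np

def segment_in_block(block, fragments_file, segmentation, fragments, lut):
    # hypothetical sketch: map fragment ids to segment ids via the LUT and
    # write the relabelled block (fragments_file is unused here, kept only
    # to match the signature)
    frag_data = fragments.to_ndarray(block.write_roi)
    mapping = dict(zip(lut[0], lut[1]))  # fragment id -> segment id
    seg_data = np.array(
        [mapping.get(f, 0) for f in frag_data.ravel()],
        dtype=np.uint64).reshape(frag_data.shape)
    segmentation[block.write_roi] = seg_data
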
Example #28
0
def solve_blockwise(
        db_host,
        db_name,
        sample,
        parameters,  # list of TrackingParameters
        num_workers=8,
        frames=None,
        limit_to_roi=None,
        from_scratch=False,
        data_dir='../01_data',
        cell_cycle_key=None,
        **kwargs):

    block_size = daisy.Coordinate(parameters[0].block_size)
    context = daisy.Coordinate(parameters[0].context)
    # block size and context must be the same for all parameters!
    for p in parameters:
        assert list(block_size) == p.block_size, \
            "%s not equal to %s" % (block_size, p.block_size)
        assert list(context) == p.context, \
            "%s not equal to %s" % (context, p.context)

    voxel_size, source_roi = get_source_roi(data_dir, sample)

    # determine parameters id from database
    graph_provider = CandidateDatabase(
        db_name,
        db_host)
    parameters_id = [graph_provider.get_parameters_id(p) for p in parameters]

    if from_scratch:
        for pid in parameters_id:
            graph_provider.set_parameters_id(pid)
            graph_provider.reset_selection()

    # limit to specific frames, if given
    if frames:
        logger.info("Solving in frames %s" % frames)
        begin, end = frames
        crop_roi = daisy.Roi(
            (begin, None, None, None),
            (end - begin, None, None, None))
        source_roi = source_roi.intersect(crop_roi)
    # limit to roi, if given
    if limit_to_roi:
        logger.info("limiting to roi %s" % str(limit_to_roi))
        source_roi = source_roi.intersect(limit_to_roi)

    block_write_roi = daisy.Roi(
        (0, 0, 0, 0),
        block_size)
    block_read_roi = block_write_roi.grow(
        context,
        context)
    total_roi = source_roi.grow(
        context,
        context)

    logger.info("Solving in %s", total_roi)

    param_names = ['solve_' + str(_id) for _id in parameters_id]
    if len(parameters_id) > 1:
        # check if set of parameters is already done
        step_name = 'solve_' + str(hash(frozenset(parameters_id)))
        if check_function_all_blocks(step_name, db_name, db_host):
            logger.info("Param set with name %s already completed. Exiting",
                        step_name)
            return True
    else:
        step_name = 'solve_' + str(parameters_id[0])
    # Check each individual parameter to see if it is done
    # if it is, remove it from the list
    done_indices = []
    for _id, name in zip(parameters_id, param_names):
        if check_function_all_blocks(name, db_name, db_host):
            logger.info("Params with id %d already completed. Removing", _id)
            done_indices.append(parameters_id.index(_id))
    for index in done_indices[::-1]:
        del parameters_id[index]
        del parameters[index]
        del param_names[index]
    logger.debug(parameters_id)
    if len(parameters_id) == 0:
        logger.info("All parameters in set already completed. Exiting")
        return True

    success = daisy.run_blockwise(
        total_roi,
        block_read_roi,
        block_write_roi,
        process_function=lambda b: solve_in_block(
            db_host,
            db_name,
            parameters,
            b,
            parameters_id,
            solution_roi=source_roi,
            cell_cycle_key=cell_cycle_key),
        # Note: in the case of a set of parameters,
        # we are assuming that none of the individual parameters are
        # half done and only checking the hash for each block
        check_function=lambda b: check_function(
            b,
            step_name,
            db_name,
            db_host),
        num_workers=num_workers,
        fit='overhang')
    if success:
        # write all done to individual parameters and set
        if len(param_names) > 1:
            write_done_all_blocks(
                step_name,
                db_name,
                db_host)
        for name in param_names:
            write_done_all_blocks(
                name,
                db_name,
                db_host)
    logger.info("Finished solving")
    return success
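
check_function_all_blocks and write_done_all_blocks are only referenced above. Minimal sketches follow, assuming a 'steps_done' collection in the candidate database with one document per completed step (collection name and layout are assumptions):

import pymongo

def check_function_all_blocks(step_name, db_name, db_host):
    # hypothetical sketch: the step is done if it was marked in 'steps_done'
    client = pymongo.MongoClient(db_host)
    return client[db_name]['steps_done'].count_documents(
        {'step_name': step_name}) >= 1

def write_done_all_blocks(step_name, db_name, db_host):
    # hypothetical sketch: mark the whole step as done
    client = pymongo.MongoClient(db_host)
    client[db_name]['steps_done'].insert_one({'step_name': step_name})
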
def extract_segmentation(fragments_file,
                         fragments_dataset,
                         edges_collection,
                         threshold,
                         out_file,
                         out_dataset,
                         num_workers,
                         lut_fragment_segment,
                         roi_offset=None,
                         roi_shape=None,
                         run_type=None,
                         **kwargs):

    # open fragments
    fragments = daisy.open_ds(fragments_file, fragments_dataset)

    total_roi = fragments.roi
    if roi_offset is not None:
        assert roi_shape is not None, "If roi_offset is set, roi_shape " \
                                      "also needs to be provided"
        total_roi = daisy.Roi(offset=roi_offset, shape=roi_shape)

    read_roi = daisy.Roi((0, 0, 0), (5000, 5000, 5000))
    write_roi = read_roi

    logging.info("Preparing segmentation dataset...")
    segmentation = daisy.prepare_ds(out_file,
                                    out_dataset,
                                    total_roi,
                                    voxel_size=fragments.voxel_size,
                                    dtype=np.uint64,
                                    write_roi=write_roi)

    lut_filename = 'seg_%s_%d' % (edges_collection, int(threshold * 100))

    lut_dir = os.path.join(fragments_file, lut_fragment_segment)

    if run_type:
        lut_dir = os.path.join(lut_dir, run_type)
        logging.info("Run type set, using luts from %s data" % run_type)

    lut = os.path.join(lut_dir, lut_filename + '.npz')

    assert os.path.exists(lut), "%s does not exist" % lut

    start = time.time()
    logging.info("Reading fragment-segment LUT...")
    lut = np.load(lut)['fragment_segment_lut']
    logging.info("%.3fs" % (time.time() - start))

    logging.info("Found %d fragments in LUT" % len(lut[0]))

    daisy.run_blockwise(total_roi,
                        read_roi,
                        write_roi,
                        lambda b: segment_in_block(
                            b, fragments_file, segmentation, fragments, lut),
                        fit='shrink',
                        num_workers=num_workers,
                        processes=True,
                        read_write_conflict=False)
Example #30
0
                    help="Size of block read region",
                    default=[20, 200, 200])
    ap.add_argument('--block_write_size',
                    '-w',
                    nargs='+',
                    help="Size of block write region",
                    default=[18, 180, 180])

    config = GaussianSmoothingTask.parse_args(ap)

    config1 = copy.deepcopy(config)
    config1['out_ds_name'] = 'volumes/raw_smoothed'
    daisy_task1 = GaussianSmoothingTask(config1,
                                        task_id='Gaussian1').prepare_task()

    # here we reuse parameters but set the output dataset of the previous
    # task as input
    config2 = copy.deepcopy(config)
    config2['in_ds_name'] = 'volumes/raw_smoothed'
    config2['out_ds_name'] = 'volumes/raw_smoothed_smoothed'
    daisy_task2 = GaussianSmoothingTask(
        config2,
        task_id='Gaussian2').prepare_task(upstream_tasks=[daisy_task1])

    done = daisy.run_blockwise([daisy_task1, daisy_task2])

    if done:
        print("Ran all blocks successfully!")
    else:
        print("Did not run all blocks successfully...")