Ejemplo n.º 1
0
            def post_changed_blocks(old_seg, new_seg):
                """
                Post only those blocks of ``new_seg`` that differ from ``old_seg``.

                If we posted the whole volume, we'd be overwriting blocks that
                haven't changed, wasting space in DVID (for duplicate blocks stored
                in the child uuid).  So we can't just call
                ``output_service.write_subvolume(new_seg, box[0], scale)``.

                Args:
                    old_seg: The segmentation to compare against.
                    new_seg: The updated segmentation; compared elementwise with
                        ``old_seg`` and split into cubes of ``block_width``.

                Reads ``box``, ``block_width``, ``scale`` and ``output_service``
                from the enclosing scope.  Returns None.
                """
                block_shape = 3 * (block_width, )

                # Reduce the voxel-wise difference to one flag per block.
                seg_diff = (old_seg != new_seg)
                block_diff = view_as_blocks(seg_diff, block_shape)
                changed_block_map = block_diff.any(axis=(3, 4, 5)).nonzero()

                if len(changed_block_map[0]) == 0:
                    # No block differs, so there is nothing to encode or post.
                    return

                # Block-grid indexes -> voxel coordinates, offset by the box start.
                changed_block_corners = box[0] + np.transpose(
                    changed_block_map) * block_width

                changed_blocks = view_as_blocks(
                    new_seg, block_shape)[changed_block_map]
                encoded_blocks = encode_labelarray_blocks(
                    changed_block_corners, changed_blocks)

                mgr = output_service.resource_manager_client
                # This is a write, so the second argument is False (the other
                # posts in this code base pass False, fetches pass True).
                with mgr.access_context(output_service.server, False, 1,
                                        changed_blocks.nbytes):
                    post_labelmap_blocks(*output_service.instance_triple,
                                         None,
                                         encoded_blocks,
                                         scale,
                                         downres=False,
                                         noindexing=True,
                                         throttle=False,
                                         is_raw=True)
Ejemplo n.º 2
0
def test_post_labelmap_blocks(labelmap_setup):
    """
    Post three random 64-cubed blocks, then read the region back
    and verify each block landed at its corner.
    """
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup
    instance_name = 'segmentation-scratch'
    instance_info = DvidInstanceInfo(dvid_server, dvid_repo, instance_name)

    # Write some random data and read it back.
    corners_zyx = [[0, 0, 0], [0, 64, 0], [0, 0, 64]]
    blocks = np.random.randint(10, size=(3, 64, 64, 64), dtype=np.uint64)

    post_labelmap_blocks(dvid_server, dvid_repo, instance_name,
                         corners_zyx, blocks, 0)

    fetch_box = [(0, 0, 0), (128, 128, 128)]
    complete_voxels = fetch_labelmap_voxels(*instance_info,
                                            fetch_box,
                                            supervoxels=True)

    # Every posted block must be found, unchanged, at its own corner.
    for (z, y, x), expected_block in zip(corners_zyx, blocks):
        fetched_block = complete_voxels[z:z + 64, y:y + 64, x:x + 64]
        assert (fetched_block == expected_block).all()
Ejemplo n.º 3
0
def setup_dvid_segmentation_input(setup_dvid_repo, random_segmentation):
    """
    Prepare DVID labelmap instances and a workflow config for labelmapcopy.

    Creates the 'input' and 'partial output' labelmap instances (unless they
    already exist), fills the input with ``random_segmentation`` at scales
    0 through 3, and fills the partial output with a copy of part of the input
    in which one scale-0 block is overwritten with different labels.
    Then writes a ``workflow.yaml`` into a fresh temporary directory.

    Args:
        setup_dvid_repo: A ``(dvid_address, repo_uuid)`` pair.
        random_segmentation: The scale-0 segmentation volume.
            NOTE(review): presumably both arguments are pytest fixtures -- confirm.

    Returns:
        Tuple of ``(template_dir, config, expected_vols, partial_vol,
        dvid_address, repo_uuid, output_segmentation_name,
        partial_output_segmentation_name)``, where ``expected_vols`` maps
        scale -> downsampled volume and ``partial_vol`` is the scale-0 content
        of the partial output instance.

    Raises:
        HTTPError: If creating an instance fails for any reason other than
            the instance already existing.
    """
    dvid_address, repo_uuid = setup_dvid_repo

    input_segmentation_name = 'labelmapcopy-segmentation-input'
    output_segmentation_name = 'labelmapcopy-segmentation-output'

    partial_output_segmentation_name = 'labelmapcopy-segmentation-partial-output'

    max_scale = 3
    already_exists = False

    try:
        create_labelmap_instance(dvid_address,
                                 repo_uuid,
                                 input_segmentation_name,
                                 max_scale=max_scale)
        create_labelmap_instance(dvid_address,
                                 repo_uuid,
                                 partial_output_segmentation_name,
                                 max_scale=max_scale)
    except HTTPError as ex:
        if ex.response is not None and 'already exists' in ex.response.content.decode(
                'utf-8'):
            already_exists = True
        else:
            # Any other failure means the instances are not usable;
            # don't hide it behind a confusing error later on.
            raise

    expected_vols = {}
    for scale in range(1 + max_scale):
        if scale == 0:
            scaled_vol = random_segmentation
        else:
            # Each scale is derived from the previous (finer) scale.
            scaled_vol = downsample(scaled_vol, 2, 'labels-numba')
        expected_vols[scale] = scaled_vol

        if not already_exists:
            # Pad the volume out to a 64-aligned box before posting.
            scaled_box = round_box([(0, 0, 0), scaled_vol.shape], 64, 'out')
            aligned_vol = np.zeros(scaled_box[1], np.uint64)
            overwrite_subvol(aligned_vol, [(0, 0, 0), scaled_vol.shape],
                             scaled_vol)
            post_labelmap_voxels(dvid_address,
                                 repo_uuid,
                                 input_segmentation_name, (0, 0, 0),
                                 aligned_vol,
                                 scale=scale)

    if not already_exists:
        # Create a 'partial' output volume that is the same (bitwise) as the input except for some blocks.
        full_res_box = np.array([(0, 0, 0), random_segmentation.shape])
        full_res_box[1, -1] = 192
        for scale in range(1 + max_scale):
            # Always scale the full-res box; reusing the previous iteration's
            # box would compound the division across scales.
            scaled_box = round_box(full_res_box // (2**scale), 64, 'out')
            raw_blocks = fetch_labelmap_voxels(dvid_address,
                                               repo_uuid,
                                               input_segmentation_name,
                                               scaled_box,
                                               scale,
                                               supervoxels=True,
                                               format='raw-response')
            post_labelmap_blocks(dvid_address,
                                 repo_uuid,
                                 partial_output_segmentation_name, [(0, 0, 0)],
                                 raw_blocks,
                                 scale,
                                 is_raw=True)

        # Overwrite one block with labels drawn from [1_000_000, 1_000_010).
        block = np.random.randint(1_000_000,
                                  1_000_010,
                                  size=(64, 64, 64),
                                  dtype=np.uint64)
        post_labelmap_voxels(dvid_address,
                             repo_uuid,
                             partial_output_segmentation_name, (0, 128, 64),
                             block,
                             0,
                             downres=True)

    partial_vol = fetch_labelmap_voxels(dvid_address,
                                        repo_uuid,
                                        partial_output_segmentation_name,
                                        [(0, 0, 0), random_segmentation.shape],
                                        supervoxels=True)

    template_dir = tempfile.mkdtemp(suffix="labelmapcopy-template")

    # Blank lines below are left empty: textwrap.dedent() normalizes
    # whitespace-only lines to a bare newline anyway.
    config_text = textwrap.dedent(f"""\
        workflow-name: labelmapcopy
        cluster-type: {CLUSTER_TYPE}

        input:
          dvid:
            server: {dvid_address}
            uuid: {repo_uuid}
            segmentation-name: {input_segmentation_name}
            supervoxels: true

          geometry:
            message-block-shape: [512,64,64]
            available-scales: [0,1,2,3]

        output:
          dvid:
            server: {dvid_address}
            uuid: {repo_uuid}
            segmentation-name: {output_segmentation_name}
            supervoxels: true
            disable-indexing: true
            create-if-necessary: true

        labelmapcopy:
          slab-shape: [512,128,64]
          dont-overwrite-identical-blocks: true
    """)

    with open(f"{template_dir}/workflow.yaml", 'w') as f:
        f.write(config_text)

    # Also hand back the parsed config so callers can tweak it.
    yaml = YAML()
    with StringIO(config_text) as f:
        config = yaml.load(f)

    return template_dir, config, expected_vols, partial_vol, dvid_address, repo_uuid, output_segmentation_name, partial_output_segmentation_name
    def write_subvolume(self, subvolume, offset_zyx, scale=0):
        """
        Write ``subvolume`` to the DVID instance, starting at ``offset_zyx``.

        Block-aligned writes to 'labelarray'/'labelmap' instances are encoded
        up front and posted as raw blocks; everything else goes through
        ``post_raw``.  Either way, the post is made while holding a token from
        the resource manager.

        Args:
            subvolume: The array to write.
            offset_zyx: Where the array's first voxel belongs.
            scale: Scale to write to.  For instance types ending in 'blk',
                a nonzero scale selects the instance named
                ``{instance_name}_{scale}`` and the data is written at scale 0.

        Raises:
            AssertionError: If posting to a labelmap without supervoxels,
                if enable-downres is combined with a nonzero scale,
                or if enable-downres is combined with non-aligned data.
            Exception: Any failure while encoding/posting is re-raised with a
                message naming the host, offset and shape appended to its args.
        """
        req_bytes = self._dtype_nbytes * np.prod(subvolume.shape)

        offset_zyx = np.asarray(offset_zyx)
        box_zyx = np.array(
            [offset_zyx, offset_zyx + np.asarray(subvolume.shape)])

        target_instance = self._instance_name
        if self._instance_type.endswith('blk') and scale > 0:
            # Grayscale multi-scale is achieved via multiple instances
            target_instance = f"{target_instance}_{scale}"
            scale = 0

        assert self._instance_type != 'labelmap' or self.supervoxels, \
            "You cannot post data to a labelmap instance unless you specify 'supervoxels: true' in your config."

        is_block_aligned = np.all(box_zyx % self.block_width == 0)

        assert scale == 0 or not self.enable_downres, \
            "When using enable-downres, you can only write scale-0 data."

        # Labelarray data can be posted very efficiently if the request is block-aligned
        use_block_api = (self._instance_type in ('labelarray', 'labelmap')
                         and is_block_aligned)

        try:
            if not use_block_api:
                assert not self.enable_downres, \
                    "Can't use enable-downres: You are attempting to post non-block-aligned data."

                with self._resource_manager_client.access_context(
                        self._server, False, 1, req_bytes):
                    post_raw(self._server,
                             self._uuid,
                             target_instance,
                             offset_zyx,
                             subvolume,
                             throttle=self._throttle,
                             mutate=not self.disable_indexing)
            else:
                # Encode first and post second, so the compression work is
                # finished before a resource manager token is held.
                encoded = encode_labelarray_volume(offset_zyx, subvolume,
                                                   self.gzip_level,
                                                   not self.write_empty_blocks)
                with self._resource_manager_client.access_context(
                        self._server, False, 1, req_bytes):
                    # Post pre-encoded data with 'is_raw'
                    post_labelmap_blocks(self._server,
                                         self._uuid,
                                         target_instance,
                                         None,
                                         encoded,
                                         scale,
                                         self.enable_downres,
                                         self.disable_indexing,
                                         self._throttle,
                                         is_raw=True)

        except Exception as ex:
            # In cluster scenarios, a chained 'raise ... from ex' traceback
            # doesn't get fully transmitted to the driver, so the extra info
            # is appended to the current exception instead of chaining.
            # It is also logged now so it at least appears in the worker log.
            # See: https://github.com/dask/dask/issues/4384
            import traceback
            logger.error(traceback.format_exc())

            msg = f"Host {socket.gethostname()}: Failed to write subvolume: offset_zyx = {offset_zyx.tolist()}, shape = {subvolume.shape}"

            ex.args = (*ex.args, msg)
            raise
Ejemplo n.º 5
0
        def copy_box(box, scale):
            """
            Copy the raw labelmap blocks within ``box`` from input to output.

            The box is first expanded outward to a multiple of 64.  Depending on
            the enclosing-scope flags ``record_only``, ``check_existing`` and
            ``record_labels``, the blocks are parsed only, copied wholesale, or
            compared against the output so that only new or different blocks
            are posted.

            Returns:
                A list of labels, populated only at scale 0 when recording is
                requested; otherwise an empty list.  When existing output was
                checked, the list holds the labels found in the posted input
                blocks but not in the output blocks they replaced.
            """
            assert not record_only or scale == 0
            box = round_box(box, 64, 'out')
            box_voxels = np.prod(box[1] - box[0])

            def fetch_raw(service):
                # Fetch the encoded blocks for this box while holding a read token.
                with mgr_client.access_context(service.server, True, 1,
                                               box_voxels):
                    return fetch_labelmap_voxels(*service.instance_triple,
                                                 box,
                                                 scale,
                                                 False,
                                                 service.supervoxels,
                                                 format='raw-response')

            def post_raw_blocks(raw_buf):
                # Post already-encoded blocks to the output while holding a write token.
                with mgr_client.access_context(output_service.server, False, 1,
                                               box_voxels):
                    post_labelmap_blocks(*output_service.instance_triple,
                                         None,
                                         raw_buf,
                                         scale,
                                         output_service.enable_downres,
                                         output_service.disable_indexing,
                                         False,
                                         is_raw=True)

            def unique_labels(raw_buf):
                # Collect the distinct labels across all blocks of an encoded buffer.
                _spans, labels_by_block = parse_labelarray_data(
                    raw_buf, extract_labels=True)
                return set(chain(*labels_by_block.values()))

            def block_bytes(raw_buf, spans, block_id):
                # Slice one block's encoded bytes out of the buffer.
                start, stop = spans[block_id]
                return raw_buf[start:stop]

            # Read input blocks
            input_raw_blocks = fetch_raw(input_service)

            # If we're just recording, parse and return
            if scale == 0 and record_only:
                return list(unique_labels(input_raw_blocks))

            # If not checking the output, just copy input to output
            if not check_existing:
                post_raw_blocks(input_raw_blocks)
                if scale == 0 and record_labels:
                    return list(unique_labels(input_raw_blocks))
                return []

            # Read from output; if no differences, no need to parse
            output_raw_blocks = fetch_raw(output_service)
            if input_raw_blocks == output_raw_blocks:
                return []

            input_spans = parse_labelarray_data(input_raw_blocks,
                                                extract_labels=False)
            output_spans = parse_labelarray_data(output_raw_blocks,
                                                 extract_labels=False)

            # Compare block IDs
            input_ids = set(input_spans.keys())
            output_ids = set(output_spans.keys())

            for block_id in output_ids - input_ids:
                # FIXME: We should pass this in the result so it can be logged in the client, not the worker.
                logger.error(
                    f"Not overwriting block-id: {block_id}.  It doesn't exist in the input."
                )

            # Blocks the output lacks entirely are always posted...
            blocks_to_post = [
                (block_id,
                 block_bytes(input_raw_blocks, input_spans, block_id))
                for block_id in input_ids - output_ids
            ]

            # ...and blocks present on both sides are posted only if their bytes differ.
            replaced_blocks = []
            for block_id in input_ids & output_ids:
                in_buf = block_bytes(input_raw_blocks, input_spans, block_id)
                out_buf = block_bytes(output_raw_blocks, output_spans,
                                      block_id)
                if in_buf != out_buf:
                    blocks_to_post.append((block_id, in_buf))
                    replaced_blocks.append((block_id, out_buf))

            # Sort filtered blocks so they appear in the same order in which we received them.
            blocks_to_post.sort(key=lambda item: input_spans[item[0]][0])

            # Post them
            filtered_input_buf = b''.join(buf for _, buf in blocks_to_post)
            post_raw_blocks(filtered_input_buf)

            if scale == 0 and record_labels:
                filtered_output_buf = b''.join(
                    buf for _, buf in replaced_blocks)
                input_set = unique_labels(filtered_input_buf)
                output_set = unique_labels(filtered_output_buf)
                return list(input_set - output_set)

            return []