Example #1
def main():
    # Create the destination instance if necessary.
    dst_instances = fetch_repo_instances(*dst_node, 'annotation')
    if dst_syn not in dst_instances:
        logger.info(f"Creating instance '{dst_syn}'")
        create_instance(*dst_node, dst_syn, 'annotation')

    # Check to see if the sync already exists; add it if necessary
    syn_info = fetch_instance_info(*dst_node, dst_syn)
    if len(syn_info["Base"]["Syncs"]) == 0:
        logger.info(f"Adding a sync to '{dst_syn}' from '{dst_seg}'")
        post_sync(*dst_node, dst_syn, [dst_seg])
    elif syn_info["Base"]["Syncs"][0] != dst_seg:
        other_seg = syn_info["Base"]["Syncs"][0]
        raise RuntimeError(
            f"Can't create a sync to '{dst_seg}'. "
            f"Your instance is already sync'd to a different segmentation: {other_seg}"
        )

    # Fetch segmentation extents
    bounding_box_zyx = fetch_volume_box(*src_node, src_seg).tolist()

    # Break into block-aligned chunks (boxes) that are long in the X direction
    # (optimal access pattern for dvid read/write)
    boxes = boxes_from_grid(bounding_box_zyx, (256, 256, 6400), clipped=True)

    # Use a process pool to copy the chunks in parallel.
    compute_parallel(copy_syn_blocks,
                     boxes,
                     processes=PROCESSES,
                     ordered=False)
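For orientation, here is a minimal sketch of the pattern shared by the examples on this page: call fetch_instance_info and inspect the returned /info JSON. The server, uuid, and instance names below are placeholders, and the exact keys available under "Extended" depend on the instance type. (Example #1 above also relies on module-level names such as dst_node, dst_syn, dst_seg, src_node, src_seg, PROCESSES, and copy_syn_blocks, which are defined elsewhere in its script.)

from neuclease.dvid import fetch_instance_info

# Placeholder node: substitute a real DVID server, uuid, and instance name.
server, uuid, instance = 'emdata3:8900', 'abc123', 'segmentation'

info = fetch_instance_info(server, uuid, instance)

# Fields used throughout these examples:
#   info["Base"]["TypeName"]   e.g. 'labelmap', 'annotation', 'tarsupervoxels'
#   info["Base"]["Syncs"]      instances this one is sync'd to (may be empty)
#   info["Extended"]           type-specific metadata, e.g. BlockSize, MaxDownresLevel
print(info["Base"]["TypeName"], info["Base"]["Syncs"])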
Example #2
    def _read_pyramid_depth(self):
        """
        Read the MaxDownresLevel from the output instance we'll be writing to,
        so it can be verified against our config's pyramid-depth setting.
        """
        info = fetch_instance_info(*self.output_service.instance_triple)
        existing_depth = int(info["Extended"]["MaxDownresLevel"])
        return existing_depth
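A hedged sketch of how the depth read above might be consumed. The check_pyramid_depth helper and its arguments are hypothetical, not part of the workflow shown here; only the instance_triple attribute and MaxDownresLevel field come from the examples on this page.

from neuclease.dvid import fetch_instance_info

def check_pyramid_depth(output_service, configured_depth):
    """
    Compare a configured pyramid depth against the MaxDownresLevel stored
    on the DVID instance.  A configured value of -1 means 'auto'.
    """
    info = fetch_instance_info(*output_service.instance_triple)
    existing_depth = int(info["Extended"]["MaxDownresLevel"])
    if configured_depth in (-1, existing_depth):
        return existing_depth
    raise RuntimeError(f"Configured pyramid depth ({configured_depth}) doesn't match "
                       f"the instance's MaxDownresLevel ({existing_depth})")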
Example #3
def test_dvid_volume_service_grayscale(setup_dvid_repo, disable_auto_retry):
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-grayscale'

    volume = np.random.randint(100, size=(256, 192, 128), dtype=np.uint8)
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          grayscale-name: {instance_name}
          
          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}
       
        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
    """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    assert instance_name not in fetch_repo_instances(server, uuid)

    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    scaled_volumes = {}
    for scale in range(max_scale + 1):
        if scale == 0:
            assert instance_name in repo_instances
            assert repo_instances[instance_name] == 'uint8blk'
        else:
            assert f"{instance_name}_{scale}" in repo_instances
            assert repo_instances[f"{instance_name}_{scale}"] == 'uint8blk'

        # label downsampling is easier to test with
        vol = downsample(volume, 2**scale, 'label')
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint8)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)
        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()
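A numpy-only illustration of the 64-voxel block alignment computed in the test above (the shape is arbitrary):

import numpy as np

vol_shape = np.array([100, 96, 64])
aligned_shape = (np.ceil(vol_shape / 64) * 64).astype(int)
print(aligned_shape.tolist())  # [128, 128, 64]: each axis rounded up to a multiple of 64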
Example #4
def _check_instance(server, uuid, instance):
    """
    Verify that the instance is a valid destination for the LabelIndices we're about to ingest.
    """
    if fetch_repo_instances(server, uuid)[instance] != 'labelmap':
        raise RuntimeError(f"DVID instance is not a labelmap: {instance}")

    info = fetch_instance_info(server, uuid, instance)
    bz, by, bx = info["Extended"]["BlockSize"]
    assert bz == by == bx == 64, \
        "The code below makes the hard-coded assumption that the instance block width is 64."
Example #5
def post_empty_meshes(server,
                      uuid,
                      instance='segmentation_sv_meshes',
                      svs=[],
                      permit_large=False,
                      check_sizes=True):
    """
    Given a list of supervoxel ids (presumably for SMALL supervoxels),
    post an empty .drc file to the tarsupervoxels instance for each one.
    
    (By convention, we do not generally store meshes for very tiny supervoxels.
    Instead, we store empty mesh files (i.e. 0 bytes) in their place, and
    our proofreading tools understand this convention.)
    
    Since this function is generally supposed to be used with only small supervoxels,
    it will refuse to write empty files for any supervoxels larger than 1,000 voxels,
    unless you pass permit_large=True.
    """
    import tarfile
    from io import BytesIO
    from tqdm import tqdm
    from neuclease.dvid import fetch_sizes, post_load

    # Determine segmentation instance
    info = fetch_instance_info(server, uuid, instance)
    segmentation_instance = info["Base"]["Syncs"][0]

    sizes = None
    if check_sizes:
        sizes = fetch_sizes(server,
                            uuid,
                            segmentation_instance,
                            svs,
                            supervoxels=True)
        if (sizes > 1000).any():
            msg = "Some of those supervoxels are large ({sizes.max()} voxels)."
            if permit_large:
                logger.warning(msg)
            else:
                msg = f"Error: {msg} Pass permit_large=True if you really mean it."
                raise RuntimeError(msg)

    bio = BytesIO()
    tf = tarfile.TarFile('empty-svs.tar', 'w', bio)
    for sv in tqdm(svs):
        tf.addfile(tarfile.TarInfo(f'{sv}.drc'), BytesIO())
    tf.close()  # Finalize the archive (writes the end-of-archive blocks).

    post_load(server, uuid, instance, bio.getvalue())
    return sizes
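The in-memory tar construction in this example needs only the standard library; a self-contained sketch (the supervoxel IDs are arbitrary):

import tarfile
from io import BytesIO

bio = BytesIO()
with tarfile.TarFile(fileobj=bio, mode='w') as tf:
    for sv in (123, 456, 789):
        # A TarInfo with the default size of 0 yields an empty member.
        tf.addfile(tarfile.TarInfo(f'{sv}.drc'), BytesIO())

tar_bytes = bio.getvalue()  # closing the TarFile wrote the end-of-archive blocks
print(len(tar_bytes))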
Example #6
def check_tarsupervoxels_status_via_exists(server, uuid, tsv_instance, bodies, seg_instance=None, mapping=None, kafka_msgs=None):
    """
    For the given bodies, query the given tarsupervoxels instance and return a
    Series (indexed by supervoxel) indicating which supervoxels are 'missing'
    from the instance.

    Bodies that no longer exist in the segmentation instance are ignored.

    This function downloads the complete mapping in advance and uses it to determine
    which supervoxels belong to each body.  It then uses the /exists endpoint to
    query for missing supervoxels (rather than /missing, which incurs a disk
    read in DVID).
    """
    if seg_instance is None:
        # Determine segmentation instance
        info = fetch_instance_info(server, uuid, tsv_instance)
        seg_instance = info["Base"]["Syncs"][0]
    
    if mapping is None:
        mapping = fetch_complete_mappings(server, uuid, seg_instance, kafka_msgs=kafka_msgs)
    
    # Filter out bodies we don't care about,
    # and append unmapped (singleton/identity) bodies
    _bodies = set(bodies)
    mapping = pd.DataFrame(mapping).query('body in @_bodies')['body'].copy()
    unmapped_bodies = _bodies - set(mapping)
    unmapped_bodies = np.fromiter(unmapped_bodies, np.uint64)
    singleton_mapping = pd.Series(index=unmapped_bodies, data=unmapped_bodies, dtype=np.uint64)
    mapping = pd.concat((mapping, singleton_mapping))
    
    assert mapping.index.values.dtype == np.uint64
    assert mapping.values.dtype == np.uint64
    
    # Faster than mapping.loc[], apparently
    mapper = LabelMapper(mapping.index.values, mapping.values)

    statuses = fetch_exists(server, uuid, tsv_instance, mapping.index, batch_size=10_000, processes=16)
    missing_svs = statuses[~statuses].index.values
    assert missing_svs.dtype == np.uint64
    missing_bodies = mapper.apply(missing_svs, True)

    missing_df = pd.DataFrame({'sv': missing_svs, 'body': missing_bodies})
    assert missing_df['sv'].dtype == np.uint64
    assert missing_df['body'].dtype == np.uint64
    
    # Return a series, indexed by sv
    return missing_df.set_index('sv')['body']
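For context, the LabelMapper step above is essentially a fast equivalent of an ordinary pandas lookup; a minimal pandas-only sketch with made-up values:

import numpy as np
import pandas as pd

# sv -> body mapping (illustrative values only)
mapping = pd.Series({10: 100, 11: 100, 12: 200}, dtype=np.uint64)
mapping.index = mapping.index.astype(np.uint64)

missing_svs = np.array([11, 12], dtype=np.uint64)
missing_bodies = pd.Series(missing_svs).map(mapping).astype(np.uint64)

missing_series = pd.DataFrame({'sv': missing_svs, 'body': missing_bodies.values}).set_index('sv')['body']
print(missing_series)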
Example #7
    def _sanitize_config(self):
        """
        Replace a few config values with reasonable defaults if necessary.
        Must be called after the input/output services are initialized.
        """
        options = self.config["masksegmentation"]

        if options["max-pyramid-scale"] == -1:
            info = fetch_instance_info(*self.output_service.instance_triple)
            existing_depth = int(info["Extended"]["MaxDownresLevel"])
            options["max-pyramid-scale"] = existing_depth

        if options["dilate-roi"] > 0 and options["erode-roi"] > 0:
            raise RuntimeError(
                "Can't dilate ROI and erode it, too.  Choose one or the other."
            )

        if options["dilate-segmentation"] > 0 and not options["invert-mask"]:
            raise RuntimeError(
                "Can't use 'dilate-segmentation'. "
                "The segmentation isn't downloaded unless 'invert-mask' is used."
            )
Example #8
def copy_synapses(src_loc, dst_loc, processes):
    """
    See caveats in the module docstring above.
    """
    src_loc = Location(*src_loc)
    dst_loc = Location(*dst_loc)

    # Create the destination instance if necessary.
    dst_instances = fetch_repo_instances(*dst_loc[:2], 'annotation')
    if dst_loc.syn_instance not in dst_instances:
        logger.info(f"Creating instance '{dst_loc.syn_instance}'")
        create_instance(*dst_loc[:3], 'annotation')

    # Check to see if the sync already exists; add it if necessary
    syn_info = fetch_instance_info(*dst_loc[:3])
    if len(syn_info["Base"]["Syncs"]) == 0:
        logger.info(
            f"Adding a sync to '{dst_loc.syn_instance}' from '{dst_loc.seg_instance}'"
        )
        post_sync(*dst_loc[:3], [dst_loc.seg_instance])
    elif syn_info["Base"]["Syncs"][0] != dst_loc.seg_instance:
        other_seg = syn_info["Base"]["Syncs"][0]
        raise RuntimeError(
            f"Can't create a sync to '{dst_loc.seg_instance}'. "
            f"Your instance is already sync'd to a different segmentation: {other_seg}"
        )

    # Fetch segmentation extents
    bounding_box_zyx = fetch_volume_box(*src_loc[:2],
                                        src_loc.seg_instance).tolist()

    # Break into block-aligned chunks (boxes) that are long in the X direction
    # (optimal access pattern for dvid read/write)
    boxes = boxes_from_grid(bounding_box_zyx, (256, 256, 6400), clipped=True)

    # Use a process pool to copy the chunks in parallel.
    fn = partial(copy_syn_blocks, src_loc, dst_loc)
    compute_parallel(fn, boxes, processes=processes, ordered=False)
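The partial/worker-pool pattern at the end of copy_synapses can be reproduced with the standard library alone; a hedged sketch in which copy_chunk is a stand-in for copy_syn_blocks and the locations are dummy strings:

from functools import partial
from multiprocessing import Pool

def copy_chunk(src_loc, dst_loc, box):
    # Stand-in worker: a real implementation would read the annotations in
    # `box` from src_loc and post them to dst_loc.
    return box

if __name__ == '__main__':
    boxes = [((0, 0, 0), (256, 256, 6400)),
             ((256, 0, 0), (512, 256, 6400))]
    fn = partial(copy_chunk, 'SRC', 'DST')
    with Pool(processes=4) as pool:
        results = pool.map(fn, boxes)
    print(len(results))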
Example #9
    def init_services(self):
        """
        Initialize the input and output services,
        and fill in 'auto' config values as needed.
        """
        input_config = self.config["input"]
        output_config = self.config["output"]
        mgr_config = self.config["resource-manager"]

        self.resource_mgr_client = ResourceManagerClient( mgr_config["server"], mgr_config["port"] )
        self.input_service = VolumeService.create_from_config( input_config, self.resource_mgr_client )

        # If we need to create a dvid instance for the output,
        # default to the same pyramid depth as the input
        if ("dvid" in input_config) and ("dvid" in output_config) and (output_config["dvid"]["creation-settings"]["max-scale"] == -1):
            info = fetch_instance_info(*self.input_service.base_service.instance_triple)
            pyramid_depth = info['Extended']['MaxDownresLevel']
            output_config["dvid"]["creation-settings"]["max-scale"] = pyramid_depth

        replace_default_entries(output_config["geometry"]["bounding-box"], self.input_service.bounding_box_zyx[:, ::-1])

        self.output_service = VolumeService.create_from_config( output_config, self.resource_mgr_client )
        assert isinstance( self.output_service, VolumeServiceWriter ), \
            "The output format you are attempting to use does not support writing"

        if isinstance(self.output_service.base_service, DvidVolumeService):
            if not self.output_service.base_service.supervoxels:
                raise RuntimeError("Can't write to a non-supervoxels output service.")

            if not self.output_service.base_service.disable_indexing:
                logger.warning("******************************************************************************")
                logger.warning("Your output config does not specify 'disable-indexing', which means DVID will "
                               "attempt to index all voxels as they are written to the volume. "
                               "For large volumes, this is NOT recommended!"
                               "(You should run a separate job to recompute the labelindex afterwards.)")
                logger.warning("******************************************************************************")

        logger.info(f"Output bounding box: {self.output_service.bounding_box_zyx[:,::-1].tolist()}")
Example #10
    def execute(self):
        self._sanitize_config()

        input_config = self.config["input"]["dvid"]
        options = self.config["decimatemeshes"]
        resource_config = self.config["resource-manager"]

        skip_existing = options['skip-existing']
        output_dir = options["output-directory"]
        os.makedirs(output_dir, exist_ok=True)

        resource_mgr_client = ResourceManagerClient(resource_config["server"],
                                                    resource_config["port"])

        server = input_config["server"]
        uuid = input_config["uuid"]
        tsv_instance = input_config["tarsupervoxels-instance"]

        # Determine segmentation instance
        info = fetch_instance_info(server, uuid, tsv_instance)
        seg_instance = info["Base"]["Syncs"][0]
        input_format = info["Extended"]["Extension"]

        if np.array(options["rescale"] == 1.0).all(
        ) and options["format"] == "ngmesh" and input_format != "ngmesh":
            logger.warning(
                "*** You are converting to ngmesh format, but you have not specified a rescale parameter! ***"
            )

        def process_body(body_id):
            output_path = f'{output_dir}/{body_id}.{options["format"]}'
            if skip_existing and os.path.exists(output_path):
                return (body_id, 0, 0.0, 0, 'skipped', 0)

            with resource_mgr_client.access_context(input_config["server"],
                                                    True, 1, 0):
                try:
                    mutid = fetch_mutation_id(server, uuid, seg_instance,
                                              body_id)
                except HTTPError:
                    # FIXME: Better to log the exception strings to a file
                    return (body_id, 0, 0.0, 0, 'error-mutid', 0)

                try:
                    tar_bytes = fetch_tarfile(server, uuid, tsv_instance,
                                              body_id)
                except HTTPError:
                    # FIXME: Better to log the exception strings to a file
                    return (body_id, 0, 0.0, 0, 'error-fetch', mutid)

            try:
                vertex_count, fraction, orig_vertices = \
                    decimate_existing_mesh( server, uuid, tsv_instance, body_id,
                                            options["decimation"], options["max-vertices"], options["rescale"], options["format"],
                                            output_path,
                                            tar_bytes=tar_bytes )
            except Exception:
                return (body_id, 0, 0.0, 0, 'error-generate', mutid)

            return (body_id, vertex_count, fraction, orig_vertices, 'success',
                    mutid)

        bodies = self._load_body_list(options["bodies"], server, uuid,
                                      seg_instance)

        # Choose more partitions than cores, so that early finishers have the opportunity to steal work.
        bodies_bag = dask.bag.from_sequence(bodies,
                                            npartitions=self.total_cores() *
                                            10)

        with Timer(f"Decimating {len(bodies)} meshes", logger):
            stats = bodies_bag.map(process_body).compute()

        stats_df = pd.DataFrame(stats,
                                columns=[
                                    'body', 'vertices', 'decimation',
                                    'orig_vertices', 'result', 'mutid'
                                ])
        stats_df['uuid'] = uuid

        stats_df.to_csv('mesh-stats.csv', index=False, header=True)
        np.save('mesh-stats.npy', stats_df.to_records(index=False))

        failed_df = stats_df.query('result != "success"')
        if len(failed_df) > 0:
            logger.warning(
                f"{len(failed_df)} meshes could not be generated. See mesh-stats.csv"
            )
            logger.warning(f"Results:\n{stats_df['result'].value_counts()}")
Example #11
    def _prepare_output(self):
        """
        If necessary, create the output directory or
        DVID instance so that meshes can be written to it.
        """
        input_cfg = self.config["input"]
        output_cfg = self.config["output"]
        options = self.config["svdecimate"]

        ## directory output
        if 'directory' in output_cfg:
            # Convert to absolute so we can chdir with impunity later.
            output_cfg['directory'] = os.path.abspath(output_cfg['directory'])
            os.makedirs(output_cfg['directory'], exist_ok=True)
            return

        ##
        ## DVID output (either keyvalue or tarsupervoxels)
        ##
        (instance_type,) = output_cfg.keys()

        server = output_cfg[instance_type]['server']
        uuid = output_cfg[instance_type]['uuid']
        instance = output_cfg[instance_type]['instance']

        # If the output server or uuid is left blank,
        # we assume it should be auto-filled from the input settings.
        if server == "" or uuid == "":
            assert "dvid" in input_cfg
            if server == "":
                output_cfg[instance_type]['server'] = input_cfg["dvid"]["server"]

            if uuid == "":
                output_cfg[instance_type]['uuid'] = input_cfg["dvid"]["uuid"]

        # Resolve in case a branch was given instead of a specific uuid
        server = output_cfg[instance_type]['server']
        uuid = output_cfg[instance_type]['uuid']
        uuid = resolve_ref(server, uuid)

        if is_locked(server, uuid):
            info = fetch_server_info(server)
            if "Mode" in info and info["Mode"] == "allow writes on committed nodes":
                logger.warn(f"Output is a locked node ({uuid}), but server is in full-write mode. Proceeding.")
            elif os.environ.get("DVID_ADMIN_TOKEN", ""):
                logger.warn(f"Output is a locked node ({uuid}), but you defined DVID_ADMIN_TOKEN. Proceeding.")
            else:
                raise RuntimeError(f"Can't write to node {uuid} because it is locked.")

        if instance_type == 'tarsupervoxels' and not self.input_is_labelmap_supervoxels():
            msg = ("You shouldn't write to a tarsupervoxels instance unless "
                   "you're reading supervoxels from a labelmap input.\n"
                   "Use a labelmap input source, and set supervoxels: true")
            raise RuntimeError(msg)

        existing_instances = fetch_repo_instances(server, uuid)
        if instance in existing_instances:
            # Instance exists -- nothing to do.
            return

        if not output_cfg[instance_type]['create-if-necessary']:
            msg = (f"Output instance '{instance}' does not exist, "
                   "and your config did not specify create-if-necessary")
            raise RuntimeError(msg)

        assert instance_type in ('tarsupervoxels', 'keyvalue')

        ## keyvalue output
        if instance_type == "keyvalue":
            create_instance(server, uuid, instance, "keyvalue", tags=["type=meshes"])
            return

        ## tarsupervoxels output
        sync_instance = output_cfg["tarsupervoxels"]["sync-to"]

        if not sync_instance:
            # Auto-fill a default 'sync-to' instance using the input segmentation, if possible.
            info = fetch_instance_info(*[input_cfg["dvid"][k] for k in ("server", "uuid", "tarsupervoxels-instance")])
            syncs = info['Base']['Syncs']
            if syncs:
                sync_instance = syncs[0]

        if not sync_instance:
            msg = ("Can't create a tarsupervoxels instance unless "
                   "you specify a 'sync-to' labelmap instance name.")
            raise RuntimeError(msg)

        if sync_instance not in existing_instances:
            msg = ("Can't sync to labelmap instance '{sync_instance}': "
                   "it doesn't exist on the output server.")
            raise RuntimeError(msg)

        create_tarsupervoxel_instance(server, uuid, instance, sync_instance, options["format"])
Example #12
def test_dvid_volume_service_labelmap(setup_dvid_repo, random_segmentation,
                                      disable_auto_retry):
    server, uuid = setup_dvid_repo
    instance_name = 'test-dvs-labelmap'

    volume = random_segmentation[:256, :192, :128]
    max_scale = 2
    voxel_dimensions = [4.0, 4.0, 32.0]

    config_text = textwrap.dedent(f"""\
        dvid:
          server: {server}
          uuid: {uuid}
          segmentation-name: {instance_name}
          supervoxels: true
          
          create-if-necessary: true
          creation-settings:
            max-scale: {max_scale}
            voxel-size: {voxel_dimensions}
       
        geometry:
          bounding-box: [[0,0,0], {list(volume.shape[::-1])}]
          message-block-shape: [64,64,64]
    """)

    yaml = YAML()
    with StringIO(config_text) as f:
        volume_config = yaml.load(f)

    assert instance_name not in fetch_repo_instances(server, uuid)

    service = VolumeService.create_from_config(volume_config)

    repo_instances = fetch_repo_instances(server, uuid)

    assert instance_name in repo_instances
    assert repo_instances[instance_name] == 'labelmap'

    info = fetch_instance_info(server, uuid, instance_name)
    assert info["Extended"]["VoxelSize"] == voxel_dimensions

    scaled_volumes = {}
    for scale in range(max_scale + 1):
        vol = downsample(volume, 2**scale, 'label')
        aligned_shape = (np.ceil(np.array(vol.shape) / 64) * 64).astype(int)
        aligned_vol = np.zeros(aligned_shape, np.uint64)
        overwrite_subvol(aligned_vol, [(0, 0, 0), vol.shape], vol)

        service.write_subvolume(aligned_vol, (0, 0, 0), scale)
        scaled_volumes[scale] = aligned_vol

    box = np.array([[40, 80, 40], [240, 160, 100]])
    for scale in range(max_scale + 1):
        scaled_box = box // 2**scale
        vol = service.get_subvolume(scaled_box, scale)
        assert (vol == extract_subvol(scaled_volumes[scale], scaled_box)).all()

    #
    # Check sparse coords function
    #
    labels = list({*pd.unique(volume.reshape(-1))} - {0})
    brick_coords_df = service.sparse_brick_coords_for_labels(labels)

    assert brick_coords_df.columns.tolist() == ['z', 'y', 'x', 'label']
    assert set(brick_coords_df['label'].values) == set(labels), \
        "Some labels were missing from the sparse brick coords!"

    def ndi(shape):
        return np.indices(shape).reshape(len(shape), -1).transpose()

    expected_df = pd.DataFrame(ndi(volume.shape), columns=[*'zyx'])

    expected_df['label'] = volume.reshape(-1)
    expected_df['z'] //= 64
    expected_df['y'] //= 64
    expected_df['x'] //= 64
    expected_df = expected_df.drop_duplicates()
    expected_df['z'] *= 64
    expected_df['y'] *= 64
    expected_df['x'] *= 64

    expected_df = expected_df.query('label != 0')

    expected_df.sort_values(['z', 'y', 'x', 'label'], inplace=True)
    brick_coords_df.sort_values(['z', 'y', 'x', 'label'], inplace=True)

    expected_df.reset_index(drop=True, inplace=True)
    brick_coords_df.reset_index(drop=True, inplace=True)

    assert expected_df.shape == brick_coords_df.shape
    assert (brick_coords_df == expected_df).all().all()

    #
    # Check sample_labels()
    #
    points = [np.random.randint(d, size=(10, )) for d in vol.shape]
    points = np.transpose(points)
    labels = service.sample_labels(points)
    assert (labels == volume[(*points.transpose(), )]).all()
Example #13
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--no-downres', action='store_true')
    parser.add_argument('--only-within-roi')
    parser.add_argument('--not-within-roi')
    parser.add_argument('dvid_server')
    parser.add_argument('uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument('sparsevol_files', nargs='+')
    args = parser.parse_args()

    instance_info = (args.dvid_server, args.uuid, args.labelmap_instance)

    assert not args.only_within_roi or not args.not_within_roi, \
        "Can't supply both --only-within-roi and --not-within-roi.  Pick one or the other (or neither)."

    roi = args.only_within_roi or args.not_within_roi
    invert_roi = (args.not_within_roi is not None)

    if roi:
        roi_mask, mask_box = fetch_roi(args.dvid_server,
                                       args.uuid,
                                       roi,
                                       format='mask')
        roi_sbm = SparseBlockMask(roi_mask, mask_box * (2**5),
                                  2**5)  # ROIs are provided at scale 5
    else:
        roi_sbm = None

    # Ideally, we would choose the max label for the node we're writing to,
    # but the /maxlabel endpoint doesn't work for all nodes.
    # Instead, we'll use the repo-wide MaxRepoLabel from the /info JSON.
    #maxlabel = fetch_maxlabel(args.dvid_server, args.uuid, args.labelmap_instance)
    maxlabel = fetch_instance_info(
        args.dvid_server, args.uuid,
        args.labelmap_instance)["Extended"]["MaxRepoLabel"]

    for i, path in enumerate(args.sparsevol_files):
        maxlabel += 1
        name = os.path.split(path)[1]
        prefix_logger = PrefixedLogger(logger, f"Vol #{i:02d} {name}: ")

        with Timer(f"Pasting {name} as {maxlabel}", logger):
            overwritten_labels = overwrite_sparsevol(*instance_info, maxlabel,
                                                     path, roi_sbm, invert_roi,
                                                     args.no_downres,
                                                     prefix_logger)

        results_path = os.path.splitext(path)[0] + '.json'
        with open(results_path, 'w') as f:
            results = {
                'new-label': maxlabel,
                'overwritten_labels': sorted(overwritten_labels)
            }
            json.dump(results, f, indent=2, cls=NumpyConvertingEncoder)

    logger.info(f"Done.")
Example #14
    def execute(self):
        self._sanitize_config()
        self._prepare_output()

        input_config = self.config["input"]["dvid"]
        output_config = self.config["output"]
        options = self.config["svdecimate"]
        resource_config = self.config["resource-manager"]

        resource_mgr_client = ResourceManagerClient(resource_config["server"], resource_config["port"])

        server = input_config["server"]
        uuid = input_config["uuid"]
        tsv_instance = input_config["tarsupervoxels-instance"]

        bodies = load_body_list(options["bodies"], False)

        # Determine segmentation instance
        info = fetch_instance_info(server, uuid, tsv_instance)
        input_format = info["Extended"]["Extension"]

        output_format = options["format"]

        if np.array(options["rescale"] == 1.0).all() and output_format == "ngmesh" and input_format != "ngmesh":
            logger.warning("*** You are converting to ngmesh format, but you have not specified a rescale parameter! ***")

        decimation_lib = options["decimation-library"]
        max_sv_vertices = options["max-sv-vertices"]
        max_body_vertices = options["max-body-vertices"]
        num_procs = options["processes-per-body"]

        def process_body(body_id):
            with resource_mgr_client.access_context( input_config["server"], True, 1, 0 ):
                tar_bytes = fetch_tarfile(server, uuid, tsv_instance, body_id)

            sv_meshes = Mesh.from_tarfile(tar_bytes, concatenate=False)
            sv_meshes = {int(os.path.splitext(name)[0]): m for name, m in sv_meshes.items()}

            total_body_vertices = sum([len(m.vertices_zyx) for m in sv_meshes.values()])
            decimation = min(1.0, max_body_vertices / total_body_vertices)

            try:
                _process_sv = partial(process_sv, decimation, decimation_lib, max_sv_vertices, output_format)
                if num_procs <= 1:
                    output_table = [*starmap(_process_sv, sv_meshes.items())]
                else:
                    output_table = compute_parallel(_process_sv, sv_meshes.items(), starmap=True, processes=num_procs, ordered=False, show_progress=False)

                cols = ['sv', 'orig_vertices', 'final_vertices', 'final_decimation', 'effective_decimation', 'mesh_bytes']
                output_df = pd.DataFrame(output_table, columns=cols)
                output_df['body'] = body_id
                output_df['error'] = ""
                write_sv_meshes(output_df, output_config, output_format, resource_mgr_client)
            except Exception as ex:
                svs = [*sv_meshes.keys()]
                orig_vertices = [len(m.vertices_zyx) for m in sv_meshes.values()]
                output_df = pd.DataFrame({'sv': svs, 'orig_vertices': orig_vertices})
                output_df['final_vertices'] = -1
                output_df['final_decimation'] = -1
                output_df['effective_decimation'] = -1
                output_df['mesh_bytes'] = -1
                output_df['body'] = body_id
                output_df['error'] = str(ex)

            return output_df.drop(columns=['mesh_bytes'])

        futures = self.client.map(process_body, bodies)

        # Support synchronous testing with a fake 'as_completed' object
        if hasattr(self.client, 'DEBUG'):
            ac = as_completed_synchronous(futures, with_results=True)
        else:
            ac = distributed.as_completed(futures, with_results=True)

        try:
            stats = []
            for f, r in tqdm_proxy(ac, total=len(futures)):
                stats.append(r)
                if (r['error'] != "").any():
                    body = r['body'].iloc[0]
                    logger.warning(f"Body {body} failed!")

        finally:
            stats_df = pd.concat(stats)
            stats_df.to_csv('mesh-stats.csv', index=False, header=True)
            with open('mesh-stats.pkl', 'wb') as f:
                pickle.dump(stats_df, f)
Example #15
def extract_assignment_fragments(server,
                                 uuid,
                                 syn_instance,
                                 edge_table,
                                 boi_rois=None,
                                 min_tbars_in_roi=2,
                                 min_psds_in_roi=10,
                                 fragment_rois=None,
                                 processes=16,
                                 *,
                                 request_processes=None,
                                 synapse_table=None,
                                 boi_table=None,
                                 seg_instance=None,
                                 update_edges=False):
    """
    Using the edge table emitted from the FindAdjacencies workflow,
    emit a table of "fragments" (sets of bodies) which connect two
    "bodies of interest" (BOIs, described below).
    
    The emitted fragments can be used to generate
    focused assignments and/or merge review assignments.
    
    Essentially, we construct an adjacency graph from the edge table,
    and then search for any paths that can connect two BOIs:
    
        BOI - b - b - b - ... - b - BOI
    
    The path from one BOI to another is called a "fragment".
    
    If the path contains only the two BOIs and no other bodies, then 
    the two BOIs are directly adjacent, with no intervening bodies:
    
        BOI - BOI

    In those cases, it is possible to create a "focused proofreading" task
    from the body pair.  In all other cases, you can create a "merge review"
    task for the fragment.  See the following functions:
    
        generate_mergereview_assignments_from_df()
        neuclease.focused.assignments.generate_focused_assignments()
    
    Exactly which bodies are considered "bodies of interest" is determined
    by the presence of T-bars and PSDs within the specified ROIs (boi_rois, if provided),
    thresholded by the given criteria.  If no boi_rois are
    specified, then all T-bars and PSDs in the given bodies are counted.
    
    Additionally, the final fragment set can be filtered to exclude
    fragments that travel outside of a given list of ROIs.

    See the explanation of the edge_table parameter for an explanation of
    the FindAdjacencies output.

    Tip:
        To visualize the adjacency graph for a subset of rows in either
        the input edge table or the output tables, see display_graph(), below.

    Args:
        server, uuid, syn_instance:
            DVID synapse (annotation) instance
    
        edge_table:
            A DataFrame as explained below, or a filepath to a
            .npy file that can be loaded into one.
        
            The FindAdjacencies workflow finds the sites at which
            preselected bodies are adjacent to one another.
            
            The user provides a list of body "groups" which are analyzed independently.
            In addition to "direct" adjacencies between touching bodies (distance=1.0),
            the workflow can be configured to also search for near-adjacencies,
            in which bodies come close to each other without physically touching (distance > 1.0).
            Each adjacency is referred to as an edge, and the results are emitted as
            an "edge table" with the following columns:
        
                [label_a, label_b, za, ya, xa, zb, yb, xb, distance, group, group_cc]
        
            with the following definitions:
            
                label_a, label_b:
                    Body IDs (assuming the FindAdjacencies workflow was executed on a body input source)
    
                 za, ya, xa, zb, yb, xb:
                    Coordinates that fall within the body on each side of the edge.
    
                distance:
                    The euclidean distance between the two coordinates.
                    For "direct" adjacencies, distance is always 1.0.
                    For "nearby" adjacencies, distance is always > 1.0.
    
                group:
                    The original body groups the user selected for adjacency analysis.
                    The exact group ID values are arbitrary (not necessarily consecutive),
                    and were provided by the user that ran the FindAdjacencies workflow.
                    Note that one body may exist in more than one group.
                
                group_cc:
                    An independent subgraph is constructed for each group (from the group's 'edges').
                    A connected components analysis is then performed on each subgraph,
                    and a unique ID is assigned to each CC.
    
                    Although the connected components are computed on each group in isolation,
                    the assigned group_cc values are unique across all of the groups in the table.
    
                    The group_cc values are otherwise arbitrary. (That is, they aren't necessarily
                    consecutive, or related to the other CC IDs in their group.)
                    For example, group 123 might be found to contain two connected components,
                    labeled group_cc=53412 and group_cc=82344.

        boi_rois:
            Optional.  List of ROI instance names.
            If provided, only T-bars and PSDs that fall within the given list of ROIs will be
            counted when determining which bodies are considered BOIs.  Otherwise, all synapses
            in the volume are considered.
        
        min_tbars_in_roi, min_psds_in_roi:
            The criteria for determining what counts as a BOI.
            As indicated in the argument names, only synapse points WITHIN the ROI(s)
            will be counted towards these requirements. 
        
        fragment_rois:
            Optional.  Any fragments that extend outside of the given list of ROIs
            will be discarded from the result, even though they contained BOIs
            that matched the BOI criteria.
        
        processes:
            Various steps in this function can be parallelized.
            This specifies how much parallelism to use.

        request_processes:
            By default, requests to DVID are also made in parallel,
            with parallelism set by the 'processes' argument.
            But if you would like to reduce (or increase) the processes used when
            fetching from dvid (e.g. to reduce burden on the dvid server),
            specify a separate parallelism level via request_processes.
        
        synapse_table:
            Optional.  If you already fetched the synapses from DVID
            (via fetch_synapses_in_batches() or fetch_roi_synapses()),
            you can provide it here (or a file path to a stored .npy file),
            in which case this function will not need to fetch the synapses from DVID.
            (Not needed at all if you're providing your own boi_table.)
        
        boi_table:
            Optional.
            Normally this function computes the boi_table directly from the synapse points,
            but if you already have it handy, you can pass it in here.
            It will still be filtered according to min_tbars_in_roi and min_psds_in_roi,
            so the BOIs used will be accurate as long as the table contains all of the BOIs
            you might be interested in, or more.
        
        seg_instance:
            By default, the BOIs in this table will be extracted from the segmentation
            instance that is associated with the given synapse annotation instance.
            But if you would like to use a different segmentation instance, provide it here.
        
        update_edges:
            If True, re-fetch the body label under each coordinate in the table,
            and re-select the "best" (most central) edge for body pairs with multiple edges.
            This takes a while to run. It's only necessary if your edge table is likely to
            be out-of-date with respect to the given UUID.
    
    Returns:
        (focused_fragments_df, mr_fragments_df, mr_endpoint_df, boi_table), where:
        
        focused_fragments_df:
            A DataFrame consisting of rows suitable for "focused proofreading",
            i.e. every row (edge) is a single-edge fragment.
        
        mr_fragments_df:
            A DataFrame consisting of edges that belong to fragments with more
            than one edge, meaning they are not suitable for "focused proofreading"
            and are instead suitable for "merge review".
            The fragment IDs are the (group_cc, cc_task) columns.
            Edges with the same fragment ID should be grouped together into the
            same merge review task.
        
        mr_endpoint_df:
            A DataFrame containing only the 'endpoint' bodies of the MR fragments,
            one pair per row.
            The columns in which the bodies are found (a vs b) will not be the same
            as they appear in mr_fragments_df, but the group_cc and cc_task columns
            will correspond to the appropriate rows in the full DataFrame.
            This 'endpoint' dataframe does not contain enough information to create
            merge review tasks (it lacks information about the intermediate bodies
            that connect the two endpoints) but it is more convenient to analyze
            when computing certain statistics to describe the types of merge review
            tasks that were found.
        
        boi_table:
            A DataFrame containing the BOIs (based on the criteria given above)
            that were used to select fragments, indexed by body, with
            columns ['PreSyn', 'PostSyn'].
            (See ``neuclease.dvid.annotation.determine_bodies_of_interest()``.)
            Note that the returned fragments do not necessarily cover
            every BOI in this list.
    """
    if isinstance(boi_rois, str):
        boi_rois = [boi_rois]

    if isinstance(fragment_rois, str):
        fragment_rois = [fragment_rois]

    request_processes = request_processes or processes

    if seg_instance is None:
        syn_info = fetch_instance_info(server, uuid, syn_instance)
        seg_instance = syn_info["Base"]["Syncs"][0]

    ref_seg = (server, uuid, seg_instance)

    # Load edges (if necessary), pre-filter, normalize
    edges_df = load_edges(edge_table)

    if update_edges:
        # Update the table for consistency with the given UUID,
        # and re-post-process it to find the correct "central" and "closest" edges,
        # (in case some groups were merged).
        edges_df = update_localized_edges(*ref_seg, edges_df,
                                          request_processes)

    # Technically, you could provide 0 for either of these,
    # but that's probably a mistake on your part.
    # (Unless you specifically appended some 0-synapse bodies to your
    # synapse table, and expect those to be considered BOIs.)
    assert min_tbars_in_roi >= 1 and min_psds_in_roi >= 1

    if boi_table is not None:
        boi_table = boi_table.query(
            'PreSyn >= @min_tbars_in_roi or PostSyn >= @min_psds_in_roi')
    else:
        assert not boi_rois, \
            "You can't specify boi_rois if you're providing your own boi_table"

        # Fetch synapse labels and determine the set of BOIs
        boi_table = determine_bodies_of_interest(server,
                                                 uuid,
                                                 syn_instance,
                                                 boi_rois,
                                                 min_tbars_in_roi,
                                                 min_psds_in_roi,
                                                 request_processes,
                                                 synapse_table=synapse_table)

    assert boi_table.index.name == 'body'
    assert set(boi_table.columns) == {'PreSyn', 'PostSyn'}

    bois = set(boi_table.index)

    # We're trying to connect BOIs to each other.
    # Therefore, we're not interested in groups of bodies
    # that don't contain at least 2 BOIs.
    edges_df = filter_groups_for_min_boi_count(edges_df, bois, ['group_cc'], 2)

    # Find the paths ('fragments', a.k.a. 'tasks') that connect BOIs within each group.
    fragment_edges_df = compute_fragment_edges(edges_df, bois, processes)

    if fragment_rois is not None:
        # Drop fragments that extend outside of the specified ROIs.
        fragment_edges_df = filter_fragments_for_roi(server, uuid,
                                                     fragment_rois,
                                                     fragment_edges_df)

    # If a group itself contained multiple CCs, it's possible that the BOIs were separated
    # into separate tasks, meaning that each individual task no longer satisfies the 2-BOI requirement.
    # Refilter.
    fragment_edges_df = filter_groups_for_min_boi_count(
        fragment_edges_df, bois, ['group_cc', 'cc_task'], 2)

    # Fetch the supervoxel IDs for each edge.
    with Timer("Sampling supervoxel IDs", logger):
        points_a = fragment_edges_df[['za', 'ya', 'xa']].values
        points_b = fragment_edges_df[['zb', 'yb', 'xb']].values
        fragment_edges_df['sv_a'] = fetch_labels_batched(
            *ref_seg, points_a, True, processes=request_processes)
        fragment_edges_df['sv_b'] = fetch_labels_batched(
            *ref_seg, points_b, True, processes=request_processes)

    # Divide into 'focused' and 'merge review' fragments,
    # i.e. single-edge fragments and multi-edge fragments
    focused_fragments_df = (
        fragment_edges_df.groupby(['group_cc', 'cc_task']).filter(
            lambda task_df: len(task_df) == 1)  # exactly one edge
        .copy())

    mr_fragments_df = (
        fragment_edges_df.groupby(['group_cc', 'cc_task']).filter(
            lambda task_df: len(task_df) > 1)  # multiple edges
        .copy())

    num_focused_fragments = len(focused_fragments_df)
    num_mr_fragments = len(
        mr_fragments_df.drop_duplicates(['group_cc', 'cc_task']))
    fragment_bodies = pd.unique(fragment_edges_df[['label_a', 'label_b'
                                                   ]].values.reshape(-1))
    num_fragment_bois = len(
        set(fragment_bodies).intersection(set(boi_table.index)))

    logger.info(f"Emitting {num_focused_fragments} focused fragments and "
                f"{num_mr_fragments} merge-review fragments, "
                f"covering {num_fragment_bois} BOIs out of {len(boi_table)}.")

    with Timer("Merging synapse counts onto results", logger):
        focused_fragments_df = focused_fragments_df.merge(boi_table,
                                                          'left',
                                                          left_on='label_a',
                                                          right_index=True)
        focused_fragments_df = focused_fragments_df.merge(boi_table,
                                                          'left',
                                                          left_on='label_b',
                                                          right_index=True,
                                                          suffixes=('_a',
                                                                    '_b'))

        mr_fragments_df = mr_fragments_df.merge(boi_table,
                                                'left',
                                                left_on='label_a',
                                                right_index=True)
        mr_fragments_df = mr_fragments_df.merge(boi_table,
                                                'left',
                                                left_on='label_b',
                                                right_index=True,
                                                suffixes=('_a', '_b'))

    with Timer("Constructing merge-review 'endpoint' dataframe", logger):
        try:
            mr_endpoint_df = construct_mr_endpoint_df(mr_fragments_df, bois)
        except BaseException as ex:
            logger.error(str(ex))
            logger.error(
                "Failed to construct the merge-review 'endpoint' dataframe.  Returning None."
            )
            mr_endpoint_df = None

    return focused_fragments_df, mr_fragments_df, mr_endpoint_df, boi_table
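A toy edge table with the columns described in the docstring above can be handy for experimenting with the filtering steps; all values below are made up:

import pandas as pd

edges_df = pd.DataFrame(
    [[1001, 1002, 10, 20, 30, 10, 20, 31, 1.0, 7, 70001],
     [1002, 1003, 40, 50, 60, 42, 50, 60, 2.2, 7, 70001]],
    columns=['label_a', 'label_b', 'za', 'ya', 'xa',
             'zb', 'yb', 'xb', 'distance', 'group', 'group_cc'])

# One direct adjacency (distance == 1.0) and one 'nearby' adjacency (distance > 1.0),
# both in the same group and connected component.
print(edges_df)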
Example #16
    def __init__(self, volume_config, resource_manager_client=None):
        validate(volume_config, DvidGenericVolumeSchema, inject_defaults=True)

        assert 'apply-labelmap' not in volume_config["dvid"].keys(), \
            ("The apply-labelmap section should be in the 'adapters' section, (parallel to 'dvid' and 'geometry'), "
             "not nested within the 'dvid' section!")

        ##
        ## server, uuid
        ##
        ## Note:
        ##   self.uuid will be resolved, but volume_config["dvid"]["uuid"]
        ##   will not be overwritten. It will remain unresolved.
        ##
        self._server = volume_config["dvid"]["server"]
        self._uuid = resolve_ref(volume_config["dvid"]["server"],
                                 volume_config["dvid"]["uuid"])

        self._throttle = volume_config["dvid"]["accept-throttling"]

        ##
        ## instance, dtype, etc.
        ##

        config_block_width = volume_config["geometry"]["block-width"]

        assert ('segmentation-name' in volume_config["dvid"]) ^ ('grayscale-name' in volume_config["dvid"]), \
            "Config error: Specify either segmentation-name or grayscale-name (not both)"

        if "segmentation-name" in volume_config["dvid"]:
            self._instance_name = volume_config["dvid"]["segmentation-name"]
            self._dtype = np.uint64
        elif "grayscale-name" in volume_config["dvid"]:
            self._instance_name = volume_config["dvid"]["grayscale-name"]
            self._dtype = np.uint8

        self._dtype_nbytes = np.dtype(self._dtype).type().nbytes

        try:
            instance_info = fetch_instance_info(self._server, self._uuid,
                                                self._instance_name)
        except HTTPError as ex:
            if ex.response.status_code != 400:
                raise

            if not volume_config["dvid"]["create-if-necessary"]:
                existing_instances = fetch_repo_instances(
                    self._server, self._uuid)
                if self._instance_name not in existing_instances:
                    raise RuntimeError(
                        f"Instance '{self._instance_name}' does not exist in {self._server} / {self._uuid}."
                        "Add 'create-if-necessary: true' to your config if you want it to be created.'"
                    )
                raise

            # Instance doesn't exist yet -- we are going to create it.
            if "segmentation-name" in volume_config["dvid"]:
                self._instance_type = 'labelmap'  # get_voxels doesn't really care if it's labelarray or labelmap...
                self._is_labels = True
            else:
                self._instance_type = 'uint8blk'
                self._is_labels = False

            block_width = config_block_width
        else:
            self._instance_type = instance_info["Base"]["TypeName"]
            self._is_labels = self._instance_type in ('labelblk', 'labelarray',
                                                      'labelmap')
            if self._instance_type == "googlevoxels" and instance_info[
                    "Extended"]["Scales"][0]["channelType"] == "UINT64":
                self._is_labels = True

            bs_x, bs_y, bs_z = instance_info["Extended"]["BlockSize"]
            assert (bs_x == bs_y == bs_z), "Expected blocks to be cubes."
            block_width = bs_x

        if "disable-indexing" in volume_config["dvid"]:
            self.disable_indexing = volume_config["dvid"]["disable-indexing"]
        else:
            self.disable_indexing = DvidSegmentationServiceSchema[
                "properties"]["disable-indexing"]["default"]

        if "enable-downres" in volume_config["dvid"]:
            self.enable_downres = volume_config["dvid"]["enable-downres"]
        else:
            self.enable_downres = DvidSegmentationServiceSchema["properties"][
                "enable-downres"]["default"]

        if "gzip-level" in volume_config["dvid"]:
            self.gzip_level = volume_config["dvid"]["gzip-level"]
        else:
            self.gzip_level = DvidSegmentationServiceSchema["properties"][
                "gzip-level"]["default"]

        # Whether or not to read the supervoxels from the labelmap instance instead of agglomerated labels.
        self.supervoxels = ("supervoxels" in volume_config["dvid"]) and (
            volume_config["dvid"]["supervoxels"])

        ##
        ## default block width
        ##
        assert config_block_width in (-1, block_width), \
            f"DVID volume block-width ({config_block_width}) from config does not match server metadata ({block_width})"
        if block_width == -1:
            # No block-width specified; choose default
            block_width = 64

        ##
        ## bounding-box
        ##
        bounding_box_zyx = np.array(
            volume_config["geometry"]["bounding-box"])[:, ::-1]
        try:
            stored_extents = fetch_volume_box(self._server, self.uuid,
                                              self._instance_name)
        except HTTPError:
            assert -1 not in bounding_box_zyx.flat[:], \
                f"Instance '{self._instance_name}' does not yet exist on the server, "\
                "so your volume_config must specify explicit values for bounding-box"
        else:
            if stored_extents is not None and stored_extents.any():
                replace_default_entries(bounding_box_zyx, stored_extents)

        ##
        ## message-block-shape
        ##
        preferred_message_shape_zyx = np.array(
            volume_config["geometry"]["message-block-shape"][::-1])
        replace_default_entries(preferred_message_shape_zyx,
                                [block_width, block_width, 100 * block_width])

        ##
        ## available-scales
        ##
        available_scales = list(volume_config["geometry"]["available-scales"])

        ##
        ## resource_manager_client
        ##
        if resource_manager_client is None:
            # Dummy client
            resource_manager_client = ResourceManagerClient("", 0)

        ##
        ## Special setting to override resource manager for sparse coords
        ##
        try:
            use_resource_manager_for_sparse_coords = volume_config["dvid"][
                "use-resource-manager-for-sparse-coords"]
        except KeyError:
            # Grayscale doesn't have this setting
            use_resource_manager_for_sparse_coords = False

        ##
        ## Store members
        ##
        self._resource_manager_client = resource_manager_client
        self._block_width = block_width
        self._bounding_box_zyx = bounding_box_zyx
        self._preferred_message_shape_zyx = preferred_message_shape_zyx
        self._available_scales = available_scales
        self._use_resource_manager_for_sparse_coords = use_resource_manager_for_sparse_coords
        self.write_empty_blocks = volume_config["dvid"]["write-empty-blocks"]

        ##
        ## Overwrite config entries that we might have modified
        ##
        volume_config["geometry"]["block-width"] = self._block_width
        volume_config["geometry"][
            "bounding-box"] = self._bounding_box_zyx[:, ::-1].tolist()
        volume_config["geometry"][
            "message-block-shape"] = self._preferred_message_shape_zyx[::
                                                                       -1].tolist(
                                                                       )

        # TODO: Check the server for available scales and overwrite in the config?
        #volume_config["geometry"]["available-scales"] = [0]

        if volume_config["dvid"]["create-if-necessary"]:
            self._create_instance(volume_config)
Example #17
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '--use-mapping',
        action='store_true',
        help=
        'Use in-memory map + /exists instead of /missing, as described in the general help text above.'
    )
    parser.add_argument(
        '--output',
        '-o',
        default='missing-from-tsv.csv',
        help='Where to write the output CSV (default: missing-from-tsv.csv)')

    parser.add_argument(
        '--kafka-timestamp',
        '-k',
        type=str,
        help='Alternative to providing your own bodies list.\n'
        'Use the kafka log to automatically determine the list of bodies that have changed after the given timestamp.\n'
        'Examples: -k="2018-11-22" -k="2018-11-22 17:34:00"')

    parser.add_argument('server', help='dvid server, e.g. emdata3:8900')
    parser.add_argument(
        'uuid',
        help=
        'dvid node to analyze or "master" for the latest master branch uuid')
    parser.add_argument(
        'tsv_instance',
        help="Name of a tarsupervoxels instance, e.g. segmentation_sv_meshes.\n"
        "Must be sync'd to a labelmap (segmentation) instance.")
    parser.add_argument(
        'bodies_csv',
        nargs='?',
        help='CSV containing a column named "body", which will be read.\n'
        'If no "body" column exists, the first column is used, regardless of the name.\n'
        '(Omit this arg if you are using --kafka-timestamp)')
    args = parser.parse_args()

    if not (bool(args.kafka_timestamp) ^ bool(args.bodies_csv)):
        print(
            "You must provide either --kafka-timestamp or a bodies list (not both)",
            file=sys.stderr)
        sys.exit(1)

    if args.uuid == "master":
        args.uuid = find_master(args.server)

    # Determine segmentation instance
    info = fetch_instance_info(args.server, args.uuid, args.tsv_instance)
    seg_instance = info["Base"]["Syncs"][0]

    kafka_msgs = None
    if args.bodies_csv:
        if 'body' in read_csv_header(args.bodies_csv):
            bodies = pd.read_csv(args.bodies_csv)['body'].drop_duplicates()
        else:
            # Just read the first column, no matter what it's named
            bodies = read_csv_col(args.bodies_csv, 0,
                                  np.uint64).drop_duplicates()
    elif args.kafka_timestamp:
        # Validate timestamp format before fetching kafka log, which takes a while.
        parse_timestamp(args.kafka_timestamp)

        kafka_msgs = read_kafka_messages(args.server, args.uuid, seg_instance)
        filtered_kafka_msgs = filter_kafka_msgs_by_timerange(
            kafka_msgs, min_timestamp=args.kafka_timestamp)

        new_bodies, changed_bodies, _removed_bodies, new_supervoxels, _deleted_svs = compute_affected_bodies(
            filtered_kafka_msgs)
        sv_split_bodies = set(
            fetch_mapping(args.server, args.uuid, seg_instance,
                          new_supervoxels)) - set([0])

        bodies = set(chain(new_bodies, changed_bodies, sv_split_bodies))
        bodies = np.fromiter(bodies, np.uint64)
        bodies.sort()
    else:
        raise AssertionError("Shouldn't get here.")

    if args.use_mapping:
        missing_entries = check_tarsupervoxels_status_via_exists(
            args.server,
            args.uuid,
            args.tsv_instance,
            bodies,
            seg_instance,
            kafka_msgs=kafka_msgs)
    else:
        missing_entries = check_tarsupervoxels_status_via_missing(
            args.server, args.uuid, args.tsv_instance, bodies)

    logger.info(f"Writing to {args.output}")
    missing_entries.to_csv(args.output, index=True, header=True)
    logger.info("DONE")
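
# --- Illustrative usage (not part of the original example) ---
# Hypothetical invocations of the script above; the script name, server, uuid,
# instance, and file names are placeholders:
#
#   python check_tarsupervoxels_status.py emdata3:8900 master segmentation_sv_meshes bodies.csv
#   python check_tarsupervoxels_status.py -k="2018-11-22 17:34:00" emdata3:8900 master segmentation_sv_meshes
#
# The first form reads a "body" column (or the first column) from bodies.csv;
# the second derives the body list from the kafka log instead, and --use-mapping
# may be added to use the in-memory map + /exists strategy.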
Ejemplo n.º 18
0
    def _init_services(self):
        """
        Initialize the input and output services,
        and fill in 'auto' config values as needed.
        
        Also check the service configurations for errors.
        """
        input_config = self.config["input"]
        output_config = self.config["output"]
        mgr_options = self.config["resource-manager"]

        options = self.config["labelmapcopy"]
        self.mgr_client = ResourceManagerClient(mgr_options["server"],
                                                mgr_options["port"])
        self.input_service = VolumeService.create_from_config(
            input_config, self.mgr_client)
        assert input_config["dvid"]["supervoxels"], \
            'DVID input service config must use "supervoxels: true"'
        assert output_config["dvid"]["supervoxels"], \
            'DVID output service config must use "supervoxels: true"'

        input_service = self.input_service

        max_scale = options["max-scale"]
        if max_scale == -1:
            info = fetch_instance_info(*input_service.instance_triple)
            max_scale = int(info["Extended"]["MaxDownresLevel"])
            options["max-scale"] = max_scale

        assert not (set(range(1+max_scale)) - set(input_service.available_scales)), \
            "Your input config's 'available-scales' must include all levels you wish to copy."

        assert len(options["slab-shape"]) == 3
        slab_shape_zyx = np.array(options["slab-shape"][::-1])

        # FIXME: Should be a whole slab (per the docs above), not just the brick shape!
        replace_default_entries(slab_shape_zyx,
                                input_service.preferred_message_shape)
        options["slab-shape"] = slab_shape_zyx[::-1].tolist()

        assert (slab_shape_zyx % input_service.preferred_message_shape[0] == 0).all(), \
            "slab-shape must be divisible by the brick shape"

        # Transposed/remapped services aren't supported because we're not going to inflate the downloaded blocks.
        assert all(not isinstance(svc, TransposedVolumeService)
                   for svc in input_service.service_chain)
        assert all(not isinstance(svc, LabelmappedVolumeService)
                   for svc in input_service.service_chain)

        assert not (input_service.bounding_box_zyx % input_service.block_width).any(), \
            "Input bounding-box should be a multiple of the block size in all dimensions."
        assert not (input_service.preferred_message_shape % input_service.block_width).any(), \
            "Input message-block-shape should be a multiple of the block size in all dimensions."

        assert all(not isinstance( svc, ScaledVolumeService ) or svc.scale_delta == 0 for svc in input_service.service_chain), \
            "For now, we don't support rescaled input, though it would be possible in theory."

        if options["record-only"]:
            # Don't need to check output setting if we're not writing
            self.output_service = None
            assert options["record-label-sets"], \
                "If using 'record-only', you must set 'record-label-sets', too."
            assert not options["dont-overwrite-identical-blocks"], \
                "In record only mode, the output service can't be accessed, and you can't use dont-overwrite-identical-blocks"
            return

        if output_config["dvid"]["create-if-necessary"]:
            creation_depth = output_config["dvid"]["creation-settings"]["max-scale"]
            if creation_depth not in (-1, max_scale):
                msg = (
                    f"Inconsistent max-scale values: the labelmapcopy option says {max_scale}, "
                    f"but creation-settings says {creation_depth}. "
                    "Omit max-scale from your creation-settings.")
                raise RuntimeError(msg)
            output_config["dvid"]["creation-settings"]["max-scale"] = max_scale

        # Replace 'auto' dimensions with input bounding box
        replace_default_entries(output_config["geometry"]["bounding-box"],
                                input_service.bounding_box_zyx[:, ::-1])
        self.output_service = VolumeService.create_from_config(
            output_config, self.mgr_client)

        output_service = self.output_service
        assert isinstance(output_service, VolumeServiceWriter)

        if output_service.instance_name in fetch_repo_instances(
                output_service.server, output_service.uuid):
            info = fetch_instance_info(*output_service.instance_triple)
            existing_depth = int(info["Extended"]["MaxDownresLevel"])
            if max_scale not in (-1, existing_depth):
                raise Exception(
                    f"Can't set pyramid-depth to {max_scale}: \n"
                    f"Data instance '{output_service.instance_name}' already existed, with depth {existing_depth}.\n"
                    f"For now, you are required to populate ALL scales of the output, or create a new output instance from scratch."
                )

        assert all(not isinstance(svc, TransposedVolumeService)
                   for svc in output_service.service_chain)
        assert all(not isinstance(svc, LabelmappedVolumeService)
                   for svc in output_service.service_chain)
        assert all(
            not isinstance(svc, ScaledVolumeService) or svc.scale_delta == 0
            for svc in output_service.service_chain)

        # Output can't be a scaled service because we copied some geometry (bounding-box)
        # directly from the input service.
        assert not isinstance(output_service, ScaledVolumeService) or output_service.scale_delta == 0

        assert output_service.base_service.disable_indexing, \
            "During ingestion, indexing should be disabled.\n" \
            "Please add 'disable-indexing':true to your output dvid config."

        logger.info(
            f"Output bounding box (xyz) is: {output_service.bounding_box_zyx[:,::-1].tolist()}"
        )

        assert (input_service.bounding_box_zyx == output_service.bounding_box_zyx).all(), \
            "Input and output service bounding boxes must match exactly."
        assert input_service.block_width == output_service.block_width, \
            "Input and output must use the same block-width"
        assert not (output_service.bounding_box_zyx % output_service.block_width).any(), \
            "Output bounding-box should be a multiple of the block size in all dimensions."
        assert not (output_service.preferred_message_shape % output_service.block_width).any(), \
            "Output message-block-shape should be a multiple of the block size in all dimensions."
Ejemplo n.º 19
0
def main():
    # Early exit if we're dumping the config
    # (Parse it ourselves to allow omission of otherwise required parameters.)
    if ({'--dump-config-template', '-d'} & {*sys.argv}):
        dump_default_config(ConfigSchema, sys.stdout, "yaml-with-comments")
        sys.exit(0)

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--dump-config-template',
                        '-d',
                        action='store_true',
                        help='Dump out a template yaml config file and exit.')
    parser.add_argument('--count',
                        '-c',
                        type=int,
                        help='How many points to generate.')
    parser.add_argument('--roi', '-r', help='Limit points to the given ROI.')
    parser.add_argument('--body',
                        '-b',
                        type=int,
                        help='Limit points to the given body.')
    parser.add_argument(
        '--tbars',
        '-t',
        action='store_true',
        help=
        'If given, limit points to the tbars of the given body, from the "synapses" instance in the input UUID.'
    )
    parser.add_argument(
        '--skeleton',
        '-s',
        action='store_true',
        help=
        'If given, choose the points from the nodes of the skeleton for the given body.'
    )
    parser.add_argument(
        '--generate-points-only',
        '-g',
        action='store_true',
        help=
        "If given, generate the points list, but don't write neighborhood segmentations"
    )
    parser.add_argument(
        '--points',
        '-p',
        help=
        'A CSV file containing the points to use instead of automatically generating them.'
    )
    parser.add_argument(
        '--ng-links',
        '-n',
        action='store_true',
        help='If given, include neuroglancer links in the output CSV.\n'
        'Your config should specify the basic neuroglancer view settings; only the "position" will be overwritten in each link.'
    )
    parser.add_argument('config')
    args = parser.parse_args()

    configure_default_logging()

    config = load_config(args.config, ConfigSchema)
    update_ng_settings(config)
    input_seg = [*config["input"].values()]
    output_seg = [*config["output"].values()]
    radius = config["radius"]
    random_seed = config["random-seed"]

    if config["enforce-minimum-distance"]:
        minimum_distance = 2 * radius
    else:
        minimum_distance = 0

    if args.points and any(
        [args.count, args.roi, args.body, args.tbars, args.skeleton]):
        msg = ("If you're providing your own list of points, you shouldn't"
               " specify any of the auto-generation arguments, such as"
               " --count --roi --body --tbars")
        sys.exit(msg)

    if not args.points and not any(
        [args.count, args.roi, args.body, args.tbars, args.skeleton]):
        msg = "You must provide a list of points or specify how to auto-generate them."
        sys.exit(msg)

    if args.points:
        assert args.points.endswith('.csv')
        name, _ = os.path.splitext(args.points)
        output_path = name + '-neighborhoods'
        points = pd.read_csv(args.points)
    else:
        points = autogen_points(input_seg, args.count, args.roi, args.body,
                                args.tbars, args.skeleton, random_seed,
                                minimum_distance)

        uuid = input_seg[1]
        output_path = f'neighborhoods-from-{uuid[:6]}'

        if not any([args.roi, args.body, args.tbars, args.skeleton]):
            output_path += f'-{input_seg[2]}'
        else:
            if args.roi:
                output_path += f'-{args.roi}'
            if args.body:
                output_path += f'-{args.body}'
            if args.tbars:
                output_path += '-tbars'
            if args.skeleton:
                output_path += '-skeleton'

    # These paths are needed for both user-supplied and auto-generated points.
    assignment_path = output_path + '.json'
    csv_path = output_path + '.csv'

    kd = scipy.spatial.cKDTree(points[[*'zyx']].values)
    if len(kd.query_pairs(2 * radius)) > 0:
        msg = (
            "Some of the chosen points are closer to each other than 2x the "
            f"configured radius ({radius}). Their neighborhood segments may "
            "be mangled in the output.")
        logger.warning(msg)

    cols = [*'xyz'] + list({*points.columns} - {*'xyz'})
    points = points[cols]

    if args.generate_points_only:
        add_link_col(points, config)
        export_as_html(points, csv_path)
        if not args.ng_links:
            del points['link']
            points.to_csv(csv_path,
                          index=False,
                          header=True,
                          quoting=csv.QUOTE_NONE)
        sys.exit(0)

    try:
        input_info = fetch_instance_info(*input_seg)
    except Exception:
        sys.exit(
            f"Couldn't find input segmentation instance: {' / '.join(input_seg)}"
        )

    try:
        fetch_instance_info(*output_seg)
    except Exception:
        logger.info(
            f"Output labelmap not found. Creating new label instance: {' / '.join(output_seg)}"
        )

        # Copy details from input instance.
        # But only provide a single value for each, even though the info provides three.
        # Otherwise, DVID kicks back errors like this:
        # Setting for 'VoxelUnits' was not a string: [nanometers nanometers nanometers]
        settings = {
            'block_size': input_info['Extended']['BlockSize'][0],
            'voxel_size': input_info['Extended']['VoxelSize'][0],
            'voxel_units': input_info['Extended']['VoxelUnits'][0],
            'max_scale': input_info['Extended']['MaxDownresLevel']
        }
        create_labelmap_instance(*output_seg, **settings)

        # Also create keyvalue for meshes
        create_instance(*output_seg[:2], output_seg[2] + '_meshes', 'keyvalue')

    results_df = write_point_neighborhoods(input_seg, output_seg, points,
                                           radius, args.body)

    add_link_col(results_df, config)
    export_as_html(results_df, csv_path)
    write_assignment_file(output_seg, results_df, assignment_path, config)
    if not args.ng_links:
        del results_df['link']
    results_df.to_csv(csv_path,
                      index=False,
                      header=True,
                      quoting=csv.QUOTE_NONE)
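
# --- Illustrative sketch (not part of the original example) ---
# A guess at a minimal --points CSV that main() above would accept: it needs
# x, y, and z columns (extra columns are carried through to the output).
# The coordinates and file names below are made up.
_example_points_csv = """\
x,y,z
1024,2048,4096
1100,2100,4100
"""
#
# Hypothetical invocation (point_neighborhoods.py and config.yaml are placeholders):
#   python point_neighborhoods.py --points points.csv config.yaml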