def main():
    # Create the destination instance if necessary.
    dst_instances = fetch_repo_instances(*dst_node, 'annotation')
    if dst_syn not in dst_instances:
        logger.info(f"Creating instance '{dst_syn}'")
        create_instance(*dst_node, dst_syn, 'annotation')

    # Check to see if the sync already exists; add it if necessary.
    syn_info = fetch_instance_info(*dst_node, dst_syn)
    if len(syn_info["Base"]["Syncs"]) == 0:
        logger.info(f"Adding a sync to '{dst_syn}' from '{dst_seg}'")
        post_sync(*dst_node, dst_syn, [dst_seg])
    elif syn_info["Base"]["Syncs"][0] != dst_seg:
        other_seg = syn_info["Base"]["Syncs"][0]
        raise RuntimeError(
            f"Can't create a sync to '{dst_seg}'. "
            f"Your instance is already sync'd to a different segmentation: {other_seg}")

    # Fetch segmentation extents.
    bounding_box_zyx = fetch_volume_box(*src_node, src_seg).tolist()

    # Break into block-aligned chunks (boxes) that are long in the X direction
    # (optimal access pattern for dvid read/write).
    boxes = boxes_from_grid(bounding_box_zyx, (256, 256, 6400), clipped=True)

    # Use a process pool to copy the chunks in parallel.
    compute_parallel(copy_syn_blocks, boxes, processes=PROCESSES, ordered=False)
def test_append_edges_for_focused_merges(labelmap_setup):
    dvid_server, dvid_repo, merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup

    decision_instance = 'segmentation_merged_TEST'
    create_instance(dvid_server, dvid_repo, decision_instance, 'keyvalue')

    # Post a new 'decision' between 1 and 5
    post_key(dvid_server, dvid_repo, decision_instance, '1+5',
             json={'supervoxel ID 1': 1,
                   'supervoxel ID 2': 5,
                   'body ID 1': 1,
                   'body ID 2': 1,
                   'result': 'merge',
                   'supervoxel point 1': [0, 0, 0],    # xyz
                   'supervoxel point 2': [12, 0, 0]})  # xyz

    merge_graph = LabelmapMergeGraph(merge_table_path)
    merge_graph.append_edges_for_focused_merges(dvid_server, dvid_repo, decision_instance)

    assert len(merge_graph.merge_table_df.query('id_a == 1 and id_b == 5')) == 1
def copy_synapses(src_loc, dst_loc, processes):
    """
    See caveats in the module docstring above.
    """
    src_loc = Location(*src_loc)
    dst_loc = Location(*dst_loc)

    # Create the destination instance if necessary.
    dst_instances = fetch_repo_instances(*dst_loc[:2], 'annotation')
    if dst_loc.syn_instance not in dst_instances:
        logger.info(f"Creating instance '{dst_loc.syn_instance}'")
        create_instance(*dst_loc[:3], 'annotation')

    # Check to see if the sync already exists; add it if necessary.
    syn_info = fetch_instance_info(*dst_loc[:3])
    if len(syn_info["Base"]["Syncs"]) == 0:
        logger.info(f"Adding a sync to '{dst_loc.syn_instance}' from '{dst_loc.seg_instance}'")
        post_sync(*dst_loc[:3], [dst_loc.seg_instance])
    elif syn_info["Base"]["Syncs"][0] != dst_loc.seg_instance:
        other_seg = syn_info["Base"]["Syncs"][0]
        raise RuntimeError(
            f"Can't create a sync to '{dst_loc.seg_instance}'. "
            f"Your instance is already sync'd to a different segmentation: {other_seg}")

    # Fetch segmentation extents.
    bounding_box_zyx = fetch_volume_box(*src_loc[:2], src_loc.seg_instance).tolist()

    # Break into block-aligned chunks (boxes) that are long in the X direction
    # (optimal access pattern for dvid read/write).
    boxes = boxes_from_grid(bounding_box_zyx, (256, 256, 6400), clipped=True)

    # Use a process pool to copy the chunks in parallel.
    fn = partial(copy_syn_blocks, src_loc, dst_loc)
    compute_parallel(fn, boxes, processes=processes, ordered=False)
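# NOTE: The per-box worker 'copy_syn_blocks' is referenced above but not shown here.
# The sketch below is a hypothetical illustration of what such a worker could look like,
# assuming Location is (server, uuid, syn_instance, seg_instance) and using DVID's
# documented annotation endpoints GET/POST .../elements.  It is NOT the project's
# actual implementation; the real code may use neuclease wrappers instead.
import requests

def copy_syn_blocks_sketch(src_loc, dst_loc, box_zyx):
    """Copy all synapse elements within one box from the source node to the destination node."""
    (z0, y0, x0), (z1, y1, x1) = box_zyx
    shape = f"{x1 - x0}_{y1 - y0}_{z1 - z0}"   # DVID expects sizes/offsets in XYZ order
    offset = f"{x0}_{y0}_{z0}"

    src_url = (f"http://{src_loc.server}/api/node/{src_loc.uuid}"
               f"/{src_loc.syn_instance}/elements/{shape}/{offset}")
    elements = requests.get(src_url).json() or []

    if elements:
        dst_url = (f"http://{dst_loc.server}/api/node/{dst_loc.uuid}"
                   f"/{dst_loc.syn_instance}/elements")
        r = requests.post(dst_url, json=elements)
        r.raise_for_status()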
def main():
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--annotation-instance', default='synapses')
    parser.add_argument('--labelmap-instance', default='segmentation')
    parser.add_argument('--labelsz-instance')
    parser.add_argument('server')
    parser.add_argument('uuid')
    parser.add_argument('elements_json')
    args = parser.parse_args()

    server = args.server
    uuid = args.uuid
    syn_instance = args.annotation_instance
    seg_instance = args.labelmap_instance

    ##
    ## 1. Create an 'annotation' instance to store the synapse data
    ##
    ##    POST .../instance
    ##
    create_instance(server, uuid, syn_instance, 'annotation')

    ##
    ## 2. Upload the synapse elements.
    ##
    ##    POST .../elements
    ##
    ## Note:
    ##    DVID stores these in block-aligned groups, based on the synapse coordinates.
    ##    Ingestion will be fastest if you pre-sort your JSON elements by 64px blocks,
    ##    in Z-Y-X order, as shown below.
    ##
    with open(args.elements_json, 'r') as f:
        elements = ujson.load(f)

    # Sort elements by block location (64px blocks)
    # FIXME: This code should work but I haven't tested it yet.  Might have a typo.
    elements_df = pd.DataFrame([(*e["Pos"], e) for e in elements],
                               columns=['x', 'y', 'z', 'element'])
    elements_df[['z', 'y', 'x']] //= 64
    elements_df.sort_values(['z', 'y', 'x'], inplace=True)

    # Group blocks into larger chunks, with each chunk being 100 blocks
    # in the X direction (and 1x1 in the zy directions).
    elements_df['x'] //= 100

    # Ingest in chunks.
    num_chunks = elements_df[['z', 'y', 'x']].drop_duplicates().shape[0]
    chunked_df = elements_df.groupby(['z', 'y', 'x'])
    for _zyx, batch_elements_df in tqdm_proxy(chunked_df, total=num_chunks):
        post_elements(server, uuid, syn_instance, batch_elements_df['element'].tolist())

    ##
    ## 3. Sync the annotation instance to a pre-existing
    ##    segmentation (labelmap) instance.
    ##
    ##    POST .../sync
    ##
    ## This 'subscribes' the annotation instance to changes in the segmentation,
    ## keeping updated counts of synapses in each body.
    ## This will enable the .../<annotation>/labels endpoint to work efficiently.
    ##
    post_sync(server, uuid, syn_instance, [seg_instance])

    ##
    ## 4. Reload the synapse instance AFTER the sync was configured (above).
    ##    For real-world data sizes (e.g. millions of synapses) this will take
    ##    a long time (hours).
    ##
    ##    POST .../reload
    ##
    post_reload(server, uuid, syn_instance)

    ##
    ## 5. (Optional)
    ##    For some proofreading protocols, you may wish to create a 'labelsz' (label size)
    ##    instance, which allows you to ask for the largest N bodies (by synapse count).
    ##
    if args.labelsz_instance:
        create_instance(server, uuid, args.labelsz_instance, 'labelsz')
        post_sync(server, uuid, args.labelsz_instance, [syn_instance])
        post_reload(server, uuid, args.labelsz_instance)
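# For reference, each entry in the elements JSON above is a DVID annotation element.
# The example below illustrates the typical shape (all values are made-up placeholders);
# see the DVID annotation datatype docs for the authoritative format.
example_elements = [
    {
        "Pos": [1234, 5678, 910],     # x, y, z coordinate of the element
        "Kind": "PreSyn",             # e.g. "PreSyn" or "PostSyn"
        "Tags": [],
        "Prop": {"conf": "0.93"},     # optional string-valued properties
        "Rels": [                     # optional relationships to other elements
            {"Rel": "PreSynTo", "To": [1240, 5670, 910]}
        ]
    }
]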
def _prepare_output(self):
    """
    If necessary, create the output directory or DVID instance
    so that meshes can be written to it.
    """
    input_cfg = self.config["input"]
    output_cfg = self.config["output"]
    options = self.config["svdecimate"]

    ## directory output
    if 'directory' in output_cfg:
        # Convert to absolute so we can chdir with impunity later.
        output_cfg['directory'] = os.path.abspath(output_cfg['directory'])
        os.makedirs(output_cfg['directory'], exist_ok=True)
        return

    ##
    ## DVID output (either keyvalue or tarsupervoxels)
    ##
    (instance_type,) = output_cfg.keys()

    server = output_cfg[instance_type]['server']
    uuid = output_cfg[instance_type]['uuid']
    instance = output_cfg[instance_type]['instance']

    # If the output server or uuid is left blank,
    # we assume it should be auto-filled from the input settings.
    if server == "" or uuid == "":
        assert "dvid" in input_cfg
        if server == "":
            output_cfg[instance_type]['server'] = input_cfg["dvid"]["server"]
        if uuid == "":
            output_cfg[instance_type]['uuid'] = input_cfg["dvid"]["uuid"]

    # Resolve in case a branch was given instead of a specific uuid
    server = output_cfg[instance_type]['server']
    uuid = output_cfg[instance_type]['uuid']
    uuid = resolve_ref(server, uuid)

    if is_locked(server, uuid):
        info = fetch_server_info(server)
        if "Mode" in info and info["Mode"] == "allow writes on committed nodes":
            logger.warning(f"Output is a locked node ({uuid}), but server is in full-write mode. Proceeding.")
        elif os.environ.get("DVID_ADMIN_TOKEN", ""):
            logger.warning(f"Output is a locked node ({uuid}), but you defined DVID_ADMIN_TOKEN. Proceeding.")
        else:
            raise RuntimeError(f"Can't write to node {uuid} because it is locked.")

    if instance_type == 'tarsupervoxels' and not self.input_is_labelmap_supervoxels():
        msg = ("You shouldn't write to a tarsupervoxels instance unless "
               "you're reading supervoxels from a labelmap input.\n"
               "Use a labelmap input source, and set supervoxels: true")
        raise RuntimeError(msg)

    existing_instances = fetch_repo_instances(server, uuid)
    if instance in existing_instances:
        # Instance exists -- nothing to do.
        return

    if not output_cfg[instance_type]['create-if-necessary']:
        msg = (f"Output instance '{instance}' does not exist, "
               "and your config did not specify create-if-necessary")
        raise RuntimeError(msg)

    assert instance_type in ('tarsupervoxels', 'keyvalue')

    ## keyvalue output
    if instance_type == "keyvalue":
        create_instance(server, uuid, instance, "keyvalue", tags=["type=meshes"])
        return

    ## tarsupervoxels output
    sync_instance = output_cfg["tarsupervoxels"]["sync-to"]

    if not sync_instance:
        # Auto-fill a default 'sync-to' instance using the input segmentation, if possible.
        info = fetch_instance_info(*[input_cfg["dvid"][k] for k in ("server", "uuid", "tarsupervoxels-instance")])
        syncs = info['Base']['Syncs']
        if syncs:
            sync_instance = syncs[0]

    if not sync_instance:
        msg = ("Can't create a tarsupervoxels instance unless "
               "you specify a 'sync-to' labelmap instance name.")
        raise RuntimeError(msg)

    if sync_instance not in existing_instances:
        msg = (f"Can't sync to labelmap instance '{sync_instance}': "
               "it doesn't exist on the output server.")
        raise RuntimeError(msg)

    create_tarsupervoxel_instance(server, uuid, instance, sync_instance, options["format"])
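# For orientation, a minimal sketch of the 'output' config shapes that _prepare_output()
# above accepts, inferred from the keys it reads.  Field values are illustrative
# placeholders, not defaults from the actual schema.
example_outputs = {
    # Plain directory output:
    "directory_style": {
        "directory": "/path/to/mesh-output"
    },
    # DVID tarsupervoxels output (a keyvalue output is analogous, minus 'sync-to'):
    "tarsupervoxels_style": {
        "tarsupervoxels": {
            "server": "",                    # blank => auto-filled from the input's dvid settings
            "uuid": "",                      # blank => auto-filled from the input's dvid settings
            "instance": "segmentation_sv_meshes",
            "create-if-necessary": True,
            "sync-to": "segmentation"        # blank => auto-filled from the input instance's syncs
        }
    }
}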
import os
os.environ['DVID_ADMIN_TOKEN'] = 'stanfordrockskalsucks'

from neuclease.dvid import create_instance, load_gary_psds, post_tbar_jsons, post_psd_jsons, post_sync, post_reload
from neuclease import configure_default_logging
configure_default_logging()

print("Loading file")
psd_df = load_gary_psds('/nrs/flyem/huangg/vnc/synapses_v1.p')

print("Creating instance")
new_root = ('emdata5.janelia.org:8400', '1ec355123bf94e588557a4568d26d258')
create_instance(*new_root, 'synapses-reingest', 'annotation')

print("Loading tbars")
post_tbar_jsons(*new_root, 'synapses-reingest', psd_df, merge_existing=False, processes=16)

print("Loading psds")
post_psd_jsons(*new_root, 'synapses-reingest', psd_df, merge_existing=True, processes=16)

print("Posting sync")
post_sync(*new_root, 'synapses-reingest', ['segmentation'])
def main():
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--annotation-instance', default='synapses')
    parser.add_argument('--labelmap-instance', default='segmentation')
    parser.add_argument('--labelsz-instance')
    parser.add_argument('server')
    parser.add_argument('uuid')
    parser.add_argument('elements_json')
    args = parser.parse_args()

    server = args.server
    uuid = args.uuid
    syn_instance = args.annotation_instance
    seg_instance = args.labelmap_instance

    with open(args.elements_json, 'r') as f:
        elements = ujson.load(f)

    ##
    ## 1. Create an 'annotation' instance to store the synapse data
    ##
    ##    POST .../instance
    ##
    create_instance(server, uuid, syn_instance, 'annotation')

    ##
    ## 2. Upload the synapse elements.
    ##
    ##    POST .../elements
    ##
    ## Note:
    ##    DVID stores these in block-aligned groups, based on the synapse coordinates.
    ##    Ingestion will be fastest if you pre-sort your JSON elements by 64px blocks,
    ##    in Z-Y-X order.
    ##
    post_elements(server, uuid, syn_instance, elements)

    ##
    ## 3. Sync the annotation instance to a pre-existing
    ##    segmentation (labelmap) instance.
    ##
    ##    POST .../sync
    ##
    ## This 'subscribes' the annotation instance to changes in the segmentation,
    ## keeping updated counts of synapses in each body.
    ## This will enable the .../<annotation>/labels endpoint to work efficiently.
    ##
    post_sync(server, uuid, syn_instance, [seg_instance])

    ##
    ## 4. Reload the synapse instance AFTER the sync was configured (above).
    ##    For real-world data sizes (e.g. millions of synapses) this will take
    ##    a long time (hours).
    ##
    ##    POST .../reload
    ##
    post_reload(server, uuid, syn_instance)

    ##
    ## 5. (Optional)
    ##    For some proofreading protocols, you may wish to create a 'labelsz' (label size)
    ##    instance, which allows you to ask for the largest N bodies (by synapse count).
    ##
    if args.labelsz_instance:
        create_instance(server, uuid, args.labelsz_instance, 'labelsz')
        post_sync(server, uuid, args.labelsz_instance, [syn_instance])
        post_reload(server, uuid, args.labelsz_instance)
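# Example invocation (hypothetical script name, server, uuid, and file name;
# the arguments themselves match the argparse definitions in main() above):
#
#   python ingest_synapses.py \
#       --annotation-instance synapses \
#       --labelmap-instance segmentation \
#       --labelsz-instance synapses_labelsz \
#       emdata4:8900 abc123 synapse_elements.json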
def main():
    # Early exit if we're dumping the config
    # (Parse it ourselves to allow omission of otherwise required parameters.)
    if ({'--dump-config-template', '-d'} & {*sys.argv}):
        dump_default_config(ConfigSchema, sys.stdout, "yaml-with-comments")
        sys.exit(0)

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--dump-config-template', '-d', action='store_true',
                        help='Dump out a template yaml config file and exit.')
    parser.add_argument('--count', '-c', type=int,
                        help='How many points to generate.')
    parser.add_argument('--roi', '-r',
                        help='Limit points to the given ROI.')
    parser.add_argument('--body', '-b', type=int,
                        help='Limit points to the given body.')
    parser.add_argument('--tbars', '-t', action='store_true',
                        help='If given, limit points to the tbars of the given body, '
                             'from the "synapses" instance in the input UUID.')
    parser.add_argument('--skeleton', '-s', action='store_true',
                        help='If given, choose the points from the nodes of the skeleton for the given body.')
    parser.add_argument('--generate-points-only', '-g', action='store_true',
                        help="If given, generate the points list, but don't write neighborhood segmentations.")
    parser.add_argument('--points', '-p',
                        help='A CSV file containing the points to use instead of automatically generating them.')
    parser.add_argument('--ng-links', '-n', action='store_true',
                        help='If given, include neuroglancer links in the output CSV. '
                             'Your config should specify the basic neuroglancer view settings; '
                             'only the "position" will be overwritten in each link.')
    parser.add_argument('config')
    args = parser.parse_args()

    configure_default_logging()

    config = load_config(args.config, ConfigSchema)
    update_ng_settings(config)
    input_seg = [*config["input"].values()]
    output_seg = [*config["output"].values()]
    radius = config["radius"]
    random_seed = config["random-seed"]

    if config["enforce-minimum-distance"]:
        minimum_distance = 2 * radius
    else:
        minimum_distance = 0

    if args.points and any([args.count, args.roi, args.body, args.tbars, args.skeleton]):
        msg = ("If you're providing your own list of points, you shouldn't"
               " specify any of the auto-generation arguments, such as"
               " --count --roi --body --tbars")
        sys.exit(msg)

    if not args.points and not any([args.count, args.roi, args.body, args.tbars, args.skeleton]):
        msg = "You must provide a list of points or specify how to auto-generate them."
        sys.exit(msg)

    if args.points:
        assert args.points.endswith('.csv')
        name, _ = os.path.splitext(args.points)
        output_path = name + '-neighborhoods.csv'
        points = pd.read_csv(args.points)
    else:
        points = autogen_points(input_seg, args.count, args.roi, args.body,
                                args.tbars, args.skeleton, random_seed, minimum_distance)
        uuid = input_seg[1]
        output_path = f'neighborhoods-from-{uuid[:6]}'

        if not any([args.roi, args.body, args.tbars, args.skeleton]):
            output_path += input_seg[2]
        else:
            if args.roi:
                output_path += f'-{args.roi}'
            if args.body:
                output_path += f'-{args.body}'
            if args.tbars:
                output_path += '-tbars'
            if args.skeleton:
                output_path += '-skeleton'

    assignment_path = output_path + '.json'
    csv_path = output_path + '.csv'

    kd = scipy.spatial.cKDTree(points[[*'zyx']].values)
    if len(kd.query_pairs(2 * radius)) > 0:
        msg = ("Some of the chosen points are closer to each other than 2x the "
               f"configured radius ({radius}). Their neighborhood segments may "
               "be mangled in the output.")
        logger.warning(msg)

    cols = [*'xyz'] + list({*points.columns} - {*'xyz'})
    points = points[cols]

    if args.generate_points_only:
        add_link_col(points, config)
        export_as_html(points, csv_path)
        if not args.ng_links:
            del points['link']
        points.to_csv(csv_path, index=False, header=True, quoting=csv.QUOTE_NONE)
        sys.exit(0)

    try:
        input_info = fetch_instance_info(*input_seg)
    except Exception:
        sys.exit(f"Couldn't find input segmentation instance: {' / '.join(input_seg)}")

    try:
        fetch_instance_info(*output_seg)
    except Exception:
        logger.info(f"Output labelmap not found. Creating new label instance: {' / '.join(output_seg)}")

        # Copy details from the input instance,
        # but only provide a single value for each, even though the info provides three.
        # Otherwise, DVID kicks back errors like this:
        #   Setting for 'VoxelUnits' was not a string: [nanometers nanometers nanometers]
        settings = {
            'block_size': input_info['Extended']['BlockSize'][0],
            'voxel_size': input_info['Extended']['VoxelSize'][0],
            'voxel_units': input_info['Extended']['VoxelUnits'][0],
            'max_scale': input_info['Extended']['MaxDownresLevel']
        }
        create_labelmap_instance(*output_seg, **settings)

        # Also create a keyvalue instance for meshes.
        create_instance(*output_seg[:2], output_seg[2] + '_meshes', 'keyvalue')

    results_df = write_point_neighborhoods(input_seg, output_seg, points, radius, args.body)

    add_link_col(results_df, config)
    export_as_html(results_df, csv_path)
    write_assignment_file(output_seg, results_df, assignment_path, config)
    if not args.ng_links:
        del results_df['link']
    results_df.to_csv(csv_path, index=False, header=True, quoting=csv.QUOTE_NONE)
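# A rough sketch of the config fields that main() above reads, expressed as the
# equivalent Python dict.  The key names inside "input"/"output" are placeholders --
# the code only relies on their values() order being (server, uuid, instance) --
# and the real schema (ConfigSchema) also includes the neuroglancer-link settings
# consumed by update_ng_settings() and add_link_col().
example_config = {
    "input":  {"server": "emdata4:8900", "uuid": "abc123", "instance": "segmentation"},
    "output": {"server": "emdata4:8900", "uuid": "abc123", "instance": "neighborhood-masks"},
    "radius": 250,
    "random-seed": 0,
    "enforce-minimum-distance": True
}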