def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument("--output", "-o", type=str, required=False)
    parser.add_argument('dvid_server')
    parser.add_argument('uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument('assignment_json')
    args = parser.parse_args()

    if args.output is None:
        name, ext = os.path.splitext(args.assignment_json)
        args.output = name + '-adjusted' + ext

    instance_info = (args.dvid_server, args.uuid, args.labelmap_instance)

    with Timer(f"Processing {args.assignment_json}", logger):
        with open(args.assignment_json, 'r') as f:
            assignment_data = ujson.load(f)

        new_assignment_data = adjust_focused_points(*instance_info, assignment_data)

        with open(args.output, 'w') as f:
            ujson.dump(new_assignment_data, f, indent=2)

    logger.info(f"Wrote to {args.output}")
def main():
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--expand-for-alignment', '-e', action='store_true',
                        help='Auto-expand the given subvolume bounds to make it aligned.')
    parser.add_argument('--grayscale', '-g', action='store_true',
                        help='Copy grayscale only')
    parser.add_argument('--segmentation', '-s', action='store_true',
                        help='Copy segmentation only')
    parser.add_argument('offset_xyz',
                        help="Starting offset for the subvolume, e.g. '25280, 42048, 48448'")
    parser.add_argument('shape_xyz',
                        help="Shape of the subvolume, e.g. '512, 512, 512'")
    args = parser.parse_args()

    if not args.grayscale and not args.segmentation:
        args.grayscale = True
        args.segmentation = True

    from neuclease import configure_default_logging
    configure_default_logging()

    import re
    import numpy as np
    from neuclease.util import round_box

    args.offset_xyz = re.sub(r'\D', ' ', args.offset_xyz)
    args.shape_xyz = re.sub(r'\D', ' ', args.shape_xyz)

    offset_xyz = np.array([*map(int, args.offset_xyz.split())])
    shape_xyz = np.array([*map(int, args.shape_xyz.split())])

    box_xyz = np.array([offset_xyz, offset_xyz + shape_xyz])
    box_zyx = box_xyz[:, ::-1]
    del box_xyz

    if args.expand_for_alignment:
        box_zyx = round_box(box_zyx, 64, 'out')
        shape_zyx = box_zyx[1] - box_zyx[0]
        logger.info(f"Expanded box to {box_zyx[:, ::-1].tolist()} (shape = {shape_zyx[::-1].tolist()})")
    elif (box_zyx % 64).any():
        raise RuntimeError("Only 64px block-aligned volumes can be copied.\n"
                           "Adjust your offset/shape or try the --expand-for-alignment option.")

    copy_vnc_subvolume(box_zyx, args.grayscale, args.segmentation)
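# A minimal usage sketch for the subvolume-copy entry point above (the actual
# console-script name isn't shown here, so "copy_vnc_subvolume" is assumed).
# Any non-digit characters in the offset/shape strings act as separators, since
# the parser strips them with re.sub(r'\D', ' ', ...) before splitting:
#
#   copy_vnc_subvolume -e -g "25280, 42048, 48448" "512, 512, 512"
#
# Without -g or -s, both grayscale and segmentation are copied.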
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser()
    parser.add_argument('body_list_csv')
    parser.add_argument('src_server')
    parser.add_argument('src_uuid')
    parser.add_argument('src_tarsupervoxels_instance')
    parser.add_argument('dest_server')
    parser.add_argument('dest_uuid')
    parser.add_argument('dest_keyvalue_instance')
    args = parser.parse_args()

    body_list = read_csv_col(args.body_list_csv)

    src_info = (args.src_server, args.src_uuid, args.src_tarsupervoxels_instance)
    dest_info = (args.dest_server, args.dest_uuid, args.dest_keyvalue_instance)

    logger.info(f"Copying meshes for {len(body_list)} bodies")
    failed_bodies = copy_meshes_to_keyvalue(src_info, dest_info, body_list)

    if failed_bodies:
        logger.warning(f"Writing {len(failed_bodies)} failed bodies to failed-bodies.txt")
        pd.Series(failed_bodies).to_csv('failed-bodies.txt', index=False)

    logger.info("DONE.")
def main():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--num-ranked-rois', '-r', type=int, default=5)
    parser.add_argument('--neuprint', default='neuprint.janelia.org')
    parser.add_argument('--voxel-col',
                        help='Name of the input column that contains the voxel counts.')
    parser.add_argument('--exclude-none-roi', action='store_true')
    parser.add_argument('input_path')
    parser.add_argument('neuprint_dataset')
    parser.add_argument('output_path', nargs='?')
    args = parser.parse_args()

    if args.output_path is None:
        if args.exclude_none_roi:
            args.output_path = 'ranked-roi-table-excluding-non-neuropil.csv'
        else:
            args.output_path = 'ranked-roi-table.csv'

    from neuclease import configure_default_logging
    configure_default_logging()

    roistats_table(args.input_path, args.neuprint_dataset, args.voxel_col,
                   args.num_ranked_rois, args.exclude_none_roi, args.neuprint,
                   args.output_path)
def main():
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('server')
    parser.add_argument('uuid')
    parser.add_argument('table_csv')
    parser.add_argument('rois', nargs='+')
    args = parser.parse_args()

    configure_default_logging()

    rois = args.rois
    if len(args.rois) == 1 and args.rois[0].endswith('.csv'):
        roi_csv_path = args.rois[0]
        rois_df = pd.read_csv(roi_csv_path, header=0)
        if 'roi' not in rois_df.columns:
            sys.stderr.write("If providing ROIs via CSV, the CSV must have a header row, "
                             "with a column named 'roi'.\n")
            sys.exit(1)
        rois = list(rois_df['roi'])

    table_ext = os.path.splitext(args.table_csv)[1]
    assert table_ext == '.csv'

    logging.info(f"Reading {args.table_csv}")
    points_df = pd.read_csv(args.table_csv, dtype=CSV_DTYPES)

    # The original ended with a bare reference to determine_point_rois;
    # the call below assumes a (server, uuid, rois, points_df) signature.
    determine_point_rois(args.server, args.uuid, rois, points_df)
def _configure_worker_logging():
    configure_default_logging()

    # Levels copied from defaults in distributed/config.py
    logging.getLogger('distributed.client').setLevel(logging.WARNING)
    logging.getLogger('bokeh').setLevel(logging.ERROR)
    logging.getLogger('tornado').setLevel(logging.CRITICAL)
    logging.getLogger('tornado.application').setLevel(logging.ERROR)
def main():
    configure_default_logging()
    initialize_excepthook()
    logger.setLevel(logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('--last-mutid', '-i', required=False, type=int)
    parser.add_argument('--num-threads', '-t', default=0, type=int,
                        help='How many threads to use when ingesting label indexes '
                             '(does not currently apply to mappings)')
    parser.add_argument('--num-processes', '-p', default=0, type=int,
                        help='How many processes to use when ingesting label indexes '
                             '(does not currently apply to mappings)')
    parser.add_argument('--batch-size', '-b', default=100_000, type=int,
                        help='Data is grouped in batches to the server. This is the batch size, '
                             'as measured in ROWS of data to be processed for each batch.')
    parser.add_argument('server')
    parser.add_argument('src_uuid')
    parser.add_argument('dest_uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument('supervoxel_block_stats_h5',
                        nargs='?',  # not required if only ingesting mapping
                        help=f'An HDF5 file with a single dataset "stats", with dtype: {STATS_DTYPE[1:]} '
                             '(Note: No column for body_id)')
    args = parser.parse_args()

    with Timer() as timer:
        src_info = (args.server, args.src_uuid, args.labelmap_instance)
        dest_info = (args.server, args.dest_uuid, args.labelmap_instance)
        erase_from_labelindexes(src_info, dest_info,
                                args.supervoxel_block_stats_h5,
                                args.batch_size,
                                threads=args.num_threads,
                                processes=args.num_processes,
                                last_mutid=args.last_mutid)
    logger.info(f"DONE. Total time: {timer.timedelta}")
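# A hypothetical invocation of the erase-from-labelindexes wrapper above (the
# real script name isn't shown; server and uuids are placeholders):
#
#   erase_from_labelindexes -t 8 -b 100000 emdata3:8900 abc123 def456 segmentation sv-block-stats.h5
#
# The stats HDF5 argument is optional (nargs='?'), per the comment in its
# argument definition.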
def main():
    # Early exit if we're dumping the config
    # (Parse it ourselves to allow omission of otherwise required parameters.)
    if ({'--dump-config-template', '-d'} & {*sys.argv}):
        from confiddler import dump_default_config
        dump_default_config(config_schema(), sys.stdout)
        sys.exit(0)

    parser = argparse.ArgumentParser()
    parser.add_argument('--processes', '-p', type=int, default=0)
    parser.add_argument('--threads', '-t', type=int, default=0)
    parser.add_argument('--check-scale', '-s', type=int, default=0)
    parser.add_argument('--dump-config-template', '-d', action='store_true')
    parser.add_argument('--verify', '-v', action='store_true')
    parser.add_argument('config')
    parser.add_argument('stats_df_pkl',
                        help='Mito statistics table, as produced by the MitoStats workflow. '
                             'Note: The coordinates must be provided in scale-0 units,'
                             ' regardless of the check-scale you want to use!')
    args = parser.parse_args()

    if args.threads == 0 and args.processes == 0:
        args.threads = 1
    elif (args.threads != 0) and (args.processes != 0):
        raise RuntimeError("Can't use multi-threading and multi-processing. Pick one.")

    from neuclease import configure_default_logging
    configure_default_logging()

    from confiddler import load_config
    config = load_config(args.config, config_schema())

    with open(args.stats_df_pkl, 'rb') as f:
        stats_df = pickle.load(f)

    stats_df = correct_centroids(config, stats_df,
                                 check_scale=args.check_scale,
                                 verify=args.verify,
                                 threads=args.threads,
                                 processes=args.processes)

    with open('corrected_stats_df.pkl', 'wb') as f:
        pickle.dump(stats_df, f, protocol=pickle.HIGHEST_PROTOCOL)
def main():
    from neuclease import configure_default_logging
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--output-path', '-o', default='{body}.obj',
                        help='Output path. If processing multiple bodies, use {body} in the name. '
                             'Default: "{body}.obj"')
    parser.add_argument('--simplify', '-s', type=float, default=1.0,
                        help='Optional decimation to apply before serialization, between 0.01 '
                             '(most aggressive) and 1.0 (no decimation, the default).')
    parser.add_argument('--drop-normals', action='store_true',
                        help='Drop the normals from the mesh before serializing it.')
    parser.add_argument('--rescale-factor', '-r', type=float, default=1.0,
                        help='Multiply by this factor before writing the mesh '
                             '(e.g. ngmesh should be written at 1-nm resolution, so you should '
                             'probably rescale by 8 for FlyEM FIBSEM data.)')
    parser.add_argument('server')
    parser.add_argument('uuid')
    parser.add_argument('tarsupervoxels_instance')
    parser.add_argument('body', nargs='+')
    args = parser.parse_args()

    mesh_from_dvid_tarfile(args.server, args.uuid, args.tarsupervoxels_instance,
                           args.body, args.simplify, args.drop_normals,
                           args.rescale_factor, args.output_path)
    logger.info("DONE")
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--split-into-batches', type=int,
                        help='If given, also split the body stats into this many batches of roughly equal size')
    parser.add_argument('server')
    parser.add_argument('src_uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument('supervoxel_block_stats_h5',
                        help=f'An HDF5 file with a single dataset "stats", with dtype: {STATS_DTYPE[1:]} '
                             '(Note: No column for body_id)')
    args = parser.parse_args()

    configure_default_logging()
    initialize_excepthook()

    (block_sv_stats, _presorted_by, _agglo_path) = load_stats_h5_to_records(args.supervoxel_block_stats_h5)

    src_info = (args.server, args.src_uuid, args.labelmap_instance)
    mapping = fetch_mappings(*src_info)

    assert isinstance(mapping, pd.Series)
    mapping_df = mapping.reset_index().rename(columns={'sv': 'segment_id', 'body': 'body_id'})

    # sorts in-place, and saves a copy to hdf5
    sort_block_stats(block_sv_stats, mapping_df,
                     args.supervoxel_block_stats_h5[:-3] + '-sorted-by-body.h5',
                     '<fetched-from-dvid>')

    if args.split_into_batches:
        num_batches = args.split_into_batches
        batch_size = int(np.ceil(len(block_sv_stats) / args.split_into_batches))
        logger.info(f"Splitting into {args.split_into_batches} batches of size ~{batch_size}")
        os.makedirs('stats-batches', exist_ok=True)

        body_spans = groupby_spans_presorted(block_sv_stats['body_id'][:, None])
        for batch_index, batch_spans in enumerate(tqdm_proxy(iter_batches(body_spans, batch_size))):
            span_start, span_stop = batch_spans[0][0], batch_spans[-1][1]
            batch_stats = block_sv_stats[span_start:span_stop]
            digits = int(np.ceil(np.log10(num_batches)))
            batch_path = ('stats-batches/stats-batch-{:0' + str(digits) + 'd}.h5').format(batch_index)
            save_stats(batch_stats, batch_path)

    logger.info("DONE sorting stats by body")
def main():
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('server')
    parser.add_argument('uuid')
    parser.add_argument('synapse_table')
    parser.add_argument('rois', nargs='+')
    args = parser.parse_args()

    configure_default_logging()

    syn_ext = os.path.splitext(args.synapse_table)[1]
    assert syn_ext in ('.npy', '.csv')

    logging.info(f"Reading {args.synapse_table}")
    synapse_df = load_synapses(args.synapse_table)

    check_in_rois(args.server, args.uuid, synapse_df, args.rois)
    logging.info("DONE")
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser()
    parser.add_argument('--kafka-log')
    parser.add_argument('--kafka-servers')
    parser.add_argument('--min-timestamp')
    parser.add_argument('--max-timestamp')
    parser.add_argument('--min-mutid', type=int)
    parser.add_argument('--max-mutid', type=int)
    parser.add_argument('--pause-between-splits', type=float, default=0.0)
    parser.add_argument('src_server')
    parser.add_argument('src_uuid')
    parser.add_argument('src_labelmap_instance')
    parser.add_argument('dest_server')
    parser.add_argument('dest_uuid')
    parser.add_argument('dest_labelmap_instance')
    args = parser.parse_args()

    src_seg = (args.src_server, args.src_uuid, args.src_labelmap_instance)
    dest_seg = (args.dest_server, args.dest_uuid, args.dest_labelmap_instance)

    # Fetch kafka log from src if none was provided from the command line
    if args.kafka_log is not None:
        with open(args.kafka_log, 'r') as f:
            kafka_msgs = ujson.load(f)
    else:
        if args.kafka_servers:
            args.kafka_servers = args.kafka_servers.split(',')

        kafka_msgs = read_kafka_messages(*src_seg, kafka_servers=args.kafka_servers)

        # Cache for later
        path = f'kafka-msgs-{args.src_uuid[:4]}-{args.src_labelmap_instance}.json'
        logger.info(f"Writing {path}")
        with open(path, 'w') as f:
            write_json_list(kafka_msgs, f)

    copy_splits_exact(*src_seg, *dest_seg, kafka_msgs,
                      args.min_timestamp, args.max_timestamp,
                      args.min_mutid, args.max_mutid,
                      args.pause_between_splits)
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--x-center', '-x', type=int, required=True,
                        help="Which X-coordinate to center the analysis around. Should be a hot-knife boundary")
    parser.add_argument('--spacing-radius', '-r', type=int, default=8,
                        help="How far away from the X-center coordinate the sample planes should be. "
                             "Always specified in scale-0 pixels.")
    parser.add_argument('--tile-width', '-t', type=int, default=2048,
                        help="Width of the tiles to use internally when fetching/analyzing the X-planes "
                             "around the center coordinate. Specified in scale-0 pixels.")
    parser.add_argument('--min-overlap', '-m', type=int, default=100,
                        help="Required overlap for returned edges, always given in scale-0 coordinates")
    parser.add_argument('--min-jaccard', '-j', type=float, default=0.8,
                        help="Required jaccard for returned edges, given as a fraction between 0.0 and 1.0")
    parser.add_argument('--scale', '-s', type=int, default=2,
                        help="At which scale to perform the analysis. "
                             "(Regardless, results will always be given in scale-0 pixels.)")
    parser.add_argument('--mapping')
    parser.add_argument('--output', '-o', type=str, help="Where to write the output (as CSV)")
    parser.add_argument('dvid_server')
    parser.add_argument('uuid')
    parser.add_argument('labelmap_instance')
    args = parser.parse_args()

    instance_info = (args.dvid_server, args.uuid, args.labelmap_instance)
    tile_shape = 2*(args.tile_width,)

    if args.mapping is None:
        mapping = None
    else:
        mapping = np.load(args.mapping)
        assert mapping.shape[1] == 2

    if args.output is None:
        args.output = (f"hotknife-edges-x{args.x_center:05d}-r{args.spacing_radius}"
                       f"-m{args.min_overlap}-j{args.min_jaccard}-s{args.scale}.csv")

    edge_table = find_all_hotknife_edges_for_plane(*instance_info, args.x_center, tile_shape,
                                                   args.spacing_radius, args.min_overlap, args.min_jaccard,
                                                   scale=args.scale, mapping=mapping)
    with Timer(f"Writing to {args.output}", logger):
        edge_table.to_csv(args.output, index=False, header=True)

    logger.info("DONE.")
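# Example invocation, assuming a console script named "find_hotknife_edges"
# (hypothetical; only the argument structure is taken from the parser above):
#
#   find_hotknife_edges -x 15328 -r 8 -m 100 -j 0.8 -s 2 emdata3:8900 abc123 segmentation
#
# With no --output, results land in a CSV named from the parameters, e.g.
# hotknife-edges-x15328-r8-m100-j0.8-s2.csv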
def main():
    # Early exit if we're dumping the config
    # (Parse it ourselves to allow omission of otherwise required parameters.)
    if ({'--dump-config-template', '-d'} & {*sys.argv}):
        dump_default_config(ConfigSchema, sys.stdout, "yaml-with-comments")
        sys.exit(0)

    parser = argparse.ArgumentParser()
    parser.add_argument('--dump-config-template', '-d', action='store_true',
                        help='Dump out a template yaml config file and exit.')
    parser.add_argument('--processes', '-p', type=int, default=1,
                        help="Size of the process pool to use")
    parser.add_argument('config')
    parser.add_argument('body_id', type=int)
    args = parser.parse_args()

    import numpy as np
    from neuclease import configure_default_logging
    configure_default_logging()

    config = load_config(args.config, ConfigSchema)

    seg_src = [*config["segmentation"].values()]
    mito_cc_src = [*config["mito-objects"].values()]
    mito_class_src = [*config["mito-masks"].values()]

    stats_df = neuron_mito_stats(seg_src, mito_cc_src, mito_class_src, args.body_id,
                                 config["scale"], config["min-size"],
                                 config["centroid-adjustment-radius"], args.processes)

    csv_path = f"mito-stats-{args.body_id}-scale-{config['scale']}.csv"
    logger.info(f"Writing {csv_path}")
    stats_df.to_csv(csv_path, index=True, header=True)

    npy_path = f"mito-stats-{args.body_id}-scale-{config['scale']}.npy"
    logger.info(f"Writing {npy_path}")
    np.save(npy_path, stats_df.to_records(index=True))

    logger.info("DONE")
def main():
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-n', type=int, default=8)
    parser.add_argument('src')
    parser.add_argument('dest')
    args = parser.parse_args()

    args.src = abspath(args.src)
    args.dest = abspath(args.dest)

    if exists(args.dest):
        sys.exit(f"Error: Destination already exists: {args.dest}")

    from neuclease.util import compute_parallel, tqdm_proxy
    from neuclease import configure_default_logging
    configure_default_logging()

    os.chdir(args.src)

    logger.info("Listing source files")
    r = subprocess.run('find . -type f', shell=True, capture_output=True)
    src_paths = r.stdout.decode('utf-8').strip().split('\n')
    dest_paths = [f'{args.dest}/{p}' for p in src_paths]
    dest_dirs = sorted(set([*map(dirname, dest_paths)]))

    logger.info("Initializing directory tree")
    for d in tqdm_proxy(dest_dirs):
        os.makedirs(d, exist_ok=True)

    logger.info(f"Copying {len(src_paths)} files")
    compute_parallel(copyfile, [*zip(src_paths, dest_paths)], 10,
                     starmap=True, ordered=False, processes=args.n)

    logger.info("DONE")
def main(): """ Command-line wrapper interface for ingest_label_indexes(), and/or ingest_mapping(), below. """ configure_default_logging() initialize_excepthook() logger.setLevel(logging.INFO) parser = argparse.ArgumentParser() parser.add_argument('--last-mutid', '-i', required=False, type=int) parser.add_argument('--check-mismatches', action='store_true', help='If given, every LabelIndex will be compared with the existing LabelIndex on the server, and only the mismatching ones will be sent.') parser.add_argument('--agglomeration-mapping', '-m', required=False, help='Either a UUID to pull the mapping from, or a CSV file (or .npy file) with two columns, mapping supervoxels to agglomerated bodies. Any missing entries implicitly identity-mapped.') parser.add_argument('--operation', default='indexes', choices=['indexes', 'mappings', 'both', 'sort-only'], help='Whether to load the LabelIndices, MappingOps, or both. If sort-only, sort/save the stats and exit.') parser.add_argument('--subset-labels', required=False, help='CSV file with a single column of label IDs to write LabelIndexes for.' 'Other labels found in the mapping and or block stats h5 file will be ignored. ' 'NOTE: Whether or not the label ids are interpreted as supervoxels or bodies depends on whether or not --agglomeration-mapping was provided.') parser.add_argument('--tombstones', default='include', choices=['include', 'exclude', 'only'], help="Whether to include 'tombstones' in the labelindexes (i.e. explicitly send empty labelindexes for all supervoxels in a body that don't match the body-id). " "Options are 'include', 'exclude', or 'only' (i.e. send only the tombstones and not the actual labelindices)") parser.add_argument('--num-threads', '-n', default=1, type=int, help='How many threads to use when ingesting label indexes (does not currently apply to mappings)') parser.add_argument('--batch-size', '-b', default=20_000, type=int, help='Data is grouped in batches to the server. This is the batch size, as measured in ROWS of data to be processed for each batch.') parser.add_argument('server') parser.add_argument('uuid') parser.add_argument('labelmap_instance') parser.add_argument('supervoxel_block_stats_h5', nargs='?', # not required if only ingesting mapping help=f'An HDF5 file with a single dataset "stats", with dtype: {STATS_DTYPE[1:]} (Note: No column for body_id)') args = parser.parse_args() with Timer() as timer: main_impl(args) logger.info(f"DONE. Total time: {timer.timedelta}")
    # This is our main result: mito IDs (and their sizes)
    mito_sizes = mito_sizes.loc[mito_sizes >= MIN_MITO_SIZE]

    # Just for extra info, group the mitos we found into connected components.
    mito_mask = mask_for_labels(mito_seg, mito_sizes.index)
    mito_box = compute_nonzero_box(mito_mask)
    mito_mask = extract_subvol(mito_mask, mito_box)
    mito_seg = extract_subvol(mito_seg, mito_box)
    mito_cc = label(mito_mask, connectivity=1)
    ct = contingency_table(mito_seg, mito_cc).reset_index()
    ct = ct.rename(columns={'left': 'mito', 'right': 'cc', 'voxel_count': 'cc_size'})
    ct = ct.set_index('mito')
    mito_sizes = pd.DataFrame(mito_sizes).merge(ct, 'left', left_index=True, right_index=True)

    return mito_sizes


if __name__ == "__main__":
    from neuclease import configure_default_logging
    configure_default_logging()

    #import os
    #os.chdir('/Users/bergs/Documents/FlyEM/mito-project/proofreading/mito-count')
    #sys.argv.append('mito-count-results.pkl')

    main()
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--smoothing-iterations', '-s', type=int, default=0)
    parser.add_argument('--decimation-fraction', '-d', type=float, default=1.0)
    parser.add_argument('--format', '-f', choices=['drc', 'obj'])
    parser.add_argument('--output-path', '-o', help='Optional. Must end with .obj or .drc')
    parser.add_argument('--tarsupervoxels-instance', '-t', type=str,
                        help='Optional. The name of a tarsupervoxels instance to post the mesh to, '
                             'e.g. "segmentation_sv_meshes".')
    parser.add_argument('--max-bounding-box-voxels', '-m', type=float, default=DEFAULT_MAX_BOUNDING_BOX_VOL,
                        help="Optional. Attempt to ensure that the downloaded mask's bounding box "
                             "will not exceed this volume. (A high scale is used if necessary.)")
    parser.add_argument('server')
    parser.add_argument('uuid')
    parser.add_argument('segmentation_instance')
    parser.add_argument('supervoxel_id', type=np.uint64)
    args = parser.parse_args()

    if not args.output_path and not args.tarsupervoxels_instance:
        sys.stderr.write("Nothing to do: You must specify either an output path or a tarsupervoxels instance\n")
        sys.exit(1)

    if args.output_path and args.format:
        if args.format != os.path.splitext(args.output_path)[1][1:]:
            sys.exit(f"Specified format ({args.format}) conflicts with output filename.")

    if args.output_path:
        args.format = os.path.splitext(args.output_path)[1][1:]
    elif not args.format:
        args.format = 'drc'  # default

    # Fetch supervoxel mask and generate mesh
    mesh = sv_to_mesh(args.server, args.uuid, args.segmentation_instance, args.supervoxel_id,
                      args.smoothing_iterations, args.decimation_fraction, args.max_bounding_box_voxels)

    # Serialize to a buffer (either .obj or .drc)
    logger.info(f"Serializing to {args.format}")
    mesh_bytes = mesh.serialize(fmt=args.format)

    # Write to file
    if args.output_path:
        logger.info(f"Writing {args.output_path}")
        with open(args.output_path, 'wb') as f:
            f.write(mesh_bytes)

    # Send to DVID
    if args.tarsupervoxels_instance:
        logger.info(f"Posting to {args.server} / {args.uuid} / {args.tarsupervoxels_instance}")
        post_supervoxel(args.server, args.uuid, args.tarsupervoxels_instance, args.supervoxel_id, mesh_bytes)

    logger.info("DONE.")
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Workflow info
    parser.add_argument('--list-workflows', '-w', action="store_true",
                        help="List all built-in workflows and exit.")

    # Schema/config info
    parser.add_argument('--dump-schema', '-s',
                        help="dump config schema for the given workflow (as json)")
    parser.add_argument('--dump-default-yaml', '-y',
                        help="Dump default config values for the given workflow (as yaml)")
    parser.add_argument('--dump-default-verbose-yaml', '-v',
                        help="Dump default config values for the given workflow (as yaml), "
                             "commented with field descriptions.")
    parser.add_argument('--dump-complete-config', '-c', action='store_true',
                        help="Load the config from the given template dir, inject default values for missing settings, "
                             "and dump the resulting complete config. (Do not execute the workflow.)")

    # Launch parameters
    parser.add_argument('--num-workers', '-n', type=int, default=1,
                        help='Number of workers to launch (i.e. each worker is launched with a single bsub command)')
    parser.add_argument('--pause-before-exit', '-p', action='store_true',
                        help="Pause before exiting, to allow you to inspect the dask dashboard before it is shut down.")
    parser.add_argument('template_dir', nargs='?',
                        help='A template directory with a workflow.yaml file '
                             '(and possibly other files/scripts to be used by the workflow.)')
    args = parser.parse_args()

    if args.list_workflows:
        print("Built-in workflows:\n")
        for w in BUILTIN_WORKFLOWS:
            if w is not Workflow:
                print(f" {w.__name__}")
        print("\nTo run a third-party workflow, use a fully-qualified class name in workflow.yaml.\n")
        print("Example:\n\n workflow-name: mypackage.mymodule.MyWorkflowSubclass\n")
        sys.exit(0)

    if args.dump_schema:
        workflow_cls = Workflow.get_workflow_cls(args.dump_schema, True)
        print(json.dumps(workflow_cls.schema(), indent=2))
        sys.exit(0)

    if args.dump_default_yaml:
        workflow_name = args.dump_default_yaml
        workflow_cls = Workflow.get_workflow_cls(workflow_name, True)
        schema = copy.deepcopy(workflow_cls.schema())
        schema["properties"]["workflow-name"]["default"] = workflow_name.lower()
        dump_default_config(schema, sys.stdout, 'yaml')
        sys.exit(0)

    if args.dump_default_verbose_yaml:
        workflow_name = args.dump_default_verbose_yaml
        workflow_cls = Workflow.get_workflow_cls(workflow_name, True)
        schema = copy.deepcopy(workflow_cls.schema())
        schema["properties"]["workflow-name"]["default"] = workflow_name.lower()
        dump_default_config(schema, sys.stdout, 'yaml-with-comments')
        sys.exit(0)

    if not args.template_dir:
        print("Error: No config directory specified. Exiting.", file=sys.stderr)
        parser.print_help(sys.stderr)
        sys.exit(1)

    if not os.path.exists(args.template_dir):
        print(f"Error: template directory does not exist: {args.template_dir}", file=sys.stderr)
        sys.exit(1)

    if not os.path.isdir(args.template_dir):
        print(f"Error: Given template directory path is a file, not a directory: {args.template_dir}",
              file=sys.stderr)
        sys.exit(1)

    if args.dump_complete_config:
        workflow_cls, config_data = Workflow.load_workflow_config(args.template_dir)
        dump_config(config_data, sys.stdout)
        sys.exit(0)

    # Execute the workflow
    workflow = None
    try:
        _exc_dir, workflow = launch_flow(args.template_dir, args.num_workers, not args.pause_before_exit)
    except:
        if args.pause_before_exit:
            import traceback
            traceback.print_exc()
        else:
            raise
    finally:
        if args.pause_before_exit:
            logger.info("Workflow complete, but pausing now due to --pause-before-exit. Hit Ctrl+C to exit.")
            try:
                while True:
                    time.sleep(1.0)
            except KeyboardInterrupt:
                pass

    # Workflow must not be deleted until we're ready to exit.
    if workflow:
        del workflow
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--no-downres', action='store_true')
    parser.add_argument('--only-within-roi')
    parser.add_argument('--not-within-roi')
    parser.add_argument('dvid_server')
    parser.add_argument('uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument('sparsevol_files', nargs='+')
    args = parser.parse_args()

    instance_info = (args.dvid_server, args.uuid, args.labelmap_instance)

    assert not args.only_within_roi or not args.not_within_roi, \
        "Can't supply both --only-within-roi and --not-within-roi. Pick one or the other (or neither)."

    roi = args.only_within_roi or args.not_within_roi
    invert_roi = (args.not_within_roi is not None)

    if roi:
        roi_mask, mask_box = fetch_roi(args.dvid_server, args.uuid, roi, format='mask')
        roi_sbm = SparseBlockMask(roi_mask, mask_box * (2**5), 2**5)  # ROIs are provided at scale 5
    else:
        roi_sbm = None

    # Ideally, we would choose the max label for the node we're writing to,
    # but the /maxlabel endpoint doesn't work for all nodes.
    # Instead, we'll use the repo-wide maxlabel from the /info JSON.
    #maxlabel = fetch_maxlabel(args.dvid_server, args.uuid, args.labelmap_instance)
    maxlabel = fetch_instance_info(args.dvid_server, args.uuid, args.labelmap_instance)["Extended"]["MaxRepoLabel"]

    for i, path in enumerate(args.sparsevol_files):
        maxlabel += 1

        name = os.path.split(path)[1]
        prefix_logger = PrefixedLogger(logger, f"Vol #{i:02d} {name}: ")

        with Timer(f"Pasting {name} as {maxlabel}", logger):
            overwritten_labels = overwrite_sparsevol(*instance_info, maxlabel, path, roi_sbm,
                                                     invert_roi, args.no_downres, prefix_logger)

        results_path = os.path.splitext(path)[0] + '.json'
        with open(results_path, 'w') as f:
            results = {'new-label': maxlabel, 'overwritten_labels': sorted(overwritten_labels)}
            json.dump(results, f, indent=2, cls=NumpyConvertingEncoder)

    logger.info(f"Done.")
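# Hypothetical usage of the sparsevol-pasting script above (script name, uuid,
# and ROI name are placeholders). Each input file is pasted as a brand-new
# label, and a sidecar .json records the new label and the labels it overwrote:
#
#   paste_sparsevols --only-within-roi my-roi emdata3:8900 abc123 segmentation vol1.sparsevol vol2.sparsevol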
def main():
    args = parse_args()

    signal.signal(signal.SIGHUP, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    # Late imports so --help works quickly
    from requests import HTTPError
    from neuclease import configure_default_logging
    from neuclease.logging_setup import ExceptionLogger
    from neuclease.dvid import reset_kafka_offset, read_labelmap_kafka_df, filter_kafka_msgs_by_timerange

    configure_default_logging()
    logger.info(' '.join(sys.argv))
    logger.info(f"Running as PID {os.getpid()}")

    #
    # Defaults
    #
    if args.starting_timestamp is None:
        args.starting_timestamp = datetime.now()

    if not args.cwd:
        args.cwd = os.getcwd()

    if not args.conda_path:
        r = subprocess.run('which conda', shell=True, capture_output=True, check=True)
        args.conda_path = r.stdout.decode('utf-8').strip()

    if not args.conda_env:
        # TODO: Test that the conda environment works
        args.conda_env = os.environ["CONDA_DEFAULT_ENV"]
        assert args.conda_env != "base", "Don't use the base conda environment!"

    #
    # Initialize ssh connection
    #
    if args.submit_locally:
        c = None
    else:
        c = init_ssh_connection(args.submission_node, args.ask_for_password)

    #
    # Check existence of template directory on submission node
    #
    try:
        run_cmd(c, f'cd {args.cwd} && ls -d {args.template_dir}', log_stdout=False)
    except Exception:
        raise RuntimeError(f"Your template directory {args.template_dir} is not accessible from {args.cwd}")

    #
    # Load workflow config to determine DVID info.
    #
    seg_instance, body_csv = parse_workflow_config(args.template_dir)

    #
    # Kafka setup
    #
    group_id = f'mesh update daemon {seg_instance[0]} {seg_instance[1]} {args.starting_timestamp}'
    if args.kafka_group_id_suffix:
        group_id += ' ' + args.kafka_group_id_suffix

    if args.reset_kafka_offset:
        reset_kafka_offset(*seg_instance, group_id)

    #
    # Main loop
    #
    while True:
        try:
            with ExceptionLogger(logger) as el:
                msgs_df = read_labelmap_kafka_df(*seg_instance, drop_completes=True, group_id=group_id)
                msgs_df = filter_kafka_msgs_by_timerange(msgs_df, args.starting_timestamp)
                extract_body_ids_and_launch(c, args, seg_instance, body_csv, msgs_df)
                need_kafka_reset = False
        except HTTPError:
            msg = ("Failed to process mesh job. (See traceback above.) "
                   "Will reset kafka offset and try again at the next interval.")
            logger.warning(msg)
            send_error_email(el.last_traceback + '\n' + msg, args.email_on_error)
            need_kafka_reset = True

        # TODO: Get feedback on successful/failed runs, and restart failed jobs
        #       (or accumulate their body lists into the next job)

        time.sleep(60 * args.interval)

        if need_kafka_reset:
            need_kafka_reset = False
            try:
                reset_kafka_offset(*seg_instance, group_id)
            except HTTPError:
                pass
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--use-mapping', action='store_true',
                        help='Use in-memory map + /exists instead of /missing, '
                             'as described in the general help text above.')
    parser.add_argument('--output', '-o', default='missing-from-tsv.csv',
                        help='Where to write the output CSV (default: missing-from-tsv.csv)')
    parser.add_argument('--kafka-timestamp', '-k', type=str,
                        help='Alternative to providing your own bodies list.\n'
                             'Use the kafka log to automatically determine the list of bodies '
                             'that have changed after the given timestamp.\n'
                             'Examples: -k="2018-11-22" -k="2018-11-22 17:34:00"')
    parser.add_argument('server', help='dvid server, e.g. emdata3:8900')
    parser.add_argument('uuid', help='dvid node to analyze or "master" for the latest master branch uuid')
    parser.add_argument('tsv_instance',
                        help="Name of a tarsupervoxels instance, e.g. segmentation_sv_meshes.\n"
                             "Must be sync'd to a labelmap (segmentation) instance.")
    parser.add_argument('bodies_csv', nargs='?',
                        help='CSV containing a column named "body", which will be read.\n'
                             'If no "body" column exists, the first column is used, regardless of the name.\n'
                             '(Omit this arg if you are using --kafka-timestamp)')
    args = parser.parse_args()

    if not (bool(args.kafka_timestamp) ^ bool(args.bodies_csv)):
        print("You must provide either --kafka-timestamp or a bodies list (not both)", file=sys.stderr)
        sys.exit(1)

    if args.uuid == "master":
        args.uuid = find_master(args.server)

    # Determine segmentation instance
    info = fetch_instance_info(args.server, args.uuid, args.tsv_instance)
    seg_instance = info["Base"]["Syncs"][0]

    kafka_msgs = None
    if args.bodies_csv:
        if 'body' in read_csv_header(args.bodies_csv):
            bodies = pd.read_csv(args.bodies_csv)['body'].drop_duplicates()
        else:
            # Just read the first column, no matter what it's named
            bodies = read_csv_col(args.bodies_csv, 0, np.uint64).drop_duplicates()
    elif args.kafka_timestamp:
        # Validate timestamp format before fetching kafka log, which takes a while.
        parse_timestamp(args.kafka_timestamp)

        kafka_msgs = read_kafka_messages(args.server, args.uuid, seg_instance)
        filtered_kafka_msgs = filter_kafka_msgs_by_timerange(kafka_msgs, min_timestamp=args.kafka_timestamp)

        new_bodies, changed_bodies, _removed_bodies, new_supervoxels, _deleted_svs = \
            compute_affected_bodies(filtered_kafka_msgs)
        sv_split_bodies = set(fetch_mapping(args.server, args.uuid, seg_instance, new_supervoxels)) - set([0])

        bodies = set(chain(new_bodies, changed_bodies, sv_split_bodies))
        bodies = np.fromiter(bodies, np.uint64)
        bodies.sort()
    else:
        raise AssertionError("Shouldn't get here.")

    if args.use_mapping:
        missing_entries = check_tarsupervoxels_status_via_exists(args.server, args.uuid, args.tsv_instance,
                                                                 bodies, seg_instance, kafka_msgs=kafka_msgs)
    else:
        missing_entries = check_tarsupervoxels_status_via_missing(args.server, args.uuid, args.tsv_instance, bodies)

    logger.info(f"Writing to {args.output}")
    missing_entries.to_csv(args.output, index=True, header=True)
    logging.info("DONE")
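# Two hypothetical invocations of the tarsupervoxels-status check above
# (the entry-point name is assumed; emdata3:8900 and segmentation_sv_meshes
# come from the help text):
#
#   check_tarsupervoxels_status emdata3:8900 master segmentation_sv_meshes bodies.csv
#   check_tarsupervoxels_status -k="2018-11-22 17:34:00" emdata3:8900 master segmentation_sv_meshes
#
# Exactly one of the bodies CSV or --kafka-timestamp must be given; the script
# enforces this with an XOR check.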
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser()
    parser.add_argument('server')
    parser.add_argument('uuid')
    parser.add_argument('instance')
    parser.add_argument('block_stats')
    args = parser.parse_args()

    seg_instance = (args.server, args.uuid, args.instance)

    from flyemflows.bin.ingest_label_indexes import load_stats_h5_to_records
    with Timer("Loading block stats", logger):
        (block_sv_stats, _presorted_by, _agglo_path) = load_stats_h5_to_records(args.block_stats)
        stats_df = pd.DataFrame(block_sv_stats)
        stats_df = stats_df[['z', 'y', 'x', 'segment_id', 'count']]
        stats_df = stats_df.rename(columns={'segment_id': 'sv'})

        # Keep only the new supervoxels.
        stats_df = stats_df.query('sv > @NEW_SV_THRESHOLD').copy()

    with Timer("Fetching old labelindex", logger):
        labelindex = fetch_labelindex(*seg_instance, 106979579, format='protobuf')

    with Timer("Extracting labelindex table", logger):
        old_df = convert_labelindex_to_pandas(labelindex).blocks

    with Timer("Patching labelindex table", logger):
        # Discard old supervoxel stats within the patched area
        in_patch = (old_df[['z', 'y', 'x']].values >= PATCH_BOX_ZYX[0]).all(axis=1)
        in_patch &= (old_df[['z', 'y', 'x']].values < PATCH_BOX_ZYX[1]).all(axis=1)

        old_df['in_patch'] = in_patch
        unpatched_df = old_df.query('not (in_patch and sv == @FRANKENBODY_SV)').copy()
        del unpatched_df['in_patch']

        # Append new stats
        new_df = pd.concat((unpatched_df, stats_df), ignore_index=True)
        new_df = new_df.sort_values(['z', 'y', 'x', 'sv'])

        np.save('old_df.npy', old_df.to_records(index=False))
        np.save('new_df.npy', new_df.to_records(index=False))

        if old_df['count'].sum() != new_df['count'].sum():
            logger.warning("Old and new indexes do not have the same total counts. "
                           "See old_df.npy and new_df.npy")

    with Timer("Constructing new labelindex", logger):
        last_mutid = fetch_repo_info(*seg_instance[:2])["MutationID"]
        mod_time = datetime.datetime.now().isoformat()
        new_li = PandasLabelIndex(new_df, FRANKENBODY_SV, last_mutid, mod_time, os.environ.get("USER", "unknown"))
        new_labelindex = create_labelindex(new_li)

    with Timer("Posting new labelindex", logger):
        post_labelindex(*seg_instance, FRANKENBODY_SV, new_labelindex)

    with Timer("Posting updated mapping", logger):
        new_mapping = pd.Series(FRANKENBODY_SV, index=new_df['sv'].unique(), dtype=np.uint64, name='body')
        post_mappings(*seg_instance, new_mapping, last_mutid)

    logger.info("DONE")
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--fraction', type=float,
                        help='Fraction of vertices to retain in the decimated mesh. Between 0.0 and 1.0')
    parser.add_argument('--max-vertices', type=float, default=1e9,
                        help='If necessary, decimate the mesh even further so that it has no more than '
                             'this vertex count (approximately).')
    parser.add_argument('--format', help='Either obj or drc', required=True)
    parser.add_argument('--rescale', type=float,
                        help='Multiply all vertex coordinates by this factor before storing the mesh. '
                             'Important for writing to ngmesh format.')
    parser.add_argument('--output-directory', '-d', help='Directory to dump decimated meshes.')
    parser.add_argument('--output-url', '-u',
                        help='DVID keyvalue instance to write decimated mesh files to, '
                             'specified as a complete URL, e.g. http://emdata1:8000/api/node/123abc/my-meshes')
    parser.add_argument('server', help='dvid server, e.g. emdata3:8900')
    parser.add_argument('uuid', help='dvid node')
    parser.add_argument('tsv_instance', help='name of a tarsupervoxels instance, e.g. segmentation_sv_meshes')
    parser.add_argument('bodies', nargs='+',
                        help='A list of body IDs OR a path to a CSV containing a column named "body", '
                             'which will be read.\n'
                             'If no "body" column exists, the first column is used, regardless of the name.')
    args = parser.parse_args()

    if args.fraction is None:
        raise RuntimeError("Please specify a decimation fraction.")

    if args.format is None:
        raise RuntimeError("Please specify an output format (either 'drc' or 'obj') via --format")

    if args.output_directory:
        os.makedirs(args.output_directory, exist_ok=True)

    if args.format == "ngmesh" and args.rescale is None:
        raise RuntimeError("When writing to ngmesh, please specify an explicit rescale factor.")

    args.rescale = args.rescale or 1.0

    output_dvid = None
    if args.output_url:
        if '/api/node' not in args.output_url:
            raise RuntimeError("Please specify the output instance as a complete URL, "
                               "e.g. http://emdata1:8000/api/node/123abc/my-meshes")

        # drop 'http://' (if present)
        url = args.output_url.split('://')[-1]
        parts = url.split('/')
        assert parts[1] == 'api'
        assert parts[2] == 'node'
        output_server = parts[0]
        output_uuid = parts[3]
        output_instance = parts[4]
        output_dvid = (output_server, output_uuid, output_instance)

    all_bodies = []
    for body in args.bodies:
        if body.endswith('.csv'):
            if 'body' in read_csv_header(body):
                bodies = pd.read_csv(body)['body'].drop_duplicates()
            else:
                # Just read the first column, no matter what it's named
                bodies = read_csv_col(body, 0, np.uint64).drop_duplicates()
        else:
            try:
                body = int(body)
            except ValueError:
                raise RuntimeError(f"Invalid body ID: '{body}'")
            # A single body ID was given on the command line.
            bodies = [body]

        all_bodies.extend(bodies)

    for body_id in tqdm_proxy(all_bodies):
        output_path = None
        if args.output_directory:
            output_path = f'{args.output_directory}/{body_id}.{args.format}'

        decimate_existing_mesh(args.server, args.uuid, args.tsv_instance, body_id,
                               args.fraction, args.max_vertices, args.rescale, args.format,
                               output_path, output_dvid)
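# A usage sketch for the mesh-decimation script above (hypothetical script name;
# the --output-url form mirrors the example in its own help text):
#
#   decimate_existing_mesh --fraction 0.1 --format drc \
#       -u http://emdata1:8000/api/node/123abc/my-meshes \
#       emdata3:8900 123abc segmentation_sv_meshes 1071121755 bodies.csv
#
# Body arguments may be literal IDs or CSV files, and the two can be mixed.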
def main():
    # Early exit if we're dumping the config
    # (Parse it ourselves to allow omission of otherwise required parameters.)
    if ({'--dump-config-template', '-d'} & {*sys.argv}):
        dump_default_config(ConfigSchema, sys.stdout, "yaml-with-comments")
        sys.exit(0)

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--dump-config-template', '-d', action='store_true',
                        help='Dump out a template yaml config file and exit.')
    parser.add_argument('--count', '-c', type=int, help='How many points to generate.')
    parser.add_argument('--roi', '-r', help='Limit points to the given ROI.')
    parser.add_argument('--body', '-b', type=int, help='Limit points to the given body.')
    parser.add_argument('--tbars', '-t', action='store_true',
                        help='If given, limit points to the tbars of the given body, '
                             'from the "synapses" instance in the input UUID.')
    parser.add_argument('--skeleton', '-s', action='store_true',
                        help='If given, choose the points from the nodes of the skeleton for the given body.')
    parser.add_argument('--generate-points-only', '-g', action='store_true',
                        help="If given, generate the points list, but don't write neighborhood segmentations")
    parser.add_argument('--points', '-p',
                        help='A CSV file containing the points to use instead of automatically generating them.')
    parser.add_argument('--ng-links', '-n', action='store_true',
                        help='If given, include neuroglancer links in the output CSV. '
                             'Your config should specify the basic neuroglancer view settings; '
                             'only the "position" will be overwritten in each link.')
    parser.add_argument('config')
    args = parser.parse_args()

    configure_default_logging()

    config = load_config(args.config, ConfigSchema)
    update_ng_settings(config)
    input_seg = [*config["input"].values()]
    output_seg = [*config["output"].values()]
    radius = config["radius"]
    random_seed = config["random-seed"]

    if config["enforce-minimum-distance"]:
        minimum_distance = 2 * radius
    else:
        minimum_distance = 0

    if args.points and any([args.count, args.roi, args.body, args.tbars, args.skeleton]):
        msg = ("If you're providing your own list of points, you shouldn't"
               " specify any of the auto-generation arguments, such as"
               " --count --roi --body --tbars")
        sys.exit(msg)

    if not args.points and not any([args.count, args.roi, args.body, args.tbars, args.skeleton]):
        msg = "You must provide a list of points or specify how to auto-generate them."
        sys.exit(msg)

    if args.points:
        assert args.points.endswith('.csv')
        name, _ = os.path.splitext(args.points)
        output_path = name + '-neighborhoods.csv'
        points = pd.read_csv(args.points)
    else:
        points = autogen_points(input_seg, args.count, args.roi, args.body,
                                args.tbars, args.skeleton, random_seed, minimum_distance)

        uuid = input_seg[1]
        output_path = f'neighborhoods-from-{uuid[:6]}'

        if not any([args.roi, args.body, args.tbars, args.skeleton]):
            output_path += input_seg[2]
        else:
            if args.roi:
                output_path += f'-{args.roi}'
            if args.body:
                output_path += f'-{args.body}'
            if args.tbars:
                output_path += '-tbars'
            if args.skeleton:
                output_path += '-skeleton'

    assignment_path = output_path + '.json'
    csv_path = output_path + '.csv'

    kd = scipy.spatial.cKDTree(points[[*'zyx']].values)
    if len(kd.query_pairs(2 * radius)) > 0:
        msg = ("Some of the chosen points are closer to each other than 2x the "
               f"configured radius ({radius}). Their neighborhood segments may "
               "be mangled in the output.")
        logger.warning(msg)

    cols = [*'xyz'] + list({*points.columns} - {*'xyz'})
    points = points[cols]

    if args.generate_points_only:
        add_link_col(points, config)
        export_as_html(points, csv_path)
        if not args.ng_links:
            del points['link']
        points.to_csv(csv_path, index=False, header=True, quoting=csv.QUOTE_NONE)
        sys.exit(0)

    try:
        input_info = fetch_instance_info(*input_seg)
    except Exception:
        sys.exit(f"Couldn't find input segmentation instance: {' / '.join(input_seg)}")

    try:
        fetch_instance_info(*output_seg)
    except Exception:
        logger.info(f"Output labelmap not found. Creating new label instance: {' / '.join(output_seg)}")

        # Copy details from input instance.
        # But only provide a single value for each, even though the info provides three.
        # Otherwise, DVID kicks back errors like this:
        #   Setting for 'VoxelUnits' was not a string: [nanometers nanometers nanometers]
        settings = {
            'block_size': input_info['Extended']['BlockSize'][0],
            'voxel_size': input_info['Extended']['VoxelSize'][0],
            'voxel_units': input_info['Extended']['VoxelUnits'][0],
            'max_scale': input_info['Extended']['MaxDownresLevel']
        }
        create_labelmap_instance(*output_seg, **settings)

        # Also create keyvalue for meshes
        create_instance(*output_seg[:2], output_seg[2] + '_meshes', 'keyvalue')

    results_df = write_point_neighborhoods(input_seg, output_seg, points, radius, args.body)

    add_link_col(results_df, config)
    export_as_html(results_df, csv_path)
    write_assignment_file(output_seg, results_df, assignment_path, config)

    if not args.ng_links:
        del results_df['link']
    results_df.to_csv(csv_path, index=False, header=True, quoting=csv.QUOTE_NONE)
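# Hypothetical neighborhood-generation runs (the console-script name isn't shown
# above; config.yaml must match ConfigSchema):
#
#   point_neighborhoods --count 100 --roi EB config.yaml
#   point_neighborhoods --points my-points.csv --ng-links config.yaml
#
# The first auto-generates points within an ROI; the second uses a user-supplied
# CSV and adds neuroglancer links to the output.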