def test_read_csv_col_noheader():
    tmpfile = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', prefix='test_read_csv_col')
    tmpfile.write(textwrap.dedent("""\
        0,1,2
        3,4,5
    """))
    tmpfile.flush()

    col0 = read_csv_col(tmpfile.name)
    assert (col0 == [0, 3]).all()
    assert col0.name is None

    col1 = read_csv_col(tmpfile.name, 1)
    assert (col1 == [1, 4]).all()
    assert col1.name is None

    col2 = read_csv_col(tmpfile.name, 2)
    assert (col2 == [2, 5]).all()
    assert col2.name is None
def main():
    handler = logging.StreamHandler(sys.stdout)
    logger.setLevel(logging.INFO)
    logging.getLogger().addHandler(handler)

    parser = argparse.ArgumentParser()
    parser.add_argument('--results-output-log', '-o', default='split-copy-results-log.csv')
    parser.add_argument('--src-supervoxels-csv', required=False)
    parser.add_argument('--src-supervoxels-from-kafka', action='store_true')
    parser.add_argument('src_server')
    parser.add_argument('src_uuid')
    parser.add_argument('src_labelmap_instance')
    parser.add_argument('dest_server')
    parser.add_argument('dest_uuid')
    parser.add_argument('dest_labelmap_instance')
    args = parser.parse_args()

    src_info = InstanceInfo(args.src_server, args.src_uuid, args.src_labelmap_instance)
    dest_info = InstanceInfo(args.dest_server, args.dest_uuid, args.dest_labelmap_instance)

    if not ((args.src_supervoxels_csv is not None) ^ args.src_supervoxels_from_kafka):
        print("You must select either CSV or Kafka (not both)", file=sys.stderr)
        sys.exit(1)

    if args.src_supervoxels_csv:
        src_supervoxels = read_csv_col(args.src_supervoxels_csv, col=0, dtype=np.uint64)
    else:
        src_supervoxels = read_src_supervoxels_from_kafka(src_info)

    if len(src_supervoxels) == 0:
        logger.error("Error: No source supervoxels provided!")
        sys.exit(1)

    copy_results = copy_splits(src_supervoxels, src_info, dest_info)

    df = pd.DataFrame(np.array(copy_results, dtype=np.uint64),
                      columns=['src_sv', 'overwritten_sv', 'split_sv', 'remain_sv'])
    df.to_csv(args.results_output_log, index=False, header=True)
    print(f"Saved results log to {args.results_output_log}")

    logger.info("Done.")
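# Example invocation of this split-copy script, as a comment-only sketch. The script
# filename and all server/uuid/instance names below are hypothetical; only the argument
# names come from the parser above. Exactly one of --src-supervoxels-csv or
# --src-supervoxels-from-kafka must be given.
#
#     python copy_splits.py --src-supervoxels-csv=src_svs.csv \
#         emdata3:8900 abc123 segmentation \
#         emdata4:8900 def456 segmentation \
#         -o split-copy-results-log.csv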
def load_body_list(config_data, is_supervoxels):
    if isinstance(config_data, list):
        return np.array(config_data, dtype=np.uint64)

    bodies_csv = config_data
    del config_data

    assert os.path.exists(bodies_csv), \
        f"CSV file does not exist: {bodies_csv}"

    if is_supervoxels:
        col = 'sv'
    else:
        col = 'body'

    if col in read_csv_header(bodies_csv):
        bodies = pd.read_csv(bodies_csv)[col].drop_duplicates()
    else:
        # Just read the first column, no matter what it's named
        logger.warning(f"No column named {col}, so reading first column instead")
        bodies = read_csv_col(bodies_csv, 0, np.uint64).drop_duplicates()

    return bodies.values.astype(np.uint64)
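# Minimal usage sketch for load_body_list (the paths and IDs below are hypothetical):
# an explicit list is passed through as uint64, while a CSV path is read via its
# 'body' or 'sv' column, falling back to the first column.
#
#     bodies = load_body_list([123, 456, 789], is_supervoxels=False)
#     svs = load_body_list('/path/to/supervoxels.csv', is_supervoxels=True)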
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--use-mapping', action='store_true',
                        help='Use in-memory map + /exists instead of /missing, as described in the general help text above.')
    parser.add_argument('--output', '-o', default='missing-from-tsv.csv',
                        help='Where to write the output CSV (default: missing-from-tsv.csv)')
    parser.add_argument('--kafka-timestamp', '-k', type=str,
                        help='Alternative to providing your own bodies list.\n'
                             'Use the kafka log to automatically determine the list of bodies that have changed after the given timestamp.\n'
                             'Examples: -k="2018-11-22" -k="2018-11-22 17:34:00"')
    parser.add_argument('server', help='dvid server, e.g. emdata3:8900')
    parser.add_argument('uuid', help='dvid node to analyze or "master" for the latest master branch uuid')
    parser.add_argument('tsv_instance', help="Name of a tarsupervoxels instance, e.g. segmentation_sv_meshes.\n"
                                             "Must be sync'd to a labelmap (segmentation) instance.")
    parser.add_argument('bodies_csv', nargs='?',
                        help='CSV containing a column named "body", which will be read.\n'
                             'If no "body" column exists, the first column is used, regardless of the name.\n'
                             '(Omit this arg if you are using --kafka-timestamp)')
    args = parser.parse_args()

    if not (bool(args.kafka_timestamp) ^ bool(args.bodies_csv)):
        print("You must provide either --kafka-timestamp or a bodies list (not both)", file=sys.stderr)
        sys.exit(1)

    if args.uuid == "master":
        args.uuid = find_master(args.server)

    # Determine segmentation instance
    info = fetch_instance_info(args.server, args.uuid, args.tsv_instance)
    seg_instance = info["Base"]["Syncs"][0]

    kafka_msgs = None
    if args.bodies_csv:
        if 'body' in read_csv_header(args.bodies_csv):
            bodies = pd.read_csv(args.bodies_csv)['body'].drop_duplicates()
        else:
            # Just read the first column, no matter what it's named
            bodies = read_csv_col(args.bodies_csv, 0, np.uint64).drop_duplicates()
    elif args.kafka_timestamp:
        # Validate the timestamp format before fetching the kafka log, which takes a while.
        parse_timestamp(args.kafka_timestamp)

        kafka_msgs = read_kafka_messages(args.server, args.uuid, seg_instance)
        filtered_kafka_msgs = filter_kafka_msgs_by_timerange(kafka_msgs, min_timestamp=args.kafka_timestamp)

        new_bodies, changed_bodies, _removed_bodies, new_supervoxels, _deleted_svs = compute_affected_bodies(filtered_kafka_msgs)
        sv_split_bodies = set(fetch_mapping(args.server, args.uuid, seg_instance, new_supervoxels)) - set([0])

        bodies = set(chain(new_bodies, changed_bodies, sv_split_bodies))
        bodies = np.fromiter(bodies, np.uint64)
        bodies.sort()
    else:
        raise AssertionError("Shouldn't get here.")

    if args.use_mapping:
        missing_entries = check_tarsupervoxels_status_via_exists(args.server, args.uuid, args.tsv_instance,
                                                                 bodies, seg_instance, kafka_msgs=kafka_msgs)
    else:
        missing_entries = check_tarsupervoxels_status_via_missing(args.server, args.uuid, args.tsv_instance, bodies)

    logger.info(f"Writing to {args.output}")
    missing_entries.to_csv(args.output, index=True, header=True)
    logger.info("DONE")
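# Example invocations, as a comment-only sketch. The script filename is an assumption;
# the server, "master" uuid shortcut, and instance name are taken from the help text
# above, and bodies.csv is hypothetical.
#
#     python check_tarsupervoxels_status.py --use-mapping -o missing-from-tsv.csv \
#         emdata3:8900 master segmentation_sv_meshes bodies.csv
#
# or, using the kafka log instead of a bodies CSV:
#
#     python check_tarsupervoxels_status.py -k="2018-11-22 17:34:00" \
#         emdata3:8900 master segmentation_sv_meshes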
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--fraction', type=float,
                        help='Fraction of vertices to retain in the decimated mesh. Between 0.0 and 1.0')
    parser.add_argument('--max-vertices', type=float, default=1e9,
                        help='If necessary, decimate the mesh even further so that it has no more than this vertex count (approximately).')
    parser.add_argument('--format', required=True,
                        help='Output format: obj, drc, or ngmesh')
    parser.add_argument('--rescale', type=float,
                        help='Multiply all vertex coordinates by this factor before storing the mesh. Important for writing to ngmesh format.')
    parser.add_argument('--output-directory', '-d',
                        help='Directory to dump decimated meshes.')
    parser.add_argument('--output-url', '-u',
                        help='DVID keyvalue instance to write decimated mesh files to, '
                             'specified as a complete URL, e.g. http://emdata1:8000/api/node/123abc/my-meshes')
    parser.add_argument('server', help='dvid server, e.g. emdata3:8900')
    parser.add_argument('uuid', help='dvid node')
    parser.add_argument('tsv_instance', help='name of a tarsupervoxels instance, e.g. segmentation_sv_meshes')
    parser.add_argument('bodies', nargs='+',
                        help='A list of body IDs OR a path to a CSV containing a column named "body", which will be read.\n'
                             'If no "body" column exists, the first column is used, regardless of the name.')
    args = parser.parse_args()

    if args.fraction is None:
        raise RuntimeError("Please specify a decimation fraction.")

    if args.format is None:
        raise RuntimeError("Please specify an output format (either 'drc' or 'obj') via --format")

    if args.output_directory:
        os.makedirs(args.output_directory, exist_ok=True)

    if args.format == "ngmesh" and args.rescale is None:
        raise RuntimeError("When writing to ngmesh, please specify an explicit rescale factor.")

    args.rescale = args.rescale or 1.0

    output_dvid = None
    if args.output_url:
        if '/api/node' not in args.output_url:
            raise RuntimeError("Please specify the output instance as a complete URL, "
                               "e.g. http://emdata1:8000/api/node/123abc/my-meshes")

        # drop 'http://' (if present)
        url = args.output_url.split('://')[-1]
        parts = url.split('/')
        assert parts[1] == 'api'
        assert parts[2] == 'node'
        output_server = parts[0]
        output_uuid = parts[3]
        output_instance = parts[4]
        output_dvid = (output_server, output_uuid, output_instance)

    all_bodies = []
    for body in args.bodies:
        if body.endswith('.csv'):
            if 'body' in read_csv_header(body):
                bodies = pd.read_csv(body)['body'].drop_duplicates()
            else:
                # Just read the first column, no matter what it's named
                bodies = read_csv_col(body, 0, np.uint64).drop_duplicates()
            all_bodies.extend(bodies)
        else:
            try:
                body = int(body)
            except ValueError:
                raise RuntimeError(f"Invalid body ID: '{body}'")
            all_bodies.append(body)

    for body_id in tqdm_proxy(all_bodies):
        output_path = None
        if args.output_directory:
            output_path = f'{args.output_directory}/{body_id}.{args.format}'

        decimate_existing_mesh(args.server, args.uuid, args.tsv_instance,
                               body_id, args.fraction, args.max_vertices,
                               args.rescale, args.format, output_path, output_dvid)
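# Example invocations, as a comment-only sketch. The script filename, uuid, body IDs,
# and bodies.csv are hypothetical; the server, instance name, and output URL come from
# the help text above.
#
#     python decimate_existing_mesh.py --fraction=0.1 --format=obj -d decimated-meshes \
#         emdata3:8900 abc123 segmentation_sv_meshes 1071121755 1071121756
#
#     python decimate_existing_mesh.py --fraction=0.2 --format=drc \
#         -u http://emdata1:8000/api/node/123abc/my-meshes \
#         emdata3:8900 abc123 segmentation_sv_meshes bodies.csv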