def test_read_csv_col_noheader():
    tmpfile = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', prefix='test_read_csv_col')
    tmpfile.write(textwrap.dedent("""\
        0,1,2
        3,4,5
    """))
    tmpfile.flush()

    col0 = read_csv_col(tmpfile.name)
    assert (col0 == [0, 3]).all()
    assert col0.name is None

    col1 = read_csv_col(tmpfile.name, 1)
    assert (col1 == [1, 4]).all()
    assert col1.name is None

    col2 = read_csv_col(tmpfile.name, 2)
    assert (col2 == [2, 5]).all()
    assert col2.name is None
def main():
    handler = logging.StreamHandler(sys.stdout)
    logger.setLevel(logging.INFO)
    logging.getLogger().addHandler(handler)

    parser = argparse.ArgumentParser()
    parser.add_argument('--results-output-log', '-o', default='split-copy-results-log.csv')
    parser.add_argument('--src-supervoxels-csv', required=False)
    parser.add_argument('--src-supervoxels-from-kafka', action='store_true')
    parser.add_argument('src_server')
    parser.add_argument('src_uuid')
    parser.add_argument('src_labelmap_instance')
    parser.add_argument('dest_server')
    parser.add_argument('dest_uuid')
    parser.add_argument('dest_labelmap_instance')
    args = parser.parse_args()

    src_info = InstanceInfo(args.src_server, args.src_uuid, args.src_labelmap_instance)
    dest_info = InstanceInfo(args.dest_server, args.dest_uuid, args.dest_labelmap_instance)

    if not ((args.src_supervoxels_csv is not None) ^ args.src_supervoxels_from_kafka):
        print("You must select either CSV or Kafka (not both)", file=sys.stderr)
        sys.exit(1)

    if args.src_supervoxels_csv:
        src_supervoxels = read_csv_col(args.src_supervoxels_csv, col=0, dtype=np.uint64)
    else:
        src_supervoxels = read_src_supervoxels_from_kafka(src_info)

    if len(src_supervoxels) == 0:
        logger.error("Error: No source supervoxels provided!")
        sys.exit(1)

    copy_results = copy_splits(src_supervoxels, src_info, dest_info)

    df = pd.DataFrame(np.array(copy_results, dtype=np.uint64),
                      columns=['src_sv', 'overwritten_sv', 'split_sv', 'remain_sv'])
    df.to_csv(args.results_output_log, index=False, header=True)
    print(f"Saved results log to {args.results_output_log}")

    logger.info("Done.")
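# Example invocation of this split-copy script, as a comment-only sketch. The script
# filename and all server/uuid/instance names below are hypothetical; only the argument
# names come from the parser above. Exactly one of --src-supervoxels-csv or
# --src-supervoxels-from-kafka must be given.
#
#     python copy_splits.py --src-supervoxels-csv=src_svs.csv \
#         emdata3:8900 abc123 segmentation \
#         emdata4:8900 def456 segmentation \
#         -o split-copy-results-log.csv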
def load_body_list(config_data, is_supervoxels):
    if isinstance(config_data, list):
        return np.array(config_data, dtype=np.uint64)

    bodies_csv = config_data
    del config_data

    assert os.path.exists(bodies_csv), \
        f"CSV file does not exist: {bodies_csv}"

    if is_supervoxels:
        col = 'sv'
    else:
        col = 'body'

    if col in read_csv_header(bodies_csv):
        bodies = pd.read_csv(bodies_csv)[col].drop_duplicates()
    else:
        # Just read the first column, no matter what it's named
        logger.warning(f"No column named {col}, so reading first column instead")
        bodies = read_csv_col(bodies_csv, 0, np.uint64).drop_duplicates()

    return bodies.values.astype(np.uint64)
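# Minimal usage sketch for load_body_list (the paths and IDs below are hypothetical):
# an explicit list is passed through as uint64, while a CSV path is read via its
# 'body' or 'sv' column, falling back to the first column.
#
#     bodies = load_body_list([123, 456, 789], is_supervoxels=False)
#     svs = load_body_list('/path/to/supervoxels.csv', is_supervoxels=True)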
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--use-mapping', action='store_true',
                        help='Use in-memory map + /exists instead of /missing, as described in the general help text above.')
    parser.add_argument('--output', '-o', default='missing-from-tsv.csv',
                        help='Where to write the output CSV (default: missing-from-tsv.csv)')
    parser.add_argument('--kafka-timestamp', '-k', type=str,
                        help='Alternative to providing your own bodies list.\n'
                             'Use the kafka log to automatically determine the list of bodies that have changed after the given timestamp.\n'
                             'Examples: -k="2018-11-22" -k="2018-11-22 17:34:00"')
    parser.add_argument('server', help='dvid server, e.g. emdata3:8900')
    parser.add_argument('uuid', help='dvid node to analyze or "master" for the latest master branch uuid')
    parser.add_argument('tsv_instance', help="Name of a tarsupervoxels instance, e.g. segmentation_sv_meshes.\n"
                                             "Must be sync'd to a labelmap (segmentation) instance.")
    parser.add_argument('bodies_csv', nargs='?',
                        help='CSV containing a column named "body", which will be read.\n'
                             'If no "body" column exists, the first column is used, regardless of the name.\n'
                             '(Omit this arg if you are using --kafka-timestamp)')
    args = parser.parse_args()

    if not (bool(args.kafka_timestamp) ^ bool(args.bodies_csv)):
        print("You must provide either --kafka-timestamp or a bodies list (not both)", file=sys.stderr)
        sys.exit(1)

    if args.uuid == "master":
        args.uuid = find_master(args.server)

    # Determine segmentation instance
    info = fetch_instance_info(args.server, args.uuid, args.tsv_instance)
    seg_instance = info["Base"]["Syncs"][0]

    kafka_msgs = None
    if args.bodies_csv:
        if 'body' in read_csv_header(args.bodies_csv):
            bodies = pd.read_csv(args.bodies_csv)['body'].drop_duplicates()
        else:
            # Just read the first column, no matter what it's named
            bodies = read_csv_col(args.bodies_csv, 0, np.uint64).drop_duplicates()
    elif args.kafka_timestamp:
        # Validate the timestamp format before fetching the kafka log, which takes a while.
        parse_timestamp(args.kafka_timestamp)

        kafka_msgs = read_kafka_messages(args.server, args.uuid, seg_instance)
        filtered_kafka_msgs = filter_kafka_msgs_by_timerange(kafka_msgs, min_timestamp=args.kafka_timestamp)

        new_bodies, changed_bodies, _removed_bodies, new_supervoxels, _deleted_svs = compute_affected_bodies(filtered_kafka_msgs)
        sv_split_bodies = set(fetch_mapping(args.server, args.uuid, seg_instance, new_supervoxels)) - set([0])

        bodies = set(chain(new_bodies, changed_bodies, sv_split_bodies))
        bodies = np.fromiter(bodies, np.uint64)
        bodies.sort()
    else:
        raise AssertionError("Shouldn't get here.")

    if args.use_mapping:
        missing_entries = check_tarsupervoxels_status_via_exists(args.server, args.uuid, args.tsv_instance,
                                                                 bodies, seg_instance, kafka_msgs=kafka_msgs)
    else:
        missing_entries = check_tarsupervoxels_status_via_missing(args.server, args.uuid, args.tsv_instance, bodies)

    logger.info(f"Writing to {args.output}")
    missing_entries.to_csv(args.output, index=True, header=True)
    logger.info("DONE")
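# Example invocations, as a comment-only sketch. The script filename is an assumption;
# the server, "master" uuid shortcut, and instance name are taken from the help text
# above, and bodies.csv is hypothetical.
#
#     python check_tarsupervoxels_status.py --use-mapping -o missing-from-tsv.csv \
#         emdata3:8900 master segmentation_sv_meshes bodies.csv
#
# or, using the kafka log instead of a bodies CSV:
#
#     python check_tarsupervoxels_status.py -k="2018-11-22 17:34:00" \
#         emdata3:8900 master segmentation_sv_meshes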
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--fraction', type=float,
                        help='Fraction of vertices to retain in the decimated mesh. Between 0.0 and 1.0')
    parser.add_argument('--max-vertices', type=float, default=1e9,
                        help='If necessary, decimate the mesh even further so that it has no more than this vertex count (approximately).')
    parser.add_argument('--format', required=True,
                        help='Output format: obj, drc, or ngmesh')
    parser.add_argument('--rescale', type=float,
                        help='Multiply all vertex coordinates by this factor before storing the mesh. Important for writing to ngmesh format.')
    parser.add_argument('--output-directory', '-d',
                        help='Directory to dump decimated meshes.')
    parser.add_argument('--output-url', '-u',
                        help='DVID keyvalue instance to write decimated mesh files to, '
                             'specified as a complete URL, e.g. http://emdata1:8000/api/node/123abc/my-meshes')
    parser.add_argument('server', help='dvid server, e.g. emdata3:8900')
    parser.add_argument('uuid', help='dvid node')
    parser.add_argument('tsv_instance', help='name of a tarsupervoxels instance, e.g. segmentation_sv_meshes')
    parser.add_argument('bodies', nargs='+',
                        help='A list of body IDs OR a path to a CSV containing a column named "body", which will be read.\n'
                             'If no "body" column exists, the first column is used, regardless of the name.')
    args = parser.parse_args()

    if args.fraction is None:
        raise RuntimeError("Please specify a decimation fraction.")

    if args.format is None:
        raise RuntimeError("Please specify an output format (either 'drc' or 'obj') via --format")

    if args.output_directory:
        os.makedirs(args.output_directory, exist_ok=True)

    if args.format == "ngmesh" and args.rescale is None:
        raise RuntimeError("When writing to ngmesh, please specify an explicit rescale factor.")

    args.rescale = args.rescale or 1.0

    output_dvid = None
    if args.output_url:
        if '/api/node' not in args.output_url:
            raise RuntimeError("Please specify the output instance as a complete URL, "
                               "e.g. http://emdata1:8000/api/node/123abc/my-meshes")

        # drop 'http://' (if present)
        url = args.output_url.split('://')[-1]
        parts = url.split('/')
        assert parts[1] == 'api'
        assert parts[2] == 'node'
        output_server = parts[0]
        output_uuid = parts[3]
        output_instance = parts[4]
        output_dvid = (output_server, output_uuid, output_instance)

    all_bodies = []
    for body in args.bodies:
        if body.endswith('.csv'):
            if 'body' in read_csv_header(body):
                bodies = pd.read_csv(body)['body'].drop_duplicates()
            else:
                # Just read the first column, no matter what it's named
                bodies = read_csv_col(body, 0, np.uint64).drop_duplicates()
            all_bodies.extend(bodies)
        else:
            try:
                body = int(body)
            except ValueError:
                raise RuntimeError(f"Invalid body ID: '{body}'")
            all_bodies.append(body)

    for body_id in tqdm_proxy(all_bodies):
        output_path = None
        if args.output_directory:
            output_path = f'{args.output_directory}/{body_id}.{args.format}'

        decimate_existing_mesh(args.server, args.uuid, args.tsv_instance,
                               body_id, args.fraction, args.max_vertices,
                               args.rescale, args.format, output_path, output_dvid)
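# Example invocations, as a comment-only sketch. The script filename, uuid, body IDs,
# and bodies.csv are hypothetical; the server, instance name, and output URL come from
# the help text above.
#
#     python decimate_existing_mesh.py --fraction=0.1 --format=obj -d decimated-meshes \
#         emdata3:8900 abc123 segmentation_sv_meshes 1071121755 1071121756
#
#     python decimate_existing_mesh.py --fraction=0.2 --format=drc \
#         -u http://emdata1:8000/api/node/123abc/my-meshes \
#         emdata3:8900 abc123 segmentation_sv_meshes bodies.csv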