Example #1
def test_read_csv_header_noheader():
    tmpfile = tempfile.NamedTemporaryFile(
        mode='w', suffix='.csv', prefix='test_read_csv_header_noheader')
    tmpfile.write(textwrap.dedent("""\
        0,1,2
        3,4,5
    """))
    tmpfile.flush()
    assert read_csv_header(tmpfile.name) is None
Example #2
def test_read_csv_header_singlecol():
    tmpfile = tempfile.NamedTemporaryFile(
        mode='w', suffix='.csv', prefix='test_read_csv_header_singlecol')
    tmpfile.write(textwrap.dedent("""\
        a
        0
        3
    """))
    tmpfile.flush()
    assert read_csv_header(tmpfile.name) == ['a']
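Taken together, these two tests pin down the expected behavior of read_csv_header: it returns None when the first row looks like data rather than column names, and a list of column names otherwise. Below is a minimal sketch of such a helper, written under the assumption that csv.Sniffer can be used for header detection; the real read_csv_header in the source project may be implemented differently.

import csv

def read_csv_header_sketch(csv_path):
    """Hypothetical sketch: return the column names, or None if the file has no header row."""
    with open(csv_path, 'r', newline='') as f:
        sample = f.read(4096)
        if not sample:
            return None
        try:
            has_header = csv.Sniffer().has_header(sample)
        except csv.Error:
            # Sniffer can fail on degenerate input (e.g. a single-column file);
            # fall back to checking whether the first field looks numeric.
            first_field = sample.splitlines()[0].split(',')[0].strip()
            has_header = not first_field.replace('.', '', 1).isdigit()
        if not has_header:
            return None
        f.seek(0)
        return next(csv.reader(f))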
Example #3
def _sanitize_config(self):
    """
    - Normalize/overwrite certain config values
    - Check for config mistakes
    - Simple sanity checks
    """
    # Convert input/output CSV to absolute paths
    options = self.config["samplepoints"]
    header = read_csv_header(options["input-table"])
    if header is None:
        raise RuntimeError(f"Input table does not have a header row: {options['input-table']}")

    if set('zyx') - set(header):
        raise RuntimeError(f"Input table does not have the expected column names: {options['input-table']}")
Example #4
def _sanitize_config(self):
    """
    - Normalize/overwrite certain config values
    - Check for config mistakes
    - Simple sanity checks
    """
    # Convert input/output CSV to absolute paths
    options = self.config_data["options"]
    options["input-table"] = self.relpath_to_abspath(options["input-table"])
    options["output-table"] = self.relpath_to_abspath(options["output-table"])

    header = read_csv_header(options["input-table"])
    if header is None:
        raise RuntimeError(f"Input table does not have a header row: {options['input-table']}")

    if set('zyx') - set(header):
        raise RuntimeError(f"Input table does not have the expected column names: {options['input-table']}")
Example #5
def load_body_list(config_data, is_supervoxels):
    if isinstance(config_data, list):
        return np.array(config_data, dtype=np.uint64)

    bodies_csv = config_data
    del config_data

    assert os.path.exists(bodies_csv), \
        f"CSV file does not exist: {bodies_csv}"

    if is_supervoxels:
        col = 'sv'
    else:
        col = 'body'

    if col in read_csv_header(bodies_csv):
        bodies = pd.read_csv(bodies_csv)[col].drop_duplicates()
    else:
        # Just read the first column, no matter what it's named
        logger.warning(
            f"No column named {col}, so reading first column instead")
        bodies = read_csv_col(bodies_csv, 0, np.uint64).drop_duplicates()

    return bodies.values.astype(np.uint64)
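For reference, load_body_list accepts either an inline list of body IDs or a path to a CSV file and returns a uint64 array either way. A short usage sketch (the CSV path below is hypothetical):

# From an inline list, e.g. taken straight from a config value:
bodies = load_body_list([123, 456, 789], is_supervoxels=False)

# From a CSV file (hypothetical path). If the file has a 'body' column
# ('sv' when is_supervoxels=True), that column is used; otherwise the
# first column is read regardless of its name.
bodies = load_body_list('/tmp/bodies.csv', is_supervoxels=False)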
Example #6
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '--use-mapping',
        action='store_true',
        help=
        'Use in-memory map + /exists instead of /missing, as described in the general help text above.'
    )
    parser.add_argument(
        '--output',
        '-o',
        default='missing-from-tsv.csv',
        help='Where to write the output CSV (default: missing-from-tsv.csv)')

    parser.add_argument(
        '--kafka-timestamp',
        '-k',
        type=str,
        help='Alternative to providing your own bodies list.\n'
        'Use the kafka log to automatically determine the list of bodies that have changed after the given timestamp.\n'
        'Examples: -k="2018-11-22" -k="2018-11-22 17:34:00"')

    parser.add_argument('server', help='dvid server, e.g. emdata3:8900')
    parser.add_argument(
        'uuid',
        help=
        'dvid node to analyze or "master" for the latest master branch uuid')
    parser.add_argument(
        'tsv_instance',
        help="Name of a tarsupervoxels instance, e.g. segmentation_sv_meshes.\n"
        "Must be sync'd to a labelmap (segmentation) instance.")
    parser.add_argument(
        'bodies_csv',
        nargs='?',
        help='CSV containing a column named "body", which will be read.\n'
        'If no "body" column exists, the first column is used, regardless of the name.\n'
        '(Omit this arg if you are using --kafka-timestamp)')
    args = parser.parse_args()

    if not (bool(args.kafka_timestamp) ^ bool(args.bodies_csv)):
        print(
            "You must provide either --kafka-timestamp or a bodies list (not both)",
            file=sys.stderr)
        sys.exit(1)

    if args.uuid == "master":
        args.uuid = find_master(args.server)

    # Determine segmentation instance
    info = fetch_instance_info(args.server, args.uuid, args.tsv_instance)
    seg_instance = info["Base"]["Syncs"][0]

    kafka_msgs = None
    if args.bodies_csv:
        if 'body' in read_csv_header(args.bodies_csv):
            bodies = pd.read_csv(args.bodies_csv)['body'].drop_duplicates()
        else:
            # Just read the first column, no matter what it's named
            bodies = read_csv_col(args.bodies_csv, 0,
                                  np.uint64).drop_duplicates()
    elif args.kafka_timestamp:
        # Validate timestamp format before fetching kafka log, which takes a while.
        parse_timestamp(args.kafka_timestamp)

        kafka_msgs = read_kafka_messages(args.server, args.uuid, seg_instance)
        filtered_kafka_msgs = filter_kafka_msgs_by_timerange(
            kafka_msgs, min_timestamp=args.kafka_timestamp)

        new_bodies, changed_bodies, _removed_bodies, new_supervoxels, _deleted_svs = compute_affected_bodies(
            filtered_kafka_msgs)
        sv_split_bodies = set(
            fetch_mapping(args.server, args.uuid, seg_instance,
                          new_supervoxels)) - set([0])

        bodies = set(chain(new_bodies, changed_bodies, sv_split_bodies))
        bodies = np.fromiter(bodies, np.uint64)
        bodies.sort()
    else:
        raise AssertionError("Shouldn't get here.")

    if args.use_mapping:
        missing_entries = check_tarsupervoxels_status_via_exists(
            args.server,
            args.uuid,
            args.tsv_instance,
            bodies,
            seg_instance,
            kafka_msgs=kafka_msgs)
    else:
        missing_entries = check_tarsupervoxels_status_via_missing(
            args.server, args.uuid, args.tsv_instance, bodies)

    logger.info(f"Writing to {args.output}")
    missing_entries.to_csv(args.output, index=True, header=True)
    logging.info("DONE")
Example #7
def main():
    configure_default_logging()
    
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--fraction', type=float,
                        help='Fraction of vertices to retain in the decimated mesh.  Between 0.0 and 1.0')
    parser.add_argument('--max-vertices', type=float, default=1e9,
                        help='If necessary, decimate the mesh even further so that it has no more than this vertex count (approximately).')
    parser.add_argument('--format',
                        help='Either obj or drc', required=True)
    parser.add_argument('--rescale', type=float,
                        help='Multiply all vertex coordinates by this factor before storing the mesh. Important for writing to ngmesh format.')
    parser.add_argument('--output-directory', '-d',
                        help='Directory to dump decimated meshes.')
    parser.add_argument('--output-url', '-u',
                        help='DVID keyvalue instance to write decimated mesh files to, '
                        'specified as a complete URL, e.g. http://emdata1:8000/api/node/123abc/my-meshes')
    parser.add_argument('server', help='dvid server, e.g. emdata3:8900')
    parser.add_argument('uuid', help='dvid node')
    parser.add_argument('tsv_instance', help='name of a tarsupervoxels instance, e.g. segmentation_sv_meshes')    
    parser.add_argument('bodies', nargs='+',
                        help='A list of body IDs OR a path to a CSV containing a column named "body", which will be read.\n'
                             'If no "body" column exists, the first column is used, regardless of the name.')

    args = parser.parse_args()

    if args.fraction is None:
        raise RuntimeError("Please specify a decimation fraction.")

    if args.format is None:
        raise RuntimeError("Please specify an output format (either 'drc' or 'obj' via --format")

    if args.output_directory:
        os.makedirs(args.output_directory, exist_ok=True)

    if args.format == "ngmesh" and args.rescale is None:
        raise RuntimeError("When writing to ngmesh, please specify an explict rescale factor.")

    args.rescale = args.rescale or 1.0

    output_dvid = None    
    if args.output_url:
        if '/api/node' not in args.output_url:
            raise RuntimeError("Please specify the output instance as a complete URL, "
                               "e.g. http://emdata1:8000/api/node/123abc/my-meshes")
        
        # drop 'http://' (if present)
        url = args.output_url.split('://')[-1]
        parts = url.split('/')
        assert parts[1] == 'api'
        assert parts[2] == 'node'
        
        output_server = parts[0]
        output_uuid = parts[3]
        output_instance = parts[4]
        
        output_dvid = (output_server, output_uuid, output_instance)


    all_bodies = []
    for body in args.bodies:
        if body.endswith('.csv'):
            if 'body' in read_csv_header(body):
                bodies = pd.read_csv(body)['body'].drop_duplicates()
            else:
                # Just read the first column, no matter what it's named
                bodies = read_csv_col(body, 0, np.uint64).drop_duplicates()
        else:
            try:
                # A single body ID was given directly on the command line
                bodies = [int(body)]
            except ValueError:
                raise RuntimeError(f"Invalid body ID: '{body}'")
        
        all_bodies.extend(bodies)

    for body_id in tqdm_proxy(all_bodies):
        output_path = None
        if args.output_directory:
            output_path = f'{args.output_directory}/{body_id}.{args.format}'

        decimate_existing_mesh(args.server, args.uuid, args.tsv_instance, body_id, args.fraction, args.max_vertices, args.rescale, args.format, output_path, output_dvid)
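For reference, the output-url parsing above yields the following pieces for the example URL from the help text (http://emdata1:8000/api/node/123abc/my-meshes):

url = 'http://emdata1:8000/api/node/123abc/my-meshes'.split('://')[-1]
parts = url.split('/')
# parts == ['emdata1:8000', 'api', 'node', '123abc', 'my-meshes']
# output_server   = parts[0]  -> 'emdata1:8000'
# output_uuid     = parts[3]  -> '123abc'
# output_instance = parts[4]  -> 'my-meshes'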