Example 1
import pandas as pd
# Assumed imports for this snippet: both names are taken from neuclease's DVID client.
from neuclease.dvid import DvidInstanceInfo, fetch_complete_mappings

def test_fetch_complete_mappings(labelmap_setup):
    """
    Very basic test for fetch_complete_mappings().
    Does not verify features related to split supervoxels.
    """
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup
    instance_info = DvidInstanceInfo(dvid_server, dvid_repo, 'segmentation')

    mapping = fetch_complete_mappings(*instance_info, kafka_msgs=[])
    assert isinstance(mapping, pd.Series)
    assert mapping.index.name == 'sv'
    assert mapping.name == 'body'
    assert (sorted(mapping.index) == [1, 2, 3, 4, 5])
    assert (mapping == 1).all()  # see initialization in conftest.py
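For reference, a minimal sketch of the mapping this test expects, assuming the conftest.py fixture merges all five supervoxels into body 1 (as the final assertions imply):

import pandas as pd

# Hand-built equivalent of the expected result:
# index = supervoxel IDs (named 'sv'), values = body IDs (named 'body').
expected = pd.Series([1, 1, 1, 1, 1],
                     index=pd.Index([1, 2, 3, 4, 5], name='sv'),
                     name='body')

# e.g.: pd.testing.assert_series_equal(mapping.sort_index(), expected,
#                                      check_dtype=False)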
Example 2
import os
import numpy as np
import pandas as pd

# Module-level helpers assumed from the surrounding script: Timer, logger,
# AGGLO_MAP_COLUMNS, load_edge_csv, load_body_list, fetch_complete_mappings,
# fetch_repo_info, load_stats_h5_to_records, sort_block_stats,
# ingest_label_indexes, and ingest_mapping.
def main_impl(args):
    # Read the agglomeration mapping (CSV file, .npy file, or DVID UUID)
    segment_to_body_df = None
    if args.agglomeration_mapping:
        with Timer("Loading agglomeration mapping", logger):
            if args.agglomeration_mapping.endswith('.csv'):
                mapping_pairs = load_edge_csv(args.agglomeration_mapping)
                segment_to_body_df = pd.DataFrame(mapping_pairs, columns=AGGLO_MAP_COLUMNS)
            elif args.agglomeration_mapping.endswith('.npy'):
                mapping_pairs = np.load(args.agglomeration_mapping)
                # Accept either a (N,2) array or an (N,) record array
                if mapping_pairs.ndim == 2 and mapping_pairs.shape[1] == 2:
                    segment_to_body_df = pd.DataFrame(mapping_pairs, columns=AGGLO_MAP_COLUMNS)
                elif mapping_pairs.ndim == 1:
                    segment_to_body_df = pd.DataFrame(mapping_pairs)
                    assert segment_to_body_df.columns.tolist() == AGGLO_MAP_COLUMNS, \
                        f"mapping given in {args.agglomeration_mapping} has the wrong column names."
                else:
                    raise RuntimeError(f"Did not understand mapping file: {args.agglomeration_mapping}")
            else:
                if set(args.agglomeration_mapping) - set('0123456789abcdef'):
                    raise RuntimeError(f"Your agglomeration mapping is neither a CSV file nor a UUID: {args.agglomeration_mapping}")

                mapping_uuid = args.agglomeration_mapping
                logger.info(f"Loading agglomeration mapping from UUID {mapping_uuid}")
                mapping_series = fetch_complete_mappings(args.server, mapping_uuid, args.labelmap_instance)
                segment_to_body_df = pd.DataFrame( {'segment_id': mapping_series.index.values} )
                segment_to_body_df['body_id'] = mapping_series.values
                assert (segment_to_body_df.columns == AGGLO_MAP_COLUMNS).all()

    subset_labels = None
    if args.subset_labels:
        is_supervoxels = (args.agglomeration_mapping is None)
        subset_labels = load_body_list(args.subset_labels, is_supervoxels)
        subset_labels = set(subset_labels)

    if args.last_mutid is None:
        args.last_mutid = fetch_repo_info(args.server, args.uuid)['MutationID']

    # Upload label indexes
    if args.operation in ('indexes', 'both', 'sort-only'):
        if not args.supervoxel_block_stats_h5:
            raise RuntimeError("You must provide a supervoxel_block_stats_h5 file if you want to ingest LabelIndexes")

        # Read block stats file
        block_sv_stats, presorted_by, agglomeration_path = load_stats_h5_to_records(args.supervoxel_block_stats_h5)
        
        stats_are_presorted = False
        if args.agglomeration_mapping:
            if (presorted_by == 'body_id') and (agglomeration_path == args.agglomeration_mapping):
                stats_are_presorted = True
        elif presorted_by == 'segment_id':
            stats_are_presorted = True
        
        if stats_are_presorted:
            logger.info("Stats are pre-sorted")
        else:
            output_dir, basename = os.path.split(os.path.abspath(args.supervoxel_block_stats_h5))
            if segment_to_body_df is None:
                output_path = output_dir + '/sorted-by-segment-' + basename
            else:
                output_path = output_dir + '/sorted-by-body-' + basename
            sort_block_stats(block_sv_stats, segment_to_body_df, output_path, args.agglomeration_mapping)
    
        if args.operation == 'sort-only':
            return

        with Timer(f"Grouping {len(block_sv_stats)} blockwise supervoxel counts and loading LabelIndices", logger):
            ingest_label_indexes( args.server,
                                  args.uuid,
                                  args.labelmap_instance,
                                  args.last_mutid,
                                  block_sv_stats,
                                  subset_labels,
                                  args.tombstones,
                                  batch_rows=args.batch_size,
                                  num_threads=args.num_threads,
                                  check_mismatches=args.check_mismatches )

    # Upload mappings
    if args.operation in ('mappings', 'both'):
        if not args.agglomeration_mapping:
            raise RuntimeError("Can't load mappings without an agglomeration-mapping file.")
        
        with Timer(f"Loading mapping ops", logger):
            ingest_mapping( args.server,
                            args.uuid,
                            args.labelmap_instance,
                            args.last_mutid,
                            segment_to_body_df,
                            subset_labels,
                            args.batch_size )
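A sketch of the mapping inputs the first branch above accepts; AGGLO_MAP_COLUMNS == ['segment_id', 'body_id'] follows from the column assignments and assert in the UUID branch, and the file names here are hypothetical:

import numpy as np

# (1) A CSV of (segment_id, body_id) pairs, parsed by load_edge_csv().

# (2) A plain (N, 2) array saved as .npy:
pairs = np.array([[1, 1], [2, 1], [3, 1]], dtype=np.uint64)
np.save('mapping-pairs.npy', pairs)

# (3) A 1-D record array whose field names already match AGGLO_MAP_COLUMNS,
#     so pd.DataFrame(mapping_pairs) picks up the right column names:
records = np.zeros(3, dtype=[('segment_id', np.uint64), ('body_id', np.uint64)])
records['segment_id'] = [1, 2, 3]
records['body_id'] = 1
np.save('mapping-records.npy', records)

# (4) Anything else must be a DVID UUID (lowercase hex only), from which the
#     complete mapping is fetched via fetch_complete_mappings().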
Example 3
# (The same assumed imports and module-level helpers as in Example 2 apply.)
def main_impl(args):
    # Read the agglomeration mapping (CSV file or DVID UUID)
    segment_to_body_df = None
    if args.agglomeration_mapping:
        with Timer("Loading agglomeration mapping", logger):
            if args.agglomeration_mapping.endswith('.csv'):
                mapping_pairs = load_edge_csv(args.agglomeration_mapping)
                segment_to_body_df = pd.DataFrame(mapping_pairs, columns=AGGLO_MAP_COLUMNS)
            else:
                if set(args.agglomeration_mapping) - set('0123456789abcdef'):
                    raise RuntimeError(f"Your agglomeration mapping is neither a CSV file nor a UUID: {args.agglomeration_mapping}")

                mapping_uuid = args.agglomeration_mapping
                logger.info(f"Loading agglomeration mapping from UUID {mapping_uuid}")
                mapping_series = fetch_complete_mappings(args.server, mapping_uuid, args.labelmap_instance)
                segment_to_body_df = pd.DataFrame( {'segment_id': mapping_series.index.values} )
                segment_to_body_df['body_id'] = mapping_series.values
                assert (segment_to_body_df.columns == AGGLO_MAP_COLUMNS).all()

    if args.last_mutid is None:
        args.last_mutid = fetch_repo_info(args.server, args.uuid)['MutationID']

    # Upload label indexes
    if args.operation in ('indexes', 'both', 'sort-only'):
        if not args.supervoxel_block_stats_h5:
            raise RuntimeError("You must provide a supervoxel_block_stats_h5 file if you want to ingest LabelIndexes")

        # Read block stats file
        block_sv_stats, presorted_by, agglomeration_path = load_stats_h5_to_records(args.supervoxel_block_stats_h5)
        
        stats_are_presorted = False
        if args.agglomeration_mapping:
            if (presorted_by == 'body_id') and (agglomeration_path == args.agglomeration_mapping):
                stats_are_presorted = True
        elif presorted_by == 'segment_id':
            stats_are_presorted = True
        
        if stats_are_presorted:
            logger.info("Stats are pre-sorted")
        else:
            output_dir, basename = os.path.split(os.path.abspath(args.supervoxel_block_stats_h5))
            if segment_to_body_df is None:
                output_path = output_dir + '/sorted-by-segment-' + basename
            else:
                output_path = output_dir + '/sorted-by-body-' + basename
            sort_block_stats(block_sv_stats, segment_to_body_df, output_path, args.agglomeration_mapping)
    
        if args.operation == 'sort-only':
            return

        with Timer(f"Grouping {len(block_sv_stats)} blockwise supervoxel counts and loading LabelIndices", logger):
            ingest_label_indexes( args.server,
                                  args.uuid,
                                  args.labelmap_instance,
                                  args.last_mutid,
                                  block_sv_stats,
                                  args.tombstones,
                                  batch_rows=args.batch_size,
                                  num_threads=args.num_threads,
                                  show_progress_bar=not args.no_progress_bar,
                                  check_mismatches=args.check_mismatches )

    # Upload mappings
    if args.operation in ('mappings', 'both'):
        if not args.agglomeration_mapping:
            raise RuntimeError("Can't load mappings without an agglomeration-mapping file.")
        
        with Timer(f"Loading mapping ops", logger):
            ingest_mapping( args.server,
                            args.uuid,
                            args.labelmap_instance,
                            args.last_mutid,
                            segment_to_body_df,
                            args.batch_size,
                            show_progress_bar=not args.no_progress_bar )
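For completeness, a hypothetical argparse wiring for this variant. Every flag name below is an assumption inferred from the attribute names main_impl() reads (argparse maps '-' to '_'); the real script's CLI may differ:

import argparse

def main():
    # Hypothetical CLI sketch; flag names and defaults are inferred, not confirmed.
    parser = argparse.ArgumentParser()
    parser.add_argument('--agglomeration-mapping')
    parser.add_argument('--operation', default='both',
                        choices=['indexes', 'mappings', 'both', 'sort-only'])
    parser.add_argument('--last-mutid', type=int)
    parser.add_argument('--batch-size', type=int, default=100_000)
    parser.add_argument('--num-threads', type=int, default=1)
    parser.add_argument('--tombstones', action='store_true')
    parser.add_argument('--check-mismatches', action='store_true')
    parser.add_argument('--no-progress-bar', action='store_true')
    parser.add_argument('server')
    parser.add_argument('uuid')
    parser.add_argument('labelmap_instance')
    parser.add_argument('supervoxel_block_stats_h5', nargs='?')
    main_impl(parser.parse_args())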