def test_fetch_complete_mappings(labelmap_setup):
    """
    Very BASIC test for fetch_complete_mappings().
    Does not verify features related to split supervoxels.
    """
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup
    instance_info = DvidInstanceInfo(dvid_server, dvid_repo, 'segmentation')

    # No kafka log to consult in this test setup.
    mapping = fetch_complete_mappings(*instance_info, kafka_msgs=[])

    # Contract: a pandas Series, indexed by supervoxel, named 'body'.
    assert isinstance(mapping, pd.Series)
    assert mapping.index.name == 'sv'
    assert mapping.name == 'body'

    # All five supervoxels exist and every one maps to body 1
    # (see initialization in conftest.py).
    assert sorted(mapping.index) == [1, 2, 3, 4, 5]
    assert (mapping == 1).all()
def main_impl(args):
    """
    Ingest LabelIndexes and/or label mappings into a DVID labelmap instance.

    The requested ``args.operation`` selects what is uploaded:
      - 'indexes' / 'sort-only': load (and, if necessary, sort) blockwise
        supervoxel statistics and ingest LabelIndexes.
      - 'mappings': ingest the supervoxel -> body mapping.
      - 'both': do both.

    ``args.agglomeration_mapping`` may be a ``.csv`` file, a ``.npy`` file,
    or a DVID UUID from which the complete mapping is fetched.

    Raises:
        RuntimeError: for an unusable mapping argument, a malformed ``.npy``
            mapping file, or a missing stats file / mapping when the chosen
            operation requires one.
    """
    # Read agglomeration file (or fetch the mapping from DVID).
    segment_to_body_df = None
    if args.agglomeration_mapping:
        with Timer("Loading agglomeration mapping", logger):
            if args.agglomeration_mapping.endswith('.csv'):
                mapping_pairs = load_edge_csv(args.agglomeration_mapping)
                segment_to_body_df = pd.DataFrame(mapping_pairs, columns=AGGLO_MAP_COLUMNS)
            elif args.agglomeration_mapping.endswith('.npy'):
                mapping_pairs = np.load(args.agglomeration_mapping)
                # Accept either a (N,2) array or an (N,) record array
                if mapping_pairs.ndim == 2 and mapping_pairs.shape[1] == 2:
                    segment_to_body_df = pd.DataFrame(mapping_pairs, columns=AGGLO_MAP_COLUMNS)
                elif mapping_pairs.ndim == 1:
                    segment_to_body_df = pd.DataFrame(mapping_pairs)
                    # Validate user input with an explicit raise, not assert:
                    # asserts are stripped under 'python -O', and bad input
                    # should surface as RuntimeError like the other branches.
                    if segment_to_body_df.columns.tolist() != AGGLO_MAP_COLUMNS:
                        raise RuntimeError(
                            f"mapping given in {args.agglomeration_mapping} has the wrong column names.")
                else:
                    raise RuntimeError(f"Did not understand mapping file: {args.agglomeration_mapping}")
            else:
                # Not a recognized file type: interpret as a DVID UUID (hex only).
                if set(args.agglomeration_mapping) - set('0123456789abcdef'):
                    raise RuntimeError(
                        f"Your agglomeration mapping is neither a CSV file nor a UUID: "
                        f"{args.agglomeration_mapping}")
                mapping_uuid = args.agglomeration_mapping
                logger.info(f"Loading agglomeration mapping from UUID {mapping_uuid}")
                mapping_series = fetch_complete_mappings(args.server, mapping_uuid, args.labelmap_instance)
                segment_to_body_df = pd.DataFrame({'segment_id': mapping_series.index.values})
                segment_to_body_df['body_id'] = mapping_series.values
                # Internal sanity check (our own construction, not user input).
                assert (segment_to_body_df.columns == AGGLO_MAP_COLUMNS).all()

    subset_labels = None
    if args.subset_labels:
        # Without a mapping, the subset file is interpreted as supervoxel IDs.
        is_supervoxels = (args.agglomeration_mapping is None)
        subset_labels = load_body_list(args.subset_labels, is_supervoxels)
        subset_labels = set(subset_labels)

    if args.last_mutid is None:
        args.last_mutid = fetch_repo_info(args.server, args.uuid)['MutationID']

    # Upload label indexes
    if args.operation in ('indexes', 'both', 'sort-only'):
        if not args.supervoxel_block_stats_h5:
            raise RuntimeError("You must provide a supervoxel_block_stats_h5 file if you want to ingest LabelIndexes")

        # Read block stats file
        block_sv_stats, presorted_by, agglomeration_path = load_stats_h5_to_records(args.supervoxel_block_stats_h5)

        # Stats are usable as-is if they were presorted consistently with the
        # mapping we are using (or presorted by segment when no mapping is used).
        stats_are_presorted = False
        if args.agglomeration_mapping:
            if (presorted_by == 'body_id') and (agglomeration_path == args.agglomeration_mapping):
                stats_are_presorted = True
        elif presorted_by == 'segment_id':
            stats_are_presorted = True

        if stats_are_presorted:
            logger.info("Stats are pre-sorted")
        else:
            output_dir, basename = os.path.split(os.path.abspath(args.supervoxel_block_stats_h5))
            if segment_to_body_df is None:
                output_path = os.path.join(output_dir, 'sorted-by-segment-' + basename)
            else:
                output_path = os.path.join(output_dir, 'sorted-by-body-' + basename)
            sort_block_stats(block_sv_stats, segment_to_body_df, output_path, args.agglomeration_mapping)

        if args.operation == 'sort-only':
            return

        with Timer(f"Grouping {len(block_sv_stats)} blockwise supervoxel counts and loading LabelIndices", logger):
            ingest_label_indexes(args.server,
                                 args.uuid,
                                 args.labelmap_instance,
                                 args.last_mutid,
                                 block_sv_stats,
                                 subset_labels,
                                 args.tombstones,
                                 batch_rows=args.batch_size,
                                 num_threads=args.num_threads,
                                 check_mismatches=args.check_mismatches)

    # Upload mappings
    if args.operation in ('mappings', 'both'):
        if not args.agglomeration_mapping:
            raise RuntimeError("Can't load mappings without an agglomeration-mapping file.")

        with Timer("Loading mapping ops", logger):
            ingest_mapping(args.server,
                           args.uuid,
                           args.labelmap_instance,
                           args.last_mutid,
                           segment_to_body_df,
                           subset_labels,
                           args.batch_size)
def main_impl(args):
    """
    Ingest LabelIndexes and/or label mappings into a DVID labelmap instance,
    according to ``args.operation`` ('indexes', 'mappings', 'both', 'sort-only').

    ``args.agglomeration_mapping`` is either a ``.csv`` file of segment/body
    pairs, or a DVID UUID from which the complete mapping is fetched.
    """
    # Obtain the segment -> body mapping (if the caller supplied one).
    segment_to_body_df = None
    if args.agglomeration_mapping:
        with Timer("Loading agglomeration mapping", logger):
            if args.agglomeration_mapping.endswith('.csv'):
                pairs = load_edge_csv(args.agglomeration_mapping)
                segment_to_body_df = pd.DataFrame(pairs, columns=AGGLO_MAP_COLUMNS)
            else:
                # Not a CSV -- interpret the argument as a DVID UUID (hex only).
                if set(args.agglomeration_mapping) - set('0123456789abcdef'):
                    raise RuntimeError(f"Your agglomeration mapping is neither a CSV file nor a UUID: {args.agglomeration_mapping}")
                mapping_uuid = args.agglomeration_mapping
                logger.info(f"Loading agglomeration mapping from UUID {mapping_uuid}")
                mapping_series = fetch_complete_mappings(args.server, mapping_uuid, args.labelmap_instance)
                segment_to_body_df = pd.DataFrame({'segment_id': mapping_series.index.values})
                segment_to_body_df['body_id'] = mapping_series.values
                assert (segment_to_body_df.columns == AGGLO_MAP_COLUMNS).all()

    if args.last_mutid is None:
        args.last_mutid = fetch_repo_info(args.server, args.uuid)['MutationID']

    show_progress = not args.no_progress_bar

    # Upload label indexes
    if args.operation in ('indexes', 'both', 'sort-only'):
        if not args.supervoxel_block_stats_h5:
            raise RuntimeError("You must provide a supervoxel_block_stats_h5 file if you want to ingest LabelIndexes")

        # Read block stats file
        block_sv_stats, presorted_by, agglomeration_path = load_stats_h5_to_records(args.supervoxel_block_stats_h5)

        # Stats may be used as-is only if they were presorted consistently
        # with the mapping we're using (or by segment, when there is none).
        if args.agglomeration_mapping:
            stats_are_presorted = (presorted_by == 'body_id'
                                   and agglomeration_path == args.agglomeration_mapping)
        else:
            stats_are_presorted = (presorted_by == 'segment_id')

        if stats_are_presorted:
            logger.info("Stats are pre-sorted")
        else:
            stats_dir, stats_name = os.path.split(os.path.abspath(args.supervoxel_block_stats_h5))
            if segment_to_body_df is None:
                sorted_path = stats_dir + '/sorted-by-segment-' + stats_name
            else:
                sorted_path = stats_dir + '/sorted-by-body-' + stats_name
            sort_block_stats(block_sv_stats, segment_to_body_df, sorted_path, args.agglomeration_mapping)

        if args.operation == 'sort-only':
            return

        with Timer(f"Grouping {len(block_sv_stats)} blockwise supervoxel counts and loading LabelIndices", logger):
            ingest_label_indexes(args.server,
                                 args.uuid,
                                 args.labelmap_instance,
                                 args.last_mutid,
                                 block_sv_stats,
                                 args.tombstones,
                                 batch_rows=args.batch_size,
                                 num_threads=args.num_threads,
                                 show_progress_bar=show_progress,
                                 check_mismatches=args.check_mismatches)

    # Upload mappings
    if args.operation in ('mappings', 'both'):
        if not args.agglomeration_mapping:
            raise RuntimeError("Can't load mappings without an agglomeration-mapping file.")

        with Timer(f"Loading mapping ops", logger):
            ingest_mapping(args.server,
                           args.uuid,
                           args.labelmap_instance,
                           args.last_mutid,
                           segment_to_body_df,
                           args.batch_size,
                           show_progress_bar=show_progress)