def test_labelindex(labelmap_setup):
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup

    # Need an unlocked node to test these posts
    uuid = post_branch(dvid_server, dvid_repo, 'test_labelindex', 'test_labelindex')
    instance_info = (dvid_server, uuid, 'segmentation-scratch')

    # Write some random data
    sv = 99
    vol = sv * np.random.randint(2, size=(128, 128, 128), dtype=np.uint64)
    offset = np.array((64, 64, 64))

    # DVID will generate the index.
    post_labelmap_voxels(*instance_info, offset, vol)

    # Compute the expected labelindex table from scratch, one 64^3 block at a time.
    rows = []
    for block_coord in ndrange(offset, offset + vol.shape, (64, 64, 64)):
        block_coord = np.array(block_coord)
        block_box = np.array((block_coord, block_coord + 64))
        block = extract_subvol(vol, block_box - offset)

        count = (block == sv).sum()
        rows.append([*block_coord, sv, count])

    index_df = pd.DataFrame(rows, columns=['z', 'y', 'x', 'sv', 'count'])

    # Check DVID's generated labelindex table against the expected table.
    labelindex_tuple = fetch_labelindex(*instance_info, sv, format='pandas')
    assert labelindex_tuple.label == sv

    labelindex_tuple.blocks.sort_values(['z', 'y', 'x', 'sv'], inplace=True)
    labelindex_tuple.blocks.reset_index(drop=True, inplace=True)
    assert (labelindex_tuple.blocks == index_df).all().all()

    # Check our protobuf against DVID's.
    index_tuple = PandasLabelIndex(index_df, sv, 1, datetime.datetime.now().isoformat(), 'someuser')
    labelindex = create_labelindex(index_tuple)

    # Since labelindex block entries are not required to be sorted,
    # dvid might return them in a different order.
    # Hence this comparison function, which sorts them first.
    def compare_proto_blocks(left, right):
        left_blocks = sorted(left.blocks.items())
        right_blocks = sorted(right.blocks.items())
        return left_blocks == right_blocks

    dvid_labelindex = fetch_labelindex(*instance_info, sv, format='protobuf')
    assert compare_proto_blocks(labelindex, dvid_labelindex)

    # Check the post/get roundtrip.
    post_labelindex(*instance_info, sv, labelindex)
    dvid_labelindex = fetch_labelindex(*instance_info, sv, format='protobuf')
    assert compare_proto_blocks(labelindex, dvid_labelindex)
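
# For reference, extract_subvol(vol, box) above is assumed to be plain box-based
# slicing (a utility along the lines of neuclease.util.extract_subvol).
# A minimal sketch of that assumption:
def _extract_subvol_sketch(vol, box):
    """Return vol[box[0]:box[1]] for an N-D [start, stop) box."""
    return vol[tuple(slice(a, b) for a, b in zip(box[0], box[1]))]
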
def _repair_index(master_seg, body):
    pli = fetch_labelindex(*master_seg, body, format='pandas')

    # Just drop the blocks below coordinate 1024, i.e. keep only blocks
    # whose z, y, AND x are all >= 1024.
    # (That's where the bad blocks were added, and there isn't
    # supposed to be segmentation in that region.)
    pli.blocks.query('z >= 1024 and y >= 1024 and x >= 1024', inplace=True)

    li = create_labelindex(pli)
    post_labelindex(*master_seg, pli.label, li)
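
# Hypothetical usage sketch (the server, uuid, instance, and body list below
# are placeholders, not from the source):
#
#   master_seg = ('my-dvid-server:8000', 'abc123', 'segmentation')
#   for body in bodies_with_bad_blocks:
#       _repair_index(master_seg, body)
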
def main():
    configure_default_logging()

    parser = argparse.ArgumentParser()
    parser.add_argument('server')
    parser.add_argument('uuid')
    parser.add_argument('instance')
    parser.add_argument('block_stats')
    args = parser.parse_args()

    seg_instance = (args.server, args.uuid, args.instance)

    from flyemflows.bin.ingest_label_indexes import load_stats_h5_to_records
    with Timer("Loading block stats", logger):
        # Use the stats file named on the command line
        # (previously hard-coded as 'block-statistics.h5', leaving the argument unused).
        (block_sv_stats, _presorted_by, _agglo_path) = load_stats_h5_to_records(args.block_stats)

    stats_df = pd.DataFrame(block_sv_stats)
    stats_df = stats_df[['z', 'y', 'x', 'segment_id', 'count']]
    stats_df = stats_df.rename(columns={'segment_id': 'sv'})

    # Keep only the new supervoxels.
    stats_df = stats_df.query('sv > @NEW_SV_THRESHOLD').copy()

    with Timer("Fetching old labelindex", logger):
        labelindex = fetch_labelindex(*seg_instance, 106979579, format='protobuf')

    with Timer("Extracting labelindex table", logger):
        old_df = convert_labelindex_to_pandas(labelindex).blocks

    with Timer("Patching labelindex table", logger):
        # Discard old supervoxel stats within the patched area.
        in_patch = (old_df[['z', 'y', 'x']].values >= PATCH_BOX_ZYX[0]).all(axis=1)
        in_patch &= (old_df[['z', 'y', 'x']].values < PATCH_BOX_ZYX[1]).all(axis=1)

        old_df['in_patch'] = in_patch
        unpatched_df = old_df.query('not (in_patch and sv == @FRANKENBODY_SV)').copy()
        del unpatched_df['in_patch']

        # Append the new stats.
        new_df = pd.concat((unpatched_df, stats_df), ignore_index=True)
        new_df = new_df.sort_values(['z', 'y', 'x', 'sv'])

        np.save('old_df.npy', old_df.to_records(index=False))
        np.save('new_df.npy', new_df.to_records(index=False))

        if old_df['count'].sum() != new_df['count'].sum():
            logger.warning("Old and new indexes do not have the same total counts. See old_df.npy and new_df.npy")

    with Timer("Constructing new labelindex", logger):
        last_mutid = fetch_repo_info(*seg_instance[:2])["MutationID"]
        mod_time = datetime.datetime.now().isoformat()
        new_li = PandasLabelIndex(new_df, FRANKENBODY_SV, last_mutid, mod_time, os.environ.get("USER", "unknown"))
        new_labelindex = create_labelindex(new_li)

    with Timer("Posting new labelindex", logger):
        post_labelindex(*seg_instance, FRANKENBODY_SV, new_labelindex)

    with Timer("Posting updated mapping", logger):
        new_mapping = pd.Series(FRANKENBODY_SV, index=new_df['sv'].unique(), dtype=np.uint64, name='body')
        post_mappings(*seg_instance, new_mapping, last_mutid)

    logger.info("DONE")
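
# A self-contained demo of the box-membership test used in the patching step above.
# PATCH_BOX_ZYX is a module-level constant in the real script; the box and
# coordinates below are made-up values for illustration only.
def _demo_patch_box_test():
    import numpy as np
    import pandas as pd

    patch_box_zyx = np.array([[0, 0, 0], [1024, 1024, 1024]])  # hypothetical [start, stop) box, ZYX order
    df = pd.DataFrame({'z': [512, 2048], 'y': [512, 2048], 'x': [512, 2048]})

    coords = df[['z', 'y', 'x']].values
    in_patch = (coords >= patch_box_zyx[0]).all(axis=1) & (coords < patch_box_zyx[1]).all(axis=1)
    assert in_patch.tolist() == [True, False]
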
def process_batch(self, batch_and_rowcount):
    """
    Given a batch of ERASED block stats, fetch the existing LabelIndex,
    subtract the erased stats, and post either an updated labelindex or
    a tombstone (if the body is completely erased).
    """
    next_stats_batch, next_stats_batch_total_rows = batch_and_rowcount

    batch_indexes = []
    missing_bodies = []
    unexpected_dfs = []
    all_deleted_svs = []
    for body_group in next_stats_batch:
        body_id = body_group[0]['body_id']

        try:
            old_index = fetch_labelindex(*self.src_info, body_id, format='pandas')
        except requests.RequestException as ex:
            missing_bodies.append(body_id)
            # Note: ex.response is None for connection errors, so guard before reading it.
            status = ex.response.status_code if ex.response is not None else None
            if status is None or not str(status).startswith('4'):
                logger.warning(f"Failed to fetch LabelIndex for label: {body_id} due to error {status}")
            continue

        old_df = old_index.blocks
        erased_df = pd.DataFrame(body_group).rename(columns={'segment_id': 'sv'})[['z', 'y', 'x', 'sv', 'count']]
        assert erased_df.columns.tolist() == old_df.columns.tolist()
        assert old_df.duplicated(['z', 'y', 'x', 'sv']).sum() == 0
        assert erased_df.duplicated(['z', 'y', 'x', 'sv']).sum() == 0

        # Find the rows that exist on the old side (or both sides).
        merged_df = old_df.merge(erased_df, 'outer', on=['z', 'y', 'x', 'sv'],
                                 suffixes=['_old', '_erased'], indicator='side')
        merged_df['count_old'] = merged_df['count_old'].fillna(0).astype(np.uint32)
        merged_df['count_erased'] = merged_df['count_erased'].fillna(0).astype(np.uint32)

        # If some supervoxel was "erased" from a particular block and the original
        # labelindex didn't mention it, that's a sign of corruption.
        # Save it for subsequent analysis.
        unexpected_df = merged_df.query('count_old == 0').copy()
        if len(unexpected_df) > 0:
            unexpected_df['body'] = body_id
            unexpected_dfs.append(unexpected_df)

        merged_df = merged_df.query('count_old > 0').copy()
        merged_df['count'] = merged_df['count_old'] - merged_df['count_erased']

        new_df = merged_df[['z', 'y', 'x', 'sv', 'count']]
        new_df = new_df.query('count > 0').copy()

        deleted_svs = set(old_df['sv']) - set(new_df['sv'])
        if deleted_svs:
            deleted_svs = np.fromiter(deleted_svs, dtype=np.uint64)
            all_deleted_svs.append(deleted_svs)

        if len(new_df) == 0:
            # Nothing to keep. Make a tombstone.
            tombstone_index = LabelIndex()
            tombstone_index.label = body_id
            tombstone_index.last_mutid = self.last_mutid
            tombstone_index.last_mod_user = self.user
            tombstone_index.last_mod_time = self.mod_time
            batch_indexes.append(tombstone_index)
        else:
            pli = PandasLabelIndex(new_df, body_id, self.last_mutid, self.mod_time, self.user)
            new_labelindex = create_labelindex(pli)
            batch_indexes.append(new_labelindex)

    # Write the entire batch to DVID.
    post_labelindex_batch(*self.dest_info, batch_indexes)

    # Return the missing body IDs and the set of unexpected rows.
    if unexpected_dfs:
        unexpected_df = pd.concat(unexpected_dfs)
    else:
        unexpected_df = None

    if all_deleted_svs:
        all_deleted_svs = np.concatenate(all_deleted_svs)

    return next_stats_batch_total_rows, missing_bodies, unexpected_df, all_deleted_svs
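
# A self-contained sketch of the outer-merge subtraction pattern used above,
# applied to a toy old/erased pair (all values hypothetical):
def _demo_erase_subtraction():
    import numpy as np
    import pandas as pd

    old = pd.DataFrame({'z': [0, 0], 'y': [0, 0], 'x': [0, 64], 'sv': [7, 7], 'count': [10, 4]})
    erased = pd.DataFrame({'z': [0], 'y': [0], 'x': [64], 'sv': [7], 'count': [4]})

    m = old.merge(erased, 'outer', on=['z', 'y', 'x', 'sv'],
                  suffixes=['_old', '_erased'], indicator='side')
    m['count_old'] = m['count_old'].fillna(0).astype(np.uint32)
    m['count_erased'] = m['count_erased'].fillna(0).astype(np.uint32)
    m['count'] = m['count_old'] - m['count_erased']

    # The block at (0, 0, 64) is fully erased; only the (0, 0, 0) row survives.
    surviving = m.query('count > 0')[['z', 'y', 'x', 'sv', 'count']]
    assert surviving.values.tolist() == [[0, 0, 0, 7, 10]]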