Example #1
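This test (evidently from the neuclease test suite) writes a random volume to a scratch labelmap instance, computes the expected per-block supervoxel counts by hand, and verifies the index DVID generates against them, using both the 'pandas' and 'protobuf' formats of fetch_labelindex.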
import datetime

import numpy as np
import pandas as pd

# Assumed imports: these helpers appear to come from the neuclease library,
# whose test suite this example is drawn from.
from neuclease.util import ndrange, extract_subvol
from neuclease.dvid import (post_branch, post_labelmap_voxels, fetch_labelindex,
                            post_labelindex, create_labelindex, PandasLabelIndex)


def test_labelindex(labelmap_setup):
    dvid_server, dvid_repo, _merge_table_path, _mapping_path, _supervoxel_vol = labelmap_setup

    # Need an unlocked node to test these posts
    uuid = post_branch(dvid_server, dvid_repo, 'test_labelindex',
                       'test_labelindex')
    instance_info = (dvid_server, uuid, 'segmentation-scratch')

    # Write some random data
    sv = 99
    vol = sv * np.random.randint(2, size=(128, 128, 128), dtype=np.uint64)
    offset = np.array((64, 64, 64))

    # DVID will generate the index.
    post_labelmap_voxels(*instance_info, offset, vol)

    # Compute labelindex table from scratch
    rows = []
    for block_coord in ndrange(offset, offset + vol.shape, (64, 64, 64)):
        block_coord = np.array(block_coord)
        block_box = np.array((block_coord, block_coord + 64))
        block = extract_subvol(vol, block_box - offset)

        count = (block == sv).sum()
        rows.append([*block_coord, sv, count])

    index_df = pd.DataFrame(rows, columns=['z', 'y', 'x', 'sv', 'count'])

    # Check DVID's generated labelindex table against expected
    labelindex_tuple = fetch_labelindex(*instance_info, sv, format='pandas')
    assert labelindex_tuple.label == sv

    labelindex_tuple.blocks.sort_values(['z', 'y', 'x', 'sv'], inplace=True)
    labelindex_tuple.blocks.reset_index(drop=True, inplace=True)
    assert (labelindex_tuple.blocks == index_df).all().all()

    # Check our protobuf against DVID's
    index_tuple = PandasLabelIndex(index_df, sv, 1,
                                   datetime.datetime.now().isoformat(),
                                   'someuser')
    labelindex = create_labelindex(index_tuple)

    # Since labelindex block entries are not required to be sorted,
    # dvid might return them in a different order.
    # Hence this comparison function which sorts them first.
    def compare_proto_blocks(left, right):
        left_blocks = sorted(left.blocks.items())
        right_blocks = sorted(right.blocks.items())
        return left_blocks == right_blocks

    dvid_labelindex = fetch_labelindex(*instance_info, sv, format='protobuf')
    assert compare_proto_blocks(labelindex, dvid_labelindex)

    # Check post/get roundtrip
    post_labelindex(*instance_info, sv, labelindex)
    dvid_labelindex = fetch_labelindex(*instance_info, sv, format='protobuf')
    assert compare_proto_blocks(labelindex, dvid_labelindex)
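As the test above illustrates, format='pandas' yields a PandasLabelIndex tuple whose .blocks DataFrame holds one row per (block, supervoxel) pair, with columns ['z', 'y', 'x', 'sv', 'count']. A minimal sketch of summarizing such an index (the server, UUID, and instance names here are hypothetical):

from neuclease.dvid import fetch_labelindex

instance_info = ('dvid-server:8000', 'abc123', 'segmentation')  # hypothetical
pli = fetch_labelindex(*instance_info, 99, format='pandas')

# Total voxel count per supervoxel, summed over all blocks:
sv_sizes = pli.blocks.groupby('sv')['count'].sum()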
Example #2
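This example is a one-off repair script: _repair_index() drops bad rows from an index's blocks table, while main() splices freshly computed block statistics into an existing labelindex, posts the patched index, and updates the supervoxel-to-body mapping to match.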
# Note: logger, NEW_SV_THRESHOLD, FRANKENBODY_SV, and PATCH_BOX_ZYX are
# module-level definitions from the original script, elided here.
def _repair_index(master_seg, body):
    pli = fetch_labelindex(*master_seg, body, format='pandas')

    # Drop every block that has any coordinate below 1024.
    # (That's where the bad blocks were added, and there isn't
    # supposed to be segmentation in that region.)
    pli.blocks.query('z >= 1024 and y >= 1024 and x >= 1024', inplace=True)

    li = create_labelindex(pli)
    post_labelindex(*master_seg, pli.label, li)


def main():
    configure_default_logging()
    
    parser = argparse.ArgumentParser()
    parser.add_argument('server')
    parser.add_argument('uuid')
    parser.add_argument('instance')
    parser.add_argument('block_stats')
    args = parser.parse_args()
    
    seg_instance = (args.server, args.uuid, args.instance)
    
    from flyemflows.bin.ingest_label_indexes import load_stats_h5_to_records
    with Timer("Loading block stats", logger):
        (block_sv_stats, _presorted_by, _agglo_path) = load_stats_h5_to_records(args.block_stats)
        stats_df = pd.DataFrame(block_sv_stats)
        stats_df = stats_df[['z', 'y', 'x', 'segment_id', 'count']]
        stats_df = stats_df.rename(columns={'segment_id': 'sv'})
        
        # Keep only the new supervoxels.
        stats_df = stats_df.query('sv > @NEW_SV_THRESHOLD').copy()
    
    with Timer("Fetching old labelindex", logger):
        # The hard-coded body ID here presumably equals FRANKENBODY_SV.
        labelindex = fetch_labelindex(*seg_instance, 106979579, format='protobuf')

    with Timer("Extracting labelindex table", logger):
        old_df = convert_labelindex_to_pandas(labelindex).blocks

    with Timer("Patching labelindex table", logger):
        # Discard old supervoxel stats within patched area
        in_patch  = (old_df[['z', 'y', 'x']].values >= PATCH_BOX_ZYX[0]).all(axis=1)
        in_patch &= (old_df[['z', 'y', 'x']].values  < PATCH_BOX_ZYX[1]).all(axis=1)
        
        old_df['in_patch'] = in_patch
        unpatched_df = old_df.query('not (in_patch and sv == @FRANKENBODY_SV)').copy()
        del unpatched_df['in_patch']
        
        # Append new stats
        new_df = pd.concat((unpatched_df, stats_df), ignore_index=True)
        new_df = new_df.sort_values(['z', 'y', 'x', 'sv'])

        np.save('old_df.npy', old_df.to_records(index=False))
        np.save('new_df.npy', new_df.to_records(index=False))

        if old_df['count'].sum() != new_df['count'].sum():
            logger.warning("Old and new indexes do not have the same total counts.  See old_df.npy and new_df.npy")

    with Timer("Constructing new labelindex", logger):    
        last_mutid = fetch_repo_info(*seg_instance[:2])["MutationID"]
        mod_time = datetime.datetime.now().isoformat()
        new_li = PandasLabelIndex(new_df, FRANKENBODY_SV, last_mutid, mod_time, os.environ.get("USER", "unknown"))
        new_labelindex = create_labelindex(new_li)

    with Timer("Posting new labelindex", logger):
        post_labelindex(*seg_instance, FRANKENBODY_SV, new_labelindex)

    with Timer("Posting updated mapping", logger):
        new_mapping = pd.Series(FRANKENBODY_SV, index=new_df['sv'].unique(), dtype=np.uint64, name='body')
        post_mappings(*seg_instance, new_mapping, last_mutid)

    logger.info("DONE")
Example #3
    def process_batch(self, batch_and_rowcount):
        """
        Given a batch of ERASED block stats, fetches the existing LabelIndex,
        subtracts the erased stats, and posts either an updated labelindex or
        a tombstone (if the body is completely erased).
        """
        next_stats_batch, next_stats_batch_total_rows = batch_and_rowcount

        batch_indexes = []
        missing_bodies = []
        unexpected_dfs = []
        all_deleted_svs = []
        for body_group in next_stats_batch:
            body_id = body_group[0]['body_id']

            try:
                old_index = fetch_labelindex(*self.src_info,
                                             body_id,
                                             format='pandas')
            except requests.RequestException as ex:
                missing_bodies.append(body_id)
                # A 4xx (e.g. 404) just means the body has no index;
                # anything else (or no response at all) deserves a warning.
                status = ex.response.status_code if ex.response is not None else None
                if status is None or not str(status).startswith('4'):
                    logger.warning(
                        f"Failed to fetch LabelIndex for label {body_id} due to error {status}"
                    )
                continue

            old_df = old_index.blocks
            erased_df = pd.DataFrame(body_group)
            erased_df = erased_df.rename(columns={'segment_id': 'sv'})
            erased_df = erased_df[['z', 'y', 'x', 'sv', 'count']]
            assert erased_df.columns.tolist() == old_df.columns.tolist()
            assert old_df.duplicated(['z', 'y', 'x', 'sv']).sum() == 0
            assert erased_df.duplicated(['z', 'y', 'x', 'sv']).sum() == 0

            # Find the rows that exist on the old side (or both)
            merged_df = old_df.merge(erased_df,
                                     'outer',
                                     on=['z', 'y', 'x', 'sv'],
                                     suffixes=['_old', '_erased'],
                                     indicator='side')
            merged_df['count_old'] = merged_df['count_old'].fillna(0).astype(np.uint32)
            merged_df['count_erased'] = merged_df['count_erased'].fillna(0).astype(np.uint32)

            # If some supervoxel was "erased" from a particular block and the original
            # labelindex didn't mention it, that's a sign of corruption.
            # Save it for subsequent analysis
            unexpected_df = merged_df.query('count_old == 0').copy()
            if len(unexpected_df) > 0:
                unexpected_df['body'] = body_id
                unexpected_dfs.append(unexpected_df)

            merged_df = merged_df.query('count_old > 0').copy()

            # Subtract the erased voxels from the original counts.
            # (Both columns are uint32; a row's count_erased is assumed
            # never to exceed its count_old.)
            merged_df['count'] = merged_df['count_old'] - merged_df['count_erased']

            new_df = merged_df[['z', 'y', 'x', 'sv', 'count']]
            new_df = new_df.query('count > 0').copy()

            deleted_svs = set(old_df['sv']) - set(new_df['sv'])
            if deleted_svs:
                deleted_svs = np.fromiter(deleted_svs, dtype=np.uint64)
                all_deleted_svs.append(deleted_svs)

            if len(new_df) == 0:
                # Nothing to keep. Make a tombstone.
                tombstone_index = LabelIndex()
                tombstone_index.label = body_id
                tombstone_index.last_mutid = self.last_mutid
                tombstone_index.last_mod_user = self.user
                tombstone_index.last_mod_time = self.mod_time
                batch_indexes.append(tombstone_index)
            else:
                pli = PandasLabelIndex(new_df, body_id, self.last_mutid,
                                       self.mod_time, self.user)
                new_labelindex = create_labelindex(pli)
                batch_indexes.append(new_labelindex)

        # Write entire batch to DVID
        post_labelindex_batch(*self.dest_info, batch_indexes)

        # Return missing body IDs and the set of unexpected rows
        if unexpected_dfs:
            unexpected_df = pd.concat(unexpected_dfs)
        else:
            unexpected_df = None

        if all_deleted_svs:
            all_deleted_svs = np.concatenate(all_deleted_svs)
        else:
            all_deleted_svs = np.array([], np.uint64)

        return next_stats_batch_total_rows, missing_bodies, unexpected_df, all_deleted_svs
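process_batch() is clearly meant to be driven by an outer batching loop. A hypothetical sketch of aggregating its results (the batch source and worker object are assumptions, not part of the original):

import pandas as pd

total_rows = 0
missing = []
unexpected = []
for batch in batches:  # assumed: yields (stats_batch, row_count) pairs
    rows, missing_bodies, unexpected_df, deleted_svs = worker.process_batch(batch)
    total_rows += rows
    missing.extend(missing_bodies)
    if unexpected_df is not None:
        unexpected.append(unexpected_df)

if unexpected:
    pd.concat(unexpected).to_csv('unexpected-erased-stats.csv', index=False)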