Example #1
0
    def init_support(indexer,
                     aid_list,
                     vecs_list,
                     fgws_list,
                     fxs_list,
                     verbose=True):
        r"""
        Builds the inverted index and allocates the FLANN search structure.

        The per-annotation vector lists are stacked into one flat array,
        and reverse mappings are kept so that a flat descriptor index
        (idx) can be traced back to its originating annotation id (aid)
        and feature index (fx).
        """
        assert indexer.flann is None, 'already initalized'

        logger.info('[nnindex] Preparing data for indexing / loading index')
        # Sanity-check the parallel input lists before inverting
        assert len(aid_list) == len(vecs_list), 'invalid input. bad len'
        assert len(aid_list) > 0, ('len(aid_list) == 0.'
                                   'Cannot invert index without features!')
        # Dense positional indexes into the input annotation list
        annot_xs = np.arange(len(aid_list))

        # Flatten per-annot vectors / weights and build reverse mappings
        idx2_vec, idx2_fgw, idx2_ax, idx2_fx = invert_index(
            vecs_list, fgws_list, annot_xs, fxs_list, verbose=verbose)

        indexer.flann = pyflann.FLANN()  # Approximate search structure
        indexer.ax2_aid = np.array(aid_list)  # (A x 1) Mapping to original annot ids
        indexer.idx2_vec = idx2_vec  # (M x D) Descriptors to index
        indexer.idx2_fgw = idx2_fgw  # (M x 1) Descriptor forground weight
        indexer.idx2_ax = idx2_ax  # (M x 1) Index into the aid_list
        indexer.idx2_fx = idx2_fx  # (M x 1) Index into the annot's features
        indexer.aid2_ax = ut.make_index_lookup(indexer.ax2_aid)
        indexer.num_indexed = indexer.idx2_vec.shape[0]
        # SIFT-style descriptors (hstypes.VEC_TYPE) have a known maximum
        # squared distance; other descriptor types fall back to None.
        if indexer.idx2_vec.dtype == hstypes.VEC_TYPE:
            indexer.max_distance_sqrd = hstypes.VEC_PSEUDO_MAX_DISTANCE_SQRD
        else:
            # FIXME: hacky way to support siam128 descriptors.
            indexer.max_distance_sqrd = None
Example #2
0
    def load_or_build_flann(dstcnvs_normer, cachedir=None, verbose=True, *args, **kwargs):
        """Load a cached FLANN index from disk, rebuilding it on failure."""
        from vtool._pyflann_backend import pyflann as pyflann

        flann_fpath = dstcnvs_normer.get_flann_fpath(cachedir)
        # No cached index on disk: delegate to ensure_flann and bail out early
        if not ut.checkpath(flann_fpath, verbose=ut.VERBOSE):
            dstcnvs_normer.ensure_flann(cachedir)
            return
        try:
            dstcnvs_normer.flann = pyflann.FLANN()
            dstcnvs_normer.flann.load_index(flann_fpath, dstcnvs_normer.vecs)
            # The mangled attribute is the only way to see whether pyflann
            # actually populated an index after load.
            assert dstcnvs_normer.flann._FLANN__curindex is not None
        except Exception as ex:
            # Corrupt / stale cache: warn and rebuild from scratch
            ut.printex(ex, '... cannot load distinctiveness flann', iswarning=True)
            dstcnvs_normer.rebuild(cachedir)
def pyflann_test_remove_add2():
    r"""
    CommandLine:
        python -m wbia.algo.hots._neighbor_experiment --exec-pyflann_test_remove_add2

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots._neighbor_experiment import *  # NOQA
        >>> pyflann_test_remove_add2()
    """
    from vtool._pyflann_backend import pyflann as pyflann
    import numpy as np

    rng = np.random.RandomState(0)
    vecs = (rng.rand(400, 128) * 255).astype(np.uint8)

    logger.info('Test initial save load')
    flann_params = {
        'random_seed': 42,
        'log_level': 'debug',
    }

    # pyflann.flann_ctypes.flannlib.flann_log_verbosity(4)

    logger.info('Test remove and then add THE SAME points')
    flann = pyflann.FLANN()
    flann.build_index(vecs, **flann_params)  # NOQA

    # Remove every even-indexed vector from the index
    removed = np.arange(0, len(vecs), 2)
    flann.remove_points(removed)

    # Re-add the first 100 of the vectors that were just removed
    readded = vecs[removed[0:100]]
    flann.add_points(readded)

    # Query every stacked vector against the index; a live point should
    # always find itself as its own nearest neighbor.
    stacked = flann._get_stacked_data()
    idx_all, dist_all = flann.nn_index(stacked, 3)

    removed_idxs = flann.get_removed_ids()
    nonself_idxs = np.nonzero(np.arange(len(idx_all)) != idx_all.T[0])[0]
    assert np.all(nonself_idxs == removed_idxs)
    logger.info(
        'removed indexexes were only ones whos nearest neighbor was not self')
    # Re-added points are appended past the original 400, so their
    # self-matches carry the padded index range [400, 400 + num_readded).
    num_readded = len(readded)
    expected_pad = np.arange(len(vecs), len(vecs) + num_readded)
    assert np.all(idx_all.T[0][-num_readded:] == expected_pad)
    logger.info('added vecs correctly got their padded index')
    assert idx_all.T[0].max() == 499
def subindexer_time_experiment():
    """
    builds plot of number of annotations vs indexer build time.

    TODO: time experiment
    """
    import wbia
    import utool as ut
    from vtool._pyflann_backend import pyflann as pyflann
    import wbia.plottool as pt

    ibs = wbia.opendb(db='PZ_Master0')
    daid_list = ibs.get_valid_aids()
    counts = []
    build_times = []
    flann_params = vt.get_flann_params()
    # Time one FLANN build per subset size, on a fresh random subset each time
    for count in ut.ProgressIter(range(1, 301)):
        shuffled = daid_list[:]
        np.random.shuffle(shuffled)
        subset = shuffled[0:count]
        vecs = np.vstack(ibs.get_annot_vecs(subset))
        with ut.Timer(verbose=False) as t:
            flann = pyflann.FLANN()
            flann.build_index(vecs, **flann_params)
        counts.append(count)
        build_times.append(t.ellapsed)
    pt.plot2(
        np.array(counts),
        np.array(build_times),
        marker='-',
        equal_aspect=False,
        x_label='num_annotations',
        y_label='FLANN build time',
    )
def pyflann_remove_and_save():
    """
    Exercises FLANN save/load round trips after add_points / remove_points,
    checking that reloaded indexes return the same neighbors.

    References:
        # Logic goes here
        ~/code/flann/src/cpp/flann/algorithms/kdtree_index.h

        ~/code/flann/src/cpp/flann/util/serialization.h
        ~/code/flann/src/cpp/flann/util/dynamic_bitset.h

        # Bindings go here
        ~/code/flann/src/cpp/flann/flann.cpp
        ~/code/flann/src/cpp/flann/flann.h

        # Contains stuff for the flann namespace like flann::log_level
        # Also has Index with
        # Matrix<ElementType> features; SEEMS USEFUL
        ~/code/flann/src/cpp/flann/flann.hpp


        # Wrappers go here
        ~/code/flann/src/python/pyflann/flann_ctypes.py
        ~/code/flann/src/python/pyflann/index.py

        ~/local/build_scripts/flannscripts/autogen_bindings.py

    Greping:
        cd ~/code/flann/src
        grep -ER cleanRemovedPoints *
        grep -ER removed_points_ *

    CommandLine:
        python -m wbia.algo.hots._neighbor_experiment --exec-pyflann_remove_and_save

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots._neighbor_experiment import *  # NOQA
        >>> pyflann_remove_and_save()
    """
    from vtool._pyflann_backend import pyflann as pyflann
    import numpy as np

    rng = np.random.RandomState(0)
    vecs = (rng.rand(400, 128) * 255).astype(np.uint8)
    vecs2 = (rng.rand(100, 128) * 255).astype(np.uint8)
    qvecs = (rng.rand(10, 128) * 255).astype(np.uint8)

    # Start from a clean slate so a stale index file cannot mask failures
    ut.delete('test1.flann')
    ut.delete('test2.flann')
    ut.delete('test3.flann')
    ut.delete('test4.flann')

    logger.info('\nTest initial save load')
    flann_params = {
        'random_seed': 42,
        # 'log_level': 'debug', 'info',
        # 'log_level': 4,
        'cores': 1,
        'log_level': 'debug',
    }

    # pyflann.flann_ctypes.flannlib.flann_log_verbosity(4)

    flann1 = pyflann.FLANN(**flann_params)
    params1 = flann1.build_index(vecs, **flann_params)  # NOQA
    idx1, dist = flann1.nn_index(qvecs, 3)
    flann1.save_index('test1.flann')

    flann1_ = pyflann.FLANN()
    flann1_.load_index('test1.flann', vecs)
    # BUGFIX: query the freshly loaded index (flann1_), not the original
    # flann1 -- the original code compared flann1 against itself, so the
    # save/load round trip was never actually verified.
    idx1_, dist = flann1_.nn_index(qvecs, 3)
    assert np.all(idx1 == idx1_), 'initial save load fail'

    logger.info('\nTEST ADD SAVE LOAD')
    flann2 = flann1
    flann2.add_points(vecs2)
    idx2, dist = flann2.nn_index(qvecs, 3)
    assert np.any(idx2 != idx1), 'something should change'
    flann2.save_index('test2.flann')

    # Load saved data with added vecs
    tmp = flann2.get_indexed_data()
    vecs_combined = np.vstack([tmp[0]] + tmp[1])

    flann2_ = pyflann.FLANN()
    flann2_.load_index('test2.flann', vecs_combined)
    idx2_, dist = flann2_.nn_index(qvecs, 3)
    assert np.all(idx2_ == idx2), 'loading saved added data fails'

    # Load saved data with removed vecs
    logger.info('\n\n---TEST REMOVE SAVE LOAD')
    flann1 = pyflann.FLANN()  # rebuild flann1
    _params1 = flann1.build_index(vecs, **flann_params)  # NOQA
    logger.info('\n * CHECK NN')
    _idx1, dist = flann1.nn_index(qvecs, 3)
    idx1 = _idx1

    logger.info('\n * REMOVE POINTS')
    remove_idx_list = np.unique(idx1.T[0][0:10])
    flann1.remove_points(remove_idx_list)
    flann3 = flann1
    logger.info('\n * CHECK NN')
    idx3, dist = flann3.nn_index(qvecs, 3)
    assert (len(np.intersect1d(
        idx3.ravel(), remove_idx_list)) == 0), 'points were not removed'
    logger.info('\n * SAVE')
    flann3.save_index('test3.flann')

    logger.info('\n\n---TEST LOAD SAVED INDEX 0 (with removed points)')
    # Compact the index first, then reload against the compacted data
    clean_vecs = np.delete(vecs, remove_idx_list, axis=0)
    flann3.clean_removed_points()
    flann3.save_index('test4.flann')
    flann4 = pyflann.FLANN(**flann_params)
    # THIS CAUSES A SEGFAULT
    flann4.load_index('test4.flann', clean_vecs)
    idx4, dist = flann4.nn_index(qvecs, 3)
    assert np.all(idx4 == idx3), 'load failed'
    logger.info('\nloaded succesfully (WITHOUT THE BAD DATA)')

    logger.info('\n\n---TEST LOAD SAVED INDEX 1 (with removed points)')
    # Reload the non-compacted index: the removed rows must still be
    # supplied so flat indexes keep their original meaning.
    flann4 = pyflann.FLANN(**flann_params)
    flann4.load_index('test3.flann', vecs)
    idx4, dist = flann4.nn_index(qvecs, 3)
    assert np.all(idx4 == idx3), 'load failed'
    logger.info('\nloaded succesfully (BUT NEED TO MAINTAIN BAD DATA)')

    if False:
        logger.info('\n\n---TEST LOAD SAVED INDEX 2 (with removed points)')
        clean_vecs = np.delete(vecs, remove_idx_list, axis=0)
        flann4 = pyflann.FLANN(**flann_params)
        logger.info('\n * CALL LOAD')
        flann4.load_index('test3.flann', clean_vecs)
def trytest_multiple_add_removes():
    r"""
    Stress-tests nnindexer add_wbia_support / remove_wbia_support across
    repeated, interleaved, and duplicate operations, asserting that
    neighbor results stay consistent with the indexed annotation set.

    CommandLine:
        python -m wbia.algo.hots._neighbor_experiment --exec-test_multiple_add_removes

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots._neighbor_experiment import *  # NOQA
        >>> result = trytest_multiple_add_removes()
        >>> print(result)
    """
    from wbia.algo.hots.neighbor_index_cache import test_nnindexer

    K = 4
    nnindexer, qreq_, ibs = test_nnindexer('PZ_MTEST', use_memcache=False)

    # A fresh indexer must start with no removed entries
    assert len(nnindexer.get_removed_idxs()) == 0
    logger.info('\n\n --- got nnindex testdata --- ')
    logger.info('')

    @ut.tracefunc_xml
    def print_nnindexer(nnindexer):
        # Debug helper: dump the indexer's current aid / vec / removal state
        logger.info('nnindexer.get_indexed_aids() = %r' %
                    (nnindexer.get_indexed_aids(), ))
        logger.info('nnindexer.num_indexed_vecs() = %r' %
                    (nnindexer.num_indexed_vecs(), ))
        logger.info('nnindexer.get_removed_idxs().shape = %r' %
                    (nnindexer.get_removed_idxs().shape, ))

    logger.info('INITIALIZE TEST')
    print_nnindexer(nnindexer)

    # Baseline query before any modification
    config2_ = qreq_.get_internal_query_config2()
    qaid = 1
    qfx2_vec = ibs.get_annot_vecs(qaid, config2_=config2_)
    (qfx2_idx1, qfx2_dist1) = nnindexer.knn(qfx2_vec, K)
    aids1 = set(nnindexer.get_nn_aids(qfx2_idx1).ravel())
    logger.info('aids1 = %r' % (aids1, ))

    logger.info('')
    logger.info('TESTING ADD')
    add_first_daids = [17, 22]
    nnindexer.add_wbia_support(qreq_, add_first_daids)
    print_nnindexer(nnindexer)
    (qfx2_idx0, qfx2_dist0) = nnindexer.knn(qfx2_vec, K)
    assert np.any(qfx2_idx0 != qfx2_idx1), 'some should change'
    aids0 = set(nnindexer.get_nn_aids(qfx2_idx0).ravel())
    logger.info('aids0 = %r' % (aids0, ))

    # execute test function
    logger.info('')
    logger.info('TESTING REMOVE')
    remove_daid_list = [8, 10, 11]
    nnindexer.remove_wbia_support(qreq_, remove_daid_list)
    print_nnindexer(nnindexer)
    # test after modification
    (qfx2_idx2, qfx2_dist2) = nnindexer.knn(qfx2_vec, K)
    aids2 = set(nnindexer.get_nn_aids(qfx2_idx2).ravel())
    logger.info('aids2 = %r' % (aids2, ))
    # Removed annotations must no longer appear among the neighbors
    assert len(aids2.intersection(remove_daid_list)) == 0

    # Cross-check the indexer's removal bookkeeping against pyflann's
    # internal (name-mangled) removed-id list
    __removed_ids = nnindexer.flann._FLANN__removed_ids
    invalid_idxs = nnindexer.get_removed_idxs()
    assert len(np.intersect1d(invalid_idxs,
                              __removed_ids)) == len(__removed_ids)

    logger.info('')
    logger.info('TESTING DUPLICATE REMOVE')
    # Removing the same aids twice should be a no-op
    nnindexer.remove_wbia_support(qreq_, remove_daid_list)
    print_nnindexer(nnindexer)
    # test after modification
    (qfx2_idx2_, qfx2_dist2_) = nnindexer.knn(qfx2_vec, K)
    assert np.all(qfx2_idx2_ == qfx2_idx2)
    assert np.all(qfx2_dist2_ == qfx2_dist2)

    logger.info('')
    logger.info('TESTING ADD AFTER REMOVE')
    # Is the error here happening because added points seem to
    # get the ids of the removed points?
    new_daid_list = [8, 10]
    nnindexer.add_wbia_support(qreq_, new_daid_list)
    print_nnindexer(nnindexer)
    # test after modification
    (qfx2_idx3, qfx2_dist3) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid3 = nnindexer.get_nn_aids(qfx2_idx3)
    # Neighbor results must never reference a removed flat index
    found_removed_idxs = np.intersect1d(qfx2_idx3,
                                        nnindexer.get_removed_idxs())
    if len(found_removed_idxs) != 0:
        logger.info('found_removed_idxs.max() = %r' %
                    (found_removed_idxs.max(), ))
        logger.info('found_removed_idxs.min() = %r' %
                    (found_removed_idxs.min(), ))
        raise AssertionError('found_removed_idxs.shape = %r' %
                             (found_removed_idxs.shape, ))
    aids3 = set(qfx2_aid3.ravel())
    assert aids3.intersection(remove_daid_list) == set(
        new_daid_list).intersection(remove_daid_list)

    logger.info('TESTING DUPLICATE ADD')
    new_daid_list = [8, 10]
    nnindexer.add_wbia_support(qreq_, new_daid_list)
    # test after modification
    print_nnindexer(nnindexer)
    (qfx2_idx3_, qfx2_dist3_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid3_ = nnindexer.get_nn_aids(qfx2_idx3_)
    assert np.all(qfx2_aid3 == qfx2_aid3_)

    logger.info('TESTING ADD QUERY TO DATABASE')
    # Indexing the query annotation itself: it should be its own nearest
    # neighbor, with feature indexes returned in order
    add_daid_list1 = [qaid]
    nnindexer.add_wbia_support(qreq_, add_daid_list1)
    print_nnindexer(nnindexer)
    (qfx2_idx4_, qfx2_dist4_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid4_ = nnindexer.get_nn_aids(qfx2_idx4_)
    qfx2_fx4_ = nnindexer.get_nn_featxs(qfx2_idx4_)
    assert np.all(qfx2_aid4_.T[0] == qaid), 'should find self'
    assert ut.issorted(qfx2_fx4_.T[0]), 'should be in order'

    logger.info('TESTING REMOVE QUERY POINTS')
    add_daid_list1 = [qaid]
    nnindexer.remove_wbia_support(qreq_, add_daid_list1)
    print_nnindexer(nnindexer)
    (qfx2_idx5_, qfx2_dist5_) = nnindexer.knn(qfx2_vec, K)
    # Exact equality is not expected; tolerate small drift in neighbor idxs
    issame = qfx2_idx5_ == qfx2_idx3_
    percentsame = issame.sum() / issame.size
    logger.info('percentsame = %r' % (percentsame, ))
    assert (percentsame > 0.85
            ), 'a large majority of the feature idxs should remain the same'

    print_nnindexer(nnindexer)

    # Do this multiple times
    for _ in range(10):
        add_daid_list1 = [qaid]
        nnindexer.add_wbia_support(qreq_, add_daid_list1, verbose=False)
        nnindexer.remove_wbia_support(qreq_, add_daid_list1, verbose=False)
        (qfx2_idxX_, qfx2_distX_) = nnindexer.knn(qfx2_vec, K)
        issame = qfx2_idxX_ == qfx2_idx3_
        percentsame = issame.sum() / issame.size
        logger.info('percentsame = %r' % (percentsame, ))
        assert (
            percentsame > 0.85
        ), 'a large majority of the feature idxs should remain the same'

    # Test again with more data
    logger.info('testing remove query points with more data')
    nnindexer.add_wbia_support(qreq_, ibs.get_valid_aids())
    (qfx2_idx6_, qfx2_dist6_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid6_ = nnindexer.get_nn_aids(qfx2_idx6_)
    assert np.all(qfx2_aid6_.T[0] == qaid), 'should be same'

    nnindexer.remove_wbia_support(qreq_, add_daid_list1)
    print_nnindexer(nnindexer)
    (qfx2_idx7_, qfx2_dist6_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid7_ = nnindexer.get_nn_aids(qfx2_idx7_)
    assert np.all(qfx2_aid7_.T[0] != qaid), 'should not be same'

    # Do this multiple times
    for _ in range(10):
        add_daid_list1 = [qaid]
        nnindexer.add_wbia_support(qreq_, add_daid_list1, verbose=True)
        nnindexer.remove_wbia_support(qreq_, add_daid_list1, verbose=True)
        # weird that all seem to work here
        (qfx2_idxX_, qfx2_distX_) = nnindexer.knn(qfx2_vec, K)
        issame = qfx2_idxX_ == qfx2_idx7_
        percentsame = issame.sum() / issame.size
        logger.info('percentsame = %r' % (percentsame, ))
        print_nnindexer(nnindexer)
        assert (
            percentsame > 0.85
        ), 'a large majority of the feature idxs should remain the same'

    # Rebuild from scratch and interleave adds/removes in a new order
    nnindexer, qreq_, ibs = test_nnindexer('PZ_MTEST', use_memcache=False)
    big_set = ibs.get_valid_aids()[5:]
    remove_later = big_set[10:14]
    nnindexer.add_wbia_support(qreq_, big_set)

    # Try again where remove is not the last operation
    logger.info('testing remove query points with more op')
    extra_data = np.setdiff1d(ibs.get_valid_aids()[0:5], add_daid_list1)
    nnindexer.remove_wbia_support(qreq_, extra_data)

    nnindexer.add_wbia_support(qreq_, add_daid_list1)
    nnindexer.add_wbia_support(qreq_, extra_data)

    (qfx2_idx8_, qfx2_dist8_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid8_ = nnindexer.get_nn_aids(qfx2_idx8_)
    assert np.all(qfx2_aid8_.T[0] == qaid), 'should be same'

    nnindexer.remove_wbia_support(qreq_, extra_data)
    (qfx2_idx9_, qfx2_dist9_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid9_ = nnindexer.get_nn_aids(qfx2_idx9_)
    assert np.all(qfx2_aid9_.T[0] == qaid), 'should be same'
    nnindexer.remove_wbia_support(qreq_, add_daid_list1)

    nnindexer.add_wbia_support(qreq_, add_daid_list1)
    nnindexer.add_wbia_support(qreq_, extra_data)
    nnindexer.remove_wbia_support(qreq_, remove_later)
    logger.info(nnindexer.ax2_aid)

    aid_list = nnindexer.get_indexed_aids()  # NOQA
    nnindexer.flann.save_index('test.flann')

    # Reload the saved index against both the masked (padded) and the
    # compressed (removed rows dropped) vector stacks
    idx2_vec_masked = nnindexer.idx2_vec
    idx2_vec_compressed = nnindexer.get_indexed_vecs()

    from vtool._pyflann_backend import pyflann as pyflann

    flann1 = pyflann.FLANN()
    flann1.load_index('test.flann', idx2_vec_masked)

    from vtool._pyflann_backend import pyflann as pyflann

    flann2 = pyflann.FLANN()
    flann2.load_index('test.flann', idx2_vec_compressed)

    # NOW WE NEED TO TEST THAT WE CAN SAVE AND LOAD THIS DATA

    #
    # ax2_nvecs = ut.dict_take(ut.dict_hist(nnindexer.idx2_ax), range(len(nnindexer.ax2_aid)))
    pass
 def make_flann_index(vecs, flann_params):
     """Build and return a FLANN approximate-NN index over ``vecs``."""
     index = pyflann.FLANN()
     index.build_index(vecs, **flann_params)
     return index