def init_support(indexer, aid_list, vecs_list, fgws_list, fxs_list, verbose=True):
    r"""
    Prepares inverted indices and FLANN data structure.

    Flattens ``vecs_list`` and builds a reverse index from the flattened
    indices (idx) to the original aids and fxs.

    Args:
        indexer: object populated in place; ``indexer.flann`` must still be
            None (i.e. not yet initialized)
        aid_list (list): annotation ids, parallel to ``vecs_list``
        vecs_list (list): per-annotation descriptor arrays
        fgws_list (list): per-annotation foreground weights
        fxs_list (list): per-annotation feature indices
        verbose (bool): passed through to ``invert_index``
    """
    assert indexer.flann is None, 'already initialized'
    logger.info('[nnindex] Preparing data for indexing / loading index')
    # Check input
    assert len(aid_list) == len(vecs_list), 'invalid input. bad len'
    # FIX: original message concatenated without a space ("0.Cannot")
    assert len(aid_list) > 0, (
        'len(aid_list) == 0. Cannot invert index without features!'
    )
    # Create indexes into the input aids
    ax_list = np.arange(len(aid_list))
    # Invert indices: map every flattened descriptor row back to (ax, fx)
    tup = invert_index(vecs_list, fgws_list, ax_list, fxs_list, verbose=verbose)
    idx2_vec, idx2_fgw, idx2_ax, idx2_fx = tup
    ax2_aid = np.array(aid_list)

    indexer.flann = pyflann.FLANN()  # Approximate search structure
    indexer.ax2_aid = ax2_aid  # (A x 1) Mapping to original annot ids
    indexer.idx2_vec = idx2_vec  # (M x D) Descriptors to index
    indexer.idx2_fgw = idx2_fgw  # (M x 1) Descriptor foreground weight
    indexer.idx2_ax = idx2_ax  # (M x 1) Index into the aid_list
    indexer.idx2_fx = idx2_fx  # (M x 1) Index into the annot's features
    indexer.aid2_ax = ut.make_index_lookup(indexer.ax2_aid)
    indexer.num_indexed = indexer.idx2_vec.shape[0]
    if indexer.idx2_vec.dtype == hstypes.VEC_TYPE:
        # these are sift descriptors
        indexer.max_distance_sqrd = hstypes.VEC_PSEUDO_MAX_DISTANCE_SQRD
    else:
        # FIXME: hacky way to support siam128 descriptors.
        # raise AssertionError(
        #     'NNindexer should get uint8s right now unless the algorithm has
        #     changed')
        indexer.max_distance_sqrd = None
def load_or_build_flann(dstcnvs_normer, cachedir=None, verbose=True, *args, **kwargs):
    """
    Attach a FLANN index to ``dstcnvs_normer``, loading it from the cache
    when a saved index exists, otherwise building one.

    Falls back to a full rebuild if the cached index fails to load.
    """
    from vtool._pyflann_backend import pyflann as pyflann

    fpath = dstcnvs_normer.get_flann_fpath(cachedir)
    if not ut.checkpath(fpath, verbose=ut.VERBOSE):
        # Nothing cached on disk; delegate to the normal build path
        dstcnvs_normer.ensure_flann(cachedir)
        return
    try:
        loaded = pyflann.FLANN()
        dstcnvs_normer.flann = loaded
        loaded.load_index(fpath, dstcnvs_normer.vecs)
        # A loaded index must expose a live ctypes handle
        assert loaded._FLANN__curindex is not None
        # load_success = True
    except Exception as ex:
        ut.printex(ex, '... cannot load distinctiveness flann', iswarning=True)
        dstcnvs_normer.rebuild(cachedir)
def pyflann_test_remove_add2():
    r"""
    Check that points removed from a FLANN index and then re-added get
    fresh (padded) indices rather than reusing the removed slots.

    CommandLine:
        python -m wbia.algo.hots._neighbor_experiment --exec-pyflann_test_remove_add2

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots._neighbor_experiment import *  # NOQA
        >>> pyflann_test_remove_add2()
    """
    from vtool._pyflann_backend import pyflann as pyflann
    import numpy as np

    # Deterministic uint8 descriptor data
    rand = np.random.RandomState(0)
    base_vecs = (rand.rand(400, 128) * 255).astype(np.uint8)

    logger.info('Test initial save load')
    index_params = {'random_seed': 42, 'log_level': 'debug'}

    # pyflann.flann_ctypes.flannlib.flann_log_verbosity(4)
    logger.info('Test remove and then add THE SAME points')
    index = pyflann.FLANN()
    index.build_index(base_vecs, **index_params)  # NOQA

    # Remove every other point, then re-add the first 100 of them
    evens = np.arange(0, len(base_vecs), 2)
    index.remove_points(evens)
    readded = base_vecs[evens[0:100]]
    index.add_points(readded)

    # Query every stacked point against the index
    stacked = index._get_stacked_data()
    nn_idx, _nn_dist = index.nn_index(stacked, 3)
    removed = index.get_removed_ids()

    # A point whose nearest neighbor is not itself must be a removed one
    non_self = np.nonzero(np.arange(len(nn_idx)) != nn_idx.T[0])[0]
    assert np.all(non_self == removed)
    logger.info('removed indexexes were only ones whos nearest neighbor was not self')

    # Re-added vectors occupy new slots appended after the original 400
    tail = nn_idx.T[0][-len(readded):]
    assert np.all(tail == np.arange(len(base_vecs), len(base_vecs) + len(readded)))
    logger.info('added vecs correctly got their padded index')
    assert nn_idx.T[0].max() == 499
def subindexer_time_experiment():
    """
    Builds a plot of number of annotations vs indexer build time.

    TODO: time experiment
    """
    import wbia
    import utool as ut
    from vtool._pyflann_backend import pyflann as pyflann
    import wbia.plottool as pt

    ibs = wbia.opendb(db='PZ_Master0')
    all_daids = ibs.get_valid_aids()
    flann_params = vt.get_flann_params()

    num_annots_each = []
    seconds_each = []
    for num_annots in ut.ProgressIter(range(1, 301)):
        # Draw a random subset of annotations of the requested size
        shuffled = all_daids[:]
        np.random.shuffle(shuffled)
        sample_daids = shuffled[0:num_annots]
        sample_vecs = np.vstack(ibs.get_annot_vecs(sample_daids))
        # Time only the FLANN build itself
        with ut.Timer(verbose=False) as timer:
            flann = pyflann.FLANN()
            flann.build_index(sample_vecs, **flann_params)
        num_annots_each.append(num_annots)
        seconds_each.append(timer.ellapsed)

    pt.plot2(
        np.array(num_annots_each),
        np.array(seconds_each),
        marker='-',
        equal_aspect=False,
        x_label='num_annotations',
        y_label='FLANN build time',
    )
def pyflann_remove_and_save():
    """
    Exercises FLANN save/load round-trips across add_points and
    remove_points operations.

    References:
        # Logic goes here
        ~/code/flann/src/cpp/flann/algorithms/kdtree_index.h
        ~/code/flann/src/cpp/flann/util/serialization.h
        ~/code/flann/src/cpp/flann/util/dynamic_bitset.h

        # Bindings go here
        ~/code/flann/src/cpp/flann/flann.cpp
        ~/code/flann/src/cpp/flann/flann.h

        # Contains stuff for the flann namespace like flann::log_level
        # Also has Index with
        # Matrix<ElementType> features; SEEMS USEFUL
        ~/code/flann/src/cpp/flann/flann.hpp

        # Wrappers go here
        ~/code/flann/src/python/pyflann/flann_ctypes.py
        ~/code/flann/src/python/pyflann/index.py

        ~/local/build_scripts/flannscripts/autogen_bindings.py

    Greping:
        cd ~/code/flann/src
        grep -ER cleanRemovedPoints *
        grep -ER removed_points_ *

    CommandLine:
        python -m wbia.algo.hots._neighbor_experiment --exec-pyflann_remove_and_save

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots._neighbor_experiment import *  # NOQA
        >>> pyflann_remove_and_save()
    """
    from vtool._pyflann_backend import pyflann as pyflann
    import numpy as np

    rng = np.random.RandomState(0)
    vecs = (rng.rand(400, 128) * 255).astype(np.uint8)
    vecs2 = (rng.rand(100, 128) * 255).astype(np.uint8)
    qvecs = (rng.rand(10, 128) * 255).astype(np.uint8)

    # Start from a clean slate so stale index files cannot interfere
    ut.delete('test1.flann')
    ut.delete('test2.flann')
    ut.delete('test3.flann')
    ut.delete('test4.flann')

    logger.info('\nTest initial save load')
    flann_params = {
        'random_seed': 42,
        # 'log_level': 'debug', 'info',
        # 'log_level': 4,
        'cores': 1,
        'log_level': 'debug',
    }

    # pyflann.flann_ctypes.flannlib.flann_log_verbosity(4)
    flann1 = pyflann.FLANN(**flann_params)
    params1 = flann1.build_index(vecs, **flann_params)  # NOQA
    idx1, dist = flann1.nn_index(qvecs, 3)

    flann1.save_index('test1.flann')
    flann1_ = pyflann.FLANN()
    flann1_.load_index('test1.flann', vecs)
    # BUGFIX: query the *loaded* index (flann1_), not the original flann1;
    # the old code compared flann1 against itself, so the round-trip was
    # never actually verified.
    idx1_, dist = flann1_.nn_index(qvecs, 3)
    assert np.all(idx1 == idx1_), 'initial save load fail'

    logger.info('\nTEST ADD SAVE LOAD')
    flann2 = flann1
    flann2.add_points(vecs2)
    idx2, dist = flann2.nn_index(qvecs, 3)
    assert np.any(idx2 != idx1), 'something should change'

    flann2.save_index('test2.flann')

    # Load saved data with added vecs
    tmp = flann2.get_indexed_data()
    vecs_combined = np.vstack([tmp[0]] + tmp[1])

    flann2_ = pyflann.FLANN()
    flann2_.load_index('test2.flann', vecs_combined)
    idx2_, dist = flann2_.nn_index(qvecs, 3)
    assert np.all(idx2_ == idx2), 'loading saved added data fails'

    # Load saved data with removed vecs
    logger.info('\n\n---TEST REMOVE SAVE LOAD')
    flann1 = pyflann.FLANN()  # rebuild flann1
    _params1 = flann1.build_index(vecs, **flann_params)  # NOQA
    logger.info('\n * CHECK NN')
    _idx1, dist = flann1.nn_index(qvecs, 3)
    idx1 = _idx1

    logger.info('\n * REMOVE POINTS')
    # Remove the (unique) nearest neighbors of the first queries
    remove_idx_list = np.unique(idx1.T[0][0:10])
    flann1.remove_points(remove_idx_list)
    flann3 = flann1
    logger.info('\n * CHECK NN')
    idx3, dist = flann3.nn_index(qvecs, 3)
    assert (
        len(np.intersect1d(idx3.ravel(), remove_idx_list)) == 0
    ), 'points were not removed'
    logger.info('\n * SAVE')
    flann3.save_index('test3.flann')

    logger.info('\n\n---TEST LOAD SAVED INDEX 0 (with removed points)')
    clean_vecs = np.delete(vecs, remove_idx_list, axis=0)
    flann3.clean_removed_points()
    flann3.save_index('test4.flann')
    flann4 = pyflann.FLANN(**flann_params)
    # THIS CAUSES A SEGFAULT
    flann4.load_index('test4.flann', clean_vecs)
    idx4, dist = flann4.nn_index(qvecs, 3)
    assert np.all(idx4 == idx3), 'load failed'
    logger.info('\nloaded succesfully (WITHOUT THE BAD DATA)')

    logger.info('\n\n---TEST LOAD SAVED INDEX 1 (with removed points)')
    flann4 = pyflann.FLANN(**flann_params)
    flann4.load_index('test3.flann', vecs)
    idx4, dist = flann4.nn_index(qvecs, 3)
    assert np.all(idx4 == idx3), 'load failed'
    logger.info('\nloaded succesfully (BUT NEED TO MAINTAIN BAD DATA)')

    if False:
        logger.info('\n\n---TEST LOAD SAVED INDEX 2 (with removed points)')
        clean_vecs = np.delete(vecs, remove_idx_list, axis=0)
        flann4 = pyflann.FLANN(**flann_params)
        logger.info('\n * CALL LOAD')
        flann4.load_index('test3.flann', clean_vecs)
def trytest_multiple_add_removes():
    r"""
    CommandLine:
        python -m wbia.algo.hots._neighbor_experiment --exec-test_multiple_add_removes

    Example:
        >>> # DISABLE_DOCTEST
        >>> from wbia.algo.hots._neighbor_experiment import *  # NOQA
        >>> result = test_multiple_add_removes()
        >>> print(result)
    """
    from wbia.algo.hots.neighbor_index_cache import test_nnindexer

    K = 4
    nnindexer, qreq_, ibs = test_nnindexer('PZ_MTEST', use_memcache=False)

    # Fresh indexer should have no removed idxs yet
    assert len(nnindexer.get_removed_idxs()) == 0
    logger.info('\n\n --- got nnindex testdata --- ')
    logger.info('')

    @ut.tracefunc_xml
    def print_nnindexer(nnindexer):
        # Dump current indexer state (indexed aids, vec count, removed idxs)
        logger.info('nnindexer.get_indexed_aids() = %r' % (nnindexer.get_indexed_aids(),))
        logger.info('nnindexer.num_indexed_vecs() = %r' % (nnindexer.num_indexed_vecs(),))
        logger.info('nnindexer.get_removed_idxs().shape = %r' % (nnindexer.get_removed_idxs().shape,))

    logger.info('INITIALIZE TEST')
    print_nnindexer(nnindexer)

    # Baseline query before any add/remove operations
    config2_ = qreq_.get_internal_query_config2()
    qaid = 1
    qfx2_vec = ibs.get_annot_vecs(qaid, config2_=config2_)
    (qfx2_idx1, qfx2_dist1) = nnindexer.knn(qfx2_vec, K)
    aids1 = set(nnindexer.get_nn_aids(qfx2_idx1).ravel())
    logger.info('aids1 = %r' % (aids1,))

    logger.info('')
    logger.info('TESTING ADD')
    add_first_daids = [17, 22]
    nnindexer.add_wbia_support(qreq_, add_first_daids)
    print_nnindexer(nnindexer)
    (qfx2_idx0, qfx2_dist0) = nnindexer.knn(qfx2_vec, K)
    # Adding support should perturb at least some neighbors
    assert np.any(qfx2_idx0 != qfx2_idx1), 'some should change'
    aids0 = set(nnindexer.get_nn_aids(qfx2_idx0).ravel())
    logger.info('aids0 = %r' % (aids0,))

    # execute test function
    logger.info('')
    logger.info('TESTING REMOVE')
    remove_daid_list = [8, 10, 11]
    nnindexer.remove_wbia_support(qreq_, remove_daid_list)
    print_nnindexer(nnindexer)
    # test after modification
    (qfx2_idx2, qfx2_dist2) = nnindexer.knn(qfx2_vec, K)
    aids2 = set(nnindexer.get_nn_aids(qfx2_idx2).ravel())
    logger.info('aids2 = %r' % (aids2,))
    # Removed annots must never appear as neighbors
    assert len(aids2.intersection(remove_daid_list)) == 0

    # Cross-check the indexer's removed idxs against FLANN's internal ids
    __removed_ids = nnindexer.flann._FLANN__removed_ids
    invalid_idxs = nnindexer.get_removed_idxs()
    assert len(np.intersect1d(invalid_idxs, __removed_ids)) == len(__removed_ids)

    logger.info('')
    logger.info('TESTING DUPLICATE REMOVE')
    # Removing already-removed aids should be a no-op
    nnindexer.remove_wbia_support(qreq_, remove_daid_list)
    print_nnindexer(nnindexer)
    # test after modification
    (qfx2_idx2_, qfx2_dist2_) = nnindexer.knn(qfx2_vec, K)
    assert np.all(qfx2_idx2_ == qfx2_idx2)
    assert np.all(qfx2_dist2_ == qfx2_dist2)

    logger.info('')
    logger.info('TESTING ADD AFTER REMOVE')
    # Is the error here happening because added points seem to
    # get the ids of the removed points?
    new_daid_list = [8, 10]
    nnindexer.add_wbia_support(qreq_, new_daid_list)
    print_nnindexer(nnindexer)
    # test after modification
    (qfx2_idx3, qfx2_dist3) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid3 = nnindexer.get_nn_aids(qfx2_idx3)
    # No query result may point at a removed (invalid) index slot
    found_removed_idxs = np.intersect1d(qfx2_idx3, nnindexer.get_removed_idxs())
    if len(found_removed_idxs) != 0:
        logger.info('found_removed_idxs.max() = %r' % (found_removed_idxs.max(),))
        logger.info('found_removed_idxs.min() = %r' % (found_removed_idxs.min(),))
        raise AssertionError('found_removed_idxs.shape = %r' % (found_removed_idxs.shape,))
    aids3 = set(qfx2_aid3.ravel())
    # Only the re-added aids may reappear from the removed set
    assert aids3.intersection(remove_daid_list) == set(new_daid_list).intersection(remove_daid_list)

    logger.info('TESTING DUPLICATE ADD')
    new_daid_list = [8, 10]
    nnindexer.add_wbia_support(qreq_, new_daid_list)
    # test after modification
    print_nnindexer(nnindexer)
    (qfx2_idx3_, qfx2_dist3_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid3_ = nnindexer.get_nn_aids(qfx2_idx3_)
    # Duplicate add should not change neighbor aids
    assert np.all(qfx2_aid3 == qfx2_aid3_)

    logger.info('TESTING ADD QUERY TO DATABASE')
    add_daid_list1 = [qaid]
    nnindexer.add_wbia_support(qreq_, add_daid_list1)
    print_nnindexer(nnindexer)
    (qfx2_idx4_, qfx2_dist4_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid4_ = nnindexer.get_nn_aids(qfx2_idx4_)
    qfx2_fx4_ = nnindexer.get_nn_featxs(qfx2_idx4_)
    # Once the query annot is indexed, its own features are the top match
    assert np.all(qfx2_aid4_.T[0] == qaid), 'should find self'
    assert ut.issorted(qfx2_fx4_.T[0]), 'should be in order'

    logger.info('TESTING REMOVE QUERY POINTS')
    add_daid_list1 = [qaid]
    nnindexer.remove_wbia_support(qreq_, add_daid_list1)
    print_nnindexer(nnindexer)
    (qfx2_idx5_, qfx2_dist5_) = nnindexer.knn(qfx2_vec, K)
    # Removing the self-match should mostly restore the pre-add neighbors
    issame = qfx2_idx5_ == qfx2_idx3_
    percentsame = issame.sum() / issame.size
    logger.info('percentsame = %r' % (percentsame,))
    assert (percentsame > 0.85), 'a large majority of the feature idxs should remain the same'

    print_nnindexer(nnindexer)

    # Do this multiple times
    for _ in range(10):
        add_daid_list1 = [qaid]
        nnindexer.add_wbia_support(qreq_, add_daid_list1, verbose=False)
        nnindexer.remove_wbia_support(qreq_, add_daid_list1, verbose=False)
        (qfx2_idxX_, qfx2_distX_) = nnindexer.knn(qfx2_vec, K)
        issame = qfx2_idxX_ == qfx2_idx3_
        percentsame = issame.sum() / issame.size
        logger.info('percentsame = %r' % (percentsame,))
        assert (percentsame > 0.85), 'a large majority of the feature idxs should remain the same'

    # Test again with more data
    logger.info('testing remove query points with more data')
    nnindexer.add_wbia_support(qreq_, ibs.get_valid_aids())
    (qfx2_idx6_, qfx2_dist6_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid6_ = nnindexer.get_nn_aids(qfx2_idx6_)
    assert np.all(qfx2_aid6_.T[0] == qaid), 'should be same'

    nnindexer.remove_wbia_support(qreq_, add_daid_list1)
    print_nnindexer(nnindexer)
    (qfx2_idx7_, qfx2_dist6_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid7_ = nnindexer.get_nn_aids(qfx2_idx7_)
    assert np.all(qfx2_aid7_.T[0] != qaid), 'should not be same'

    # Do this multiple times
    for _ in range(10):
        add_daid_list1 = [qaid]
        nnindexer.add_wbia_support(qreq_, add_daid_list1, verbose=True)
        nnindexer.remove_wbia_support(qreq_, add_daid_list1, verbose=True)
        # weird that all seem to work here
        (qfx2_idxX_, qfx2_distX_) = nnindexer.knn(qfx2_vec, K)
        issame = qfx2_idxX_ == qfx2_idx7_
        percentsame = issame.sum() / issame.size
        logger.info('percentsame = %r' % (percentsame,))
        print_nnindexer(nnindexer)
        assert (percentsame > 0.85), 'a large majority of the feature idxs should remain the same'

    # Rebuild a fresh indexer for the save/load portion of the test
    nnindexer, qreq_, ibs = test_nnindexer('PZ_MTEST', use_memcache=False)
    big_set = ibs.get_valid_aids()[5:]
    remove_later = big_set[10:14]
    nnindexer.add_wbia_support(qreq_, big_set)

    # Try again where remove is not the last operation
    logger.info('testing remove query points with more op')
    extra_data = np.setdiff1d(ibs.get_valid_aids()[0:5], add_daid_list1)
    nnindexer.remove_wbia_support(qreq_, extra_data)

    nnindexer.add_wbia_support(qreq_, add_daid_list1)
    nnindexer.add_wbia_support(qreq_, extra_data)
    (qfx2_idx8_, qfx2_dist8_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid8_ = nnindexer.get_nn_aids(qfx2_idx8_)
    assert np.all(qfx2_aid8_.T[0] == qaid), 'should be same'
    nnindexer.remove_wbia_support(qreq_, extra_data)
    (qfx2_idx9_, qfx2_dist9_) = nnindexer.knn(qfx2_vec, K)
    qfx2_aid9_ = nnindexer.get_nn_aids(qfx2_idx9_)
    assert np.all(qfx2_aid9_.T[0] == qaid), 'should be same'
    nnindexer.remove_wbia_support(qreq_, add_daid_list1)

    nnindexer.add_wbia_support(qreq_, add_daid_list1)
    nnindexer.add_wbia_support(qreq_, extra_data)
    nnindexer.remove_wbia_support(qreq_, remove_later)
    logger.info(nnindexer.ax2_aid)

    aid_list = nnindexer.get_indexed_aids()  # NOQA
    nnindexer.flann.save_index('test.flann')

    # Loading may use either the masked (full) or compressed vec arrays
    idx2_vec_masked = nnindexer.idx2_vec
    idx2_vec_compressed = nnindexer.get_indexed_vecs()

    from vtool._pyflann_backend import pyflann as pyflann

    flann1 = pyflann.FLANN()
    flann1.load_index('test.flann', idx2_vec_masked)

    from vtool._pyflann_backend import pyflann as pyflann

    flann2 = pyflann.FLANN()
    flann2.load_index('test.flann', idx2_vec_compressed)

    # NOW WE NEED TO TEST THAT WE CAN SAVE AND LOAD THIS DATA
    #
    # ax2_nvecs = ut.dict_take(ut.dict_hist(nnindexer.idx2_ax), range(len(nnindexer.ax2_aid)))
    pass
def make_flann_index(vecs, flann_params):
    """Build and return a FLANN index over ``vecs`` using ``flann_params``."""
    index = pyflann.FLANN()
    index.build_index(vecs, **flann_params)
    return index