def new_word_index(aid_list=None, vecs_list=None, flann_params=None,
                   flann_cachedir=None, indexer_cfgstr='',
                   hash_rowids=True, use_cache=not NOCACHE_WORD,
                   use_params_hash=True):
    """
    Build a WordIndex over the given annotation vectors.

    Args:
        aid_list (list): annotation rowids (defaults to empty list)
        vecs_list (list): per-annotation descriptor arrays (defaults to empty list)
        flann_params (dict): parameters forwarded to FLANN (defaults to empty dict)
        flann_cachedir (str): directory for the cached FLANN index
        indexer_cfgstr (str): config string mixed into the cache key
        hash_rowids (bool): if True, include a hash of aid_list in the cache key
        use_cache (bool): if True, load a previously cached FLANN index
        use_params_hash (bool): if True, include flann_params in the cache key

    Returns:
        WordIndex: inverted index over the stacked descriptors
    """
    # Use None sentinels instead of mutable defaults ([]/{}) which are
    # shared across calls and can be accidentally mutated.
    aid_list = [] if aid_list is None else aid_list
    vecs_list = [] if vecs_list is None else vecs_list
    flann_params = {} if flann_params is None else flann_params
    print('[windex] building WordIndex object')
    _check_input(aid_list, vecs_list)
    # Create indexes into the input aids
    ax_list = np.arange(len(aid_list))
    idx2_vec, idx2_ax, idx2_fx = invert_index(vecs_list, ax_list)
    if hash_rowids:
        # Fingerprint the input rowids so the cache key tracks them
        aids_hashstr = utool.hashstr_arr(aid_list, '_AIDS')
        cfgstr = aids_hashstr + indexer_cfgstr
    else:
        # Dont hash rowids when given enough info in indexer_cfgstr
        cfgstr = indexer_cfgstr
    # Build/Load the flann index (keywords passed directly, not via **dict)
    flann = nntool.flann_cache(idx2_vec,
                               cache_dir=flann_cachedir,
                               cfgstr=cfgstr,
                               flann_params=flann_params,
                               use_cache=use_cache,
                               use_params_hash=use_params_hash)
    ax2_aid = np.array(aid_list)
    windex = WordIndex(ax2_aid, idx2_vec, idx2_ax, idx2_fx, flann)
    return windex
def new_word_index(aid_list=None, vecs_list=None, flann_params=None,
                   flann_cachedir=None, indexer_cfgstr='',
                   hash_rowids=True, use_cache=not NOCACHE_WORD,
                   use_params_hash=True):
    """
    Build a WordIndex over the given annotation vectors.

    Args:
        aid_list (list): annotation rowids (defaults to empty list)
        vecs_list (list): per-annotation descriptor arrays (defaults to empty list)
        flann_params (dict): parameters forwarded to FLANN (defaults to empty dict)
        flann_cachedir (str): directory for the cached FLANN index
        indexer_cfgstr (str): config string mixed into the cache key
        hash_rowids (bool): if True, include a hash of aid_list in the cache key
        use_cache (bool): if True, load a previously cached FLANN index
        use_params_hash (bool): if True, include flann_params in the cache key

    Returns:
        WordIndex: inverted index over the stacked descriptors
    """
    # Use None sentinels instead of mutable defaults ([]/{}) which are
    # shared across calls and can be accidentally mutated.
    aid_list = [] if aid_list is None else aid_list
    vecs_list = [] if vecs_list is None else vecs_list
    flann_params = {} if flann_params is None else flann_params
    print('[windex] building WordIndex object')
    _check_input(aid_list, vecs_list)
    # Create indexes into the input aids
    ax_list = np.arange(len(aid_list))
    idx2_vec, idx2_ax, idx2_fx = invert_index(vecs_list, ax_list)
    if hash_rowids:
        # Fingerprint the input rowids so the cache key tracks them
        aids_hashstr = utool.hashstr_arr(aid_list, '_AIDS')
        cfgstr = aids_hashstr + indexer_cfgstr
    else:
        # Dont hash rowids when given enough info in indexer_cfgstr
        cfgstr = indexer_cfgstr
    # Build/Load the flann index (keywords passed directly, not via **dict)
    flann = nntool.flann_cache(idx2_vec,
                               cache_dir=flann_cachedir,
                               cfgstr=cfgstr,
                               flann_params=flann_params,
                               use_cache=use_cache,
                               use_params_hash=use_params_hash)
    ax2_aid = np.array(aid_list)
    windex = WordIndex(ax2_aid, idx2_vec, idx2_ax, idx2_fx, flann)
    return windex
def build_flann_inverted_index(ibs, aid_list):
    """
    Build an inverted index (using FLANN) over the descriptors of aid_list.

    Args:
        ibs: controller object providing descriptor and cache accessors
        aid_list (list): annotation rowids to index

    Returns:
        tuple: (dx2_desc, dx2_aid, dx2_fx, flann) — stacked descriptors,
            the parallel annotation id per row, the feature index per row,
            and the FLANN index built over dx2_desc.

    Raises:
        AssertionError: if aid_list is empty (cannot index without features)
    """
    try:
        if len(aid_list) == 0:
            msg = ('len(aid_list) == 0\n'
                   'Cannot build inverted index without features!')
            raise AssertionError(msg)
        dx2_desc, dx2_aid, dx2_fx = aggregate_descriptors(ibs, aid_list)
    except Exception as ex:
        # Bind database stats to locals so printex can include them in the
        # error report (fixed typo: intostr -> infostr)
        infostr = ibs.get_infostr()  # NOQA
        dbname = ibs.get_dbname()  # NOQA
        num_images = ibs.get_num_images()  # NOQA
        num_annotations = ibs.get_num_annotations()  # NOQA
        num_names = ibs.get_num_names()  # NOQA
        utool.printex(ex, '', 'cannot build inverted index', locals().keys())
        raise
    # Build/Load the flann index
    flann_cfgstr = get_flann_cfgstr(ibs, aid_list)
    flann_params = {'algorithm': 'kdtree', 'trees': 4}
    precomp_kwargs = {'cache_dir': ibs.get_flann_cachedir(),
                      'cfgstr': flann_cfgstr,
                      'flann_params': flann_params,
                      'force_recompute': NOCACHE_FLANN}
    flann = nntool.flann_cache(dx2_desc, **precomp_kwargs)
    return dx2_desc, dx2_aid, dx2_fx, flann
def index_data_annots(annots_df, daids, words, with_internals=True,
                      aggregate=False, alpha=3, thresh=0,
                      with_pandas=WITH_PANDAS):
    """
    Builds the initial inverted index from a dataframe, daids, and words.
    Optionally builds the internals of the inverted structure

    Args:
        annots_df: dataframe-like with a 'vecs' column indexable by daids
        daids: database annotation rowids to index
        words: visual vocabulary used to build the word FLANN index
        with_internals (bool): if True also compute inverted-index internals
        aggregate (bool): forwarded to compute_data_internals_
        alpha (int): forwarded to compute_data_internals_
        thresh (int): forwarded to compute_data_internals_
        with_pandas (bool): if True wrap outputs in pandas structures

    Returns:
        InvertedIndex

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, daids, qaids, words = smk_debug.testdata_words()
    >>> with_internals = False
    >>> invindex = index_data_annots(annots_df, daids, words, with_internals)

    #>>> print(utool.hashstr(repr(list(invindex.__dict__.values()))))
    #v8+i5i8+55j0swio
    """
    if utool.VERBOSE:
        print('[smk_index] index_data_annots')
    flann_params = {}
    _words = pdh.ensure_values(words)
    # Cached FLANN structure for fast nearest-word assignment
    wordflann = nntool.flann_cache(_words, flann_params=flann_params,
                                   appname='smk')
    _daids = pdh.ensure_values(daids)
    _vecs_list = pdh.ensure_2d_values(annots_df['vecs'][_daids])
    _idx2_dvec, _idx2_daid, _idx2_dfx = nntool.invertable_stack(_vecs_list,
                                                                _daids)
    # Pandasify
    if with_pandas:
        idx_series = pdh.IntIndex(np.arange(len(_idx2_daid)), name='idx')
        idx2_dfx = pdh.IntSeries(_idx2_dfx, index=idx_series, name='fx')
        idx2_daid = pdh.IntSeries(_idx2_daid, index=idx_series, name='aid')
        idx2_dvec = pd.DataFrame(_idx2_dvec, index=idx_series,
                                 columns=VEC_COLUMNS)
    else:
        # Removed dead trailing `pass` statement from this branch
        idx2_dfx = _idx2_dfx
        idx2_daid = _idx2_daid
        idx2_dvec = _idx2_dvec
    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid,
                             idx2_dfx, daids)
    if with_internals:
        compute_data_internals_(invindex, aggregate, alpha, thresh)  # 99%
    return invindex
def build_flann_inverted_index(ibs, aid_list, **kwargs):
    """
    Build an inverted index (using FLANN) over the descriptors of aid_list.

    Returns:
        tuple: (dx2_desc, dx2_aid, dx2_fx, flann)
    """
    # Stack all descriptors into one array with inverse aid/fx mappings
    dx2_desc, dx2_aid, dx2_fx = build_ibs_inverted_descriptor_index(ibs, aid_list)
    # Cache key identifying which annotations were indexed
    indexed_cfgstr = get_indexed_cfgstr(ibs, aid_list)
    # Build or load the cached FLANN index
    flann = nntool.flann_cache(
        dx2_desc,
        cache_dir=ibs.get_flann_cachedir(),
        cfgstr=indexed_cfgstr,
        flann_params={'algorithm': 'kdtree', 'trees': 4},
        use_cache=kwargs.get('use_cache', not NOCACHE_FLANN))
    return dx2_desc, dx2_aid, dx2_fx, flann
def index_data_annots(annots_df, daids, words, with_internals=True):
    """
    Create inverted index for database annotations

    >>> from ibeis.model.hots.smk.smk import *  # NOQA
    >>> ibs, annots_df, taids, daids, qaids, nWords = testdata()
    >>> words = learn_visual_words(annots_df, taids, nWords)
    >>> with_internals = True
    >>> invindex = index_data_annots(annots_df, daids, words, with_internals)
    """
    # Raw descriptor arrays for the database annotations
    vecs_list = ensure_values(annots_df['vecs'][daids])
    # Cached FLANN structure for fast nearest-word lookup
    wordflann = nntool.flann_cache(
        words,
        flann_params={},
        cache_dir=utool.get_app_resource_dir('smk'))
    _daids = ensure_values(daids)
    idx2_dvec, idx2_daid, idx2_dfx = nntool.invertable_stack(vecs_list, _daids)
    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid,
                             idx2_dfx, _daids)
    if with_internals:
        invindex.compute_internals()
    return invindex
def index_data_annots(annots_df, daids, words, with_internals=True,
                      aggregate=False, alpha=3, thresh=0,
                      with_pandas=WITH_PANDAS):
    """
    Builds the initial inverted index from a dataframe, daids, and words.
    Optionally builds the internals of the inverted structure

    Args:
        annots_df: dataframe-like with a 'vecs' column indexable by daids
        daids: database annotation rowids to index
        words: visual vocabulary used to build the word FLANN index
        with_internals (bool): if True also compute inverted-index internals
        aggregate (bool): forwarded to compute_data_internals_
        alpha (int): forwarded to compute_data_internals_
        thresh (int): forwarded to compute_data_internals_
        with_pandas (bool): if True wrap outputs in pandas structures

    Returns:
        InvertedIndex

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, daids, qaids, words = smk_debug.testdata_words()
    >>> with_internals = False
    >>> invindex = index_data_annots(annots_df, daids, words, with_internals)

    #>>> print(utool.hashstr(repr(list(invindex.__dict__.values()))))
    #v8+i5i8+55j0swio
    """
    if utool.VERBOSE:
        print('[smk_index] index_data_annots')
    flann_params = {}
    _words = pdh.ensure_values(words)
    # Cached FLANN structure for fast nearest-word assignment
    wordflann = nntool.flann_cache(_words, flann_params=flann_params,
                                   appname='smk')
    _daids = pdh.ensure_values(daids)
    _vecs_list = pdh.ensure_2d_values(annots_df['vecs'][_daids])
    _idx2_dvec, _idx2_daid, _idx2_dfx = nntool.invertable_stack(_vecs_list,
                                                                _daids)
    # Pandasify
    if with_pandas:
        idx_series = pdh.IntIndex(np.arange(len(_idx2_daid)), name='idx')
        idx2_dfx = pdh.IntSeries(_idx2_dfx, index=idx_series, name='fx')
        idx2_daid = pdh.IntSeries(_idx2_daid, index=idx_series, name='aid')
        idx2_dvec = pd.DataFrame(_idx2_dvec, index=idx_series,
                                 columns=VEC_COLUMNS)
    else:
        # Removed dead trailing `pass` statement from this branch
        idx2_dfx = _idx2_dfx
        idx2_daid = _idx2_daid
        idx2_dvec = _idx2_dvec
    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid,
                             idx2_dfx, daids)
    if with_internals:
        compute_data_internals_(invindex, aggregate, alpha, thresh)  # 99%
    return invindex
def index_data_annots(annots_df, daids, words, qparams, with_internals=True,
                      memtrack=None, delete_rawvecs=False):
    """
    Builds the initial inverted index from a dataframe, daids, and words.
    Optionally builds the internals of the inverted structure

    Args:
        annots_df: dataframe-like with 'vecs' and 'labels' columns
            indexable by daids
        daids: database annotation rowids to index
        words: visual vocabulary used to build the word FLANN index
        qparams: query parameters forwarded to compute_data_internals_
        with_internals (bool): if True also compute inverted-index internals
        memtrack: memory debugging object
        delete_rawvecs (bool): forwarded to compute_data_internals_
            (presumably lets it free raw vectors — TODO confirm)

    Returns:
        invindex

    Example:
        >>> from ibeis.algo.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, qreq_, words = smk_debug.testdata_words()
        >>> qparams = qreq_.qparams
        >>> with_internals = False
        >>> invindex = index_data_annots(annots_df, daids, words, qparams, with_internals)

    Ignore:
        #>>> print(ut.hashstr(repr(list(invindex.__dict__.values()))))
        #v8+i5i8+55j0swio
    """
    if not ut.QUIET:
        print('[smk_repr] index_data_annots')
    flann_params = {}
    # Compute fast lookup index for the words
    wordflann = nntool.flann_cache(words, flann_params=flann_params,
                                   appname='smk')
    _vecs_list = annots_df['vecs'][daids]
    _label_list = annots_df['labels'][daids]
    idx2_dvec, idx2_daid, idx2_dfx = nntool.invertible_stack(_vecs_list, daids)
    # TODO: Need to individually cache residual vectors.
    # rvecs_list = annots_df['rvecs'][daids]
    # Residual vectors depend on:
    #  * nearest word (word assignment)
    #  * original vectors
    #  * multiassignment
    daid2_label = dict(zip(daids, _label_list))
    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid,
                             idx2_dfx, daids, daid2_label)
    # Decrement reference count so memory can be cleared in the next function
    del words, idx2_dvec, idx2_daid, idx2_dfx, daids, daid2_label
    del _vecs_list, _label_list
    if with_internals:
        compute_data_internals_(invindex, qparams, memtrack=memtrack,
                                delete_rawvecs=delete_rawvecs)  # 99%
    return invindex