def index_data_annots(annots_df, daids, words, with_internals=True,
                      aggregate=False, alpha=3, thresh=0,
                      with_pandas=WITH_PANDAS):
    """
    Construct the initial InvertedIndex from an annotation dataframe, a set
    of database annotation ids, and the visual words.

    Args:
        annots_df: dataframe-like object; its 'vecs' column is indexed by the
            converted daids to obtain the per-annotation vectors.
        daids: database annotation ids. Converted with pdh.ensure_values for
            stacking, but handed to InvertedIndex exactly as given.
        words: visual words. Converted with pdh.ensure_values for the
            flann_cache call, but handed to InvertedIndex exactly as given.
        with_internals: when true, compute_data_internals_ is run on the new
            index before it is returned.
        aggregate, alpha, thresh: forwarded unchanged to
            compute_data_internals_ (only used when with_internals is true).
        with_pandas: when true, the stacked arrays are wrapped in pandas
            objects sharing one integer index named 'idx'; otherwise the raw
            outputs of nntool.invertable_stack are used directly.

    Returns:
        The InvertedIndex instance that was built.

    Example:
        >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, words = smk_debug.testdata_words()
        >>> with_internals = False
        >>> invindex = index_data_annots(annots_df, daids, words, with_internals)

        #>>> print(utool.hashstr(repr(list(invindex.__dict__.values()))))
        #v8+i5i8+55j0swio
    """
    if utool.VERBOSE:
        print('[smk_index] index_data_annots')

    # Lookup structure over the word values, cached under the 'smk' app name.
    word_values = pdh.ensure_values(words)
    wordflann = nntool.flann_cache(word_values, flann_params={}, appname='smk')

    # Stack every annotation's vectors; the stack call also returns the
    # per-row annotation id and feature index arrays.
    daid_values = pdh.ensure_values(daids)
    vecs_2d = pdh.ensure_2d_values(annots_df['vecs'][daid_values])
    stacked_vecs, stacked_aids, stacked_fxs = nntool.invertable_stack(
        vecs_2d, daid_values)

    if not with_pandas:
        # Plain mode: use the stacked outputs as they are.
        idx2_dfx, idx2_daid, idx2_dvec = stacked_fxs, stacked_aids, stacked_vecs
    else:
        # Pandas mode: one shared integer index labels all three containers.
        row_index = pdh.IntIndex(np.arange(len(stacked_aids)), name='idx')
        idx2_dfx = pdh.IntSeries(stacked_fxs, index=row_index, name='fx')
        idx2_daid = pdh.IntSeries(stacked_aids, index=row_index, name='aid')
        idx2_dvec = pd.DataFrame(stacked_vecs, index=row_index,
                                 columns=VEC_COLUMNS)

    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid,
                             idx2_dfx, daids)
    if with_internals:
        # The original author tagged this call "99%" -- presumably its share
        # of this function's runtime; confirm with a profiler.
        compute_data_internals_(invindex, aggregate, alpha, thresh)
    return invindex
def index_data_annots(annots_df, daids, words, with_internals=True):
    """
    Create the inverted index for the database annotations.

    Args:
        annots_df: dataframe-like object; its 'vecs' column is indexed by
            daids to obtain the per-annotation vectors.
        daids: database annotation ids. Their ensure_values form is used for
            stacking and is also what InvertedIndex receives.
        words: visual words, passed as given to both nntool.flann_cache and
            InvertedIndex.
        with_internals: when true, invindex.compute_internals() is called
            before the index is returned.

    Returns:
        The InvertedIndex instance that was built.

    Example:
        >>> from ibeis.model.hots.smk.smk import *  # NOQA
        >>> ibs, annots_df, taids, daids, qaids, nWords = testdata()
        >>> words = learn_visual_words(annots_df, taids, nWords)
        >>> with_internals = True
        >>> invindex = index_data_annots(annots_df, daids, words, with_internals)
    """
    # Per-annotation vectors, selected with the daids exactly as passed in.
    annot_vecs = ensure_values(annots_df['vecs'][daids])

    # Lookup structure over the words, cached in the 'smk' resource directory.
    wordflann = nntool.flann_cache(
        words,
        flann_params={},
        cache_dir=utool.get_app_resource_dir('smk'),
    )

    # Stack the vectors; the stack call also returns the per-row annotation
    # id and feature index arrays.
    daid_values = ensure_values(daids)
    idx2_dvec, idx2_daid, idx2_dfx = nntool.invertable_stack(
        annot_vecs, daid_values)

    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid,
                             idx2_dfx, daid_values)
    if with_internals:
        invindex.compute_internals()
    return invindex
def index_data_annots(annots_df, daids, words, with_internals=True,
                      aggregate=False, alpha=3, thresh=0,
                      with_pandas=WITH_PANDAS):
    """
    Build the starting inverted index from a dataframe, daids, and words,
    optionally followed by the internals of the inverted structure.

    Args:
        annots_df: dataframe-like object whose 'vecs' column is indexed by
            the converted daids.
        daids: database annotation ids; converted via pdh.ensure_values for
            stacking, stored on the InvertedIndex unconverted.
        words: visual words; converted via pdh.ensure_values for
            nntool.flann_cache, stored on the InvertedIndex unconverted.
        with_internals: run compute_data_internals_ on the index when true.
        aggregate, alpha, thresh: passed straight through to
            compute_data_internals_.
        with_pandas: choose between pandas-wrapped containers (shared 'idx'
            index) and the raw nntool.invertable_stack outputs.

    Returns:
        The constructed InvertedIndex.

    Example:
        >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, words = smk_debug.testdata_words()
        >>> with_internals = False
        >>> invindex = index_data_annots(annots_df, daids, words, with_internals)

        #>>> print(utool.hashstr(repr(list(invindex.__dict__.values()))))
        #v8+i5i8+55j0swio
    """
    if utool.VERBOSE:
        print('[smk_index] index_data_annots')

    raw_words = pdh.ensure_values(words)
    wordflann = nntool.flann_cache(raw_words, flann_params={}, appname='smk')

    raw_daids = pdh.ensure_values(daids)
    raw_vecs, raw_aids, raw_fxs = nntool.invertable_stack(
        pdh.ensure_2d_values(annots_df['vecs'][raw_daids]),
        raw_daids,
    )

    # Start from the raw stacked outputs and upgrade to pandas if requested.
    idx2_dfx, idx2_daid, idx2_dvec = raw_fxs, raw_aids, raw_vecs
    if with_pandas:
        shared_idx = pdh.IntIndex(np.arange(len(raw_aids)), name='idx')
        idx2_dfx = pdh.IntSeries(raw_fxs, index=shared_idx, name='fx')
        idx2_daid = pdh.IntSeries(raw_aids, index=shared_idx, name='aid')
        idx2_dvec = pd.DataFrame(raw_vecs, index=shared_idx,
                                 columns=VEC_COLUMNS)

    invindex = InvertedIndex(
        words, wordflann, idx2_dvec, idx2_daid, idx2_dfx, daids)
    if with_internals:
        # Tagged "99%" in the original -- presumably a profiling note about
        # where the time goes; verify before relying on it.
        compute_data_internals_(invindex, aggregate, alpha, thresh)
    return invindex