def test_sccw_cache():
    """
    Checks that ``smk_index.compute_data_sccw_`` produces identical results
    with and without its cache, and that a previously cached result is not
    stale.

    Raises:
        AssertionError: if the cached and freshly computed sccw weights
            disagree.
    """
    ibs, annots_df, taids, daids, qaids, qreq_, nWords = testdata_dataframe()
    smk_alpha = ibs.cfg.query_cfg.smk_cfg.smk_alpha
    smk_thresh = ibs.cfg.query_cfg.smk_cfg.smk_thresh
    qparams = qreq_.qparams
    words = smk_index.learn_visual_words(annots_df, taids, nWords)
    with_internals = True
    invindex = smk_repr.index_data_annots(annots_df, daids, words, qparams,
                                          with_internals)
    idx2_daid = invindex.idx2_daid
    wx2_drvecs = invindex.wx2_drvecs
    wx2_idf = invindex.wx2_idf
    wx2_aids = invindex.wx2_aids
    wx2_dmaws = invindex.wx2_dmaws
    # BUGFIX: compute_data_sccw_ takes wx2_dflags between wx2_drvecs and
    # wx2_aids (see the calls in compute_data_internals_). The old test
    # omitted it, so wx2_aids and every later positional argument were
    # bound one parameter off.
    wx2_dflags = invindex.wx2_dflags
    daids = invindex.daids
    # Shared positional arguments for all three invocations
    sccw_args = (idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                 wx2_dmaws, smk_alpha, smk_thresh)
    daid2_sccw1 = smk_index.compute_data_sccw_(*sccw_args, use_cache=True)
    daid2_sccw2 = smk_index.compute_data_sccw_(*sccw_args, use_cache=False)
    daid2_sccw3 = smk_index.compute_data_sccw_(*sccw_args, use_cache=True)
    check_daid2_sccw(daid2_sccw1)
    check_daid2_sccw(daid2_sccw2)
    check_daid2_sccw(daid2_sccw3)
    # A cache hit (sccw3) must match the uncached recomputation (sccw2)
    if not np.all(daid2_sccw2 == daid2_sccw3):
        raise AssertionError('caching error in sccw')
    # The first cached answer must not be stale relative to a fresh one
    if not np.all(daid2_sccw1 == daid2_sccw2):
        raise AssertionError('cache outdated in sccw')
def compute_data_internals_(invindex, qparams, memtrack=None, delete_rawvecs=True): """ Builds each of the inverted index internals. invindex (InvertedIndex): object for fast vocab lookup qparams (QueryParams): hyper-parameters memtrack (None): delete_rawvecs (bool): Returns: None Example: >>> from ibeis.algo.hots.smk.smk_repr import * # NOQA >>> from ibeis.algo.hots.smk import smk_debug >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0() >>> compute_data_internals_(invindex, qreq_.qparams) Ignore: idx2_vec = idx2_dvec wx2_maws = _wx2_maws # NOQA """ # Get information #if memtrack is None: # memtrack = ut.MemoryTracker('[DATA INTERNALS ENTRY]') #memtrack.report('[DATA INTERNALS1]') # aggregate = qparams.aggregate smk_alpha = qparams.smk_alpha smk_thresh = qparams.smk_thresh # massign_alpha = qparams.massign_alpha massign_sigma = qparams.massign_sigma massign_equal_weights = qparams.massign_equal_weights # vocab_weighting = qparams.vocab_weighting # nAssign = 1 # single assignment for database side idx2_vec = invindex.idx2_dvec idx2_dfx = invindex.idx2_dfx idx2_daid = invindex.idx2_daid daids = invindex.daids wordflann = invindex.wordflann words = invindex.words daid2_label = invindex.daid2_label wx_series = np.arange(len(words)) #memtrack.track_obj(idx2_vec, 'idx2_vec') if not ut.QUIET: print('[smk_repr] compute_data_internals_') if ut.VERBOSE: print('[smk_repr] * len(daids) = %r' % (len(daids),)) print('[smk_repr] * len(words) = %r' % (len(words),)) print('[smk_repr] * len(idx2_vec) = %r' % (len(idx2_vec),)) print('[smk_repr] * aggregate = %r' % (aggregate,)) print('[smk_repr] * smk_alpha = %r' % (smk_alpha,)) print('[smk_repr] * smk_thresh = %r' % (smk_thresh,)) # Try to use the cache #cfgstr = ut.hashstr_arr(words, 'words') + qparams.feat_cfgstr #cachekw = dict( #cfgstr=cfgstr, #appname='smk_test' #) #invindex_cache = ut.Cacher('inverted_index', **cachekw) #try: # raise IOError('cache is off') # #cachetup = 
invindex_cache.load() # #(idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_maws, daid2_sccw) = cachetup # invindex.idx2_dvec = None #except IOError as ex: # Database word assignments (perform single assignment on database side) wx2_idxs, _wx2_maws, idx2_wxs = smk_index.assign_to_words_( wordflann, words, idx2_vec, nAssign, massign_alpha, massign_sigma, massign_equal_weights) if ut.DEBUG2: assert len(idx2_wxs) == len(idx2_vec) assert len(wx2_idxs.keys()) == len(_wx2_maws.keys()) assert len(wx2_idxs.keys()) <= len(words) try: assert len(wx2_idxs.keys()) == len(words) except AssertionError as ex: ut.printex(ex, iswarning=True) # Database word inverse-document-frequency (idf weights) wx2_idf = smk_index.compute_word_idf_( wx_series, wx2_idxs, idx2_daid, daids, daid2_label, vocab_weighting, verbose=True) if ut.DEBUG2: assert len(wx2_idf) == len(wx2_idf.keys()) # Compute (normalized) residual vectors and inverse mappings wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, wx2_dflags = smk_index.compute_residuals_( words, wx2_idxs, _wx2_maws, idx2_vec, idx2_daid, idx2_dfx, aggregate, verbose=True) if not ut.QUIET: print('[smk_repr] unloading idx2_vec') if delete_rawvecs: # Try to save some memory del _wx2_maws invindex.idx2_dvec = None del idx2_vec # Compute annotation normalization factor daid2_sccw = smk_index.compute_data_sccw_( idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose=True) # Cache save #cachetup = (idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, daid2_sccw) #invindex_cache.save(cachetup) # Store information invindex.idx2_wxs = idx2_wxs # stacked index -> word indexes (might not be needed) invindex.wx2_idxs = wx2_idxs invindex.wx2_idf = wx2_idf invindex.wx2_drvecs = wx2_drvecs invindex.wx2_dflags = wx2_dflags # flag nan rvecs invindex.wx2_aids = wx2_aids # needed for asmk invindex.wx2_fxs = wx2_fxs # needed for asmk invindex.wx2_dmaws = wx2_dmaws # needed for awx2_mawssmk 
invindex.daid2_sccw = daid2_sccw #memtrack.report('[DATA INTERNALS3]') if ut.DEBUG2: from ibeis.algo.hots.smk import smk_debug smk_debug.check_invindex_wx2(invindex)
def compute_data_internals_(invindex, qparams, memtrack=None, delete_rawvecs=True):
    """
    Populates every inverted-index internal structure on ``invindex``.

    Args:
        invindex (InvertedIndex): object for fast vocab lookup; mutated in
            place
        qparams (QueryParams): hyper-parameters
        memtrack (None): unused memory-tracker hook
        delete_rawvecs (bool): free raw descriptor vectors when finished

    Returns:
        None

    Example:
        >>> from ibeis.algo.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0()
        >>> compute_data_internals_(invindex, qreq_.qparams)
    """
    # Hyper-parameters driving scoring, multi-assignment, and word weighting
    aggregate = qparams.aggregate
    smk_alpha = qparams.smk_alpha
    smk_thresh = qparams.smk_thresh
    massign_alpha = qparams.massign_alpha
    massign_sigma = qparams.massign_sigma
    massign_equal_weights = qparams.massign_equal_weights
    vocab_weighting = qparams.vocab_weighting
    nAssign = 1  # the database side always uses single word assignment
    # Pieces of the inverted index that this routine reads
    idx2_vec = invindex.idx2_dvec
    idx2_dfx = invindex.idx2_dfx
    idx2_daid = invindex.idx2_daid
    daids = invindex.daids
    wordflann = invindex.wordflann
    words = invindex.words
    daid2_label = invindex.daid2_label
    wx_series = np.arange(len(words))
    if not ut.QUIET:
        print('[smk_repr] compute_data_internals_')
    if ut.VERBOSE:
        print('[smk_repr] * len(daids) = %r' % (len(daids), ))
        print('[smk_repr] * len(words) = %r' % (len(words), ))
        print('[smk_repr] * len(idx2_vec) = %r' % (len(idx2_vec), ))
        print('[smk_repr] * aggregate = %r' % (aggregate, ))
        print('[smk_repr] * smk_alpha = %r' % (smk_alpha, ))
        print('[smk_repr] * smk_thresh = %r' % (smk_thresh, ))
    # Step 1: assign each database descriptor to its visual word
    assign_tup = smk_index.assign_to_words_(
        wordflann, words, idx2_vec, nAssign,
        massign_alpha, massign_sigma, massign_equal_weights)
    wx2_idxs, _wx2_maws, idx2_wxs = assign_tup
    if ut.DEBUG2:
        assert len(idx2_wxs) == len(idx2_vec)
        assert len(wx2_idxs.keys()) == len(_wx2_maws.keys())
        assert len(wx2_idxs.keys()) <= len(words)
        try:
            # Unassigned words are tolerated; emit a warning instead of dying
            assert len(wx2_idxs.keys()) == len(words)
        except AssertionError as ex:
            ut.printex(ex, iswarning=True)
    # Step 2: inverse-document-frequency weight for each word
    wx2_idf = smk_index.compute_word_idf_(
        wx_series, wx2_idxs, idx2_daid, daids,
        daid2_label, vocab_weighting, verbose=True)
    if ut.DEBUG2:
        assert len(wx2_idf) == len(wx2_idf.keys())
    # Step 3: normalized residual vectors plus inverse mappings
    residual_tup = smk_index.compute_residuals_(
        words, wx2_idxs, _wx2_maws, idx2_vec, idx2_daid, idx2_dfx,
        aggregate, verbose=True)
    wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, wx2_dflags = residual_tup
    if not ut.QUIET:
        print('[smk_repr] unloading idx2_vec')
    if delete_rawvecs:
        # Drop references to the raw vectors to reduce memory pressure
        del _wx2_maws
        invindex.idx2_dvec = None
        del idx2_vec
    # Step 4: per-annotation normalization factor (sccw)
    daid2_sccw = smk_index.compute_data_sccw_(
        idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
        wx2_dmaws, smk_alpha, smk_thresh, verbose=True)
    # Publish everything back onto the inverted index
    invindex.idx2_wxs = idx2_wxs      # stacked index -> word indexes (might not be needed)
    invindex.wx2_idxs = wx2_idxs
    invindex.wx2_idf = wx2_idf
    invindex.wx2_drvecs = wx2_drvecs
    invindex.wx2_dflags = wx2_dflags  # flags nan rvecs
    invindex.wx2_aids = wx2_aids      # needed for asmk
    invindex.wx2_fxs = wx2_fxs        # needed for asmk
    invindex.wx2_dmaws = wx2_dmaws    # needed for asmk
    invindex.daid2_sccw = daid2_sccw
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_invindex_wx2(invindex)