def compute_query_repr(annots_df, qaid, invindex, aggregate=False, alpha=3, thresh=0):
    """
    Gets the query ready for computations

    Looks up the query vectors, assigns them to words, computes the per-word
    residual vectors and finally the gamma scalar of the query.

    Args:
        annots_df: container indexed as ``annots_df['vecs'][qaid]``
        qaid: query id; used as the lookup key above and repeated
            ``len(qfx2_wx)`` times to build ``qfx2_aid``
        invindex: object providing ``wx2_weight``, ``words`` and ``wordflann``
        aggregate: forwarded unchanged to ``compute_residuals_``
        alpha: forwarded unchanged to ``smk_core.gamma_summation2``
        thresh: forwarded unchanged to ``smk_core.gamma_summation2``

    Returns:
        tuple: (wx2_qrvecs, wx2_qaids, wx2_qfxs, query_gamma)

    Raises:
        AssertionError: if the computed query gamma is not greater than zero
            (plain ``assert``, so it is skipped when Python runs with -O)

    Example:
        >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, qaid, invindex = smk_debug.testdata_query_repr()
        >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
        >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
        >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
        >>> query_repr = compute_query_repr(annots_df, qaid, invindex, aggregate, alpha, thresh)
        >>> (wx2_qrvecs, wx2_qaids, wx2_qfxs, query_gamma) = query_repr
        >>> assert smk_debug.check_wx2_rvecs(wx2_qrvecs), 'has nan'
        >>> invindex_dbgstr.invindex_dbgstr(invindex)

    Name correspondence recorded by the original author
    (index-side name = query-side name):
        idx2_vec = qfx2_vec
        idx2_aid = qfx2_aid
        idx2_fx  = qfx2_qfx
        wx2_idxs = wx2_qfxs1
    """
    if utool.VERBOSE:
        print('[smk_index] Query Repr qaid=%r' % (qaid,))

    # Everything needed from the inverted index
    wx2_weight = invindex.wx2_weight
    words = invindex.words
    wordflann = invindex.wordflann

    # Query vectors; without pandas they are passed through pdh.ensure_values
    qfx2_vec = annots_df['vecs'][qaid]
    if not WITH_PANDAS:
        qfx2_vec = pdh.ensure_values(qfx2_vec)

    # Assign query to words (original profiling annotation: 71.9 %)
    wx2_qfxs1, qfx2_wx = assign_to_words_(
        wordflann, words, qfx2_vec, idx_name='fx', dense=False)

    # Hack to make implementing asmk easier, very redundant:
    # the same query id repeated once per entry of qfx2_wx
    num_assigned = len(qfx2_wx)
    qfx2_aid = np.array([qaid] * num_assigned, dtype=INTEGER_TYPE)
    qfx2_qfx = qfx2_vec.index if WITH_PANDAS else np.arange(len(qfx2_vec))

    # Compute query residuals (original profiling annotation: 24.8)
    residual_tup = compute_residuals_(
        words, wx2_qfxs1, qfx2_vec, qfx2_aid, qfx2_qfx, aggregate)
    wx2_qrvecs, wx2_qaids, wx2_qfxs = residual_tup

    # Compute query gamma over the words that received residuals
    if utool.VERBOSE:
        print('[smk_index] Query Gamma alpha=%r, thresh=%r' % (alpha, thresh))
    qwx_arr = pdh.ensure_index(wx2_qrvecs).astype(np.int32)
    qweight_list = pdh.ensure_values_subset(wx2_weight, qwx_arr)
    qrvecs_list = pdh.ensure_values_subset(wx2_qrvecs, qwx_arr)
    query_gamma = smk_core.gamma_summation2(qrvecs_list, qweight_list, alpha, thresh)
    assert query_gamma > 0, 'query gamma is not positive!'
    return (wx2_qrvecs, wx2_qaids, wx2_qfxs, query_gamma)
def compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_weight, alpha=3, thresh=0):
    """
    Computes gamma normalization scalar for the database annotations

    Internals step4

    The residual vectors stored per word are regrouped per annotation id;
    ``smk_core.gamma_summation2`` is then evaluated once per annotation over
    every word that annotation has residuals for.

    Args:
        idx2_daid: not read by this function
        wx2_rvecs: word index -> residual vectors (accessed via pdh helpers)
        wx2_aids: word index -> annotation ids; presumably aligned
            element-wise with the entries of wx2_rvecs - TODO confirm
        wx2_weight: word index -> weight, indexed as ``wx2_weight[wx]``
        alpha: forwarded unchanged to ``smk_core.gamma_summation2``
        thresh: forwarded unchanged to ``smk_core.gamma_summation2``

    Returns:
        daid2_gamma: ``pdh.IntSeries`` named 'gamma' whose index is the list
            of annotation ids and whose values are the gammas

    Example:
        >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_rvecs, wx2_aids = smk_debug.testdata_raw_internals2()
        >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
        >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
        >>> idx2_daid = invindex.idx2_daid
        >>> wx2_weight = wx2_idf
        >>> daid2_gamma = compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_weight, alpha, thresh)
    """
    # Grouping by aid and words
    wx_sublist = pdh.ensure_values(pdh.ensure_index(wx2_rvecs))
    if utool.VERBOSE:
        print('[smk_index] Compute Gamma alpha=%r, thresh=%r: ' % (alpha, thresh))
        # Only the "end" callback of the progress logger is ever used
        _mark_group, end_group = utool.log_progress(
            '[smk_index] Gamma Group: ', len(wx_sublist),
            flushfreq=100, writefreq=50)
    word_rvecs_list = pdh.ensure_values_subset(wx2_rvecs, wx_sublist)
    word_aids_list = pdh.ensure_values_subset(wx2_aids, wx_sublist)

    # Two level mapping: daid -> (wx -> rvecs of that daid for that word)
    daid2_wx2_drvecs = utool.ddict(lambda: utool.ddict(list))
    for wx, word_aids, word_rvecs in zip(wx_sublist, word_aids_list, word_rvecs_list):
        # NOTE(review): group_indicies presumably yields each distinct aid
        # together with the positions where it occurs - confirm
        aid_keys, groupxs = smk_speed.group_indicies(word_aids)
        grouped_rvecs = smk_speed.apply_grouping(word_rvecs, groupxs)
        for daid, drvecs in zip(aid_keys, grouped_rvecs):
            daid2_wx2_drvecs[daid][wx] = drvecs

    # For every daid, compute its gamma using pregrouped rvecs
    # Summation over words for each aid
    if utool.VERBOSE:
        end_group()
        _mark_sum, end_sum = utool.log_progress(
            '[smk_index] Gamma Sum: ', len(daid2_wx2_drvecs),
            flushfreq=100, writefreq=25)

    # Flatten the nested mapping into parallel per-daid lists
    daid_items = list(daid2_wx2_drvecs.items())
    aid_list = [daid for daid, _ in daid_items]
    aidrvecs_list = [list(wx2_drvecs.values()) for _, wx2_drvecs in daid_items]
    aidweight_list = [
        [wx2_weight[wx] for wx in wx2_drvecs.keys()]
        for _, wx2_drvecs in daid_items
    ]

    gamma_list = [
        smk_core.gamma_summation2(drvecs_list, dweight_list, alpha, thresh)
        for dweight_list, drvecs_list in zip(aidweight_list, aidrvecs_list)
    ]
    daid2_gamma = pdh.IntSeries(gamma_list, index=aid_list, name='gamma')
    if utool.VERBOSE:
        end_sum()
    return daid2_gamma
def compute_query_repr(annots_df, qaid, invindex, aggregate=False, alpha=3, thresh=0):
    """
    Gets the query ready for computations

    Looks up the query vectors, assigns them to words, computes the per-word
    residual vectors and finally the gamma scalar of the query.

    Args:
        annots_df: container indexed as ``annots_df['vecs'][qaid]``
        qaid: query id; used as the lookup key above and repeated
            ``len(qfx2_wx)`` times to build ``qfx2_aid``
        invindex: object providing ``wx2_weight``, ``words`` and ``wordflann``
        aggregate: forwarded unchanged to ``compute_residuals_``
        alpha: forwarded unchanged to ``smk_core.gamma_summation2``
        thresh: forwarded unchanged to ``smk_core.gamma_summation2``

    Returns:
        tuple: (wx2_qrvecs, wx2_qaids, wx2_qfxs, query_gamma)

    Raises:
        AssertionError: if the computed query gamma is not greater than zero
            (plain ``assert``, so it is skipped when Python runs with -O)

    Example:
        >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, qaid, invindex = smk_debug.testdata_query_repr()
        >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
        >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
        >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
        >>> query_repr = compute_query_repr(annots_df, qaid, invindex, aggregate, alpha, thresh)
        >>> (wx2_qrvecs, wx2_qaids, wx2_qfxs, query_gamma) = query_repr
        >>> assert smk_debug.check_wx2_rvecs(wx2_qrvecs), 'has nan'
        >>> invindex_dbgstr.invindex_dbgstr(invindex)

    Name correspondence recorded by the original author
    (index-side name = query-side name):
        idx2_vec = qfx2_vec
        idx2_aid = qfx2_aid
        idx2_fx  = qfx2_qfx
        wx2_idxs = wx2_qfxs1
    """
    if utool.VERBOSE:
        print('[smk_index] Query Repr qaid=%r' % (qaid,))

    # Everything needed from the inverted index
    wx2_weight = invindex.wx2_weight
    words = invindex.words
    wordflann = invindex.wordflann

    # Query vectors; without pandas they are passed through pdh.ensure_values
    qfx2_vec = annots_df['vecs'][qaid]
    if not WITH_PANDAS:
        qfx2_vec = pdh.ensure_values(qfx2_vec)

    # Assign query to words (original profiling annotation: 71.9 %)
    wx2_qfxs1, qfx2_wx = assign_to_words_(
        wordflann, words, qfx2_vec, idx_name='fx', dense=False)

    # Hack to make implementing asmk easier, very redundant:
    # the same query id repeated once per entry of qfx2_wx
    num_assigned = len(qfx2_wx)
    qfx2_aid = np.array([qaid] * num_assigned, dtype=INTEGER_TYPE)
    qfx2_qfx = qfx2_vec.index if WITH_PANDAS else np.arange(len(qfx2_vec))

    # Compute query residuals (original profiling annotation: 24.8)
    residual_tup = compute_residuals_(
        words, wx2_qfxs1, qfx2_vec, qfx2_aid, qfx2_qfx, aggregate)
    wx2_qrvecs, wx2_qaids, wx2_qfxs = residual_tup

    # Compute query gamma over the words that received residuals
    if utool.VERBOSE:
        print('[smk_index] Query Gamma alpha=%r, thresh=%r' % (alpha, thresh))
    qwx_arr = pdh.ensure_index(wx2_qrvecs).astype(np.int32)
    qweight_list = pdh.ensure_values_subset(wx2_weight, qwx_arr)
    qrvecs_list = pdh.ensure_values_subset(wx2_qrvecs, qwx_arr)
    query_gamma = smk_core.gamma_summation2(qrvecs_list, qweight_list, alpha, thresh)
    assert query_gamma > 0, 'query gamma is not positive!'
    return (wx2_qrvecs, wx2_qaids, wx2_qfxs, query_gamma)
def compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_weight, alpha=3, thresh=0):
    """
    Computes gamma normalization scalar for the database annotations

    Internals step4

    The residual vectors stored per word are regrouped per annotation id;
    ``smk_core.gamma_summation2`` is then evaluated once per annotation over
    every word that annotation has residuals for.

    Args:
        idx2_daid: not read by this function
        wx2_rvecs: word index -> residual vectors (accessed via pdh helpers)
        wx2_aids: word index -> annotation ids; presumably aligned
            element-wise with the entries of wx2_rvecs - TODO confirm
        wx2_weight: word index -> weight, indexed as ``wx2_weight[wx]``
        alpha: forwarded unchanged to ``smk_core.gamma_summation2``
        thresh: forwarded unchanged to ``smk_core.gamma_summation2``

    Returns:
        daid2_gamma: ``pdh.IntSeries`` named 'gamma' whose index is the list
            of annotation ids and whose values are the gammas

    Example:
        >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_rvecs, wx2_aids = smk_debug.testdata_raw_internals2()
        >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
        >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
        >>> idx2_daid = invindex.idx2_daid
        >>> wx2_weight = wx2_idf
        >>> daid2_gamma = compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_weight, alpha, thresh)
    """
    # Grouping by aid and words
    wx_sublist = pdh.ensure_values(pdh.ensure_index(wx2_rvecs))
    if utool.VERBOSE:
        print('[smk_index] Compute Gamma alpha=%r, thresh=%r: ' % (alpha, thresh))
        # Only the "end" callback of the progress logger is ever used
        _mark_group, end_group = utool.log_progress(
            '[smk_index] Gamma Group: ', len(wx_sublist),
            flushfreq=100, writefreq=50)
    word_rvecs_list = pdh.ensure_values_subset(wx2_rvecs, wx_sublist)
    word_aids_list = pdh.ensure_values_subset(wx2_aids, wx_sublist)

    # Two level mapping: daid -> (wx -> rvecs of that daid for that word)
    daid2_wx2_drvecs = utool.ddict(lambda: utool.ddict(list))
    for wx, word_aids, word_rvecs in zip(wx_sublist, word_aids_list, word_rvecs_list):
        # NOTE(review): group_indicies presumably yields each distinct aid
        # together with the positions where it occurs - confirm
        aid_keys, groupxs = smk_speed.group_indicies(word_aids)
        grouped_rvecs = smk_speed.apply_grouping(word_rvecs, groupxs)
        for daid, drvecs in zip(aid_keys, grouped_rvecs):
            daid2_wx2_drvecs[daid][wx] = drvecs

    # For every daid, compute its gamma using pregrouped rvecs
    # Summation over words for each aid
    if utool.VERBOSE:
        end_group()
        _mark_sum, end_sum = utool.log_progress(
            '[smk_index] Gamma Sum: ', len(daid2_wx2_drvecs),
            flushfreq=100, writefreq=25)

    # Flatten the nested mapping into parallel per-daid lists
    daid_items = list(daid2_wx2_drvecs.items())
    aid_list = [daid for daid, _ in daid_items]
    aidrvecs_list = [list(wx2_drvecs.values()) for _, wx2_drvecs in daid_items]
    aidweight_list = [
        [wx2_weight[wx] for wx in wx2_drvecs.keys()]
        for _, wx2_drvecs in daid_items
    ]

    gamma_list = [
        smk_core.gamma_summation2(drvecs_list, dweight_list, alpha, thresh)
        for dweight_list, drvecs_list in zip(aidweight_list, aidrvecs_list)
    ]
    daid2_gamma = pdh.IntSeries(gamma_list, index=aid_list, name='gamma')
    if utool.VERBOSE:
        end_sum()
    return daid2_gamma
def compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_idf, alpha=3, thresh=0):
    """
    Computes gamma normalization scalar for the database annotations

    Internals step4

    The residual vectors stored per word are regrouped per annotation id;
    ``smk_core.gamma_summation2`` is then evaluated once per annotation over
    every word that annotation has residuals for.

    Args:
        idx2_daid: not read by this function
        wx2_rvecs: word index -> residual vectors (accessed via pdh helpers)
        wx2_aids: word index -> annotation ids; presumably aligned
            element-wise with the entries of wx2_rvecs - TODO confirm
        wx2_idf: word index -> idf weight, indexed as ``wx2_idf[wx]``
        alpha: forwarded unchanged to ``smk_core.gamma_summation2``
        thresh: forwarded unchanged to ``smk_core.gamma_summation2``

    Returns:
        daid2_gamma: annotation id -> gamma; a ``pdh.IntSeries`` named
            'gamma' when WITH_PANDAS is set, otherwise a plain dict

    Example:
        >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_rvecs, wx2_aids = smk_debug.testdata_raw_internals2()
        >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
        >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
        >>> idx2_daid = invindex.idx2_daid
        >>> daid2_gamma = compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_idf, alpha, thresh)
    """
    if utool.DEBUG2:
        # Debug-only sanity check of the two word-indexed inputs
        from ibeis.model.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_rvecs, wx2_aids=wx2_aids)

    wx_sublist = pdh.ensure_values(pdh.ensure_index(wx2_rvecs))
    if utool.VERBOSE:
        print('[smk_index] Compute Gamma alpha=%r, thresh=%r: ' % (alpha, thresh))
        # Only the "end" callback of the progress logger is ever used
        _mark_group, end_group = utool.log_progress(
            '[smk_index] Gamma group (by word): ', len(wx_sublist),
            flushfreq=100, writefreq=50, with_totaltime=True)

    # Get list of aids and rvecs w.r.t. words
    word_aids_list = pdh.ensure_values_subset(wx2_aids, wx_sublist)
    word_rvecs_list = pdh.ensure_values_subset(wx2_rvecs, wx_sublist)

    # Group by daids first and then by word index:
    # daid -> (wx -> rvecs of that daid for that word)
    daid2_wx2_drvecs = utool.ddict(lambda: utool.ddict(list))
    for wx, word_aids, word_rvecs in zip(wx_sublist, word_aids_list, word_rvecs_list):
        # NOTE(review): group_indicies presumably yields each distinct aid
        # together with the positions where it occurs - confirm
        aid_keys, groupxs = clustertool.group_indicies(word_aids)
        grouped_rvecs = clustertool.apply_grouping(word_rvecs, groupxs)
        for daid, drvecs in zip(aid_keys, grouped_rvecs):
            daid2_wx2_drvecs[daid][wx] = drvecs

    # For every daid, compute its gamma using pregrouped rvecs
    # Summation over words for each aid
    if utool.VERBOSE:
        end_group()
        _mark_sum, end_sum = utool.log_progress(
            '[smk_index] Gamma Sum (over daid): ', len(daid2_wx2_drvecs),
            flushfreq=100, writefreq=25, with_totaltime=True)

    # Get lists w.r.t daids: flatten the nested mapping into parallel lists
    daid_items = list(daid2_wx2_drvecs.items())
    aid_list = [daid for daid, _ in daid_items]
    aidrvecs_list = [list(wx2_drvecs.values()) for _, wx2_drvecs in daid_items]
    aididf_list = [
        [wx2_idf[wx] for wx in wx2_drvecs.keys()]
        for _, wx2_drvecs in daid_items
    ]

    if utool.DEBUG2:
        # Debug-only: every daid must have exactly one idf per word entry;
        # on failure report, drop into the embedded shell, then re-raise
        try:
            for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list):
                assert len(idf_list) == len(rvecs_list), 'one list for each word'
        except Exception as ex:
            utool.printex(ex)
            utool.embed()
            raise

    gamma_list = [
        smk_core.gamma_summation2(drvecs_list, didf_list, alpha, thresh)
        for didf_list, drvecs_list in zip(aididf_list, aidrvecs_list)
    ]

    if WITH_PANDAS:
        daid2_gamma = pdh.IntSeries(gamma_list, index=aid_list, name='gamma')
    else:
        daid2_gamma = dict(zip(aid_list, gamma_list))
    if utool.VERBOSE:
        end_sum()
    return daid2_gamma
def compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_idf, alpha=3, thresh=0):
    """
    Computes gamma normalization scalar for the database annotations

    Internals step4

    The residual vectors stored per word are regrouped per annotation id;
    ``smk_core.gamma_summation2`` is then evaluated once per annotation over
    every word that annotation has residuals for.

    Args:
        idx2_daid: not read by this function
        wx2_rvecs: word index -> residual vectors (accessed via pdh helpers)
        wx2_aids: word index -> annotation ids; presumably aligned
            element-wise with the entries of wx2_rvecs - TODO confirm
        wx2_idf: word index -> idf weight, indexed as ``wx2_idf[wx]``
        alpha: forwarded unchanged to ``smk_core.gamma_summation2``
        thresh: forwarded unchanged to ``smk_core.gamma_summation2``

    Returns:
        daid2_gamma: annotation id -> gamma; a ``pdh.IntSeries`` named
            'gamma' when WITH_PANDAS is set, otherwise a plain dict

    Example:
        >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_rvecs, wx2_aids = smk_debug.testdata_raw_internals2()
        >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
        >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
        >>> idx2_daid = invindex.idx2_daid
        >>> daid2_gamma = compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_idf, alpha, thresh)
    """
    if utool.DEBUG2:
        # Debug-only sanity check of the two word-indexed inputs
        from ibeis.model.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_rvecs, wx2_aids=wx2_aids)

    wx_sublist = pdh.ensure_values(pdh.ensure_index(wx2_rvecs))
    if utool.VERBOSE:
        print('[smk_index] Compute Gamma alpha=%r, thresh=%r: ' % (alpha, thresh))
        # Only the "end" callback of the progress logger is ever used
        _mark_group, end_group = utool.log_progress(
            '[smk_index] Gamma group (by word): ', len(wx_sublist),
            flushfreq=100, writefreq=50, with_totaltime=True)

    # Get list of aids and rvecs w.r.t. words
    word_aids_list = pdh.ensure_values_subset(wx2_aids, wx_sublist)
    word_rvecs_list = pdh.ensure_values_subset(wx2_rvecs, wx_sublist)

    # Group by daids first and then by word index:
    # daid -> (wx -> rvecs of that daid for that word)
    daid2_wx2_drvecs = utool.ddict(lambda: utool.ddict(list))
    for wx, word_aids, word_rvecs in zip(wx_sublist, word_aids_list, word_rvecs_list):
        # NOTE(review): group_indicies presumably yields each distinct aid
        # together with the positions where it occurs - confirm
        aid_keys, groupxs = clustertool.group_indicies(word_aids)
        grouped_rvecs = clustertool.apply_grouping(word_rvecs, groupxs)
        for daid, drvecs in zip(aid_keys, grouped_rvecs):
            daid2_wx2_drvecs[daid][wx] = drvecs

    # For every daid, compute its gamma using pregrouped rvecs
    # Summation over words for each aid
    if utool.VERBOSE:
        end_group()
        _mark_sum, end_sum = utool.log_progress(
            '[smk_index] Gamma Sum (over daid): ', len(daid2_wx2_drvecs),
            flushfreq=100, writefreq=25, with_totaltime=True)

    # Get lists w.r.t daids: flatten the nested mapping into parallel lists
    daid_items = list(daid2_wx2_drvecs.items())
    aid_list = [daid for daid, _ in daid_items]
    aidrvecs_list = [list(wx2_drvecs.values()) for _, wx2_drvecs in daid_items]
    aididf_list = [
        [wx2_idf[wx] for wx in wx2_drvecs.keys()]
        for _, wx2_drvecs in daid_items
    ]

    if utool.DEBUG2:
        # Debug-only: every daid must have exactly one idf per word entry;
        # on failure report, drop into the embedded shell, then re-raise
        try:
            for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list):
                assert len(idf_list) == len(rvecs_list), 'one list for each word'
        except Exception as ex:
            utool.printex(ex)
            utool.embed()
            raise

    gamma_list = [
        smk_core.gamma_summation2(drvecs_list, didf_list, alpha, thresh)
        for didf_list, drvecs_list in zip(aididf_list, aidrvecs_list)
    ]

    if WITH_PANDAS:
        daid2_gamma = pdh.IntSeries(gamma_list, index=aid_list, name='gamma')
    else:
        daid2_gamma = dict(zip(aid_list, gamma_list))
    if utool.VERBOSE:
        end_sum()
    return daid2_gamma