def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
                           smk_alpha, smk_thresh, verbose=False):
    """
    Legacy computation of the per-annotation sccw (self consistency weight).

    Residual vectors are grouped by annotation id and then by word index with
    ``clustertool.double_group``; ``smk_scoring.sccw_summation`` is then called
    once per annotation with flags and maws passed as ``None``.

    Args:
        idx2_daid (): not read by this function
        wx2_drvecs (dict): word index -> residual vectors
        wx2_aids (dict): word index -> annotation ids (presumably aligned
            with the entries of wx2_drvecs; verify against caller)
        wx2_idf (dict): word index -> idf weight
        wx2_dmaws (dict): word index -> multi-assign weights; only passed to
            the DEBUG2 sanity check
        smk_alpha (): forwarded to smk_scoring.sccw_summation
        smk_thresh (): forwarded to smk_scoring.sccw_summation
        verbose (bool): print progress even when ut.VERBOSE is off

    Returns:
        dict: daid2_sccw, annotation id -> sccw value
    """
    # Evaluate once so the progress callbacks created below are guaranteed to
    # exist whenever they are later invoked.
    verbose_ = ut.VERBOSE or verbose
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)

    with ut.Timer('timer_orig1'):
        # dict.keys() is a view on Python 3; np.array(view) would build a 0-d
        # object array that cannot be iterated, so materialize a list first.
        wx_sublist = np.array(list(wx2_drvecs.keys()))
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if verbose_:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
            _, end1_ = ut.log_progress(
                '[smk_index.sccw] SCCW group (by present words): ', len(wx_sublist),
                freq=100, with_time=WITH_TOTALTIME)
        # Get list of aids and rvecs w.r.t. words
        aids_list = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        maws_list = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)

        if verbose_:
            end1_()

        # For every daid, compute its sccw using pregrouped rvecs
        # Summation over words for each aid
        if verbose_:
            _, end2_ = ut.log_progress(
                '[smk_index.sccw] SCCW Sum (over daid): ', len(daid2_wx2_drvecs),
                freq=25, with_time=WITH_TOTALTIME)
        # Get lists w.r.t daids
        aid_list = list(daid2_wx2_drvecs.keys())
        # list of mappings from words to rvecs foreach daid
        # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
        _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
        _aidwxs_iter = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
        aidrvecs_list = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
        aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        sccw_list = [smk_scoring.sccw_summation(rvecs_list, None, idf_list, None, smk_alpha, smk_thresh)
                     for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]

        daid2_sccw = dict(zip(aid_list, sccw_list))
    if verbose_:
        end2_()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
def OLD_compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
                           smk_alpha, smk_thresh, verbose=False):
    """
    Legacy computation of the per-annotation sccw (self consistency weight).

    Residual vectors are grouped by annotation id and then by word index with
    ``clustertool.double_group``; ``smk_scoring.sccw_summation`` is then called
    once per annotation with flags and maws passed as ``None``.

    Args:
        idx2_daid (): not read by this function
        wx2_drvecs (dict): word index -> residual vectors
        wx2_aids (dict): word index -> annotation ids (presumably aligned
            with the entries of wx2_drvecs; verify against caller)
        wx2_idf (dict): word index -> idf weight
        wx2_dmaws (dict): word index -> multi-assign weights; only passed to
            the DEBUG2 sanity check
        smk_alpha (): forwarded to smk_scoring.sccw_summation
        smk_thresh (): forwarded to smk_scoring.sccw_summation
        verbose (bool): print progress even when ut.VERBOSE is off

    Returns:
        dict: daid2_sccw, annotation id -> sccw value
    """
    verbose_ = ut.VERBOSE or verbose
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)

    with ut.Timer('timer_orig1'):
        # dict.keys() is a view on Python 3; np.array(view) would build a 0-d
        # object array that cannot be iterated, so materialize a list first.
        wx_sublist = np.array(list(wx2_drvecs.keys()))
        if not ut.QUIET:
            print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
        if verbose_:
            print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        # Get list of aids and rvecs w.r.t. words
        aids_list = [wx2_aids[wx] for wx in wx_sublist]
        rvecs_list1 = [wx2_drvecs[wx] for wx in wx_sublist]
        maws_list = [wx2_dmaws[wx] for wx in wx_sublist]
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.assert_single_assigned_maws(maws_list)
        # Group by daids first and then by word index
        daid2_wx2_drvecs = clustertool.double_group(wx_sublist, aids_list, rvecs_list1)

        # For every daid, compute its sccw using pregrouped rvecs
        # Summation over words for each aid
        if verbose_:
            print('[smk_index.sccw] SCCW Sum (over daid): ')
        # Get lists w.r.t daids
        aid_list = list(daid2_wx2_drvecs.keys())
        # list of mappings from words to rvecs foreach daid
        # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
        _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
        _aidwxs_iter = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
        aidrvecs_list = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
        aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    with ut.Timer('timer_orig2'):
        if ut.DEBUG2:
            from ibeis.algo.hots.smk import smk_debug
            smk_debug.check_data_smksumm(aididf_list, aidrvecs_list)
        # TODO: implement database side soft-assign
        sccw_list = [smk_scoring.sccw_summation(rvecs_list, None, idf_list, None, smk_alpha, smk_thresh)
                     for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]

        daid2_sccw = dict(zip(aid_list, sccw_list))
    if verbose_:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self consistency criterion weight --- a scalar which ensures
    the score of K(X, X) = 1

    Args:
        idx2_daid (): not read by this function
        wx2_drvecs (dict): word index -> residual vectors (indexed with
            ``.take(txs, axis=0)``, so presumably an ndarray per word)
        wx2_dflags (dict): word index -> flags, indexed like wx2_drvecs
        wx2_aids (dict): word index -> array of annotation ids; position i is
            taken to refer to row i of the word's rvecs / flags / maws
        wx2_idf (dict): word index -> idf weight (one scalar per word)
        wx2_dmaws (dict): word index -> multi-assign weights, indexed like
            wx2_drvecs
        smk_alpha (): forwarded to smk_scoring.sccw_summation
        smk_thresh (): forwarded to smk_scoring.sccw_summation
        verbose (bool): print progress even when ut.VERBOSE is off

    Returns:
        dict: daid2_sccw, annotation id -> sccw value. Empty when
            wx2_drvecs has no words.

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """
    verbose_ = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        _, end1_ = ut.log_progress(
            '[smk_index.sccw] SCCW group (by present words): ', len(wx2_drvecs),
            freq=100, with_time=WITH_TOTALTIME)

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (ie one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    if wx_sublist.size == 0:
        # np.hstack raises ValueError on an empty sequence. Without any words
        # there is nothing to group, so no annotation receives a weight.
        if verbose_:
            end1_()
            print('[smk_index.sccw] L___ End Compute Data SCCW\n')
        return {}
    aids_perword = [wx2_aids[wx] for wx in wx_sublist]

    # A (wx, tx) pair, where tx is the position inside a word's arrays,
    # uniquely identifies an item in wx2_drvecs, wx2_dflags, and wx2_dmaws
    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids) for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relevant data for sccw measure by word for each annotation grouping

    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        # For every annotation, select that annotation's rows of each word's array
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        # For every annotation, look up the per-word scalar of each of its words
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws = _vector_subgroup_by_wx(wx2_dmaws, wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    #subgrouped_dmaws = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    #subgrouped_dflags = iter(lambda: None, 1)
    subgrouped_idfs = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        end1_()
        _, end2_ = ut.log_progress(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                   total=len(unique_aids), freq=100,
                                   with_time=WITH_TOTALTIME)
        progiter = ut.ProgressIter(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                   total=len(unique_aids), freq=10,
                                   with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags, subgrouped_dmaws, subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        end2_()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')

    return daid2_sccw
def compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid, idx2_fx,
                       aggregate, verbose=False):
    """
    Computes residual vectors based on word assignments
    returns mapping from word index to a set of residual vectors

    Args:
        words (ndarray):
        wx2_idxs (dict): word index -> array of descriptor indexes
        wx2_maws (dict): word index -> multi-assign weights; only read when
            aggregate is True, otherwise returned unchanged
        idx2_vec (ndarray): descriptor vectors (read via ``.shape``)
        idx2_aid (ndarray): annotation id per descriptor (read via ``.take``)
        idx2_fx (ndarray): feature index per descriptor (read via ``.take``
            and slicing)
        aggregate (bool): if True residuals are combined with
            smk_residuals.compute_agg_rvecs
        verbose (bool): print progress even when ut.VERBOSE is off

    Returns:
        tuple : (wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags)
            each a dict keyed by word index, formatted as::
            * wx2_rvecs - [ ... [ rvec_i1, ..., rvec_Mi ]_i ... ]
            * wx2_aids  - [ ... [  aid_i1, ...,  aid_Mi ]_i ... ]
            * wx2_fxs   - [ ... [[fxs]_i1, ..., [fxs]_Mi ]_i ... ]

        For every word::
            * list of (agg)vecs
            * For every (agg)vec:
                * one parent aid
                * list of parent fxs, if aggregate is False: len(fxs) == 1

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1()
        >>> words     = invindex.words
        >>> idx2_aid  = invindex.idx2_daid
        >>> idx2_fx   = invindex.idx2_dfx
        >>> idx2_vec  = invindex.idx2_dvec
        >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
        >>> wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags = compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid, idx2_fx, aggregate)
    """
    verbose_ = ut.VERBOSE or verbose
    if not ut.QUIET:
        print('[smk_index.rvec] +--- Start Compute Residuals')

    # dict.keys() is a view on Python 3; np.array(view) would build a 0-d
    # object array that cannot be iterated, so materialize a list first.
    wx_sublist = np.array(list(wx2_idxs.keys()))
    # Build lists w.r.t. words
    idxs_list = [wx2_idxs[wx].astype(hstypes.INDEX_TYPE) for wx in wx_sublist]
    aids_list = [idx2_aid.take(idxs) for idxs in idxs_list]
    if ut.DEBUG2:
        #assert np.all(np.diff(wx_sublist) == 1), 'not dense'
        assert all([len(a) == len(b) for a, b in zip(idxs_list, aids_list)]), 'bad alignment'
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
        assert idx2_vec.shape[0] == idx2_aid.shape[0]
    # Prealloc output
    if verbose_:
        #print('[smk_index.rvec] Residual Vectors for %d words. aggregate=%r' %
        #      (len(wx2_idxs), aggregate,))
        lbl = '[smk_index.rvec] agg rvecs' if aggregate else '[smk_index.rvec] nonagg rvecs'
        _, end_ = ut.log_progress(lbl, len(wx2_idxs), freq=50, with_time=True)
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2_idxs(wx2_idxs, len(words))
    # Compute Residuals
    rvecs_list, flags_list = smk_residuals.compute_nonagg_rvecs(words, idx2_vec, wx_sublist, idxs_list)

    if ut.VERBOSE:
        print('Computed size(rvecs_list) = %r' % ut.get_object_size_str(rvecs_list))
        print('Computed size(flags_list) = %r' % ut.get_object_size_str(flags_list))
    if aggregate:
        maws_list = [wx2_maws[wx] for wx in wx_sublist]
        # Aggregate Residuals
        tup = smk_residuals.compute_agg_rvecs(rvecs_list, idxs_list, aids_list, maws_list)
        (aggvecs_list, aggaids_list, aggidxs_list, aggmaws_list, aggflags_list) = tup
        # Pack into common query structure
        aggfxs_list = [[idx2_fx.take(idxs) for idxs in aggidxs] for aggidxs in aggidxs_list]
        wx2_aggvecs = dict(zip(wx_sublist, aggvecs_list))
        wx2_aggaids = dict(zip(wx_sublist, aggaids_list))
        wx2_aggfxs = dict(zip(wx_sublist, aggfxs_list))
        wx2_aggmaws = dict(zip(wx_sublist, aggmaws_list))
        wx2_aggflags = dict(zip(wx_sublist, aggflags_list))
        (wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags) = (
            wx2_aggvecs, wx2_aggaids, wx2_aggfxs, wx2_aggmaws, wx2_aggflags)
    else:
        # Hack non-aggregate residuals to have the same structure as aggregate
        # residuals for compatibility: i.e. each rvec gets a list of fxs that
        # contributed to it, and for SMK this is a list of size 1
        fxs_list = [[idx2_fx[idx:idx + 1] for idx in idxs] for idxs in idxs_list]
        wx2_rvecs = dict(zip(wx_sublist, rvecs_list))
        wx2_aids = dict(zip(wx_sublist, aids_list))
        wx2_fxs = dict(zip(wx_sublist, fxs_list))
        wx2_flags = dict(zip(wx_sublist, flags_list))
        # wx2_maws is intentionally passed through untouched in this branch
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(words, wx2_rvecs, wx2_aids, wx2_fxs)
    if verbose_:
        end_()
        print('[smk_index.rvec] L___ End Compute Residuals')
    return wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self consistency criterion weight --- a scalar which ensures
    the score of K(X, X) = 1

    Args:
        idx2_daid (): not read by this function
        wx2_drvecs (dict): word index -> residual vectors (indexed with
            ``.take(txs, axis=0)``, so presumably an ndarray per word)
        wx2_dflags (dict): word index -> flags, indexed like wx2_drvecs
        wx2_aids (dict): word index -> array of annotation ids; position i is
            taken to refer to row i of the word's rvecs / flags / maws
        wx2_idf (dict): word index -> idf weight (one scalar per word)
        wx2_dmaws (dict): word index -> multi-assign weights, indexed like
            wx2_drvecs
        smk_alpha (): forwarded to smk_scoring.sccw_summation
        smk_thresh (): forwarded to smk_scoring.sccw_summation
        verbose (bool): print progress even when ut.VERBOSE is off

    Returns:
        dict: daid2_sccw, annotation id -> sccw value. Empty when
            wx2_drvecs has no words.

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """
    verbose_ = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (ie one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    if wx_sublist.size == 0:
        # np.hstack raises ValueError on an empty sequence. Without any words
        # there is nothing to group, so no annotation receives a weight.
        if verbose_:
            print('[smk_index.sccw] L___ End Compute Data SCCW\n')
        return {}
    aids_perword = [wx2_aids[wx] for wx in wx_sublist]

    # A (wx, tx) pair, where tx is the position inside a word's arrays,
    # uniquely identifies an item in wx2_drvecs, wx2_dflags, and wx2_dmaws
    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids) for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relevant data for sccw measure by word for each annotation grouping

    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        # For every annotation, select that annotation's rows of each word's array
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        # For every annotation, look up the per-word scalar of each of its words
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws = _vector_subgroup_by_wx(wx2_dmaws, wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    #subgrouped_dmaws = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    #subgrouped_dflags = iter(lambda: None, 1)
    subgrouped_idfs = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        progiter = ut.ProgressIter(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                   total=len(unique_aids), freq=10,
                                   with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags, subgrouped_dmaws, subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')

    return daid2_sccw
def compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid, idx2_fx,
                       aggregate, verbose=False):
    """
    Computes residual vectors based on word assignments
    returns mapping from word index to a set of residual vectors

    Args:
        words (ndarray):
        wx2_idxs (dict): word index -> array of descriptor indexes
        wx2_maws (dict): word index -> multi-assign weights; only read when
            aggregate is True, otherwise returned unchanged
        idx2_vec (ndarray): descriptor vectors (read via ``.shape``)
        idx2_aid (ndarray): annotation id per descriptor (read via ``.take``)
        idx2_fx (ndarray): feature index per descriptor (read via ``.take``
            and slicing)
        aggregate (bool): if True residuals are combined with
            smk_residuals.compute_agg_rvecs
        verbose (bool): print progress even when ut.VERBOSE is off

    Returns:
        tuple : (wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags)
            each a dict keyed by word index, formatted as::
            * wx2_rvecs - [ ... [ rvec_i1, ..., rvec_Mi ]_i ... ]
            * wx2_aids  - [ ... [  aid_i1, ...,  aid_Mi ]_i ... ]
            * wx2_fxs   - [ ... [[fxs]_i1, ..., [fxs]_Mi ]_i ... ]

        For every word::
            * list of (agg)vecs
            * For every (agg)vec:
                * one parent aid
                * list of parent fxs, if aggregate is False: len(fxs) == 1

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1()
        >>> words     = invindex.words
        >>> idx2_aid  = invindex.idx2_daid
        >>> idx2_fx   = invindex.idx2_dfx
        >>> idx2_vec  = invindex.idx2_dvec
        >>> aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
        >>> wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags = compute_residuals_(words, wx2_idxs, wx2_maws, idx2_vec, idx2_aid, idx2_fx, aggregate)
    """
    verbose_ = ut.VERBOSE or verbose
    if not ut.QUIET:
        print('[smk_index.rvec] +--- Start Compute Residuals')

    # dict.keys() is a view on Python 3; np.array(view) would build a 0-d
    # object array that cannot be iterated, so materialize a list first.
    wx_sublist = np.array(list(wx2_idxs.keys()))
    # Build lists w.r.t. words
    idxs_list = [wx2_idxs[wx].astype(hstypes.INDEX_TYPE) for wx in wx_sublist]
    aids_list = [idx2_aid.take(idxs) for idxs in idxs_list]
    if ut.DEBUG2:
        #assert np.all(np.diff(wx_sublist) == 1), 'not dense'
        assert all([len(a) == len(b) for a, b in zip(idxs_list, aids_list)]), 'bad alignment'
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
        assert idx2_vec.shape[0] == idx2_aid.shape[0]
    # Prealloc output
    if verbose_:
        lbl = '[smk_index.rvec] agg rvecs' if aggregate else '[smk_index.rvec] nonagg rvecs'
        print(lbl)
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2_idxs(wx2_idxs, len(words))
    # Compute Residuals
    rvecs_list, flags_list = smk_residuals.compute_nonagg_rvecs(words, idx2_vec, wx_sublist, idxs_list)

    if ut.VERBOSE:
        print('Computed size(rvecs_list) = %r' % ut.get_object_size_str(rvecs_list))
        print('Computed size(flags_list) = %r' % ut.get_object_size_str(flags_list))
    if aggregate:
        maws_list = [wx2_maws[wx] for wx in wx_sublist]
        # Aggregate Residuals
        tup = smk_residuals.compute_agg_rvecs(rvecs_list, idxs_list, aids_list, maws_list)
        (aggvecs_list, aggaids_list, aggidxs_list, aggmaws_list, aggflags_list) = tup
        # Pack into common query structure
        aggfxs_list = [[idx2_fx.take(idxs) for idxs in aggidxs] for aggidxs in aggidxs_list]
        wx2_aggvecs = dict(zip(wx_sublist, aggvecs_list))
        wx2_aggaids = dict(zip(wx_sublist, aggaids_list))
        wx2_aggfxs = dict(zip(wx_sublist, aggfxs_list))
        wx2_aggmaws = dict(zip(wx_sublist, aggmaws_list))
        wx2_aggflags = dict(zip(wx_sublist, aggflags_list))
        (wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags) = (
            wx2_aggvecs, wx2_aggaids, wx2_aggfxs, wx2_aggmaws, wx2_aggflags)
    else:
        # Hack non-aggregate residuals to have the same structure as aggregate
        # residuals for compatibility: i.e. each rvec gets a list of fxs that
        # contributed to it, and for SMK this is a list of size 1
        fxs_list = [[idx2_fx[idx:idx + 1] for idx in idxs] for idxs in idxs_list]
        wx2_rvecs = dict(zip(wx_sublist, rvecs_list))
        wx2_aids = dict(zip(wx_sublist, aids_list))
        wx2_fxs = dict(zip(wx_sublist, fxs_list))
        wx2_flags = dict(zip(wx_sublist, flags_list))
        # wx2_maws is intentionally passed through untouched in this branch
    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(words, wx2_rvecs, wx2_aids, wx2_fxs)
    if verbose_:
        print('[smk_index.rvec] L___ End Compute Residuals')
    return wx2_rvecs, wx2_aids, wx2_fxs, wx2_maws, wx2_flags