def group_correspondences(all_matches, all_scores, all_daids, daid2_sccw):
    daid_keys, groupxs = clustertool.group_indices(all_daids)
    fs_list = clustertool.apply_grouping(all_scores, groupxs)
    fm_list = clustertool.apply_grouping(all_matches, groupxs)
    daid2_fm = {daid: fm for daid, fm in zip(daid_keys, fm_list)}
    daid2_fs = {daid: fs * daid2_sccw[daid]
                for daid, fs in zip(daid_keys, fs_list)}
    # FIXME: generalize to when nAssign > 1
    daid2_fk = {daid: np.ones(fs.size, dtype=hstypes.FK_DTYPE)
                for daid, fs in zip(daid_keys, fs_list)}
    daid2_chipmatch = (daid2_fm, daid2_fs, daid2_fk)
    return daid2_chipmatch
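
# ---------------------------------------------------------------------------
# Illustrative sketch (not pipeline code): a numpy-only version of the
# group-by-daid pattern that group_correspondences delegates to
# clustertool.group_indices / clustertool.apply_grouping. All names and
# values here are hypothetical toy data.
def _sketch_group_correspondences():
    import numpy as np
    all_daids  = np.array([2, 1, 2, 1, 3])            # annotation id per match
    all_scores = np.array([0.5, 0.2, 0.1, 0.4, 0.9])  # score per match
    daid2_sccw = {1: 0.5, 2: 2.0, 3: 1.0}             # per-annotation weights
    # Stable-sort by daid, then split at the boundaries between distinct daids
    sortx = all_daids.argsort(kind='mergesort')
    sorted_daids = all_daids[sortx]
    boundaries = np.nonzero(np.diff(sorted_daids))[0] + 1
    groupxs = np.split(sortx, boundaries)
    daid_keys = [all_daids[xs[0]] for xs in groupxs]
    # Apply the per-annotation sccw normalization, mirroring daid2_fs above
    daid2_fs = {daid: all_scores.take(xs) * daid2_sccw[daid]
                for daid, xs in zip(daid_keys, groupxs)}
    return daid2_fs
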
def build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs,
                           scores_list, daids_list, query_sccw):
    """
    Builds explicit chipmatches that the rest of the pipeline plays nice with

    Notation:
        An explicit cmtup_old is a tuple (fm, fs, fk) of feature_matches,
        feature_scores, and feature_ranks.

        Let N be the number of matches

        A feature match, fm{shape=(N, 2), dtype=int32}, is an array where the
        first column corresponds to query_feature_indexes (qfx) and the second
        column corresponds to database_feature_indexes (dfx).

        A feature score, fs{shape=(N,), dtype=float64} is an array of scores

        A feature rank, fk{shape=(N,), dtype=int16} is an array of ranks

    Returns:
        daid2_chipmatch (dict) : (daid2_fm, daid2_fs, daid2_fk)

    Return Format::
        daid2_fm (dict): {daid: fm, ...}
        daid2_fs (dict): {daid: fs, ...}
        daid2_fk (dict): {daid: fk, ...}

    Example:
        >>> from ibeis.algo.hots.smk.smk_core import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, invindex, qindex, qparams = smk_debug.testdata_match_kernel_L2()
        >>> wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw = qindex
        >>> smk_alpha = ibs.cfg.query_cfg.smk_cfg.smk_alpha
        >>> smk_thresh = ibs.cfg.query_cfg.smk_cfg.smk_thresh
        >>> withinfo = True  # takes 11s vs 2s
        >>> args = (wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw, invindex, withinfo, smk_alpha, smk_thresh)
        >>> retL1 = match_kernel_L1(*args)
        >>> (daid2_totalscore, common_wxs, scores_list, daids_list, idf_list, daid_agg_keys,) = retL1
        >>> daid2_chipmatch_old = build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> daid2_chipmatch_new = build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> print(utool.is_dicteq(daid2_chipmatch_old[0], daid2_chipmatch_new[0]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[2], daid2_chipmatch_new[2]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[1], daid2_chipmatch_new[1]))

        %timeit build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        %timeit build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
    """
    # FIXME: move groupby to vtool
    if utool.VERBOSE:
        print('[smk_core] build cmtup_old')
    wx2_dfxs = invindex.wx2_fxs
    daid2_sccw = invindex.daid2_sccw

    qfxs_list = [wx2_qfxs[wx] for wx in common_wxs]
    dfxs_list = [wx2_dfxs[wx] for wx in common_wxs]

    shapes_list = [scores.shape for scores in scores_list]  # 51us
    shape_ranges = [(mem_arange(w), mem_arange(h)) for (w, h) in shapes_list]  # 230us
    ijs_list = [mem_meshgrid(wrange, hrange) for (wrange, hrange) in shape_ranges]  # 278us
    # Normalize scores for words, nMatches, and query sccw (still need daid sccw)
    nscores_iter = (scores * query_sccw for scores in scores_list)

    # FIXME: Preflatten all of these lists
    out_ijs = [list(zip(_is.flat, _js.flat)) for (_is, _js) in ijs_list]
    out_qfxs = [[qfxs[ix] for (ix, jx) in ijs]
                for (qfxs, ijs) in zip(qfxs_list, out_ijs)]
    out_dfxs = [[dfxs[jx] for (ix, jx) in ijs]
                for (dfxs, ijs) in zip(dfxs_list, out_ijs)]
    out_daids = ([daids[jx] for (ix, jx) in ijs]
                 for (daids, ijs) in zip(daids_list, out_ijs))
    out_scores = ([nscores[ijx] for ijx in ijs]
                  for (nscores, ijs) in zip(nscores_iter, out_ijs))
    nested_fm_iter = [[tuple(product(qfxs_, dfxs_))
                       for qfxs_, dfxs_ in zip(qfxs, dfxs)]
                      for qfxs, dfxs in zip(out_qfxs, out_dfxs)]
    all_fms = np.array(list(utool.iflatten(utool.iflatten(nested_fm_iter))),
                       dtype=hstypes.FM_DTYPE)
    nested_nmatch_list = [[len(fm) for fm in fms] for fms in nested_fm_iter]
    nested_daid_iter = ([[daid] * nMatch
                         for nMatch, daid in zip(nMatch_list, daids)]
                        for nMatch_list, daids in zip(nested_nmatch_list, out_daids))
    nested_score_iter = ([[score / nMatch] * nMatch
                          for nMatch, score in zip(nMatch_list, scores)]
                         for nMatch_list, scores in zip(nested_nmatch_list, out_scores))
    all_daids_ = np.array(list(utool.iflatten(utool.iflatten(nested_daid_iter))),
                          dtype=hstypes.INDEX_TYPE)
    all_fss = np.array(list(utool.iflatten(utool.iflatten(nested_score_iter))),
                       dtype=hstypes.FS_DTYPE)

    # Filter out 0 scores
    keep_xs = np.where(all_fss > 0)[0]
    all_fss = all_fss.take(keep_xs)
    all_fms = all_fms.take(keep_xs, axis=0)
    all_daids_ = all_daids_.take(keep_xs)

    daid_keys, groupxs = clustertool.group_indices(all_daids_)
    fs_list = clustertool.apply_grouping(all_fss, groupxs)
    fm_list = clustertool.apply_grouping(all_fms, groupxs)
    daid2_fm = {daid: fm for daid, fm in zip(daid_keys, fm_list)}
    daid2_fs = {daid: fs * daid2_sccw[daid]
                for daid, fs in zip(daid_keys, fs_list)}
    # FIXME: generalize to when nAssign > 1
    daid2_fk = {daid: np.ones(fs.size, dtype=hstypes.FK_DTYPE)
                for daid, fs in zip(daid_keys, fs_list)}
    daid2_chipmatch = (daid2_fm, daid2_fs, daid2_fk)
    return daid2_chipmatch
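
# ---------------------------------------------------------------------------
# Illustrative sketch (hypothetical toy values, numpy-only): how one common
# word expands into explicit (qfx, dfx) rows in build_daid2_chipmatch2, with
# the word's normalized score split evenly over its nMatch cross-product
# matches, as in nested_score_iter above.
def _sketch_expand_word_matches():
    import numpy as np
    from itertools import product
    qfxs = [3, 7]         # query feature indexes assigned to this word
    dfxs = [10, 11, 12]   # database feature indexes assigned to this word
    word_score = 0.6      # word score, already scaled by query_sccw
    fm = np.array(list(product(qfxs, dfxs)), dtype=np.int32)  # shape (6, 2)
    nMatch = len(fm)
    fs = np.full(nMatch, word_score / nMatch)  # score / nMatch per row
    return fm, fs
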
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes the sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self-consistency criterion weight --- a scalar which ensures
    that the score of K(X, X) = 1

    Args:
        idx2_daid ():
        wx2_drvecs ():
        wx2_dflags ():
        wx2_aids ():
        wx2_idf ():
        wx2_dmaws ():
        smk_alpha ():
        smk_thresh ():

    Returns:
        daid2_sccw

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> wx2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws = invindex.wx2_dmaws
        >>> idx2_daid = invindex.idx2_daid
        >>> daids = invindex.daids
        >>> smk_alpha = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """
    #for wx in wx_sublist:
    #    print(len(wx2_dmaws
    verbose_ = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (i.e. one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    aids_perword = [wx2_aids[wx] for wx in wx_sublist]

    # wx_list1: Lays out word indexes for each annotation
    # tx_list1: Temporary within annotation subindex + wx uniquely identifies
    # item in wx2_drvecs, wx2_dflags, and wx2_dmaws

    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because it is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids) for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relevant data for sccw measure by word for each annotation grouping

    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws = _vector_subgroup_by_wx(wx2_dmaws, wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    #subgrouped_dmaws = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    #subgrouped_dflags = iter(lambda: None, 1)
    subgrouped_idfs = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        progiter = ut.ProgressIter(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                   total=len(unique_aids), freq=10,
                                   with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list,
                                   smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags, subgrouped_dmaws,
                     subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
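
# ---------------------------------------------------------------------------
# Illustrative sketch of the normalization that sccw_summation computes. Per
# the SMK paper (and the docstring above, which requires K(X, X) = 1),
# gamma(X) = (sum_w idf_w * selfmatch_w)^(-1/2). The default arguments are
# hypothetical stand-ins for per-word idf values and per-word self-match
# scores of one annotation.
def _sketch_sccw(idf_list=(1.5, 0.7), selfscore_list=(0.9, 0.4)):
    # Idf-weighted self-similarity summed over the words present in X
    selfscore = sum(idf * s for idf, s in zip(idf_list, selfscore_list))
    # Scaling scores by this weight makes the annotation's self-score 1
    return selfscore ** -0.5
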
def compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_idf,
                        alpha=3, thresh=0):
    """
    Computes gamma normalization scalar for the database annotations
    Internals step4

    >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
    >>> from ibeis.model.hots.smk import smk_debug
    >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_rvecs, wx2_aids = smk_debug.testdata_raw_internals2()
    >>> alpha = ibs.cfg.query_cfg.smk_cfg.alpha
    >>> thresh = ibs.cfg.query_cfg.smk_cfg.thresh
    >>> idx2_daid = invindex.idx2_daid
    >>> wx2_idf = wx2_idf
    >>> daid2_gamma = compute_data_gamma_(idx2_daid, wx2_rvecs, wx2_aids, wx2_idf, alpha, thresh)
    """
    if utool.DEBUG2:
        from ibeis.model.hots.smk import smk_debug
        smk_debug.rrr()
        smk_debug.check_wx2(wx2_rvecs=wx2_rvecs, wx2_aids=wx2_aids)
    wx_sublist = pdh.ensure_values(pdh.ensure_index(wx2_rvecs))
    if utool.VERBOSE:
        print('[smk_index] Compute Gamma alpha=%r, thresh=%r: ' % (alpha, thresh))
        mark1, end1_ = utool.log_progress(
            '[smk_index] Gamma group (by word): ', len(wx_sublist),
            flushfreq=100, writefreq=50, with_totaltime=True)
    # Get list of aids and rvecs w.r.t. words
    aids_list = pdh.ensure_values_subset(wx2_aids, wx_sublist)
    rvecs_list1 = pdh.ensure_values_subset(wx2_rvecs, wx_sublist)
    # Group by daids first and then by word index
    daid2_wx2_drvecs = utool.ddict(lambda: utool.ddict(list))
    for wx, aids, rvecs in zip(wx_sublist, aids_list, rvecs_list1):
        group_aids, groupxs = clustertool.group_indices(aids)
        rvecs_group = clustertool.apply_grouping(rvecs, groupxs)  # 2.9 ms
        for aid, rvecs_ in zip(group_aids, rvecs_group):
            daid2_wx2_drvecs[aid][wx] = rvecs_
    if utool.VERBOSE:
        end1_()
    # For every daid, compute its gamma using pregrouped rvecs
    # Summation over words for each aid
    if utool.VERBOSE:
        mark2, end2_ = utool.log_progress(
            '[smk_index] Gamma Sum (over daid): ', len(daid2_wx2_drvecs),
            flushfreq=100, writefreq=25, with_totaltime=True)
    # Get lists w.r.t daids
    aid_list = list(daid2_wx2_drvecs.keys())
    # list of mappings from words to rvecs for each daid
    # [wx2_aidrvecs_1, ..., wx2_aidrvecs_nDaids,]
    _wx2_aidrvecs_list = list(daid2_wx2_drvecs.values())
    _aidwxs_iter = (list(wx2_aidrvecs.keys()) for wx2_aidrvecs in _wx2_aidrvecs_list)
    aidrvecs_list = [list(wx2_aidrvecs.values()) for wx2_aidrvecs in _wx2_aidrvecs_list]
    aididf_list = [[wx2_idf[wx] for wx in aidwxs] for aidwxs in _aidwxs_iter]

    #gamma_list = []
    if utool.DEBUG2:
        try:
            for count, (idf_list, rvecs_list) in enumerate(zip(aididf_list, aidrvecs_list)):
                assert len(idf_list) == len(rvecs_list), 'one list for each word'
                #gamma = smk_core.gamma_summation2(rvecs_list, idf_list, alpha, thresh)
        except Exception as ex:
            utool.printex(ex)
            utool.embed()
            raise
    gamma_list = [smk_core.gamma_summation2(rvecs_list, idf_list, alpha, thresh)
                  for idf_list, rvecs_list in zip(aididf_list, aidrvecs_list)]
    if WITH_PANDAS:
        daid2_gamma = pdh.IntSeries(gamma_list, index=aid_list, name='gamma')
    else:
        daid2_gamma = dict(zip(aid_list, gamma_list))
    if utool.VERBOSE:
        end2_()
    return daid2_gamma
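
# ---------------------------------------------------------------------------
# Illustrative sketch (hypothetical toy data): the two-level regrouping done
# in compute_data_gamma_, rebucketing residual vectors stored per word
# (wx2_rvecs, aligned with wx2_aids) into a per-annotation, per-word mapping.
def _sketch_regroup_by_daid():
    from collections import defaultdict
    wx2_aids = {0: [1, 2, 1], 1: [2]}                 # owning aid per rvec
    wx2_rvecs = {0: ['r0a', 'r0b', 'r0c'], 1: ['r1a']}
    daid2_wx2_drvecs = defaultdict(lambda: defaultdict(list))
    for wx, aids in wx2_aids.items():
        for aid, rvec in zip(aids, wx2_rvecs[wx]):
            daid2_wx2_drvecs[aid][wx].append(rvec)
    # e.g. daid2_wx2_drvecs[1][0] == ['r0a', 'r0c']
    return daid2_wx2_drvecs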