def get_patches(inva, wx, ibs, verbose=True):
    """
    Loads the patches assigned to a particular word in this stack

    >>> inva.wx_to_aids = inva.compute_inverted_list()
    >>> verbose = True
    """
    config = inva.config
    aid_list = inva.wx_to_aids[wx]
    X_list = [inva.get_annot(aid) for aid in aid_list]
    fxs_groups = [X.fxs(wx) for X in X_list]
    all_kpts_list = ibs.depc.d.get_feat_kpts(aid_list, config=config)
    sub_kpts_list = vt.ziptake(all_kpts_list, fxs_groups, axis=0)
    total_patches = sum(ut.lmap(len, fxs_groups))

    chip_list = ibs.depc_annot.d.get_chips_img(aid_list, config=config)
    # convert to appropriate colorspace
    # if colorspace is not None:
    #     chip_list = vt.convert_image_list_colorspace(chip_list, colorspace)
    # ut.print_object_size(chip_list, 'chip_list')

    patch_size = 64
    shape = (total_patches, patch_size, patch_size, 3)
    _prog = ut.ProgPartial(enabled=verbose, lbl='warping patches', bs=True)
    _patchiter = ut.iflatten([
        vt.get_warped_patches(chip, kpts, patch_size=patch_size)[0]
        # vt.get_warped_patches(chip, kpts, patch_size=patch_size, use_cpp=True)[0]
        for chip, kpts in _prog(zip(chip_list, sub_kpts_list),
                                length=len(aid_list))
    ])
    word_patches = vt.fromiter_nd(_patchiter, shape, dtype=np.uint8)
    return word_patches
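# The flat patch iterator above is consumed by vt.fromiter_nd without building an
# intermediate list. A rough pure-numpy stand-in for that step (a sketch only; the
# patch contents here are made up, and the real vt.fromiter_nd may differ):
import numpy as np

def fromiter_nd_sketch(patch_iter, shape, dtype=np.uint8):
    # Preallocate the output stack and fill it one patch at a time
    out = np.empty(shape, dtype=dtype)
    for i, patch in enumerate(patch_iter):
        out[i] = patch
    return out

patch_size = 64
patches = (np.full((patch_size, patch_size, 3), i, dtype=np.uint8) for i in range(5))
stack = fromiter_nd_sketch(patches, (5, patch_size, patch_size, 3))
assert stack.shape == (5, 64, 64, 3) and stack[3].max() == 3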
def ensure_names_are_connected(graph, aids_list):
    aug_graph = graph.copy().to_undirected()
    orig_edges = aug_graph.edges()
    unflat_edges = [list(itertools.product(aids, aids)) for aids in aids_list]
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    new_edges = ut.setdiff_ordered(aid_pairs, aug_graph.edges())

    preweighted_edges = nx.get_edge_attributes(aug_graph, 'weight')
    if preweighted_edges:
        orig_edges = ut.setdiff(orig_edges, list(preweighted_edges.keys()))

    aug_graph.add_edges_from(new_edges)
    # Ensure the largest possible set of original edges is in the MST
    nx.set_edge_attributes(aug_graph, name='weight',
                           values=dict([(edge, 1.0) for edge in new_edges]))
    nx.set_edge_attributes(aug_graph, name='weight',
                           values=dict([(edge, 0.1) for edge in orig_edges]))
    for cc_sub_graph in nx.connected_component_subgraphs(aug_graph):
        mst_sub_graph = nx.minimum_spanning_tree(cc_sub_graph)
        for edge in mst_sub_graph.edges():
            redge = edge[::-1]
            if not (graph.has_edge(*edge) or graph.has_edge(*redge)):
                graph.add_edge(*redge, attr_dict={})
def get_name_rowid_edges_from_nids(ibs, nids):
    aids_list = ibs.get_name_aids(nids)
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)
    return aids1, aids2
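# The product-and-flatten idiom above recurs throughout these examples. A minimal
# standalone sketch with only the standard library (no ibs controller or utool);
# `grouped_pairs` is a hypothetical helper name used just for illustration:
import itertools

def grouped_pairs(aids_list):
    """Flatten per-name groups into directed (aid1, aid2) pairs, skipping self-loops."""
    unflat_edges = (itertools.product(aids, aids) for aids in aids_list)
    return [tup for tup in itertools.chain.from_iterable(unflat_edges)
            if tup[0] != tup[1]]

# Two name groups produce all ordered within-group pairs
assert grouped_pairs([[1, 2], [3, 4, 5]]) == [
    (1, 2), (2, 1),
    (3, 4), (3, 5), (4, 3), (4, 5), (5, 3), (5, 4)]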
def get_name_rowid_edges_from_aids2(ibs, aids_list):
    # grouped version
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    # if full:
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)
    return aids1, aids2
def compute_word_weights(inva, method='idf'):
    """
    Compute a per-word weight like idf

    Example:
        >>> # xdoctest: +REQUIRES(--slow)
        >>> # ENABLE_DOCTEST
        >>> from wbia.algo.smk.inverted_index import *  # NOQA
        >>> qreq_, inva = testdata_inva()
        >>> wx_to_weight = inva.compute_word_weights()
        >>> print('wx_to_weight = %r' % (wx_to_weight,))
    """
    wx_list = sorted(inva.wx_to_aids.keys())
    with ut.Timer('Computing %s weights' % (method,)):
        if method == 'idf':
            ndocs_total = len(inva.aids)
            # Unweighted documents
            ndocs_per_word = np.array(
                [len(set(inva.wx_to_aids[wx])) for wx in wx_list])
            weight_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word)
        elif method == 'idf-maw':
            # idf denom (the num of docs containing a word for each word)
            # The max(maws) denote the prob that this word indexes an annot
            ndocs_total = len(inva.aids)
            # Weighted documents
            wx_to_ndocs = {wx: 0.0 for wx in wx_list}
            for wx, maws in zip(ut.iflatten(inva.wx_lists),
                                ut.iflatten(inva.maws_lists)):
                wx_to_ndocs[wx] += min(1.0, max(maws))
            ndocs_per_word = ut.take(wx_to_ndocs, wx_list)
            weight_per_word = smk_funcs.inv_doc_freq(ndocs_total, ndocs_per_word)
        elif method == 'uniform':
            weight_per_word = np.ones(len(wx_list))
        wx_to_weight = dict(zip(wx_list, weight_per_word))
        wx_to_weight = ut.DefaultValueDict(0, wx_to_weight)
    return wx_to_weight
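# The idf branch above delegates to smk_funcs.inv_doc_freq, whose exact form is not
# shown here. A plain ln(N / n_w) weighting is assumed below purely for illustration
# (words indexing fewer documents get larger weights, words in every document get 0):
import numpy as np

def inv_doc_freq_sketch(ndocs_total, ndocs_per_word):
    ndocs_per_word = np.asarray(ndocs_per_word, dtype=np.float64)
    weights = np.zeros_like(ndocs_per_word)
    nonzero = ndocs_per_word > 0
    weights[nonzero] = np.log(ndocs_total / ndocs_per_word[nonzero])
    return weights

print(inv_doc_freq_sketch(100, [1, 10, 100, 0]))  # [4.605..., 2.302..., 0.0, 0.0]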
def translate_all():
    """ Translates all python paths in directory """
    dpaths = utool.ls_moduledirs('.')
    # print('[cyth] translate_all: %r' % (dpaths,))
    globkw = {'recursive': True, 'with_dirs': False, 'with_files': True}
    # Find all unique python files in directory
    fpaths_iter = [
        utool.glob(utool.unixpath(dpath), '*.py', **globkw)
        for dpath in dpaths
    ]
    fpath_iter = utool.iflatten(fpaths_iter)
    abspath_iter = map(utool.unixpath, fpath_iter)
    fpath_list = list(set(list(abspath_iter)))
    # print('[cyth] translate_all: %s' % ('\n'.join(fpath_list),))
    # Try to translate each
    translate(*fpath_list)
def ensure_names_are_connected(graph, aids_list):
    aug_graph = graph.copy().to_undirected()
    orig_edges = aug_graph.edges()
    unflat_edges = [list(itertools.product(aids, aids)) for aids in aids_list]
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    new_edges = ut.setdiff_ordered(aid_pairs, aug_graph.edges())

    preweighted_edges = nx.get_edge_attributes(aug_graph, 'weight')
    if preweighted_edges:
        orig_edges = ut.setdiff(orig_edges, list(preweighted_edges.keys()))

    aug_graph.add_edges_from(new_edges)
    # Ensure the largest possible set of original edges is in the MST
    nx.set_edge_attributes(aug_graph, 'weight',
                           dict([(edge, 1.0) for edge in new_edges]))
    nx.set_edge_attributes(aug_graph, 'weight',
                           dict([(edge, 0.1) for edge in orig_edges]))
    for cc_sub_graph in nx.connected_component_subgraphs(aug_graph):
        mst_sub_graph = nx.minimum_spanning_tree(cc_sub_graph)
        for edge in mst_sub_graph.edges():
            redge = edge[::-1]
            if not (graph.has_edge(*edge) or graph.has_edge(*redge)):
                graph.add_edge(*redge, attr_dict={})
def setup_pzmtest_subgraph():
    import ibeis
    ibs = ibeis.opendb(db='PZ_MTEST')
    nids = ibs.get_valid_nids()
    aids_list = ibs.get_name_aids(nids)

    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    rng = np.random.RandomState(0)
    flags = rng.rand(len(aids1)) > .878
    aids1 = ut.compress(aids1, flags)
    aids2 = ut.compress(aids2, flags)

    for aid1, aid2 in zip(aids1, aids2):
        ibs.set_annot_pair_as_positive_match(aid1, aid2)
        ibs.set_annot_pair_as_positive_match(aid2, aid1)

    rowids = ibs._get_all_annotmatch_rowids()
    aids1 = ibs.get_annotmatch_aid1(rowids)
    aids2 = ibs.get_annotmatch_aid2(rowids)
def build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list):
    """
    helper

    these list comprehensions replace the previous for loop
    they still need to be optimized a little bit (and made clearer)
    can probably unnest the list comprehensions as well
    """
    """
    IGNORE
    Legacy::
        def old_build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list):
            fm_nestlist_ = []
            fs_nestlist_ = []
            daid_nestlist_ = []
            for scores, qfxs, dfxs, daids in zip(sparse_list, qfxs_list, dfxs_list, daids_list):
                for rx, cx, score in zip(scores.row, scores.col, scores.data):
                    _fm = tuple(product(qfxs[rx], dfxs[cx]))
                    _fs = [score / len(_fm)] * len(_fm)
                    _daid = [daids[cx]] * len(_fm)
                    fm_nestlist_.append(_fm)
                    fs_nestlist_.append(_fs)
                    daid_nestlist_.append(_daid)
            return fm_nestlist_, fs_nestlist_, daid_nestlist_

        oldtup_ = old_build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)
        fm_nestlist_, fs_nestlist_, daid_nestlist_ = oldtup_
        newtup_ = build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)
        fm_nestlist, fs_nestlist, daid_nestlist = newtup_

        assert fm_nestlist == fm_nestlist_
        assert fs_nestlist == fs_nestlist_
        assert daid_nestlist == daid_nestlist_

        47ms  %timeit build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)
        59ms  %timeit old_build_correspondences(sparse_list, qfxs_list, dfxs_list, daids_list)
    IGNORE
    """
    # FIXME: rewrite double comprehension as a flat comprehension

    # Build nested feature matches (a single match might have many members)
    fm_nestlist = [
        tuple(product(qfxs[rx], dfxs[cx]))
        for scores, qfxs, dfxs in zip(sparse_list, qfxs_list, dfxs_list)
        for rx, cx in zip(scores.row, scores.col)
    ]
    nFm_list = [len(fm) for fm in fm_nestlist]
    # fs_unsplit = (score
    #               for scores in sparse_list
    #               for score in scores.data)
    # daid_unsplit = (daids[cx]
    #                 for scores, daids in zip(sparse_list, daids_list)
    #                 for cx in scores.col)
    # Build nested feature scores
    fs_unsplit = utool.iflatten(
        (scores.data for scores in sparse_list))
    # Build nested feature matches (a single match might have many members)
    daid_unsplit = utool.iflatten(
        (daids.take(scores.col)
         for scores, daids in zip(sparse_list, daids_list)))
    # Expand feature scores and daids splitting scores amongst match members
    fs_nestlist = [
        [score / nFm] * nFm
        for score, nFm in zip(fs_unsplit, nFm_list)
    ]
    daid_nestlist = [
        [daid] * nFm
        for daid, nFm in zip(daid_unsplit, nFm_list)
    ]

    if DEBUG_SMK:
        assert len(fm_nestlist) == len(fs_nestlist), 'inconsistent len'
        assert len(fm_nestlist) == len(nFm_list), 'inconsistent len'
        assert len(daid_nestlist) == len(fs_nestlist), 'inconsistent len'
        min_ = min(2, len(nFm_list))
        max_ = min(15, len(nFm_list))
        print('nFm_list[_min:_max]      = ' + utool.list_str(nFm_list[min_:max_]))
        print('fm_nestlist[_min:_max]   = ' + utool.list_str(fm_nestlist[min_:max_]))
        print('fs_nestlist[_min:_max]   = ' + utool.list_str(fs_nestlist[min_:max_]))
        print('daid_nestlist[_min:_max] = ' + utool.list_str(daid_nestlist[min_:max_]))
        for fm_, fs_, daid_ in zip(fm_nestlist, fs_nestlist, daid_nestlist):
            assert len(fm_) == len(fs_), 'inconsistent len'
            assert len(fm_) == len(daid_), 'inconsistent len'
        print('[smk_core] checked build_chipmatch correspondence ...ok')
    return fm_nestlist, fs_nestlist, daid_nestlist
def invertible_stack(vecs_list, label_list):
    """
    Stacks descriptors into a flat structure and returns inverse mapping from
    flat database descriptor indexes (dx) to annotation ids (label) and
    feature indexes (fx). Feature indexes are w.r.t. annotation indexes.

    Output:
        idx2_desc  - flat descriptor stack
        idx2_label - inverted index into annotations
        idx2_fx    - inverted index into features

    # Example with 2D Descriptors
    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.nearest_neighbors import *  # NOQA
        >>> DESC_TYPE = np.uint8
        >>> label_list  = [1, 2, 3, 4, 5]
        >>> vecs_list = [
        ...     np.array([[0, 0], [0, 1]], dtype=DESC_TYPE),
        ...     np.array([[5, 3], [2, 30], [1, 1]], dtype=DESC_TYPE),
        ...     np.empty((0, 2), dtype=DESC_TYPE),
        ...     np.array([[5, 3], [2, 30], [1, 1]], dtype=DESC_TYPE),
        ...     np.array([[3, 3], [42, 42], [2, 6]], dtype=DESC_TYPE),
        ...     ]
        >>> idx2_vec, idx2_label, idx2_fx = invertible_stack(vecs_list, label_list)
        >>> print(repr(idx2_vec.T))
        array([[ 0,  0,  5,  2,  1,  5,  2,  1,  3, 42,  2],
               [ 0,  1,  3, 30,  1,  3, 30,  1,  3, 42,  6]], dtype=uint8)
        >>> print(repr(idx2_label))
        array([1, 1, 2, 2, 2, 4, 4, 4, 5, 5, 5])
        >>> print(repr(idx2_fx))
        array([0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 2])
    """
    # INFER DTYPE?
    dtype = vecs_list[0].dtype
    # Build inverted index of (label, fx) pairs
    nFeats = sum(list(map(len, vecs_list)))
    nFeat_iter = map(len, vecs_list)
    label_nFeat_iter = zip(label_list, map(len, vecs_list))
    # generate featx inverted index for each feature in each annotation
    _ax2_fx = [list(range(nFeat)) for nFeat in nFeat_iter]
    # generate label inverted index for each feature in each annotation
    '''
    # this is not a real test the code just happened to be here. syntax is good though
    #-ifdef CYTH_TEST_SWAP
    _ax2_label = [[label] * nFeat for (label, nFeat) in label_nFeat_iter]
    #-else
    '''
    _ax2_label = [[label] * nFeat for (label, nFeat) in label_nFeat_iter]
    # endif is optional. the end of the function scope counts as an #endif
    '#-endif'
    # Flatten generators into the inverted index
    _flatlabels = utool.iflatten(_ax2_label)
    _flatfeatxs = utool.iflatten(_ax2_fx)
    idx2_label = np.fromiter(_flatlabels, np.int32, nFeats)
    idx2_fx = np.fromiter(_flatfeatxs, np.int32, nFeats)
    # Stack descriptors into a numpy array corresponding to the inverted index
    # This might throw a MemoryError
    idx2_vec = np.vstack(vecs_list)
    '#pragma cyth_returntup'
    return idx2_vec, idx2_label, idx2_fx
def get_annotmatch_rowids_between(ibs, aids1, aids2, method=None):
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from wbia.annotmatch_funcs import *  # NOQA
        >>> import wbia
        >>> ibs = wbia.opendb('PZ_MTEST')
        >>> aids1 = aids2 = [1, 2, 3, 4, 5, 6]
        >>> rowids_between = ibs.get_annotmatch_rowids_between
        >>> ams1 = sorted(rowids_between(aids1, aids2, method=1))
        >>> ams2 = sorted(rowids_between(aids1, aids2, method=2))
        >>> assert len(ub.find_duplicates(ams1)) == 0
        >>> assert len(ub.find_duplicates(ams2)) == 0
        >>> assert sorted(ams2) == sorted(ams1)
    """
    if method is None:
        if len(aids1) * len(aids2) > 5000:
            method = 1
        else:
            method = 2
    if method == 1:
        # Strategy 1: get all existing rows and see what intersects
        # This is better when the enumerated set of rows would be larger than
        # the database size
        unflat_rowids1L = ibs.get_annotmatch_rowids_from_aid1(aids1)
        unflat_rowids1R = ibs.get_annotmatch_rowids_from_aid2(aids1)
        unflat_rowids2L = ibs.get_annotmatch_rowids_from_aid1(aids2)
        unflat_rowids2R = ibs.get_annotmatch_rowids_from_aid2(aids2)

        am_rowids1L = {r for r in ut.iflatten(unflat_rowids1L) if r is not None}
        am_rowids1R = {r for r in ut.iflatten(unflat_rowids1R) if r is not None}
        am_rowids2L = {r for r in ut.iflatten(unflat_rowids2L) if r is not None}
        am_rowids2R = {r for r in ut.iflatten(unflat_rowids2R) if r is not None}

        ams12 = am_rowids1L.intersection(am_rowids2R)
        ams21 = am_rowids2L.intersection(am_rowids1R)
        ams = sorted(ams12.union(ams21))
        # ams = sorted(am_rowids1.intersection(am_rowids2))
        # rowids2 = ibs.get_annotmatch_rowids_from_aid2(aid_list)
        # unflat_rowids1 = ibs.get_annotmatch_rowids_from_aid(aids1)
        # unflat_rowids2 = ibs.get_annotmatch_rowids_from_aid(aids2)
        # am_rowids1 = {r for r in ut.iflatten(unflat_rowids1) if r is not None}
        # am_rowids2 = {r for r in ut.iflatten(unflat_rowids2) if r is not None}
        # ams = sorted(am_rowids1.intersection(am_rowids2))
        # ams = ut.isect(am_rowids1, am_rowids2)
    elif method == 2:
        # Strategy 2: enumerate what rows could exist and see what does exist
        # This is better when the enumerated set of rows would be smaller than
        # the database size
        edges = list(ut.product_nonsame(aids1, aids2))
        if len(edges) == 0:
            ams = []
        else:
            aids1_, aids2_ = ut.listT(edges)
            # ams = ibs.get_annotmatch_rowid_from_undirected_superkey(aids1_, aids2_)
            ams = ibs.get_annotmatch_rowid_from_superkey(aids1_, aids2_)
            if ams is None:
                ams = []
            ams = ut.filter_Nones(ams)
    return ams
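# Strategy 1 above reduces to set algebra over match rowids: a row qualifies if its
# (aid1, aid2) pair crosses aids1 x aids2 in either direction. A toy illustration
# with a plain dict standing in for the database lookups (hypothetical data):
annotmatch = {10: (1, 2), 11: (2, 3), 12: (4, 1)}  # rowid -> (aid1, aid2)
aids1 = {1, 2}
aids2 = {2, 3, 4}

ams12 = {r for r, (a1, a2) in annotmatch.items() if a1 in aids1 and a2 in aids2}
ams21 = {r for r, (a1, a2) in annotmatch.items() if a1 in aids2 and a2 in aids1}
assert sorted(ams12 | ams21) == [10, 11, 12]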
def build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs,
                           scores_list, daids_list, query_sccw):
    """
    Builds explicit chipmatches that the rest of the pipeline plays nice with

    Notation:
        An explicit cmtup_old is a tuple (fm, fs, fk) feature_matches,
        feature_scores, and feature_ranks.

        Let N be the number of matches

        A feature match, fm{shape=(N, 2), dtype=int32}, is an array where the
        first column corresponds to query_feature_indexes (qfx) and the second
        column corresponds to database_feature_indexes (dfx).

        A feature score, fs{shape=(N,), dtype=float64} is an array of scores

        A feature rank, fk{shape=(N,), dtype=int16} is an array of ranks

    Returns:
        daid2_chipmatch (dict) : (daid2_fm, daid2_fs, daid2_fk)
        Return Format::
            daid2_fm (dict): {daid: fm, ...}
            daid2_fs (dict): {daid: fs, ...}
            daid2_fk (dict): {daid: fk, ...}

    Example:
        >>> from ibeis.algo.hots.smk.smk_core import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> ibs, invindex, qindex, qparams = smk_debug.testdata_match_kernel_L2()
        >>> wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw = qindex
        >>> smk_alpha  = ibs.cfg.query_cfg.smk_cfg.smk_alpha
        >>> smk_thresh = ibs.cfg.query_cfg.smk_cfg.smk_thresh
        >>> withinfo = True  # takes an 11s vs 2s
        >>> args = (wx2_qrvecs, wx2_qmaws, wx2_qaids, wx2_qfxs, query_sccw, invindex, withinfo, smk_alpha, smk_thresh)
        >>> retL1 = match_kernel_L1(*args)
        >>> (daid2_totalscore, common_wxs, scores_list, daids_list, idf_list, daid_agg_keys,) = retL1
        >>> daid2_chipmatch_old = build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> daid2_chipmatch_new = build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
        >>> print(utool.is_dicteq(daid2_chipmatch_old[0], daid2_chipmatch_new[0]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[2], daid2_chipmatch_new[2]))
        >>> print(utool.is_dicteq(daid2_chipmatch_old[1], daid2_chipmatch_new[1]))

    %timeit build_daid2_chipmatch2(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
    %timeit build_daid2_chipmatch3(invindex, common_wxs, wx2_qaids, wx2_qfxs, scores_list, daids_list, query_sccw)
    """
    # FIXME: move groupby to vtool
    if utool.VERBOSE:
        print('[smk_core] build cmtup_old')

    wx2_dfxs = invindex.wx2_fxs
    daid2_sccw = invindex.daid2_sccw

    qfxs_list = [wx2_qfxs[wx] for wx in common_wxs]
    dfxs_list = [wx2_dfxs[wx] for wx in common_wxs]

    shapes_list = [scores.shape for scores in scores_list]  # 51us
    shape_ranges = [(mem_arange(w), mem_arange(h)) for (w, h) in shapes_list]  # 230us
    ijs_list = [mem_meshgrid(wrange, hrange) for (wrange, hrange) in shape_ranges]  # 278us
    # Normalize scores for words, nMatches, and query sccw (still need daid sccw)
    nscores_iter = (scores * query_sccw for scores in scores_list)

    # FIXME: Preflatten all of these lists
    out_ijs = [
        list(zip(_is.flat, _js.flat))
        for (_is, _js) in ijs_list
    ]
    out_qfxs = [
        [qfxs[ix] for (ix, jx) in ijs]
        for (qfxs, ijs) in zip(qfxs_list, out_ijs)
    ]
    out_dfxs = [
        [dfxs[jx] for (ix, jx) in ijs]
        for (dfxs, ijs) in zip(dfxs_list, out_ijs)
    ]
    out_daids = (
        [daids[jx] for (ix, jx) in ijs]
        for (daids, ijs) in zip(daids_list, out_ijs)
    )
    out_scores = (
        [nscores[ijx] for ijx in ijs]
        for (nscores, ijs) in zip(nscores_iter, out_ijs)
    )
    nested_fm_iter = [
        [
            tuple(product(qfxs_, dfxs_))
            for qfxs_, dfxs_ in zip(qfxs, dfxs)
        ]
        for qfxs, dfxs in zip(out_qfxs, out_dfxs)
    ]
    all_fms = np.array(list(utool.iflatten(utool.iflatten(nested_fm_iter))),
                       dtype=hstypes.FM_DTYPE)
    nested_nmatch_list = [[len(fm) for fm in fms] for fms in nested_fm_iter]
    nested_daid_iter = (
        [
            [daid] * nMatch
            for nMatch, daid in zip(nMatch_list, daids)
        ]
        for nMatch_list, daids in zip(nested_nmatch_list, out_daids)
    )
    nested_score_iter = (
        [
            [score / nMatch] * nMatch
            for nMatch, score in zip(nMatch_list, scores)
        ]
        for nMatch_list, scores in zip(nested_nmatch_list, out_scores)
    )
    all_daids_ = np.array(list(utool.iflatten(utool.iflatten(nested_daid_iter))),
                          dtype=hstypes.INDEX_TYPE)
    all_fss = np.array(list(utool.iflatten(utool.iflatten(nested_score_iter))),
                       dtype=hstypes.FS_DTYPE)

    # Filter out 0 scores
    keep_xs = np.where(all_fss > 0)[0]
    all_fss = all_fss.take(keep_xs)
    all_fms = all_fms.take(keep_xs, axis=0)
    all_daids_ = all_daids_.take(keep_xs)

    daid_keys, groupxs = clustertool.group_indices(all_daids_)
    fs_list = clustertool.apply_grouping(all_fss, groupxs)
    fm_list = clustertool.apply_grouping(all_fms, groupxs)
    daid2_fm = {daid: fm for daid, fm in zip(daid_keys, fm_list)}
    daid2_fs = {daid: fs * daid2_sccw[daid] for daid, fs in zip(daid_keys, fs_list)}
    # FIXME: generalize to when nAssign > 1
    daid2_fk = {daid: np.ones(fs.size, dtype=hstypes.FK_DTYPE)
                for daid, fs in zip(daid_keys, fs_list)}
    daid2_chipmatch = (daid2_fm, daid2_fs, daid2_fk)
    return daid2_chipmatch
def stagger_group(list_):
    return ut.filter_Nones(ut.iflatten(zip_longest(*list_)))
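# A standalone sketch of the staggering behavior above using only itertools; the
# ut.filter_Nones / ut.iflatten pair is replaced with a filter over chain.from_iterable.
# Like the original, this also drops legitimate None entries from the input:
from itertools import chain, zip_longest

def stagger_group_sketch(list_):
    # Interleave the sublists round-robin, dropping the None padding from zip_longest
    return [x for x in chain.from_iterable(zip_longest(*list_)) if x is not None]

assert stagger_group_sketch([[1, 2, 3], ['a', 'b'], [9]]) == [1, 'a', 9, 2, 'b', 3]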
aug_graph = graph.copy()

# remove cut edges from augmented graph
edge_to_iscut = nx.get_edge_attributes(aug_graph, 'is_cut')
cut_edges = [
    (u, v)
    for (u, v, d) in aug_graph.edges(data=True)
    if not (
        d.get('is_cut') or d.get('decision', 'unreviewed') in ['nomatch']
    )
]
cut_edges = [edge for edge, flag in edge_to_iscut.items() if flag]
aug_graph.remove_edges_from(cut_edges)

# Enumerate cliques inside labels
unflat_edges = [list(ut.itertwo(nodes)) for nodes in label_to_nodes.values()]
node_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]

# Remove candidate MST edges that exist in the original graph
orig_edges = list(aug_graph.edges())
candidate_mst_edges = [edge for edge in node_pairs
                       if not aug_graph.has_edge(*edge)]
# randomness prevents chains and visually looks better
rng = np.random.RandomState(42)

def _randint():
    return 0
    return rng.randint(0, 100)

aug_graph.add_edges_from(candidate_mst_edges)
# Weight edges in aug_graph such that existing edges are chosen
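# This fragment (and ensure_names_are_connected above) builds an augmented graph in
# which existing edges get a much smaller weight than candidate edges, so a minimum
# spanning tree reuses reviewed edges and only adds new ones where needed to connect
# a group. A self-contained sketch of that idiom with plain networkx; the helper name
# and weights are illustrative, not the project's:
import networkx as nx

def connect_group_with_mst(graph, group_nodes):
    """Add just enough edges to connect group_nodes, preferring existing edges."""
    aug = nx.Graph()
    aug.add_nodes_from(group_nodes)
    for u in group_nodes:
        for v in group_nodes:
            if u < v:
                # Existing edges are cheap, missing candidate edges are expensive
                aug.add_edge(u, v, weight=0.1 if graph.has_edge(u, v) else 1.0)
    for u, v in nx.minimum_spanning_tree(aug).edges():
        if not graph.has_edge(u, v):
            graph.add_edge(u, v)

graph = nx.Graph([(1, 2), (3, 4)])
connect_group_with_mst(graph, [1, 2, 3, 4])
assert nx.is_connected(graph.subgraph([1, 2, 3, 4]))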
def compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf,
                       wx2_dmaws, smk_alpha, smk_thresh, verbose=False):
    """
    Computes sccw normalization scalar for the database annotations.
    This is gamma from the SMK paper.
    sccw is a self consistency criterion weight --- a scalar which ensures
    the score of K(X, X) = 1

    Args:
        idx2_daid ():
        wx2_drvecs ():
        wx2_aids ():
        wx2_idf ():
        wx2_dmaws ():
        smk_alpha ():
        smk_thresh ():

    Returns:
        daid2_sccw

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.algo.hots.smk.smk_index import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_index
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #tup = smk_debug.testdata_compute_data_sccw(db='testdb1')
        >>> tup = smk_debug.testdata_compute_data_sccw(db='PZ_MTEST')
        >>> ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams = tup
        >>> wx2_dflags = invindex.wx2_dflags
        >>> ws2_idxs = invindex.wx2_idxs
        >>> wx2_dmaws  = invindex.wx2_dmaws
        >>> idx2_daid  = invindex.idx2_daid
        >>> daids      = invindex.daids
        >>> smk_alpha  = qparams.smk_alpha
        >>> smk_thresh = qparams.smk_thresh
        >>> wx2_idf    = wx2_idf
        >>> verbose = True
        >>> invindex.invindex_dbgstr()
        >>> invindex.report_memory()
        >>> invindex.report_memsize()
        >>> daid2_sccw = smk_index.compute_data_sccw_(idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha, smk_thresh, verbose)
    """
    # for wx in wx_sublist:
    #     print(len(wx2_dmaws
    verbose_ = ut.VERBOSE or verbose

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_wx2(wx2_rvecs=wx2_drvecs, wx2_aids=wx2_aids)
    if not ut.QUIET:
        print('\n[smk_index.sccw] +--- Start Compute Data Self Consistency Weight')
    if verbose_:
        print('[smk_index.sccw] Compute SCCW smk_alpha=%r, smk_thresh=%r: ' % (smk_alpha, smk_thresh))
        mark1, end1_ = ut.log_progress(
            '[smk_index.sccw] SCCW group (by present words): ',
            len(wx2_drvecs), freq=100, with_time=WITH_TOTALTIME)

    # Group by daids first and then by word index
    # Get list of aids and rvecs w.r.t. words (ie one item per word)
    wx_sublist = np.array(list(wx2_drvecs.keys()))
    aids_perword = [wx2_aids[wx] for wx in wx_sublist]

    # wx_list1: Lays out word indexes for each annotation
    # tx_list1: Temporary within annotation subindex + wx uniquely identifies
    # item in wx2_drvecs, wx2_dflags, and wx2_dmaws

    # Flatten out indexes to perform grouping
    flat_aids = np.hstack(aids_perword)
    count = len(flat_aids)
    txs_perword = [np.arange(aids.size) for aids in aids_perword]
    flat_txs = np.hstack(txs_perword)
    # fromiter is faster for flat_wxs because is not a list of numpy arrays
    wxs_perword = ([wx] * len(aids) for wx, aids in zip(wx_sublist, aids_perword))
    flat_wxs = np.fromiter(ut.iflatten(wxs_perword), hstypes.INDEX_TYPE, count)

    # Group flat indexes by annotation id
    unique_aids, annot_groupxs = clustertool.group_indices(flat_aids)

    # Wxs and Txs grouped by annotation id
    wxs_perannot = clustertool.apply_grouping_iter(flat_wxs, annot_groupxs)
    txs_perannot = clustertool.apply_grouping_iter(flat_txs, annot_groupxs)

    # Group by word inside each annotation group
    wxsubgrouping_perannot = [clustertool.group_indices(wxs)
                              for wxs in wxs_perannot]
    word_groupxs_perannot = (groupxs for wxs, groupxs in wxsubgrouping_perannot)
    txs_perword_perannot = [clustertool.apply_grouping(txs, groupxs)
                            for txs, groupxs in
                            zip(txs_perannot, word_groupxs_perannot)]
    wxs_perword_perannot = [wxs for wxs, groupxs in wxsubgrouping_perannot]

    # Group relevant data for sccw measure by word for each annotation grouping

    def _vector_subgroup_by_wx(wx2_arr, wxs_perword_perannot, txs_perword_perannot):
        return [[wx2_arr[wx].take(txs, axis=0)
                 for wx, txs in zip(wx_perword_, txs_perword_)]
                for wx_perword_, txs_perword_ in
                zip(wxs_perword_perannot, txs_perword_perannot)]

    def _scalar_subgroup_by_wx(wx2_scalar, wxs_perword_perannot):
        return [[wx2_scalar[wx] for wx in wxs] for wxs in wxs_perword_perannot]

    subgrouped_drvecs = _vector_subgroup_by_wx(wx2_drvecs, wxs_perword_perannot, txs_perword_perannot)
    subgrouped_dmaws = _vector_subgroup_by_wx(wx2_dmaws, wxs_perword_perannot, txs_perword_perannot)
    # If we aren't using dmaws replace it with an infinite None iterator
    # subgrouped_dmaws = iter(lambda: None, 1)
    subgrouped_dflags = _vector_subgroup_by_wx(wx2_dflags, wxs_perword_perannot, txs_perword_perannot)
    # subgrouped_dflags = iter(lambda: None, 1)
    subgrouped_idfs = _scalar_subgroup_by_wx(wx2_idf, wxs_perword_perannot)

    if verbose_:
        end1_()
        mark2, end2_ = ut.log_progress(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                       total=len(unique_aids), freq=100,
                                       with_time=WITH_TOTALTIME)
        progiter = ut.ProgressIter(lbl='[smk_index.sccw] SCCW Sum (over daid): ',
                                   total=len(unique_aids), freq=10,
                                   with_time=WITH_TOTALTIME)
    else:
        progiter = ut.identity

    if ut.DEBUG2:
        from ibeis.algo.hots.smk import smk_debug
        smk_debug.check_data_smksumm(subgrouped_idfs, subgrouped_drvecs)

    sccw_list = [
        smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list,
                                   smk_alpha, smk_thresh)
        for rvecs_list, flags_list, maws_list, idf_list in
        progiter(zip(subgrouped_drvecs, subgrouped_dflags, subgrouped_dmaws,
                     subgrouped_idfs))
    ]
    daid2_sccw = dict(zip(unique_aids, sccw_list))

    if verbose_:
        end2_()
        print('[smk_index.sccw] L___ End Compute Data SCCW\n')
    return daid2_sccw
def make_netx_graph_from_aid_groups(ibs, aids_list, only_reviewed_matches=True,
                                    invis_edges=None, ensure_edges=None,
                                    temp_nids=None, allow_directed=False):
    r"""
    Args:
        ibs (ibeis.IBEISController): image analysis api
        aids_list (list):

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.viz.viz_graph import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> aids_list = [[1, 2, 3, 4], [5, 6, 7]]
        >>> invis_edges = [(1, 5)]
        >>> only_reviewed_matches = True
        >>> graph = make_netx_graph_from_aid_groups(ibs, aids_list,
        >>>                                         only_reviewed_matches,
        >>>                                         invis_edges)
        >>> list(nx.connected_components(graph.to_undirected()))
    """
    #aids_list, nid_list = ibs.group_annots_by_name(aid_list)
    unique_aids = list(ut.flatten(aids_list))

    # grouped version
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    if only_reviewed_matches:
        annotmatch_rowids = ibs.get_annotmatch_rowid_from_superkey(aids1, aids2)
        annotmatch_rowids = ut.filter_Nones(annotmatch_rowids)
        aids1 = ibs.get_annotmatch_aid1(annotmatch_rowids)
        aids2 = ibs.get_annotmatch_aid2(annotmatch_rowids)

    graph = make_netx_graph_from_aidpairs(ibs, aids1, aids2, unique_aids=unique_aids)

    if ensure_edges is not None:
        if ensure_edges == 'all':
            ensure_edges = list(ut.upper_diag_self_prodx(list(graph.nodes())))
        ensure_edges_ = []
        for edge in ensure_edges:
            edge = tuple(edge)
            redge = tuple(edge[::-1])  # HACK
            if graph.has_edge(*edge):
                ensure_edges_.append(edge)
                pass
                #nx.set_edge_attributes(graph, 'weight', {edge: .001})
            elif (not allow_directed) and graph.has_edge(*redge):
                ensure_edges_.append(redge)
                #nx.set_edge_attributes(graph, 'weight', {redge: .001})
                pass
            else:
                ensure_edges_.append(edge)
                #graph.add_edge(*edge, weight=.001)
                graph.add_edge(*edge)

    if temp_nids is None:
        unique_nids = ibs.get_annot_nids(list(graph.nodes()))
    else:
        # HACK
        unique_nids = [1] * len(list(graph.nodes()))
        #unique_nids = temp_nids

    nx.set_node_attributes(graph, 'nid', dict(zip(graph.nodes(), unique_nids)))

    import plottool as pt
    ensure_names_are_connected(graph, aids_list)

    # Color edges by nid
    color_by_nids(graph, unique_nids=unique_nids)
    if invis_edges:
        for edge in invis_edges:
            if graph.has_edge(*edge):
                nx.set_edge_attributes(graph, 'style', {edge: 'invis'})
                nx.set_edge_attributes(graph, 'invisible', {edge: True})
            else:
                graph.add_edge(*edge, style='invis', invisible=True)

    # Hack color images orange
    if ensure_edges:
        nx.set_edge_attributes(graph, 'color',
                               {tuple(edge): pt.ORANGE for edge in ensure_edges_})

    return graph
def sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh):
    r"""
    Computes gamma from "To Aggregate or not to aggregate". Every component in
    each list is with respect to a different word.

    scc = self consistency criterion
    It is a scalar which ensures K(X, X) = 1

    Args:
        rvecs_list (list of ndarrays): residual vectors for every word
        idf_list (list of floats): idf weight for each word
        maws_list (list of ndarrays): multi-assign weights for each word for each residual vector
        smk_alpha (float): selectivity power
        smk_thresh (float): selectivity threshold

    Returns:
        float: sccw self-consistency-criterion weight

    Math:
        \begin{equation}
        \gamma(X) = (\sum_{c \in \C} w_c M(X_c, X_c))^{-.5}
        \end{equation}

    Example:
        >>> from ibeis.algo.hots.smk.smk_scoring import *  # NOQA
        >>> from ibeis.algo.hots.smk import smk_scoring
        >>> from ibeis.algo.hots.smk import smk_debug
        >>> #idf_list, rvecs_list, maws_list, smk_alpha, smk_thresh, wx2_flags = smk_debug.testdata_sccw_sum(db='testdb1')
        >>> tup = smk_debug.testdata_sccw_sum(db='PZ_MTEST', nWords=128000)
        >>> idf_list, rvecs_list, flags_list, maws_list, smk_alpha, smk_thresh = tup
        >>> sccw = smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
        >>> print(sccw)
        0.0201041835751

    CommandLine:
        python smk_match.py --db PZ_MOTHERS --nWords 128

    Ignore:
        0.0384477314197

        qmaws_list = dmaws_list = maws_list
        drvecs_list = qrvecs_list = rvecs_list
        dflags_list = qflags_list = flags_list

        flags_list = flags_list[7:10]
        maws_list  = maws_list[7:10]
        idf_list   = idf_list[7:10]
        rvecs_list = rvecs_list[7:10]
    """
    num_rvecs = len(rvecs_list)
    if DEBUG_SMK:
        assert maws_list is None or len(maws_list) == num_rvecs, 'inconsistent lengths'
        assert num_rvecs == len(idf_list), 'inconsistent lengths'
        assert maws_list is None or list(map(len, maws_list)) == list(map(len, rvecs_list)), 'inconsistent per word lengths'
        assert flags_list is None or list(map(len, maws_list)) == list(map(len, flags_list)), 'inconsistent per word lengths'
        assert flags_list is None or len(flags_list) == num_rvecs, 'inconsistent lengths'
    # Indexing with asymmetric multi-assignment might get you a non 1 self score?
    # List of scores for every word.
    scores_list = score_matches(rvecs_list, rvecs_list, flags_list, flags_list,
                                maws_list, maws_list, smk_alpha, smk_thresh,
                                idf_list)
    if DEBUG_SMK:
        assert len(scores_list) == num_rvecs, 'bad rvec and score'
        assert len(idf_list) == len(scores_list), 'bad weight and score'
    # Summation over all residual vector scores
    _count = sum((scores.size for scores in scores_list))
    _iter = utool.iflatten(scores.ravel() for scores in scores_list)
    self_rawscore = np.fromiter(_iter, np.float64, _count).sum()
    # Square root inverse to enforce normalized self-score is 1.0
    sccw = np.reciprocal(np.sqrt(self_rawscore))
    try:
        assert not np.isinf(sccw), 'sccw cannot be infinite'
        assert not np.isnan(sccw), 'sccw cannot be nan'
    except AssertionError as ex:
        utool.printex(ex, 'problem computing self consistency criterion weight',
                      keys=['num_rvecs'], iswarning=True)
        if num_rvecs > 0:
            raise
        else:
            sccw = 1
    return sccw
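# The normalization at the heart of sccw_summation is gamma = (sum of per-word
# self-match scores) ** -0.5, so that the weighted self-kernel K(X, X) comes out to 1.
# A tiny standalone check of that property (the per-word scores are made-up numbers,
# not real SMK output):
import numpy as np

per_word_self_scores = np.array([0.7, 1.3, 2.0])   # stand-ins for w_c * M(X_c, X_c)
self_rawscore = per_word_self_scores.sum()
sccw = 1.0 / np.sqrt(self_rawscore)

# Both sides of K(X, X) are scaled by sccw, so the normalized self-score is 1
assert np.isclose(sccw * self_rawscore * sccw, 1.0)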
def get_annotmatch_subgraph(ibs):
    r"""
    http://bokeh.pydata.org/en/latest/
    https://github.com/jsexauer/networkx_viewer

    TODO: Need a special visualization
        In the web I need:
            * graph of annotations matches.
            * can move them around.
            * edit lines between them.
            * http://stackoverflow.com/questions/15373530/web-graph-visualization-tool

        This should share functionality with a name view.

    Args:
        ibs (IBEISController): ibeis controller object

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_subgraph --show

        # Networkx example
        python -m ibeis.viz.viz_graph --test-show_chipmatch_graph:0 --show

    Ignore:
        from ibeis import viz

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> result = get_annotmatch_subgraph(ibs)
        >>> ut.show_if_requested()
    """
    #import ibeis
    #ibs = ibeis.opendb(db='PZ_MTEST')
    #rowids = ibs._get_all_annotmatch_rowids()
    #aids1 = ibs.get_annotmatch_aid1(rowids)
    #aids2 = ibs.get_annotmatch_aid2(rowids)
    #
    #
    nids = ibs.get_valid_nids()
    nids = nids[0:5]
    aids_list = ibs.get_name_aids(nids)
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    # Enumerate annotmatch properties
    rng = np.random.RandomState(0)
    edge_props = {
        'weight': rng.rand(len(aids1)),
        'reviewer_confidence': rng.rand(len(aids1)),
        'algo_confidence': rng.rand(len(aids1)),
    }

    # Remove data that does not need to be visualized
    # (dont show all the aids if you dont have to)
    thresh = .5
    flags = edge_props['weight'] > thresh
    aids1 = ut.compress(aids1, flags)
    aids2 = ut.compress(aids2, flags)
    edge_props = {key: ut.compress(val, flags) for key, val in edge_props.items()}

    edge_keys = list(edge_props.keys())
    edge_vals = ut.dict_take(edge_props, edge_keys)
    unique_aids = list(set(aids1 + aids2))

    # Make a graph between the chips
    nodes = list(zip(unique_aids))
    edges = list(zip(aids1, aids2, *edge_vals))
    node_lbls = [('aid', 'int')]
    edge_lbls = [('weight', 'float')]
    from ibeis.viz import viz_graph
    netx_graph = viz_graph.make_netx_graph(nodes, edges, node_lbls, edge_lbls)
    fnum = None
    #zoom = kwargs.get('zoom', .4)
    zoom = .4
    viz_graph.viz_netx_chipgraph(ibs, netx_graph, fnum=fnum,
                                 with_images=True, zoom=zoom)
def get_annotmatch_subgraph(ibs):
    r"""
    http://bokeh.pydata.org/en/latest/
    https://github.com/jsexauer/networkx_viewer

    TODO: Need a special visualization
        In the web I need:
            * graph of annotations matches.
            * can move them around.
            * edit lines between them.
            * http://stackoverflow.com/questions/15373530/web-graph-visualization-tool

        This should share functionality with a name view.

    Args:
        ibs (IBEISController): ibeis controller object

    CommandLine:
        python -m ibeis.annotmatch_funcs --exec-get_annotmatch_subgraph --show

        # Networkx example
        python -m ibeis.viz.viz_graph --test-show_chipmatch_graph:0 --show

    Ignore:
        from ibeis import viz

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.annotmatch_funcs import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> result = get_annotmatch_subgraph(ibs)
        >>> ut.show_if_requested()
    """
    #import ibeis
    #ibs = ibeis.opendb(db='PZ_MTEST')
    #rowids = ibs._get_all_annotmatch_rowids()
    #aids1 = ibs.get_annotmatch_aid1(rowids)
    #aids2 = ibs.get_annotmatch_aid2(rowids)
    #
    #
    nids = ibs.get_valid_nids()
    nids = nids[0:5]
    aids_list = ibs.get_name_aids(nids)
    import itertools
    unflat_edges = (list(itertools.product(aids, aids)) for aids in aids_list)
    aid_pairs = [tup for tup in ut.iflatten(unflat_edges) if tup[0] != tup[1]]
    aids1 = ut.get_list_column(aid_pairs, 0)
    aids2 = ut.get_list_column(aid_pairs, 1)

    # Enumerate annotmatch properties
    rng = np.random.RandomState(0)
    edge_props = {
        'weight': rng.rand(len(aids1)),
        'reviewer_confidence': rng.rand(len(aids1)),
        'algo_confidence': rng.rand(len(aids1)),
    }

    # Remove data that does not need to be visualized
    # (dont show all the aids if you dont have to)
    thresh = .5
    flags = edge_props['weight'] > thresh
    aids1_ = ut.compress(aids1, flags)
    aids2_ = ut.compress(aids2, flags)
    chosen_props = ut.dict_subset(edge_props, ['weight'])
    edge_props = ut.map_dict_vals(ut.partial(ut.compress, flag_list=flags), chosen_props)

    edge_keys = list(edge_props.keys())
    edge_vals = ut.dict_take(edge_props, edge_keys)
    edge_attr_list = [dict(zip(edge_keys, vals_)) for vals_ in zip(*edge_vals)]

    unique_aids = list(set(aids1_ + aids2_))
    # Make a graph between the chips
    nodes = unique_aids
    edges = list(zip(aids1_, aids2_, edge_attr_list))
    import networkx as nx
    graph = nx.DiGraph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)

    from ibeis.viz import viz_graph
    fnum = None
    #zoom = kwargs.get('zoom', .4)
    viz_graph.viz_netx_chipgraph(ibs, graph, fnum=fnum, with_images=True,
                                 augment_graph=False)
def invert_index(vecs_list, fgws_list, ax_list, fxs_list, verbose=ut.NOT_QUIET):
    r"""
    Aggregates descriptors of input annotations and returns inverted information

    Args:
        vecs_list (list):
        fgws_list (list):
        ax_list (list):
        fxs_list (list):
        verbose (bool): verbosity flag (default = True)

    Returns:
        tuple: (idx2_vec, idx2_fgw, idx2_ax, idx2_fx)

    CommandLine:
        python -m ibeis.algo.hots.neighbor_index invert_index

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> rng = np.random.RandomState(42)
        >>> DIM_SIZE = 16
        >>> nFeat_list = [3, 0, 4, 1]
        >>> vecs_list = [rng.randn(nFeat, DIM_SIZE) for nFeat in nFeat_list]
        >>> fgws_list = [rng.randn(nFeat) for nFeat in nFeat_list]
        >>> fxs_list = [np.arange(nFeat) for nFeat in nFeat_list]
        >>> ax_list = np.arange(len(vecs_list))
        >>> fgws_list = None
        >>> verbose = True
        >>> tup = invert_index(vecs_list, fgws_list, ax_list, fxs_list)
        >>> (idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = tup
        >>> result = 'output depth_profile = %s' % (ut.depth_profile(tup),)
        >>> print(result)
        output depth_profile = [(8, 16), 1, 8, 8]

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> qreq_ = ibeis.testdata_qreq_(defaultdb='testdb1', a='default:species=zebra_plains', p='default:fgw_thresh=.999')
        >>> vecs_list, fgws_list, fxs_list = get_support_data(qreq_, qreq_.daids)
        >>> ax_list = np.arange(len(vecs_list))
        >>> input_ = vecs_list, fgws_list, ax_list, fxs_list
        >>> print('input depth_profile = %s' % (ut.depth_profile(input_),))
        >>> tup = invert_index(*input_)
        >>> (idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = tup
        >>> result = 'output depth_profile = %s' % (ut.depth_profile(tup),)
        >>> print(result)
        output depth_profile = [(1912, 128), 1912, 1912, 1912]
    """
    if ut.VERYVERBOSE:
        print('[nnindex] stacking descriptors from %d annotations' % len(ax_list))
    try:
        nFeat_list = np.array(list(map(len, vecs_list)))
        # Remove input without any features
        is_valid = nFeat_list > 0
        nFeat_list = nFeat_list.compress(is_valid)
        vecs_list = ut.compress(vecs_list, is_valid)
        if fgws_list is not None:
            fgws_list = ut.compress(fgws_list, is_valid)
        ax_list = ut.compress(ax_list, is_valid)
        fxs_list = ut.compress(fxs_list, is_valid)

        # Flatten into inverted index
        axs_list = [[ax] * nFeat for (ax, nFeat) in zip(ax_list, nFeat_list)]
        nFeats = sum(nFeat_list)
        idx2_ax = np.fromiter(ut.iflatten(axs_list), np.int32, nFeats)
        idx2_fx = np.fromiter(ut.iflatten(fxs_list), np.int32, nFeats)
        idx2_vec = np.vstack(vecs_list)
        if fgws_list is None:
            idx2_fgw = None
        else:
            idx2_fgw = np.hstack(fgws_list)
            try:
                assert len(idx2_fgw) == len(idx2_vec), 'error. weights and vecs do not correspond'
            except Exception as ex:
                ut.printex(ex, keys=[(len, 'idx2_fgw'), (len, 'idx2_vec')])
                raise
        assert idx2_vec.shape[0] == idx2_ax.shape[0]
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
    except MemoryError as ex:
        ut.printex(ex, 'cannot build inverted index', '[!memerror]')
        raise
    if ut.VERYVERBOSE or verbose:
        print('[nnindex] stacked nVecs={nVecs} from nAnnots={nAnnots}'.format(
            nVecs=len(idx2_vec), nAnnots=len(ax_list)))
        print('[nnindex] idx2_vecs dtype={}, memory={}'.format(
            idx2_vec.dtype,
            ut.byte_str2(idx2_vec.size * idx2_vec.dtype.itemsize)))
    return idx2_vec, idx2_fgw, idx2_ax, idx2_fx
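# The core stacking pattern here (repeat each annotation index once per feature, then
# flatten with np.fromiter) is the same one used by invertible_stack above. A compact
# pure-numpy/stdlib sketch with toy descriptor arrays:
import itertools
import numpy as np

vecs_list = [np.zeros((3, 8)), np.ones((2, 8))]   # per-annotation descriptor arrays
ax_list = [0, 1]                                  # annotation indexes

nfeat_list = [len(vecs) for vecs in vecs_list]
nfeats = sum(nfeat_list)
axs_list = [[ax] * n for ax, n in zip(ax_list, nfeat_list)]
fxs_list = [range(n) for n in nfeat_list]

idx2_ax = np.fromiter(itertools.chain.from_iterable(axs_list), np.int32, nfeats)
idx2_fx = np.fromiter(itertools.chain.from_iterable(fxs_list), np.int32, nfeats)
idx2_vec = np.vstack(vecs_list)

assert idx2_ax.tolist() == [0, 0, 0, 1, 1]
assert idx2_fx.tolist() == [0, 1, 2, 0, 1]
assert idx2_vec.shape == (5, 8)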
def match_single(smk, qaid, daids, qreq_, verbose=True):
    """
    CommandLine:
        python -m wbia.algo.smk.smk_pipeline SMK.match_single --profile
        python -m wbia.algo.smk.smk_pipeline SMK.match_single --show

        python -m wbia SMK.match_single -a ctrl:qmingt=2 --profile --db PZ_Master1
        python -m wbia SMK.match_single -a ctrl --profile --db GZ_ALL

    Example:
        >>> # FUTURE_ENABLE
        >>> from wbia.algo.smk.smk_pipeline import *  # NOQA
        >>> import wbia
        >>> qreq_ = wbia.testdata_qreq_(defaultdb='PZ_MTEST')
        >>> ibs = qreq_.ibs
        >>> daids = qreq_.daids
        >>> #ibs, daids = wbia.testdata_aids(defaultdb='PZ_MTEST', default_set='dcfg')
        >>> qreq_ = SMKRequest(ibs, daids[0:1], daids, {'agg': True,
        >>>                                             'num_words': 1000,
        >>>                                             'sv_on': True})
        >>> qreq_.ensure_data()
        >>> qaid = qreq_.qaids[0]
        >>> daids = qreq_.daids
        >>> daid = daids[1]
        >>> verbose = True
        >>> cm = qreq_.smk.match_single(qaid, daids, qreq_)
        >>> ut.quit_if_noshow()
        >>> ut.qtensure()
        >>> cm.ishow_analysis(qreq_)
        >>> ut.show_if_requested()
    """
    from wbia.algo.hots import chip_match
    from wbia.algo.hots import pipeline

    alpha = qreq_.qparams['smk_alpha']
    thresh = qreq_.qparams['smk_thresh']
    agg = qreq_.qparams['agg']
    # nAnnotPerName = qreq_.qparams.nAnnotPerNameSVER

    sv_on = qreq_.qparams.sv_on
    if sv_on:
        nNameShortList = qreq_.qparams.nNameShortlistSVER
        shortsize = nNameShortList
    else:
        shortsize = None

    X = qreq_.qinva.get_annot(qaid)

    # Determine which database annotations need to be checked
    # with ut.Timer('searching qaid=%r' % (qaid,), verbose=verbose):
    hit_inva_wxs = ut.take(qreq_.dinva.wx_to_aids, X.wx_list)
    hit_daids = np.array(list(set(ut.iflatten(hit_inva_wxs))))

    # Mark impossible daids
    # with ut.Timer('checking impossible daids=%r' % (qaid,), verbose=verbose):
    valid_flags = check_can_match(qaid, hit_daids, qreq_)
    valid_daids = hit_daids.compress(valid_flags)

    shortlist = ut.Shortlist(shortsize)
    # gammaX = smk.gamma(X, wx_to_weight, agg, alpha, thresh)
    _prog = ut.ProgPartial(lbl='smk scoring qaid=%r' % (qaid,),
                           enabled=verbose, bs=True, adjust=True)

    wx_to_weight = qreq_.dinva.wx_to_weight

    debug = False
    if debug:
        qnid = qreq_.get_qreq_annot_nids([qaid])[0]
        daids = np.array(qreq_.daids)
        dnids = qreq_.get_qreq_annot_nids(daids)
        correct_aids = daids[np.where(dnids == qnid)[0]]
        daid = correct_aids[0]

    if agg:
        for daid in _prog(valid_daids):
            Y = qreq_.dinva.get_annot(daid)
            item = match_kernel_agg(X, Y, wx_to_weight, alpha, thresh)
            shortlist.insert(item)
    else:
        for daid in _prog(valid_daids):
            Y = qreq_.dinva.get_annot(daid)
            item = match_kernel_sep(X, Y, wx_to_weight, alpha, thresh)
            shortlist.insert(item)

    # Build chipmatches for the shortlist results
    # with ut.Timer('build cms', verbose=verbose):
    cm = chip_match.ChipMatch(qaid=qaid, fsv_col_lbls=['smk'])
    cm.daid_list = []
    cm.fm_list = []
    cm.fsv_list = []
    _prog = ut.ProgPartial(lbl='smk build cm qaid=%r' % (qaid,),
                           enabled=verbose, bs=True, adjust=True)
    for item in _prog(shortlist):
        (score, score_list, Y, X_idx, Y_idx) = item
        X_fxs = ut.take(X.fxs_list, X_idx)
        Y_fxs = ut.take(Y.fxs_list, Y_idx)
        # Only build matches for those that sver will use
        if agg:
            X_maws = ut.take(X.maws_list, X_idx)
            Y_maws = ut.take(Y.maws_list, Y_idx)
            fm, fs = smk_funcs.build_matches_agg(X_fxs, Y_fxs, X_maws,
                                                 Y_maws, score_list)
        else:
            fm, fs = smk_funcs.build_matches_sep(X_fxs, Y_fxs, score_list)
        if len(fm) > 0:
            # assert not np.any(np.isnan(fs))
            daid = Y.aid
            fsv = fs[:, None]
            cm.daid_list.append(daid)
            cm.fm_list.append(fm)
            cm.fsv_list.append(fsv)

    cm._update_daid_index()
    cm.arraycast_self()
    cm.score_name_maxcsum(qreq_)

    # if False:
    #     cm.assert_self(qreq_=qreq_, verbose=True)

    if sv_on:
        cm = pipeline.sver_single_chipmatch(qreq_, cm, verbose=verbose)
        cm.score_name_maxcsum(qreq_)

    return cm