def isect_info(self, other):
    set1 = set(self.rel_fpath_list)
    set2 = set(other.rel_fpath_list)
    set_comparisons = ut.odict([
        ('s1', set1),
        ('s2', set2),
        ('union', set1.union(set2)),
        ('isect', set1.intersection(set2)),
        ('s1 - s2', set1.difference(set2)),
        ('s2 - s1', set2.difference(set1)),
    ])
    stat_stats = ut.map_vals(len, set_comparisons)
    print(ut.repr4(stat_stats))
    return set_comparisons

    if False:
        # dead code kept for reference: compare uuids across the two lists
        idx_lookup1 = ut.make_index_lookup(self.rel_fpath_list)
        idx_lookup2 = ut.make_index_lookup(other.rel_fpath_list)
        uuids1 = ut.take(self.uuids, ut.take(idx_lookup1, set_comparisons['union']))
        uuids2 = ut.take(other.uuids, ut.take(idx_lookup2, set_comparisons['union']))
        uuids1 == uuids2
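# Note: every snippet in this section leans on ut.make_index_lookup. A minimal
# pure-Python sketch of the assumed behavior (not the utool implementation):
# it maps each item of a sequence to its position so membership tests and bulk
# index lookups become dictionary operations.
def _make_index_lookup_sketch(item_list):
    """Hypothetical stand-in for ut.make_index_lookup."""
    return {item: index for index, item in enumerate(item_list)}

_lookup = _make_index_lookup_sketch(['a.png', 'b.png', 'c.png'])
assert _lookup == {'a.png': 0, 'b.png': 1, 'c.png': 2}
# ut.take(_lookup, ['c.png', 'a.png']) would analogously return [2, 0]
assert [_lookup[key] for key in ['c.png', 'a.png']] == [2, 0]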
def remove_support(nnindexer, remove_daid_list, verbose=ut.NOT_QUIET):
    r"""
    CommandLine:
        python -m wbia.algo.hots.neighbor_index --test-remove_support

    SeeAlso:
        ~/code/flann/src/python/pyflann/index.py

    Example:
        >>> # SLOW_DOCTEST
        >>> # xdoctest: +SKIP
        >>> # (IMPORTANT)
        >>> from wbia.algo.hots.neighbor_index import *  # NOQA
        >>> nnindexer, qreq_, ibs = testdata_nnindexer(use_memcache=False)
        >>> remove_daid_list = [8, 9, 10, 11]
        >>> K = 2
        >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
        >>> # get before data
        >>> (qfx2_idx1, qfx2_dist1) = nnindexer.knn(qfx2_vec, K)
        >>> # execute test function
        >>> nnindexer.remove_support(remove_daid_list)
        >>> # test before data vs after data
        >>> (qfx2_idx2, qfx2_dist2) = nnindexer.knn(qfx2_vec, K)
        >>> ax2_nvecs = ut.dict_take(ut.dict_hist(nnindexer.idx2_ax), range(len(nnindexer.ax2_aid)))
        >>> assert qfx2_idx2.max() < ax2_nvecs[0], 'should only get points from aid 7'
        >>> assert qfx2_idx1.max() > ax2_nvecs[0], 'should get points from everyone'
    """
    if ut.DEBUG2:
        logger.info('REMOVING POINTS')
    # TODO: ensure no duplicates
    ax2_remove_flag = np.in1d(nnindexer.ax2_aid, remove_daid_list)
    remove_ax_list = np.nonzero(ax2_remove_flag)[0]
    idx2_remove_flag = np.in1d(nnindexer.idx2_ax, remove_ax_list)
    remove_idx_list = np.nonzero(idx2_remove_flag)[0]
    if verbose:
        logger.info('[nnindex] Found %d / %d annots that need removing' %
                    (len(remove_ax_list), len(remove_daid_list)))
        logger.info('[nnindex] Removing %d indexed features' %
                    (len(remove_idx_list),))
    # FIXME: indices may need adjustment after removing points.
    # Currently this is not being done and the data is just being left alone.
    # This should be ok temporarily because removed ids should not
    # be returned by the flann object
    nnindexer.flann.remove_points(remove_idx_list)

    # FIXME:
    # nnindexer.ax2_aid
    if True:
        nnindexer.ax2_aid[remove_ax_list] = -1
        nnindexer.idx2_fx[remove_idx_list] = -1
        nnindexer.idx2_vec[remove_idx_list] = 0
        if nnindexer.idx2_fgw is not None:
            nnindexer.idx2_fgw[remove_idx_list] = np.nan
        nnindexer.aid2_ax = ut.make_index_lookup(nnindexer.ax2_aid)

    # FIXME: This will definitely bug out if you remove points and then try
    # to add the same points back again.

    if ut.DEBUG2:
        logger.info('DONE REMOVE POINTS')
def lookup_cm(infr, aid1, aid2):
    """
    Get chipmatch object associated with an edge if one exists.
    """
    if infr.cm_list is None:
        return None, aid1, aid2
    # TODO: keep chip matches in dictionary by default?
    aid2_idx = ut.make_index_lookup([cm.qaid for cm in infr.cm_list])
    switch_order = False

    if aid1 in aid2_idx:
        idx = aid2_idx[aid1]
        cm = infr.cm_list[idx]
        if aid2 not in cm.daid2_idx:
            switch_order = True
            # raise KeyError('switch order')
    else:
        switch_order = True

    if switch_order:
        # switch order
        aid1, aid2 = aid2, aid1
        idx = aid2_idx[aid1]
        cm = infr.cm_list[idx]
        if aid2 not in cm.daid2_idx:
            raise KeyError('No ChipMatch for edge (%r, %r)' % (aid1, aid2))
    return cm, aid1, aid2
def load_ordered_annots(data_uri_order, query_uri_order):
    # Open the wbia version of oxford
    import wbia
    ibs = wbia.opendb('Oxford')

    def reorder_annots(_annots, uri_order):
        intern_uris = get_annots_imgid(_annots)
        lookup = ut.make_index_lookup(intern_uris)
        _reordered = _annots.take(ut.take(lookup, uri_order))
        return _reordered

    # Load database annotations and reorder them to agree with internals
    _dannots = ibs.annots(ibs.filter_annots_general(has_none='query'))
    data_annots = reorder_annots(_dannots, data_uri_order)

    # Load query annotations and reorder to standard order
    _qannots = ibs.annots(ibs.filter_annots_general(has_any='query'))
    query_annots = reorder_annots(_qannots, query_uri_order)

    # Map each query annot to its corresponding data index
    dgid_to_dx = ut.make_index_lookup(data_annots.gids)
    qx_to_dx = ut.take(dgid_to_dx, query_annots.gids)

    return ibs, query_annots, data_annots, qx_to_dx
def new_query_request(
    cls,
    qaid_list,
    daid_list,
    qparams,
    qresdir,
    ibs,
    query_config2_,
    data_config2_,
    _indexer_request_params,
    custom_nid_lookup=None,
):
    """
    old way of calling new

    Args:
        qaid_list (list):
        daid_list (list):
        qparams (QueryParams): query hyper-parameters
        qresdir (str):
        ibs (wbia.IBEISController): image analysis api
        _indexer_request_params (dict):

    Returns:
        wbia.QueryRequest
    """
    qreq_ = cls()
    qreq_.ibs = ibs
    qreq_.qparams = qparams  # Parameters relating to pipeline execution
    qreq_.query_config2_ = query_config2_
    qreq_.data_config2_ = data_config2_
    qreq_.qresdir = qresdir
    qreq_._indexer_request_params = _indexer_request_params
    qreq_.set_external_daids(daid_list)
    qreq_.set_external_qaids(qaid_list)

    # Load name information so it can change in the database and that's ok.
    # I'm not 100% liking how this works.
    qreq_.unique_aids = np.union1d(qreq_.qaids, qreq_.daids)
    qreq_.unique_aids.sort()

    # Internal caching objects and views
    _annots = ibs.annots(qreq_.unique_aids)
    # I think the views copy the original cache
    qreq_._unique_annots = _annots.view(_annots.aids)
    qreq_._unique_dannots = qreq_._unique_annots.view(sorted(qreq_.daids))

    qreq_.aid_to_idx = ut.make_index_lookup(qreq_.unique_aids)
    if custom_nid_lookup is None:
        qreq_.unique_nids = ibs.get_annot_nids(qreq_.unique_aids)
    else:
        qreq_.unique_nids = ut.dict_take(custom_nid_lookup, qreq_.unique_aids)
    qreq_.unique_nids = np.array(qreq_.unique_nids)

    # qreq_.nid_to_groupuuid = qreq_._make_namegroup_uuids()
    # qreq_.dnid_to_groupuuid = qreq_._make_namegroup_data_uuids()
    qreq_.nid_to_grouphash = qreq_._make_namegroup_hashes()
    qreq_.dnid_to_grouphash = qreq_._make_namegroup_data_hashes()
    return qreq_
def nx_make_adj_matrix(G):
    import utool as ut
    nodes = list(G.nodes())
    node2_idx = ut.make_index_lookup(nodes)
    edges = list(G.edges())
    edge2_idx = ut.partial(ut.dict_take, node2_idx)
    uv_list = ut.lmap(edge2_idx, edges)
    A = np.zeros((len(nodes), len(nodes)))
    A[tuple(np.array(uv_list).T)] = 1
    return A
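# Usage sketch for nx_make_adj_matrix above (assumes networkx, numpy, and
# utool are importable): build a dense adjacency matrix for a small digraph.
# Row/column order follows list(G.nodes()), matching the node2_idx lookup.
import networkx as nx
import numpy as np

_G = nx.DiGraph([('a', 'b'), ('b', 'c'), ('a', 'c')])
_A = nx_make_adj_matrix(_G)
# With node order ['a', 'b', 'c'], edge (a, b) sets A[0, 1], and so on
assert _A.shape == (3, 3)
assert _A[0, 1] == 1 and _A[1, 2] == 1 and _A[0, 2] == 1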
def nx_topsort_rank(graph, nodes=None):
    """
    graph = inputs.exi_graph.reverse()
    nodes = flat_node_order_
    """
    import networkx as nx
    import utool as ut
    topsort = list(nx.topological_sort(graph))
    node_to_top_rank = ut.make_index_lookup(topsort)
    toprank = ut.dict_take(node_to_top_rank, nodes)
    return toprank
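# Usage sketch for nx_topsort_rank above (assumes networkx and utool are
# importable): rank a few nodes of a small DAG by their position in a
# topological order of the whole graph.
import networkx as nx

_dag = nx.DiGraph([('root', 'mid'), ('mid', 'leaf')])
# 'root' must sort before 'mid', which must sort before 'leaf'
_ranks = nx_topsort_rank(_dag, ['leaf', 'root'])
assert _ranks == [2, 0]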
def new_external_annot(aid, fx_to_wxs, fx_to_maws, int_rvec):
    wx_to_fxs, wx_to_maws = smk_funcs.invert_assigns(fx_to_wxs, fx_to_maws)
    X = inverted_index.SingleAnnot()
    X.aid = aid
    # Build Aggregate Residual Vectors
    X.wx_list = np.array(sorted(wx_to_fxs.keys()), dtype=np.int32)
    X.wx_to_idx = ut.make_index_lookup(X.wx_list)
    X.int_rvec = int_rvec
    X.wx_set = set(X.wx_list)
    # TODO: maybe use offset list structure instead of heavy nesting
    X.fxs_list = ut.take(wx_to_fxs, X.wx_list)
    X.maws_list = ut.take(wx_to_maws, X.wx_list)
    return X
def simplify_graph(graph):
    """
    strips out everything but connectivity

    Args:
        graph (nx.Graph):

    Returns:
        nx.Graph: new_graph

    CommandLine:
        python3 -m utool.util_graph simplify_graph --show
        python2 -m utool.util_graph simplify_graph --show

        python2 -c "import networkx as nx; print(nx.__version__)"
        python3 -c "import networkx as nx; print(nx.__version__)"

    Example:
        >>> # ENABLE_DOCTEST
        >>> from utool.util_graph import *  # NOQA
        >>> import utool as ut
        >>> import networkx as nx
        >>> graph = nx.DiGraph([('a', 'b'), ('a', 'c'), ('a', 'e'),
        >>>                     ('a', 'd'), ('b', 'd'), ('c', 'e'),
        >>>                     ('d', 'e'), ('c', 'e'), ('c', 'd')])
        >>> new_graph = simplify_graph(graph)
        >>> result = ut.repr2(list(new_graph.edges()))
        >>> #adj_list = sorted(list(nx.generate_adjlist(new_graph)))
        >>> #result = ut.repr2(adj_list)
        >>> print(result)
        [(0, 1), (0, 2), (0, 3), (0, 4), (1, 3), (2, 3), (2, 4), (3, 4)]

        ['0 1 2 3 4', '1 3 4', '2 4', '3', '4 3']
    """
    import utool as ut
    nodes = sorted(list(graph.nodes()))
    node_lookup = ut.make_index_lookup(nodes)
    if graph.is_multigraph():
        edges = list(graph.edges(keys=True))
    else:
        edges = list(graph.edges())
    new_nodes = ut.take(node_lookup, nodes)
    if graph.is_multigraph():
        new_edges = [(node_lookup[e[0]], node_lookup[e[1]], e[2], {}) for e in edges]
    else:
        new_edges = [(node_lookup[e[0]], node_lookup[e[1]]) for e in edges]
    cls = graph.__class__
    new_graph = cls()
    new_graph.add_nodes_from(new_nodes)
    new_graph.add_edges_from(new_edges)
    return new_graph
def from_inva(cls, inva, idx):
    X = cls()
    X.aid = inva.aids[idx]
    X.wx_list = inva.wx_lists[idx]
    X.fxs_list = inva.fxs_lists[idx]
    X.maws_list = inva.maws_lists[idx]
    X.agg_rvecs = inva.agg_rvecs[idx]
    X.agg_flags = inva.agg_flags[idx]
    if inva.gamma_list is not None:
        X.gamma = inva.gamma_list[idx]
    X.wx_to_idx = ut.make_index_lookup(X.wx_list)
    X.int_rvec = inva.int_rvec
    X.wx_set = set(X.wx_list)
    return X
def init_support(indexer, aid_list, vecs_list, fgws_list, fxs_list, verbose=True):
    r"""
    prepares inverted indices and FLANN data structure

    flattens vecs_list and builds a reverse index from the flattened indices
    (idx) to the original aids and fxs
    """
    assert indexer.flann is None, 'already initialized'
    logger.info('[nnindex] Preparing data for indexing / loading index')
    # Check input
    assert len(aid_list) == len(vecs_list), 'invalid input. bad len'
    assert len(aid_list) > 0, (
        'len(aid_list) == 0. Cannot invert index without features!')
    # Create indexes into the input aids
    ax_list = np.arange(len(aid_list))
    # Invert indices
    tup = invert_index(vecs_list, fgws_list, ax_list, fxs_list, verbose=verbose)
    idx2_vec, idx2_fgw, idx2_ax, idx2_fx = tup
    ax2_aid = np.array(aid_list)

    indexer.flann = pyflann.FLANN()  # Approximate search structure
    indexer.ax2_aid = ax2_aid    # (A x 1) Mapping to original annot ids
    indexer.idx2_vec = idx2_vec  # (M x D) Descriptors to index
    indexer.idx2_fgw = idx2_fgw  # (M x 1) Descriptor foreground weight
    indexer.idx2_ax = idx2_ax    # (M x 1) Index into the aid_list
    indexer.idx2_fx = idx2_fx    # (M x 1) Index into the annot's features
    indexer.aid2_ax = ut.make_index_lookup(indexer.ax2_aid)
    indexer.num_indexed = indexer.idx2_vec.shape[0]
    if indexer.idx2_vec.dtype == hstypes.VEC_TYPE:
        # these are sift descriptors
        indexer.max_distance_sqrd = hstypes.VEC_PSEUDO_MAX_DISTANCE_SQRD
    else:
        # FIXME: hacky way to support siam128 descriptors.
        # raise AssertionError(
        #     'NNindexer should get uint8s right now unless the algorithm has changed')
        indexer.max_distance_sqrd = None
def make_prob_annots(infr):
    cm_list = infr.cm_list
    unique_aids = sorted(ut.list_union(*[cm.daid_list for cm in cm_list] +
                                       [[cm.qaid for cm in cm_list]]))
    aid2_aidx = ut.make_index_lookup(unique_aids)
    prob_annots = np.zeros((len(unique_aids), len(unique_aids)))
    for count, cm in enumerate(cm_list):
        idx = aid2_aidx[cm.qaid]
        annot_scores = ut.dict_take(cm.aid2_annot_score, unique_aids, 0)
        prob_annots[idx][:] = annot_scores
    prob_annots[np.diag_indices(len(prob_annots))] = np.inf
    prob_annots += 1E-9
    #print(ut.hz_str('prob_names = ', ut.array2string2(prob_names,
    #    precision=2, max_line_width=140, suppress_small=True)))
    return unique_aids, prob_annots
def from_depc(cls, depc, aids, vocab_aids, config):
    inva = cls()
    vocab_rowid = depc.get_rowids('vocab', (vocab_aids,), config=config)[0]
    inva.vocab_rowid = vocab_rowid
    tablename = 'inverted_agg_assign'
    table = depc[tablename]
    input_tuple = (aids, [vocab_rowid] * len(aids))
    tbl_rowids = depc.get_rowids(
        tablename, input_tuple, config=config, _hack_rootmost=True, _debug=False
    )
    # input_tuple = (aids, [vocab_aids])
    # tbl_rowids = depc.get_rowids(tablename, input_tuple, config=config)

    logger.info('Reading data')
    inva.aids = aids
    inva.wx_lists = [
        np.array(wx_list_, dtype=np.int32)
        for wx_list_ in table.get_row_data(tbl_rowids, 'wx_list', showprog='load wxs')
    ]
    inva.fxs_lists = [
        [np.array(fxs, dtype=np.uint16) for fxs in fxs_list]
        for fxs_list in table.get_row_data(
            tbl_rowids, 'fxs_list', showprog='load fxs'
        )
    ]
    inva.maws_lists = [
        [np.array(m, dtype=np.float32) for m in maws]
        for maws in table.get_row_data(tbl_rowids, 'maws_list', showprog='load maws')
    ]
    inva.agg_rvecs = table.get_row_data(
        tbl_rowids, 'agg_rvecs', showprog='load agg_rvecs'
    )
    inva.agg_flags = table.get_row_data(
        tbl_rowids, 'agg_flags', showprog='load agg_flags'
    )
    # less memory hogs
    inva.aid_to_idx = ut.make_index_lookup(inva.aids)
    inva.int_rvec = config['int_rvec']
    inva.gamma_list = None
    # Inverted list
    inva.wx_to_weight = None
    inva.wx_to_aids = None
    inva.config = config
    return inva
def make_temporary_annot(aid, vocab, wx_to_weight, ibs, config):
    nAssign = config.get('nAssign', 1)
    alpha = config.get('smk_alpha', 3.0)
    thresh = config.get('smk_thresh', 3.0)
    # Compute assignments
    fx_to_vecs = ibs.get_annot_vecs(aid, config2_=config)
    fx_to_wxs, fx_to_maws = smk_funcs.assign_to_words(vocab, fx_to_vecs, nAssign)
    wx_to_fxs, wx_to_maws = smk_funcs.invert_assigns(fx_to_wxs, fx_to_maws)
    # Build Aggregate Residual Vectors
    wx_list = sorted(wx_to_fxs.keys())
    word_list = ut.take(vocab.wx_to_word, wx_list)
    fxs_list = ut.take(wx_to_fxs, wx_list)
    maws_list = ut.take(wx_to_maws, wx_list)
    agg_rvecs = np.empty((len(wx_list), fx_to_vecs.shape[1]), dtype=np.float)
    agg_flags = np.empty((len(wx_list), 1), dtype=np.bool)
    for idx in range(len(wx_list)):
        word = word_list[idx]
        fxs = fxs_list[idx]
        maws = maws_list[idx]
        vecs = fx_to_vecs.take(fxs, axis=0)
        _rvecs, _flags = smk_funcs.compute_rvec(vecs, word)
        _agg_rvec, _agg_flag = smk_funcs.aggregate_rvecs(_rvecs, maws, _flags)
        agg_rvecs[idx] = _agg_rvec
        agg_flags[idx] = _agg_flag
    X = inverted_index.SingleAnnot()
    X.aid = aid
    X.wx_list = wx_list
    X.fxs_list = fxs_list
    X.maws_list = maws_list
    X.agg_rvecs = agg_rvecs
    X.agg_flags = agg_flags
    X.wx_to_idx = ut.make_index_lookup(X.wx_list)
    X.int_rvec = False
    X.wx_set = set(X.wx_list)
    weight_list = np.array(ut.take(wx_to_weight, wx_list))
    X.gamma = smk_funcs.gamma_agg(X.agg_rvecs, X.agg_flags, weight_list,
                                  alpha, thresh)
    return X
def initialize_graph_and_model(infr):
    """ Unused in internal split stuff

    pt.qt4ensure()
    layout_info = pt.show_nx(graph, as_directed=False, fnum=1,
                             layoutkw=dict(prog='neato'), use_image=True,
                             verbose=0)
    ax = pt.gca()
    pt.zoom_factory()
    pt.interactions.PanEvents()
    """
    #import networkx as nx
    #import itertools
    cm_list = infr.cm_list
    hack = True
    hack = False
    if hack:
        cm_list = cm_list[:10]
    qaid_list = [cm.qaid for cm in cm_list]
    daids_list = [cm.daid_list for cm in cm_list]
    unique_aids = sorted(ut.list_union(*daids_list + [qaid_list]))
    if hack:
        unique_aids = sorted(ut.isect(unique_aids, qaid_list))
    aid2_aidx = ut.make_index_lookup(unique_aids)

    # Construct K-broken graph
    edges = []
    edge_weights = []
    #top = (infr.qreq_.qparams.K + 1) * 2
    #top = (infr.qreq_.qparams.K) * 2
    top = (infr.qreq_.qparams.K + 2)
    for count, cm in enumerate(cm_list):
        qidx = aid2_aidx[cm.qaid]
        score_list = cm.annot_score_list
        sortx = ut.argsort(score_list)[::-1]
        score_list = ut.take(score_list, sortx)[:top]
        daid_list = ut.take(cm.daid_list, sortx)[:top]
        for score, daid in zip(score_list, daid_list):
            if daid not in qaid_list:
                continue
            didx = aid2_aidx[daid]
            edge_weights.append(score)
            edges.append((qidx, didx))

    # make symmetric
    directed_edges = dict(zip(edges, edge_weights))
    # Find edges that point in both directions
    undirected_edges = {}
    for (u, v), w in directed_edges.items():
        if (v, u) in undirected_edges:
            undirected_edges[(v, u)] += w
            undirected_edges[(v, u)] /= 2
        else:
            undirected_edges[(u, v)] = w

    edges = list(undirected_edges.keys())
    edge_weights = list(undirected_edges.values())
    nodes = list(range(len(unique_aids)))

    nid_labeling = infr.qreq_.ibs.get_annot_nids(unique_aids)
    labeling = ut.rebase_labels(nid_labeling)

    import networkx as nx
    from ibeis.viz import viz_graph
    set_node_attrs = nx.set_node_attributes
    set_edge_attrs = nx.set_edge_attributes

    # Create match-based graph structure
    graph = nx.DiGraph()
    graph.add_nodes_from(nodes)
    graph.add_edges_from(edges)

    # Important properties
    nid_list = infr.qreq_.ibs.get_annot_nids(unique_aids)
    labeling = ut.rebase_labels(nid_list)

    set_node_attrs(graph, 'name_label', dict(zip(nodes, labeling)))
    set_edge_attrs(graph, 'weight', dict(zip(edges, edge_weights)))

    # Visualization properties
    import plottool as pt
    ax2_aid = ut.invert_dict(aid2_aidx)
    set_node_attrs(graph, 'aid', ax2_aid)
    viz_graph.ensure_node_images(infr.qreq_.ibs, graph)
    set_node_attrs(graph, 'framewidth', dict(zip(nodes, [3.0] * len(nodes))))
    set_node_attrs(graph, 'framecolor', dict(zip(nodes, [pt.DARK_BLUE] * len(nodes))))
    ut.color_nodes(graph, labelattr='name_label')
    edge_colors = pt.scores_to_color(np.array(edge_weights), cmap_='viridis')
    #import utool
    #utool.embed()
    #edge_colors = [pt.color_funcs.ensure_base255(color) for color in edge_colors]
    #print('edge_colors = %r' % (edge_colors,))
    set_edge_attrs(graph, 'color', dict(zip(edges, edge_colors)))

    # Build inference model
    from ibeis.algo.hots import graph_iden
    #graph_iden.rrr()
    model = graph_iden.InfrModel(graph)
    #model = graph_iden.InfrModel(len(nodes), edges, edge_weights, labeling=labeling)
    infr.model = model
def make_inference(infr):
    cm_list = infr.cm_list
    unique_nids, prob_names = infr.make_prob_names()
    cluster_tuples = infr.make_clusters()

    # Make pair list for output
    if infr.user_feedback is not None:
        keys = list(zip(infr.user_feedback['aid1'], infr.user_feedback['aid2']))
        feedback_lookup = ut.make_index_lookup(keys)
        user_feedback = infr.user_feedback
        p_bg = 0
        user_feedback = ut.map_dict_vals(np.array, infr.user_feedback)
        part1 = user_feedback['p_match'] * (1 - user_feedback['p_notcomp'])
        part2 = p_bg * user_feedback['p_notcomp']
        p_same_list = part1 + part2
    else:
        feedback_lookup = {}
    infr.user_feedback
    needs_review_list = []
    num_top = 4
    for cm, row in zip(cm_list, prob_names):
        # Find top scoring names for this chip match in the posterior distribution
        idxs = row.argsort()[::-1]
        top_idxs = idxs[:num_top]
        nids = ut.take(unique_nids, top_idxs)
        # Find the matched annotations in the pairwise prior distributions
        nidxs = ut.dict_take(cm.nid2_nidx, nids, None)
        name_groupxs = ut.take(cm.name_groupxs, ut.filter_Nones(nidxs))
        daids_list = ut.take(cm.daid_list, name_groupxs)
        for daids in daids_list:
            ut.take(cm.score_list, ut.take(cm.daid2_idx, daids))
            scores_all = cm.annot_score_list / cm.annot_score_list.sum()
            idxs = ut.take(cm.daid2_idx, daids)
            scores = scores_all.take(idxs)
            raw_scores = cm.score_list.take(idxs)
            scorex = scores.argmax()
            raw_score = raw_scores[scorex]
            daid = daids[scorex]
            import scipy.special
            # SUPER HACK: these are not probabilities
            # TODO: set a and b based on dbsize and param configuration
            # python -m plottool.draw_func2 --exec-plot_func --show --range=0,3 --func="lambda x: scipy.special.expit(2 * x - 2)"
            #a = 2.0
            a = 1.5
            b = 2
            p_same = scipy.special.expit(b * raw_score - a)
            #confidence = scores[scorex]
            #p_diff = 1 - p_same
            #decision = 'same' if confidence > thresh else 'diff'
            #confidence = p_same if confidence > thresh else p_diff
            #tup = (cm.qaid, daid, decision, confidence, raw_score)
            confidence = (2 * np.abs(0.5 - p_same)) ** 2
            #if infr.user_feedback is not None:
            #    import utool
            #    utool.embed(
            key = (cm.qaid, daid)
            fb_idx = feedback_lookup.get(key)
            if fb_idx is not None:
                confidence = p_same_list[fb_idx]
            tup = (cm.qaid, daid, p_same, confidence, raw_score)
            needs_review_list.append(tup)

    # Sort resulting list by confidence
    sortx = ut.argsort(ut.take_column(needs_review_list, 3))
    needs_review_list = ut.take(needs_review_list, sortx)

    infr.needs_review_list = needs_review_list
    infr.cluster_tuples = cluster_tuples
def lookup_idxs(self, rowids):
    """ Lookup subset indices by rowids """
    if self._rowid_to_idx is None:
        self._rowid_to_idx = ut.make_index_lookup(self._rowids)
    idx_list = ut.take(self._rowid_to_idx, rowids)
    return idx_list
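# A minimal sketch of the lazy caching pattern used by lookup_idxs above
# (hypothetical container, not the real class): the rowid -> index dictionary
# is built once on first use and reused on later calls.
class _SubsetSketch(object):
    def __init__(self, rowids):
        self._rowids = list(rowids)
        self._rowid_to_idx = None  # built lazily

    def lookup_idxs(self, rowids):
        if self._rowid_to_idx is None:
            self._rowid_to_idx = {r: i for i, r in enumerate(self._rowids)}
        return [self._rowid_to_idx[r] for r in rowids]

_subset = _SubsetSketch([10, 20, 30])
assert _subset.lookup_idxs([30, 10]) == [2, 0]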
def find_consistent_labeling(grouped_oldnames):
    """
    Solves a maximum bipartite matching problem to find a consistent
    name assignment.

    Notes:
        # Install module containing the Hungarian algorithm for matching
        pip install munkres

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b'], ['b', 'c'], ['c', 'a', 'a']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> print(new_names)
        [u'b', u'c', u'a']

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> print(new_names)
        [u'a', u'b', u'e']

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b'], ['a', 'a', 'b'], ['a']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> print(new_names)
        [u'a', u'b', u'e']
    """
    import numpy as np
    try:
        import munkres
    except ImportError:
        print('Need to install Hungarian algorithm bipartite matching solver.')
        print('Run:')
        print('pip install munkres')
        raise
    unique_old_names = ut.unique(ut.flatten(grouped_oldnames))
    num_new_names = len(grouped_oldnames)
    num_old_names = len(unique_old_names)
    extra_oldnames = []
    # Create padded dummy values. This accounts for the case where it is
    # impossible to uniquely map to the old db
    num_extra = num_new_names - num_old_names
    if num_extra > 0:
        extra_oldnames = ['_extra_name%d' % (count,) for count in range(num_extra)]
    elif num_extra < 0:
        pass
    else:
        extra_oldnames = []
    assignable_names = unique_old_names + extra_oldnames
    total = len(assignable_names)
    # Allocate assignment matrix
    profit_matrix = np.zeros((total, total), dtype=np.int)
    # Populate assignment profit matrix
    oldname2_idx = ut.make_index_lookup(assignable_names)
    name_freq_list = [ut.dict_hist(names) for names in grouped_oldnames]
    for rowx, name_freq in enumerate(name_freq_list):
        for name, freq in name_freq.items():
            colx = oldname2_idx[name]
            profit_matrix[rowx, colx] += freq
    # Add extra profit for using a previously used name
    profit_matrix[profit_matrix > 0] += 2
    # Add small profit for using an extra name
    extra_colxs = ut.take(oldname2_idx, extra_oldnames)
    profit_matrix[:, extra_colxs] += 1
    # Convert to minimization problem
    big_value = (profit_matrix.max())
    cost_matrix = big_value - profit_matrix
    m = munkres.Munkres()
    indexes = m.compute(cost_matrix)
    # Map output to be aligned with input
    rx2_cx = dict(indexes)
    assignment = [assignable_names[rx2_cx[rx]] for rx in range(num_new_names)]
    return assignment
def find_consistent_labeling_old(grouped_oldnames, extra_prefix='_extra_name',
                                 verbose=False):
    import numpy as np
    import scipy.optimize
    unique_old_names = ut.unique(ut.flatten(grouped_oldnames))

    # TODO: find names that are only used once, and just ignore those for
    # optimization.
    # unique_set = set(unique_old_names)
    oldname_sets = list(map(set, grouped_oldnames))
    usage_hist = ut.dict_hist(ut.flatten(oldname_sets))
    conflicts = {k for k, v in usage_hist.items() if v > 1}
    # nonconflicts = {k for k, v in usage_hist.items() if v == 1}

    conflict_groups = []
    orig_idxs = []
    assignment = [None] * len(grouped_oldnames)
    ntrivial = 0
    for idx, group in enumerate(grouped_oldnames):
        if set(group).intersection(conflicts):
            orig_idxs.append(idx)
            conflict_groups.append(group)
        else:
            ntrivial += 1
            if len(group) > 0:
                h = ut.dict_hist(group)
                hitems = list(h.items())
                hvals = [i[1] for i in hitems]
                maxval = max(hvals)
                g = min([k for k, v in hitems if v == maxval])
                assignment[idx] = g
            else:
                assignment[idx] = None

    if verbose:
        print('rectify %d non-trivial groups' % (len(conflict_groups),))
        print('rectify %d trivial groups' % (ntrivial,))

    num_extra = 0
    if len(conflict_groups) > 0:
        grouped_oldnames_ = conflict_groups
        unique_old_names = ut.unique(ut.flatten(grouped_oldnames_))
        num_new_names = len(grouped_oldnames_)
        num_old_names = len(unique_old_names)
        extra_oldnames = []

        # Create padded dummy values. This accounts for the case where it is
        # impossible to uniquely map to the old db
        num_extra = num_new_names - num_old_names
        if num_extra > 0:
            extra_oldnames = ['%s%d' % (extra_prefix, count,)
                              for count in range(num_extra)]
        elif num_extra < 0:
            pass
        else:
            extra_oldnames = []
        assignable_names = unique_old_names + extra_oldnames

        total = len(assignable_names)

        # Allocate assignment matrix
        # Start with a large negative value indicating
        # that you must select from your assignments only
        profit_matrix = -np.ones((total, total), dtype=np.int) * (2 * total)
        # Populate assignment profit matrix
        oldname2_idx = ut.make_index_lookup(assignable_names)
        name_freq_list = [ut.dict_hist(names) for names in grouped_oldnames_]
        # Initialize base profit for using a previously used name
        for rowx, name_freq in enumerate(name_freq_list):
            for name, freq in name_freq.items():
                colx = oldname2_idx[name]
                profit_matrix[rowx, colx] = 1
        # Now add in the real profit
        for rowx, name_freq in enumerate(name_freq_list):
            for name, freq in name_freq.items():
                colx = oldname2_idx[name]
                profit_matrix[rowx, colx] += freq
        # Set a small profit for using an extra name
        extra_colxs = ut.take(oldname2_idx, extra_oldnames)
        profit_matrix[:, extra_colxs] = 1

        # Convert to minimization problem
        big_value = (profit_matrix.max()) - (profit_matrix.min())
        cost_matrix = big_value - profit_matrix

        # Don't use munkres, it is pure python and very slow. Use scipy instead
        indexes = list(zip(*scipy.optimize.linear_sum_assignment(cost_matrix)))

        # Map output to be aligned with input
        rx2_cx = dict(indexes)
        assignment_ = [assignable_names[rx2_cx[rx]]
                       for rx in range(num_new_names)]

        # Reintegrate trivial values
        for idx, g in zip(orig_idxs, assignment_):
            assignment[idx] = g

    for idx, val in enumerate(assignment):
        if val is None:
            assignment[idx] = '%s%d' % (extra_prefix, num_extra,)
            num_extra += 1
    return assignment
def simple_munkres(part_oldnames):
    """
    Defines a munkres problem to solve name rectification.

    Notes:
        We create a matrix where each row represents a group of annotations in
        the same PCC and each column represents an original name. If there are
        more PCCs than original names the columns are padded with extra
        values. The matrix is first initialized to be negative infinity
        representing impossible assignments. Then for each column representing
        a padded name, we set its value to $1$ indicating that each new name
        could be assigned to a padded name for some small profit. Finally, let
        $f_{rc}$ be the number of annotations in row $r$ with an original name
        of $c$. Each matrix value $(r, c)$ is set to $f_{rc} + 1$ if
        $f_{rc} > 0$, to represent how much each name ``wants'' to be labeled
        with a particular original name, and the extra one ensures that these
        original names are always preferred over padded names.

    CommandLine:
        python -m ibeis.scripts.name_recitifer simple_munkres

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> part_oldnames = [['a', 'b'], ['b', 'c'], ['c', 'a', 'a']]
        >>> new_names = simple_munkres(part_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['b', 'c', 'a']

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> part_oldnames = [[], ['a', 'a'], [],
        >>>                  ['a', 'a', 'a', 'a', 'a', 'a', 'a', 'b'], ['a']]
        >>> new_names = simple_munkres(part_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        [None, 'a', None, 'b', None]

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> part_oldnames = [[], ['b'], ['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']]
        >>> new_names = find_consistent_labeling(part_oldnames)
        >>> result = ut.repr2(new_names)
        >>> print(new_names)
        ['_extra_name0', 'b', 'a', 'c', 'e']

        Profit Matrix
             b    a    c    e   _0
        0  -10  -10  -10  -10    1
        1    2  -10  -10  -10    1
        2    2    2    2  -10    1
        3    2  -10    2  -10    1
        4  -10  -10    2    3    1
    """
    import numpy as np
    import scipy.optimize
    unique_old_names = ut.unique(ut.flatten(part_oldnames))
    num_new_names = len(part_oldnames)
    num_old_names = len(unique_old_names)

    # Create padded dummy values. This accounts for the case where it is
    # impossible to uniquely map to the old db
    num_pad = max(num_new_names - num_old_names, 0)
    total = num_old_names + num_pad
    shape = (total, total)

    # Allocate assignment matrix.
    # rows are new-names and cols are old-names.
    # Initially the profit of any assignment is effectively -inf
    # This effectively marks all assignments as invalid
    profit_matrix = np.full(shape, -2 * total, dtype=np.int)
    # Overwrite valid assignments with positive profits
    oldname2_idx = ut.make_index_lookup(unique_old_names)
    name_freq_list = [ut.dict_hist(names) for names in part_oldnames]
    # Initialize profit of a valid assignment as 1 + freq
    # This incentivizes using a previously used name
    for rowx, name_freq in enumerate(name_freq_list):
        for name, freq in name_freq.items():
            colx = oldname2_idx[name]
            profit_matrix[rowx, colx] = freq + 1
    # Set a much smaller profit for using an extra name
    # This allows the solution to always exist
    profit_matrix[:, num_old_names:total] = 1

    # Convert to minimization problem
    big_value = (profit_matrix.max()) - (profit_matrix.min())
    cost_matrix = big_value - profit_matrix

    # Use scipy implementation of munkres algorithm.
    rx2_cx = dict(zip(*scipy.optimize.linear_sum_assignment(cost_matrix)))

    # Each row (new-name) has now been assigned a column (old-name)
    # Map this back to the input-space (using None to indicate extras)
    cx2_name = dict(enumerate(unique_old_names))

    if False:
        import pandas as pd
        columns = unique_old_names + ['_%r' % x for x in range(num_pad)]
        print('Profit Matrix')
        print(pd.DataFrame(profit_matrix, columns=columns))

        print('Cost Matrix')
        print(pd.DataFrame(cost_matrix, columns=columns))

    assignment_ = [cx2_name.get(rx2_cx[rx], None)
                   for rx in range(num_new_names)]
    return assignment_
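# A minimal sketch of the profit-to-cost trick used by simple_munkres above:
# scipy's linear_sum_assignment minimizes total cost, so a maximum-profit
# matrix is flipped by subtracting it from its maximum. Toy numbers only.
import numpy as np
import scipy.optimize

_profit = np.array([
    [3, 1, 1],   # row 0 strongly prefers column 0
    [1, 2, 1],   # row 1 prefers column 1
    [1, 1, 1],   # row 2 is indifferent (falls through to column 2)
])
_cost = _profit.max() - _profit
_rxs, _cxs = scipy.optimize.linear_sum_assignment(_cost)
assert list(zip(_rxs, _cxs)) == [(0, 0), (1, 1), (2, 2)]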
def ensure_nids(qreq_):
    # Hacked over from hotspotter, seriously hacky
    ibs = qreq_.ibs
    qreq_.unique_aids = np.union1d(qreq_.qaids, qreq_.daids)
    qreq_.unique_nids = ibs.get_annot_nids(qreq_.unique_aids)
    qreq_.aid_to_idx = ut.make_index_lookup(qreq_.unique_aids)
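# A minimal sketch of the union step used by ensure_nids above (numpy only):
# np.union1d merges query and database ids into one sorted, de-duplicated
# array, which is then indexed through the aid -> idx lookup.
import numpy as np

_qaids = np.array([3, 1])
_daids = np.array([2, 3, 4])
_unique_aids = np.union1d(_qaids, _daids)
assert _unique_aids.tolist() == [1, 2, 3, 4]
_aid_to_idx = {aid: idx for idx, aid in enumerate(_unique_aids)}
assert _aid_to_idx[3] == 2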
def parse_column_tuples(
    self,
    col_name_list,
    col_types_dict,
    col_getter_dict,
    col_bgrole_dict,
    col_ider_dict,
    col_setter_dict,
    editable_colnames,
    sortby,
    sort_reverse=True,
    strict=False,
    **kwargs,
):
    """
    parses simple lists into information suitable for making guitool headers
    """
    # Unpack the column tuples into names, getters, and types
    if not strict:
        # sloppy colname definitions
        flag_list = [colname in col_getter_dict for colname in col_name_list]
        if not all(flag_list):
            invalid_colnames = ut.compress(col_name_list, ut.not_list(flag_list))
            logger.info(
                '[api_item_widget] Warning: colnames=%r have no getters'
                % (invalid_colnames,)
            )
            col_name_list = ut.compress(col_name_list, flag_list)
        # sloppy type inference
        for colname in col_name_list:
            getter_ = col_getter_dict[colname]
            if colname not in col_types_dict:
                type_ = ut.get_homogenous_list_type(getter_)
                if type_ is not None:
                    col_types_dict[colname] = type_
    # sloppy kwargs.
    # FIXME: explicitly list col_nice_dict
    col_nice_dict = kwargs.get('col_nice_dict', {})
    self.col_nice_list = [col_nice_dict.get(name, name) for name in col_name_list]
    self.col_name_list = col_name_list
    self.col_type_list = [
        col_types_dict.get(colname, str) for colname in col_name_list
    ]
    # First col is always a getter
    self.col_getter_list = [
        col_getter_dict.get(colname, str) for colname in col_name_list
    ]
    # Get number of rows / columns
    self.nCols = len(self.col_getter_list)
    if self.nCols == 0:
        self.nRows = 0
    else:
        for getter in self.col_getter_list:
            if ut.isiterable(getter):
                break
            getter = None
        # FIXME
        assert getter is not None, 'at least one getter must be an array/list'
        self.nRows = len(getter)

    # self.nRows = 0 if self.nCols == 0 else len(self.col_getter_list[0])  # FIXME
    # Init iders to default and then overwrite based on dict inputs
    self.col_ider_list = [None] * self.nCols  # ut.alloc_nones(self.nCols)
    # for colname, ider_colnames in six.iteritems(col_ider_dict):
    # import utool
    # utool.embed()
    colname2_colx = ut.make_index_lookup(self.col_name_list)
    for colname, ider_colnames in six.iteritems(col_ider_dict):
        if colname not in colname2_colx:
            continue
        # for colname in self.col_name_list:
        ider_colnames = col_ider_dict[colname]
        try:
            colx = colname2_colx[colname]
            # Col iders might have tuple input
            ider_cols = self._uinput_1to1(self.col_name_list.index, ider_colnames)
            col_ider = self._uinput_1to1(lambda c: ut.partial(self.get, c), ider_cols)
            self.col_ider_list[colx] = col_ider
            del col_ider
            del ider_cols
            del colx
            del colname
        except Exception as ex:
            ut.printex(
                ex,
                keys=['colname', 'ider_colnames', 'colx', 'col_ider', 'ider_cols'],
            )
            raise
    # Init setters to data, and then overwrite based on dict inputs
    self.col_setter_list = list(self.col_getter_list)
    for colname, col_setter in six.iteritems(col_setter_dict):
        colx = colname2_colx[colname]
        self.col_setter_list[colx] = col_setter
    # Init bgrole_getters to None, and then overwrite based on dict inputs
    self.col_bgrole_getter_list = [
        col_bgrole_dict.get(colname, None) for colname in self.col_name_list
    ]
    # Mark editable columns
    self.col_edit_list = [name in editable_colnames for name in col_name_list]
    # Mark the sort column index
    if sortby is None:
        self.col_sort_index = 0
    elif ut.is_str(sortby):
        self.col_sort_index = self.col_name_list.index(sortby)
    else:
        self.col_sort_index = sortby
    self.col_sort_reverse = sort_reverse

    # Hacks for tree widget
    self._iders = kwargs.get('iders', None)
    col_level_dict = kwargs.get('col_level_dict', None)
    if col_level_dict is None:
        self.col_level_list = None
    else:
        self.col_level_list = ut.take(col_level_dict, col_name_list)
def add_support(nnindexer, new_daid_list, new_vecs_list, new_fgws_list,
                new_fxs_list, verbose=ut.NOT_QUIET):
    r"""
    adds support data (aka data to be indexed)

    Args:
        new_daid_list (list): list of annotation ids that are being added
        new_vecs_list (list): list of descriptor vectors for each annotation
        new_fgws_list (list): list of weights per vector for each annotation
        verbose (bool): verbosity flag(default = True)

    CommandLine:
        python -m ibeis.algo.hots.neighbor_index --test-add_support

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.algo.hots.neighbor_index import *  # NOQA
        >>> nnindexer, qreq_, ibs = testdata_nnindexer(use_memcache=False)
        >>> new_daid_list = [2, 3, 4]
        >>> K = 2
        >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
        >>> # get before data
        >>> (qfx2_idx1, qfx2_dist1) = nnindexer.knn(qfx2_vec, K)
        >>> new_vecs_list, new_fgws_list, new_fxs_list = get_support_data(qreq_, new_daid_list)
        >>> # execute test function
        >>> nnindexer.add_support(new_daid_list, new_vecs_list, new_fgws_list, new_fxs_list)
        >>> # test before data vs after data
        >>> (qfx2_idx2, qfx2_dist2) = nnindexer.knn(qfx2_vec, K)
        >>> assert qfx2_idx2.max() > qfx2_idx1.max()
    """
    # TODO: ensure no duplicates
    nAnnots = nnindexer.num_indexed_annots()
    nVecs = nnindexer.num_indexed_vecs()
    nNewAnnots = len(new_daid_list)
    new_ax_list = np.arange(nAnnots, nAnnots + nNewAnnots)
    tup = invert_index(new_vecs_list, new_fgws_list, new_ax_list, new_fxs_list,
                       verbose=verbose)
    new_idx2_vec, new_idx2_fgw, new_idx2_ax, new_idx2_fx = tup
    nNewVecs = len(new_idx2_vec)
    if verbose or ut.VERYVERBOSE:
        print(('[nnindex] Adding %d vecs from %d annots to nnindex '
               'with %d vecs and %d annots') %
              (nNewVecs, nNewAnnots, nVecs, nAnnots))
    if ut.DEBUG2:
        print('STACKING')
    # Stack inverted information
    old_idx2_vec = nnindexer.idx2_vec
    if nnindexer.idx2_fgw is not None:
        new_idx2_fgw = np.hstack(new_fgws_list)
        #nnindexer.old_vecs.append(new_idx2_fgw)
    ##---
    _ax2_aid = np.hstack((nnindexer.ax2_aid, new_daid_list))
    _idx2_ax = np.hstack((nnindexer.idx2_ax, new_idx2_ax))
    _idx2_fx = np.hstack((nnindexer.idx2_fx, new_idx2_fx))
    _idx2_vec = np.vstack((old_idx2_vec, new_idx2_vec))
    if nnindexer.idx2_fgw is not None:
        _idx2_fgw = np.hstack((nnindexer.idx2_fgw, new_idx2_fgw))
    if ut.DEBUG2:
        print('REPLACING')
    nnindexer.ax2_aid = _ax2_aid
    nnindexer.idx2_ax = _idx2_ax
    nnindexer.idx2_vec = _idx2_vec
    nnindexer.idx2_fx = _idx2_fx
    nnindexer.aid2_ax = ut.make_index_lookup(nnindexer.ax2_aid)
    if nnindexer.idx2_fgw is not None:
        nnindexer.idx2_fgw = _idx2_fgw
    #nnindexer.idx2_kpts = None
    #nnindexer.idx2_oris = None
    # Add new points to flann structure
    if ut.DEBUG2:
        print('ADD POINTS (FIXME: SOMETIMES SEGFAULT OCCURS)')
        print('new_idx2_vec.dtype = %r' % new_idx2_vec.dtype)
        print('new_idx2_vec.shape = %r' % (new_idx2_vec.shape,))
    nnindexer.flann.add_points(new_idx2_vec)
    if ut.DEBUG2:
        print('DONE ADD POINTS')
def nx_transitive_reduction(G, mode=1):
    """
    References:
        https://en.wikipedia.org/wiki/Transitive_reduction#Computing_the_reduction_using_the_closure
        http://dept-info.labri.fr/~thibault/tmp/0201008.pdf
        http://stackoverflow.com/questions/17078696/im-trying-to-perform-the-transitive-reduction-of-directed-graph-in-python

    CommandLine:
        python -m utool.util_graph nx_transitive_reduction --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from utool.util_graph import *  # NOQA
        >>> import utool as ut
        >>> import networkx as nx
        >>> G = nx.DiGraph([('a', 'b'), ('a', 'c'), ('a', 'e'),
        >>>                 ('a', 'd'), ('b', 'd'), ('c', 'e'),
        >>>                 ('d', 'e'), ('c', 'e'), ('c', 'd')])
        >>> G = testdata_graph()[1]
        >>> G_tr = nx_transitive_reduction(G, mode=1)
        >>> G_tr2 = nx_transitive_reduction(G, mode=1)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> G_ = nx.dag.transitive_closure(G)
        >>> pt.show_nx(G, pnum=(1, 5, 1), fnum=1)
        >>> pt.show_nx(G_tr, pnum=(1, 5, 2), fnum=1)
        >>> pt.show_nx(G_tr2, pnum=(1, 5, 3), fnum=1)
        >>> pt.show_nx(G_, pnum=(1, 5, 4), fnum=1)
        >>> pt.show_nx(nx.dag.transitive_closure(G_tr), pnum=(1, 5, 5), fnum=1)
        >>> ut.show_if_requested()
    """
    import utool as ut
    import networkx as nx
    has_cycles = not nx.is_directed_acyclic_graph(G)
    if has_cycles:
        # FIXME: this does not work for cycle graphs.
        # Need to do algorithm on SCCs
        G_orig = G
        G = nx.condensation(G_orig)

    nodes = list(G.nodes())
    node2_idx = ut.make_index_lookup(nodes)

    # For each node u, perform DFS consider its set of (non-self) children C.
    # For each descendant v, of a node in C, remove any edge from u to v.
    if mode == 1:
        G_tr = G.copy()

        for parent in G_tr.nodes():
            # Remove self loops
            if G_tr.has_edge(parent, parent):
                G_tr.remove_edge(parent, parent)
            # For each child of the parent
            for child in list(G_tr.successors(parent)):
                # Preorder nodes includes its argument (no added complexity)
                for gchild in list(G_tr.successors(child)):
                    # Remove all edges from parent to non-child descendants
                    for descendant in nx.dfs_preorder_nodes(G_tr, gchild):
                        if G_tr.has_edge(parent, descendant):
                            G_tr.remove_edge(parent, descendant)

        if has_cycles:
            # Uncondense graph
            uncondensed_G_tr = G.__class__()
            mapping = G.graph['mapping']
            uncondensed_G_tr.add_nodes_from(mapping.keys())
            inv_mapping = ut.invert_dict(mapping, unique_vals=False)
            for u, v in G_tr.edges():
                u_ = inv_mapping[u][0]
                v_ = inv_mapping[v][0]
                uncondensed_G_tr.add_edge(u_, v_)
            for key, path in inv_mapping.items():
                if len(path) > 1:
                    directed_cycle = list(ut.itertwo(path, wrap=True))
                    uncondensed_G_tr.add_edges_from(directed_cycle)
            G_tr = uncondensed_G_tr

    else:

        def make_adj_matrix(G):
            edges = list(G.edges())
            edge2_idx = ut.partial(ut.dict_take, node2_idx)
            uv_list = ut.lmap(edge2_idx, edges)
            A = np.zeros((len(nodes), len(nodes)))
            A[tuple(np.array(uv_list).T)] = 1
            return A

        G_ = nx.dag.transitive_closure(G)

        A = make_adj_matrix(G)
        B = make_adj_matrix(G_)

        #AB = A * B
        #AB = A.T.dot(B)
        AB = A.dot(B)
        #AB = A.dot(B.T)

        A_and_notAB = np.logical_and(A, np.logical_not(AB))
        tr_uvs = np.where(A_and_notAB)

        #nodes = G.nodes()
        edges = list(zip(*ut.unflat_take(nodes, tr_uvs)))

        G_tr = G.__class__()
        G_tr.add_nodes_from(nodes)
        G_tr.add_edges_from(edges)

        if has_cycles:
            # Uncondense graph
            uncondensed_G_tr = G.__class__()
            mapping = G.graph['mapping']
            uncondensed_G_tr.add_nodes_from(mapping.keys())
            inv_mapping = ut.invert_dict(mapping, unique_vals=False)
            for u, v in G_tr.edges():
                u_ = inv_mapping[u][0]
                v_ = inv_mapping[v][0]
                uncondensed_G_tr.add_edge(u_, v_)

            for key, path in inv_mapping.items():
                if len(path) > 1:
                    directed_cycle = list(ut.itertwo(path, wrap=True))
                    uncondensed_G_tr.add_edges_from(directed_cycle)
            G_tr = uncondensed_G_tr
    return G_tr
def get_rowids(depc, tablename, root_rowids, config=None, ensure=True,
               eager=True, nInput=None, _debug=None, recompute=False,
               recompute_all=False):
    """
    Returns the rowids of `tablename` that correspond to `root_rowids`
    using `config`.

    Ignore:
        tablename = 'nnindexer'
        multi_rowids = (1, 2, 3, 4, 5)
        root_rowids = [[multi_rowids]]
        import plottool as pt
        pt.ensureqt()

        from dtool.depcache_control import *  # NOQA
        from dtool.example_depcache import testdata_depc
        depc = testdata_depc()
        exec(ut.execstr_funckw(depc.get_rowids), globals())
        print(ut.depth_profile(root_rowids))
        tablename = 'neighbs'
        table = depc[tablename]  # NOQA
        import plottool as pt
        pt.ensureqt()
        _debug = depc._debug = True
        depc.get_rowids(tablename, root_rowids, config, _debug=_debug)

        pt.show_nx(depc.graph)
        for key, val in table.type_to_subgraph.items():
            pt.show_nx(val)
            pt.set_title(key)

    CommandLine:
        python -m dtool.depcache_control --exec-get_rowids
        python -m dtool.depcache_control --dump-get_rowids
        python -m dtool.depcache_control --exec-get_rowids:0

    GridParams:
        >>> param_grid = dict(
        >>>     tablename=['spam', 'neighbs']  # 'spam', 'multitest_score','keypoint'],
        >>>     #tablename=['neighbs', 'keypoint', 'spam', 'multitest_score','keypoint'],
        >>> )
        >>> flat_root_ids = [1, 2, 3]
        >>> combos = ut.all_dict_combinations(param_grid)
        >>> index = 0
        >>> keys = 'tablename'.split(', ')
        >>> tablename, = ut.dict_take(combos[index], keys)

    Setup:
        >>> # DISABLE_GRID_DOCTEST
        >>> from dtool.depcache_control import *  # NOQA
        >>> from dtool.example_depcache import testdata_depc
        >>> depc = testdata_depc()
        >>> exec(ut.execstr_funckw(depc.get_rowids), globals())
        >>> import plottool as pt
        >>> pt.ensureqt()
        >>> #pt.show_nx(depc.graph)

    GridExample0:
        >>> table = depc[tablename]  # NOQA
        >>> flat_root_ids = [1, 2, 3]
        >>> root_rowids = [flat_root_ids for _ in table.input_order]
        >>> print('root_rowids = %r' % (root_rowids,))
        >>> #root_rowids = [[flat_root_ids], [(flat_root_ids,)]]
        >>> #root_rowids = [list(zip(flat_root_ids)), (flat_root_ids,)]
        >>> _debug = True
        >>> depc.get_rowids(tablename, root_rowids, config, _debug=_debug)
        >>> for key, val in table.type_to_subgraph.items():
        >>>     pt.show_nx(val)
        >>>     pt.set_title(key)

    Example1:
        >>> # ENABLE_DOCTEST
        >>> from dtool.depcache_control import *  # NOQA
        >>> from dtool.example_depcache import testdata_depc
        >>> depc = testdata_depc()
        >>> exec(ut.execstr_funckw(depc.get_rowids), globals())
        >>> root_rowids = [1, 2, 3]
        >>> tablename = 'spam'
        >>> table = depc[tablename]
        >>> kp_rowids = depc.get_rowids(tablename, root_rowids)
        >>> #result = ('prop_list = %s' % (ut.repr2(prop_list),))
        >>> #print(result)

    Example:
        >>> # ENABLE_DOCTEST
        >>> from dtool.depcache_control import *  # NOQA
        >>> from dtool.example_depcache import testdata_depc
        >>> depc = testdata_depc()
        >>> exec(ut.execstr_funckw(depc.get_rowids), globals())
        >>> flat_root_ids = [1, 2, 3]
        >>> kp_rowids = depc.get_rowids('keypoint', flat_root_ids)
        >>> root_rowids = [flat_root_ids] * 8
        >>> _debug = True
        >>> tablename = 'nnindexer'
        >>> tablename = 'multitest_score'
        >>> table = depc[tablename]  # NOQA
        >>> #result = ('prop_list = %s' % (ut.repr2(prop_list),))
        >>> # print(result)
    """
    _debug = depc._debug if _debug is None else _debug
    if _debug:
        print(' * root_rowids=%s' % (ut.trunc_repr(root_rowids),))
        print(' * config = %r' % (config,))
    table = depc[tablename]  # NOQA
    INDEXER_VERSION = False

    if tablename == 'neighbor_index':
        """
        python -m ibeis.core_annots --exec-compute_neighbor_index --show
        """
        import utool
        utool.embed()

    if INDEXER_VERSION or tablename == 'neighbs':
        compute_order = table.compute_order
        depend_order = compute_order['depend_compute_ids']
        input_order = compute_order['input_compute_ids']

        if _debug:
            print(' * input_order = %s' % (ut.repr3(input_order, nl=1),))
            print(' * depend_order = %s' % (ut.repr3(depend_order, nl=1),))
        if len(input_order) > 1:
            assert ut.depth_atleast(root_rowids, 2), (
                'input_order = %r' % (input_order,))

        with ut.Indenter('[GetRowID-%s]' % (tablename,), enabled=_debug):
            # New way to get rowids
            input_level = depend_order[0]
            mid_levels = depend_order[1:-1]
            output_level = depend_order[-1]

            # List that holds a mapping from input order to input "name"
            input_order_lookup = ut.make_index_lookup(input_order)
            # Dictionary that holds the rowids computed for each table
            # while tracing the dependencies.
            rowid_lookup = ut.odict([(key, ut.odict()) for key in input_order])

            # Need to split each path into parts.
            # Each part represents another level of unflattening
            # (because root indices are all flat)

            # Handle input level
            assert input_level[0] == depc.root
            for compute_id in input_order:
                # for name in input_names:
                argx = input_order_lookup[compute_id]
                rowid_lookup[compute_id] = root_rowids[argx]
                # HACK: Flatten to scalars
                # The inputs should just be given in the "correct" nesting.
                # TODO: determine what correct nesting is.
                for i in range(5):
                    try:
                        current = rowid_lookup[compute_id]
                        rowid_lookup[compute_id] = ut.flatten(current)
                    except Exception:
                        pass

            level = 0
            if _debug:
                print('input_order_lookup = %r' % (input_order_lookup,))
                ut.printdict(rowid_lookup, 'rowid_lookup')

            def handle_level(compute_id, rowid_lookup, _recompute, level):
                print('+--- HANDLE LEVEL %d -------' % (level,))
                tablekey = compute_id[0]
                input_suff = compute_id[1]
                config_ = depc._ensure_config(tablekey, config)
                table = depc[tablekey]
                lookupkeys = [(n, input_suff) for n in table.parent_id_tablenames]
                # ordering = ut.dict_take(input_order_lookup, input_names)
                # sortx = ut.argsort(ordering)
                # FIXME: get inputs for each table.
                # input_names = ut.take(input_names, sortx)
                # lookupkeys = list(ut.iprod(table.parent_id_tablenames, input_names))
                # lookupkeys = list(zip(table.parent_id_tablenames, input_types))
                if _debug:
                    print('---- LOCALS ------')
                    ut.print_locals(compute_id, tablekey, lookupkeys, table)
                    print('L----------')
                # FIXME generalize
                _parent_ids = [rowid_lookup[tblkey] for tblkey in lookupkeys]
                if table.ismulti:
                    parent_rowidsT = [[tuple(x)] for x in _parent_ids]
                else:
                    parent_rowidsT = _parent_ids
                parent_rowidsT = np.broadcast_arrays(*parent_rowidsT)
                parent_rowids = list(zip(*parent_rowidsT))
                # Probably not right for general multi-input
                import utool
                with utool.embed_on_exception_context:
                    next_rowids = table.get_rowid(
                        parent_rowids, config=config_, eager=eager,
                        nInput=nInput, ensure=ensure, recompute=_recompute)
                rowid_lookup[compute_id] = next_rowids
                if _debug:
                    ut.printdict(rowid_lookup, 'rowid_lookup')
                if _debug:
                    print('L___ HANDLE LEVEL %d -------' % (level,))
                return next_rowids

            # Handle mid levels
            _recompute = recompute_all
            for level, compute_id in enumerate(mid_levels, start=1):
                handle_level(compute_id, rowid_lookup, _recompute, level)
            level += 1

            # Handle final (requested) level
            compute_id = output_level
            _recompute = recompute
            rowid_list = handle_level(compute_id, rowid_lookup,
                                      _recompute, level)
    else:
        with ut.Indenter('[GetRowID-%s]' % (tablename,), enabled=_debug):
            # TODO: Get nonself rowids first
            # Then get self rowids for debugging ease
            try:
                if False:
                    recompute_ = recompute or recompute_all
                    parent_rowids = depc._get_parent_input(
                        tablename, root_rowids, config, ensure=True,
                        _debug=None, recompute=False, recompute_all=False,
                        eager=True, nInput=None)
                    config_ = depc._ensure_config(tablename, config)
                    #if onthefly:
                    #    pass
                    table = depc[tablename]
                    rowid_list = table.get_rowid(
                        parent_rowids, config=config_, eager=eager,
                        nInput=nInput, ensure=ensure, recompute=recompute_)
                else:
                    # Compute everything from the root to the requested table
                    rowid_dict = depc.get_all_descendant_rowids(
                        tablename, root_rowids, config=config, ensure=ensure,
                        eager=eager, nInput=nInput, recompute=recompute,
                        recompute_all=recompute_all,
                        _debug=ut.countdown_flag(_debug))
                    rowid_list = rowid_dict[tablename]
            except depcache_table.ExternalStorageException:
                print('EXTERNAL EXCEPTION One retry in get_rowids')
                rowid_dict = depc.get_all_descendant_rowids(
                    tablename, root_rowids, config=config, ensure=ensure,
                    eager=eager, nInput=nInput, recompute=recompute,
                    recompute_all=recompute_all,
                    _debug=ut.countdown_flag(_debug))
                rowid_list = rowid_dict[tablename]
    if _debug:
        print(' * return rowid_list = %s' % (ut.trunc_repr(rowid_list),))
    return rowid_list
def ensure_data(qreq_):
    """
    >>> import wbia
    qreq_ = wbia.testdata_qreq_(
        defaultdb='Oxford', a='oxford',
        p='default:proot=smk,nAssign=1,num_words=64000,SV=False,can_match_sameimg=True,dim_size=None')
    """
    logger.info('Ensure data for %s' % (qreq_,))

    # qreq_.cachedir = ut.ensuredir((ibs.cachedir, 'smk'))
    qreq_.ensure_nids()

    def make_cacher(name, cfgstr=None):
        if cfgstr is None:
            cfgstr = ut.hashstr27(qreq_.get_cfgstr())
        if False and ut.is_developer():
            return ut.Cacher(
                fname=name + '_' + qreq_.ibs.get_dbname(),
                cfgstr=cfgstr,
                cache_dir=ut.ensuredir(ut.truepath('~/Desktop/smkcache')),
            )
        else:
            wrp = ut.DynStruct()

            def ensure(func):
                return func()

            wrp.ensure = ensure
            return wrp

    import copy
    dconfig = copy.deepcopy(qreq_.qparams)
    qconfig = qreq_.qparams
    if qreq_.qparams['data_ma']:
        # Disable database-side multi-assignment
        dconfig['nAssign'] = 1
    wwm = qreq_.qparams['word_weight_method']

    depc = qreq_.ibs.depc
    vocab_aids = qreq_.daids

    cheat = False
    if cheat:
        import wbia
        ut.cprint('CHEATING', 'red')
        vocab_aids = wbia.init.filter_annots.sample_annots_wrt_ref(
            qreq_.ibs,
            qreq_.daids,
            {'exclude_ref_contact': True},
            qreq_.qaids,
            verbose=1,
        )
    vocab_rowid = depc.get_rowids('vocab', (vocab_aids,), config=dconfig,
                                  ensure=False)[0]
    assert vocab_rowid is not None

    depc = qreq_.ibs.depc
    dinva_pcfgstr = depc.stacked_config(None, 'inverted_agg_assign',
                                        config=dconfig).get_cfgstr()
    qinva_pcfgstr = depc.stacked_config(None, 'inverted_agg_assign',
                                        config=qconfig).get_cfgstr()
    dannot_vuuid = qreq_.ibs.get_annot_hashid_visual_uuid(qreq_.daids).strip('_')
    qannot_vuuid = qreq_.ibs.get_annot_hashid_visual_uuid(qreq_.qaids).strip('_')
    tannot_vuuid = dannot_vuuid
    dannot_suuid = qreq_.ibs.get_annot_hashid_semantic_uuid(qreq_.daids).strip('_')
    qannot_suuid = qreq_.ibs.get_annot_hashid_semantic_uuid(qreq_.qaids).strip('_')

    dinva_phashid = ut.hashstr27(dinva_pcfgstr + tannot_vuuid)
    qinva_phashid = ut.hashstr27(qinva_pcfgstr + tannot_vuuid)
    dinva_cfgstr = '_'.join([dannot_vuuid, dinva_phashid])
    qinva_cfgstr = '_'.join([qannot_vuuid, qinva_phashid])

    # vocab = inverted_index.new_load_vocab(ibs, qreq_.daids, config)
    dinva_cacher = make_cacher('inva', dinva_cfgstr)
    qinva_cacher = make_cacher('inva', qinva_cfgstr)
    dwwm_cacher = make_cacher('word_weight', wwm + dinva_cfgstr)

    gamma_phashid = ut.hashstr27(qreq_.get_pipe_cfgstr() + tannot_vuuid)
    dgamma_cfgstr = '_'.join([dannot_suuid, gamma_phashid])
    qgamma_cfgstr = '_'.join([qannot_suuid, gamma_phashid])
    dgamma_cacher = make_cacher('dgamma', cfgstr=dgamma_cfgstr)
    qgamma_cacher = make_cacher('qgamma', cfgstr=qgamma_cfgstr)

    dinva = dinva_cacher.ensure(
        lambda: inverted_index.InvertedAnnots.from_depc(
            depc, qreq_.daids, vocab_aids, dconfig))

    qinva = qinva_cacher.ensure(
        lambda: inverted_index.InvertedAnnots.from_depc(
            depc, qreq_.qaids, vocab_aids, qconfig))

    dinva.wx_to_aids = dinva.compute_inverted_list()

    wx_to_weight = dwwm_cacher.ensure(
        lambda: dinva.compute_word_weights(wwm))
    dinva.wx_to_weight = wx_to_weight
    qinva.wx_to_weight = wx_to_weight

    thresh = qreq_.qparams['smk_thresh']
    alpha = qreq_.qparams['smk_alpha']

    dinva.gamma_list = dgamma_cacher.ensure(
        lambda: dinva.compute_gammas(alpha, thresh))

    qinva.gamma_list = qgamma_cacher.ensure(
        lambda: qinva.compute_gammas(alpha, thresh))

    qreq_.qinva = qinva
    qreq_.dinva = dinva

    logger.info('loading keypoints')
    if qreq_.qparams.sv_on:
        qreq_.data_kpts = qreq_.ibs.get_annot_kpts(
            qreq_.daids, config2_=qreq_.extern_data_config2)

    logger.info('building aid index')
    qreq_.daid_to_didx = ut.make_index_lookup(qreq_.daids)