def _set_pos_redun_flag(infr, nid, flag):
    """
    Mark or unmark the PCC labeled ``nid`` as positive redundant.

    Flagging removes the component's internal edges from the review queue;
    unflagging reinstates its unreviewed internal edges. When the
    'inference.update_attrs' parameter is on, the 'inferred_state' edge
    attribute of all internal edges is updated to match.
    """
    already_flagged = nid in infr.pos_redun_nids
    if flag:
        if already_flagged:
            infr.print('pos_redun flag=T nid=%r (already done)' % (nid,), 6)
        else:
            infr.print('pos_redun flag=T nid=%r' % (nid,), 5)
        infr.pos_redun_nids.add(nid)
        cc = infr.pos_graph.component(nid)
        # Redundant components need no further internal review
        infr.remove_internal_priority(cc)
        inferred = ['same']
    else:
        if already_flagged:
            infr.print('pos_redun flag=F nid=%r' % (nid,), 5)
        else:
            infr.print('pos_redun flag=F nid=%r (already done)' % (nid,), 6)
        cc = infr.pos_graph.component(nid)
        infr.pos_redun_nids -= {nid}
        # Internal edges become reviewable again
        infr.reinstate_internal_priority(cc)
        inferred = [None]
    if infr.params['inference.update_attrs']:
        infr.set_edge_attrs(
            'inferred_state',
            ub.dzip(nxu.edges_inside(infr.graph, cc), inferred)
        )
def find_connecting_edges(infr):
    """
    Search for a small set of edges which, if reviewed as positive, would
    make each PCC (grouped by name label) connected. Note that in some
    cases this is not possible (forbidden pairs may block every option).

    Returns:
        list: candidate edges to review
    """
    label = 'name_label'
    node_to_label = infr.get_node_attrs(label)
    label_to_nodes = ub.group_items(node_to_label.keys(),
                                    node_to_label.values())

    # Only simple connectivity is required here
    # k = infr.params['redun.pos']
    k = 1

    new_edges = []
    prog = ub.ProgIter(list(label_to_nodes.keys()),
                       desc='finding connecting edges',
                       enabled=infr.verbose > 0)
    for nid in prog:
        cc_nodes = set(label_to_nodes[nid])
        pos_sub = infr.pos_graph.subgraph(cc_nodes, dynamic=False)

        # Pairs that can never be reviewed positive
        forbidden = nxu.edges_inside(infr.neg_graph, cc_nodes)
        forbidden |= nxu.edges_inside(infr.incomp_graph, cc_nodes)

        # Every missing pair, minus the forbidden ones
        avail = set(nx.complement(pos_sub).edges())
        avail.difference_update(forbidden)

        new_edges.extend(nxu.k_edge_augmentation(pos_sub, k=k, avail=avail))
        prog.ensure_newline()
    return new_edges
def _check_inconsistency(infr, nid, cc=None):
    """
    Check if a PCC contains an error (an internal negative edge).

    Purges any previously hypothesized error edges for ``nid``, then either
    records a fresh error hypothesis (if internal negative edges remain) or
    marks the PCC as recovered and refreshes its redundancy bookkeeping.

    Args:
        nid: label of the PCC to check
        cc (set, optional): the PCC's nodes; looked up from ``nid`` if None

    Returns:
        tuple: (was_clean, is_clean) — error state before and after
    """
    if cc is None:
        cc = infr.pos_graph.component(nid)
    # Drop the old error hypothesis; remembers whether one existed
    was_clean = infr._purge_error_edges(nid)
    neg_edges = list(nxu.edges_inside(infr.neg_graph, cc))
    if neg_edges:
        # Still inconsistent: recompute which edges are likely mistakes
        pos_subgraph_ = infr.pos_graph.subgraph(cc, dynamic=False).copy()
        if not nx.is_connected(pos_subgraph_):
            print('cc = %r' % (cc, ))
            print('pos_subgraph_ = %r' % (pos_subgraph_, ))
            raise AssertionError('must be connected')
        hypothesis = dict(infr.hypothesis_errors(pos_subgraph_, neg_edges))
        assert len(hypothesis) > 0, 'must have at least one'
        infr._set_error_edges(nid, set(hypothesis.keys()))
        is_clean = False
    else:
        # No internal negatives: the PCC has recovered
        infr.recover_graph.remove_nodes_from(cc)
        num = infr.recover_graph.number_of_components()
        # num = len(list(nx.connected_components(infr.recover_graph)))
        msg = ('An inconsistent PCC recovered, '
               '{} inconsistent PCC(s) remain').format(num)
        infr.print(msg, 2, color='green')
        # force=True because redundancy flags were unreliable while dirty
        infr.update_pos_redun(nid, force=True)
        infr.update_extern_neg_redun(nid, force=True)
        is_clean = True
    return (was_clean, is_clean)
def subgraph(self, nbunch, dynamic=False):
    """
    Return the subgraph induced on ``nbunch``.

    Args:
        nbunch: iterable of nodes to keep
        dynamic (bool): if True, return a dynamic-connectivity subgraph
            with its union-find structure rebuilt; otherwise return a
            plain static ``nx.Graph`` copy of the internal edges.
    """
    if dynamic is not False:
        # Delegate to the parent class, then rebuild the dynamic
        # connectivity bookkeeping on the restricted node set.
        sub = super(DynConnGraph, self).subgraph(nbunch)
        for node in nbunch:
            # need to add individual nodes
            sub._add_node(node)
        # Recreate the connected component structure
        for u, v in sub.edges():
            sub._union(u, v)
        return sub
    # Static copy: plain graph containing only edges internal to nbunch
    node_set = set(nbunch)
    sub = nx.Graph()
    sub.add_nodes_from(node_set)
    sub.add_edges_from(nxu.edges_inside(self, node_set))
    return sub
def _cut(self, u, v):
    """
    Decremental connectivity (slow).

    Removing edge (u, v) may split a component, but union-find cannot
    un-merge, so the whole component is torn down and rebuilt from its
    surviving internal edges.
    """
    nid_u = self._union_find[u]
    nid_v = self._union_find[v]
    if nid_u != nid_v:
        # Endpoints already live in different components; nothing to do.
        return
    # Tear the component out of the bookkeeping structures
    old_cc = self._ccs[nid_u]
    del self._ccs[nid_u]
    self._union_find.remove_entire_cc(old_cc)
    # Might be faster to just do DFS to find the CC
    internal_edges = nxu.edges_inside(self, old_cc)
    # Re-register every node first, in case some have no remaining edges
    for node in old_cc:
        self._add_node(node)
    # Replay the surviving edges to rebuild connectivity
    for edge in internal_edges:
        self._union(*edge)
def find_pos_augment_edges(infr, pcc, k=None):
    """
    Find candidate edges whose positive review would make ``pcc``
    k-edge-connected.

    First tries to augment using only existing unreviewed edges; if that is
    infeasible, falls back to allowing brand-new edges (preferring the exact
    complement-based algorithm when every missing pair is available).

    Args:
        pcc (set): nodes of one positive connected component
        k (int, optional): target edge-connectivity; defaults to the
            'redun.pos' parameter

    Returns:
        set: candidate edges, normalized through ``e_``

    Scratch example:
        # [[1, 0], [0, 2], [1, 2], [3, 1]]
        pos_sub = nx.Graph([[0, 1], [1, 2], [0, 2], [1, 3]])
    """
    if k is None:
        pos_k = infr.params['redun.pos']
    else:
        pos_k = k
    pos_sub = infr.pos_graph.subgraph(pcc)
    # TODO:
    #     weight by pairs most likely to be comparable
    # First try to augment only with unreviewed existing edges
    unrev_avail = list(nxu.edges_inside(infr.unreviewed_graph, pcc))
    try:
        check_edges = list(
            nxu.k_edge_augmentation(pos_sub, k=pos_k, avail=unrev_avail,
                                    partial=False))
    except nx.NetworkXUnfeasible:
        check_edges = None
    if not check_edges:
        # Allow new edges to be introduced
        full_sub = infr.graph.subgraph(pcc).copy()
        new_avail = util.estarmap(infr.e_, nx.complement(full_sub).edges())
        full_avail = unrev_avail + new_avail
        # n_max: number of possible edges on len(pos_sub) nodes
        n_max = (len(pos_sub) * (len(pos_sub) - 1)) // 2
        n_complement = n_max - pos_sub.number_of_edges()
        if len(full_avail) == n_complement:
            # can use the faster algorithm
            check_edges = list(
                nxu.k_edge_augmentation(pos_sub, k=pos_k, partial=True))
        else:
            # have to use the slow approximate algo
            check_edges = list(
                nxu.k_edge_augmentation(pos_sub, k=pos_k, avail=full_avail,
                                        partial=True))
    # Normalize edge orientation before returning
    check_edges = set(it.starmap(e_, check_edges))
    return check_edges
def is_pos_redundant(infr, cc, k=None, relax=None, assume_connected=False):
    """
    Test whether a group of nodes is positive redundant, i.e. whether its
    positive subgraph is k-edge-connected.

    CommandLine:
        python -m graphid.core.mixin_dynamic _RedundancyComputers.is_pos_redundant

    Example:
        >>> from graphid import demo
        >>> infr = demo.demodata_infr(ccs=[(1, 2, 3)], pos_redun=1)
        >>> cc = infr.pos_graph.connected_to(1)
        >>> flag1 = infr.is_pos_redundant(cc)
        >>> infr.add_feedback((1, 3), POSTV)
        >>> flag2 = infr.is_pos_redundant(cc, k=2)
        >>> flags = [flag1, flag2]
        >>> print('flags = %r' % (flags,))
        flags = [False, True]
        >>> # xdoc: +REQUIRES(--show)
        >>> from graphid import util
        >>> infr.show()
        >>> util.show_if_requested()
    """
    if k is None:
        k = infr.params['redun.pos']
    if assume_connected and k == 1:
        # Caller vouches for connectivity, which is all k=1 requires
        return True
    if relax is None:
        relax = True

    pos_subgraph = infr.pos_graph.subgraph(cc, dynamic=False)
    if relax:
        # If no more edges could ever be added (every remaining pair is
        # incomparable) then consider the group positive redundant.
        num_incomp = sum(1 for _ in nxu.edges_inside(infr.incomp_graph, cc))
        num_pos = pos_subgraph.number_of_edges()
        num_nodes = pos_subgraph.number_of_nodes()
        num_possible = (num_nodes * (num_nodes - 1)) // 2
        if num_possible == (num_pos + num_incomp):
            return True
    # In all other cases run a true edge-connectivity test
    return nxu.is_k_edge_connected(pos_subgraph, k=k)
def find_mst_edges(infr, label='name_label'):
    """
    Returns edges to augment existing PCCs (by label) in order to ensure
    they are connected with positive edges.

    Args:
        infr: the inference object
        label (str): node attribute used to group nodes into clusters

    Returns:
        list: new candidate edges (none of which already exist in the graph)

    Example:
        >>> # DISABLE_DOCTEST
        >>> from graphid.core.mixin_helpers import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='PZ_MTEST')
        >>> infr = ibeis.AnnotInference(ibs, 'all', autoinit=True)
        >>> label = 'orig_name_label'
        >>> label = 'name_label'
        >>> infr.find_mst_edges()
        >>> infr.ensure_mst()

    Ignore:
        old_mst_edges = [
            e for e, d in infr.edges(data=True)
            if d.get('user_id', None) == 'algo:mst'
        ]
        infr.graph.remove_edges_from(old_mst_edges)
        infr.pos_graph.remove_edges_from(old_mst_edges)
        infr.neg_graph.remove_edges_from(old_mst_edges)
        infr.incomp_graph.remove_edges_from(old_mst_edges)
    """
    # Find clusters by labels
    node_to_label = infr.get_node_attrs(label)
    label_to_nodes = ub.group_items(node_to_label.keys(),
                                    node_to_label.values())

    weight_heuristic = False  # infr.ibs is not None
    if weight_heuristic:
        annots = infr.ibs.annots(infr.aids)
        node_to_time = ub.dzip(annots, annots.time)
        node_to_view = ub.dzip(annots, annots.viewpoint_code)
        enabled_heuristics = {
            'view_weight',
            'time_weight',
        }

    def _heuristic_weighting(nodes, avail_uv):
        # Attach heuristic MST weights (viewpoint distance, time delta)
        # to each available candidate edge.
        avail_uv = np.array(avail_uv)
        weights = np.ones(len(avail_uv))
        if 'view_weight' in enabled_heuristics:
            from graphid.core import _rhomb_dist
            view_edge = [(node_to_view[u], node_to_view[v])
                         for (u, v) in avail_uv]
            view_weight = np.array([
                _rhomb_dist.VIEW_CODE_DIST[(v1, v2)]
                for (v1, v2) in view_edge
            ])
            # Assume comparable by default and prefer undefined
            # more than probably not, but less than definitely so.
            view_weight[np.isnan(view_weight)] = 1.5
            # Prefer viewpoint 10x more than time
            weights += 10 * view_weight
        if 'time_weight' in enabled_heuristics:
            # Prefer linking annotations closer in time
            times = list(ub.take(node_to_time, nodes))
            maxtime = util.safe_max(times, fill=1, nans=False)
            mintime = util.safe_min(times, fill=0, nans=False)
            # NOTE(review): if all times are equal this denominator is
            # zero; the division then yields inf/nan (nan is clamped to
            # 1.0 below, inf is not) — confirm this is intended.
            time_denom = maxtime - mintime
            # Try linking by time for lynx data
            time_delta = np.array([
                abs(node_to_time[u] - node_to_time[v])
                for u, v in avail_uv
            ])
            time_weight = time_delta / time_denom
            weights += time_weight
        weights[np.isnan(weights)] = 1.0
        avail = [(u, v, {'weight': w})
                 for (u, v), w in zip(avail_uv, weights)]
        return avail

    new_edges = []
    prog = ub.ProgIter(list(label_to_nodes.keys()),
                       desc='finding mst edges',
                       enabled=infr.verbose > 0)
    for nid in prog:
        nodes = set(label_to_nodes[nid])
        if len(nodes) == 1:
            # Singleton clusters are trivially connected
            continue
        # We want to make this CC connected
        pos_sub = infr.pos_graph.subgraph(nodes, dynamic=False)
        # Pairs that must never be suggested as positives
        impossible = set(
            it.starmap(
                e_,
                it.chain(
                    nxu.edges_inside(infr.neg_graph, nodes),
                    nxu.edges_inside(infr.incomp_graph, nodes),
                    # nxu.edges_inside(infr.unknown_graph, nodes),
                )))
        if len(impossible) == 0 and not weight_heuristic:
            # Simple mst augmentation
            aug_edges = list(nxu.k_edge_augmentation(pos_sub, k=1))
        else:
            complement = it.starmap(e_, nxu.complement_edges(pos_sub))
            avail_uv = [(u, v) for u, v in complement
                        if (u, v) not in impossible]
            if weight_heuristic:
                # Can do heuristic weighting to improve the MST
                avail = _heuristic_weighting(nodes, avail_uv)
            else:
                avail = avail_uv
            # print(len(pos_sub))
            try:
                aug_edges = list(
                    nxu.k_edge_augmentation(pos_sub, k=1, avail=avail))
            except nx.NetworkXUnfeasible:
                print('Warning: MST augmentation is not feasible')
                print('explicit negative edges might disconnect a PCC')
                aug_edges = list(
                    nxu.k_edge_augmentation(pos_sub, k=1, avail=avail,
                                            partial=True))
        new_edges.extend(aug_edges)
        prog.ensure_newline()

    # Sanity check: augmentation edges must be genuinely new
    for edge in new_edges:
        assert not infr.graph.has_edge(*edge), (
            'already have edge={}'.format(edge))
    return new_edges
def reinstate_internal_priority(infr, cc):
    """
    Put the unreviewed edges inside ``cc`` back onto the review queue.
    No-op when there is no active queue.
    """
    if infr.queue is None:
        return
    # Reinstate the appropriate edges into the queue
    unreviewed = nxu.edges_inside(infr.unreviewed_graph, cc)
    infr._reinstate_edge_priority(unreviewed)
def remove_internal_priority(infr, cc):
    """
    Remove all edges internal to ``cc`` from the review queue.
    No-op when there is no active queue.
    """
    if infr.queue is None:
        return
    internal = nxu.edges_inside(infr.graph, cc)
    infr._remove_edge_priority(internal)
def apply_nondynamic_update(infr, graph=None):
    """
    Recomputes all dynamic bookkeeping for a graph in any state.
    This ensures that subsequent dynamic inference can be applied.

    Rebuilds, from scratch: the 'inferred_state' edge attributes, the
    positive/negative redundancy sets, the negative metagraph, the
    inconsistency recovery graph, and the error-edge hypotheses.

    Example:
        >>> from graphid import demo
        >>> num_pccs = 250
        >>> kwargs = dict(num_pccs=100, p_incon=.3)
        >>> infr = demo.demodata_infr(infer=False, **kwargs)
        >>> graph = None
        >>> infr.apply_nondynamic_update()
        >>> infr.assert_neg_metagraph()
    """
    # Cluster edges by category
    ne_to_edges = infr.collapsed_meta_edges()
    categories = infr.categorize_edges(graph, ne_to_edges)
    # Rewrite the inferred_state attribute for every category of edge
    infr.set_edge_attrs(
        'inferred_state',
        ub.dzip(ub.flatten(categories[POSTV].values()), ['same']))
    infr.set_edge_attrs(
        'inferred_state',
        ub.dzip(ub.flatten(categories[NEGTV].values()), ['diff']))
    infr.set_edge_attrs(
        'inferred_state',
        ub.dzip(ub.flatten(categories[INCMP].values()), [INCMP]))
    infr.set_edge_attrs(
        'inferred_state',
        ub.dzip(ub.flatten(categories[UNKWN].values()), [UNKWN]))
    infr.set_edge_attrs(
        'inferred_state',
        ub.dzip(ub.flatten(categories[UNREV].values()), [None]))
    infr.set_edge_attrs(
        'inferred_state',
        ub.dzip(ub.flatten(categories['inconsistent_internal'].values()),
                ['inconsistent_internal']))
    infr.set_edge_attrs(
        'inferred_state',
        ub.dzip(ub.flatten(categories['inconsistent_external'].values()),
                ['inconsistent_external']))
    # Ensure bookkeeping is taken care of
    # * positive redundancy
    # * negative redundancy
    # * inconsistency
    infr.pos_redun_nids = set(infr.find_pos_redun_nids())
    infr.neg_redun_metagraph = infr._graph_cls(
        list(infr.find_neg_redun_nids()))
    # make a node for each PCC, and place an edge between any pccs with at
    # least one negative edge, with weight being the number of negative
    # edges. Self loops indicate inconsistency.
    infr.neg_metagraph = infr._graph_cls()
    infr.neg_metagraph.add_nodes_from(infr.pos_graph.component_labels())
    for (nid1, nid2), edges in ne_to_edges[NEGTV].items():
        infr.neg_metagraph.add_edge(nid1, nid2, weight=len(edges))

    # Rebuild the recovery graph and error hypotheses for every
    # internally inconsistent PCC
    infr.recover_graph.clear()
    nid_to_errors = {}
    for nid, intern_edges in categories['inconsistent_internal'].items():
        cc = infr.pos_graph.component_nodes(nid)
        pos_subgraph = infr.pos_graph.subgraph(cc, dynamic=False).copy()
        neg_edges = list(nxu.edges_inside(infr.neg_graph, cc))
        recover_hypothesis = dict(
            infr.hypothesis_errors(pos_subgraph, neg_edges))
        nid_to_errors[nid] = set(recover_hypothesis.keys())
        infr.recover_graph.add_edges_from(pos_subgraph.edges())

    # Delete old hypothesis
    infr.set_edge_attrs(
        'maybe_error',
        ub.dzip(ub.flatten(infr.nid_to_errors.values()), [None]))
    # Set new hypothesis
    infr.set_edge_attrs(
        'maybe_error',
        ub.dzip(ub.flatten(nid_to_errors.values()), [True]))
    infr.nid_to_errors = nid_to_errors

    # no longer dirty
    if graph is None:
        infr.dirty = False
def _positive_decision(infr, edge):
    """
    Logic for a dynamic positive decision. A positive decision is evidence
    that two annots should be in the same PCC.

    Note, this could be an incomparable edge, but with a meta_decision of
    same.

    Handles three merge situations: within one PCC (clean or dirty),
    between two PCCs with bridging negatives (creates an inconsistency),
    and between two clean PCCs (plain merge).

    Returns:
        the action reported by ``on_within`` / ``on_between``

    Ignore:
        >>> from graphid import demo
        >>> kwargs = dict(num_pccs=3, p_incon=0, size=100)
        >>> infr = demo.demodata_infr(infer=False, **kwargs)
        >>> infr.apply_nondynamic_update()
        >>> cc1 = next(infr.positive_components())

        %timeit list(infr.pos_graph.subgraph(cc1, dynamic=True).edges())
        %timeit list(infr.pos_graph.subgraph(cc1, dynamic=False).edges())
        %timeit list(nxu.edges_inside(infr.pos_graph, cc1))
    """
    decision = POSTV
    nid1, nid2 = infr.pos_graph.node_labels(*edge)
    # Is either endpoint currently inside an inconsistent PCC?
    incon1, incon2 = infr.recover_graph.has_nodes(edge)
    all_consistent = not (incon1 or incon2)
    was_within = nid1 == nid2

    print_ = partial(infr.print, level=4)
    # Capture the previous decision before the review edge is added
    prev_decision = infr._get_current_decision(edge)

    if was_within:
        infr._add_review_edge(edge, decision)
        if all_consistent:
            print_('pos-within-clean')
            infr.update_pos_redun(nid1, may_remove=False)
        else:
            print_('pos-within-dirty')
            infr._check_inconsistency(nid1)
        action = infr.on_within(edge, decision, prev_decision, nid1, None)
    else:
        # print_('Merge case')
        cc1 = infr.pos_graph.component(nid1)
        cc2 = infr.pos_graph.component(nid2)

        if not all_consistent:
            # We are merging PCCs that are not all consistent
            # This will keep us in a dirty state.
            print_('pos-between-dirty-merge')
            # Pull the previously-clean side into the recovery graph too
            if not incon1:
                recover_edges = list(nxu.edges_inside(infr.pos_graph, cc1))
            else:
                recover_edges = list(nxu.edges_inside(infr.pos_graph, cc2))
            infr.recover_graph.add_edges_from(recover_edges)
            infr._purge_redun_flags(nid1)
            infr._purge_redun_flags(nid2)
            infr._add_review_edge(edge, decision)
            infr.recover_graph.add_edge(*edge)
            new_nid = infr.pos_graph.node_label(edge[0])
            # purge and re-add the inconsistency
            # (Note: the following three lines were added to fix
            #  a neg_meta_graph test, and may not be the best way to do it)
            infr._purge_error_edges(nid1)
            infr._purge_error_edges(nid2)
            infr._new_inconsistency(new_nid)
        elif any(nxu.edges_cross(infr.neg_graph, cc1, cc2)):
            # There are negative edges bridging these PCCS
            # this will put the graph into a dirty (inconsistent) state.
            print_('pos-between-clean-merge-dirty')
            infr._purge_redun_flags(nid1)
            infr._purge_redun_flags(nid2)
            infr._add_review_edge(edge, decision)
            new_nid = infr.pos_graph.node_label(edge[0])
            infr._new_inconsistency(new_nid)
        else:
            # We are merging two clean PCCs, everything is good
            print_('pos-between-clean-merge-clean')
            infr._purge_redun_flags(nid1)
            infr._purge_redun_flags(nid2)
            infr._add_review_edge(edge, decision)
            new_nid = infr.pos_graph.node_label(edge[0])
            infr.update_extern_neg_redun(new_nid, may_remove=False)
            infr.update_pos_redun(new_nid, may_remove=False)
        action = infr.on_between(edge, decision, prev_decision, nid1, nid2,
                                 merge_nid=new_nid)
    return action