def test_neg_metagraph_split_and_merge():
    """
    Check that the negative metagraph keeps counting the negative edges
    between PCCs correctly while labels change through a merge and a split.
    """
    # Four PCCs of five nodes each, 3-pos-redundant, no negative edges yet
    infr = demo.demodata_infr(
        num_pccs=4, pcc_size=5, pos_redun=3, ignore_pair=True, infer=True
    )
    cc_a, cc_b, cc_c, cc_d = infr.positive_components()
    # Unpacking also checks that every component has exactly five nodes
    a1, a2, a3, a4, _a5 = cc_a
    b1, b2, b3, b4, _b5 = cc_b
    c1, _c2, _c3, c4, _c5 = cc_c
    d1, _d2, _d3, d4, _d5 = cc_d
    nmg = infr.neg_metagraph

    def meta_weight(u, v):
        # Weight stored on the metagraph edge between labels u and v
        return nmg.edges[(u, v)]['weight']

    A, B, C, D = infr.node_labels(a1, b1, c1, d1)

    # Three negative edges between a and b, then one each for
    # (a, c), (b, d), (c, d), and (a, d)
    negative_pairs = [
        (a1, b1),
        (a2, b2),
        (a3, b3),
        (a4, c4),
        (b4, d4),
        (c1, d1),
        (a4, d4),
    ]
    for pair in negative_pairs:
        infr.add_feedback(pair, NEGTV)

    assert meta_weight(A, B) == 3
    assert meta_weight(A, C) == 1
    assert (B, C) not in nmg.edges
    assert meta_weight(A, D) == 1
    assert meta_weight(B, D) == 1
    assert nmg.number_of_edges() == 5
    assert nmg.number_of_nodes() == 4

    # Merge A and B by flipping one of their negative edges to positive
    infr.add_feedback((a1, b1), POSTV)
    assert nmg.number_of_edges() == 4
    assert nmg.number_of_nodes() == 3
    AB = infr.node_label(a1)
    assert meta_weight(AB, AB) == 2

    # Split A and B again. The node count goes back up and the edge count
    # returns to its pre-merge value; the (A, B) weight stays at 2 because
    # the flipped edge is now incomparable rather than negative.
    infr.add_feedback((a1, b1), INCMP)
    assert nmg.number_of_edges() == 5
    assert nmg.number_of_nodes() == 4
    assert meta_weight(A, B) == 2
    infr.assert_neg_metagraph()

    # Mark every positive edge as incomparable (snapshot the edge list first
    # because feedback changes the positive graph while we iterate)
    positive_edges = list(infr.pos_graph.edges())
    for pos_edge in positive_edges:
        infr.add_feedback(pos_edge, INCMP)

    # The metagraph should now have the same node and edge counts as
    # infr.neg_graph
    assert nmg.number_of_nodes() == infr.neg_graph.number_of_nodes()
    assert nmg.number_of_edges() == infr.neg_graph.number_of_edges()
    infr.assert_neg_metagraph()
def test_neg_metagraph_simple_add_remove():
    """
    Check that the negative metagraph counts the negative edges between PCCs
    correctly for operations that do not change any PCC label.
    """
    # Four PCCs of five nodes each, 3-pos-redundant, no negative edges yet
    infr = demo.demodata_infr(
        num_pccs=4, pcc_size=5, pos_redun=3, ignore_pair=True, infer=True
    )
    cc_a, cc_b, _cc_c, _cc_d = infr.positive_components()
    # Unpacking also checks that both components have exactly five nodes
    a1, _a2, _a3, _a4, _a5 = cc_a
    b1, b2, _b3, _b4, _b5 = cc_b
    nmg = infr.neg_metagraph

    def assert_counts(num_edges, num_nodes):
        # Edge count is checked first, then node count
        assert nmg.number_of_edges() == num_edges
        assert nmg.number_of_nodes() == num_nodes

    # Initially there are 4 meta-nodes and no meta-edges
    assert_counts(0, 4)

    # The first negative review creates one meta-edge of weight 1
    u, v = a1, b1
    infr.add_feedback((u, v), NEGTV)
    nid1, nid2 = infr.node_labels(u, v)
    assert nmg.edges[nid1, nid2]['weight'] == 1
    assert_counts(1, 4)

    # Repeating the same review must not change anything
    same_edge = a1, b1
    infr.add_feedback(same_edge, NEGTV)
    name_edge = infr.node_labels(*same_edge)
    assert nmg.edges[name_edge]['weight'] == 1
    assert_counts(1, 4)

    # A negative review between a different pair of nodes in the same two
    # PCCs raises the weight of the existing meta-edge
    other_edge = a1, b2
    infr.add_feedback(other_edge, NEGTV)
    name_edge = infr.node_labels(*other_edge)
    assert nmg.edges[name_edge]['weight'] == 2
    assert_counts(1, 4)

    # Re-reviewing the first pair is still a no-op
    infr.add_feedback((u, v), NEGTV)
    assert nmg.edges[name_edge]['weight'] == 2

    # Relabeling one negative edge lowers the weight
    infr.add_feedback((a1, b2), INCMP)
    assert nmg.edges[name_edge]['weight'] == 1

    # Relabeling the last one removes the meta-edge entirely
    infr.add_feedback((a1, b1), INCMP)
    assert not nmg.has_edge(*name_edge)
    infr.assert_neg_metagraph()
def test_neg_metagraph_split_incomp():
    """
    Check the negative metagraph after every positive edge is re-reviewed as
    incomparable: node counts must match the negative graph and no
    meta-edges may appear.
    """
    infr = demo.demodata_infr(
        num_pccs=4, pcc_size=5, pos_redun=3, ignore_pair=True, infer=True
    )
    nmg = infr.neg_metagraph

    # Before the split the metagraph has fewer nodes than the negative graph
    num_meta_nodes = nmg.number_of_nodes()
    num_neg_nodes = infr.neg_graph.number_of_nodes()
    assert num_meta_nodes < num_neg_nodes
    assert nmg.number_of_edges() == 0

    # Mark every positive edge as incomparable (snapshot the edge list first
    # because feedback changes the positive graph while we iterate)
    positive_edges = list(infr.pos_graph.edges())
    for pos_edge in positive_edges:
        infr.add_feedback(pos_edge, INCMP)

    # The metagraph should now have as many nodes as infr.neg_graph and
    # still no edges
    num_meta_nodes = nmg.number_of_nodes()
    num_neg_nodes = infr.neg_graph.number_of_nodes()
    assert num_meta_nodes == num_neg_nodes
    assert nmg.number_of_edges() == 0
    infr.assert_neg_metagraph()
def test_pos_neg():
    """
    Replay a fixed sequence of positive and negative reviews that creates,
    merges, and then repairs inconsistent components, and print the final
    decision on every edge.

    CommandLine:
        python ~/code/graphid/tests/test_graph_iden.py test_pos_neg
    """
    infr = demo.demodata_infr(num_pccs=0)

    # The reviews are applied strictly in the order listed
    steps = [
        # Make 3 inconsistent CCs
        ((1, 2), POSTV),
        ((2, 3), POSTV),
        ((3, 4), POSTV),
        ((4, 1), POSTV),
        ((1, 3), NEGTV),
        # -----
        ((11, 12), POSTV),
        ((12, 13), POSTV),
        ((13, 11), NEGTV),
        # -----
        ((21, 22), POSTV),
        ((22, 23), POSTV),
        ((23, 21), NEGTV),
        # -----
        # Fix inconsistency
        ((23, 21), POSTV),
        # Merge inconsistent CCs
        ((1, 11), POSTV),
        # Negative edge within an inconsistent CC
        ((2, 13), NEGTV),
        # Negative edge external to an inconsistent CC
        ((12, 21), NEGTV),
        # -----
        # Make inconsistency from positive
        ((31, 32), POSTV),
        ((33, 34), POSTV),
        ((31, 33), NEGTV),
        ((32, 34), NEGTV),
        ((31, 34), POSTV),
        # Fix everything
        ((1, 3), POSTV),
        ((2, 4), POSTV),
        ((32, 34), POSTV),
        ((31, 33), POSTV),
        ((13, 11), POSTV),
        ((23, 21), POSTV),
        ((1, 11), NEGTV),
    ]
    for edge, decision in steps:
        infr.add_feedback(edge, decision)

    print('Final state:')
    final_decisions = sorted(infr.gen_edge_attrs('decision'))
    print(ub.repr2(final_decisions))
def test_neg_metagraph_split_neg():
    """
    Check that the negative metagraph counts the negative edges between PCCs
    correctly when every positive edge is re-reviewed as negative, which
    splits the PCCs and changes their labels.
    """
    # Four PCCs of five nodes each, 3-pos-redundant, no negative edges yet
    infr = demo.demodata_infr(
        num_pccs=4, pcc_size=5, pos_redun=3, ignore_pair=True, infer=True
    )
    nmg = infr.neg_metagraph

    # Before the split the two graphs have different node counts
    num_meta_nodes = nmg.number_of_nodes()
    num_neg_nodes = infr.neg_graph.number_of_nodes()
    assert num_meta_nodes != num_neg_nodes
    assert nmg.number_of_edges() == 0

    # Flip every positive edge to negative (snapshot the edge list first
    # because feedback changes the positive graph while we iterate)
    positive_edges = list(infr.pos_graph.edges())
    for pos_edge in positive_edges:
        infr.add_feedback(pos_edge, NEGTV)

    # The metagraph should now have the same node and edge counts as
    # infr.neg_graph
    num_meta_nodes = nmg.number_of_nodes()
    num_neg_nodes = infr.neg_graph.number_of_nodes()
    assert num_meta_nodes == num_neg_nodes
    assert nmg.number_of_edges() > 0
    assert nmg.number_of_edges() == infr.neg_graph.number_of_edges()
    infr.assert_neg_metagraph()
def test_incomp_inference():
    """
    Replay a fixed sequence of reviews that builds consistent and
    inconsistent components, then re-review edges inside and between them as
    incomparable, and print the final decision on every edge.
    """
    infr = demo.demodata_infr(num_pccs=0)

    # Make 2 consistent and 2 inconsistent CCs.
    # The reviews are applied strictly in the order listed.
    setup_steps = [
        ((1, 2), POSTV),
        ((2, 3), POSTV),
        ((3, 4), POSTV),
        ((4, 1), POSTV),
        # -----
        ((11, 12), POSTV),
        ((12, 13), POSTV),
        ((13, 14), POSTV),
        ((14, 11), POSTV),
        ((12, 14), NEGTV),
        # -----
        ((21, 22), POSTV),
        ((22, 23), POSTV),
        ((23, 21), NEGTV),
        # -----
        ((31, 32), POSTV),
        ((32, 33), POSTV),
        ((33, 31), POSTV),
        ((2, 32), NEGTV),
        ((3, 33), NEGTV),
        ((12, 21), NEGTV),
    ]
    for edge, decision in setup_steps:
        infr.add_feedback(edge, decision)

    # -----
    # Incomparable within CCs
    print('==========================')
    incomparable_edges = [
        (1, 3),
        (1, 4),
        (1, 2),
        (11, 13),
        (11, 14),
        (11, 12),
        (1, 31),
        (2, 32),
        (12, 21),
        (23, 21),
        (12, 14),
    ]
    for edge in incomparable_edges:
        infr.add_feedback(edge, INCMP)

    print('Final state:')
    final_decisions = sorted(infr.gen_edge_attrs('decision'))
    print(ub.repr2(final_decisions))
def run_demo():
    """
    Run a scripted review session on a small synthetic graph, optionally
    saving a figure for every step.

    The function builds a demo inference object with three PCCs, records the
    groundtruth layout, clears all feedback, and then pops edges from
    ``infr._generate_reviews`` one at a time, asking the simulated oracle for
    a decision and feeding it back with ``infr.add_feedback``. The first
    popped edge whose endpoints share a positive-graph label is reviewed
    with ``accuracy=0``.

    Command line options read through ``ub.argflag`` / ``ub.argval``:
        --viz: enable drawing; frames are saved as ``demo_NNNN.png`` under
            ``~/Desktop/demo``.
        --labelsize: value for the ``axes.labelsize`` rcParam (default 8).
        --target: review number to stop at; drawing starts one review
            earlier.
        --start: review number from which every review is drawn (only has
            an effect together with --viz).
        --end: review number at which the loop stops before reviewing.

    CommandLine:
        python -m graphid.demo.demo_script run_demo --viz
        python -m graphid.demo.demo_script run_demo

    Example:
        >>> run_demo()
    """
    from graphid import demo
    import matplotlib as mpl
    # NOTE: this updates the global matplotlib rcParams and is not undone
    TMP_RC = {
        'axes.titlesize': 12,
        'axes.labelsize': int(ub.argval('--labelsize', default=8)),
        'font.family': 'sans-serif',
        'font.serif': 'CMU Serif',
        'font.sans-serif': 'CMU Sans Serif',
        'font.monospace': 'CMU Typewriter Text',
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,
        # 'legend.alpha': .8,
        'legend.fontsize': 12,
        'legend.facecolor': 'w',
    }
    mpl.rcParams.update(TMP_RC)

    # ---- Synthetic data params
    # Passed to infr.params.update below
    params = {
        'redun.pos': 2,
        'redun.neg': 2,
    }
    # oracle_accuracy = .98
    # oracle_accuracy = .90
    # oracle_accuracy = (.8, 1.0)
    oracle_accuracy = (.85, 1.0)
    # oracle_accuracy = 1.0

    # --- draw params
    VISUALIZE = ub.argflag('--viz')
    # QUIT_OR_EMEBED = 'embed'
    QUIT_OR_EMEBED = 'quit'

    def asint(p):
        # Convert to int, passing None (option not given) through unchanged
        return p if p is None else int(p)

    TARGET_REVIEW = asint(ub.argval('--target', default=None))
    START = asint(ub.argval('--start', default=None))
    END = asint(ub.argval('--end', default=None))

    # ------------------
    # rng = np.random.RandomState(42)
    # infr = demo.demodata_infr(num_pccs=4, size=3, size_std=1, p_incon=0)
    # infr = demo.demodata_infr(num_pccs=6, size=7, size_std=1, p_incon=0)
    # infr = demo.demodata_infr(num_pccs=3, size=5, size_std=.2, p_incon=0)
    infr = demo.demodata_infr(pcc_sizes=[5, 2, 4])
    infr.verbose = 100
    infr.ensure_cliques()
    infr.ensure_full()

    # Dummy scoring
    infr.init_simulation(oracle_accuracy=oracle_accuracy, name='run_demo')
    # infr_gt = infr.copy()

    # Output directory for the saved frames
    dpath = ub.ensuredir(ub.truepath('~/Desktop/demo'))
    if 0:
        # Disabled: wipe frames left over from a previous run
        ub.delete(dpath)
    ub.ensuredir(dpath)

    # Numbers the saved frames: demo_0000.png, demo_0001.png, ...
    fig_counter = it.count(0)

    def show_graph(infr, title, final=False, selected_edges=None):
        # Draw the current graph with the latest log lines as the x-label
        # and save it as the next numbered frame. Returns immediately when
        # --viz was not given. The `final` argument is currently unused.
        from matplotlib import pyplot as plt
        if not VISUALIZE:
            return
        # TODO: rich colored text?
        latest = '\n'.join(infr.latest_logs())
        showkw = dict(
            # fontsize=infr.graph.graph['fontsize'],
            # fontname=infr.graph.graph['fontname'],
            show_unreviewed_edges=True,
            show_inferred_same=False,
            show_inferred_diff=False,
            outof=(len(infr.aids)),
            # show_inferred_same=True,
            # show_inferred_diff=True,
            selected_edges=selected_edges,
            show_labels=True,
            simple_labels=True,
            # show_recent_review=not final,
            show_recent_review=False,
            # splines=infr.graph.graph['splines'],
            reposition=False,
            # with_colorbar=True
        )
        # Silence the inference object while copying, then restore verbosity
        verbose = infr.verbose
        infr.verbose = 0
        # NOTE(review): the copy is rebound to `infr` on the next line and
        # never used, so it looks redundant -- confirm that copy() has no
        # needed side effect before removing it.
        infr_ = infr.copy()
        infr_ = infr
        infr_.verbose = verbose
        infr_.show(pickable=True, verbose=0, **showkw)
        infr.verbose = verbose
        # print('status ' + ub.repr2(infr_.status()))
        # infr.show(**showkw)
        ax = plt.gca()
        ax.set_title(title, fontsize=20)
        fig = plt.gcf()
        # fontsize = 22
        fontsize = 12
        if True:
            # postprocess xlabel
            lines = []
            for line in latest.split('\n'):
                # The `False and` disables the abbreviation branch, so every
                # log line is kept verbatim
                if False and line.startswith('ORACLE ERROR'):
                    lines += ['ORACLE ERROR']
                else:
                    lines += [line]
            latest = '\n'.join(lines)
            # Shrink the font as the number of log lines grows; the checks
            # are sequential so the last matching threshold wins
            if len(lines) > 10:
                fontsize = 16
            if len(lines) > 12:
                fontsize = 14
            if len(lines) > 14:
                fontsize = 12
            if len(lines) > 18:
                fontsize = 10
            if len(lines) > 23:
                fontsize = 8
        if True:
            # Reserve the bottom of the figure for the log text
            util.mplutil.adjust_subplots(top=.95, left=0, right=1,
                                         bottom=.45, fig=fig)
            ax.set_xlabel('\n' + latest)
            xlabel = ax.get_xaxis().get_label()
            xlabel.set_horizontalalignment('left')
            # xlabel.set_x(.025)
            # xlabel.set_x(-.6)
            xlabel.set_x(-2.0)
            # xlabel.set_fontname('CMU Typewriter Text')
            xlabel.set_fontname('Inconsolata')
            xlabel.set_fontsize(fontsize)
        ax.set_aspect('equal')
        # ax.xaxis.label.set_color('red')
        fpath = join(dpath, 'demo_{:04d}.png'.format(next(fig_counter)))
        fig.savefig(
            fpath, dpi=300,
            # transparent=True,
            edgecolor='none')
        # pt.save_figure(dpath=dpath, dpi=300)
        # Return value discarded; presumably this clears the pending log
        # buffer -- TODO confirm
        infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs(groupby='name_label')
        infr.set_node_attrs('pin', 'true')
        node_dict = infr.graph.nodes
        print(ub.repr2(node_dict[1]))

    if VISUALIZE:
        infr.latest_logs()
        # Pin Nodes into the target groundtruth position
        show_graph(infr, 'target-gt')

    # Print the status before and after wiping feedback, labels, and edges
    print(ub.repr2(infr.status()))
    infr.clear_feedback()
    infr.clear_name_labels()
    infr.clear_edges()
    print(ub.repr2(infr.status()))
    infr.latest_logs()

    if VISUALIZE:
        infr.update_visual_attrs()

    infr.prioritize('prob_match')
    if VISUALIZE or TARGET_REVIEW is None or TARGET_REVIEW == 0:
        show_graph(infr, 'initial state')

    def on_new_candidate_edges(infr, edges):
        # hack updating visual attrs as a callback
        if VISUALIZE:
            infr.update_visual_attrs()

    infr.on_new_candidate_edges = on_new_candidate_edges

    infr.params.update(**params)
    infr.refresh_candidate_edges()

    # Draw every review only when no --target / --start narrows the range
    VIZ_ALL = (VISUALIZE and TARGET_REVIEW is None and START is None)
    print('VIZ_ALL = %r' % (VIZ_ALL, ))

    if VIZ_ALL or TARGET_REVIEW == 0:
        show_graph(infr, 'find-candidates')

    # _iter2 = enumerate(infr.generate_reviews(**params))
    # _iter2 = list(_iter2)
    # assert len(_iter2) > 0
    # prog = ub.ProgIter(_iter2, label='run_demo', bs=False, adjust=False,
    #                    enabled=False)
    count = 1  # 1-based number of the review being processed
    first = 1  # how many forced oracle mistakes are still to be made
    for edge, priority in infr._generate_reviews(data=True):
        msg = 'review #%d, priority=%.3f' % (count, priority)
        print('\n----------')
        infr.print('pop edge {} with priority={:.3f}'.format(edge, priority))
        # print('remaining_reviews = %r' % (infr.remaining_reviews()),)
        # Make the next review
        if START is not None:
            VIZ_ALL = count >= START

        if END is not None and count >= END:
            break

        infr.print(msg)
        if ub.allsame(infr.pos_graph.node_labels(*edge)) and first:
            # Have oracle make a mistake early
            feedback = infr.request_oracle_review(edge, accuracy=0)
            first -= 1
        else:
            feedback = infr.request_oracle_review(edge)

        # True from one review before the target onwards
        AT_TARGET = TARGET_REVIEW is not None and count >= TARGET_REVIEW - 1

        SHOW_CANDIATE_POP = True
        if SHOW_CANDIATE_POP and (VIZ_ALL or AT_TARGET):
            infr.print(
                ub.repr2(infr.task_probs['match_state'][edge],
                         precision=4, si=True))
            infr.print('len(queue) = %r' % (len(infr.queue)))
            # Show edge selection
            infr.print('Oracle will predict: ' + feedback['evidence_decision'])
            show_graph(infr, 'pre' + msg, selected_edges=[edge])

        if count == TARGET_REVIEW:
            # Always False while QUIT_OR_EMEBED is hard-coded to 'quit'
            infr.EMBEDME = QUIT_OR_EMEBED == 'embed'
        infr.add_feedback(edge, **feedback)
        infr.print('len(queue) = %r' % (len(infr.queue)))
        # infr.apply_nondynamic_update()
        # Show the result
        if VIZ_ALL or AT_TARGET:
            show_graph(infr, msg)
            # import sys
            # sys.exit(1)
        if count == TARGET_REVIEW:
            break
        count += 1

    infr.print('status = ' + ub.repr2(infr.status(extended=False)))
    show_graph(infr, 'post-review (#reviews={})'.format(count), final=True)

    if VISUALIZE:
        if not getattr(infr, 'EMBEDME', False):
            # import plottool as pt
            # util.mplutil.all_figures_tile()
            util.mplutil.show_if_requested()
def test_neg_metagraph_merge():
    """
    Test that the negative metagraph tracks the number of negative edges
    between PCCs through label-changing merge operations

    CommandLine:
        python ~/code/graphid/tests/test_neg_metagraph.py test_neg_metagraph_merge
    """
    # Create a graph with 4 CCs, with 3-pos-redun, and no negative edges
    infr = demo.demodata_infr(num_pccs=4, pcc_size=5, pos_redun=3,
                              ignore_pair=True, infer=True)
    infr.verbose = 1000
    cc_a, cc_b, cc_c, cc_d = infr.positive_components()
    # Unpacking also checks that every component has exactly five nodes
    a1, a2, a3, a4, a5 = cc_a
    b1, b2, b3, b4, b5 = cc_b
    c1, c2, c3, c4, c5 = cc_c
    d1, d2, d3, d4, d5 = cc_d

    nmg = infr.neg_metagraph

    # The initial negative metagraph has 4 nodes representing the PCCs
    # and no edges because we have not added any negative feedback
    assert nmg.number_of_nodes() == 4
    assert nmg.number_of_edges() == 0

    # Remember the original PCC labels
    # (pccs are the nodes in the negative metagraph)
    A, B, C, D = infr.node_labels(a1, b1, c1, d1)

    # Add three negative edges between a and b
    # one between (a, c), (b, d), (a, d), and (c, d)
    print('\nSetting up negative edges, before the merge test')
    infr.add_feedback((a1, b1), NEGTV)
    infr.add_feedback((a2, b2), NEGTV)
    infr.add_feedback((a3, b3), NEGTV)
    infr.add_feedback((a4, c4), NEGTV)
    infr.add_feedback((b4, d4), NEGTV)
    infr.add_feedback((c1, d1), NEGTV)
    infr.add_feedback((a4, d4), NEGTV)

    assert nmg.edges[(A, B)]['weight'] == 3
    assert nmg.edges[(A, C)]['weight'] == 1
    assert (B, C) not in nmg.edges
    assert nmg.edges[(A, D)]['weight'] == 1
    assert nmg.edges[(B, D)]['weight'] == 1
    assert nmg.number_of_edges() == 5
    assert nmg.number_of_nodes() == 4

    # Now merge A and B into a single PCC
    print('\nMerging A and B into a single PCC: AB')
    infr.add_feedback((a1, b1), POSTV)
    AB = infr.node_label(a1)

    # The original meta-nodes A and B should now be combined into AB
    assert infr.node_label(b1) == AB
    assert A != B
    assert A == AB or A not in nmg.nodes
    assert B == AB or B not in nmg.nodes

    # Should have combined weights from (A, D) and (B, D)
    # And (A, C) should be brought over as-is
    assert nmg.edges[(AB, D)]['weight'] == 2
    assert nmg.edges[(AB, C)]['weight'] == 1

    # should now have a self-loop with weight 2
    # (it decreased because we changed a previously neg edge to pos)
    assert nmg.edges[(AB, AB)]['weight'] == 2
    assert len(list(nx.selfloop_edges(nmg))) == 1

    # nothing should change between C and D
    assert nmg.edges[(C, D)]['weight'] == 1

    # Should decrease number of nodes and edges
    assert nmg.number_of_nodes() == 3
    assert nmg.number_of_edges() == 4

    infr.assert_neg_metagraph()

    # Additional merge
    print('\nMerging C and D into a single PCC: CD')
    infr.add_feedback((c2, d2), POSTV)
    CD = infr.node_label(c1)
    infr.assert_neg_metagraph()

    assert nmg.number_of_nodes() == 2
    assert nmg.number_of_edges() == 3
    assert nmg.edges[(CD, CD)]['weight'] == 1
    assert nmg.edges[(AB, CD)]['weight'] == 3
    assert nmg.edges[(AB, AB)]['weight'] == 2

    # Yet another merge
    print('\nMerging AB and CD into a single PCC: ABCD')
    infr.add_feedback((a1, c1), POSTV)
    ABCD = infr.node_label(c1)
    assert nmg.number_of_nodes() == 1, 'should only be one PCC now'
    assert nmg.number_of_edges() == 1
    # All six remaining negative edges are now internal to ABCD
    # (2 from AB-AB, 1 from CD-CD, 3 from AB-CD). This must be an assertion:
    # assigning the weight would overwrite the value under test.
    assert nmg.edges[(ABCD, ABCD)]['weight'] == 6
    infr.assert_neg_metagraph()
def demo_refresh():
    r"""
    Simulate oracle reviews over score-ranked candidate edges and plot the
    refresh criterion's estimate next to the fraction of positives that
    remain, together with the decision threshold.

    Candidate edges are reviewed in order of decreasing verifier score. The
    simulation stops at the first review where ``refresh.check()`` is true.

    CommandLine:
        python -m graphid.core.refresh demo_refresh \
                --num_pccs=40 --size=2 --show

    Example:
        >>> # ENABLE_DOCTEST
        >>> from graphid.core.refresh import *  # NOQA
        >>> demo_refresh()
        >>> util.show_if_requested()
    """
    from graphid import demo
    demokw = {'num_pccs': 50, 'size': 4}
    refreshkw = dict(window=20, patience=72, thresh=.1, method='binomial')

    # make an inference object
    infr = demo.demodata_infr(size_std=0, **demokw)
    edges = list(infr.ranker.predict_candidate_edges(infr.aids, K=100))
    scores = np.array(infr.verifier.predict_edges(edges))
    # Review the highest scoring candidates first
    sortx = scores.argsort()[::-1]
    edges = list(ub.take(edges, sortx))
    # One value per edge from the POSTV column of the match-state table
    # (presumably 1 for a true positive match, else 0 -- TODO confirm)
    ys = infr.match_state_df(edges)[POSTV].values
    # y_remainsum[i] is the sum of ys from position i to the end
    y_remainsum = ys[::-1].cumsum()[::-1]

    # Do oracle reviews and wait to converge
    refresh = RefreshCriteria(**refreshkw)
    xdata = []
    pprob_any = []
    rfrac_any = []
    for count, (_edge, y) in enumerate(zip(edges, ys)):
        refresh.add(y, user_id='user:oracle')
        rfrac_any.append(y_remainsum[count] / y_remainsum[0])
        pprob_any.append(refresh.prob_any_remain())
        xdata.append(count + 1)
        if refresh.check():
            break

    ydatas = ub.odict([
        ('Est. probability any remain', pprob_any),
        ('Fraction remaining', rfrac_any),
    ])

    # xdoctest: +REQUIRES(--show)
    from graphid import util
    util.qtensure()
    import matplotlib.pyplot as plt
    util.multi_plot(
        xdata, ydatas, xlabel='# manual reviews',
        marker='',
        ylim=(0, 1), use_legend=False,
    )

    # Relabel the parameters for display
    demokw = ub.map_keys({'num_pccs': '#PCC', 'size': 'PCC size'}, demokw)
    thresh = refreshkw.pop('thresh')
    refreshkw['span'] = refreshkw.pop('window')
    util.relative_text((.02, .58 + .0), ub.repr2(demokw, sep=' ', nl=0)[1:],
                       valign='bottom')
    util.relative_text((.02, .68 + .0), ub.repr2(refreshkw, sep=' ', nl=0)[1:],
                       valign='bottom')

    # Draw the threshold line BEFORE building the legend: legend() only
    # picks up labelled artists that already exist on the axes, so plotting
    # afterwards would leave the 'thresh' label out.
    plt.plot([xdata[0], xdata[-1]], [thresh, thresh], 'g--', label='thresh')
    legend = plt.gca().legend()
    legend.get_frame().set_alpha(1.0)