def test_inconsistent_edges():
    """Compare ``inconsistent_edges`` output with a hand-written expectation.

    The example graph comes from ``tct.ex_graph_fig1()`` and is clustered with
    a fixed node-to-cluster mapping. The edges reported by
    ``inconsistent_edges`` are compared, ignoring order, with a hard-coded
    list of ``(node, node, weight)`` triples. The outcome is only logged;
    a mismatch does not raise.
    """
    logger.info('\n=======================\n'
                'Test inconsistent_edges\n'
                '=======================')

    graph = tct.ex_graph_fig1()

    # Cluster id -> member nodes, listed in the order the mapping is filled.
    members_by_cid = [
        (0, ('a', 'b', 'd', 'e')),
        (1, ('c',)),
        (2, ('h', 'i')),
        (3, ('f', 'g', 'j', 'k')),
    ]
    node2cid = {
        node: cid
        for cid, nodes in members_by_cid
        for node in nodes
    }
    clusters = ct.build_clustering(node2cid)

    found = inconsistent_edges(graph, clusters, node2cid)
    expected = {
        ('a', 'e', -2),
        ('c', 'f', 2),
        ('d', 'h', 3),
        ('d', 'e', -1),
        ('f', 'i', 2),
        ('f', 'k', -3),
        ('g', 'j', -3),
    }

    if expected != set(found):
        logger.info('Identify inconsistent edges: FAIL')
        return
    logger.info('Identify inconsistent edges: success')
def test_replace_clusters():
    """Run ``ct.replace_clusters`` on a small example and log the outcome.

    Clusters ``cids[2]`` and ``cids[4]`` are replaced by two new clusters
    keyed ``cids[5]`` and ``cids[7]``. The return value of
    ``ct.replace_clusters`` is ignored; the clustering and node-to-cluster
    mapping passed in are read back afterwards and logged beside the values
    the author expected. Nothing is asserted.
    """
    logger.info('===========================')
    logger.info('test replace_clusters')

    cids = list(ct.cids_from_range(8))

    # Member nodes of the first five cluster ids, in mapping insertion order.
    initial_members = [
        ('a', 'b', 'd', 'e'),
        ('c',),
        ('h', 'i'),
        ('f', 'g'),
        ('j', 'k'),
    ]
    node2cid = {
        node: cid
        for cid, nodes in zip(cids, initial_members)
        for node in nodes
    }
    clusters = ct.build_clustering(node2cid)

    removed = [cids[2], cids[4]]
    replacements = {
        cids[5]: {'j'},
        cids[7]: {'h', 'i', 'k'},
    }
    ct.replace_clusters(removed, replacements, clusters, node2cid)

    remaining_cids = list(clusters.keys())
    logger.info('Cluster ids, should be c0, c1, c3, c5, c7. Are: %s'
                % (remaining_cids, ))
    logger.info("clustering[c5] should be {'j'}!! and is %s"
                % (clusters[cids[5]], ))
    logger.info("clustering[c7] should be {'h', 'i', 'k'} and is %s"
                % (clusters[cids[7]], ))
    logger.info("n2c['h'] should be c7 and is %s" % (node2cid['h'], ))
    logger.info("n2c['j'] should be c5 and is %s" % (node2cid['j'], ))
def build_example_LCA():
    """Build an example ``LCA`` over clusters 2 and 3 of the fig-1 graph.

    The ``LCA`` is constructed from the subgraph induced by the nodes of
    clusters 2 and 3 of a hand-written clustering, together with the
    ``ct.cid_list_score`` of those two clusters. An alternative two-cluster
    partition of the same nodes and its ``ct.clustering_score`` are then
    attached with ``set_to_clusters``.

    Returns:
        A ``(lca, G)`` tuple: the configured ``LCA`` object and the full
        graph returned by ``tct.ex_graph_fig1()``.
    """
    G = tct.ex_graph_fig1()

    # Cluster id -> member nodes, in the order the mapping is filled.
    members_by_cid = [
        (0, ('a', 'b', 'd', 'e')),
        (1, ('c',)),
        (2, ('h', 'i')),
        (3, ('f', 'g', 'j', 'k')),
    ]
    node2cid = {
        node: cid
        for cid, nodes in members_by_cid
        for node in nodes
    }
    clusters = ct.build_clustering(node2cid)

    cid0, cid1 = 2, 3
    lca_cids = [cid0, cid1]
    lca_nodes = list(clusters[cid0] | clusters[cid1])

    from_graph = G.subgraph(lca_nodes)
    from_score = ct.cid_list_score(from_graph, clusters, node2cid, lca_cids)
    lca = LCA(from_graph, clusters, lca_cids, from_score)

    # Alternative partition of the same nodes, scored on a subgraph that is
    # requested again from G for the same node list.
    to_clusters = {0: {'f', 'h', 'i', 'j'}, 1: {'g', 'k'}}
    to_graph = G.subgraph(lca_nodes)
    to_node2cid = {}
    for cid in range(len(to_clusters)):
        for node in to_clusters[cid]:
            to_node2cid[node] = cid
    to_score = ct.clustering_score(to_graph, to_node2cid)
    lca.set_to_clusters(to_clusters, to_score)

    return lca, G
def test_cluster_scoring_and_weights():
    """Log results of the scoring helpers beside their expected values.

    Exercises ``ct.cid_list_score``, ``ct.clustering_score`` and
    ``ct.get_weight_lists`` on the graph returned by ``ex_graph_fig1()``.
    Nothing is asserted: each computed value is written to the log together
    with the value the author expected, for manual comparison.
    """
    G = ex_graph_fig1()

    logger.info('=====================')
    logger.info('Testing cid_list_score')
    cids = list(ct.cids_from_range(4))
    n2c_random = {
        'a': cids[0],
        'b': cids[0],
        'f': cids[0],
        'c': cids[1],
        'g': cids[1],
        'd': cids[2],
        'e': cids[2],
        'i': cids[2],
        'h': cids[3],
        'j': cids[3],
        'k': cids[3],
    }
    clustering_random = ct.build_clustering(n2c_random)
    score = ct.cid_list_score(G, clustering_random, n2c_random,
                              [cids[0], cids[2], cids[3]])
    logger.info('Score between clusters [c0, c2, c3] should be -5 and is %s'
                % (score, ))

    logger.info('=====================')
    logger.info('Testing clustering_score')

    # First clustering: all together
    n2c_single_cluster = {n: 'c0' for n in G.nodes}
    logger.info('Score with all together should be 21. Score = %s'
                % (ct.clustering_score(G, n2c_single_cluster), ))

    # Second clustering: all separate
    n2c_all_separate = {n: 'c' + str(i) for i, n in enumerate(G.nodes)}
    # The message previously said "all together" here (copy-paste slip),
    # which mislabelled the all-separate case in the log.
    logger.info('Score with all separate should be -21. Score = %s'
                % (ct.clustering_score(G, n2c_all_separate), ))

    # Third clustering: optimal, by hand
    cids = list(ct.cids_from_range(4))
    n2c_optimal = {
        'a': cids[0],
        'b': cids[0],
        'd': cids[0],
        'e': cids[0],
        'c': cids[1],
        'h': cids[2],
        'i': cids[2],
        'f': cids[3],
        'g': cids[3],
        'j': cids[3],
        'k': cids[3],
    }
    logger.info('Optimal score should be 49. Score = %s'
                % (ct.clustering_score(G, n2c_optimal), ))

    negatives, positives = ct.get_weight_lists(G, sort_positive=True)
    logger.info('Length of negatives should be 10. It is %s'
                % (len(negatives), ))
    logger.info('Length of positives should be 11. It is %s'
                % (len(positives), ))
    logger.info('0th positive should be 8. It is %s' % (positives[0], ))
    logger.info('Last positive should be 2. It is %s' % (positives[-1], ))
def test_build_clustering_and_mapping():
    """Log round-trip results of the clustering/mapping builders.

    Calls ``ct.build_clustering`` on an empty mapping and on a hand-written
    node-to-cluster mapping, then calls ``ct.build_node_to_cluster_mapping``
    on an empty clustering and on the clustering just built. Results are
    logged beside the expected values; nothing is asserted.
    """
    logger.info('==================')
    logger.info('Testing build_clustering')
    empty_n2c = {}
    empty_clustering = ct.build_clustering(empty_n2c)
    logger.info(
        'Empty node 2 cluster mapping should produce empty clustering %s'
        % (empty_clustering, ))

    n2c_optimal = {
        'a': '0',
        'b': '0',
        'd': '0',
        'e': '0',
        'c': '1',
        'h': '2',
        'i': '2',
        'f': '3',
        'g': '3',
        'j': '3',
        'k': '3',
    }
    clustering = ct.build_clustering(n2c_optimal)

    # Members are sorted before logging so the output order is stable.
    logger.info("Cluster 0 should be ['a', 'b', 'd', 'e']. It is %s"
                % (sorted(clustering['0']), ))
    logger.info("Cluster 1 should be ['c']. It is %s"
                % (sorted(clustering['1']), ))
    logger.info("Cluster 2 should be ['h', 'i']. It is %s"
                % (sorted(clustering['2']), ))
    # A stray trailing comma after this call used to wrap the statement in a
    # throwaway tuple; it has been removed.
    logger.info("Cluster 3 should be ['f', 'g', 'j', 'k']. It is %s"
                % (sorted(clustering['3']), ))

    logger.info('==================')
    logger.info('Testing build_node_to_cluster_mapping')
    empty_clustering = {}
    empty_n2c = ct.build_node_to_cluster_mapping(empty_clustering)
    logger.info(
        'Empty clustering should produce empty node-to-cluster mapping %s'
        % (empty_n2c, ))

    n2c_rebuilt = ct.build_node_to_cluster_mapping(clustering)
    logger.info(
        'After rebuilding the node2cid mapping should be the same. Is it? %s'
        % (n2c_optimal == n2c_rebuilt, ))
def test_form_connected_cluster_pairs():
    """Log ``ct.form_connected_cluster_pairs`` results beside expectations.

    The function is called twice on the ``ex_graph_fig1()`` graph with a
    five-cluster assignment: once without a cluster filter and once with
    ``new_cids`` set to ``[cids[1], cids[4]]``. Each result is logged
    together with the hand-written list of expected cluster-id pairs;
    nothing is asserted.
    """
    logger.info('=================================')
    logger.info('test form_connected_cluster_pairs')

    graph = ex_graph_fig1()
    cids = list(ct.cids_from_range(5))

    # Member nodes per cluster id, in mapping insertion order.
    members = [
        ('a', 'b', 'd', 'e'),
        ('c',),
        ('h', 'i'),
        ('f', 'g'),
        ('j', 'k'),
    ]
    node2cid = {
        node: cid
        for cid, nodes in zip(cids, members)
        for node in nodes
    }
    clusters = ct.build_clustering(node2cid)

    # Case 1: pairs across the whole clustering.
    pairs = ct.form_connected_cluster_pairs(graph, clusters, node2cid)
    logger.info('form_connected_cluster_pairs(G, clustering, n2c)')
    logger.info('result: %s' % (pairs, ))
    expected_all = [
        (cids[0], cids[1]),
        (cids[0], cids[2]),
        (cids[0], cids[3]),
        (cids[1], cids[3]),
        (cids[2], cids[3]),
        (cids[2], cids[4]),
        (cids[3], cids[4]),
    ]
    logger.info('expecting: %s' % (expected_all, ))

    # Case 2: same call with an explicit list of cluster ids.
    new_cids = [cids[1], cids[4]]
    pairs = ct.form_connected_cluster_pairs(graph, clusters, node2cid,
                                            new_cids)
    logger.info('form_connected_cluster_pairs(G, clustering, n2c, new_cids)')
    logger.info('result: %s' % (pairs, ))
    expected_new = [
        (cids[0], cids[1]),
        (cids[1], cids[3]),
        (cids[2], cids[4]),
        (cids[3], cids[4]),
    ]
    logger.info('expecting: %s' % (expected_new, ))
def test_shift_between_clusters():
    """Log cluster contents before and after ``ct.shift_between_clusters``.

    Nodes ``'f'`` and ``'j'`` are shifted from cluster ``cids[3]`` to cluster
    ``cids[2]`` of a hand-written clustering. Both clusters are logged
    (sorted) before and after the call, followed by the mapping entries of
    the six nodes belonging to the two clusters. Nothing is asserted.
    """
    logger.info('===========================')
    logger.info('test_shift_between_clusters')

    cids = list(ct.cids_from_range(4))

    # Member nodes per cluster id, in mapping insertion order.
    members = [
        ('a', 'b', 'd', 'e'),
        ('c',),
        ('h', 'i'),
        ('f', 'g', 'j', 'k'),
    ]
    node2cid = {
        node: cid
        for cid, nodes in zip(cids, members)
        for node in nodes
    }
    clusters = ct.build_clustering(node2cid)

    src_cid = cids[3]
    dst_cid = cids[2]
    moving = {'f', 'j'}

    logger.info('Shifting from cluster %s to %s:' % (src_cid, dst_cid))
    logger.info('Nodes to move: %s' % (sorted(moving), ))
    for cid in (src_cid, dst_cid):
        logger.info('Cluster %s: %s' % (cid, sorted(clusters[cid])))

    ct.shift_between_clusters(src_cid, moving, dst_cid, clusters, node2cid)

    for cid in (src_cid, dst_cid):
        logger.info('After shift, cluster %s: %s'
                    % (cid, sorted(clusters[cid])))

    # Mapping entries for every node of the two clusters involved.
    logger.info("n2c['f'] = %s" % (node2cid['f'], ))
    logger.info("n2c['j'] = %s" % (node2cid['j'], ))
    logger.info("n2c['h'] = %s" % (node2cid['h'], ))
    logger.info("n2c['i'] = %s" % (node2cid['i'], ))
    logger.info("n2c['g'] = %s" % (node2cid['g'], ))
    logger.info("n2c['k'] = %s" % (node2cid['k'], ))
def test_merge():
    """Log results of ``ct.score_delta_after_merge`` and ``ct.merge_clusters``.

    The same scenario is run twice on a freshly built clustering of the
    ``ex_graph_fig1()`` graph, the second time with the cluster arguments in
    reverse order. Each run logs the prospective merge delta for clusters 2
    and 3, then merges clusters 0 and 2 and logs the returned delta beside
    the difference in ``ct.clustering_score`` measured around the merge,
    followed by the resulting clustering and node-to-cluster mapping.
    Nothing is asserted.
    """
    logger.info('===========================')
    logger.info('test_merge')
    G = ex_graph_fig1()
    cids = list(ct.cids_from_range(4))
    logger.info(cids)

    # Member nodes per cluster id, in mapping insertion order.
    members = [
        ('a', 'b', 'd', 'e'),
        ('c',),
        ('h', 'i'),
        ('f', 'g', 'j', 'k'),
    ]

    def fresh_assignment():
        # A new mapping for each run; the merge call is handed this mapping.
        return {
            node: cid
            for cid, nodes in zip(cids, members)
            for node in nodes
        }

    def run_scenario(delta_pair, delta_message, merge_pair):
        # One full pass: prospective delta, actual merge, then a state dump.
        node2cid = fresh_assignment()
        clusters = ct.build_clustering(node2cid)

        logger.info('-------------')
        logger.info('score_delta_after_merge')
        first, second = delta_pair
        delta = ct.score_delta_after_merge(first, second, G, clusters)
        logger.info(delta_message % (delta, ))

        logger.info('-------------')
        logger.info('merge_clusters')
        before = ct.clustering_score(G, node2cid)
        first, second = merge_pair
        delta = ct.merge_clusters(first, second, G, clusters, node2cid)
        after = ct.clustering_score(G, node2cid)
        logger.info('delta = %s should be %s' % (delta, after - before))

        logger.info('---')
        for cid in clusters:
            logger.info('%s: %s' % (cid, clusters[cid]))
        logger.info('---')
        for node in G.nodes:
            logger.info('%s: %s' % (node, node2cid[node]))

    run_scenario(
        (cids[2], cids[3]),
        'possible merge of 2, 3; delta should be -4, and is %s',
        (cids[0], cids[2]),
    )

    logger.info('--------')
    logger.info('Retesting merge with order of clusters reversed')

    run_scenario(
        (cids[3], cids[2]),
        'possible merge of 3, 2; delta should be -4, and is %s',
        (cids[2], cids[0]),
    )