Esempio n. 1
0
def test_inconsistent_edges():
    """Compare inconsistent_edges output with a hand-listed expected answer.

    Uses the graph from ``tct.ex_graph_fig1()`` and a fixed four-cluster
    assignment.  The verdict is only written to ``logger``; nothing is
    asserted or returned.
    """
    logger.info('\n=======================\n'
                'Test inconsistent_edges\n'
                '=======================')
    graph = tct.ex_graph_fig1()

    # Member nodes per cluster; the position in the tuple is the cluster id.
    groups = ('abde', 'c', 'hi', 'fgjk')
    n2c_optimal = {
        node: cid
        for cid, members in enumerate(groups) for node in members
    }
    clustering = ct.build_clustering(n2c_optimal)

    found = inconsistent_edges(graph, clustering, n2c_optimal)
    expected = {
        ('a', 'e', -2),
        ('c', 'f', 2),
        ('d', 'h', 3),
        ('d', 'e', -1),
        ('f', 'i', 2),
        ('f', 'k', -3),
        ('g', 'j', -3),
    }
    # Order and duplicates are ignored: only the set of edges is compared.
    verdict = ('Identify inconsistent edges: success'
               if expected == set(found) else
               'Identify inconsistent edges: FAIL')
    logger.info(verdict)
def test_replace_clusters():
    """Run ct.replace_clusters on a small clustering and log the outcome.

    Two existing clusters (ids at index 2 and 4) are swapped for two new ones
    (ids at index 5 and 7).  Expected and actual values are written side by
    side to ``logger``; nothing is asserted or returned.
    """
    logger.info('===========================')
    logger.info('test replace_clusters')
    cids = list(ct.cids_from_range(8))

    # Only the first five ids are used for the starting clustering; zip stops
    # once the groups run out.
    groups = ('abde', 'c', 'hi', 'fg', 'jk')
    n2c = {
        node: cid
        for cid, members in zip(cids, groups) for node in members
    }
    clustering = ct.build_clustering(n2c)

    retired = [cids[2], cids[4]]
    replacements = {cids[5]: {'j'}, cids[7]: {'h', 'i', 'k'}}
    ct.replace_clusters(retired, replacements, clustering, n2c)

    remaining_ids = list(clustering.keys())
    logger.info('Cluster ids, should be c0, c1, c3, c5, c7.  Are: %s' %
                (remaining_ids, ))
    cluster_five = clustering[cids[5]]
    logger.info("clustering[c5] should be {'j'}!! and is %s" %
                (cluster_five, ))
    cluster_seven = clustering[cids[7]]
    logger.info("clustering[c7] should be {'h', 'i', 'k'} and is %s" %
                (cluster_seven, ))
    logger.info("n2c['h'] should be c7 and is %s" % (n2c['h'], ))
    logger.info("n2c['j'] should be c5 and is %s" % (n2c['j'], ))
Esempio n. 3
0
def build_example_LCA():
    """Build an LCA object over clusters 2 and 3 of the example graph.

    The "from" side is the fixed four-cluster assignment restricted to the
    nodes of clusters 2 and 3; the "to" side is a hand-written two-cluster
    alternative over the same nodes.

    Returns:
        tuple: ``(lca, G)`` -- the LCA instance after ``set_to_clusters`` has
        been called on it, and the full graph from ``tct.ex_graph_fig1()``.
    """
    G = tct.ex_graph_fig1()

    # Member nodes per cluster; the position in the tuple is the cluster id.
    groups = ('abde', 'c', 'hi', 'fgjk')
    n2c_optimal = {
        node: cid
        for cid, members in enumerate(groups) for node in members
    }
    clustering_opt = ct.build_clustering(n2c_optimal)

    first_cid, second_cid = 2, 3
    members = list(clustering_opt[first_cid] | clustering_opt[second_cid])

    # "From" side: score of the two chosen clusters within their subgraph.
    from_graph = G.subgraph(members)
    from_score = ct.cid_list_score(from_graph, clustering_opt, n2c_optimal,
                                   [first_cid, second_cid])
    lca = LCA(from_graph, clustering_opt, [first_cid, second_cid], from_score)

    # "To" side: alternative clustering of the same nodes, scored on a fresh
    # subgraph of G.
    to_clusters = {0: {'f', 'h', 'i', 'j'}, 1: {'g', 'k'}}
    to_graph = G.subgraph(members)
    to_node2cid = {
        node: cid
        for cid, nodes in to_clusters.items() for node in nodes
    }
    to_score = ct.clustering_score(to_graph, to_node2cid)
    lca.set_to_clusters(to_clusters, to_score)

    return lca, G
def test_cluster_scoring_and_weights():
    """Log results of cid_list_score, clustering_score and get_weight_lists.

    Each call is made on the graph from ``ex_graph_fig1()`` and its result is
    written to ``logger`` next to the expected value stated in the message.
    Nothing is asserted or returned.
    """
    G = ex_graph_fig1()

    logger.info('=====================')
    logger.info('Testing cid_list_score')
    cids = list(ct.cids_from_range(4))
    n2c_random = {
        'a': cids[0],
        'b': cids[0],
        'f': cids[0],
        'c': cids[1],
        'g': cids[1],
        'd': cids[2],
        'e': cids[2],
        'i': cids[2],
        'h': cids[3],
        'j': cids[3],
        'k': cids[3],
    }
    clustering_random = ct.build_clustering(n2c_random)
    score = ct.cid_list_score(G, clustering_random, n2c_random,
                              [cids[0], cids[2], cids[3]])
    logger.info('Score between clusters [c0, c2, c3] should be -5 and is %s' %
                (score, ))

    logger.info('=====================')
    logger.info('Testing clustering_score')

    # First clustering: all together
    n2c_single_cluster = {n: 'c0' for n in G.nodes}
    logger.info('Score with all together should be 21.  Score = %s' %
                (ct.clustering_score(G, n2c_single_cluster), ))

    # Second clustering: all separate
    n2c_all_separate = {n: 'c' + str(i) for i, n in enumerate(G.nodes)}
    # The message now names the clustering actually being scored; it
    # previously repeated "all together" from the case above.
    logger.info('Score with all separate should be -21.  Score = %s' %
                (ct.clustering_score(G, n2c_all_separate), ))

    # Third clustering: optimal, by hand
    cids = list(ct.cids_from_range(4))
    n2c_optimal = {
        'a': cids[0],
        'b': cids[0],
        'd': cids[0],
        'e': cids[0],
        'c': cids[1],
        'h': cids[2],
        'i': cids[2],
        'f': cids[3],
        'g': cids[3],
        'j': cids[3],
        'k': cids[3],
    }
    logger.info('Optimal score should be 49. Score = %s' %
                (ct.clustering_score(G, n2c_optimal), ))

    negatives, positives = ct.get_weight_lists(G, sort_positive=True)
    logger.info('Length of negatives should be 10.  It is %s' %
                (len(negatives), ))
    logger.info('Length of positives should be 11.  It is %s' %
                (len(positives), ))
    logger.info('0th positive should be 8.  It is %s' % (positives[0], ))
    logger.info('Last positive should be 2.  It is %s' % (positives[-1], ))
def test_build_clustering_and_mapping():
    """Log results of build_clustering and build_node_to_cluster_mapping.

    Both helpers are run on an empty input and on a fixed four-cluster
    assignment; the second helper is then applied to the output of the first
    to see whether the original mapping is recovered.  Results are written to
    ``logger`` only; nothing is asserted or returned.
    """
    logger.info('==================')
    logger.info('Testing build_clustering')
    empty_n2c = {}
    empty_clustering = ct.build_clustering(empty_n2c)
    logger.info(
        'Empty node 2 cluster mapping should produce empty clustering %s' %
        (empty_clustering, ))

    n2c_optimal = {
        'a': '0',
        'b': '0',
        'd': '0',
        'e': '0',
        'c': '1',
        'h': '2',
        'i': '2',
        'f': '3',
        'g': '3',
        'j': '3',
        'k': '3',
    }

    clustering = ct.build_clustering(n2c_optimal)
    # Members are sorted before logging so the output is stable.
    logger.info("Cluster 0 should be ['a', 'b', 'd', 'e']. It is %s" %
                (sorted(clustering['0']), ))
    logger.info("Cluster 1 should be ['c']. It is %s" %
                (sorted(clustering['1']), ))
    logger.info("Cluster 2 should be ['h', 'i']. It is %s" %
                (sorted(clustering['2']), ))
    logger.info("Cluster 3 should be ['f', 'g', 'j', 'k']. It is %s" %
                (sorted(clustering['3']), ))

    logger.info('==================')
    logger.info('Testing build_node_to_cluster_mapping')
    empty_clustering = {}
    empty_n2c = ct.build_node_to_cluster_mapping(empty_clustering)
    logger.info(
        'Empty clustering should produce empty node-to-cluster mapping %s' %
        (empty_n2c, ))

    # Round trip: mapping -> clustering -> mapping.
    n2c_rebuilt = ct.build_node_to_cluster_mapping(clustering)
    logger.info(
        'After rebuilding the node2cid mapping should be the same.  Is it? %s'
        % (n2c_optimal == n2c_rebuilt, ))
def test_form_connected_cluster_pairs():
    """Log ct.form_connected_cluster_pairs output next to the expected pairs.

    The helper is called twice on a five-cluster assignment of the graph from
    ``ex_graph_fig1()``: once without and once with a ``new_cids`` argument.
    Results are written to ``logger`` only; nothing is asserted or returned.
    """
    logger.info('=================================')
    logger.info('test form_connected_cluster_pairs')
    G = ex_graph_fig1()
    cids = list(ct.cids_from_range(5))

    # Member nodes per cluster, in the same order as ``cids``.
    groups = ('abde', 'c', 'hi', 'fg', 'jk')
    n2c = {
        node: cid
        for cid, members in zip(cids, groups) for node in members
    }
    clustering = ct.build_clustering(n2c)

    def as_cid_pairs(index_pairs):
        # Turn (i, j) index pairs into (cids[i], cids[j]) tuples.
        return [(cids[i], cids[j]) for i, j in index_pairs]

    cid_pairs = ct.form_connected_cluster_pairs(G, clustering, n2c)
    logger.info('form_connected_cluster_pairs(G, clustering, n2c)')
    logger.info('result:  %s' % (cid_pairs, ))
    expected_all = as_cid_pairs(
        [(0, 1), (0, 2), (0, 3), (1, 3), (2, 3), (2, 4), (3, 4)])
    logger.info('expecting: %s' % (expected_all, ))

    new_cids = [cids[1], cids[4]]
    cid_pairs = ct.form_connected_cluster_pairs(G, clustering, n2c, new_cids)
    logger.info('form_connected_cluster_pairs(G, clustering, n2c, new_cids)')
    logger.info('result:  %s' % (cid_pairs, ))
    expected_new = as_cid_pairs([(0, 1), (1, 3), (2, 4), (3, 4)])
    logger.info('expecting: %s' % (expected_new, ))
def test_shift_between_clusters():
    """Log cluster contents before and after ct.shift_between_clusters.

    Nodes 'f' and 'j' are shifted from the cluster with id ``cids[3]`` to the
    one with id ``cids[2]``.  The two clusters and selected node-to-cluster
    entries are written to ``logger``; nothing is asserted or returned.
    """
    logger.info('===========================')
    logger.info('test_shift_between_clusters')
    cids = list(ct.cids_from_range(4))

    # Member nodes per cluster, in the same order as ``cids``.
    groups = ('abde', 'c', 'hi', 'fgjk')
    n2c_optimal = {
        node: cid
        for cid, members in zip(cids, groups) for node in members
    }
    clustering = ct.build_clustering(n2c_optimal)

    n0_cid = cids[3]
    n1_cid = cids[2]
    n0_nodes_to_move = {'f', 'j'}
    logger.info('Shifting from cluster %s to %s:' % (n0_cid, n1_cid))
    logger.info('Nodes to move: %s' % (sorted(n0_nodes_to_move), ))
    for cid in (n0_cid, n1_cid):
        logger.info('Cluster %s: %s' % (cid, sorted(clustering[cid])))

    ct.shift_between_clusters(n0_cid, n0_nodes_to_move, n1_cid, clustering,
                              n2c_optimal)
    for cid in (n0_cid, n1_cid):
        logger.info('After shift, cluster %s: %s' %
                    (cid, sorted(clustering[cid])))

    # Report the mapping for every node of the two clusters involved.
    reports = (
        ("n2c['f'] = %s", 'f'),
        ("n2c['j'] = %s", 'j'),
        ("n2c['h'] = %s", 'h'),
        ("n2c['i'] = %s", 'i'),
        ("n2c['g'] = %s", 'g'),
        ("n2c['k'] = %s", 'k'),
    )
    for template, node in reports:
        logger.info(template % (n2c_optimal[node], ))
def test_merge():
    """Log results of ct.score_delta_after_merge and ct.merge_clusters.

    Two passes are made over a freshly built four-cluster assignment of the
    graph from ``ex_graph_fig1()``; the second pass gives the cluster ids to
    both helpers in the opposite order.  After each merge the returned delta
    is logged next to the difference in ``ct.clustering_score`` measured
    before and after, followed by the clustering and the node-to-cluster
    mapping.  Nothing is asserted or returned.
    """
    logger.info('===========================')
    logger.info('test_merge')
    G = ex_graph_fig1()
    cids = list(ct.cids_from_range(4))
    logger.info(cids)

    # Member nodes per cluster, in the same order as ``cids``.
    groups = ('abde', 'c', 'hi', 'fgjk')

    # Per pass: (indices for score_delta_after_merge, its log template,
    # indices for merge_clusters).
    passes = (
        ((2, 3), 'possible merge of 2, 3; delta should be -4, and is %s',
         (0, 2)),
        ((3, 2), 'possible merge of 3, 2; delta should be -4, and is %s',
         (2, 0)),
    )

    for pass_no, (probe, probe_template, merge) in enumerate(passes):
        if pass_no > 0:
            logger.info('--------')
            logger.info('Retesting merge with order of clusters reversed')

        # Start each pass from an untouched mapping and clustering.
        n2c_optimal = {
            node: cid
            for cid, members in zip(cids, groups) for node in members
        }
        clustering = ct.build_clustering(n2c_optimal)

        logger.info('-------------')
        logger.info('score_delta_after_merge')
        delta = ct.score_delta_after_merge(cids[probe[0]], cids[probe[1]], G,
                                           clustering)
        logger.info(probe_template % (delta, ))

        logger.info('-------------')
        logger.info('merge_clusters')
        score_before = ct.clustering_score(G, n2c_optimal)
        delta = ct.merge_clusters(cids[merge[0]], cids[merge[1]], G,
                                  clustering, n2c_optimal)
        score_after = ct.clustering_score(G, n2c_optimal)
        score_change = score_after - score_before
        logger.info('delta = %s should be %s' % (delta, score_change))

        logger.info('---')
        for cid in clustering:
            logger.info('%s: %s' % (cid, clustering[cid]))
        logger.info('---')
        for node in G.nodes:
            logger.info('%s: %s' % (node, n2c_optimal[node]))