Exemple #1
0
def test_embed_nodes():
    """Check `embed_nodes` for two different sets of embedded leaves.

    Both cases use the same three-merge linkage; in the diagrams below the
    embedded nodes are written in parentheses.
    """
    linkage_rows = [[1., 3., 1., 2.],
                    [2., 4., 2., 3.],
                    [0., 5., 3., 4.]]

    # Leaves 2 and 3 are embedded:
    #     0----------+
    #    (2)---+     |-6
    #     1    |-(5)-+
    #     |-4--+
    #    (3)
    first_result = embed_nodes(np.array(linkage_rows), [2, 3])
    assert_true(equal_arrays(first_result, [3, 5, 5]),
                "assigns nodes the first embedded descendent")

    # Leaves 0 and 2 are embedded:
    #    (0)-------+
    #    (2)---+   |-(6)
    #     1    |-5-+
    #     |-4--+
    #     3
    # A fresh array is built for the second call, as in the first case.
    second_result = embed_nodes(np.array(linkage_rows), [0, 2])
    assert_true(equal_arrays(second_result, [-1, 2, 6]),
                "assign -1 for unembedded nodes")
Exemple #2
0
def test_ancestors():
    """Check `ancestors` for leaf, single-node and path queries.

    The linkage used throughout describes the tree:
        0
        |---7---+
        1       |
                |
        2---+   |-8
            |   |
        3   |-6-+
        |-5-+
        4
    """
    linkage = np.array([[3., 4., 1., 2.],
                        [2., 5., 1., 3.],
                        [0., 1., 3., 2.],
                        [6., 7., 4., 5.]])

    # (queried nodes, extra keyword arguments, expected result, message)
    cases = [
        (range(5), {}, [5, 6, 7, 8],
         "returns ancestors of all leaf clusters"),
        ([1], {}, [7, 8],
         "returns ancestors of a single leaf cluster"),
        ([5, 6, 8], {}, [6, 8],
         "returns union of ancestors for a path of nodes"),
        ([5, 6, 8], {"inclusive": True}, [5, 6, 8],
         "returns union of path nodes including nodes themselves when "
         "`inclusive` flag is set"),
    ]
    for (nodes, options, expected, message) in cases:
        found = ancestors(linkage, nodes, **options)
        assert_true(equal_arrays(found, expected), message)
Exemple #3
0
def test_argrank():
    """Check the fractional ranking helpers and `argrank`.

    Covers integer ranks of distinct values, averaged ranks for ties, the
    in-place variant, ranking along either axis of a 2D array, and weighted
    ranks. Weighted ranks are compared with the unweighted ranks of data in
    which each value (or row) is repeated as many times as its weight.
    """
    # Distinct values: ranks are plain integers.
    v1 = [5, 3, 4, 8]
    o1 = [3, 1, 2, 4]
    assert_true(equal_arrays(_fractional_rank(v1), o1),
                "returns integer rank of values in one-dimensional array")
    x1 = np.array(v1)
    _ifractional_rank(x1)
    assert_true(equal_arrays(x1, o1), "loads array ranks into input array")

    # Tied values share the mean of the ranks they span.
    v2 = [5, 3, 8, 8]
    o2 = [2, 1, 3.5, 3.5]
    assert_true(equal_arrays(_fractional_rank(v2), o2),
                "returns mean of tied ranks")
    x2 = np.array(v2, dtype=np.double)
    _ifractional_rank(x2)
    assert_true(equal_arrays(x2, o2),
                "loads array ranks with tied means into input array")

    # Ranking along rows, and along columns of the transposed data.
    v3 = np.array([[1, 10, 5, 2], [1, 4, 6, 2], [5, 5, 3, 10]])
    o3 = np.array([[1, 4, 3, 2], [1, 3, 4, 2], [2.5, 2.5, 1, 4]])
    assert_true(equal_arrays(argrank(v3, axis=1), o3),
                "with `axis=1` passed returns ranks along rows of 2D array")
    assert_true(
        equal_arrays(argrank(v3.T, axis=0), o3.T),
        "with `axis=0` passed returns ranks along columns of 2D array")

    # Weighted 1D ranks: `v4_dup` appends the extra copies implied by `w4`.
    v4 = np.asarray([5, 3, 4, 8])
    w4 = np.asarray([2, 2, 1, 3])
    v4_dup = np.asarray([5, 3, 4, 8, 5, 3, 8, 8])
    assert_true(
        equal_arrays(_fractional_rank(v4, weight_fun=lambda i: w4[i]),
                     _fractional_rank(v4_dup)[:4]),
        "returns weighted ranks when weights parameter is passed")
    x4 = np.array(v4, dtype=np.double)
    _ifractional_rank(x4, weight_fun=lambda i: w4[i])
    assert_true(equal_arrays(x4,
                             _fractional_rank(v4_dup)[:4]),
                "returns idential ranks to non-mutating methods")

    # Weights indexed by column while ranking along rows.
    v5 = np.array([[1, 10, 5, 2], [1, 4, 6, 2]])
    w5 = np.asarray([5, 1, 1, 6])
    v5_dup = np.array([[1, 10, 5, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2],
                       [1, 4, 6, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2]])
    assert_true(
        equal_arrays(argrank(v5, weight_fun=lambda i: w5[i], axis=1),
                     argrank(v5_dup, axis=1)[:, :4]),
        "broadcasts weights vector along rows of 2D array when ranking columns"
    )

    # Weights indexed by row while ranking along columns.
    v6 = np.array([[1, 10, 5, 2], [1, 4, 6, 2]])
    w6 = np.asarray([2, 5])
    v6_dup = np.array([[1, 10, 5, 2], [1, 4, 6, 2], [1, 10, 5, 2],
                       [1, 4, 6, 2], [1, 4, 6, 2], [1, 4, 6, 2], [1, 4, 6, 2]])
    assert_true(
        equal_arrays(argrank(v6, weight_fun=lambda i: w6[i], axis=0),
                     argrank(v6_dup, axis=0)[:2]),
        "broadcasts weights vector along columns of 2D array when ranking rows"
    )
Exemple #4
0
 def _test_one_small():
     """Sort a small random array on disk and check indices and values.

     Writes 190 random doubles to `infile`, runs `argsort_chunk_mergesort`
     with a chunk size of 30, then compares the contents of `outfile` and
     `infile` with the in-memory argsort before deleting both files.
     `infile` and `outfile` come from the enclosing scope.
     """
     d1 = np_random.rand(190).astype(np.double)
     d1.tofile(infile)
     i1 = d1.argsort()
     argsort_chunk_mergesort(infile, outfile, chunk_size=30)
     # `np.int` was only an alias for the builtin `int` and has been removed
     # from NumPy; the builtin selects the same dtype.
     assert_true(equal_arrays(np.fromfile(outfile, dtype=int), i1),
                 "sorted indices are stored in output file")
     assert_true(
         equal_arrays(np.fromfile(infile, dtype=np.double), d1[i1]),
         "input file values are in sorted order")
     os.remove(infile)
     os.remove(outfile)
Exemple #5
0
def test_squareform_coords():
    """Check `squareform_coords` against `pairs` for a random matrix size.

    For a random `n` between 3 and 10 inclusive, every index of the
    condensed matrix (`range(m)`, with `m = n * (n - 1) // 2`) must map to
    the row/column pair produced by `pairs(n)`, with row <= column.
    """
    n = random.randint(3, 10)
    m = n * (n - 1) // 2
    (ri, ci) = pairs(n)

    (squareform_i, squareform_j) = squareform_coords(n, range(m))
    assert_true(
        equal_arrays(squareform_i, ri) and equal_arrays(squareform_j, ci),
        "compute row and column indices for elements of condensed matrix")
    assert_true(np.all(squareform_i <= squareform_j),
                "returns upper triangular coordinates")
Exemple #6
0
def test_ClusterQualityEngine():
    """Check `QualityEngineTester.makeScores` on a small skewed tree.

    The tree used for all cases is:
        0---+
        2-+ |-6
        1 |-5
        |-4
        3
    """
    linkage = np.array([[1., 3., 1., 2.], [2., 4., 1., 3.], [0., 5., 2., 4.]])

    # `max` with data on a single leaf:
    #     0:[1]--+
    #     2:[]-+ |-6
    #     1:[] |-5
    #     |-4--+
    #     3:[]
    single_leaf_engine = QualityEngineTester({0: [1]}, max)
    single_leaf_scores = single_leaf_engine.makeScores(linkage)
    assert_true(equal_arrays(single_leaf_scores, [1, 0, 0, 0, 0, 0, 1]),
                "computes max of a single leaf data point for all ancestors")

    # `len` with data on two leaves:
    #     0:[]------+
    #     2:["x"]-+ |-6
    #     1:["a"] |-5
    #     |-4-----+
    #     3:[]
    leaf_pair_engine = QualityEngineTester({1: ["a"], 2: ["x"]}, len)
    leaf_pair_scores = leaf_pair_engine.makeScores(linkage)
    assert_true(
        equal_arrays(leaf_pair_scores, [0, 1, 1, 0, 1, 2, 2]),
        "computes length of leaf data in case of pair of leaf data points")

    # Range (max - min) with data on three leaves:
    #     0:[]------+
    #     2:[1]---+ |-6
    #     1:[2,2] |-5
    #     |-4-----+
    #     3:[1,0]
    leaf_data = {1: [2, 2], 2: [1], 3: [1, 0]}
    range_engine = QualityEngineTester(leaf_data, lambda x: max(x) - min(x))
    range_scores = range_engine.makeScores(linkage)
    assert_true(equal_arrays(range_scores, [0, 0, 0, 1, 2, 2, 2]),
                "computes non-trivial function of combined leaf data points")
Exemple #7
0
def test_greedy_clique_by_elimination():
    """Check `greedy_clique_by_elimination` on randomly relabelled graphs.

    Each adjacency matrix is shuffled with a random node permutation before
    the call, so expected cliques are translated into the shuffled labels.
    """

    def shuffled(adjacency):
        # Returns the relabelled matrix and, for every original node, its
        # position in the relabelled matrix.
        size = len(adjacency)
        order = np_random.permutation(size)
        relabelled = adjacency[np.ix_(order, order)]
        position_of = np.empty(size, dtype=int)
        position_of[order] = np.arange(size)
        return (relabelled, position_of)

    # Nodes 0 and 1 form the clique.
    graph = np.array([[True, True, False],
                      [True, True, False],
                      [False, False, True]])
    (graph_perm, position_of) = shuffled(graph)
    assert_true(
        equal_arrays(greedy_clique_by_elimination(graph_perm),
                     np.sort(position_of[:2])), "returns indices of clique")

    # Two cliques (0, 1, 2 and 3, 4, 5) with n-1 connecting edges.
    graph = np.array([[True, True, True, False, True, True],
                      [True, True, True, True, False, True],
                      [True, True, True, True, True, False],
                      [False, True, True, True, True, True],
                      [True, False, True, True, True, True],
                      [True, True, False, True, True, True]])
    (graph_perm, position_of) = shuffled(graph)
    clique_size = len(greedy_clique_by_elimination(graph_perm))
    assert_true(
        clique_size == 3,
        "computes correct clique size for two highly connected equal sized cliques"
    )

    # Two cliques (0, 1, 2, 3 and 3, 4, 5) sharing the fully connected node 3.
    graph = np.array([[True, True, True, True, False, False],
                      [True, True, True, True, False, False],
                      [True, True, True, True, False, False],
                      [True, True, True, True, True, True],
                      [False, False, False, True, True, True],
                      [False, False, False, True, True, True]])
    (graph_perm, position_of) = shuffled(graph)
    assert_true(
        equal_arrays(greedy_clique_by_elimination(graph_perm),
                     np.sort(position_of[:4])),
        "computes the larger of two overlapping cliques")
Exemple #8
0
def test_condensed_index():
    """Check `pairs` and `condensed_index` for a random matrix size.

    For a random `n` between 3 and 10 inclusive, `pairs(n)` must list every
    (row, column) pair with row < column in row-major order, and
    `condensed_index` must map those pairs to 0..m-1 whichever way round
    the two coordinates are passed.
    """
    n = random.randint(3, 10)
    m = n * (n - 1) // 2
    (ri, ci) = pairs(n)

    # Enumerate the expected upper-triangular coordinates explicitly.
    expected_rows = []
    expected_cols = []
    for i in range(n - 1):
        for j in range(i + 1, n):
            expected_rows.append(i)
            expected_cols.append(j)
    assert_true(
        equal_arrays(ri, expected_rows) and equal_arrays(ci, expected_cols),
        "generate pairs of square coordinates")

    condensed_indices = condensed_index(n, ri, ci)
    expected_indices = np.arange(m)
    assert_true(equal_arrays(condensed_indices, expected_indices),
                "compute linear index of condensed distance matrix")

    swapped_indices = condensed_index(n, ci, ri)
    assert_true(equal_arrays(condensed_indices, swapped_indices),
                "computes linear index correctly when row < col")
 def test_parse(self):
     """Check `parse` output and pairwise `getDistance` values.

     Six taxonomy strings are parsed with `self._ce.parse`; the first row
     of the resulting table is checked against its tag names, then
     distances between selected rows are compared with expected values.
     """
     taxstrings = [
         "d__Archaea; p__Euryarchaeota; c__Methanococci; o__Methanococcales; f__Methanococcaceae; g__Methanococcus",
         "d__Archaea; p__Euryarchaeota; c__Methanococci; o__Methanococcales; f__Methanococcaceae; g__Methanococcus",
         "d__Archaea; p__Euryarchaeota; c__Thermococci; o__Thermococcales; f__Thermococcaceae; g__Pyrococcus",
         "d__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Burkholderiaceae; g__Burkholderia",
         "d__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales",
         "d__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Nitrosomonadales; f__Nitrosomonadaceae; g__Nitrosomonas",
     ]
     (table, taxons) = self._ce.parse(taxstrings)

     first_row_tags = ["Archaea", "Euryarchaeota", "Methanococci",
                       "Methanococcales", "Methanococcaceae",
                       "Methanococcus", ""]
     assert_true(equal_arrays(taxons[table[0]], first_row_tags),
                 "returns table of taxon tag indices and array of tags")

     # (first row, second row, expected distance, message)
     distance_cases = [
         # (Methanococcus, Methanococcus2): 0 (=Species)
         (0, 1, 0, "0 distance between two equal classifications"),
         # (Methanococcus, Pyrococcus): 5 (=Phylum)
         (0, 2, 5,
          "5 distance between classifications equal up to phylum level"),
         # (Methanococcus, Burkholderia): 7 (=Root)
         (0, 3, 7,
          "7 distance between classifications equal only at root level"),
         # (Burkholderia, Burkholderiales): 0 (=Species)
         (3, 4, 0,
          "0 distance between classifications equal at all defined levels"),
         # (Burkholderiales, Nitrosomonas): 4 (=Class)
         (4, 5, 4,
          "4 distance between classifications equal up to class level"),
     ]
     for (first, second, expected, message) in distance_cases:
         found = self._ce.getDistance(table[first], table[second])
         assert_true(found == expected, message)
Exemple #10
0
 def _test_one_small():
     """Compare `pdist_chunk` with `pdist` on a small random dataset.

     Uses a 20 x 50 random matrix and a chunk size of 30; the distances
     written to `filename` (from the enclosing scope) must equal the
     in-memory Euclidean distances. The file is removed afterwards.
     """
     features = np_random.rand(20, 50)
     expected = sp_distance.pdist(features, metric="euclidean")
     pdist_chunk(features, filename, chunk_size=30, metric="euclidean")
     stored = np.fromfile(filename, dtype=np.double)
     assert_true(equal_arrays(stored, expected),
                 "computes same distances as unchunked function")
     os.remove(filename)
 def test_parse_taxstring(self):
     """Check `parse_taxstring` on well-formed and irregular strings.

     Each case pairs an input taxonomy string with the expected list of
     rank names and the message reported if they differ.
     """
     # (taxonomy string, expected ranks, message)
     cases = [
         ("d__Archaea; p__Euryarchaeota; c__Methanococci; o__Methanococcales; f__Methanococcaceae; g__Methanococcus",
          ["Archaea", "Euryarchaeota", "Methanococci", "Methanococcales", "Methanococcaceae", "Methanococcus"],
          "returns array of parsed taxonomic ranks"),
         ("d__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales",
          ["Bacteria", "Proteobacteria", "Betaproteobacteria", "Burkholderiales"],
          "returns array of parsed taxonomic ranks defined to order level"),
         ("Root; d__Bacteria; p__Proteobacteria",
          ["Bacteria", "Proteobacteria"],
          "returned array ignores initial 'Root' rank"),
         ("d__Bacteria; p__Proteobacteria;",
          ["Bacteria", "Proteobacteria"],
          "returned array ignores trailing semi-colon"),
         ("Root;d__Bacteria;p__Proteobacteria",
          ["Bacteria", "Proteobacteria"],
          "parses taxonomic string without spaces after semi-colon separator"),
         ("d__Bacteria; c__Betaproteobacteria; o__Burkholderiales",
          ["Bacteria"],
          "stops parsing when a bad tag is encountered"),
         ("Bacteria; Proteobacteria; Betaproteobacteria; Burkholderiales",
          ["Bacteria", "Proteobacteria", "Betaproteobacteria", "Burkholderiales"],
          "parses taxonomic string without rank tags"),
     ]
     for (taxstring, expected_ranks, message) in cases:
         parsed = self._ce.parse_taxstring(taxstring)
         assert_true(equal_arrays(parsed, expected_ranks), message)
def test_greedy_clique_by_elimination():
    """Check `greedy_clique_by_elimination` on randomly relabelled graphs.

    Each boolean adjacency matrix is built by slicing, shuffled with a
    random node permutation, and passed to the function; expected cliques
    are translated into the shuffled labels.
    """
    # Nodes 0 and 1 form the clique; node 2 is connected only to itself.
    adjacency = np.zeros((3, 3), dtype=bool)
    adjacency[:2, :2] = True
    adjacency[2, 2] = True
    order = np_random.permutation(3)
    relabelled = adjacency[np.ix_(order, order)]
    position_of = np.empty(3, dtype=int)
    position_of[order] = np.arange(3)

    found = greedy_clique_by_elimination(relabelled)
    assert_true(equal_arrays(found, np.sort(position_of[:2])),
                "returns indices of clique")

    # Two cliques (0, 1, 2 and 3, 4, 5) with n-1 connecting edges: node i is
    # connected to every node except node (i + 3) % 6.
    adjacency = np.ones((6, 6), dtype=bool)
    nodes = np.arange(6)
    adjacency[nodes, (nodes + 3) % 6] = False
    order = np_random.permutation(6)
    relabelled = adjacency[np.ix_(order, order)]

    found = greedy_clique_by_elimination(relabelled)
    assert_true(len(found) == 3,
                "computes correct clique size for two highly connected equal sized cliques")

    # Two cliques (0, 1, 2, 3 and 3, 4, 5) sharing the fully connected node 3.
    adjacency = np.zeros((6, 6), dtype=bool)
    adjacency[:4, :4] = True
    adjacency[3:, 3:] = True
    order = np_random.permutation(6)
    relabelled = adjacency[np.ix_(order, order)]
    position_of = np.empty(6, dtype=int)
    position_of[order] = np.arange(6)

    found = greedy_clique_by_elimination(relabelled)
    assert_true(equal_arrays(found, np.sort(position_of[:4])),
                "computes the larger of two overlapping cliques")
Exemple #13
0
 def _test_one_big():
     """Rank a large shuffled permutation with `argrank_chunk`.

     The input holds the integers 0 .. 2**9 * (2**10 - 1) - 1 in random
     order, so the expected rank of each value is the value plus one.
     `dist_file` and `indices_file` come from the enclosing scope and are
     removed afterwards.
     """
     size = 2**9 * (2**10 - 1)
     d2 = np.arange(size)
     np_random.shuffle(d2)
     d2.tofile(dist_file)
     ranks = argrank_chunk(dist_file, indices_file, chunk_size=int(1e5))
     expected_ranks = d2 + 1
     assert_true(equal_arrays(ranks, expected_ranks),
                 "computes ranks of a large-ish permutation array")
     for path in (dist_file, indices_file):
         os.remove(path)
Exemple #14
0
def test_reachability_order():
    """Check `reachability_order` with and without core distances.

    The condensed array encodes weighted distances for the pairs:
    (0, 1) =  17.7
    (0, 2) =  70.0
    (0, 3) =  97.1
    (0, 4) =  50.8
    (1, 2) = 121.6
    (1, 3) =  79.4
    (1, 4) =  82.1
    (2, 3) = 120.9
    (2, 4) =  77.3
    (3, 4) =  14.4
    """
    condensed = np.array(
        [17.7, 70., 97.1, 50.8, 121.6, 79.4, 82.1, 120.9, 77.3, 14.4])

    # Closest to furthest distances per point:
    # 0 = 17.7, 50.8,  70.0,  97.1
    # 1 = 17.7, 79.4,  82.1, 121.6
    # 2 = 70.0, 77.3, 120.9, 121.6
    # 3 = 14.4, 79.4,  97.1, 120.9
    # 4 = 14.4, 50.8,  77.3,  82.1
    (order, reach) = reachability_order(condensed)
    assert_true(equal_arrays(order, [0, 1, 4, 3, 2]),
                "returns reachability traversal order")
    assert_true(equal_arrays(reach, [0, 17.7, 50.8, 14.4, 70.0]),
                "returns reachability distances when traversing points")

    # Closest to furthest pairs / distances once core distances are applied:
    # 0 = 70.0 (1), 70.0 (2), 70.0 (4),  97.1 (3)
    # 1 = 82.1 (0), 82.1 (3), 82.1 (4),  121.6 (2)
    # 2 = 77.3 (0), 77.3 (4), 120.9 (3), 121.6 (1)
    # 3 = 79.4 (1), 79.4 (4), 97.1 (0),  120.9 (2)
    # 4 = 50.8 (0), 50.8 (3), 77.3 (2),  82.1 (1)
    core_dists = np.array([70.0, 82.1, 77.3, 79.4, 50.8])
    (order, reach) = reachability_order(condensed, core_dists)
    assert_true(equal_arrays(order, [0, 1, 2, 4, 3]),
                "returns reachability traversal order with core distances")
    assert_true(
        equal_arrays(reach, [70.0, 70.0, 70.0, 70.0, 50.8]),
        "returns reachability distances computed using core distances")
Exemple #15
0
        def _test_one_big():
            """Compare `pdist_chunk` with `pdist` on a larger random dataset.

            Uses a 2**10 x 50 random matrix and a chunk size of 1e5; the
            distances written to `filename` (from the enclosing scope) must
            equal the in-memory Euclidean distances. The file is removed
            afterwards.
            """
            features = np_random.rand(2**10, 50)
            expected = sp_distance.pdist(features, metric="euclidean")
            pdist_chunk(features,
                        filename,
                        chunk_size=int(1e5),
                        metric="euclidean")
            stored = np.fromfile(filename, dtype=np.double)
            assert_true(
                equal_arrays(stored, expected),
                "computes same distances as unchunked function for a large-ish dataset"
            )

            os.remove(filename)
Exemple #16
0
def test_flatten_nodes():
    """Check `flatten_nodes` on a skewed tree and on a deeper hierarchy."""
    # Skewed tree:
    #     0-------+
    #     2---+   |-6
    #     1   |-5-+
    #     |-4-+
    #     3
    skewed = np.array([[1., 3., 1., 2.],
                       [2., 4., 1., 3.],
                       [0., 5., 2., 4.]])
    flattened = flatten_nodes(skewed)
    assert_true(equal_arrays(flattened, [1, 1, 2]),
                "assigns nodes the indices of direct parent of equal height")

    # Deeper tree:
    #     5
    #     |-9-+
    #     6   |-10-+
    #     2---+    |
    #              |
    #     1        |-12
    #     |-8-+    |
    #     0   |    |
    #         |-11-+
    #     3   |
    #     |-7-+
    #     4
    deeper = np.array([[ 3.,  4., 1., 2.],
                       [ 0.,  1., 2., 2.],
                       [ 5.,  6., 3., 2.],
                       [ 2.,  9., 3., 3.],
                       [ 7.,  8., 3., 4.],
                       [10., 11., 3., 7.]])
    flattened = flatten_nodes(deeper)
    assert_true(equal_arrays(flattened, [0, 1, 5, 5, 5, 5]),
                "assigns nodes the indices of parents and grandparents of equal height")
Exemple #17
0
 def _test_one():
     """Merge two sorted random arrays and compare with a full argsort.

     Two sorted arrays of random length (80 to 1000 elements, inclusive)
     are merged into output buffers holding only the first `n` results,
     where `n` is a random size between 80 and the combined length. The
     merged values and indices must match the first `n` entries of an
     argsort over the concatenated data.
     """
     # `random_integers(low, high)` is deprecated in NumPy; `randint` with
     # an exclusive upper bound of `high + 1` draws from the same range.
     # NOTE(review): assumes `np_random` is `numpy.random` -- confirm.
     left = np_random.rand(np_random.randint(80, 1000 + 1))
     left.sort()
     right = np_random.rand(np_random.randint(80, 1000 + 1))
     right.sort()
     values = np.concatenate((left, right))
     indices = values.argsort()
     sorted_values = values[indices]

     n = np_random.randint(80, values.size + 1)
     merged = np.zeros(n, dtype=values.dtype)
     merged_indices = np.zeros(n, dtype=indices.dtype)
     # The right-hand indices continue where the left-hand ones stop, so
     # both index arrays refer to positions in `values`.
     merge(left,
           np.arange(left.size),
           right,
           np.arange(left.size, values.size),
           merged,
           merged_indices,
           )
     assert_true(equal_arrays(sorted_values[:n], merged),
                 "sorts values in output array")
     assert_true(equal_arrays(indices[:n], merged_indices),
                 "writes sorting indices into output indices array")
Exemple #18
0
 def _test_one_small():
     """Check `iapply_func_chunk` adds two disk-stored arrays in place.

     Two random arrays of 200 values are written to `infilename` and
     `outfilename` (both from the enclosing scope); after applying
     `operator.add` with a chunk size of 50, `outfilename` must hold their
     sum. Both files are removed afterwards.
     """
     a = np_random.rand(200)
     b = np_random.rand(200)
     b.tofile(infilename)
     a.tofile(outfilename)
     iapply_func_chunk(outfilename, infilename, operator.add, chunk_size=50)
     expected_sum = a + b
     stored = np.fromfile(outfilename, dtype=a.dtype)
     assert_true(equal_arrays(expected_sum, stored),
                 "applies add operation in place using disk-stored array")
     for path in (infilename, outfilename):
         os.remove(path)
Exemple #19
0
    def _test_one():
        """Merge two sorted random arrays and compare with a full argsort.

        Two sorted arrays of random length (80 to 1000 elements, inclusive)
        are merged into output buffers holding only the first `n` results,
        where `n` is a random size between 80 and the combined length. The
        merged values and indices must match the first `n` entries of an
        argsort over the concatenated data.
        """
        # `random_integers(low, high)` is deprecated in NumPy; `randint`
        # with an exclusive upper bound of `high + 1` draws from the same
        # range.
        # NOTE(review): assumes `np_random` is `numpy.random` -- confirm.
        left = np_random.rand(np_random.randint(80, 1000 + 1))
        left.sort()
        right = np_random.rand(np_random.randint(80, 1000 + 1))
        right.sort()
        values = np.concatenate((left, right))
        indices = values.argsort()
        sorted_values = values[indices]

        n = np_random.randint(80, values.size + 1)
        merged = np.zeros(n, dtype=values.dtype)
        merged_indices = np.zeros(n, dtype=indices.dtype)
        # The right-hand indices continue where the left-hand ones stop, so
        # both index arrays refer to positions in `values`.
        merge(
            left,
            np.arange(left.size),
            right,
            np.arange(left.size, values.size),
            merged,
            merged_indices,
        )
        assert_true(equal_arrays(sorted_values[:n], merged),
                    "sorts values in output array")
        assert_true(equal_arrays(indices[:n], merged_indices),
                    "writes sorting indices into output indices array")
Exemple #20
0
        def _test_one_small():
            """Compare `argrank_chunk` with `argrank` on a small array.

            Ranks of 190 random doubles are computed from disk with a chunk
            size of 40, first unweighted and then with random weights, and
            compared with the in-memory `argrank` results. `dist_file` and
            `indices_file` come from the enclosing scope and are removed
            afterwards.
            """
            d1 = np_random.rand(190).astype(np.double)
            d1.tofile(dist_file)
            x1 = argrank_chunk(dist_file, indices_file, chunk_size=40)
            expected_plain = argrank(d1, axis=None)
            assert_true(equal_arrays(x1, expected_plain),
                        "returns equal ranks to non-chunked function")

            # The input file is written again before the weighted run.
            d1.tofile(dist_file)
            w2 = np_random.rand(190).astype(np.double)

            def weight_of(i):
                return w2[i]

            x2 = argrank_chunk(dist_file,
                               indices_file,
                               weight_fun=weight_of,
                               chunk_size=40)
            expected_weighted = argrank(d1, weight_fun=weight_of, axis=None)
            assert_true(
                almost_equal_arrays(x2, expected_weighted),
                "correctly weights ranks when passed a weight function")
            for path in (dist_file, indices_file):
                os.remove(path)
Exemple #21
0
def test_fcluster_merge():
    """Check `fcluster_merge` roots and flat labels on two small trees.

    In the diagrams, `k:1` marks internal node `k` as merged and `k:0` as
    not merged, matching the boolean list passed for each case.
    """

    def check(linkage, merges, expected_roots, roots_message,
              expected_flat, flat_message):
        # Runs one case: cluster roots are compared exactly, flat cluster
        # labels only up to relabelling.
        (flat, roots) = fcluster_merge(linkage, merges, return_nodes=True)
        assert_true(equal_arrays(roots, expected_roots), roots_message)
        assert_true(is_isomorphic(flat, expected_flat), flat_message)

    # Skewed tree:
    #     0-------+
    #     2---+   |-6
    #     1   |-5-+
    #     |-4-+
    #     3
    skewed = np.array([[1., 3., 1., 2.],
                       [2., 4., 1., 3.],
                       [0., 5., 2., 4.]])

    # Merges:
    #     0-----------+
    #     2-----+     |-6:0
    #     1     |-5:0-+
    #     |-4:1-+
    #     3
    check(skewed, [True, False, False],
          [0, 4, 2, 4],
          "returns cluster roots for skewed tree",
          [1, 2, 3, 2],
          "computes flat cluster indices for skewed tree")

    # Merges:
    #     0-----------+
    #     2-----+     |-6:0
    #     1     |-5:1-+
    #     |-4:0-+
    #     3
    check(skewed, [False, True, False],
          [0, 5, 5, 5],
          "`fcluster_merge` returns cluster roots for skewed tree with "
          "large valued internal coefficient",
          [1, 2, 2, 2],
          "`fcluster_merge` computes flat cluster indices for skewed "
          "tree with large valued internal coefficient")

    # Merges (same assignment as the first case):
    #     0-----------+
    #     2-----+     |-6:0
    #     1     |-5:0-+
    #     |-4:1-+
    #     3
    check(skewed, [True, False, False],
          [0, 4, 2, 4],
          "returns cluster roots for skewed tree with lower valued "
          "internal coefficient",
          [1, 2, 3, 2],
          "returns flat cluster indices for skewed tree with lower "
          "valued internal coefficient")

    # Balanced tree:
    #     0
    #     |---7---+
    #     1       |
    #             |-8
    #     2---+   |
    #     3   |-6-+
    #     |-5-+
    #     4
    balanced = np.array([[3., 4., 1., 2.],
                         [2., 5., 1., 3.],
                         [0., 1., 3., 2.],
                         [6., 7., 4., 5.]])

    # Merges:
    #     0
    #     |----7:1----+
    #     1           |
    #                 |-8:0
    #     2-----+     |
    #     3     |-6:1-+
    #     |-5:1-+
    #     4
    check(balanced, [True, True, True, False],
          [7, 7, 6, 6, 6],
          "computes cluster roots for balanced tree",
          [1, 1, 2, 2, 2],
          "computes flat cluster indices for balanced tree")

    # Merges:
    #     0
    #     |----7:0----+
    #     1           |
    #                 |-8:0
    #     2-----+     |
    #     3     |-6:1-+
    #     |-5:0-+
    #     4
    check(balanced, [False, True, False, False],
          [0, 1, 6, 6, 6],
          "returns cluster roots for balanced tree with singleton and "
          "non-singleton clusters",
          [1, 2, 3, 3, 3],
          "computes flat cluster indices for balanced tree with "
          "singleton and non-singleton clusters")
Exemple #22
0
def test_linkage_from_reachability():
    """Check `linkage_from_reachability` against hand-built linkages."""
    # First dataset -- weighted distances for pairs:
    # (0, 1) =  17.7
    # (0, 2) =  70.0
    # (0, 3) =  97.1
    # (0, 4) =  50.8
    # (1, 2) = 121.6
    # (1, 3) =  79.4
    # (1, 4) =  82.1
    # (2, 3) = 120.9
    # (2, 4) =  77.3
    # (3, 4) =  14.4
    #
    # Corresponding tree:
    #     0
    #     |-6-+
    #     1   |
    #         |-7-+
    #     3   |   |
    #     |-5-+   |-8
    #     4       |
    #     2-------+
    condensed = np.array(
        [17.7, 70., 97.1, 50.8, 121.6, 79.4, 82.1, 120.9, 77.3, 14.4])
    expected_linkage = np.array([[3., 4., 14.4, 2.],
                                 [0., 1., 17.7, 2.],
                                 [5., 6., 50.8, 4.],
                                 [2., 7., 70.0, 5.]])

    (order, reach) = distance.reachability_order(condensed)
    built = linkage_from_reachability(reach)
    # The expected linkage is relabelled to follow the traversal order.
    reordered = permute_obs(expected_linkage, order)
    assert_true(equal_arrays(built, reordered),
                "returns linkage corresponding to reachability ordering")

    # Second dataset -- weighted distances for pairs:
    # (0, 1) =  2
    # (0, 2) =  9
    # (0, 3) =  3
    # (0, 4) =  5
    # (0, 5) = 18
    # (0, 6) =  7
    # (1, 2) = 13
    # (1, 3) =  4
    # (1, 4) =  4
    # (1, 5) =  4
    # (1, 6) =  3
    # (2, 3) =  9
    # (2, 4) =  8
    # (2, 5) =  3
    # (2, 6) =  5
    # (3, 4) =  1
    # (3, 5) = 10
    # (3, 6) =  9
    # (4, 5) = 12
    # (4, 6) = 11
    # (5, 6) =  3
    #
    # Corresponding tree:
    #     5
    #     |-9-+
    #     6   |-10-+
    #     2---+    |
    #              |
    #     1        |-12
    #     |-8-+    |
    #     0   |    |
    #         |-11-+
    #     3   |
    #     |-7-+
    #     4
    condensed = np.array([2., 9., 3., 5., 18., 7., 13., 4., 4., 4., 3., 9.,
                          8., 3., 5., 1., 10., 9., 12., 11., 3.])
    expected_linkage = np.array([[ 3.,  4., 1., 2.],
                                 [ 0.,  1., 2., 2.],
                                 [ 5.,  6., 3., 2.],
                                 [ 2.,  9., 3., 3.],
                                 [ 7.,  8., 3., 4.],
                                 [10., 11., 3., 7.]])

    (order, reach) = distance.reachability_order(condensed)
    # Only the merge heights (column 2) are compared for this dataset.
    built_heights = linkage_from_reachability(reach)[:, 2]
    expected_heights = expected_linkage[:, 2]
    assert_true(equal_arrays(built_heights, expected_heights),
                "returns linkage with correct heights for a moderately complex "
                "hierarchy")
Exemple #23
0
def test_maxscoresbelow():
    """Check `maxscoresbelow` on a skewed and a balanced tree.

    In the diagrams, `k:c` means node `k` is assigned coefficient `c`,
    matching the coefficient list passed for each case.
    """
    # Skewed tree:
    #     0-------+
    #     2---+   |-6
    #     1   |-5-+
    #     |-4-+
    #     3
    skewed = np.array([[1., 3., 1., 2.],
                       [2., 4., 1., 3.],
                       [0., 5., 2., 4.]])

    # Coefficients:
    #     0:1---------+
    #     2:0---+     |-6:0
    #     1:1   |-5:0-+
    #     |-4:1-+
    #     3:0
    scores = maxscoresbelow(skewed, [1, 1, 0, 0, 1, 0, 0], np.maximum)
    assert_true(equal_arrays(scores, [1, 1, 1]),
                "returns maximum coefficients for skewed tree")

    # Coefficients:
    #     0:0---------+
    #     2:1---+     |-6:0
    #     1:1   |-5:2-+
    #     |-4:0-+
    #     3:0
    scores = maxscoresbelow(skewed, [0, 1, 1, 0, 0, 2, 0], np.maximum)
    assert_true(equal_arrays(scores, [1, 1, 2]),
                "returns maximum coefficients for skewed tree with large valued "
                "internal coefficient")

    # Coefficients:
    #     0:0---------+
    #     2:0---+     |-6:0
    #     1:0   |-5:1-+
    #     |-4:2-+
    #     3:0
    scores = maxscoresbelow(skewed, [0, 0, 0, 0, 2, 1, 0], np.add)
    assert_true(equal_arrays(scores, [0, 2, 2]),
                "returns maximum coefficients for skewed tree with lower "
                "valued internal coefficient")

    # Balanced tree:
    #     0
    #     |---7---+
    #     1       |
    #             |-8
    #     2---+   |
    #     3   |-6-+
    #     |-5-+
    #     4
    balanced = np.array([[3., 4., 1., 2.],
                         [2., 5., 1., 3.],
                         [0., 1., 3., 2.],
                         [6., 7., 4., 5.]])

    # Coefficients:
    #     0:0
    #     |----7:1----+
    #     1:1         |
    #                 |-8:1
    #     2:0---+     |
    #     3:2   |-6:2-+
    #     |-5:2-+
    #     4:0
    scores = maxscoresbelow(balanced, [0, 1, 0, 2, 0, 2, 2, 1, 1],
                            np.maximum)
    assert_true(equal_arrays(scores, [2, 2, 1, 2]),
                "computes maximum coefficients for a balanced tree")

    # Coefficients:
    #     0:1
    #     |----7:0----+
    #     1:1         |
    #                 |-8:0
    #     2:1---+     |
    #     3:1   |-6:5-+
    #     |-5:0-+
    #     4:2
    scores = maxscoresbelow(balanced, [1, 1, 1, 1, 2, 0, 5, 0, 0], np.add)
    assert_true(equal_arrays(scores, [3, 4, 2, 7]),
                "returns maximum coefficients for balanced tree with singleton "
                "and non-singleton clusters")

    # Coefficients:
    #     0:1
    #     |----7:0----+
    #     1:1         |
    #                 |-8:0
    #     2:2---+     |
    #     3:1   |-6:0-+
    #     |-5:0-+
    #     4:2
    scores = maxscoresbelow(balanced, [1, 1, 2, 1, 2, 0, 0, 0, 0],
                            operator.add)
    assert_true(equal_arrays(scores, [3, 5, 2, 7]),
                "returns cumulative sum of leaf values with only zero interal "
                "coefficients")
Exemple #24
0
def test_core_distance():
    """Check `core_distance` with point counts and with weight limits.

    The first dataset encodes distances for pairs:
    (0, 1) =  2.2
    (0, 2) =  7.2
    (0, 3) = 10.4
    (0, 4) =  6.7
    (1, 2) = 12.8
    (1, 3) =  8.6
    (1, 4) =  8.9
    (2, 3) = 12.7
    (2, 4) =  8.6
    (3, 4) =  2.2

    Closest to furthest distances per point:
    0 = 2.2, 6.7,  7.2, 10.4
    1 = 2.2, 8.6,  8.9, 12.8
    2 = 7.2, 8.6, 12.7, 12.8
    3 = 2.2, 8.6, 10.4, 12.7
    4 = 2.2, 6.7,  8.6,  8.9
    """
    Y = np.array([2.2, 7.2, 10.4, 6.7, 12.8, 8.6, 8.9, 12.7, 8.6, 2.2])
    n = sp_distance.num_obs_y(Y)

    def unit_weight(_i, _j):
        return 1

    nearest = [2.2, 2.2, 7.2, 2.2, 2.2]
    assert_true(
        equal_arrays(core_distance(Y, minPts=1), nearest),
        "returns nearest neighbour distance with minPts=1")
    assert_true(
        equal_arrays(
            core_distance(Y, weight_fun=unit_weight, minWt=[1] * n),
            nearest),
        "returns nearest neighbour distance with unit weights and minWts")

    second_nearest = [6.7, 8.6, 8.6, 8.6, 6.7]
    assert_true(
        equal_arrays(core_distance(Y, minPts=2), second_nearest),
        "returns 2-nearest neighbour distance with minPts=2")

    furthest = [10.4, 12.8, 12.8, 12.7, 8.9]
    assert_true(
        equal_arrays(core_distance(Y, minPts=4), furthest),
        "returns distance to 4-nearest neighbour with minPts=4")
    assert_true(
        equal_arrays(
            core_distance(Y, weight_fun=unit_weight, minWt=[4] * n),
            furthest),
        "returns distance to 4-nearest neighbour distance with unit "
        "weights and minWts=4")

    # Second dataset -- weighted distances for pairs:
    # (0, 1) =  17.7
    # (0, 2) =  70.0
    # (0, 3) =  97.1
    # (0, 4) =  50.8
    # (1, 2) = 121.6
    # (1, 3) =  79.4
    # (1, 4) =  82.1
    # (2, 3) = 120.9
    # (2, 4) =  77.3
    # (3, 4) =  14.4
    #
    # Pairwise weights:
    # (0, 1) =  4
    # (0, 2) =  8
    # (0, 3) =  6
    # (0, 4) = 10
    # (1, 2) =  6
    # (1, 3) =  6
    # (1, 4) = 10
    # (2, 3) = 12
    # (2, 4) = 20
    # (3, 4) = 15
    #
    # Cumulative weights, closest neighbour first:
    # 0 =  4, 14, 22, 28
    # 1 =  4, 10, 20, 26
    # 2 =  8, 28, 40, 46
    # 3 = 15, 21, 27, 39
    # 4 = 15, 25, 45, 55
    #
    # Closest to furthest distances per point:
    # 0 = 17.7, 50.8,  70.0,  97.1
    # 1 = 17.7, 79.4,  82.1, 121.6
    # 2 = 70.0, 77.3, 120.9, 121.6
    # 3 = 14.4, 79.4,  97.1, 120.9
    # 4 = 14.4, 50.8,  77.3,  82.1
    Y = np.array([17.7, 70., 97.1, 50.8, 121.6, 79.4, 82.1, 120.9, 77.3, 14.4])
    w = np.array([4, 8, 6, 10, 6, 6, 10, 12, 20, 15])
    n = sp_distance.num_obs_y(Y)

    def pair_weight(i, j):
        # Looks up the weight stored at the pair's condensed index.
        return w[condensed_index(n, i, j)]

    assert_true(
        equal_arrays(
            core_distance(Y, weight_fun=pair_weight, minWt=[20] * n),
            [70.0, 82.1, 77.3, 79.4, 50.8])
        and equal_arrays(
            core_distance(Y, weight_fun=pair_weight, minWt=[30] * n),
            [97.1, 121.6, 120.9, 120.9, 77.3]),
        "computes weighted core distances at various limits")