def test_embed_nodes():
    """Check `embed_nodes` against a skewed three-merge linkage."""

    def skewed_linkage():
        # Merge list (presumably SciPy-style rows [a, b, height, size]):
        #   node 4 = (1, 3), node 5 = (2, 4), node 6 = (0, 5)
        # A fresh array is built per scenario, as the original test did.
        return np.array([[1., 3., 1., 2.],
                         [2., 4., 2., 3.],
                         [0., 5., 3., 4.]])

    # Scenario 1: leaves 2 and 3 are passed as the embedded nodes.
    result = embed_nodes(skewed_linkage(), [2, 3])
    assert_true(equal_arrays(result, [3, 5, 5]),
                "assigns nodes the first embedded descendent")

    # Scenario 2: leaves 0 and 2 are embedded; the first merge (1, 3) has
    # no embedded node below it and is expected to be reported as -1.
    result = embed_nodes(skewed_linkage(), [0, 2])
    assert_true(equal_arrays(result, [-1, 2, 6]),
                "assign -1 for unembedded nodes")
def test_ancestors():
    """Check `ancestors` for leaf queries, path queries and `inclusive`."""
    # Merge list for the five-leaf tree:
    #   node 5 = (3, 4), node 6 = (2, 5), node 7 = (0, 1), node 8 = (6, 7)
    tree = np.array([[3., 4., 1., 2.],
                     [2., 5., 1., 3.],
                     [0., 1., 3., 2.],
                     [6., 7., 4., 5.]])

    every_leaf = ancestors(tree, range(5))
    assert_true(equal_arrays(every_leaf, [5, 6, 7, 8]),
                "returns ancestors of all leaf clusters")

    one_leaf = ancestors(tree, [1])
    assert_true(equal_arrays(one_leaf, [7, 8]),
                "returns ancestors of a single leaf cluster")

    # 5 -> 6 -> 8 is a root-ward path; node 5 itself is not an ancestor of
    # any queried node, so it drops out unless `inclusive` is set.
    path = [5, 6, 8]
    exclusive = ancestors(tree, path)
    assert_true(equal_arrays(exclusive, [6, 8]),
                "returns union of ancestors for a path of nodes")

    with_self = ancestors(tree, path, inclusive=True)
    assert_true(equal_arrays(with_self, [5, 6, 8]),
                "returns union of path nodes including nodes themselves when "
                "`inclusive` flag is set")
def test_argrank():
    """Check `_fractional_rank`, `_ifractional_rank` and `argrank`.

    Covers plain ranks, tied ranks, ranking along either axis of a 2D array,
    and weighted ranks. Weighted results are compared against the unweighted
    ranks of an array in which each value is repeated `weight` times.

    The leftover debug `print` statement (Python 2 syntax, a SyntaxError on
    Python 3) has been removed; the assertions are unchanged.
    """
    # --- distinct values: ranks are plain integers -------------------------
    v1 = [5, 3, 4, 8]
    o1 = [3, 1, 2, 4]
    assert_true(equal_arrays(_fractional_rank(v1), o1),
                "returns integer rank of values in one-dimensional array")

    # In-place variant overwrites its input with the ranks.
    x1 = np.array(v1)
    _ifractional_rank(x1)
    assert_true(equal_arrays(x1, o1), "loads array ranks into input array")

    # --- ties: tied values share the mean of their ranks -------------------
    v2 = [5, 3, 8, 8]
    o2 = [2, 1, 3.5, 3.5]
    assert_true(equal_arrays(_fractional_rank(v2), o2),
                "returns mean of tied ranks")

    # A floating dtype is needed here so the half-ranks can be stored.
    x2 = np.array(v2, dtype=np.double)
    _ifractional_rank(x2)
    assert_true(equal_arrays(x2, o2),
                "loads array ranks with tied means into input array")

    # --- 2D input: rank along a chosen axis --------------------------------
    v3 = np.array([[1, 10, 5, 2],
                   [1, 4, 6, 2],
                   [5, 5, 3, 10]])
    o3 = np.array([[1, 4, 3, 2],
                   [1, 3, 4, 2],
                   [2.5, 2.5, 1, 4]])
    assert_true(equal_arrays(argrank(v3, axis=1), o3),
                "with `axis=1` passed returns ranks along rows of 2D array")
    assert_true(
        equal_arrays(argrank(v3.T, axis=0), o3.T),
        "with `axis=0` passed returns ranks along columns of 2D array")

    # --- weighted 1D ranks --------------------------------------------------
    # `v4_dup` starts with `v4` and then repeats each value until it occurs
    # `w4` times in total, so its first four ranks are the reference result.
    v4 = np.asarray([5, 3, 4, 8])
    w4 = np.asarray([2, 2, 1, 3])
    v4_dup = np.asarray([5, 3, 4, 8, 5, 3, 8, 8])
    assert_true(
        equal_arrays(_fractional_rank(v4, weight_fun=lambda i: w4[i]),
                     _fractional_rank(v4_dup)[:4]),
        "returns weighted ranks when weights parameter is passed")

    x4 = np.array(v4, dtype=np.double)
    _ifractional_rank(x4, weight_fun=lambda i: w4[i])
    assert_true(equal_arrays(x4, _fractional_rank(v4_dup)[:4]),
                "returns idential ranks to non-mutating methods")

    # --- weighted 2D ranks, weights indexed along axis 1 -------------------
    # Each row of `v5_dup` is the matching row of `v5` followed by the extra
    # copies implied by `w5` (four more 1s and five more 2s).
    v5 = np.array([[1, 10, 5, 2],
                   [1, 4, 6, 2]])
    w5 = np.asarray([5, 1, 1, 6])
    v5_dup = np.array([[1, 10, 5, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2],
                       [1, 4, 6, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2]])
    assert_true(
        equal_arrays(argrank(v5, weight_fun=lambda i: w5[i], axis=1),
                     argrank(v5_dup, axis=1)[:, :4]),
        "broadcasts weights vector along rows of 2D array when ranking columns"
    )

    # --- weighted 2D ranks, weights indexed along axis 0 -------------------
    # `v6_dup` holds row 0 twice and row 1 five times in total, per `w6`.
    v6 = np.array([[1, 10, 5, 2],
                   [1, 4, 6, 2]])
    w6 = np.asarray([2, 5])
    v6_dup = np.array([[1, 10, 5, 2],
                       [1, 4, 6, 2],
                       [1, 10, 5, 2],
                       [1, 4, 6, 2],
                       [1, 4, 6, 2],
                       [1, 4, 6, 2],
                       [1, 4, 6, 2]])
    assert_true(
        equal_arrays(argrank(v6, weight_fun=lambda i: w6[i], axis=0),
                     argrank(v6_dup, axis=0)[:2]),
        "broadcasts weights vector along columns of 2D array when ranking rows"
    )
def _test_one_small():
    """Check `argsort_chunk_mergesort` on a 190-element random array.

    `infile` and `outfile` are not defined in this function; presumably they
    are file paths supplied by the enclosing scope.

    Changes from the previous version:
    * `np.int` (an alias of the builtin `int`, removed from current NumPy)
      is replaced by `int`, which selects the same dtype.
    * The scratch files are removed in a `finally` clause so that a failing
      assertion no longer leaves them on disk.
    """
    try:
        d1 = np_random.rand(190).astype(np.double)
        d1.tofile(infile)
        # Reference answer computed in memory.
        i1 = d1.argsort()

        # chunk_size=30 is smaller than the array, so several chunks are used.
        argsort_chunk_mergesort(infile, outfile, chunk_size=30)

        assert_true(equal_arrays(np.fromfile(outfile, dtype=int), i1),
                    "sorted indices are stored in output file")
        assert_true(
            equal_arrays(np.fromfile(infile, dtype=np.double), d1[i1]),
            "input file values are in sorted order")
    finally:
        # Guard with `exists` so cleanup cannot mask an earlier failure that
        # happened before a file was created.
        for path in (infile, outfile):
            if os.path.exists(path):
                os.remove(path)
def test_squareform_coords():
    """Check that `squareform_coords` inverts the condensed indexing.

    For a randomly sized problem, the coordinates recovered for every
    condensed index must equal the (row, column) pairs from `pairs(n)`.

    The two leftover debug `print` statements (Python 2 syntax, a SyntaxError
    on Python 3) have been removed; the assertions are unchanged.
    """
    n = random.randint(3, 10)
    # Number of unordered pairs among n observations.
    m = n * (n - 1) // 2
    (ri, ci) = pairs(n)

    (squareform_i, squareform_j) = squareform_coords(n, range(m))

    assert_true(
        equal_arrays(squareform_i, ri) and equal_arrays(squareform_j, ci),
        "compute row and column indices for elements of condensed matrix")
    assert_true(np.all(squareform_i <= squareform_j),
                "returns upper triangular coordinates")
def test_ClusterQualityEngine():
    """Check that `QualityEngineTester.makeScores` scores every tree node."""
    # Merge list shared by all scenarios:
    #   node 4 = (1, 3), node 5 = (2, 4), node 6 = (0, 5)
    # The expected score vectors below have one entry per node, 0 through 6.
    linkage = np.array([[1., 3., 1., 2.],
                        [2., 4., 1., 3.],
                        [0., 5., 2., 4.]])

    # `max` with data on leaf 0 only: the score appears on leaf 0 and on the
    # root (node 6), the only node above it.
    engine = QualityEngineTester({0: [1]}, max)
    scores = engine.makeScores(linkage)
    assert_true(equal_arrays(scores, [1, 0, 0, 0, 0, 0, 1]),
                "computes max of a single leaf data point for all ancestors")

    # `len` with one item on each of leaves 1 and 2: the items are combined
    # on the way up, so nodes 5 and 6 each see two.
    engine = QualityEngineTester({1: ["a"], 2: ["x"]}, len)
    scores = engine.makeScores(linkage)
    assert_true(
        equal_arrays(scores, [0, 1, 1, 0, 1, 2, 2]),
        "computes length of leaf data in case of pair of leaf data points")

    # Range (max - min) over data spread across leaves 1, 2 and 3.
    def spread(values):
        return max(values) - min(values)

    leaf_data = {
        1: [2, 2],
        2: [1],
        3: [1, 0],
    }
    engine = QualityEngineTester(leaf_data, spread)
    scores = engine.makeScores(linkage)
    assert_true(equal_arrays(scores, [0, 0, 0, 1, 2, 2, 2]),
                "computes non-trivial function of combined leaf data points")
def test_greedy_clique_by_elimination():
    """Check `greedy_clique_by_elimination` on randomly relabelled graphs."""

    def relabel(adjacency):
        # Apply a random node permutation to the adjacency matrix and return
        # it together with `position`, where `position[k]` is the index that
        # original node `k` occupies in the permuted matrix.
        size = adjacency.shape[0]
        shuffle = np_random.permutation(size)
        shuffled = adjacency[np.ix_(shuffle, shuffle)]
        position = np.empty(size, dtype=int)
        position[shuffle] = np.arange(size)
        return shuffled, position

    # One clique {0, 1} plus an isolated node.
    graph = np.array([[True, True, False],
                      [True, True, False],
                      [False, False, True]])
    shuffled, position = relabel(graph)
    assert_true(
        equal_arrays(greedy_clique_by_elimination(shuffled),
                     np.sort(position[:2])),
        "returns indices of clique")

    # Cliques {0, 1, 2} and {3, 4, 5}; each node is linked to all but one
    # node of the other clique, so only the clique size is checked.
    graph = np.array([[True, True, True, False, True, True],
                      [True, True, True, True, False, True],
                      [True, True, True, True, True, False],
                      [False, True, True, True, True, True],
                      [True, False, True, True, True, True],
                      [True, True, False, True, True, True]])
    shuffled, position = relabel(graph)
    assert_true(
        len(greedy_clique_by_elimination(shuffled)) == 3,
        "computes correct clique size for two highly connected equal sized cliques"
    )

    # Cliques {0, 1, 2, 3} and {3, 4, 5} sharing node 3.
    graph = np.array([[True, True, True, True, False, False],
                      [True, True, True, True, False, False],
                      [True, True, True, True, False, False],
                      [True, True, True, True, True, True],
                      [False, False, False, True, True, True],
                      [False, False, False, True, True, True]])
    shuffled, position = relabel(graph)
    assert_true(
        equal_arrays(greedy_clique_by_elimination(shuffled),
                     np.sort(position[:4])),
        "computes the larger of two overlapping cliques")
def test_condensed_index():
    """Check `pairs` and `condensed_index` for a randomly sized problem.

    Verifies that `pairs(n)` enumerates the upper-triangular coordinates in
    row-major order, that `condensed_index` maps them to 0..m-1, and that the
    mapping is unchanged when the two coordinates are swapped.

    The last assertion message previously read "row < col", although that
    call passes the larger coordinate first; it now says "row > col".
    """
    n = random.randint(3, 10)
    # Number of unordered pairs among n observations.
    m = n * (n - 1) // 2
    (ri, ci) = pairs(n)

    # Expected coordinates: every (i, j) with i < j, ordered by i then j.
    expected_rows = [i for i in range(n - 1) for _ in range(i + 1, n)]
    expected_cols = [j for i in range(n - 1) for j in range(i + 1, n)]
    assert_true(
        equal_arrays(ri, expected_rows) and equal_arrays(ci, expected_cols),
        "generate pairs of square coordinates")

    condensed_indices = condensed_index(n, ri, ci)
    assert_true(equal_arrays(condensed_indices, np.arange(m)),
                "compute linear index of condensed distance matrix")

    # Swapped arguments: the first coordinate is now the larger one.
    assert_true(equal_arrays(condensed_indices, condensed_index(n, ci, ri)),
                "computes linear index correctly when row > col")
def test_parse(self):
    """Check `parse` output and the pairwise `getDistance` values it yields."""
    taxstrings = [
        "d__Archaea; p__Euryarchaeota; c__Methanococci; o__Methanococcales; f__Methanococcaceae; g__Methanococcus",
        "d__Archaea; p__Euryarchaeota; c__Methanococci; o__Methanococcales; f__Methanococcaceae; g__Methanococcus",
        "d__Archaea; p__Euryarchaeota; c__Thermococci; o__Thermococcales; f__Thermococcaceae; g__Pyrococcus",
        "d__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales; f__Burkholderiaceae; g__Burkholderia",
        "d__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales",
        "d__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Nitrosomonadales; f__Nitrosomonadaceae; g__Nitrosomonas",
    ]
    table, taxons = self._ce.parse(taxstrings)

    # Row 0 of the table indexes into `taxons`; the trailing "" fills the
    # rank that the input string does not define.
    first_row_tags = ["Archaea", "Euryarchaeota", "Methanococci",
                      "Methanococcales", "Methanococcaceae", "Methanococcus",
                      ""]
    assert_true(equal_arrays(taxons[table[0]], first_row_tags),
                "returns table of taxon tag indices and array of tags")

    # (row a, row b, expected distance, message). The level named in each
    # comment is the deepest rank at which the pair still agrees.
    distance_checks = [
        # Methanococcus vs. identical Methanococcus: species
        (0, 1, 0, "0 distance between two equal classifications"),
        # Methanococcus vs. Pyrococcus: phylum
        (0, 2, 5, "5 distance between classifications equal up to phylum level"),
        # Methanococcus vs. Burkholderia: root
        (0, 3, 7, "7 distance between classifications equal only at root level"),
        # Burkholderia vs. Burkholderiales (shorter lineage): species
        (3, 4, 0, "0 distance between classifications equal at all defined levels"),
        # Burkholderiales vs. Nitrosomonas: class
        (4, 5, 4, "4 distance between classifications equal up to class level"),
    ]
    for left, right, expected, message in distance_checks:
        measured = self._ce.getDistance(table[left], table[right])
        assert_true(measured == expected, message)
def _test_one_small():
    """Check `pdist_chunk` against SciPy's `pdist` on a 20 x 50 matrix.

    `filename` is not defined in this function; presumably it is an output
    path supplied by the enclosing scope.

    The output file is now removed in a `finally` clause so that a failing
    assertion no longer leaves it on disk.
    """
    try:
        f1 = np_random.rand(20, 50)
        # Reference distances computed in memory.
        d1 = sp_distance.pdist(f1, metric="euclidean")

        # 20 rows give 190 pairs, so chunk_size=30 forces several chunks.
        pdist_chunk(f1, filename, chunk_size=30, metric="euclidean")

        assert_true(
            equal_arrays(np.fromfile(filename, dtype=np.double), d1),
            "computes same distances as unchunked function")
    finally:
        # Guard with `exists` so cleanup cannot mask an earlier failure that
        # happened before the file was created.
        if os.path.exists(filename):
            os.remove(filename)
def test_parse_taxstring(self):
    """Check `parse_taxstring` over well-formed and irregular lineages."""
    # (input string, expected ranks, assertion message)
    scenarios = [
        ("d__Archaea; p__Euryarchaeota; c__Methanococci; o__Methanococcales; f__Methanococcaceae; g__Methanococcus",
         ["Archaea", "Euryarchaeota", "Methanococci", "Methanococcales",
          "Methanococcaceae", "Methanococcus"],
         "returns array of parsed taxonomic ranks"),
        ("d__Bacteria; p__Proteobacteria; c__Betaproteobacteria; o__Burkholderiales",
         ["Bacteria", "Proteobacteria", "Betaproteobacteria",
          "Burkholderiales"],
         "returns array of parsed taxonomic ranks defined to order level"),
        ("Root; d__Bacteria; p__Proteobacteria",
         ["Bacteria", "Proteobacteria"],
         "returned array ignores initial 'Root' rank"),
        ("d__Bacteria; p__Proteobacteria;",
         ["Bacteria", "Proteobacteria"],
         "returned array ignores trailing semi-colon"),
        ("Root;d__Bacteria;p__Proteobacteria",
         ["Bacteria", "Proteobacteria"],
         "parses taxonomic string without spaces after semi-colon separator"),
        # The phylum tag is skipped here, so parsing halts after the domain.
        ("d__Bacteria; c__Betaproteobacteria; o__Burkholderiales",
         ["Bacteria"],
         "stops parsing when a bad tag is encountered"),
        ("Bacteria; Proteobacteria; Betaproteobacteria; Burkholderiales",
         ["Bacteria", "Proteobacteria", "Betaproteobacteria",
          "Burkholderiales"],
         "parses taxonomic string without rank tags"),
    ]
    for taxstring, expected_ranks, message in scenarios:
        parsed = self._ce.parse_taxstring(taxstring)
        assert_true(equal_arrays(parsed, expected_ranks), message)
def test_greedy_clique_by_elimination():
    """Check `greedy_clique_by_elimination` after random node relabelling.

    Each scenario permutes the adjacency matrix and keeps the inverse
    permutation, so that `inverse[k]` is the index original node `k` holds
    in the permuted matrix.
    """
    # --- single clique {0, 1} and one isolated node -------------------------
    adjacency = np.array([[1, 1, 0],
                          [1, 1, 0],
                          [0, 0, 1]], dtype=bool)
    order = np_random.permutation(3)
    permuted = adjacency[np.ix_(order, order)]
    inverse = np.empty(3, dtype=int)
    inverse[order] = np.arange(3)
    found = greedy_clique_by_elimination(permuted)
    assert_true(equal_arrays(found, np.sort(inverse[:2])),
                "returns indices of clique")

    # --- cliques {0, 1, 2} and {3, 4, 5} with n-1 connecting edges ----------
    adjacency = np.array([[1, 1, 1, 0, 1, 1],
                          [1, 1, 1, 1, 0, 1],
                          [1, 1, 1, 1, 1, 0],
                          [0, 1, 1, 1, 1, 1],
                          [1, 0, 1, 1, 1, 1],
                          [1, 1, 0, 1, 1, 1]], dtype=bool)
    order = np_random.permutation(6)
    permuted = adjacency[np.ix_(order, order)]
    inverse = np.empty(6, dtype=int)
    inverse[order] = np.arange(6)
    # Only the size is checked in this scenario.
    found = greedy_clique_by_elimination(permuted)
    assert_true(len(found) == 3,
                "computes correct clique size for two highly connected equal sized cliques")

    # --- cliques {0, 1, 2, 3} and {3, 4, 5} joined through node 3 -----------
    adjacency = np.array([[1, 1, 1, 1, 0, 0],
                          [1, 1, 1, 1, 0, 0],
                          [1, 1, 1, 1, 0, 0],
                          [1, 1, 1, 1, 1, 1],
                          [0, 0, 0, 1, 1, 1],
                          [0, 0, 0, 1, 1, 1]], dtype=bool)
    order = np_random.permutation(6)
    permuted = adjacency[np.ix_(order, order)]
    inverse = np.empty(6, dtype=int)
    inverse[order] = np.arange(6)
    found = greedy_clique_by_elimination(permuted)
    assert_true(equal_arrays(found, np.sort(inverse[:4])),
                "computes the larger of two overlapping cliques")
def _test_one_big():
    """Check `argrank_chunk` on a shuffled permutation of 523,776 values.

    `dist_file` and `indices_file` are not defined in this function;
    presumably they are file paths supplied by the enclosing scope.

    Changes from the previous version: the commented-out `i2.tofile(...)`
    line is gone, and the scratch files are removed in a `finally` clause so
    that a failing assertion no longer leaves them on disk.
    """
    try:
        # 2**9 * (2**10 - 1) == 1024 * 1023 / 2, the number of pairs among
        # 1024 observations.
        d2 = np.arange(2**9 * (2**10 - 1))
        np_random.shuffle(d2)
        # NOTE(review): the array is written with its integer dtype, unlike
        # the small-case test which converts to double first -- confirm that
        # `argrank_chunk` is meant to accept this.
        d2.tofile(dist_file)

        x3 = argrank_chunk(dist_file, indices_file, chunk_size=int(1e5))

        # The values are a permutation of 0..N-1, so each rank is value + 1.
        assert_true(equal_arrays(x3, d2 + 1),
                    "computes ranks of a large-ish permutation array")
    finally:
        # Guard with `exists` so cleanup cannot mask an earlier failure that
        # happened before a file was created.
        for path in (dist_file, indices_file):
            if os.path.exists(path):
                os.remove(path)
def test_reachability_order():
    """Check `reachability_order` with and without core distances."""
    # Condensed pairwise distances over five points:
    #   (0, 1) = 17.7   (0, 2) = 70.0   (0, 3) = 97.1   (0, 4) = 50.8
    #   (1, 2) = 121.6  (1, 3) = 79.4   (1, 4) = 82.1
    #   (2, 3) = 120.9  (2, 4) = 77.3
    #   (3, 4) = 14.4
    # Distances from each point, nearest first:
    #   0: 17.7, 50.8, 70.0, 97.1
    #   1: 17.7, 79.4, 82.1, 121.6
    #   2: 70.0, 77.3, 120.9, 121.6
    #   3: 14.4, 79.4, 97.1, 120.9
    #   4: 14.4, 50.8, 77.3, 82.1
    pair_dists = np.array([17.7, 70., 97.1, 50.8, 121.6,
                           79.4, 82.1, 120.9, 77.3, 14.4])

    order, reach = reachability_order(pair_dists)
    assert_true(equal_arrays(order, [0, 1, 4, 3, 2]),
                "returns reachability traversal order")
    assert_true(equal_arrays(reach, [0, 17.7, 50.8, 14.4, 70.0]),
                "returns reachability distances when traversing points")

    # With core distances applied, distances from each point (other point in
    # parentheses), nearest first:
    #   0: 70.0 (1), 70.0 (2), 70.0 (4), 97.1 (3)
    #   1: 82.1 (0), 82.1 (3), 82.1 (4), 121.6 (2)
    #   2: 77.3 (0), 77.3 (4), 120.9 (3), 121.6 (1)
    #   3: 79.4 (1), 79.4 (4), 97.1 (0), 120.9 (2)
    #   4: 50.8 (0), 50.8 (3), 77.3 (2), 82.1 (1)
    core = np.array([70.0, 82.1, 77.3, 79.4, 50.8])

    order, reach = reachability_order(pair_dists, core)
    assert_true(equal_arrays(order, [0, 1, 2, 4, 3]),
                "returns reachability traversal order with core distances")
    assert_true(
        equal_arrays(reach, [70.0, 70.0, 70.0, 70.0, 50.8]),
        "returns reachability distances computed using core distances")
def _test_one_big():
    """Check `pdist_chunk` against SciPy's `pdist` on a 1024 x 50 matrix.

    `filename` is not defined in this function; presumably it is an output
    path supplied by the enclosing scope.

    The output file is now removed in a `finally` clause so that a failing
    assertion no longer leaves it on disk.
    """
    try:
        f2 = np_random.rand(2**10, 50)
        # Reference distances computed in memory.
        d2 = sp_distance.pdist(f2, metric="euclidean")

        # 1024 rows give 523,776 pairs, so a 1e5 chunk size forces several
        # chunks.
        pdist_chunk(f2, filename, chunk_size=int(1e5), metric="euclidean")

        assert_true(
            equal_arrays(np.fromfile(filename, dtype=np.double), d2),
            "computes same distances as unchunked function for a large-ish dataset"
        )
    finally:
        # Guard with `exists` so cleanup cannot mask an earlier failure that
        # happened before the file was created.
        if os.path.exists(filename):
            os.remove(filename)
def test_flatten_nodes():
    """Check `flatten_nodes` on a skewed and on a seven-leaf linkage."""
    # Skewed tree: node 4 = (1, 3) and node 5 = (2, 4) share height 1,
    # node 6 = (0, 5) sits at height 2.
    skewed = np.array([[1., 3., 1., 2.],
                       [2., 4., 1., 3.],
                       [0., 5., 2., 4.]])
    flattened = flatten_nodes(skewed)
    assert_true(equal_arrays(flattened, [1, 1, 2]),
                "assigns nodes the indices of direct parent of equal height")

    # Seven-leaf tree:
    #   node 7 = (3, 4) at height 1, node 8 = (0, 1) at height 2,
    #   nodes 9 = (5, 6), 10 = (2, 9), 11 = (7, 8), 12 = (10, 11) at height 3
    # The four height-3 merges all map to the last row (index 5).
    larger = np.array([[3., 4., 1., 2.],
                       [0., 1., 2., 2.],
                       [5., 6., 3., 2.],
                       [2., 9., 3., 3.],
                       [7., 8., 3., 4.],
                       [10., 11., 3., 7.]])
    flattened = flatten_nodes(larger)
    assert_true(equal_arrays(flattened, [0, 1, 5, 5, 5, 5]),
                "assigns nodes the indices of parents and grandparents of equal height")
def _test_one():
    """Check `merge` on two randomly sized, pre-sorted runs.

    The runs are merged into output buffers of a random length `n`, which may
    be shorter than the combined input; the buffers must then hold the first
    `n` entries of the fully sorted sequence and their original indices.

    `random_integers` (deprecated in NumPy in favour of `randint`) is replaced
    by `randint(low, high + 1)`, which draws from the same inclusive range.
    """
    left = np_random.rand(np_random.randint(80, 1000 + 1))
    left.sort()
    right = np_random.rand(np_random.randint(80, 1000 + 1))
    right.sort()

    # Reference result: argsort of the concatenated runs.
    values = np.concatenate((left, right))
    indices = values.argsort()
    sorted_values = values[indices]

    # Output length drawn from 80..values.size inclusive.
    n = np_random.randint(80, values.size + 1)
    merged = np.zeros(n, dtype=values.dtype)
    merged_indices = np.zeros(n, dtype=indices.dtype)

    # Index arrays number the runs by their position in `values`.
    merge(left, np.arange(left.size),
          right, np.arange(left.size, values.size),
          merged, merged_indices)

    assert_true(equal_arrays(sorted_values[:n], merged),
                "sorts values in output array")
    assert_true(equal_arrays(indices[:n], merged_indices),
                "writes sorting indices into output indices array")
def _test_one_small():
    """Check `iapply_func_chunk` by adding two 200-element arrays on disk.

    `infilename` and `outfilename` are not defined in this function;
    presumably they are file paths supplied by the enclosing scope.

    The scratch files are now removed in a `finally` clause so that a failing
    assertion no longer leaves them on disk.
    """
    try:
        a = np_random.rand(200)
        b = np_random.rand(200)
        b.tofile(infilename)
        a.tofile(outfilename)

        # The file that started out holding `a` is expected to hold `a + b`
        # afterwards; chunk_size=50 splits the work into several chunks.
        iapply_func_chunk(outfilename, infilename, operator.add,
                          chunk_size=50)

        assert_true(
            equal_arrays(a + b, np.fromfile(outfilename, dtype=a.dtype)),
            "applies add operation in place using disk-stored array")
    finally:
        # Guard with `exists` so cleanup cannot mask an earlier failure that
        # happened before a file was created.
        for path in (infilename, outfilename):
            if os.path.exists(path):
                os.remove(path)
def _test_one():
    """Check `merge` on two randomly sized, pre-sorted runs.

    The runs are merged into output buffers of a random length `n`, which may
    be shorter than the combined input; the buffers must then hold the first
    `n` entries of the fully sorted sequence and their original indices.

    `random_integers` (deprecated in NumPy in favour of `randint`) is replaced
    by `randint(low, high + 1)`, which draws from the same inclusive range.
    """
    left = np_random.rand(np_random.randint(80, 1000 + 1))
    left.sort()
    right = np_random.rand(np_random.randint(80, 1000 + 1))
    right.sort()

    # Reference result: argsort of the concatenated runs.
    values = np.concatenate((left, right))
    indices = values.argsort()
    sorted_values = values[indices]

    # Output length drawn from 80..values.size inclusive.
    n = np_random.randint(80, values.size + 1)
    merged = np.zeros(n, dtype=values.dtype)
    merged_indices = np.zeros(n, dtype=indices.dtype)

    # Index arrays number the runs by their position in `values`.
    merge(
        left,
        np.arange(left.size),
        right,
        np.arange(left.size, values.size),
        merged,
        merged_indices,
    )

    assert_true(equal_arrays(sorted_values[:n], merged),
                "sorts values in output array")
    assert_true(equal_arrays(indices[:n], merged_indices),
                "writes sorting indices into output indices array")
def _test_one_small():
    """Check `argrank_chunk` against in-memory `argrank` on 190 values.

    Runs once without weights and once with a random weight function.
    `dist_file` and `indices_file` are not defined in this function;
    presumably they are file paths supplied by the enclosing scope.

    The scratch files are now removed in a `finally` clause so that a failing
    assertion no longer leaves them on disk.
    """
    try:
        d1 = np_random.rand(190).astype(np.double)

        # --- unweighted ranks -----------------------------------------------
        d1.tofile(dist_file)
        x1 = argrank_chunk(dist_file, indices_file, chunk_size=40)
        assert_true(equal_arrays(x1, argrank(d1, axis=None)),
                    "returns equal ranks to non-chunked function")

        # --- weighted ranks -------------------------------------------------
        # The input is written again before the second run; presumably the
        # first call altered the file's contents.
        d1.tofile(dist_file)
        w2 = np_random.rand(190).astype(np.double)
        x2 = argrank_chunk(dist_file, indices_file,
                           weight_fun=lambda i: w2[i], chunk_size=40)
        # Approximate comparison is used for the weighted ranks.
        assert_true(
            almost_equal_arrays(
                x2, argrank(d1, weight_fun=lambda i: w2[i], axis=None)),
            "correctly weights ranks when passed a weight function")
    finally:
        # Guard with `exists` so cleanup cannot mask an earlier failure that
        # happened before a file was created.
        for path in (dist_file, indices_file):
            if os.path.exists(path):
                os.remove(path)
def test_fcluster_merge():
    """Check `fcluster_merge` cluster roots and flat cluster labels."""
    # Skewed tree: node 4 = (1, 3), node 5 = (2, 4), node 6 = (0, 5).
    skewed = np.array([[1., 3., 1., 2.],
                       [2., 4., 1., 3.],
                       [0., 5., 2., 4.]])
    # Balanced tree: node 5 = (3, 4), node 6 = (2, 5), node 7 = (0, 1),
    # node 8 = (6, 7).
    balanced = np.array([[3., 4., 1., 2.],
                         [2., 5., 1., 3.],
                         [0., 1., 3., 2.],
                         [6., 7., 4., 5.]])

    # Each scenario: (linkage, one merge flag per linkage row,
    #                 expected root per leaf, message,
    #                 expected flat labels up to relabelling, message)
    scenarios = [
        # Only node 4 is flagged: leaves 1 and 3 join, 0 and 2 stay alone.
        (skewed, [True, False, False],
         [0, 4, 2, 4],
         "returns cluster roots for skewed tree",
         [1, 2, 3, 2],
         "computes flat cluster indices for skewed tree"),
        # Only node 5 is flagged: leaves 1, 2 and 3 share root 5.
        (skewed, [False, True, False],
         [0, 5, 5, 5],
         "`fcluster_merge` returns cluster roots for skewed tree with "
         "large valued internal coefficient",
         [1, 2, 2, 2],
         "`fcluster_merge` computes flat cluster indices for skewed "
         "tree with large valued internal coefficient"),
        # Same flags and expectations as the first scenario.
        (skewed, [True, False, False],
         [0, 4, 2, 4],
         "returns cluster roots for skewed tree with lower valued "
         "internal coefficient",
         [1, 2, 3, 2],
         "returns flat cluster indices for skewed tree with lower "
         "valued internal coefficient"),
        # Nodes 5, 6 and 7 are flagged: clusters {0, 1} and {2, 3, 4}.
        (balanced, [True, True, True, False],
         [7, 7, 6, 6, 6],
         "computes cluster roots for balanced tree",
         [1, 1, 2, 2, 2],
         "computes flat cluster indices for balanced tree"),
        # Only node 6 is flagged: leaves 0 and 1 stay singletons.
        (balanced, [False, True, False, False],
         [0, 1, 6, 6, 6],
         "returns cluster roots for balanced tree with singleton and "
         "non-singleton clusters",
         [1, 2, 3, 3, 3],
         "computes flat cluster indices for balanced tree with "
         "singleton and non-singleton clusters"),
    ]

    for (linkage, merges, want_roots, roots_msg,
         want_flat, flat_msg) in scenarios:
        flat, roots = fcluster_merge(linkage, merges, return_nodes=True)
        assert_true(equal_arrays(roots, want_roots), roots_msg)
        assert_true(is_isomorphic(flat, want_flat), flat_msg)
def test_linkage_from_reachability():
    """Check `linkage_from_reachability` against hand-built linkages."""
    # --- five points ---------------------------------------------------------
    # Condensed pairwise distances:
    #   (0, 1) = 17.7   (0, 2) = 70.0   (0, 3) = 97.1   (0, 4) = 50.8
    #   (1, 2) = 121.6  (1, 3) = 79.4   (1, 4) = 82.1
    #   (2, 3) = 120.9  (2, 4) = 77.3
    #   (3, 4) = 14.4
    # Expected tree: node 5 = (3, 4), node 6 = (0, 1), node 7 = (5, 6),
    # node 8 = (2, 7).
    pair_dists = np.array([17.7, 70., 97.1, 50.8, 121.6,
                           79.4, 82.1, 120.9, 77.3, 14.4])
    expected = np.array([[3., 4., 14.4, 2.],
                         [0., 1., 17.7, 2.],
                         [5., 6., 50.8, 4.],
                         [2., 7., 70.0, 5.]])
    order, reach = distance.reachability_order(pair_dists)
    built = linkage_from_reachability(reach)
    # The hand-built linkage is relabelled by the traversal order before the
    # comparison.
    relabelled = permute_obs(expected, order)
    assert_true(equal_arrays(built, relabelled),
                "returns linkage corresponding to reachability ordering")

    # --- seven points --------------------------------------------------------
    # Condensed pairwise distances:
    #   (0, 1) = 2   (0, 2) = 9   (0, 3) = 3   (0, 4) = 5   (0, 5) = 18
    #   (0, 6) = 7
    #   (1, 2) = 13  (1, 3) = 4   (1, 4) = 4   (1, 5) = 4   (1, 6) = 3
    #   (2, 3) = 9   (2, 4) = 8   (2, 5) = 3   (2, 6) = 5
    #   (3, 4) = 1   (3, 5) = 10  (3, 6) = 9
    #   (4, 5) = 12  (4, 6) = 11
    #   (5, 6) = 3
    # Expected tree: node 7 = (3, 4), node 8 = (0, 1), node 9 = (5, 6),
    # node 10 = (2, 9), node 11 = (7, 8), node 12 = (10, 11).
    pair_dists = np.array([2., 9., 3., 5., 18., 7.,
                           13., 4., 4., 4., 3.,
                           9., 8., 3., 5.,
                           1., 10., 9.,
                           12., 11.,
                           3.])
    expected = np.array([[3., 4., 1., 2.],
                         [0., 1., 2., 2.],
                         [5., 6., 3., 2.],
                         [2., 9., 3., 3.],
                         [7., 8., 3., 4.],
                         [10., 11., 3., 7.]])
    order, reach = distance.reachability_order(pair_dists)
    # Only the merge heights (column 2) are compared in this scenario.
    built_heights = linkage_from_reachability(reach)[:, 2]
    assert_true(equal_arrays(built_heights, expected[:, 2]),
                "returns linkage with correct heights for a moderately complex "
                "hierarchy")
def test_maxscoresbelow():
    """Check `maxscoresbelow` with maximum and additive combiners."""

    def check(linkage, node_scores, combine, expected, message):
        # `node_scores` lists one coefficient per node, leaves first; the
        # result holds one value per merge row.
        outcome = maxscoresbelow(linkage, node_scores, combine)
        assert_true(equal_arrays(outcome, expected), message)

    # Skewed tree: node 4 = (1, 3), node 5 = (2, 4), node 6 = (0, 5).
    skewed = np.array([[1., 3., 1., 2.],
                       [2., 4., 1., 3.],
                       [0., 5., 2., 4.]])

    check(skewed, [1, 1, 0, 0, 1, 0, 0], np.maximum, [1, 1, 1],
          "returns maximum coefficients for skewed tree")

    # Node 5 carries the largest coefficient (2).
    check(skewed, [0, 1, 1, 0, 0, 2, 0], np.maximum, [1, 1, 2],
          "returns maximum coefficients for skewed tree with large valued "
          "internal coefficient")

    # Additive combiner; node 4 scores 2 and node 5 scores 1.
    check(skewed, [0, 0, 0, 0, 2, 1, 0], np.add, [0, 2, 2],
          "returns maximum coefficients for skewed tree with lower "
          "valued internal coefficient")

    # Balanced tree: node 5 = (3, 4), node 6 = (2, 5), node 7 = (0, 1),
    # node 8 = (6, 7).
    balanced = np.array([[3., 4., 1., 2.],
                         [2., 5., 1., 3.],
                         [0., 1., 3., 2.],
                         [6., 7., 4., 5.]])

    check(balanced, [0, 1, 0, 2, 0, 2, 2, 1, 1], np.maximum, [2, 2, 1, 2],
          "computes maximum coefficients for a balanced tree")

    check(balanced, [1, 1, 1, 1, 2, 0, 5, 0, 0], np.add, [3, 4, 2, 7],
          "returns maximum coefficients for balanced tree with singleton "
          "and non-singleton clusters")

    # All internal coefficients are zero; `operator.add` is the combiner.
    check(balanced, [1, 1, 2, 1, 2, 0, 0, 0, 0], operator.add, [3, 5, 2, 7],
          "returns cumulative sum of leaf values with only zero interal "
          "coefficients")
def test_core_distance():
    """Check `core_distance` for point-count and weight-based limits."""
    # --- unweighted distances ------------------------------------------------
    # Condensed pairwise distances over five points:
    #   (0, 1) = 2.2   (0, 2) = 7.2   (0, 3) = 10.4  (0, 4) = 6.7
    #   (1, 2) = 12.8  (1, 3) = 8.6   (1, 4) = 8.9
    #   (2, 3) = 12.7  (2, 4) = 8.6
    #   (3, 4) = 2.2
    # Distances from each point, nearest first:
    #   0: 2.2, 6.7, 7.2, 10.4
    #   1: 2.2, 8.6, 8.9, 12.8
    #   2: 7.2, 8.6, 12.7, 12.8
    #   3: 2.2, 8.6, 10.4, 12.7
    #   4: 2.2, 6.7, 8.6, 8.9
    dists = np.array([2.2, 7.2, 10.4, 6.7, 12.8, 8.6, 8.9, 12.7, 8.6, 2.2])
    count = sp_distance.num_obs_y(dists)

    def unit_weight(_i, _j):
        # Every pair weighs 1, so a weight limit behaves like a point count.
        return 1

    nearest = [2.2, 2.2, 7.2, 2.2, 2.2]
    assert_true(
        equal_arrays(core_distance(dists, minPts=1), nearest),
        "returns nearest neighbour distance with minPts=1")
    assert_true(
        equal_arrays(
            core_distance(dists, weight_fun=unit_weight, minWt=[1] * count),
            nearest),
        "returns nearest neighbour distance with unit weights and minWts")

    assert_true(
        equal_arrays(core_distance(dists, minPts=2),
                     [6.7, 8.6, 8.6, 8.6, 6.7]),
        "returns 2-nearest neighbour distance with minPts=2")

    farthest = [10.4, 12.8, 12.8, 12.7, 8.9]
    assert_true(
        equal_arrays(core_distance(dists, minPts=4), farthest),
        "returns distance to 4-nearest neighbour with minPts=4")
    assert_true(
        equal_arrays(
            core_distance(dists, weight_fun=unit_weight, minWt=[4] * count),
            farthest),
        "returns distance to 4-nearest neighbour distance with unit "
        "weights and minWts=4")

    # --- weighted distances --------------------------------------------------
    # Condensed pairwise distances:
    #   (0, 1) = 17.7   (0, 2) = 70.0   (0, 3) = 97.1   (0, 4) = 50.8
    #   (1, 2) = 121.6  (1, 3) = 79.4   (1, 4) = 82.1
    #   (2, 3) = 120.9  (2, 4) = 77.3
    #   (3, 4) = 14.4
    # Pairwise weights:
    #   (0, 1) = 4   (0, 2) = 8   (0, 3) = 6   (0, 4) = 10
    #   (1, 2) = 6   (1, 3) = 6   (1, 4) = 10
    #   (2, 3) = 12  (2, 4) = 20
    #   (3, 4) = 15
    # Cumulative weights per point, as listed by the original author:
    #   0: 4, 14, 22, 28
    #   1: 4, 10, 20, 26
    #   2: 8, 28, 36, 42
    #   3: 15, 21, 27, 39
    #   4: 15, 25, 45, 55
    # Distances from each point, nearest first:
    #   0: 17.7, 50.8, 70.0, 97.1
    #   1: 17.7, 79.4, 82.1, 121.6
    #   2: 70.0, 77.3, 120.9, 121.6
    #   3: 14.4, 79.4, 97.1, 120.9
    #   4: 14.4, 50.8, 77.3, 82.1
    dists = np.array([17.7, 70., 97.1, 50.8, 121.6,
                      79.4, 82.1, 120.9, 77.3, 14.4])
    pair_weights = np.array([4, 8, 6, 10, 6, 6, 10, 12, 20, 15])
    count = sp_distance.num_obs_y(dists)

    def pair_weight(i, j):
        # Look the weight up through the pair's condensed index.
        return pair_weights[condensed_index(count, i, j)]

    def weighted_core(limit):
        return core_distance(dists, weight_fun=pair_weight,
                             minWt=[limit] * count)

    # Both limits are covered by a single assertion, as before.
    assert_true(
        equal_arrays(weighted_core(20), [70.0, 82.1, 77.3, 79.4, 50.8]) and
        equal_arrays(weighted_core(30), [97.1, 121.6, 120.9, 120.9, 77.3]),
        "computes weighted core distances at various limits")