def test_clusters_b(self):
    """
    Check the clusters reported for a centered and sorted eight-leaf tree.
    Clusters are compared as a set of frozensets so that neither the order
    of the clusters nor the order of labels within a cluster matters.
    """
    nested_labels = [[[0, [1, 2]], [3, [4, 5, 6]]], 7]
    tree = center_and_sort_tree(mtree.create_tree(nested_labels))
    observed = {frozenset(cluster) for cluster in get_clusters(tree)}
    expected_clusters = [
        [1, 2],
        [0, 1, 2],
        [4, 5, 6],
        [0, 1, 2, 7],
        [3, 4, 5, 6],
    ]
    expected = {frozenset(cluster) for cluster in expected_clusters}
    self.assertEqual(expected, observed)
def test_dendrogram_imager(self):
    """
    Build a DendrogramImager for a small tree and save its image to disk.
    The image is written to 'dendrogram-test.png' in the working directory.
    """
    filename = 'dendrogram-test.png'
    root = mtree.create_tree([[0, [1, 2]], 3, [4, 5, 6]])
    imager = DendrogramImager(root)
    # the context manager closes the file even if saving the image fails
    with open(filename, 'wb') as fout:
        imager.im.save(fout)
def test_dendrogram(self):
    """
    Compare the ascii art renderings of a small tree to the expected text.
    Both the tall rendering (explicit draw_tall_dendrogram) and the
    rendering produced with the default drawing function are checked.
    """
    tree = mtree.create_tree([[0, [1, 2]], 3, [4, 5, 6]])
    # tall rendering
    tall_art = AsciiArt(tree, draw_tall_dendrogram)
    self.assertEqual(str(tall_art), g_expected_tall_art)
    # short rendering, using the default drawing function of AsciiArt
    short_art = AsciiArt(tree)
    self.assertEqual(str(short_art), g_expected_short_art)
def test_heatmap_with_dendrogram(self):
    """
    Draw two heatmaps with dendrograms from a random correlation matrix.
    The first image ('r-test.png') uses the correlation coefficients and
    the second image ('rr-test.png') uses their elementwise squares.
    No assertions are made; the test only exercises the drawing code.
    """
    tree = mtree.create_tree([[0, [1, 2]], 3, [4, 5, 6]])
    # correlation matrix of seven random rows, one row per leaf label
    corr = np.corrcoef(np.random.random((7, 7)))
    # draw the correlation heatmap
    get_heatmap_with_dendrogram(
            corr, tree, gradient.correlation_to_rgb, 'r-test.png')
    # draw the squared correlation heatmap
    squared_corr = corr * corr
    get_heatmap_with_dendrogram(
            squared_corr, tree,
            gradient.squared_correlation_to_rgb, 'rr-test.png')
import mtree

# Example input: a flat list of two-character strings.
# Every entry needs its own trailing comma, because adjacent string
# literals are silently concatenated into a single element.
example_hash_list = [
    "aa",
    "bb",
    "cc",
    "dd",
    "ee",
    "11",
    "22",
    "33",
    "44",
    "55",
    "66",
]

# build a tree from the example list and print the result
print(mtree.create_tree(example_hash_list))
def test_center_and_sort_tree(self):
    """
    Check the label sets below the root after centering and sorting.
    The label set of each child of the new root is compared, as a set of
    frozensets, to the expected partition of the eight labels.
    """
    nested_labels = [[[0, [1, 2]], 3, [4, 5, 6]], 7]
    tree = center_and_sort_tree(mtree.create_tree(nested_labels))
    expected_label_sets = [[0, 1, 2], [3], [7], [4, 5, 6]]
    expected = {frozenset(labels) for labels in expected_label_sets}
    observed = {frozenset(get_label_set(child)) for child in tree.children}
    self.assertEqual(expected, observed)
def test_id_to_nlabels(self):
    """
    Check the label counts that build_id_to_nlabels records per node id.
    The root of the eight-label tree should map to 8, and the counts of
    the children of the root should add up to the same total.
    """
    tree = mtree.create_tree([[[0, [1, 2]], 3, [4, 5, 6]], 7])
    nlabels_by_id = build_id_to_nlabels(tree, {})
    # the root accounts for every label in the tree
    self.assertEqual(nlabels_by_id[id(tree)], 8)
    # the children of the root partition the labels
    child_total = sum(nlabels_by_id[id(child)] for child in tree.children)
    self.assertEqual(child_total, 8)
def build_tree_helper(boxed_U_in, S_in, ordered_labels, tree_data):
    """
    Get the root of an mtree reconstructed from the transformed data.
    The input matrix U is passed inside a one-element list (a "box").
    When an informative split is found, the matrix is removed from the box
    and the local reference is deleted before recursing, so that the matrix
    can be freed as long as the caller holds no other reference to it.
    When no informative split is found the box is left untouched.
    @param boxed_U_in: a one-element list holding part of the laplacian sqrt obtained by svd
    @param S_in: another part of the laplacian sqrt obtained by svd
    @param ordered_labels: a list of labels conformant with rows of U
    @param tree_data: state whose scope is the construction of the tree
    @return: an mtree rooted at a degree 2 vertex unless the input matrix has 3 rows
    @raise ValueError: if the box does not hold exactly one 2d array
    with at least three rows and three columns
    """
    # take U_in out of the box
    if len(boxed_U_in) != 1:
        raise ValueError('expected a 2d array as the only element of a list')
    U_in = boxed_U_in[0]
    shape = U_in.shape
    if len(shape) != 2:
        raise ValueError('expected a 2d array as the only element of a list')
    p, n = shape
    if p < 3 or n < 3:
        raise ValueError(
                'expected the input matrix to have '
                'at least three rows and columns')
    # look for an informative split
    index_split = None
    if p > 3:
        # the signs of v match the signs of the fiedler vector
        v = khorr.get_fiedler_vector(U_in, S_in)
        index_split = splitbuilder.eigenvector_to_split(v)
        # if the split is degenerate then don't use it
        if min(len(x) for x in index_split) < 2:
            index_split = None
    # if no informative split was found then create a degenerate tree
    if not index_split:
        root = mtree.create_tree(ordered_labels)
        for node in root.preorder():
            if node.has_label():
                tree_data.add_node(node)
        return root
    # get the sorted row indices on each side of the split
    a, b = (sorted(x) for x in index_split)
    # Create two new matrices.
    # Be somewhat careful to not create lots of intermediate matrices.
    # Each matrix has one row per selected index plus a final outgroup row.
    A = np.zeros((len(a)+1, n))
    B = np.zeros((len(b)+1, n))
    for i, index in enumerate(a):
        A[i] = U_in[index] * S_in
    for i, index in enumerate(b):
        B[i] = U_in[index] * S_in
    # Both column sums are taken while the outgroup rows are still zero,
    # so each outgroup row is the sum of the rows on the other side.
    A_outgroup = np.sum(B, 0)
    B_outgroup = np.sum(A, 0)
    A[-1] = A_outgroup
    B[-1] = B_outgroup
    # delete the two references to the old matrix
    del U_in
    del boxed_U_in[0]
    # recursively construct the subtrees;
    # the stack is popped from the end so the 'a' side is processed first
    subtrees = []
    stack = [[b, a, B], [a, b, A]]
    # delete non-stack references to partial matrices
    del A
    del B
    # process the partial matrices
    while stack:
        selection, _complement, summed_L_sqrt = stack.pop()
        # record the outgroup label for this subtree
        outgroup_label = tree_data.decrement_outgroup_label()
        # create the ordered list of labels corresponding to leaves of the subtree
        next_ordered_labels = [ordered_labels[i] for i in selection]
        next_ordered_labels.append(outgroup_label)
        # get the criterion matrix for the next iteration
        U, S, VT = np.linalg.svd(summed_L_sqrt, full_matrices=False)
        del VT
        # delete matrices that are no longer useful
        del summed_L_sqrt
        # build the tree recursively;
        # the box holds the only remaining reference to U
        boxed_U = [U]
        del U
        root = build_tree_helper(boxed_U, S, next_ordered_labels, tree_data)
        # if the root is degree 2 then remove the root node;
        # the return value is not needed because root is rebound just below
        if root.degree() == 2:
            root.remove()
        # root the tree at the outgroup node
        root = tree_data.label_to_node[outgroup_label]
        root.reroot()
        # we don't need the outgroup label anymore
        tree_data.remove_node(root)
        # we can also remove the label from the outgroup node itself
        root.label = None
        # save the properly rooted subtree
        subtrees.append(root)
    # connect the two subtrees at their roots
    left_root, right_root = subtrees
    right_root = right_root.remove()
    left_root.add_child(right_root)
    return left_root