def get_response_content(fs):
    """
    Report the splits of the submitted tree and of its pruned version.
    @param fs: the submitted form fields; this function reads
    fs.names, fs.tree, fs.show_newick and fs.show_art
    @return: the response text
    """
    # one selected name per line of the names field
    selection = Util.get_stripped_lines(StringIO(fs.names))
    original_tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # reject any selected name that is not the name of a tip of the tree
    selected_name_set = set(selection)
    tip_name_set = set(tip.get_name() for tip in original_tree.gen_tips())
    unknown_names = selected_name_set - tip_name_set
    if unknown_names:
        raise HandlingError(
            'the following selected names '
            'are not valid tips: %s' % str(tuple(unknown_names)))
    # the pruning is applied to a second parse of the same newick string
    pruned_tree = get_pruned_tree(
        NewickIO.parse(fs.tree, Newick.NewickTree), selected_name_set)
    # write one section per tree
    out = StringIO()
    sections = (
        (original_tree, 'the original tree'),
        (pruned_tree, 'the pruned tree'),
    )
    for current_tree, description in sections:
        print >> out, 'calculating splits of %s:' % description
        print >> out, process_tree(
            current_tree, description, fs.show_newick, fs.show_art)
    return out.getvalue()
def test_get_split_distance(self):
    """
    Test the function that gets the number of missing nontrivial partitions.
    Each expectation is a (first tree, second tree, distance) triple.
    """
    newick_strings = (
        ('a', '((A:1, B:1):1, C:1, (D:1, E:1):1);'),
        ('b', '((A:1, B:1):1, D:1, (C:1, E:1):1);'),
        ('c', '((A:1, D:1):1, C:1, (B:1, E:1):1);'),
        ('d', '((A:1, D:1):1, (C:1, B:1, E:1):1);'),
    )
    trees = dict(
        (key, NewickIO.parse(newick, FelTree.NewickTree))
        for key, newick in newick_strings)
    expectations = (
        # the distance from a tree to itself should be zero
        ('a', 'a', 0), ('b', 'b', 0), ('c', 'c', 0), ('d', 'd', 0),
        # some of the distances are symmetric
        ('a', 'b', 1), ('b', 'a', 1),
        ('b', 'c', 2), ('c', 'b', 2),
        ('a', 'c', 2), ('c', 'a', 2),
        # the distance can be asymmetric
        # when internal nodes are not all of order 3
        ('a', 'd', 1), ('d', 'a', 2),
    )
    for first, second, distance in expectations:
        observed = get_split_distance(trees[first], trees[second])
        self.assertEqual(observed, distance)
def get_response_content(fs):
    """
    Compute the loss of a query tree with respect to a reference tree.
    @param fs: the submitted form fields; this function reads
    fs.query, fs.reference, fs.uniform, fs.weighted and fs.normalize
    @return: the loss followed by a newline,
    or a failure message when the normalizing denominator is zero
    NOTE(review): nothing is assigned to the loss when neither fs.uniform
    nor fs.weighted is set; presumably the form guarantees one of them.
    """
    query_tree = NewickIO.parse(fs.query, FelTree.NewickTree)
    reference_tree = NewickIO.parse(fs.reference, FelTree.NewickTree)
    # the numerator compares the query tree to the reference tree
    if fs.uniform:
        loss_numerator = TreeComparison.get_split_distance(
            query_tree, reference_tree)
    elif fs.weighted:
        loss_numerator = TreeComparison.get_weighted_split_distance(
            query_tree, reference_tree)
    # the denominator depends on the reference tree only
    if not fs.normalize:
        loss_denominator = 1
    elif fs.uniform:
        loss_denominator = float(
            TreeComparison.get_nontrivial_split_count(reference_tree))
    elif fs.weighted:
        loss_denominator = float(
            TreeComparison.get_weighted_split_count(reference_tree))
    # a zero denominator cannot be used for normalization
    if not loss_denominator:
        return 'normalization failed\n'
    return str(loss_numerator / loss_denominator) + '\n'
def test_update_generalized_nj_big(self):
    """
    Test the generation of successor distance matrices
    from a more complicated initial distance matrix.
    The expected matrices are read off two hand-written subtrees.
    """
    def tree_distances(newick_string, ordered_taxa):
        # distance matrix of the parsed tree, in the given taxon order
        parsed = NewickIO.parse(newick_string, FelTree.NewickTree)
        return np.array(parsed.get_distance_matrix(ordered_taxa))

    # The initial ordering is arbitrary,
    # and the subtree orderings are dependent on the initial ordering.
    taxa_initial = ['1', '4', '2', '5', '3', '6']
    D_initial = tree_distances(
        '(((3:9, 2:2):4, 1:2):1, (4:1, 5:3):7, 6:2);', taxa_initial)
    # in subtree a the taxa 4, 5 and 6 are replaced by the single tip B
    expected_a = tree_distances(
        '((3:9, 2:2):4, 1:2, B:0.5);', ['1', 'B', '2', '3'])
    # in subtree b the taxa 1, 2 and 3 are replaced by the single tip A
    expected_b = tree_distances(
        '((4:1, 5:3):7, 6:2, A:0.5);', ['A', '4', '5', '6'])
    # indices 1, 3, 5 of the initial ordering are the taxa 4, 5, 6
    # and indices 0, 2, 4 are the taxa 1, 2, 3
    observed_a = update_generalized_nj(D_initial, set([1, 3, 5]))
    observed_b = update_generalized_nj(D_initial, set([0, 2, 4]))
    self.assertTrue(np.allclose(expected_a, observed_a))
    self.assertTrue(np.allclose(expected_b, observed_b))
def do_distance_analysis(X):
    """
    Compare two distance matrices with the distances implied by
    the neighbor joining trees built from them.
    The first matrix holds the pairwise Euclidean distances between
    the elements of X and the second matrix holds their squares.
    @param X: an iterable of points that support subtraction
    NOTE(review): the labels are fixed to '0'..'3',
    so X presumably holds exactly four points.
    @return: the report as a stripped multi-line string
    """
    labels = list("0123")

    def rebuild(distances):
        # distance matrix -> neighbor joining tree -> newick string
        # -> FelTree -> distance matrix implied by the tree
        nj_tree = NeighborJoining.make_tree(distances, labels)
        newick = NewickIO.get_newick_string(nj_tree)
        parsed = NewickIO.parse(newick, FelTree.NewickTree)
        return newick, np.array(parsed.get_distance_matrix(labels))

    # plain Euclidean distances and their tree reconstruction
    D_sqrt = np.array([[np.linalg.norm(y - x) for x in X] for y in X])
    sqrt_tree_string, D_sqrt_reconstructed = rebuild(D_sqrt)
    # squared Euclidean distances and their tree reconstruction
    D = D_sqrt ** 2
    tree_string, D_reconstructed = rebuild(D)
    # every entry of the report is written on its own line
    report = (
        "matrix of Euclidean distances between tetrahedron vertices:",
        D_sqrt,
        "neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):",
        sqrt_tree_string,
        "distance matrix implied by this tree:",
        D_sqrt_reconstructed,
        "matrix of squared distances between tetrahedron vertices:",
        D,
        "neighbor joining tree constructed from D = squared Euclidean distances (normal):",
        tree_string,
        "distance matrix implied by this tree:",
        D_reconstructed,
    )
    out = StringIO()
    for entry in report:
        print >> out, entry
    return out.getvalue().strip()
def do_distance_analysis(X):
    """
    Build neighbor joining trees from Euclidean and from squared
    Euclidean distances and show the distances each tree implies.
    @param X: an iterable of points that support subtraction
    NOTE(review): the four labels '0'..'3' are hard coded,
    so X is presumably expected to hold four points.
    @return: the report as a stripped multi-line string
    """
    labels = list('0123')
    # pairwise Euclidean distances between the points
    euclidean = np.array([[np.linalg.norm(q - p) for p in X] for q in X])
    euclidean_newick = NewickIO.get_newick_string(
        NeighborJoining.make_tree(euclidean, labels))
    euclidean_implied = np.array(
        NewickIO.parse(
            euclidean_newick, FelTree.NewickTree).get_distance_matrix(labels))
    # the same analysis with every distance squared
    squared = euclidean**2
    squared_newick = NewickIO.get_newick_string(
        NeighborJoining.make_tree(squared, labels))
    squared_implied = np.array(
        NewickIO.parse(
            squared_newick, FelTree.NewickTree).get_distance_matrix(labels))
    # each section is a heading followed by the value it describes
    sections = [
        ('matrix of Euclidean distances between tetrahedron vertices:',
            euclidean),
        ('neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):',
            euclidean_newick),
        ('distance matrix implied by this tree:',
            euclidean_implied),
        ('matrix of squared distances between tetrahedron vertices:',
            squared),
        ('neighbor joining tree constructed from D = squared Euclidean distances (normal):',
            squared_newick),
        ('distance matrix implied by this tree:',
            squared_implied),
    ]
    out = StringIO()
    for heading, value in sections:
        print >> out, heading
        print >> out, value
    return out.getvalue().strip()
def _create_trees(self):
    """
    Set self.full_tree and self.pruned_tree from self.newick_string.
    The full tree is a Newick.NewickTree object,
    and the pruned tree is a FelTree.NewickTree object.
    """
    self.full_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
    # The redundant nodes are removed from a separate scratch parse,
    # which is then round-tripped through a newick string.
    scratch_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
    remove_redundant_nodes(scratch_tree)
    self.pruned_tree = NewickIO.parse(
        NewickIO.get_newick_string(scratch_tree), FelTree.NewickTree)
def test_get_weighted_split_distance(self):
    """
    Test the function that gets the weighted split distance
    between a pair of trees.
    """
    tree_a = NewickIO.parse(
        '((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);', FelTree.NewickTree)
    tree_b = NewickIO.parse(
        '(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);', FelTree.NewickTree)
    expectations = (
        # the distance from a tree to itself should be zero
        (tree_a, tree_a, 0),
        (tree_b, tree_b, 0),
        # the distance is not necessarily symmetric
        (tree_a, tree_b, 20),
        (tree_b, tree_a, 15),
    )
    for first, second, distance in expectations:
        self.assertEqual(get_weighted_split_distance(first, second), distance)
def hard_coded_analysis_a():
    """
    Print distance matrices of a fixed four-tip tree
    next to the distance matrices of the points derived from them.
    The analysis is done once with the name-ordered matrix
    and once with the matrix ordered by node id.
    """
    tree = NewickIO.parse('(a:1, (b:2, d:5):1, c:4);', FelTree.NewickTree)
    # tips come first, sorted by name; internal nodes get an empty label
    tip_pairs = sorted((node.name, id(node)) for node in tree.gen_tips())
    internal_ids = [id(node) for node in tree.gen_internal_nodes()]
    states = [name for name, _ in tip_pairs] + [''] * len(internal_ids)
    id_list = [node_id for _, node_id in tip_pairs] + internal_ids
    for tips_only in (True, False):
        # get the distance matrix from the tree
        if tips_only:
            print('leaves only:')
            distance_matrix = tree.get_distance_matrix(states)
        else:
            print('leaves and internal nodes:')
            distance_matrix = tree.get_full_distance_matrix(id_list)
        print('distance matrix from the tree:')
        print(MatrixUtil.m_to_string(distance_matrix))
        # show each label next to its point
        z_points = list(gen_euclidean_points(distance_matrix))
        for state, point in zip(states, z_points):
            print('%s %s' % (state, point))
        # get the distance matrix back from the points
        print('distance matrix from the transformed points:')
        print(MatrixUtil.m_to_string(get_euclidean_distance_matrix(z_points)))
        # a blank line closes the section
        print('')
def get_response_content(fs):
    """
    Report a squared distance to the origin for each point that
    gen_euclidean_points yields for the tip distance matrix of the tree.
    @param fs: the submitted form fields; only fs.tree is read
    @return: the response text, a heading line followed by one value per point
    """
    # get the tree and its tip names in sorted order
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    states = sorted(node.name for node in tree.gen_tips())
    n = len(states)
    # get the points that correspond to the tip distance matrix
    distance_matrix = tree.get_distance_matrix(states)
    z_points = list(gen_euclidean_points(distance_matrix))
    # prepare the response
    out = StringIO()
    print >> out, 'distances to the virtual center of the tree:'
    # The origin has n coordinates, so the zip below uses
    # at most the first n coordinates of each point.
    origin = [0] * n
    for z in z_points:
        print >> out, sum((a - b) ** 2 for a, b in zip(z, origin))
    return out.getvalue()
def hard_coded_analysis_b():
    """
    Numerically search for the power 2 steiner points.
    The search starts from a random guess and prints
    the optimizer result followed by objective.best.
    """
    # make a distance matrix whose order is alphabetical in the tip names
    tree = NewickIO.parse('(a:1, (b:2, d:5):1, c:4);', FelTree.NewickTree)
    states = sorted(node.name for node in tree.gen_tips())
    distance_matrix = tree.get_distance_matrix(states)
    # decompose the pseudo inverse of the laplacian matrix
    L_pinv = get_laplacian_pseudo_inverse(distance_matrix)
    eigenvalues, eigenvectors = get_eigendecomposition(L_pinv)
    print('eigenvalues of the pseudo inverse of the laplacian:')
    print(eigenvalues)
    # each taxon gets a transformed point
    z_points = list(gen_euclidean_points_from_eigendecomposition(
        eigenvalues, eigenvectors))
    objective = MyObjective(z_points)
    # The initial guess is a pair of random steiner points,
    # each with one coordinate per taxon, stored back to back.
    x0 = [random.random() for _ in range(2 * len(states))]
    result = optimize.fmin(objective, x0)
    print(result)
    print(objective.best)
def process(tree_string):
    """
    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    # this changes the numpy print options for the whole process
    np.set_printoptions(linewidth=200)
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    # distances among the nodes in the order of the ids
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # The uniform mass vector gives every vertex the same mass.
    m_uniform = np.array([1] * nvertices, dtype=float) / nvertices
    # The degenerate mass vector puts mass only on the first nleaves entries.
    # NOTE(review): this presumably relies on the ordered ids
    # listing the leaves first -- confirm against the ordering helper.
    m_degenerate = np.array(
        [1] * nleaves + [0] * (nvertices - nleaves), dtype=float) / nleaves
    # write the names followed by one section per mass vector
    out = StringIO()
    print >> out, 'ordered names:'
    print >> out, ordered_names
    print >> out
    sections = (
        ('embedded points with mass uniformly distributed among all vertices:',
            m_uniform),
        ('embedded points with mass uniformly distributed among the leaves:',
            m_degenerate),
    )
    for heading, mass in sections:
        print >> out, heading
        print >> out, Euclid.edm_to_weighted_points(D, mass)
        print >> out
    return out.getvalue().strip()
def test_felsenstein(self):
    """
    Compute the expected and the observed scaled contrast matrices.
    NOTE(review): nothing is asserted here, so this only checks
    that the computation runs without raising an exception.
    """
    tree = NewickIO.parse(g_felsenstein_tree_string, FelTree.NewickTree)
    ordered_names = ('a', 'b', 'c', 'd', 'e')
    # scale each reference contrast by the inverse square root of its variance
    expected_scale = np.diag(1/np.sqrt(g_contrast_variances))
    C_expected = np.dot(g_contrast_matrix, expected_scale)
    # do the same with the contrasts computed from the tree
    contrasts, variances = get_contrasts_and_variances(tree, ordered_names)
    observed_scale = np.diag(1/np.sqrt(np.array(variances)))
    C_observed = np.dot(np.array(contrasts).T, observed_scale)
def get_form():
    """
    @return: the body of a form
    """
    # the default tree is shown as a narrow newick string
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    tree_field = Form.MultiLine(
        'tree', 'newick tree with branch lengths',
        NewickIO.get_narrow_newick_string(default_tree, 60))
    # each taxon field is a (name, description, default) triple
    taxon_specs = (
        ('lhs_a', 'the first taxon on one side of the split', 'a'),
        ('lhs_b', 'the second taxon on one side of the split', 'b'),
        ('rhs_a', 'the first taxon on the other side of the split', 'x'),
        ('rhs_b', 'the second taxon on the other side of the split', 'y'),
    )
    taxon_fields = [Form.SingleLine(*spec) for spec in taxon_specs]
    # each output option is a (name, description) pair
    option_specs = (
        ('show_response', 'show the Laplacian response matrix'),
        ('show_reduced_response', 'show the 2x2 submatrix'),
        ('show_blen', 'show the branch length implied by the split'),
    )
    option_group = Form.CheckGroup(
        'options', 'output options',
        [Form.CheckItem(*spec) for spec in option_specs])
    return [tree_field] + taxon_fields + [option_group]
def get_response_content(fs):
    """
    @param fs: the submitted form fields; this function reads fs.tree,
    fs.labels, fs.epsilon, fs.plain_format, fs.matlab_format and fs.r_format
    @return: the response text, which is empty if no format is selected
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # the labels are given one per line
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # the labels must be exactly the names of the tips
    tip_names = set(tip.get_name() for tip in tree.gen_tips())
    if set(ordered_labels) != tip_names:
        raise HandlingError(
            'the labels should match the labels of the leaves of the tree')
    # go from the tip distances to the contrast matrix
    D = np.array(tree.get_distance_matrix(ordered_labels))
    w, v = get_eigendecomposition(Euclid.edm_to_laplacian(D))
    C = get_contrast_matrix(w, v)
    # set elements with small absolute value to zero
    C[abs(C) < fs.epsilon] = 0
    # the first selected format decides how the matrix is written
    if fs.plain_format:
        formatter = MatrixUtil.m_to_string
    elif fs.matlab_format:
        formatter = MatrixUtil.m_to_matlab_string
    elif fs.r_format:
        formatter = MatrixUtil.m_to_R_string
    else:
        formatter = None
    out = StringIO()
    if formatter is not None:
        print >> out, formatter(C)
    return out.getvalue()
def main():
    """
    Run the infinite alleles sampler on a fixed tree for about
    two seconds and print a summary of its proposal counters.
    """
    sequence_length = 100
    allocated_seconds = 2
    tree_string = '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, (((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # the leaf names are used in whatever order the tree yields them
    ordered_names = [tip.name for tip in tree.gen_tips()]
    sampler = DMSampler.InfiniteAllelesSampler(
        tree, ordered_names, sequence_length)
    sampler.set_inf_replacement(20)
    sampler.set_zero_replacement(0.0)
    # Keep sampling until the time budget is spent;
    # the samples themselves are discarded.
    start_time = time.clock()
    run_seconds = 0
    for _ in sampler.gen_samples_or_none():
        run_seconds = time.clock() - start_time
        if run_seconds > allocated_seconds:
            break
    # summarize the counters kept by the sampler
    print('these are the results for a %s second run:' % (run_seconds,))
    print('%s samples were proposed' % (sampler.proposed,))
    print('%s samples were accepted' % (sampler.accepted,))
    print('%s proposals had a distance estimate of zero' % (
        sampler.proposals_with_zero,))
    print('%s proposals had a distance estimate of infinity' % (
        sampler.proposals_with_inf,))
def get_response_content(fs):
    """
    Evaluate a split of the tips into selected and unselected names.
    @param fs: the submitted form fields; reads fs.tree and fs.selection
    @return: the value of the quadratic form Y R Y followed by a newline,
    where Y is the +1/-1 indicator of the selection
    and R is computed by Clustering.get_R_balaji from the tip distances
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # the selected names are given one per line
    selected_name_set = set(
        Util.get_stripped_lines(fs.selection.splitlines()))
    possible_name_set = set(tip.get_name() for tip in tree.gen_tips())
    extra_names = selected_name_set - possible_name_set
    if extra_names:
        raise HandlingError(
            'the following selected names '
            'are not valid tips: %s' % str(tuple(extra_names)))
    # both sides of the split must be nonempty
    complement_name_set = possible_name_set - selected_name_set
    if not (selected_name_set and complement_name_set):
        raise HandlingError('the selection is degenerate')
    # the tips are used in the order in which the tree yields them
    ordered_names = [tip.get_name() for tip in tree.gen_tips()]
    # selected tips map to 1 and the other tips map to -1
    Y = np.array(
        [1 if name in selected_name_set else -1 for name in ordered_names])
    R = Clustering.get_R_balaji(tree.get_distance_matrix(ordered_names))
    # Y is one dimensional, so its transpose is Y itself
    left_product = np.dot(Y, R)
    value = np.dot(left_product, Y.T)
    return str(value) + '\n'
def get_response_content(fs):
    """
    Show the matrices computed by get_aug_a and get_aug_b
    and whether they agree.
    @param fs: the submitted form fields; reads fs.tree_string and fs.N
    @return: the response text
    """
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # the ids of the internal nodes come first, followed by the tip ids
    internal_ids = [id(node) for node in tree.gen_internal_nodes()]
    tip_ids = [id(node) for node in tree.gen_tips()]
    ordered_ids = internal_ids + tip_ids
    # distances among all of the vertices, in the order of the ids
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # p counts the internal nodes and q counts the tips
    p, q = len(internal_ids), len(tip_ids)
    N = fs.N
    aug_a = get_aug_a(D, p, q, N)
    aug_b = get_aug_b(D, p, q, N)
    # each matrix is written under its label and followed by a blank line
    out = StringIO()
    for label, matrix in (("-(1/2)MEDE'M':", aug_a), ("-(1/2)HMDM'H:", aug_b)):
        print >> out, label
        print >> out, matrix
        print >> out
    print >> out, 'allclose:', np.allclose(aug_a, aug_b)
    return out.getvalue()
def test_get_weighted_split_count(self):
    """
    Test the function that gets the weighted number of nontrivial splits.
    The weighted counts differ among these six-leaf trees.
    """
    expectations = (
        ('((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);', 45),
        ('(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);', 50),
        ('(((A:1, B:1):1, C:1):1, (D:1, (E:1, F:1):1):1);', 50),
    )
    # parse all of the trees before checking any of the counts
    cases = [
        (NewickIO.parse(newick, FelTree.NewickTree), count)
        for newick, count in expectations]
    for tree, count in cases:
        self.assertEqual(get_weighted_split_count(tree), count)
def get_response_content(fs):
    """
    Collect the arguments of create_image from the form fields
    and return whatever create_image returns.
    @param fs: the submitted form fields; this function reads tree_string,
    imageformat, hticks, border, slow, fast and denom
    @return: the result of create_image
    NOTE(review): no spectrum function is assigned when
    neither fs.slow nor fs.fast is set.
    """
    # the requested physical size of the image (in pixels)
    physical_size = (640, 480)
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nleaves = len(list(tree.gen_tips()))
    # distances among the nodes in the order given by get_ordered_ids
    D = np.array(tree.get_partial_distance_matrix(get_ordered_ids(tree)))
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    # the horizontal scale divides by the number of gaps between ticks
    if fs.hticks < 2:
        raise HandlingError(
            'expected at least two ticks on the horizontal axis')
    width, height = physical_size
    xoffset = yoffset = fs.border
    yscale = float(height - 2 * fs.border)
    xscale = (width - 2 * fs.border) / float(fs.hticks - 1)
    # pick the function that computes the augmented spectrum
    if fs.slow:
        fn = get_augmented_spectrum
    elif fs.fast:
        fn = get_augmented_spectrum_fast
    # The target eigenvalues come from the tip-only distance matrix
    # and are scaled by the denominator.
    tip_ids = [id(tip) for tip in tree.gen_tips()]
    D_tips = np.array(tree.get_partial_distance_matrix(tip_ids))
    G_tips = Euclid.edm_to_dccov(D_tips)
    target_ws = scipy.linalg.eigh(G_tips, eigvals_only=True) * fs.denom
    return create_image(
        ext, physical_size, xscale, yscale, xoffset, yoffset,
        D, nleaves, fs.hticks, fs.denom, fn, target_ws)
def test_get_split_branch(self):
    """
    Check get_split_branch on tip selections that are expected
    to give a branch and on selections that are expected to give None.
    """
    tree = NewickIO.parse('((a:1, b:2):3, c:4, d:5);', NewickTree)

    def tips_named(*names):
        # the tips of the tree whose names are among the given names
        return [tip for tip in tree.gen_tips() if tip.get_name() in names]

    def split_branch_length(tip_selection):
        # the length of the branch found for the selection
        node, directed_branch = tree.get_split_branch(tip_selection)
        return directed_branch.get_undirected_branch().get_branch_length()

    # tips 'a' and 'b' are expected to be separated by the branch of length 3
    self.assertEqual(split_branch_length(tips_named('a', 'b')), 3)
    # no branch is expected for tips 'a' and 'c'
    self.assertEqual(tree.get_split_branch(tips_named('a', 'c')), None)
    # no branch is expected when all of the tips are selected
    self.assertEqual(tree.get_split_branch(list(tree.gen_tips())), None)
    # no branch is expected when no tip is selected
    self.assertEqual(tree.get_split_branch([]), None)
    # the single tip 'd' is expected to give the branch of length 5
    self.assertEqual(split_branch_length(tips_named('d')), 5)
def get_form():
    """
    @return: the body of a form
    """
    # show the default tree as a narrow newick string
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    narrow_newick = NewickIO.get_narrow_newick_string(default_tree, 60)
    form_objects = []
    form_objects.append(Form.MultiLine(
        'tree', 'newick tree with branch lengths', narrow_newick))
    # two taxa on each side of the split
    form_objects.append(Form.SingleLine(
        'lhs_a', 'the first taxon on one side of the split', 'a'))
    form_objects.append(Form.SingleLine(
        'lhs_b', 'the second taxon on one side of the split', 'b'))
    form_objects.append(Form.SingleLine(
        'rhs_a', 'the first taxon on the other side of the split', 'x'))
    form_objects.append(Form.SingleLine(
        'rhs_b', 'the second taxon on the other side of the split', 'y'))
    # the optional parts of the output
    output_items = [
        Form.CheckItem('show_response', 'show the full Laplacian matrix'),
        Form.CheckItem('show_reduced_response', 'show the 2x2 submatrix'),
        Form.CheckItem(
            'show_blen', 'show the branch length implied by the split'),
    ]
    form_objects.append(
        Form.CheckGroup('options', 'output options', output_items))
    return form_objects
def get_default_original_tree():
    """
    @return: the default tree with each defined branch length halved
    """
    tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    for node in tree.preorder():
        length = node.get_branch_length()
        # nodes without a branch length are left untouched
        if length is None:
            continue
        node.set_branch_length(length * 0.5)
    return tree
def get_form():
    """
    @return: the body of a form
    """
    # show the default tree as a narrow newick string
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    tree_field = Form.MultiLine(
        'tree', 'newick tree',
        NewickIO.get_narrow_newick_string(default_tree, 60))
    # the first choice is the default one
    matrix_specs = (
        ('standard', 'tips only', True),
        ('augmented', 'all nodes'),
        ('named', 'all named nodes'),
    )
    matrix_group = Form.RadioGroup(
        'matrix', 'nodes used for the distance matrix',
        [RadioItem(*spec) for spec in matrix_specs])
    # every output option is checked by default
    option_specs = (
        ('show_split', 'exact criterion partition'),
        ('show_value', 'exact criterion value'),
        ('show_value_minus_trace', 'exact criterion value minus trace'),
        ('show_fiedler_split', 'show the spectral sign partition'),
        ('show_fiedler_eigenvector', 'show the eigenvector of interest'),
        ('show_labels', 'ordered labels'),
        ('show_distance_matrix', 'distance matrix'),
        ('show_M_matrix', 'M matrix'),
    )
    option_group = Form.CheckGroup(
        'output_options', 'output options',
        [CheckItem(name, label, True) for name, label in option_specs])
    return [tree_field, matrix_group, option_group]
def get_response_content(fs):
    """
    Show the first vector returned by get_eigenvectors for the tips alone
    next to the tip entries of the corresponding vector for the whole tree.
    @param fs: the submitted form fields; only fs.tree is read
    @return: the response text
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # the tips are ordered by name and then by id
    tip_pairs = sorted(
        set((tip.get_name(), id(tip)) for tip in tree.gen_tips()))
    ordered_tip_names, ordered_tip_ids = zip(*tip_pairs)
    # the internal nodes follow the tips, in preorder
    internal_ids = [id(node) for node in tree.preorder() if not node.is_tip()]
    ordered_ids = list(ordered_tip_ids) + internal_ids
    ntips = len(ordered_tip_ids)
    # one balaji matrix for all of the nodes and one for the tips alone
    full_R = Clustering.get_R_balaji(
        tree.get_partial_distance_matrix(ordered_ids))
    partial_R = Clustering.get_R_balaji(
        tree.get_partial_distance_matrix(ordered_tip_ids))
    # only the first vector of each pair is reported
    full_va, _full_vb = get_eigenvectors(full_R)
    partial_va, _partial_vb = get_eigenvectors(partial_R)
    # write the two vectors, separated by a blank line
    out = StringIO()
    print >> out, 'Fiedler vector associated with the graph'
    print >> out, 'for which the internal nodes are hidden:'
    print >> out, str(tuple(partial_va))
    print >> out
    print >> out, 'The tip subvector of the Fiedler vector'
    print >> out, 'associated with the graph of the full tree:'
    print >> out, str(tuple(full_va[:ntips]))
    return out.getvalue()
def get_form():
    """
    @return: a list of form objects
    """
    # the only field is the tree, shown as a narrow newick string
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    tree_field = Form.MultiLine(
        'tree', 'tree', NewickIO.get_narrow_newick_string(default_tree, 60))
    return [tree_field]
def process(tree_string):
    """
    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    # this changes the numpy print options for the whole process
    np.set_printoptions(linewidth=200)
    # build the newick tree from the string
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    vertex_count = len(list(tree.preorder()))
    leaf_count = len(list(tree.gen_tips()))
    # distances among the nodes in the order of the ids
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # every vertex gets the same mass
    uniform_counts = [1] * vertex_count
    m_uniform = np.array(uniform_counts, dtype=float) / vertex_count
    # Only the first leaf_count entries get mass.
    # NOTE(review): presumably the ordered ids start with the leaves.
    leaf_counts = [1] * leaf_count + [0] * (vertex_count - leaf_count)
    m_degenerate = np.array(leaf_counts, dtype=float) / leaf_count
    # compute both embeddings, then write the report
    points_uniform = Euclid.edm_to_weighted_points(D, m_uniform)
    points_degenerate = Euclid.edm_to_weighted_points(D, m_degenerate)
    blocks = (
        ('ordered names:', ordered_names),
        ('embedded points with mass uniformly distributed among all vertices:',
            points_uniform),
        ('embedded points with mass uniformly distributed among the leaves:',
            points_degenerate),
    )
    out = StringIO()
    for heading, value in blocks:
        print >> out, heading
        print >> out, value
        print >> out
    return out.getvalue().strip()
def get_form():
    """
    @return: the body of a form
    """
    # the default tree is shown as a narrow newick string
    tree_string = '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, (((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);'
    default_tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    form_objects = [
        Form.MultiLine(
            'tree', 'tree',
            NewickIO.get_narrow_newick_string(default_tree, 60)),
        Form.Integer(
            'sequence_length', 'use sequences that are this long',
            100, low=1),
    ]
    # Each radio group is a (name, description, choices) triple,
    # and a choice with a trailing True is the default choice.
    radio_specs = (
        ('assumption', 'distance matrix sampling model', (
            ('infinite_alleles', 'infinite alleles', True),
            ('jukes_cantor', 'Jukes-Cantor model (4 alleles)'))),
        ('infinity', 'matrices with infinite distances', (
            ('reject_infinity', 'reject these matrices', True),
            ('replace_infinity', 'use 20 instead'))),
        ('zero', 'matrices with zero distances', (
            ('reject_zero', 'reject these matrices'),
            ('replace_zero', 'use .00001 instead'),
            ('remain_zero', 'use 0 unmodified', True))),
        ('criterion', 'tree reconstruction criterion', (
            ('sign', 'spectral sign approximation', True),
            ('nj', 'neighbor joining'),
            ('random', 'random bipartition'))),
    )
    for group_name, group_label, choices in radio_specs:
        items = [RadioItem(*choice) for choice in choices]
        form_objects.append(Form.RadioGroup(group_name, group_label, items))
    return form_objects
def get_response_content(fs):
    """
    Evaluate a proposed split of four tips of a tree.
    The determinant of a 2x2 submatrix of the matrix computed by
    Clustering.get_R_stone is always reported;
    the other paragraphs are optional.
    @param fs: the submitted form fields; this function reads tree,
    lhs_a, lhs_b, rhs_a, rhs_b, show_response, show_reduced_response
    and show_blen
    @return: the response text, with blank lines between paragraphs
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # assert that the given labels are tips of the tree
    tip_name_set = set(node.get_name() for node in tree.gen_tips())
    user_name_set = set([fs.lhs_a, fs.lhs_b, fs.rhs_a, fs.rhs_b])
    bad_names = user_name_set - tip_name_set
    if bad_names:
        msg = 'these labels are not valid tips: %s' % ', '.join(bad_names)
        raise HandlingError(msg)
    # get the distance matrix with tips ordered alphabetically
    ordered_names = sorted(node.get_name() for node in tree.gen_tips())
    D = np.array(tree.get_distance_matrix(ordered_names))
    # get the response matrix
    R = Clustering.get_R_stone(D)
    # Get the two by two matrix whose rows are the left hand taxa
    # and whose columns are the right hand taxa.
    name_to_index = dict((name, i) for i, name in enumerate(ordered_names))
    la = name_to_index[fs.lhs_a]
    lb = name_to_index[fs.lhs_b]
    ra = name_to_index[fs.rhs_a]
    rb = name_to_index[fs.rhs_b]
    R_reduced = np.zeros((2, 2))
    R_reduced[0][0] = R[la][ra]
    R_reduced[0][1] = R[la][rb]
    R_reduced[1][0] = R[lb][ra]
    R_reduced[1][1] = R[lb][rb]
    # a determinant this close to zero is reported as exactly zero
    epsilon = 1e-13
    criterion = np.linalg.det(R_reduced)
    if abs(criterion) < epsilon:
        criterion = 0
    # In analogy to the four point condition,
    # use two different ways of calculating the distance
    # and keep the smaller one.
    blen_a = (D[la][rb] + D[lb][ra] - D[la][lb] - D[ra][rb]) / 2.0
    blen_b = (D[la][ra] + D[lb][rb] - D[la][lb] - D[ra][rb]) / 2.0
    blen = min(blen_a, blen_b)
    # define the response as a list of paragraphs
    paragraphs = []
    if fs.show_response:
        paragraphs.append([
            'response matrix with rows ordered alphabetically by leaf label:',
            MatrixUtil.m_to_string(R)])
    if fs.show_reduced_response:
        paragraphs.append([
            '2x2 submatrix of the response matrix:',
            MatrixUtil.m_to_string(R_reduced)])
    # the determinant is always shown
    paragraphs.append([
        'determinant of the 2x2 submatrix of the response matrix:',
        str(criterion)])
    if fs.show_blen:
        paragraphs.append([
            'branch length defined by the split:',
            str(blen)])
    # The return keyword and its value must be on the same logical line,
    # otherwise the function returns None.
    return '\n\n'.join('\n'.join(p) for p in paragraphs) + '\n'
def get_form():
    """
    @return: the body of a form
    """
    # the default tree is presented as a formatted newick string
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    default_tree_text = NewickIO.get_narrow_newick_string(default_tree, 60)
    tree_field = Form.MultiLine("tree", "newick tree", default_tree_text)
    # choices for the nodes that are used for the distance matrix
    matrix_items = [
        RadioItem("standard", "tips only", True),
        RadioItem("augmented", "all nodes"),
        RadioItem("named", "all named nodes"),
    ]
    matrix_group = Form.RadioGroup(
        "matrix", "nodes used for the distance matrix", matrix_items)
    # each (name, description) pair becomes a check item
    # constructed with True as its third argument
    output_choices = (
        ("show_split", "exact criterion partition"),
        ("show_value", "exact criterion value"),
        ("show_value_minus_trace", "exact criterion value minus trace"),
        ("show_fiedler_split", "show the spectral sign partition"),
        ("show_fiedler_eigenvector", "show the eigenvector of interest"),
        ("show_labels", "ordered labels"),
        ("show_distance_matrix", "distance matrix"),
        ("show_M_matrix", "M matrix"),
    )
    output_items = [
        CheckItem(name, description, True)
        for name, description in output_choices
    ]
    output_group = Form.CheckGroup(
        "output_options", "output options", output_items)
    return [tree_field, matrix_group, output_group]
def get_form():
    """
    @return: the body of a form
    """
    # the default tree is presented as a formatted newick string
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    default_tree_text = NewickIO.get_narrow_newick_string(default_tree, 60)
    # build each form object separately, in the order it is presented
    tree_field = Form.MultiLine('tree', 'newick tree', default_tree_text)
    length_field = Form.Integer(
            'length', 'use sequences that are this long', 100, low=1)
    assumption_group = Form.RadioGroup(
            'assumption', 'distance matrix sampling model', [
                Form.RadioItem('infinite_alleles', 'infinite alleles', True),
                Form.RadioItem(
                    'jukes_cantor', 'Jukes-Cantor model (4 alleles)')])
    infinity_group = Form.RadioGroup(
            'infinity', 'infinite distance estimates', [
                Form.RadioItem('reject_infinity', 'reject these matrices'),
                Form.RadioItem(
                    'replace_infinity', 'replace inf with 20', True)])
    zero_group = Form.RadioGroup(
            'zero', 'distance estimates of zero', [
                Form.RadioItem('reject_zero', 'reject these matrices'),
                Form.RadioItem('replace_zero', 'use .00001 instead of zero'),
                Form.RadioItem('remain_zero', 'use 0 unmodified', True)])
    return [
            tree_field,
            length_field,
            assumption_group,
            infinity_group,
            zero_group]
def get_form():
    """
    @return: the body of a form
    """
    # the default tree and its tip labels in sorted order
    default_tree_string = "(a:1, (b:2, d:5):1, c:4);"
    default_tree = NewickIO.parse(default_tree_string, FelTree.NewickTree)
    default_tree_text = NewickIO.get_narrow_newick_string(default_tree, 60)
    tip_labels = [tip.name for tip in default_tree.gen_tips()]
    tip_labels.sort()
    # the output options; only the perturbed matrix is constructed
    # with True as its third argument
    option_items = [
        CheckItem("perturbed", "a perturbed distance matrix", True),
        CheckItem("distance", "the original distance matrix"),
        CheckItem("outlabels", "ordered labels"),
    ]
    # define the form objects
    return [
        Form.MultiLine("tree", "newick tree", default_tree_text),
        Form.MultiLine("inlabels", "ordered labels", "\n".join(tip_labels)),
        Form.Float("strength", "perturbation strength", 0.1, low_inclusive=0),
        Form.CheckGroup("options", "output options", option_items),
    ]
def get_form():
    """
    @return: the body of a form
    """
    # format the default tree for display in the form
    parsed_default = NewickIO.parse(g_default_string, FelTree.NewickTree)
    tree_text = NewickIO.get_narrow_newick_string(parsed_default, 60)
    # accumulate the form objects in the order they are presented
    form_objects = []
    form_objects.append(Form.MultiLine('tree', 'newick tree', tree_text))
    form_objects.append(Form.Integer(
        'length', 'use sequences that are this long', 100, low=1))
    # the model used to sample distance matrices
    form_objects.append(Form.RadioGroup(
        'assumption', 'distance matrix sampling model', [
            Form.RadioItem('infinite_alleles', 'infinite alleles', True),
            Form.RadioItem('jukes_cantor', 'Jukes-Cantor model (4 alleles)'),
        ]))
    # what to do with infinite distance estimates
    form_objects.append(Form.RadioGroup(
        'infinity', 'infinite distance estimates', [
            Form.RadioItem('reject_infinity', 'reject these matrices'),
            Form.RadioItem('replace_infinity', 'replace inf with 20', True),
        ]))
    # what to do with distance estimates of zero
    form_objects.append(Form.RadioGroup(
        'zero', 'distance estimates of zero', [
            Form.RadioItem('reject_zero', 'reject these matrices'),
            Form.RadioItem('replace_zero', 'use .00001 instead of zero'),
            Form.RadioItem('remain_zero', 'use 0 unmodified', True),
        ]))
    return form_objects
def test_felsenstein(self):
    """
    Compute scaled contrast matrices for the felsenstein example tree.
    The expected and observed matrices are computed but not compared;
    this test makes no assertions.
    """
    tree = NewickIO.parse(g_felsenstein_tree_string, FelTree.NewickTree)
    ordered_names = ('a', 'b', 'c', 'd', 'e')
    # scale the expected contrasts by the reciprocal square roots
    # of the expected variances
    expected_scale = np.diag(1 / np.sqrt(g_contrast_variances))
    C_expected = np.dot(g_contrast_matrix, expected_scale)
    # do the same for the contrasts and variances computed from the tree
    contrasts, variances = get_contrasts_and_variances(tree, ordered_names)
    observed_scale = np.diag(1 / np.sqrt(np.array(variances)))
    C_observed = np.dot(np.array(contrasts).T, observed_scale)
    # the following string is disabled debugging output
    """
    print
    print 'felsenstein variances:'
    print g_contrast_variances
    print 'observed variances:'
    print variances
    print
    print 'felsenstein contrast matrix:'
    print C_expected
    print 'observed contrast matrix:'
    print C_observed
    L_expected = np.dot(C_expected, C_expected.T)
    L_observed = np.dot(C_observed, C_observed.T)
    print 'felsenstein L matrix:'
    print L_expected
    print 'observed L matrix:'
    print L_observed
    D = np.array(tree.get_distance_matrix(ordered_names))
    L = Euclid.edm_to_laplacian(D)
    print 'L matrix derived from the D matrix:'
    print L
    """
    pass
def get_response_content(fs):
    """
    Write an html table from an eigendecomposition of the tip distances.
    @param fs: an object holding the submitted form values; this function
    reads its tree attribute
    @return: the html response text
    @raise HandlingError: if the tree has fewer than two leaves
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_names = sorted(node.name for node in tree.gen_tips())
    if len(ordered_names) < 2:
        raise HandlingError('the newick tree should have at least two leaves')
    # get the eigendecomposition
    D = np.array(tree.get_distance_matrix(ordered_names))
    G = (-0.5) * MatrixUtil.double_centered(D)
    eigenvalues, eigenvector_transposes = np.linalg.eigh(G)
    # eigh returns the eigenvectors as columns; transpose to iterate them
    eigenvectors = eigenvector_transposes.T
    # Order the eigenpairs by decreasing eigenvalue.
    # Only the eigenvalue is used as the sort key: sorting the
    # (eigenvalue, eigenvector) tuples directly would compare the numpy
    # arrays whenever two eigenvalues are equal, which raises ValueError.
    sorted_eigensystem = sorted(
            zip(eigenvalues, eigenvectors),
            key=lambda pair: pair[0],
            reverse=True)
    sorted_eigenvalues, sorted_eigenvectors = zip(*sorted_eigensystem)
    # each row of M holds one entry from each sorted eigenvector
    M = list(zip(*sorted_eigenvectors))
    # write the html
    out = StringIO()
    print >> out, '<html>'
    print >> out, '<body>'
    print >> out, HtmlTable.get_labeled_table_string(
            sorted_eigenvalues, ordered_names, M)
    print >> out, '</body>'
    print >> out, '</html>'
    # write the response
    return out.getvalue()
def get_response_content(fs):
    """
    Report principal coordinate projections for two distance-like matrices.
    The first matrix is the matrix of pairwise tip distances and the
    second is the dissimilarity matrix derived from it entry by entry.
    @param fs: an object holding the submitted form values; this function
    reads its tree attribute
    @return: the response text
    """
    # arbitrarily define the size of the alphabet
    k = 4
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # define the order of the tip names
    ordered_tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # get the matrix of pairwise distances among the tips
    D = np.array(tree.get_distance_matrix(ordered_tip_names))
    D_vector = get_principal_coordinate(D)
    # get the dissimilarity matrix from the distance matrix
    dissimilarity = np.array([
        [distance_to_dissimilarity(d, k) for d in row] for row in D])
    dissimilarity_vector = get_principal_coordinate(dissimilarity)
    # both matrices are reported using the same layout
    sections = (
            (
                'original distance matrix:',
                D,
                'projections onto the principal coordinate using the original distance matrix:',
                D_vector),
            (
                'dissimilarity matrix:',
                dissimilarity,
                'projections onto the principal coordinate using the dissimilarity matrix:',
                dissimilarity_vector))
    # define the response
    out = StringIO()
    for matrix_title, matrix, vector_title, vector in sections:
        print >> out, matrix_title
        print >> out, MatrixUtil.m_to_string(matrix)
        print >> out
        print >> out, vector_title
        for name, value in zip(ordered_tip_names, vector):
            print >> out, '\t'.join((name, str(value)))
        print >> out
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Analyze each newick tree listed in the trees field.
    @param fs: an object holding the submitted form values; this function
    reads its trees, epsilon and options attributes
    @return: the response text
    @raise HandlingError: if a tree has fewer than four tips, an unlabeled
    tip or a repeated tip label, or if epsilon is not in [0, 1)
    """
    # parse the trees, checking the requirements of each one
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            msg = 'expected at least 4 tips but found ' + str(ntips)
            raise HandlingError(msg)
        for name in tip_names:
            if name is None:
                raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # get the threshold for negligibility of an eigenvector loading
    epsilon = fs.epsilon
    if not (0 <= epsilon < 1):
        raise HandlingError('invalid threshold for negligibility')
    # get the set of selected options
    selected_options = fs.options
    # analyze every tree before writing any of the response
    results = [AnalysisResult(tree, epsilon) for tree in trees]
    # write the lines of each result followed by a blank line
    out = StringIO()
    for result in results:
        for line in result.get_response_lines(selected_options):
            print >> out, line
        print >> out
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Compare principal coordinate projections of distances and dissimilarities.
    @param fs: an object holding the submitted form values; this function
    reads its tree attribute
    @return: the response text
    """
    # arbitrarily define the size of the alphabet
    k = 4
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # define the order of the tip names
    ordered_tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # get the matrix of pairwise distances among the tips
    D = np.array(tree.get_distance_matrix(ordered_tip_names))
    D_vector = get_principal_coordinate(D)
    # convert each distance to a dissimilarity
    dissimilarity_rows = []
    for row in D:
        dissimilarity_rows.append(
                [distance_to_dissimilarity(d, k) for d in row])
    dissimilarity = np.array(dissimilarity_rows)
    dissimilarity_vector = get_principal_coordinate(dissimilarity)
    # report the distance matrix and its projections
    out = StringIO()
    print >> out, 'original distance matrix:'
    print >> out, MatrixUtil.m_to_string(D)
    print >> out
    print >> out, 'projections onto the principal coordinate using the original distance matrix:'
    for name, value in zip(ordered_tip_names, D_vector):
        print >> out, '\t'.join((name, str(value)))
    print >> out
    # report the dissimilarity matrix and its projections
    print >> out, 'dissimilarity matrix:'
    print >> out, MatrixUtil.m_to_string(dissimilarity)
    print >> out
    print >> out, 'projections onto the principal coordinate using the dissimilarity matrix:'
    for name, value in zip(ordered_tip_names, dissimilarity_vector):
        print >> out, '\t'.join((name, str(value)))
    print >> out
    # return the response
    return out.getvalue()
def main():
    """
    Sample distance matrices for a limited time and print a summary.
    The samples themselves are discarded; only the counters kept by
    the sampler are reported.
    """
    # use the default sequence length
    sequence_length = 100
    # use the default tree
    tree_string = '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, (((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get arbitrarily ordered leaf names
    ordered_names = [node.name for node in tree.gen_tips()]
    # create the sampler
    sampler = DMSampler.InfiniteAllelesSampler(
            tree, ordered_names, sequence_length)
    sampler.set_inf_replacement(20)
    sampler.set_zero_replacement(0.0)
    # keep sampling until the allocated time has been exceeded
    allocated_seconds = 2
    start_time = time.clock()
    run_seconds = 0
    for result in sampler.gen_samples_or_none():
        run_seconds = time.clock() - start_time
        if run_seconds > allocated_seconds:
            break
    # each tuple is one output line whose fields are separated by a space
    report = (
            ('these are the results for a', run_seconds, 'second run:'),
            (sampler.proposed, 'samples were proposed'),
            (sampler.accepted, 'samples were accepted'),
            (
                sampler.proposals_with_zero,
                'proposals had a distance estimate of zero'),
            (
                sampler.proposals_with_inf,
                'proposals had a distance estimate of infinity'))
    for fields in report:
        print(' '.join(str(field) for field in fields))
def get_response_content(fs):
    """
    Report eigenvectors of balaji matrices with and without internal nodes.
    One matrix is derived from the distances among all nodes of the tree
    and the other from the distances among the tips only.
    @param fs: an object holding the submitted form values; this function
    reads its tree attribute
    @return: the response text
    """
    # read the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # order the tips by name, using the node id to break ties
    tip_pairs = set((node.get_name(), id(node)) for node in tree.gen_tips())
    ordered_tip_names, ordered_tip_ids = zip(*sorted(tip_pairs))
    # the internal nodes follow the tips, in preorder
    ordered_internal_ids = []
    for node in tree.preorder():
        if not node.is_tip():
            ordered_internal_ids.append(id(node))
    ordered_ids = list(ordered_tip_ids) + ordered_internal_ids
    ntips = len(ordered_tip_ids)
    # get the distance matrices
    full_D = tree.get_partial_distance_matrix(ordered_ids)
    partial_D = tree.get_partial_distance_matrix(ordered_tip_ids)
    # get the balaji matrices
    full_R = Clustering.get_R_balaji(full_D)
    partial_R = Clustering.get_R_balaji(partial_D)
    # Two eigenvectors are computed for each matrix,
    # but only the first of each pair is reported.
    full_va, full_vb = get_eigenvectors(full_R)
    partial_va, partial_vb = get_eigenvectors(partial_R)
    # create the response
    out = StringIO()
    print >> out, 'Fiedler vector associated with the graph'
    print >> out, 'for which the internal nodes are hidden:'
    print >> out, str(tuple(partial_va))
    print >> out
    print >> out, 'The tip subvector of the Fiedler vector'
    print >> out, 'associated with the graph of the full tree:'
    print >> out, str(tuple(full_va[:ntips]))
    # write the response
    return out.getvalue()
def get_response_content(fs):
    """
    Compare point embeddings derived from several distance matrices.
    @param fs: an object holding the submitted form values; this function
    reads its tree_string, ndups and show_aug attributes
    @return: the response text
    """
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # get ordered ids with the internal nodes first
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(node) for node in tree.gen_tips()]
    # get the distance matrix and the augmented distance matrix
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # get points from the leaf distance matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    # get points from the augmented distance matrix
    X_aug = Euclid.edm_to_points(D_aug, nvertices - 1)
    # explicitly compute the points for the given number of dups,
    # giving each leaf a weight of 1 + ndups and normalizing the weights
    weights = [1] * ninternal + [1 + fs.ndups] * nleaves
    masses = np.array(weights, dtype=float) / sum(weights)
    X_weighted = Euclid.edm_to_weighted_points(D, masses)
    # explicitly compute the points for 10x dups
    weights_10x = [1] * ninternal + [1 + fs.ndups * 10] * nleaves
    masses_10x = np.array(weights_10x, dtype=float) / sum(weights_10x)
    X_weighted_10x = Euclid.edm_to_weighted_points(D, masses_10x)
    # Explicitly compute the limiting points as the number of dups increases:
    # center the points on the mean of the leaf points and then
    # rotate them using the right singular vectors of the leaf points.
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    leaf_points = X[-nleaves:]
    Vt = np.linalg.svd(leaf_points)[2]
    Z = np.dot(X, Vt.T)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()
    print >> out, 'leaf distance matrix:'
    print >> out, D_leaf
    print >> out
    print >> out, 'points derived from the leaf distance matrix'
    print >> out, '(the first column is proportional to the Fiedler vector):'
    print >> out, X_leaf
    print >> out
    # NOTE(review): only the augmented distance matrix itself is assumed
    # to be optional; confirm the extent of this conditional.
    if fs.show_aug:
        print >> out, 'augmented distance matrix:'
        print >> out, D_aug
        print >> out
    print >> out, 'points derived from the augmented distance matrix'
    print >> out, '(the first column is proportional to the Fiedler vector):'
    print >> out, get_ugly_matrix(X_aug, ninternal, nleaves)
    print >> out
    # the remaining sections share a title, matrix, blank line layout
    remaining_sections = (
            ('points computed using masses:', X_weighted),
            ('points computed using masses with 10x dups:', X_weighted_10x),
            ('limiting points:', Z))
    for title, points in remaining_sections:
        print >> out, title
        print >> out, points
        print >> out
    return out.getvalue()
def get_response_content(fs):
    """
    Evaluate a split of four tips using the response matrix of the tree.
    @param fs: an object holding the submitted form values; this function
    reads its tree, lhs_a, lhs_b, rhs_a, rhs_b, show_response,
    show_reduced_response and show_blen attributes
    @return: the response text
    @raise HandlingError: if a requested label is not a tip of the tree
    """
    # get the tree and its tip names in alphabetical order
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_names = sorted(node.get_name() for node in tree.gen_tips())
    # assert that the given labels are tips of the tree
    requested_names = set([fs.lhs_a, fs.lhs_b, fs.rhs_a, fs.rhs_b])
    bad_names = requested_names.difference(ordered_names)
    if bad_names:
        # list the bad names in sorted order so the message is reproducible
        bad_name_string = ', '.join(sorted(bad_names))
        raise HandlingError(
                'these labels are not valid tips: %s' % bad_name_string)
    # get the distance matrix and the response matrix
    D = np.array(tree.get_distance_matrix(ordered_names))
    R = Clustering.get_R_stone(D)
    # look up the row and column indices of the requested tips
    name_to_index = dict((name, i) for i, name in enumerate(ordered_names))
    la = name_to_index[fs.lhs_a]
    lb = name_to_index[fs.lhs_b]
    ra = name_to_index[fs.rhs_a]
    rb = name_to_index[fs.rhs_b]
    # get the two by two matrix: lhs tips index rows, rhs tips index columns
    R_reduced = np.zeros((2, 2))
    for i, row_index in enumerate((la, lb)):
        for j, col_index in enumerate((ra, rb)):
            R_reduced[i][j] = R[row_index][col_index]
    # treat determinants within epsilon of zero as exactly zero
    epsilon = 1e-13
    criterion = np.linalg.det(R_reduced)
    if abs(criterion) < epsilon:
        criterion = 0
    # in analogy to the four point condition,
    # use two different ways of calculating the distance
    blen_a = (D[la][rb] + D[lb][ra] - D[la][lb] - D[ra][rb]) / 2.0
    blen_b = (D[la][ra] + D[lb][rb] - D[la][lb] - D[ra][rb]) / 2.0
    blen = min(blen_a, blen_b)
    # define the response as title and body pairs
    sections = []
    if fs.show_response:
        title = 'response matrix with rows ordered alphabetically by leaf label:'
        sections.append((title, MatrixUtil.m_to_string(R)))
    if fs.show_reduced_response:
        title = '2x2 submatrix of the response matrix:'
        sections.append((title, MatrixUtil.m_to_string(R_reduced)))
    # the determinant is reported unconditionally
    title = 'determinant of the 2x2 submatrix of the response matrix:'
    sections.append((title, str(criterion)))
    if fs.show_blen:
        title = 'branch length defined by the split:'
        sections.append((title, str(blen)))
    # return the response
    paragraphs = ['\n'.join(section) for section in sections]
    return '\n\n'.join(paragraphs) + '\n'
def get_response_content(fs):
    """
    Report points derived from leaf, augmented and weighted distances.
    @param fs: an object holding the submitted form values; this function
    reads its tree_string, ndups and show_aug attributes
    @return: the response text
    """
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # get ordered ids with the internal nodes first
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(node) for node in tree.gen_tips()]
    # get the distance matrix and the augmented distance matrix
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # analyze the leaf distance matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    # analyze the augmented distance matrix
    X_aug = Euclid.edm_to_points(D_aug, nvertices-1)
    # explicitly compute the points for the given number of dups using weights
    leaf_weight = 1 + fs.ndups
    m = [1]*ninternal + [leaf_weight]*nleaves
    m = np.array(m, dtype=float) / sum(m)
    X_weighted = Euclid.edm_to_weighted_points(D, m)
    # explicitly compute the points for 10x dups
    leaf_weight_10x = 1 + fs.ndups*10
    m = [1]*ninternal + [leaf_weight_10x]*nleaves
    m = np.array(m, dtype=float) / sum(m)
    X_weighted_10x = Euclid.edm_to_weighted_points(D, m)
    # explicitly compute the limiting points as the number of dups increases
    X = Euclid.edm_to_points(D)
    leaf_center = np.mean(X[-nleaves:], axis=0)
    X -= leaf_center
    U, s, Vt = np.linalg.svd(X[-nleaves:])
    Z = np.dot(X, Vt.T)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    fiedler_note = '(the first column is proportional to the Fiedler vector):'
    out = StringIO()
    print >> out, 'leaf distance matrix:'
    print >> out, D_leaf
    print >> out
    print >> out, 'points derived from the leaf distance matrix'
    print >> out, fiedler_note
    print >> out, X_leaf
    print >> out
    # NOTE(review): the conditional is assumed to cover only the
    # augmented distance matrix itself; confirm its extent.
    if fs.show_aug:
        print >> out, 'augmented distance matrix:'
        print >> out, D_aug
        print >> out
    print >> out, 'points derived from the augmented distance matrix'
    print >> out, fiedler_note
    print >> out, get_ugly_matrix(X_aug, ninternal, nleaves)
    print >> out
    print >> out, 'points computed using masses:'
    print >> out, X_weighted
    print >> out
    print >> out, 'points computed using masses with 10x dups:'
    print >> out, X_weighted_10x
    print >> out
    print >> out, 'limiting points:'
    print >> out, Z
    print >> out
    return out.getvalue()
def get_response_content(fs):
    """
    Simulate tree reconstruction from sampled distance matrices.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    @raise HandlingError: if no bipartition function is selected or if
    the requested simulation is predicted to take too many steps
    """
    # read the criterion string, creating the splitter object
    if fs.exact:
        splitter = Clustering.StoneExactDMS()
    elif fs.sign:
        splitter = Clustering.StoneSpectralSignDMS()
    elif fs.nj:
        splitter = Clustering.NeighborJoiningDMS()
    elif fs.random:
        splitter = Clustering.RandomDMS()
    else:
        # without this branch the splitter would be unbound below
        raise HandlingError("no bipartition function was selected")
    # read the original tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # define the maximum number of steps we want
    max_steps = 1000000
    # Make sure that the splitter object is appropriate
    # for the number of taxa and the number of tree reconstructions.
    ntaxa = len(list(tree.gen_tips()))
    if splitter.get_complexity(ntaxa) * fs.iterations > max_steps:
        msg_a = "use a faster bipartition function, "
        msg_b = "fewer taxa, or fewer tree reconstructions"
        raise HandlingError(msg_a + msg_b)
    # define the simulation parameters
    sim = Simulation(splitter, "nj", "cgi tree building simulation")
    sim.set_original_tree(tree)
    sim.set_step_limit(max_steps)
    # define an arbitrary but consistent ordering of the taxa
    ordered_names = [node.name for node in tree.gen_tips()]
    # attempt to simulate a bunch of distance matrices
    sampler = DMSampler.DMSampler(tree, ordered_names, fs.length)
    distance_matrices = []
    for result in sampler.gen_samples_or_none():
        # if a proposal was accepted then add it to the list
        if result:
            sequence_list, distance_matrix = result
            distance_matrices.append(distance_matrix)
        # if enough accepted samples have been generated then stop sampling
        remaining_acceptances = fs.iterations - len(distance_matrices)
        if not remaining_acceptances:
            break
        # If the remaining number of computrons is predicted
        # to be too much then stop.
        if sampler.get_remaining_computrons(remaining_acceptances) > max_steps:
            msg_a = "this combination of parameters "
            msg_b = "is predicted to take too long"
            # the message is the concatenation of both parts
            raise HandlingError(msg_a + msg_b)
    sim.run(distance_matrices, ordered_names)
    # define the response
    out = StringIO()
    print >> out, "partition error count frequencies:"
    print >> out, sim.get_histogram_string()
    print >> out, ""
    print >> out, "weighted partition errors:", sim.get_deep_loss()
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Report the splits of a tree and of the tree pruned to selected tips.
    @param fs: an object holding the submitted form values; this function
    reads its names, tree, show_newick and show_art attributes
    @return: the response text
    @raise HandlingError: if a selected name is not a tip of the tree
    """
    # get the selected names and the tree
    selected_name_set = set(Util.get_stripped_lines(StringIO(fs.names)))
    original_tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # assert that the name selection is compatible with the tree
    possible_name_set = set()
    for node in original_tree.gen_tips():
        possible_name_set.add(node.get_name())
    extra_names = selected_name_set - possible_name_set
    if extra_names:
        msg_a = "the following selected names "
        msg_b = "are not valid tips: %s" % str(tuple(extra_names))
        raise HandlingError(msg_a + msg_b)
    # the pruned tree is built from a separate parse of the same string
    simple_tree = NewickIO.parse(fs.tree, Newick.NewickTree)
    pruned_tree = get_pruned_tree(simple_tree, selected_name_set)
    # write the splits of the original tree and then of the pruned tree
    labeled_trees = (
        (original_tree, "the original tree"),
        (pruned_tree, "the pruned tree"),
    )
    out = StringIO()
    for current_tree, tree_name in labeled_trees:
        print >> out, "calculating splits of %s:" % tree_name
        print >> out, process_tree(
            current_tree, tree_name, fs.show_newick, fs.show_art)
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Normalize a newick tree and report each change that was made.
    The root branch length is removed, the tree is rerooted when the
    root has fewer than three children, and the names of nodes that
    have children are removed.
    @param fs: an object holding the submitted form values; this function
    reads its tree attribute
    @return: the response text, ending with the formatted newick string
    """
    # read the tree
    tree = NewickIO.parse(fs.tree, Newick.NewickTree)
    # begin the response
    out = StringIO()
    # remove the branch length associated with the root
    root = tree.get_root()
    if root.blen is not None:
        print >> out, 'the root originally had a branch length of', root.blen
        root.blen = None
    else:
        print >> out, 'the root did not originally have a branch length'
    # force a trifurcation at the root
    if tree.get_root().get_child_count() < 3:
        old_root = tree.get_root()
        print >> out, 'the original root had', old_root.get_child_count(), 'children'
        # Reroot at the child that has the most children of its own.
        # Using a key function avoids comparing the node objects
        # themselves when two children have the same child count,
        # so the first such child is chosen.
        best_child = max(
                old_root.gen_children(),
                key=lambda child: child.get_child_count())
        tree.reroot(best_child)
        tree.remove_node(old_root)
        print >> out, 'the new root has', tree.get_root().get_child_count(), 'children'
    else:
        print >> out, 'the root has', tree.get_root().get_child_count(), 'children'
    # remove names of internal nodes
    nremoved_names = 0
    for node in tree.preorder():
        if node.has_children() and node.name is not None:
            node.name = None
            nremoved_names += 1
    print >> out, 'removed', nremoved_names, 'internal node names'
    # draw the new formatted newick string after a break
    print >> out
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 120)
    print >> out, formatted_tree_string
    # return the response
    return out.getvalue()