Exemple #1
0
def get_response_content(fs):
    """
    Report the splits of a tree before and after pruning to selected tips.
    @param fs: an object providing the names, tree, show_newick
    and show_art fields
    @return: the text of the response
    @raise HandlingError: if a selected name is not the name of a tip
    """
    # read the requested tip names
    chosen_names = set(Util.get_stripped_lines(StringIO(fs.names)))
    # parse the tree so that its tips can be checked against the selection
    full_tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    tip_names = set(tip.get_name() for tip in full_tree.gen_tips())
    # every selected name must be the name of a tip
    unknown_names = chosen_names - tip_names
    if unknown_names:
        prefix = 'the following selected names '
        suffix = 'are not valid tips: %s' % str(tuple(unknown_names))
        raise HandlingError(prefix + suffix)
    # parse the same string again as a plain newick tree and prune it
    pruned = get_pruned_tree(
        NewickIO.parse(fs.tree, Newick.NewickTree), chosen_names)
    # write one section per tree, the original tree first
    out = StringIO()
    sections = (
        ('the original tree', full_tree),
        ('the pruned tree', pruned),
    )
    for label, current_tree in sections:
        print >> out, 'calculating splits of %s:' % label
        print >> out, process_tree(
            current_tree, label, fs.show_newick, fs.show_art)
    return out.getvalue()
Exemple #2
0
 def test_get_split_distance(self):
     """
     Check the split distances between several five-taxon trees.
     """
     # parse the trees in a fixed order
     newick_strings = (
         ('a', '((A:1, B:1):1, C:1, (D:1, E:1):1);'),
         ('b', '((A:1, B:1):1, D:1, (C:1, E:1):1);'),
         ('c', '((A:1, D:1):1, C:1, (B:1, E:1):1);'),
         ('d', '((A:1, D:1):1, (C:1, B:1, E:1):1);'),
     )
     trees = {}
     for label, newick in newick_strings:
         trees[label] = NewickIO.parse(newick, FelTree.NewickTree)
     # each row is (first tree, second tree, expected distance)
     expectations = (
         # a tree is at distance zero from itself
         ('a', 'a', 0),
         ('b', 'b', 0),
         ('c', 'c', 0),
         ('d', 'd', 0),
         # these pairs give the same distance in both directions
         ('a', 'b', 1),
         ('b', 'a', 1),
         ('b', 'c', 2),
         ('c', 'b', 2),
         ('a', 'c', 2),
         ('c', 'a', 2),
         # the distance can be asymmetric
         # when internal nodes are not order 3
         ('a', 'd', 1),
         ('d', 'a', 2),
     )
     for first, second, expected in expectations:
         observed = get_split_distance(trees[first], trees[second])
         self.assertEqual(observed, expected)
Exemple #3
0
def get_response_content(fs):
    """
    Compute the loss of a query tree with respect to a reference tree.
    @param fs: an object providing the query, reference, uniform,
    weighted and normalize fields
    @return: the loss followed by a newline,
    or a failure message when the normalizing count is zero
    """
    query = NewickIO.parse(fs.query, FelTree.NewickTree)
    reference = NewickIO.parse(fs.reference, FelTree.NewickTree)
    # the numerator is the split distance under the requested loss function
    # NOTE(review): when neither fs.uniform nor fs.weighted is set
    # the numerator is never assigned and the function fails below
    if fs.uniform:
        numerator = TreeComparison.get_split_distance(query, reference)
    elif fs.weighted:
        numerator = TreeComparison.get_weighted_split_distance(
            query, reference)
    # the denominator is one unless normalization is requested
    if not fs.normalize:
        denominator = 1
    elif fs.uniform:
        denominator = float(
            TreeComparison.get_nontrivial_split_count(reference))
    elif fs.weighted:
        denominator = float(
            TreeComparison.get_weighted_split_count(reference))
    # a zero denominator means that the loss cannot be normalized
    if not denominator:
        return 'normalization failed\n'
    return str(numerator / denominator) + '\n'
Exemple #4
0
 def test_update_generalized_nj_big(self):
     """
     Check the successor distance matrices made from a six-taxon tree.
     """
     def tree_distances(newick_string, ordered_taxa):
         # distance matrix of the parsed tree in the given taxon order
         parsed = NewickIO.parse(newick_string, FelTree.NewickTree)
         return np.array(parsed.get_distance_matrix(ordered_taxa))

     # The initial tree, with an arbitrary ordering of its taxa.
     D_initial = tree_distances(
         '(((3:9, 2:2):4, 1:2):1, (4:1, 5:3):7, 6:2);',
         ['1', '4', '2', '5', '3', '6'])
     # The two subtrees, whose orderings depend on the initial ordering.
     expected_a = tree_distances(
         '((3:9, 2:2):4, 1:2, B:0.5);', ['1', 'B', '2', '3'])
     expected_b = tree_distances(
         '((4:1, 5:3):7, 6:2, A:0.5);', ['A', '4', '5', '6'])
     # compute both successor matrices before checking either of them
     observed_a = update_generalized_nj(D_initial, set([1, 3, 5]))
     observed_b = update_generalized_nj(D_initial, set([0, 2, 4]))
     comparisons = (
         (expected_a, observed_a),
         (expected_b, observed_b),
     )
     for expected, observed in comparisons:
         self.assertTrue(np.allclose(expected, observed))
Exemple #5
0
def do_distance_analysis(X):
    """
    Compare neighbor joining trees built from plain and squared distances.
    @param X: a sequence of points that support subtraction;
    the four labels suggest four points -- TODO confirm with the caller
    @return: a multi-line report string
    """
    labels = list("0123")

    def rebuild(distances):
        # Build a neighbor joining tree, round trip it through its
        # newick string, and read the distance matrix back from the tree.
        nj_tree = NeighborJoining.make_tree(distances, labels)
        newick = NewickIO.get_newick_string(nj_tree)
        parsed = NewickIO.parse(newick, FelTree.NewickTree)
        return newick, np.array(parsed.get_distance_matrix(labels))

    # pairwise Euclidean distances and their reconstruction from a tree
    D_sqrt = np.array(
        [[np.linalg.norm(row - col) for col in X] for row in X])
    sqrt_tree_string, D_sqrt_reconstructed = rebuild(D_sqrt)
    # pairwise squared distances and their reconstruction from a tree
    D = D_sqrt ** 2
    tree_string, D_reconstructed = rebuild(D)
    # each section of the report is a heading followed by a value
    sections = (
        ("matrix of Euclidean distances between tetrahedron vertices:",
            D_sqrt),
        ("neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):",
            sqrt_tree_string),
        ("distance matrix implied by this tree:",
            D_sqrt_reconstructed),
        ("matrix of squared distances between tetrahedron vertices:",
            D),
        ("neighbor joining tree constructed from D = squared Euclidean distances (normal):",
            tree_string),
        ("distance matrix implied by this tree:",
            D_reconstructed),
    )
    out = StringIO()
    for heading, value in sections:
        print >> out, heading
        print >> out, value
    return out.getvalue().strip()
Exemple #6
0
def do_distance_analysis(X):
    """
    Compare neighbor joining trees built from plain and squared distances.
    @param X: a sequence of points that support subtraction;
    the four labels suggest four points -- TODO confirm with the caller
    @return: a multi-line report string
    """
    labels = list('0123')

    def rebuild(distances):
        # Build a neighbor joining tree, round trip it through its
        # newick string, and read the distance matrix back from the tree.
        nj_tree = NeighborJoining.make_tree(distances, labels)
        newick = NewickIO.get_newick_string(nj_tree)
        parsed = NewickIO.parse(newick, FelTree.NewickTree)
        return newick, np.array(parsed.get_distance_matrix(labels))

    # pairwise Euclidean distances and their reconstruction from a tree
    D_sqrt = np.array(
        [[np.linalg.norm(row - col) for col in X] for row in X])
    sqrt_tree_string, D_sqrt_reconstructed = rebuild(D_sqrt)
    # pairwise squared distances and their reconstruction from a tree
    D = D_sqrt**2
    tree_string, D_reconstructed = rebuild(D)
    # each section of the report is a heading followed by a value
    sections = (
        ('matrix of Euclidean distances between tetrahedron vertices:',
            D_sqrt),
        ('neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):',
            sqrt_tree_string),
        ('distance matrix implied by this tree:',
            D_sqrt_reconstructed),
        ('matrix of squared distances between tetrahedron vertices:',
            D),
        ('neighbor joining tree constructed from D = squared Euclidean distances (normal):',
            tree_string),
        ('distance matrix implied by this tree:',
            D_reconstructed),
    )
    out = StringIO()
    for heading, value in sections:
        print >> out, heading
        print >> out, value
    return out.getvalue().strip()
Exemple #7
0
def get_response_content(fs):
    """
    Compute the loss of a query tree with respect to a reference tree.
    @param fs: an object providing the query, reference, uniform,
    weighted and normalize fields
    @return: the loss followed by a newline,
    or a failure message when the normalizing count is zero
    """
    query = NewickIO.parse(fs.query, FelTree.NewickTree)
    reference = NewickIO.parse(fs.reference, FelTree.NewickTree)
    # the numerator is the split distance under the requested loss function
    # NOTE(review): when neither fs.uniform nor fs.weighted is set
    # the numerator is never assigned and the function fails below
    if fs.uniform:
        numerator = TreeComparison.get_split_distance(query, reference)
    elif fs.weighted:
        numerator = TreeComparison.get_weighted_split_distance(
            query, reference)
    # the denominator is one unless normalization is requested
    if not fs.normalize:
        denominator = 1
    elif fs.uniform:
        denominator = float(
            TreeComparison.get_nontrivial_split_count(reference))
    elif fs.weighted:
        denominator = float(
            TreeComparison.get_weighted_split_count(reference))
    # a zero denominator means that the loss cannot be normalized
    if not denominator:
        return 'normalization failed\n'
    return str(numerator / denominator) + '\n'
Exemple #8
0
 def test_get_split_distance(self):
     """
     Check the split distances between several five-taxon trees.
     """
     # parse the trees in a fixed order
     newick_strings = (
         ('a', '((A:1, B:1):1, C:1, (D:1, E:1):1);'),
         ('b', '((A:1, B:1):1, D:1, (C:1, E:1):1);'),
         ('c', '((A:1, D:1):1, C:1, (B:1, E:1):1);'),
         ('d', '((A:1, D:1):1, (C:1, B:1, E:1):1);'),
     )
     trees = {}
     for label, newick in newick_strings:
         trees[label] = NewickIO.parse(newick, FelTree.NewickTree)
     # each row is (first tree, second tree, expected distance)
     expectations = (
         # a tree is at distance zero from itself
         ('a', 'a', 0),
         ('b', 'b', 0),
         ('c', 'c', 0),
         ('d', 'd', 0),
         # these pairs give the same distance in both directions
         ('a', 'b', 1),
         ('b', 'a', 1),
         ('b', 'c', 2),
         ('c', 'b', 2),
         ('a', 'c', 2),
         ('c', 'a', 2),
         # the distance can be asymmetric
         # when internal nodes are not order 3
         ('a', 'd', 1),
         ('d', 'a', 2),
     )
     for first, second, expected in expectations:
         observed = get_split_distance(trees[first], trees[second])
         self.assertEqual(observed, expected)
Exemple #9
0
 def _create_trees(self):
     """
     Set the full_tree and pruned_tree attributes from newick_string.
     The full tree is parsed as a Newick.NewickTree.
     The pruned tree is parsed as a FelTree.NewickTree from the newick
     string of a scratch tree that has had its redundant nodes removed.
     """
     source = self.newick_string
     self.full_tree = NewickIO.parse(source, Newick.NewickTree)
     # prune a separate scratch tree so that the full tree is not modified
     scratch_tree = NewickIO.parse(source, Newick.NewickTree)
     remove_redundant_nodes(scratch_tree)
     self.pruned_tree = NewickIO.parse(
         NewickIO.get_newick_string(scratch_tree), FelTree.NewickTree)
Exemple #10
0
 def _create_trees(self):
     """
     Set the full_tree and pruned_tree attributes from newick_string.
     The full tree is parsed as a Newick.NewickTree.
     The pruned tree is parsed as a FelTree.NewickTree from the newick
     string of a scratch tree that has had its redundant nodes removed.
     """
     source = self.newick_string
     self.full_tree = NewickIO.parse(source, Newick.NewickTree)
     # prune a separate scratch tree so that the full tree is not modified
     scratch_tree = NewickIO.parse(source, Newick.NewickTree)
     remove_redundant_nodes(scratch_tree)
     self.pruned_tree = NewickIO.parse(
         NewickIO.get_newick_string(scratch_tree), FelTree.NewickTree)
Exemple #11
0
 def test_get_weighted_split_distance(self):
     """
     Check the weighted split distances between two six-taxon trees.
     """
     tree_a = NewickIO.parse(
         '((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);', FelTree.NewickTree)
     tree_b = NewickIO.parse(
         '(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);', FelTree.NewickTree)
     # each row is (first tree, second tree, expected distance)
     expectations = (
         # a tree is at distance zero from itself
         (tree_a, tree_a, 0),
         (tree_b, tree_b, 0),
         # swapping the arguments may change the distance
         (tree_a, tree_b, 20),
         (tree_b, tree_a, 15),
     )
     for first, second, expected in expectations:
         observed = get_weighted_split_distance(first, second)
         self.assertEqual(observed, expected)
Exemple #12
0
 def test_get_weighted_split_distance(self):
     """
     Check the weighted split distances between two six-taxon trees.
     """
     tree_a = NewickIO.parse(
         '((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);', FelTree.NewickTree)
     tree_b = NewickIO.parse(
         '(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);', FelTree.NewickTree)
     # each row is (first tree, second tree, expected distance)
     expectations = (
         # a tree is at distance zero from itself
         (tree_a, tree_a, 0),
         (tree_b, tree_b, 0),
         # swapping the arguments may change the distance
         (tree_a, tree_b, 20),
         (tree_b, tree_a, 15),
     )
     for first, second, expected in expectations:
         observed = get_weighted_split_distance(first, second)
         self.assertEqual(observed, expected)
Exemple #13
0
def hard_coded_analysis_a():
    tree_string = '(a:1, (b:2, d:5):1, c:4);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    states = []
    id_list = []
    for state, id_ in sorted((node.name, id(node))
            for node in tree.gen_tips()):
        id_list.append(id_)
        states.append(state)
    for node in tree.gen_internal_nodes():
        id_list.append(id(node))
        states.append('')
    n = len(states)
    for method in ('tips', 'full'):
        # get the distance matrix from the tree
        if method == 'tips':
            print 'leaves only:'
            distance_matrix = tree.get_distance_matrix(states)
        else:
            print 'leaves and internal nodes:'
            distance_matrix = tree.get_full_distance_matrix(id_list)
        print 'distance matrix from the tree:'
        print MatrixUtil.m_to_string(distance_matrix)
        # get the equivalent euclidean points
        z_points = list(gen_euclidean_points(distance_matrix))
        for state, point in zip(states, z_points):
            print state, point
        # get the distance matrix from the transformed points
        print 'distance matrix from the transformed points:'
        distance_matrix = get_euclidean_distance_matrix(z_points)
        print MatrixUtil.m_to_string(distance_matrix)
        print
Exemple #14
0
def get_response_content(fs):
    """
    Report a sum of squared coordinates for each embedded tip of a tree.
    @param fs: an object providing the tree field as a newick string
    @return: the text of the response
    """
    # get the tree and order its tips by name
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    states = sorted(node.name for node in tree.gen_tips())
    n = len(states)
    # get the distance matrix and the equivalent euclidean points
    distance_matrix = tree.get_distance_matrix(states)
    z_points = list(gen_euclidean_points(distance_matrix))
    # write the response
    out = StringIO()
    print >> out, 'distances to the virtual center of the tree:'
    # Each reported value is the sum of squared differences from the origin.
    # Zipping with the origin limits the sum to at most n coordinates.
    origin = [0] * n
    for z in z_points:
        print >> out, sum((a-b)**2 for a, b in zip(z, origin))
    # return the response
    return out.getvalue()
Exemple #15
0
def hard_coded_analysis_b():
    """
    Numerically search for the power 2 steiner points.
    """
    # make a distance matrix where the order is alphabetical with the states
    tree_string = '(a:1, (b:2, d:5):1, c:4);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    states = list(sorted(node.name for node in tree.gen_tips()))
    distance_matrix = tree.get_distance_matrix(states)
    # get the pseudo inverse laplacian matrix
    L_pinv = get_laplacian_pseudo_inverse(distance_matrix)
    # get the eigendecomposition of the pseudo inverse laplacian matrix
    eigenvalues, eigenvectors = get_eigendecomposition(L_pinv)
    print 'eigenvalues of the pseudo inverse of the laplacian:'
    print eigenvalues
    # each taxon gets a transformed point
    z_points = list(gen_euclidean_points_from_eigendecomposition(
        eigenvalues, eigenvectors))
    # initialize the objective function
    objective = MyObjective(z_points)
    # initialize a couple of steiner points
    n = len(states)
    va = [random.random() for i in range(n)]
    vb = [random.random() for i in range(n)]
    # define the initial guess
    x0 = va + vb
    # do the optimization
    result = optimize.fmin(objective, x0)
    print result
    print objective.best
Exemple #16
0
def process(tree_string):
    """
    Summarize weighted point embeddings of the vertices of a tree.
    As a side effect the numpy print line width is set to 200.
    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    np.set_printoptions(linewidth=200)
    # build the newick tree from the string
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get ordered names and ids
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    # get the distance matrix for the ordered ids
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))

    def normalized(weights):
        # scale the weights so that they sum to one
        return np.array(weights, dtype=float) / sum(weights)

    # Define the mass vectors.
    # The degenerate vector puts mass on the first nleaves entries only;
    # presumably the ordered ids list the leaves first -- TODO confirm
    m_uniform = normalized([1] * nvertices)
    m_degenerate = normalized([1] * nleaves + [0] * (nvertices - nleaves))
    # write the ordered names followed by one section per mass vector
    out = StringIO()
    print >> out, 'ordered names:'
    print >> out, ordered_names
    print >> out
    sections = (
        ('embedded points with mass uniformly distributed among all vertices:',
            m_uniform),
        ('embedded points with mass uniformly distributed among the leaves:',
            m_degenerate),
    )
    for heading, masses in sections:
        print >> out, heading
        print >> out, Euclid.edm_to_weighted_points(D, masses)
        print >> out
    # return the response
    return out.getvalue().strip()
Exemple #17
0
 def test_felsenstein(self):
     """
     Compute the expected and the observed scaled contrast matrices.
     No assertion is made about the two matrices.
     """
     tree = NewickIO.parse(g_felsenstein_tree_string, FelTree.NewickTree)
     ordered_names = ('a', 'b', 'c', 'd', 'e')
     # scale each expected contrast by the inverse root of its variance
     C_expected = np.dot(
         g_contrast_matrix,
         np.diag(1/np.sqrt(g_contrast_variances)))
     # scale the observed contrasts the same way
     contrasts, variances = get_contrasts_and_variances(tree, ordered_names)
     contrast_columns = np.array(contrasts).T
     observed_scale = np.diag(1/np.sqrt(np.array(variances)))
     C_observed = np.dot(contrast_columns, observed_scale)
     # Disabled diagnostics used to print the variances, both contrast
     # matrices, the product of each contrast matrix with its transpose,
     # and Euclid.edm_to_laplacian of the distance matrix of the tree.
Exemple #18
0
def get_form():
    """
    @return: the body of a form
    """
    # the default tree is shown as a narrow newick string
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    default_tree_text = NewickIO.get_narrow_newick_string(default_tree, 60)
    # the tree field comes first
    form_objects = [
        Form.MultiLine(
            'tree', 'newick tree with branch lengths', default_tree_text)]
    # then one single line field per taxon that defines the split
    taxon_rows = (
        ('lhs_a', 'the first taxon on one side of the split', 'a'),
        ('lhs_b', 'the second taxon on one side of the split', 'b'),
        ('rhs_a', 'the first taxon on the other side of the split', 'x'),
        ('rhs_b', 'the second taxon on the other side of the split', 'y'),
    )
    for name, description, default in taxon_rows:
        form_objects.append(Form.SingleLine(name, description, default))
    # and finally the group of output options
    option_rows = (
        ('show_response', 'show the Laplacian response matrix'),
        ('show_reduced_response', 'show the 2x2 submatrix'),
        ('show_blen', 'show the branch length implied by the split'),
    )
    check_items = [
        Form.CheckItem(name, description)
        for name, description in option_rows]
    form_objects.append(
        Form.CheckGroup('options', 'output options', check_items))
    return form_objects
Exemple #19
0
def get_response_content(fs):
    """
    Compute a contrast matrix from the tip distances of a tree.
    @param fs: an object providing the tree, labels, epsilon,
    plain_format, matlab_format and r_format fields
    @return: the text of the response
    @raise HandlingError: if the labels are not exactly the tip names
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # the labels must be exactly the names of the tips
    tip_names = set(tip.get_name() for tip in tree.gen_tips())
    if set(ordered_labels) != tip_names:
        raise HandlingError(
            'the labels should match the labels of the leaves of the tree')
    # distances, then laplacian, then eigendecomposition, then contrasts
    D = np.array(tree.get_distance_matrix(ordered_labels))
    eigenvalues, eigenvectors = get_eigendecomposition(
        Euclid.edm_to_laplacian(D))
    C = get_contrast_matrix(eigenvalues, eigenvectors)
    # set elements with small absolute value to zero
    C[abs(C) < fs.epsilon] = 0
    # choose the formatter; the first format flag that is set wins
    if fs.plain_format:
        formatter = MatrixUtil.m_to_string
    elif fs.matlab_format:
        formatter = MatrixUtil.m_to_matlab_string
    elif fs.r_format:
        formatter = MatrixUtil.m_to_R_string
    else:
        formatter = None
    # the response is empty when no format flag is set
    out = StringIO()
    if formatter is not None:
        print >> out, formatter(C)
    return out.getvalue()
Exemple #20
0
def main():
    # use the default sequence length
    sequence_length = 100
    # use the default tree
    tree_string = '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, (((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get arbitrarily ordered leaf names
    ordered_names = list(node.name for node in tree.gen_tips())
    # create the sampler
    sampler = DMSampler.InfiniteAllelesSampler(tree, ordered_names,
                                               sequence_length)
    sampler.set_inf_replacement(20)
    sampler.set_zero_replacement(0.0)
    # do some sampling, saving a summary but discarding the samples
    allocated_seconds = 2
    start_time = time.clock()
    run_seconds = 0
    for result in sampler.gen_samples_or_none():
        run_seconds = time.clock() - start_time
        if run_seconds > allocated_seconds:
            break
    # define the response
    print 'these are the results for a', run_seconds, 'second run:'
    print sampler.proposed, 'samples were proposed'
    print sampler.accepted, 'samples were accepted'
    msg = 'proposals had a distance estimate of zero'
    print sampler.proposals_with_zero, msg
    msg = 'proposals had a distance estimate of infinity'
    print sampler.proposals_with_inf, msg
Exemple #21
0
def get_response_content(fs):
    """
    Evaluate the split of the tips of a tree into selected and unselected.
    @param fs: an object providing the tree and selection fields
    @return: the value of the split followed by a newline
    @raise HandlingError: if a selected name is not a tip,
    or if the selection or its complement is empty
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # compare the selected names with the names of the tips
    chosen = set(Util.get_stripped_lines(fs.selection.splitlines()))
    tip_names = set(tip.get_name() for tip in tree.gen_tips())
    unknown = chosen - tip_names
    if unknown:
        prefix = 'the following selected names '
        suffix = 'are not valid tips: %s' % str(tuple(unknown))
        raise HandlingError(prefix + suffix)
    # both sides of the split must be nonempty
    unchosen = tip_names - chosen
    if not (chosen and unchosen):
        raise HandlingError('the selection is degenerate')
    # order the tips as the tree yields them
    ordered_names = [tip.get_name() for tip in tree.gen_tips()]
    # the Y vector is +1 for selected tips and -1 for the other tips
    Y = np.array([1 if name in chosen else -1 for name in ordered_names])
    # evaluate the quadratic form of Y with the R matrix of the distances
    D = tree.get_distance_matrix(ordered_names)
    R = Clustering.get_R_balaji(D)
    return str(np.dot(np.dot(Y, R), Y.T)) + '\n'
Exemple #22
0
def get_response_content(fs):
    """
    Compare two augmented matrices computed from the distances of a tree.
    @param fs: an object providing the tree_string and N fields
    @return: the text of the response, which shows both matrices
    and whether they are close according to np.allclose
    """
    # get the tree
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # get the ordered ids, with the internal nodes before the tips
    internal_ids = [id(node) for node in tree.gen_internal_nodes()]
    tip_ids = [id(node) for node in tree.gen_tips()]
    ordered_ids = internal_ids + tip_ids
    # get the distance matrix for all of the ordered ids
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # compute the two matrices to be compared
    p = len(internal_ids)
    q = len(tip_ids)
    N = fs.N
    aug_a = get_aug_a(D, p, q, N)
    aug_b = get_aug_b(D, p, q, N)
    # show the output
    out = StringIO()
    print >> out, "-(1/2)MEDE'M':"
    print >> out, aug_a
    print >> out
    print >> out, "-(1/2)HMDM'H:"
    print >> out, aug_b
    print >> out
    print >> out, 'allclose:', np.allclose(aug_a, aug_b)
    return out.getvalue()
Exemple #23
0
 def test_get_weighted_split_count(self):
     """
     Check the weighted number of nontrivial splits of three trees.
     """
     # each row is (newick string, expected weighted split count)
     cases = (
         ('((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);', 45),
         ('(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);', 50),
         ('(((A:1, B:1):1, C:1):1, (D:1, (E:1, F:1):1):1);', 50),
     )
     # parse every tree before checking any of the counts
     parsed_cases = [
         (NewickIO.parse(newick, FelTree.NewickTree), expected)
         for newick, expected in cases]
     # the counts are not all equal
     # even though every tree has the same six leaves
     for tree, expected in parsed_cases:
         self.assertEqual(get_weighted_split_count(tree), expected)
Exemple #24
0
def get_response_content(fs):
    """
    Draw an image from the distance matrix of a tree.
    @param fs: an object providing the tree_string, imageformat, hticks,
    border, slow, fast and denom fields
    @return: the value returned by create_image
    @raise HandlingError: if fewer than two horizontal ticks are requested
    or if neither the slow nor the fast eigendecomposition is selected
    """
    # define the requested physical size of the images (in pixels)
    physical_size = (640, 480)
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nleaves = len(list(tree.gen_tips()))
    # get the ordered ids and the corresponding distance matrix
    ordered_ids = get_ordered_ids(tree)
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # get the image extension
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    # get the scaling factors and offsets
    if fs.hticks < 2:
        msg = 'expected at least two ticks on the horizontal axis'
        raise HandlingError(msg)
    width, height = physical_size
    xoffset = fs.border
    yoffset = fs.border
    yscale = float(height - 2 * fs.border)
    xscale = (width - 2 * fs.border) / float(fs.hticks - 1)
    # define the eigendecomposition function
    if fs.slow:
        fn = get_augmented_spectrum
    elif fs.fast:
        fn = get_augmented_spectrum_fast
    else:
        # without this branch fn would be unbound at the create_image call
        msg = 'no eigendecomposition method was selected'
        raise HandlingError(msg)
    # define the target eigenvalues from the distances among the tips
    tip_ids = [id(node) for node in tree.gen_tips()]
    D_tips = np.array(tree.get_partial_distance_matrix(tip_ids))
    G_tips = Euclid.edm_to_dccov(D_tips)
    target_ws = scipy.linalg.eigh(G_tips, eigvals_only=True) * fs.denom
    # draw the image
    return create_image(ext, physical_size, xscale, yscale, xoffset, yoffset,
                        D, nleaves, fs.hticks, fs.denom, fn, target_ws)
Exemple #25
0
 def test_get_split_branch(self):
     """
     Check the results of get_split_branch for several tip selections.
     """
     tree = NewickIO.parse('((a:1, b:2):3, c:4, d:5);', NewickTree)

     def tips_named(names):
         # the tips of the tree whose names are among the given names
         return [tip for tip in tree.gen_tips() if tip.get_name() in names]

     def split_length(names):
         # the length of the branch that splits off the named tips
         node, directed_branch = tree.get_split_branch(tips_named(names))
         return directed_branch.get_undirected_branch().get_branch_length()

     # tips 'a' and 'b' are split off by the branch of length 3
     self.assertEqual(split_length(('a', 'b')), 3)
     # no branch separates tips 'a' and 'c' from the rest of the tree
     self.assertEqual(tree.get_split_branch(tips_named(('a', 'c'))), None)
     # no branch separates all of the tips from the rest of the tree
     self.assertEqual(tree.get_split_branch(list(tree.gen_tips())), None)
     # no branch separates an empty selection from the rest of the tree
     self.assertEqual(tree.get_split_branch([]), None)
     # the single tip 'd' is split off by its own branch of length 5
     self.assertEqual(split_length(('d',)), 5)
Exemple #26
0
def get_form():
    """
    @return: the body of a form
    """
    # the default tree is shown as a narrow newick string
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    default_tree_text = NewickIO.get_narrow_newick_string(default_tree, 60)
    # the tree field comes first
    form_objects = [
        Form.MultiLine(
            'tree', 'newick tree with branch lengths', default_tree_text)]
    # then one single line field per taxon that defines the split
    taxon_rows = (
        ('lhs_a', 'the first taxon on one side of the split', 'a'),
        ('lhs_b', 'the second taxon on one side of the split', 'b'),
        ('rhs_a', 'the first taxon on the other side of the split', 'x'),
        ('rhs_b', 'the second taxon on the other side of the split', 'y'),
    )
    for name, description, default in taxon_rows:
        form_objects.append(Form.SingleLine(name, description, default))
    # and finally the group of output options
    option_rows = (
        ('show_response', 'show the full Laplacian matrix'),
        ('show_reduced_response', 'show the 2x2 submatrix'),
        ('show_blen', 'show the branch length implied by the split'),
    )
    check_items = [
        Form.CheckItem(name, description)
        for name, description in option_rows]
    form_objects.append(
        Form.CheckGroup('options', 'output options', check_items))
    return form_objects
Exemple #27
0
def get_default_original_tree():
    """
    Parse the default newick string and halve every defined branch length.
    @return: the parsed tree with its branch lengths multiplied by 0.5
    """
    halved_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    for vertex in halved_tree.preorder():
        length = vertex.get_branch_length()
        # nodes without a branch length are left untouched
        if length is None:
            continue
        vertex.set_branch_length(length * 0.5)
    return halved_tree
Exemple #28
0
def get_form():
    """
    Build the form with a tree, a distance matrix choice and output options.
    @return: the body of a form
    """
    # format the default tree string for display in the form
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    newick_text = NewickIO.get_narrow_newick_string(default_tree, 60)
    tree_field = Form.MultiLine('tree', 'newick tree', newick_text)
    # choice of the nodes that index the distance matrix
    matrix_field = Form.RadioGroup(
        'matrix', 'nodes used for the distance matrix', [
            RadioItem('standard', 'tips only', True),
            RadioItem('augmented', 'all nodes'),
            RadioItem('named', 'all named nodes')])
    # (option name, description) pairs; every option is checked by default
    output_specs = (
        ('show_split', 'exact criterion partition'),
        ('show_value', 'exact criterion value'),
        ('show_value_minus_trace', 'exact criterion value minus trace'),
        ('show_fiedler_split', 'show the spectral sign partition'),
        ('show_fiedler_eigenvector', 'show the eigenvector of interest'),
        ('show_labels', 'ordered labels'),
        ('show_distance_matrix', 'distance matrix'),
        ('show_M_matrix', 'M matrix'),
    )
    output_field = Form.CheckGroup(
        'output_options', 'output options',
        [CheckItem(name, label, True) for name, label in output_specs])
    return [tree_field, matrix_field, output_field]
Exemple #29
0
def get_response_content(fs):
    """
    Report two vectors derived from the tree given as fs.tree.
    The first is computed from the distances among the tips only.
    The second is computed from the distances among all nodes
    and is then truncated to its tip entries.
    @param fs: an object whose tree attribute is a newick string
    @return: the response text
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # collect the distinct (name, id) pairs of the tips, ordered by name
    tip_pairs = set()
    for tip in tree.gen_tips():
        tip_pairs.add((tip.get_name(), id(tip)))
    ordered_tip_names, ordered_tip_ids = zip(*sorted(tip_pairs))
    # the internal node ids follow the tip ids
    internal_ids = []
    for node in tree.preorder():
        if not node.is_tip():
            internal_ids.append(id(node))
    all_ids = list(ordered_tip_ids) + internal_ids
    # distance matrices over all nodes and over the tips only
    full_D = tree.get_partial_distance_matrix(all_ids)
    partial_D = tree.get_partial_distance_matrix(ordered_tip_ids)
    # the corresponding balaji matrices
    full_R = Clustering.get_R_balaji(full_D)
    partial_R = Clustering.get_R_balaji(partial_D)
    # only the first vector of each returned pair is reported
    full_va, full_vb = get_eigenvectors(full_R)
    partial_va, partial_vb = get_eigenvectors(partial_R)
    # write the report one line at a time
    ntips = len(ordered_tip_ids)
    report_lines = (
        'Fiedler vector associated with the graph',
        'for which the internal nodes are hidden:',
        str(tuple(partial_va)),
        '',
        'The tip subvector of the Fiedler vector',
        'associated with the graph of the full tree:',
        str(tuple(full_va[:ntips])),
    )
    out = StringIO()
    for line in report_lines:
        print >> out, line
    return out.getvalue()
Exemple #30
0
def get_form():
    """
    Build a form whose only field is the default tree.
    @return: a list of form objects
    """
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    newick_text = NewickIO.get_narrow_newick_string(default_tree, 60)
    tree_field = Form.MultiLine('tree', 'tree', newick_text)
    return [tree_field]
Exemple #31
0
def process(tree_string):
    """
    Embed the vertices of a tree as points under two mass distributions.
    One distribution puts equal mass on every vertex;
    the other puts equal mass on the first nleaves entries and none elsewhere.
    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    # NOTE: this changes the numpy print options for the whole process
    np.set_printoptions(linewidth=200)
    out = StringIO()
    # build the newick tree from the string
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get ordered names and ids
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    # get the distance matrix with ordered indices
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # Define the mass vectors, each normalized to sum to one.
    # NOTE(review): the degenerate vector assumes that ordered_ids
    # lists the leaves first; confirm against get_ordered_ids_and_names.
    m_uniform_unscaled = [1] * nvertices
    m_degenerate_unscaled = [1] * nleaves + [0] * (nvertices - nleaves)
    m_uniform = (
        np.array(m_uniform_unscaled, dtype=float) / sum(m_uniform_unscaled))
    m_degenerate = (
        np.array(m_degenerate_unscaled, dtype=float) /
        sum(m_degenerate_unscaled))
    # show the names and the embedded points
    print >> out, 'ordered names:'
    print >> out, ordered_names
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among all vertices:'
    print >> out, Euclid.edm_to_weighted_points(D, m_uniform)
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among the leaves:'
    print >> out, Euclid.edm_to_weighted_points(D, m_degenerate)
    print >> out
    # return the response without surrounding whitespace
    return out.getvalue().strip()
Exemple #32
0
def get_response_content(fs):
    """
    Compute a contrast matrix for the tips of a tree and format it.
    @param fs: an object with attributes tree, labels, epsilon,
    plain_format, matlab_format and r_format
    @return: the response text, empty if no output format is selected
    @raise HandlingError: if the labels are not exactly the tip names
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # the labels define the row order of the matrices
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # the labels must be exactly the names of the tips
    tip_labels = set(node.get_name() for node in tree.gen_tips())
    if set(ordered_labels) != tip_labels:
        msg = 'the labels should match the labels of the leaves of the tree'
        raise HandlingError(msg)
    # go from tip distances to a laplacian to the contrast matrix
    D = np.array(tree.get_distance_matrix(ordered_labels))
    L = Euclid.edm_to_laplacian(D)
    w, v = get_eigendecomposition(L)
    C = get_contrast_matrix(w, v)
    # set elements with small absolute value to zero
    negligible = abs(C) < fs.epsilon
    C[negligible] = 0
    # pick the first selected output format, if any
    out = StringIO()
    formatter = None
    if fs.plain_format:
        formatter = MatrixUtil.m_to_string
    elif fs.matlab_format:
        formatter = MatrixUtil.m_to_matlab_string
    elif fs.r_format:
        formatter = MatrixUtil.m_to_R_string
    if formatter is not None:
        print >> out, formatter(C)
    return out.getvalue()
Exemple #33
0
def get_form():
    """
    Build the form for sampling distance matrices from a tree.
    @return: the body of a form
    """
    # parse and reformat the hard-coded default tree
    tree_string = (
        '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, '
        '(((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);')
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    newick_text = NewickIO.get_narrow_newick_string(tree, 60)
    # create the form objects one at a time, in display order
    tree_field = Form.MultiLine('tree', 'tree', newick_text)
    length_field = Form.Integer(
        'sequence_length', 'use sequences that are this long', 100, low=1)
    assumption_field = Form.RadioGroup(
        'assumption', 'distance matrix sampling model', [
            RadioItem('infinite_alleles', 'infinite alleles', True),
            RadioItem('jukes_cantor', 'Jukes-Cantor model (4 alleles)')])
    infinity_field = Form.RadioGroup(
        'infinity', 'matrices with infinite distances', [
            RadioItem('reject_infinity', 'reject these matrices', True),
            RadioItem('replace_infinity', 'use 20 instead')])
    zero_field = Form.RadioGroup(
        'zero', 'matrices with zero distances', [
            RadioItem('reject_zero', 'reject these matrices'),
            RadioItem('replace_zero', 'use .00001 instead'),
            RadioItem('remain_zero', 'use 0 unmodified', True)])
    criterion_field = Form.RadioGroup(
        'criterion', 'tree reconstruction criterion', [
            RadioItem('sign', 'spectral sign approximation', True),
            RadioItem('nj', 'neighbor joining'),
            RadioItem('random', 'random bipartition')])
    return [
        tree_field,
        length_field,
        assumption_field,
        infinity_field,
        zero_field,
        criterion_field]
Exemple #34
0
def get_response_content(fs):
    """
    Evaluate a split of four user-selected tips of a tree.
    The determinant of a 2x2 submatrix of the response matrix is always
    reported; the remaining sections depend on the selected options.
    @param fs: an object with attributes tree, lhs_a, lhs_b, rhs_a, rhs_b,
    show_response, show_reduced_response and show_blen
    @return: the response text
    @raise HandlingError: if a requested label is not a tip of the tree
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # assert that the given labels are tips of the tree
    tip_name_set = set(node.get_name() for node in tree.gen_tips())
    user_name_set = set([fs.lhs_a, fs.lhs_b, fs.rhs_a, fs.rhs_b])
    bad_names = user_name_set - tip_name_set
    if bad_names:
        # sort the names so that the message does not depend on set order
        bad_name_string = ', '.join(sorted(bad_names))
        msg = 'these labels are not valid tips: %s' % bad_name_string
        raise HandlingError(msg)
    # get the distance matrix with rows ordered by tip name
    ordered_names = list(sorted(node.get_name() for node in tree.gen_tips()))
    D = np.array(tree.get_distance_matrix(ordered_names))
    # get the response matrix
    R = Clustering.get_R_stone(D)
    # find the matrix index of each selected tip
    name_to_index = dict((name, i) for i, name in enumerate(ordered_names))
    la = name_to_index[fs.lhs_a]
    lb = name_to_index[fs.lhs_b]
    ra = name_to_index[fs.rhs_a]
    rb = name_to_index[fs.rhs_b]
    # rows are the lhs tips and columns are the rhs tips
    R_reduced = np.zeros((2, 2))
    for i, lhs in enumerate((la, lb)):
        for j, rhs in enumerate((ra, rb)):
            R_reduced[i][j] = R[lhs][rhs]
    # treat a determinant that is tiny in magnitude as exactly zero
    epsilon = 1e-13
    criterion = np.linalg.det(R_reduced)
    if abs(criterion) < epsilon:
        criterion = 0
    # In analogy to the four point condition,
    # calculate the distance in two different ways and use the smaller.
    blen_a = (D[la][rb] + D[lb][ra] - D[la][lb] - D[ra][rb]) / 2.0
    blen_b = (D[la][ra] + D[lb][rb] - D[la][lb] - D[ra][rb]) / 2.0
    blen = min(blen_a, blen_b)
    # define the response as a list of paragraphs, each a list of lines
    paragraphs = []
    if fs.show_response:
        paragraphs.append([
            'response matrix with rows ordered alphabetically by leaf label:',
            MatrixUtil.m_to_string(R)])
    if fs.show_reduced_response:
        paragraphs.append([
            '2x2 submatrix of the response matrix:',
            MatrixUtil.m_to_string(R_reduced)])
    # the determinant is reported unconditionally
    paragraphs.append([
        'determinant of the 2x2 submatrix of the response matrix:',
        str(criterion)])
    if fs.show_blen:
        paragraphs.append(['branch length defined by the split:', str(blen)])
    # return the response
    return '\n\n'.join('\n'.join(p) for p in paragraphs) + '\n'
Exemple #35
0
def get_form():
    """
    Build the form with a tree, a distance matrix choice and output options.
    @return: the body of a form
    """
    # format the default tree string for display in the form
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    newick_text = NewickIO.get_narrow_newick_string(default_tree, 60)
    form_objects = [Form.MultiLine("tree", "newick tree", newick_text)]
    # choice of the nodes that index the distance matrix
    matrix_choices = [RadioItem("standard", "tips only", True)]
    for name, label in (
            ("augmented", "all nodes"),
            ("named", "all named nodes")):
        matrix_choices.append(RadioItem(name, label))
    form_objects.append(Form.RadioGroup(
        "matrix", "nodes used for the distance matrix", matrix_choices))
    # every output option is checked by default
    output_items = []
    for name, label in (
            ("show_split", "exact criterion partition"),
            ("show_value", "exact criterion value"),
            ("show_value_minus_trace", "exact criterion value minus trace"),
            ("show_fiedler_split", "show the spectral sign partition"),
            ("show_fiedler_eigenvector", "show the eigenvector of interest"),
            ("show_labels", "ordered labels"),
            ("show_distance_matrix", "distance matrix"),
            ("show_M_matrix", "M matrix")):
        output_items.append(CheckItem(name, label, True))
    form_objects.append(Form.CheckGroup(
        "output_options", "output options", output_items))
    return form_objects
Exemple #36
0
def get_form():
    """
    Build the form for sampling distance matrices from the default tree.
    @return: the body of a form
    """
    # format the default tree string for display in the form
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    newick_text = NewickIO.get_narrow_newick_string(default_tree, 60)
    # create the form objects one at a time, in display order
    tree_field = Form.MultiLine('tree', 'newick tree', newick_text)
    length_field = Form.Integer(
        'length', 'use sequences that are this long', 100, low=1)
    assumption_field = Form.RadioGroup(
        'assumption', 'distance matrix sampling model', [
            Form.RadioItem('infinite_alleles', 'infinite alleles', True),
            Form.RadioItem('jukes_cantor', 'Jukes-Cantor model (4 alleles)')])
    infinity_field = Form.RadioGroup(
        'infinity', 'infinite distance estimates', [
            Form.RadioItem('reject_infinity', 'reject these matrices'),
            Form.RadioItem('replace_infinity', 'replace inf with 20', True)])
    zero_field = Form.RadioGroup(
        'zero', 'distance estimates of zero', [
            Form.RadioItem('reject_zero', 'reject these matrices'),
            Form.RadioItem('replace_zero', 'use .00001 instead of zero'),
            Form.RadioItem('remain_zero', 'use 0 unmodified', True)])
    return [
        tree_field,
        length_field,
        assumption_field,
        infinity_field,
        zero_field]
Exemple #37
0
def get_form():
    """
    Build the form for perturbing the distance matrix of a tree.
    @return: the body of a form
    """
    # parse and reformat the hard-coded default tree
    tree_string = "(a:1, (b:2, d:5):1, c:4);"
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    newick_text = NewickIO.get_narrow_newick_string(tree, 60)
    # the default labels are the sorted tip names, one per line
    labels = [tip.name for tip in tree.gen_tips()]
    labels.sort()
    label_text = "\n".join(labels)
    # create the form objects one at a time, in display order
    tree_field = Form.MultiLine("tree", "newick tree", newick_text)
    label_field = Form.MultiLine("inlabels", "ordered labels", label_text)
    strength_field = Form.Float(
        "strength", "perturbation strength", 0.1, low_inclusive=0)
    option_items = [
        CheckItem("perturbed", "a perturbed distance matrix", True),
        CheckItem("distance", "the original distance matrix"),
        CheckItem("outlabels", "ordered labels"),
    ]
    option_field = Form.CheckGroup("options", "output options", option_items)
    return [tree_field, label_field, strength_field, option_field]
Exemple #38
0
def get_form():
    """
    Build the form for sampling distance matrices from the default tree.
    @return: the body of a form
    """
    # format the default tree string for display in the form
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    newick_text = NewickIO.get_narrow_newick_string(default_tree, 60)
    form_objects = [
        Form.MultiLine('tree', 'newick tree', newick_text),
        Form.Integer('length', 'use sequences that are this long', 100, low=1),
    ]
    # the model under which distance matrices are sampled
    assumption_items = [
        Form.RadioItem('infinite_alleles', 'infinite alleles', True),
        Form.RadioItem('jukes_cantor', 'Jukes-Cantor model (4 alleles)'),
    ]
    form_objects.append(Form.RadioGroup(
        'assumption', 'distance matrix sampling model', assumption_items))
    # what to do with infinite distance estimates
    infinity_items = [
        Form.RadioItem('reject_infinity', 'reject these matrices'),
        Form.RadioItem('replace_infinity', 'replace inf with 20', True),
    ]
    form_objects.append(Form.RadioGroup(
        'infinity', 'infinite distance estimates', infinity_items))
    # what to do with distance estimates of zero
    zero_items = [
        Form.RadioItem('reject_zero', 'reject these matrices'),
        Form.RadioItem('replace_zero', 'use .00001 instead of zero'),
        Form.RadioItem('remain_zero', 'use 0 unmodified', True),
    ]
    form_objects.append(Form.RadioGroup(
        'zero', 'distance estimates of zero', zero_items))
    return form_objects
Exemple #39
0
def get_response_content(fs):
    """
    Compare two matrices computed from the full distance matrix of a tree.
    The matrices come from get_aug_a and get_aug_b, and the response
    shows both of them along with the result of np.allclose.
    @param fs: an object with attributes tree_string and N
    @return: the response text
    """
    # get the tree
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # get the node ids, by kind
    internal = [id(node) for node in tree.gen_internal_nodes()]
    tips = [id(node) for node in tree.gen_tips()]
    # Order the ids with the internal nodes first and the tips last.
    # NOTE(review): an earlier comment here said the leaves came first,
    # which is not what the code does; confirm which order
    # get_aug_a and get_aug_b expect.
    ordered_ids = internal + tips
    # get the full distance matrix
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # compute the two matrices to be compared
    p = len(internal)
    q = len(tips)
    N = fs.N
    aug_a = get_aug_a(D, p, q, N)
    aug_b = get_aug_b(D, p, q, N)
    # show the output
    out = StringIO()
    print >> out, "-(1/2)MEDE'M':"
    print >> out, aug_a
    print >> out
    print >> out, "-(1/2)HMDM'H:"
    print >> out, aug_b
    print >> out
    print >> out, 'allclose:', np.allclose(aug_a, aug_b)
    return out.getvalue()
Exemple #40
0
def get_default_original_tree():
    """
    Parse the default newick string and scale its branch lengths by one half.
    @return: the parsed tree after rescaling
    """
    scale = 0.5
    tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    for node in tree.preorder():
        old_length = node.get_branch_length()
        # only nodes that have a branch length are rescaled
        if old_length is not None:
            node.set_branch_length(old_length * scale)
    return tree
Exemple #41
0
 def test_felsenstein(self):
     # Build the expected and the observed contrast matrices,
     # scaling each column by the reciprocal square root of its variance.
     # No assertion is made; the string block below holds disabled code
     # that prints the matrices for manual inspection.
     felsenstein_tree = NewickIO.parse(
         g_felsenstein_tree_string, FelTree.NewickTree)
     ordered_names = ('a', 'b', 'c', 'd', 'e')
     expected_scale = np.diag(1 / np.sqrt(g_contrast_variances))
     C_expected = np.dot(g_contrast_matrix, expected_scale)
     contrasts, variances = get_contrasts_and_variances(
         felsenstein_tree, ordered_names)
     observed_columns = np.array(contrasts).T
     observed_scale = np.diag(1 / np.sqrt(np.array(variances)))
     C_observed = np.dot(observed_columns, observed_scale)
     """
     print
     print 'felsenstein variances:'
     print g_contrast_variances
     print 'observed variances:'
     print variances
     print
     print 'felsenstein contrast matrix:'
     print C_expected
     print 'observed contrast matrix:'
     print C_observed
     L_expected = np.dot(C_expected, C_expected.T)
     L_observed = np.dot(C_observed, C_observed.T)
     print 'felsenstein L matrix:'
     print L_expected
     print 'observed L matrix:'
     print L_observed
     D = np.array(tree.get_distance_matrix(ordered_names))
     L = Euclid.edm_to_laplacian(D)
     print 'L matrix derived from the D matrix:'
     print L
     """
     pass
Exemple #42
0
def get_response_content(fs):
    """
    Show the eigendecomposition of the Gower matrix of a tree as html.
    The matrix is -0.5 times the doubly centered tip distance matrix.
    The table columns are ordered by decreasing eigenvalue and
    the rows are ordered by tip name.
    @param fs: an object whose tree attribute is a newick string
    @return: the html text of the response
    @raise HandlingError: if the tree has fewer than two leaves
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_names = list(sorted(node.name for node in tree.gen_tips()))
    n = len(ordered_names)
    if n < 2:
        raise HandlingError('the newick tree should have at least two leaves')
    # get the eigendecomposition
    D = np.array(tree.get_distance_matrix(ordered_names))
    G = (-0.5) * MatrixUtil.double_centered(D)
    # eigh returns the eigenvectors as the columns of its second result
    eigenvalues, eigenvector_columns = np.linalg.eigh(G)
    # Order by decreasing eigenvalue using an index permutation.
    # Sorting (eigenvalue, eigenvector) tuples would compare the numpy
    # vectors whenever two eigenvalues tie, and that raises ValueError.
    order = np.argsort(eigenvalues)[::-1]
    sorted_eigenvalues = tuple(eigenvalues[order])
    # row i holds the entries of tip i in each of the ordered eigenvectors
    M = [tuple(row) for row in eigenvector_columns[:, order]]
    # write the html
    out = StringIO()
    print >> out, '<html>'
    print >> out, '<body>'
    print >> out, HtmlTable.get_labeled_table_string(
            sorted_eigenvalues, ordered_names, M)
    print >> out, '</body>'
    print >> out, '</html>'
    # write the response
    return out.getvalue()
Exemple #43
0
def get_response_content(fs):
    """
    Compare principal coordinates of a distance and a dissimilarity matrix.
    The dissimilarity matrix is obtained by applying
    distance_to_dissimilarity to each entry of the tip distance matrix.
    @param fs: an object whose tree attribute is a newick string
    @return: the response text
    """
    # arbitrarily define the size of the alphabet
    k = 4
    # define the response
    out = StringIO()
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # define the order of the tip names
    ordered_tip_names = list(
        sorted(node.get_name() for node in tree.gen_tips()))
    # get the matrix of pairwise distances among the tips
    D = np.array(tree.get_distance_matrix(ordered_tip_names))
    D_vector = get_principal_coordinate(D)
    # get the dissimilarity matrix from the distance matrix
    dissimilarity = np.array([[distance_to_dissimilarity(d, k) for d in row]
                              for row in D])
    dissimilarity_vector = get_principal_coordinate(dissimilarity)
    # each section shows a matrix followed by one projection per tip
    sections = (
        ('original distance matrix:',
         D,
         'projections onto the principal coordinate using the original distance matrix:',
         D_vector),
        ('dissimilarity matrix:',
         dissimilarity,
         'projections onto the principal coordinate using the dissimilarity matrix:',
         dissimilarity_vector),
    )
    for matrix_title, matrix, vector_title, vector in sections:
        print >> out, matrix_title
        print >> out, MatrixUtil.m_to_string(matrix)
        print >> out
        print >> out, vector_title
        for name, value in zip(ordered_tip_names, vector):
            print >> out, '\t'.join((name, str(value)))
        print >> out
    # return the response
    return out.getvalue()
Exemple #44
0
 def test_get_weighted_split_count(self):
     """
     Test the function that gets the weighted number of nontrivial splits
     Three six-leaf trees are checked against their expected counts.
     """
     # each tree string is paired with its expected weighted split count
     cases = (
         ('((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);', 45),
         ('(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);', 50),
         ('(((A:1, B:1):1, C:1):1, (D:1, (E:1, F:1):1):1);', 50),
     )
     # parse every tree before making any assertion
     trees = []
     for tree_string, expected in cases:
         trees.append(NewickIO.parse(tree_string, FelTree.NewickTree))
     # the counts differ among trees that have the same number of leaves
     for tree, (tree_string, expected) in zip(trees, cases):
         self.assertEqual(get_weighted_split_count(tree), expected)
Exemple #45
0
def get_response_content(fs):
    """
    Analyze each of the newick trees given one per line in fs.trees.
    @param fs: an object with attributes trees, epsilon and options
    @return: the response text
    @raise HandlingError: if a tree has fewer than four tips,
    an unlabeled tip or a repeated tip label,
    or if epsilon is not in the interval [0, 1)
    """
    # parse the trees, rejecting any that do not meet the requirements
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            msg = 'expected at least 4 tips but found ' + str(ntips)
            raise HandlingError(msg)
        if None in [name for name in tip_names if name is None]:
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # get the threshold for negligibility of an eigenvector loading
    epsilon = fs.epsilon
    if not (0 <= epsilon < 1):
        raise HandlingError('invalid threshold for negligibility')
    # get the set of selected options
    selected_options = fs.options
    # analyze every tree before writing anything
    results = [AnalysisResult(tree, epsilon) for tree in trees]
    # write the lines of each result followed by a blank line
    out = StringIO()
    for result in results:
        for line in result.get_response_lines(selected_options):
            print >> out, line
        print >> out
    return out.getvalue()
Exemple #46
0
def get_response_content(fs):
    """
    Compare principal coordinates of a distance and a dissimilarity matrix.
    The dissimilarity matrix is obtained by applying
    distance_to_dissimilarity to each entry of the tip distance matrix.
    @param fs: an object whose tree attribute is a newick string
    @return: the response text
    """
    # arbitrarily define the size of the alphabet
    k = 4
    # define the response
    out = StringIO()
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # define the order of the tip names
    ordered_tip_names = list(sorted(node.get_name() for node in tree.gen_tips()))
    # get the matrix of pairwise distances among the tips
    D = np.array(tree.get_distance_matrix(ordered_tip_names))
    D_vector = get_principal_coordinate(D)
    # get the dissimilarity matrix from the distance matrix
    dissimilarity = np.array([[distance_to_dissimilarity(d, k) for d in row] for row in D])
    dissimilarity_vector = get_principal_coordinate(dissimilarity)
    # show the distance matrix and the projection of each tip
    print >> out, 'original distance matrix:'
    print >> out, MatrixUtil.m_to_string(D)
    print >> out
    print >> out, 'projections onto the principal coordinate using the original distance matrix:'
    for name, value in zip(ordered_tip_names, D_vector):
        print >> out, '\t'.join((name, str(value)))
    print >> out
    # show the dissimilarity matrix and the projection of each tip
    print >> out, 'dissimilarity matrix:'
    print >> out, MatrixUtil.m_to_string(dissimilarity)
    print >> out
    print >> out, 'projections onto the principal coordinate using the dissimilarity matrix:'
    for name, value in zip(ordered_tip_names, dissimilarity_vector):
        print >> out, '\t'.join((name, str(value)))
    print >> out
    # return the response
    return out.getvalue()
Exemple #47
0
def main():
    # use the default sequence length
    sequence_length = 100
    # use the default tree
    tree_string = '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, (((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get arbitrarily ordered leaf names
    ordered_names = list(node.name for node in tree.gen_tips())
    # create the sampler
    sampler = DMSampler.InfiniteAllelesSampler(
            tree, ordered_names, sequence_length)
    sampler.set_inf_replacement(20)
    sampler.set_zero_replacement(0.0)
    # do some sampling, saving a summary but discarding the samples
    allocated_seconds = 2
    start_time = time.clock()
    run_seconds = 0
    for result in sampler.gen_samples_or_none():
        run_seconds = time.clock() - start_time
        if run_seconds > allocated_seconds:
            break
    # define the response
    print 'these are the results for a', run_seconds, 'second run:'
    print sampler.proposed, 'samples were proposed'
    print sampler.accepted, 'samples were accepted'
    msg = 'proposals had a distance estimate of zero'
    print sampler.proposals_with_zero, msg
    msg = 'proposals had a distance estimate of infinity'
    print sampler.proposals_with_inf, msg
Exemple #48
0
def get_response_content(fs):
    """
    Report two vectors derived from the tree given as fs.tree.
    One uses the distances among the tips only; the other uses
    the distances among all nodes and is cut down to its tip entries.
    @param fs: an object whose tree attribute is a newick string
    @return: the response text
    """
    # read the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # get the distinct (name, id) pairs of the tips, ordered by name
    name_id_pairs = sorted(
        set((tip.get_name(), id(tip)) for tip in tree.gen_tips()))
    ordered_tip_names, ordered_tip_ids = zip(*name_id_pairs)
    ntips = len(ordered_tip_ids)
    # the ids of the internal nodes are appended after the tip ids
    ordered_ids = list(ordered_tip_ids)
    ordered_ids += [id(node) for node in tree.preorder() if not node.is_tip()]
    # get the distance matrices
    full_D = tree.get_partial_distance_matrix(ordered_ids)
    partial_D = tree.get_partial_distance_matrix(ordered_tip_ids)
    # get the balaji matrices
    full_R = Clustering.get_R_balaji(full_D)
    partial_R = Clustering.get_R_balaji(partial_D)
    # only the first vector of each returned pair is reported
    full_va, full_vb = get_eigenvectors(full_R)
    partial_va, partial_vb = get_eigenvectors(partial_R)
    # format the two vectors
    partial_text = str(tuple(partial_va))
    full_text = str(tuple(full_va[:ntips]))
    # create the response
    out = StringIO()
    print >> out, 'Fiedler vector associated with the graph'
    print >> out, 'for which the internal nodes are hidden:'
    print >> out, partial_text
    print >> out
    print >> out, 'The tip subvector of the Fiedler vector'
    print >> out, 'associated with the graph of the full tree:'
    print >> out, full_text
    return out.getvalue()
Exemple #49
0
def get_form():
    """
    Build a form that holds a single tree field.
    @return: a list of form objects
    """
    parsed = NewickIO.parse(g_default_string, FelTree.NewickTree)
    form_objects = []
    form_objects.append(Form.MultiLine(
        'tree', 'tree', NewickIO.get_narrow_newick_string(parsed, 60)))
    return form_objects
Exemple #50
0
def get_response_content(fs):
    """
    Evaluate a split of the tips of a tree into selected and unselected.
    @param fs: an object with attributes tree and selection
    @return: the value of the quadratic form followed by a newline
    @raise HandlingError: if a selected name is not a tip,
    or if either side of the split is empty
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # compare the selected names with the names of the tips
    selected_name_set = set(
        Util.get_stripped_lines(fs.selection.splitlines()))
    possible_name_set = set(tip.get_name() for tip in tree.gen_tips())
    extra_names = selected_name_set - possible_name_set
    if extra_names:
        raise HandlingError(
            'the following selected names ' +
            'are not valid tips: %s' % str(tuple(extra_names)))
    # both sides of the split must be nonempty
    complement_name_set = possible_name_set - selected_name_set
    if not (selected_name_set and complement_name_set):
        raise HandlingError('the selection is degenerate')
    # the tips are used in the order in which the tree yields them
    ordered_names = [tip.get_name() for tip in tree.gen_tips()]
    # the Y vector is +1 for selected tips and -1 for the others
    Y = np.array(
        [1 if name in selected_name_set else -1 for name in ordered_names])
    # evaluate the quadratic form of Y with respect to the R matrix
    D = tree.get_distance_matrix(ordered_names)
    R = Clustering.get_R_balaji(D)
    value = np.dot(np.dot(Y, R), Y.T)
    return str(value) + '\n'
Exemple #51
0
def get_response_content(fs):
    """
    Analyze each of the newick trees given one per line in fs.trees.
    @param fs: an object with attributes trees, epsilon and options
    @return: the response text
    @raise HandlingError: if a tree has fewer than four tips,
    an unlabeled tip or a repeated tip label,
    or if epsilon is not in the interval [0, 1)
    """
    def parse_checked(tree_string):
        # parse one tree and make sure that it meets the requirements
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        if len(tip_names) < 4:
            msg = 'expected at least 4 tips but found ' + str(len(tip_names))
            raise HandlingError(msg)
        for name in tip_names:
            if name is None:
                raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != len(tip_names):
            raise HandlingError('each terminal node label must be unique')
        return tree
    # get the newick trees
    tree_lines = iterutils.stripped_lines(StringIO(fs.trees))
    trees = [parse_checked(tree_string) for tree_string in tree_lines]
    # get the threshold for negligibility of an eigenvector loading
    epsilon = fs.epsilon
    if not (0 <= epsilon < 1):
        raise HandlingError('invalid threshold for negligibility')
    # get the set of selected options
    selected_options = fs.options
    # analyze every tree before writing anything
    results = [AnalysisResult(tree, epsilon) for tree in trees]
    # each result contributes its lines and then a blank line
    out = StringIO()
    for result in results:
        for line in result.get_response_lines(selected_options):
            print >> out, line
        print >> out
    return out.getvalue()
Exemple #52
0
def get_response_content(fs):
    """
    Compare several point embeddings of the vertices of a tree.
    The embeddings come from the leaf distance matrix, an augmented
    distance matrix, two mass-weighted versions of the full distance
    matrix, and a rotation of the points of the full distance matrix.
    @param fs: an object with attributes tree_string, ndups and show_aug
    @return: the response text
    """
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves

    def get_masses(leaf_mass):
        # unit mass per internal node and leaf_mass per leaf, normalized
        unscaled = [1] * ninternal + [leaf_mass] * nleaves
        return np.array(unscaled, dtype=float) / sum(unscaled)

    # get ordered ids with the internal nodes first
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(tip) for tip in tree.gen_tips()]
    # get the distance matrix and the augmented distance matrix
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # get points from the leaf and from the augmented distance matrices
    X_leaf = Euclid.edm_to_points(D_leaf)
    X_aug = Euclid.edm_to_points(D_aug, nvertices - 1)
    # explicitly compute the points for the given number of dups using weights
    X_weighted = Euclid.edm_to_weighted_points(D, get_masses(1 + fs.ndups))
    # explicitly compute the points for 10x dups
    X_weighted_10x = Euclid.edm_to_weighted_points(
        D, get_masses(1 + fs.ndups * 10))
    # Compute the limiting points as the number of dups increases:
    # center the points on the mean of the last nleaves rows and then
    # rotate them using the right singular vectors of those rows.
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    XL = X[-nleaves:]
    U, s, Vt = np.linalg.svd(XL)
    Z = np.dot(X, Vt.T)
    # NOTE: this changes the numpy print options for the whole process
    np.set_printoptions(linewidth=300, threshold=10000)
    fiedler_note = '(the first column is proportional to the Fiedler vector):'
    # report the results
    out = StringIO()
    print >> out, 'leaf distance matrix:'
    print >> out, D_leaf
    print >> out
    print >> out, 'points derived from the leaf distance matrix'
    print >> out, fiedler_note
    print >> out, X_leaf
    print >> out
    if fs.show_aug:
        print >> out, 'augmented distance matrix:'
        print >> out, D_aug
        print >> out
    print >> out, 'points derived from the augmented distance matrix'
    print >> out, fiedler_note
    print >> out, get_ugly_matrix(X_aug, ninternal, nleaves)
    print >> out
    # the last three sections share one layout
    for title, points in (
            ('points computed using masses:', X_weighted),
            ('points computed using masses with 10x dups:', X_weighted_10x),
            ('limiting points:', Z)):
        print >> out, title
        print >> out, points
        print >> out
    return out.getvalue()
Exemple #53
0
def get_response_content(fs):
    """
    Report the determinant criterion for a 2x2 block of the response matrix.
    The block is selected by two "left" tip labels (rows)
    and two "right" tip labels (columns).
    @param fs: an object providing tree, lhs_a, lhs_b, rhs_a, rhs_b,
    show_response, show_reduced_response, and show_blen
    @return: the response text
    @raise HandlingError: when a given label is not a tip of the tree
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # Order the tip names alphabetically;
    # this ordering defines the rows and columns of the matrices below.
    ordered_names = sorted(node.get_name() for node in tree.gen_tips())
    # assert that the given labels are tips of the tree
    user_name_set = set([fs.lhs_a, fs.lhs_b, fs.rhs_a, fs.rhs_b])
    bad_names = user_name_set - set(ordered_names)
    if bad_names:
        # sort the names so the message does not depend on set iteration order
        msg = 'these labels are not valid tips: %s' % ', '.join(sorted(bad_names))
        raise HandlingError(msg)
    # get the distance matrix
    D = np.array(tree.get_distance_matrix(ordered_names))
    # get the response matrix
    R = Clustering.get_R_stone(D)
    # map the user labels to row and column indices
    name_to_index = dict((name, i) for i, name in enumerate(ordered_names))
    la = name_to_index[fs.lhs_a]
    lb = name_to_index[fs.lhs_b]
    ra = name_to_index[fs.rhs_a]
    rb = name_to_index[fs.rhs_b]
    # get the two by two matrix with lhs labels as rows and rhs labels as columns
    R_reduced = np.zeros((2, 2))
    for i, row in enumerate((la, lb)):
        for j, col in enumerate((ra, rb)):
            R_reduced[i][j] = R[row][col]
    # treat a determinant within rounding error of zero as exactly zero
    epsilon = 1e-13
    criterion = np.linalg.det(R_reduced)
    if abs(criterion) < epsilon:
        criterion = 0
    # in analogy to the four point condition, use two different ways of calculating the distance
    blen_a = (D[la][rb] + D[lb][ra] - D[la][lb] - D[ra][rb]) / 2.0
    blen_b = (D[la][ra] + D[lb][rb] - D[la][lb] - D[ra][rb]) / 2.0
    blen = min(blen_a, blen_b)
    # define the response as a list of paragraphs, each a list of lines
    paragraphs = []
    if fs.show_response:
        paragraphs.append([
                'response matrix with rows ordered alphabetically by leaf label:',
                MatrixUtil.m_to_string(R)])
    if fs.show_reduced_response:
        paragraphs.append([
                '2x2 submatrix of the response matrix:',
                MatrixUtil.m_to_string(R_reduced)])
    # the determinant is always reported
    paragraphs.append([
            'determinant of the 2x2 submatrix of the response matrix:',
            str(criterion)])
    if fs.show_blen:
        paragraphs.append([
                'branch length defined by the split:',
                str(blen)])
    # return the response
    return '\n\n'.join('\n'.join(p) for p in paragraphs) + '\n'
Exemple #54
0
def get_response_content(fs):
    """
    Embed the vertices of a tree as points in several ways and report them.
    @param fs: an object providing tree_string, ndups, and show_aug
    @return: the response text
    """
    # build the newick tree from the string and count its vertices
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nvertices = sum(1 for _ in tree.preorder())
    nleaves = sum(1 for _ in tree.gen_tips())
    ninternal = nvertices - nleaves

    def get_normalized_masses(leaf_mass):
        # internal vertices get unit mass and each leaf gets the given mass
        masses = [1] * ninternal + [leaf_mass] * nleaves
        return np.array(masses, dtype=float) / sum(masses)

    # the ordered ids list the internal nodes first
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(tip) for tip in tree.gen_tips()]
    # distance matrices: leaves only, all vertices, and augmented
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # points from the leaf distance matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    # points from the augmented distance matrix
    X_aug = Euclid.edm_to_points(D_aug, nvertices - 1)
    # points from explicit weights, for the given dups and for 10x dups
    X_weighted = Euclid.edm_to_weighted_points(
            D, get_normalized_masses(1 + fs.ndups))
    X_weighted_10x = Euclid.edm_to_weighted_points(
            D, get_normalized_masses(1 + fs.ndups * 10))
    # Limiting points as the number of dups increases:
    # center on the leaf centroid and rotate by the right singular vectors
    # of the centered leaf points.
    points = Euclid.edm_to_points(D)
    points -= np.mean(points[-nleaves:], axis=0)
    Vt = np.linalg.svd(points[-nleaves:])[2]
    Z = np.dot(points, Vt.T)
    # set the array formatting before anything is converted to text
    np.set_printoptions(linewidth=300, threshold=10000)
    # each section is a list of lines and is followed by a blank line
    sections = [
            ['leaf distance matrix:', D_leaf],
            ['points derived from the leaf distance matrix',
                '(the first column is proportional to the Fiedler vector):',
                X_leaf]]
    if fs.show_aug:
        sections.append(['augmented distance matrix:', D_aug])
    sections.extend([
            ['points derived from the augmented distance matrix',
                '(the first column is proportional to the Fiedler vector):',
                get_ugly_matrix(X_aug, ninternal, nleaves)],
            ['points computed using masses:', X_weighted],
            ['points computed using masses with 10x dups:', X_weighted_10x],
            ['limiting points:', Z]])
    # report the results
    out = StringIO()
    for section in sections:
        for item in section:
            print >> out, item
        print >> out
    return out.getvalue()
Exemple #55
0
def get_response_content(fs):
    """
    Simulate distance matrices from a tree and summarize the partition errors.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    @raise HandlingError: when no bipartition function is selected
    or when the request is predicted to take too many steps
    """
    # read the criterion, creating the splitter object
    if fs.exact:
        splitter = Clustering.StoneExactDMS()
    elif fs.sign:
        splitter = Clustering.StoneSpectralSignDMS()
    elif fs.nj:
        splitter = Clustering.NeighborJoiningDMS()
    elif fs.random:
        splitter = Clustering.RandomDMS()
    else:
        # without this branch the splitter name would be unbound below
        raise HandlingError("no bipartition function was selected")
    # read the original tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # define the maximum number of steps we want
    max_steps = 1000000
    # Make sure that the splitter object is appropriate
    # for the number of taxa and the number of tree reconstructions.
    ntaxa = len(list(tree.gen_tips()))
    if splitter.get_complexity(ntaxa) * fs.iterations > max_steps:
        msg_a = "use a faster bipartition function, "
        msg_b = "fewer taxa, or fewer tree reconstructions"
        raise HandlingError(msg_a + msg_b)
    # define the simulation parameters
    sim = Simulation(splitter, "nj", "cgi tree building simulation")
    sim.set_original_tree(tree)
    sim.set_step_limit(max_steps)
    # define an arbitrary but consistent ordering of the taxa
    ordered_names = [node.name for node in tree.gen_tips()]
    # attempt to simulate a bunch of distance matrices
    sampler = DMSampler.DMSampler(tree, ordered_names, fs.length)
    distance_matrices = []
    for result in sampler.gen_samples_or_none():
        # if a proposal was accepted then add it to the list
        if result:
            sequence_list, distance_matrix = result
            distance_matrices.append(distance_matrix)
        # If enough accepted samples have been generated then stop sampling.
        # The comparison is the same as an exact zero test for a positive
        # iteration count, but it also stops for a non-positive count.
        remaining_acceptances = fs.iterations - len(distance_matrices)
        if remaining_acceptances <= 0:
            break
        # If the remaining number of computrons is predicted
        # to be too much then stop.
        if sampler.get_remaining_computrons(remaining_acceptances) > max_steps:
            msg_a = "this combination of parameters "
            msg_b = "is predicted to take too long"
            raise HandlingError(msg_a + msg_b)
    sim.run(distance_matrices, ordered_names)
    # define the response
    out = StringIO()
    print >> out, "partition error count frequencies:"
    print >> out, sim.get_histogram_string()
    print >> out, ""
    print >> out, "weighted partition errors:", sim.get_deep_loss()
    # return the response
    return out.getvalue()
Exemple #56
-1
def get_response_content(fs):
    """
    Report the splits of a tree and of its pruning to the selected tip names.
    @param fs: an object providing names, tree, show_newick, and show_art
    @return: the response text
    @raise HandlingError: when a selected name is not a tip of the tree
    """
    # read the selected names, one per line
    selected_name_set = set(Util.get_stripped_lines(StringIO(fs.names)))
    # read the tree and collect the names of its tips
    original_tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    tip_name_set = set(tip.get_name() for tip in original_tree.gen_tips())
    # every selected name must be a tip of the tree
    unknown_names = selected_name_set - tip_name_set
    if unknown_names:
        raise HandlingError(
                "the following selected names are not valid tips: %s"
                % str(tuple(unknown_names)))
    # parse the tree again as a simple tree and prune it to the selection
    pruned_tree = get_pruned_tree(
            NewickIO.parse(fs.tree, Newick.NewickTree), selected_name_set)
    # write a report for the original tree and then for the pruned tree
    labeled_trees = (
            (original_tree, "the original tree"),
            (pruned_tree, "the pruned tree"))
    out = StringIO()
    for current_tree, label in labeled_trees:
        print >> out, "calculating splits of %s:" % label
        print >> out, process_tree(
                current_tree, label, fs.show_newick, fs.show_art)
    # return the response
    return out.getvalue()
Exemple #57
-1
def get_response_content(fs):
    """
    Normalize a newick tree and report each change that was made.
    The root branch length is removed, a root with fewer than three children
    is replaced by its child with the most children,
    and the names of internal nodes are removed.
    @param fs: an object providing the newick string as its tree attribute
    @return: the response text, ending with the reformatted newick string
    """
    # read the tree
    tree = NewickIO.parse(fs.tree, Newick.NewickTree)
    # begin the response
    out = StringIO()
    # remove the branch length associated with the root
    root = tree.get_root()
    if root.blen is not None:
        print >> out, 'the root originally had a branch length of', root.blen
        root.blen = None
    else:
        print >> out, 'the root did not originally have a branch length'
    # force a trifurcation at the root
    if root.get_child_count() < 3:
        print >> out, 'the original root had', root.get_child_count(), 'children'
        # Pick the child with the most children.
        # Using a key avoids comparing node objects when counts are tied;
        # the first such child is chosen in that case.
        best_child = max(
                root.gen_children(),
                key=lambda child: child.get_child_count())
        tree.reroot(best_child)
        tree.remove_node(root)
        print >> out, 'the new root has', tree.get_root().get_child_count(), 'children'
    else:
        print >> out, 'the root has', root.get_child_count(), 'children'
    # remove names of internal nodes
    nremoved_names = 0
    for node in tree.preorder():
        if node.has_children() and node.name is not None:
            node.name = None
            nremoved_names += 1
    print >> out, 'removed', nremoved_names, 'internal node names'
    # draw the new formatted newick string after a break
    print >> out
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 120)
    print >> out, formatted_tree_string
    # return the response
    return out.getvalue()