Ejemplo n.º 1
def get_response_content(fs):
    """Report the splits of a tree and of its pruning to selected tips.

    @param fs: form data; fs.names, fs.tree, fs.show_newick and
        fs.show_art are read
    @return: the text produced by process_tree for the original tree
        and for the pruned tree, each under its own heading
    @raise HandlingError: if a selected name is not a tip of the tree
    """
    # the requested tip names, one per stripped line of the names field
    chosen_names = set(Util.get_stripped_lines(StringIO(fs.names)))
    # the tree exactly as it was supplied
    original_tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # every requested name must label one of the tips of that tree
    tip_names = set(tip.get_name() for tip in original_tree.gen_tips())
    unknown_names = chosen_names - tip_names
    if unknown_names:
        raise HandlingError(
                'the following selected names '
                + 'are not valid tips: %s' % str(tuple(unknown_names)))
    # the same newick string is parsed a second time, as a different
    # tree class, and that copy is handed to get_pruned_tree
    prunable_tree = NewickIO.parse(fs.tree, Newick.NewickTree)
    pruned_tree = get_pruned_tree(prunable_tree, chosen_names)
    # write one section per tree
    out = StringIO()
    labeled_trees = (
            (original_tree, 'the original tree'),
            (pruned_tree, 'the pruned tree'))
    for current_tree, label in labeled_trees:
        print >> out, 'calculating splits of %s:' % label
        print >> out, process_tree(
                current_tree, label, fs.show_newick, fs.show_art)
    return out.getvalue()
Ejemplo n.º 2
 def test_get_split_distance(self):
     """Test the function that gets the number of missing nontrivial partitions.

     Four five-tip trees are parsed from newick strings.  Each expected
     distance is checked in both argument orders, because the distance
     need not be symmetric when an internal node does not have order 3.
     """
     # define some trees
     tree_string_a = '((A:1, B:1):1, C:1, (D:1, E:1):1);'
     tree_string_b = '((A:1, B:1):1, D:1, (C:1, E:1):1);'
     tree_string_c = '((A:1, D:1):1, C:1, (B:1, E:1):1);'
     tree_string_d = '((A:1, D:1):1, (C:1, B:1, E:1):1);'
     tree_a = NewickIO.parse(tree_string_a, FelTree.NewickTree)
     tree_b = NewickIO.parse(tree_string_b, FelTree.NewickTree)
     tree_c = NewickIO.parse(tree_string_c, FelTree.NewickTree)
     tree_d = NewickIO.parse(tree_string_d, FelTree.NewickTree)
     # the distance from a tree to itself should be zero
     self.assertEqual(get_split_distance(tree_a, tree_a), 0)
     self.assertEqual(get_split_distance(tree_b, tree_b), 0)
     self.assertEqual(get_split_distance(tree_c, tree_c), 0)
     self.assertEqual(get_split_distance(tree_d, tree_d), 0)
     # some of the distances are symmetric
     self.assertEqual(get_split_distance(tree_a, tree_b), 1)
     self.assertEqual(get_split_distance(tree_b, tree_a), 1)
     self.assertEqual(get_split_distance(tree_b, tree_c), 2)
     self.assertEqual(get_split_distance(tree_c, tree_b), 2)
     self.assertEqual(get_split_distance(tree_a, tree_c), 2)
     self.assertEqual(get_split_distance(tree_c, tree_a), 2)
     # it is possible for the distance to be asymmetric
     # if internal nodes are not order 3 (tree_d has such a node)
     self.assertEqual(get_split_distance(tree_a, tree_d), 1)
     self.assertEqual(get_split_distance(tree_d, tree_a), 2)
Ejemplo n.º 3
def get_response_content(fs):
    # Compute a loss between a query tree and a reference tree and return
    # it as one line of text.  The fields read from fs are query, reference,
    # uniform, weighted and normalize.
    # NOTE(review): this listing appears to be truncated.  Both float( calls
    # below are left open and the else: lines that should precede the last
    # loss_denominator assignment and the last return seem to be missing,
    # so the block is not valid Python as shown.  Restore from the original.
    # read the query tree
    query_tree = NewickIO.parse(fs.query, FelTree.NewickTree)
    # read the reference tree
    reference_tree = NewickIO.parse(fs.reference, FelTree.NewickTree)
    # calculate the loss using the requested loss function
    # (loss_numerator stays unbound if neither fs.uniform nor fs.weighted is set)
    if fs.uniform:
        loss_numerator = TreeComparison.get_split_distance(
            query_tree, reference_tree)
    elif fs.weighted:
        loss_numerator = TreeComparison.get_weighted_split_distance(
            query_tree, reference_tree)
    # do the normalization if requested
    if fs.normalize:
        if fs.uniform:
            # NOTE(review): the argument of float( is missing here
            loss_denominator = float(
        elif fs.weighted:
            # NOTE(review): the argument of float( is missing here
            loss_denominator = float(
        # presumably the un-normalized branch (else:) -- TODO confirm
        loss_denominator = 1
    # return the response
    # a zero denominator is falsy, so it skips the division
    if loss_denominator:
        return str(loss_numerator / loss_denominator) + '\n'
        # presumably the else: branch for a zero denominator -- TODO confirm
        return 'normalization failed\n'
Ejemplo n.º 4
 def test_update_generalized_nj_big(self):
     # NOTE(review): the next line reads like a docstring that lost its
     # quotes, and the three np.array( literals below lost their contents,
     # so this block is not valid Python as shown.
     Test the generation of successor distance matrices from a more complicated initial distance matrix.
     # define the initial tree and the two subtrees
     # (these strings are not used by the visible code; they appear to
     # document where the matrices come from)
     s_tree_initial = '(((3:9, 2:2):4, 1:2):1, (4:1, 5:3):7, 6:2);'
     s_tree_a = '((3:9, 2:2):4, 1:2, B:0.5);'
     s_tree_b = '((4:1, 5:3):7, 6:2, A:0.5);'
     # Define an ordering of the taxa.
     # The initial ordering is arbitrary,
     # and the subsequent orderings are dependent on the initial ordering.
     taxa_initial = ['1', '4', '2', '5', '3', '6']
     taxa_a = ['1', 'B', '2', '3']
     taxa_b = ['A', '4', '5', '6']
     # Define the distance matrices.
     # NOTE(review): the matrix entries are missing from this listing
     D_initial = np.array(
     D_a = np.array(
     D_b = np.array(
     # assert that the correct distance matrices are created
     # the second argument is a set of indices into taxa_initial:
     # {1, 3, 5} picks taxa '4', '5', '6' and {0, 2, 4} picks '1', '2', '3'
     D_out_a = update_generalized_nj(D_initial, set([1, 3, 5]))
     D_out_b = update_generalized_nj(D_initial, set([0, 2, 4]))
     self.assertTrue(np.allclose(D_a, D_out_a))
     self.assertTrue(np.allclose(D_b, D_out_b))
Ejemplo n.º 5
def do_distance_analysis(X):
    """Compare neighbor joining on plain and on squared Euclidean distances.

    For each of the two distance matrices a neighbor joining tree is
    built, written as a newick string, parsed again, and the distance
    matrix implied by the parsed tree is extracted.

    @param X: a sequence of points whose pairwise differences can be
        passed to np.linalg.norm; the labels are fixed to '0'..'3', so
        presumably four points are expected -- TODO confirm
    @return: a multi-line report with surrounding whitespace stripped
    """
    taxon_labels = list("0123")

    def rebuild_through_tree(M):
        # build the tree, serialize it, and read distances back from it
        nj_tree = NeighborJoining.make_tree(M, taxon_labels)
        newick = NewickIO.get_newick_string(nj_tree)
        parsed = NewickIO.parse(newick, FelTree.NewickTree)
        return newick, np.array(parsed.get_distance_matrix(taxon_labels))

    # plain Euclidean distances first
    D_sqrt = np.array([[np.linalg.norm(y - x) for x in X] for y in X])
    sqrt_tree_string, D_sqrt_reconstructed = rebuild_through_tree(D_sqrt)
    # then the squared Euclidean distances
    D = D_sqrt ** 2
    tree_string, D_reconstructed = rebuild_through_tree(D)
    # the report alternates captions and values, in a fixed order
    report_items = (
        "matrix of Euclidean distances between tetrahedron vertices:",
        D_sqrt,
        "neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):",
        sqrt_tree_string,
        "distance matrix implied by this tree:",
        D_sqrt_reconstructed,
        "matrix of squared distances between tetrahedron vertices:",
        D,
        "neighbor joining tree constructed from D = squared Euclidean distances (normal):",
        tree_string,
        "distance matrix implied by this tree:",
        D_reconstructed,
    )
    out = StringIO()
    for item in report_items:
        print >> out, item
    return out.getvalue().strip()
Ejemplo n.º 6
def do_distance_analysis(X):
    """Report how well neighbor joining trees reproduce two distance matrices.

    The matrix of Euclidean distances between the points of X and the
    matrix of their squares are each turned into a neighbor joining tree;
    each tree is written to newick, parsed again, and asked for the
    distance matrix it implies.

    @param X: a sequence of points; y - x must be accepted by
        np.linalg.norm for all pairs.  The labels are the characters of
        '0123', so presumably X has four points -- TODO confirm
    @return: the report text, stripped of surrounding whitespace
    """
    names = list('0123')

    def implied_distances(newick):
        # distance matrix of the tree encoded by a newick string
        reparsed = NewickIO.parse(newick, FelTree.NewickTree)
        return np.array(reparsed.get_distance_matrix(names))

    # non-squared distances
    D_sqrt = np.array([[np.linalg.norm(y - x) for x in X] for y in X])
    sqrt_newick = NewickIO.get_newick_string(
            NeighborJoining.make_tree(D_sqrt, names))
    D_sqrt_implied = implied_distances(sqrt_newick)
    # squared distances
    D = D_sqrt**2
    newick = NewickIO.get_newick_string(NeighborJoining.make_tree(D, names))
    D_implied = implied_distances(newick)
    # write captions and values pairwise
    sections = [
        ('matrix of Euclidean distances between tetrahedron vertices:',
            D_sqrt),
        ('neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):',
            sqrt_newick),
        ('distance matrix implied by this tree:',
            D_sqrt_implied),
        ('matrix of squared distances between tetrahedron vertices:',
            D),
        ('neighbor joining tree constructed from D = squared Euclidean distances (normal):',
            newick),
        ('distance matrix implied by this tree:',
            D_implied),
    ]
    out = StringIO()
    for caption, value in sections:
        print >> out, caption
        print >> out, value
    return out.getvalue().strip()
Ejemplo n.º 7
def get_response_content(fs):
    # Return, as one line of text, a loss between the query tree and the
    # reference tree.  Reads fs.query, fs.reference, fs.uniform,
    # fs.weighted and fs.normalize.
    # NOTE(review): lines seem to have been lost from this listing: the two
    # float( calls are never closed, and the final loss_denominator
    # assignment and the final return look like orphaned else: branches.
    # The block does not parse as shown; recover the original before editing.
    # read the query tree
    query_tree = NewickIO.parse(fs.query, FelTree.NewickTree)
    # read the reference tree
    reference_tree = NewickIO.parse(fs.reference, FelTree.NewickTree)
    # calculate the loss using the requested loss function
    # (nothing is assigned when both fs.uniform and fs.weighted are false)
    if fs.uniform:
        loss_numerator = TreeComparison.get_split_distance(
                query_tree, reference_tree)
    elif fs.weighted:
        loss_numerator = TreeComparison.get_weighted_split_distance(
                query_tree, reference_tree)
    # do the normalization if requested
    if fs.normalize:
        if fs.uniform:
            # NOTE(review): truncated expression
            loss_denominator = float(
        elif fs.weighted:
            # NOTE(review): truncated expression
            loss_denominator = float(
        # presumably belongs to a missing else: (no normalization) -- TODO confirm
        loss_denominator = 1
    # return the response
    # the division is only attempted for a nonzero denominator
    if loss_denominator:
        return str(loss_numerator / loss_denominator) + '\n'
        # presumably belongs to a missing else: -- TODO confirm
        return 'normalization failed\n'
Ejemplo n.º 8
 def test_get_split_distance(self):
     """Test the function that gets the number of missing nontrivial partitions.

     The trees a, b and c differ only in how the five tips are grouped;
     tree d has an internal node with three tip children, which is the
     case where the two argument orders give different distances.
     """
     # define some trees
     tree_string_a = '((A:1, B:1):1, C:1, (D:1, E:1):1);'
     tree_string_b = '((A:1, B:1):1, D:1, (C:1, E:1):1);'
     tree_string_c = '((A:1, D:1):1, C:1, (B:1, E:1):1);'
     tree_string_d = '((A:1, D:1):1, (C:1, B:1, E:1):1);'
     tree_a = NewickIO.parse(tree_string_a, FelTree.NewickTree)
     tree_b = NewickIO.parse(tree_string_b, FelTree.NewickTree)
     tree_c = NewickIO.parse(tree_string_c, FelTree.NewickTree)
     tree_d = NewickIO.parse(tree_string_d, FelTree.NewickTree)
     # the distance from a tree to itself should be zero
     self.assertEqual(get_split_distance(tree_a, tree_a), 0)
     self.assertEqual(get_split_distance(tree_b, tree_b), 0)
     self.assertEqual(get_split_distance(tree_c, tree_c), 0)
     self.assertEqual(get_split_distance(tree_d, tree_d), 0)
     # some of the distances are symmetric
     self.assertEqual(get_split_distance(tree_a, tree_b), 1)
     self.assertEqual(get_split_distance(tree_b, tree_a), 1)
     self.assertEqual(get_split_distance(tree_b, tree_c), 2)
     self.assertEqual(get_split_distance(tree_c, tree_b), 2)
     self.assertEqual(get_split_distance(tree_a, tree_c), 2)
     self.assertEqual(get_split_distance(tree_c, tree_a), 2)
     # it is possible for the distance to be asymmetric
     # if internal nodes are not order 3
     self.assertEqual(get_split_distance(tree_a, tree_d), 1)
     self.assertEqual(get_split_distance(tree_d, tree_a), 2)
Ejemplo n.º 9
 def _create_trees(self):
     """Create the full tree and the pruned tree.

     The full tree is a Newick.NewickTree,
     and the pruned tree is a FelTree.NewickTree object.
     Both are built from self.newick_string and stored as
     self.full_tree and self.pruned_tree.
     """
     # create the full tree
     self.full_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
     # create the pruned tree through a temporary tree
     # NOTE(review): no statement here modifies temp_tree before it is
     # written back to newick, so as written the "pruned" tree is a
     # round trip of the same string -- confirm whether a pruning
     # step is missing
     temp_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
     pruned_newick_string = NewickIO.get_newick_string(temp_tree)
     self.pruned_tree = NewickIO.parse(pruned_newick_string, FelTree.NewickTree)
Ejemplo n.º 10
 def _create_trees(self):
     # NOTE(review): the three text lines below read like a docstring that
     # lost its quotes, and the last statement of the block is cut off
     # after its first argument; the block does not parse as shown.
     Create the full tree and the pruned tree.
     The full tree is a Newick.NewickTree,
     and the pruned tree is a FelTree.NewickTree object.
     # create the full tree
     self.full_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
     # create the pruned tree through a temporary tree that will be modified
     # (no modification of temp_tree is visible in this listing)
     temp_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
     pruned_newick_string = NewickIO.get_newick_string(temp_tree)
     # NOTE(review): truncated call -- the tree class argument is missing
     self.pruned_tree = NewickIO.parse(pruned_newick_string,
Ejemplo n.º 11
 def test_get_weighted_split_distance(self):
     """Test the function that gets the weighted split distance.

     Two six-tip trees are compared with themselves (distance zero)
     and with each other in both argument orders, which give
     different values.
     """
     # define some trees
     tree_string_a = '((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);'
     tree_string_b = '(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);'
     tree_a = NewickIO.parse(tree_string_a, FelTree.NewickTree)
     tree_b = NewickIO.parse(tree_string_b, FelTree.NewickTree)
     # the distance from a tree to itself should be zero
     self.assertEqual(get_weighted_split_distance(tree_a, tree_a), 0)
     self.assertEqual(get_weighted_split_distance(tree_b, tree_b), 0)
     # the distance is not necessarily symmetric
     self.assertEqual(get_weighted_split_distance(tree_a, tree_b), 20)
     self.assertEqual(get_weighted_split_distance(tree_b, tree_a), 15)
Ejemplo n.º 12
 def test_get_weighted_split_distance(self):
     """Test get_weighted_split_distance on a pair of six-tip trees.

     Each tree must be at distance zero from itself; the distance
     between the two trees depends on the argument order.
     """
     # define some trees
     tree_string_a = '((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);'
     tree_string_b = '(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);'
     tree_a = NewickIO.parse(tree_string_a, FelTree.NewickTree)
     tree_b = NewickIO.parse(tree_string_b, FelTree.NewickTree)
     # the distance from a tree to itself should be zero
     self.assertEqual(get_weighted_split_distance(tree_a, tree_a), 0)
     self.assertEqual(get_weighted_split_distance(tree_b, tree_b), 0)
     # the distance is not necessarily symmetric
     self.assertEqual(get_weighted_split_distance(tree_a, tree_b), 20)
     self.assertEqual(get_weighted_split_distance(tree_b, tree_a), 15)
Ejemplo n.º 13
def hard_coded_analysis_a():
    # Print, for a fixed four-tip tree, a distance matrix taken from the
    # tree, the points produced from it by gen_euclidean_points, and the
    # distance matrix recomputed from those points.
    # NOTE(review): lines appear to be missing from this listing: the two
    # for loops that should fill states and id_list have no bodies, and
    # the else: that should separate the 'tips' and 'full' cases is gone.
    # The block does not parse as shown.
    tree_string = '(a:1, (b:2, d:5):1, c:4);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    states = []
    id_list = []
    # tips are visited in sorted (name, id) order
    # NOTE(review): loop body missing
    for state, id_ in sorted((node.name, id(node))
            for node in tree.gen_tips()):
    # NOTE(review): loop body missing
    for node in tree.gen_internal_nodes():
    n = len(states)
    for method in ('tips', 'full'):
        # get the distance matrix from the tree
        if method == 'tips':
            print 'leaves only:'
            distance_matrix = tree.get_distance_matrix(states)
            # presumably the next two lines are the else: branch -- TODO confirm
            print 'leaves and internal nodes:'
            distance_matrix = tree.get_full_distance_matrix(id_list)
        print 'distance matrix from the tree:'
        print MatrixUtil.m_to_string(distance_matrix)
        # get the equivalent euclidean points
        z_points = list(gen_euclidean_points(distance_matrix))
        # zip stops at the shorter sequence, so only points that have a
        # matching entry in states are printed
        for state, point in zip(states, z_points):
            print state, point
        # get the distance matrix from the transformed points
        print 'distance matrix from the transformed points:'
        distance_matrix = get_euclidean_distance_matrix(z_points)
        print MatrixUtil.m_to_string(distance_matrix)
Ejemplo n.º 14
def get_response_content(fs):
    """Report how far each embedded tip is from three reference points.

    The tips of the tree, in sorted name order, are turned into points
    by gen_euclidean_points.  For every point the sum of squared
    coordinate differences is written relative to the first point,
    to the centroid of the points, and to the all-zero vector.

    @param fs: form data; only fs.tree (a newick string) is read
    @return: the report as a multi-line string
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    tip_names = list(sorted(tip.name for tip in tree.gen_tips()))
    tip_count = len(tip_names)
    out = StringIO()
    # embed the tips using the tip-to-tip distance matrix
    points = list(gen_euclidean_points(tree.get_distance_matrix(tip_names)))
    # coordinate-wise mean of the embedded points
    centroid = [sum(column)/tip_count for column in zip(*points)]
    # the all-zero vector, one entry per tip
    origin = [0 for i in range(tip_count)]

    def squared_gap(p, q):
        # sum of squared coordinate differences; zip truncates to the
        # shorter of the two vectors
        return sum((a-b)**2 for a, b in zip(p, q))

    print >> out, 'distances to the first point:'
    for point in points:
        print >> out, squared_gap(point, points[0])
    print >> out, 'distances to the centroid:'
    for point in points:
        print >> out, squared_gap(point, centroid)
    print >> out, 'distances to the virtual center of the tree:'
    for point in points:
        print >> out, squared_gap(point, origin)
    return out.getvalue()
Ejemplo n.º 15
def hard_coded_analysis_b():
    Numerically search for the power 2 steiner points.
    # make a distance matrix where the order is alphabetical with the states
    tree_string = '(a:1, (b:2, d:5):1, c:4);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    states = list(sorted(node.name for node in tree.gen_tips()))
    distance_matrix = tree.get_distance_matrix(states)
    # get the pseudo inverse laplacian matrix
    L_pinv = get_laplacian_pseudo_inverse(distance_matrix)
    # get the eigendecomposition of the pseudo inverse laplacian matrix
    eigenvalues, eigenvectors = get_eigendecomposition(L_pinv)
    print 'eigenvalues of the pseudo inverse of the laplacian:'
    print eigenvalues
    # each taxon gets a transformed point
    z_points = list(gen_euclidean_points_from_eigendecomposition(
        eigenvalues, eigenvectors))
    # initialize the objective function
    objective = MyObjective(z_points)
    # initialize a couple of steiner points
    n = len(states)
    va = [random.random() for i in range(n)]
    vb = [random.random() for i in range(n)]
    # define the initial guess
    x0 = va + vb
    # do the optimization
    result = optimize.fmin(objective, x0)
    print result
    print objective.best
Ejemplo n.º 16
def process(tree_string):
    """Summarize two weighted point embeddings of the vertices of a tree.

    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    out = StringIO()
    # build the newick tree from the string
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get ordered names and ids
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    # get the distance matrix with ordered indices including all nodes in the tree
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # define mass vectors, each normalized to sum to one
    # NOTE(review): the degenerate vector puts mass on the first nleaves
    # entries, so presumably ordered_ids lists the leaves first -- confirm
    # against get_ordered_ids_and_names
    m_uniform_unscaled = [1] * nvertices
    m_degenerate_unscaled = [1] * nleaves + [0] * (nvertices - nleaves)
    m_uniform = np.array(m_uniform_unscaled,
                         dtype=float) / sum(m_uniform_unscaled)
    m_degenerate = np.array(m_degenerate_unscaled,
                            dtype=float) / sum(m_degenerate_unscaled)
    # show the names and the two embeddings
    print >> out, 'ordered names:'
    print >> out, ordered_names
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among all vertices:'
    print >> out, Euclid.edm_to_weighted_points(D, m_uniform)
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among the leaves:'
    print >> out, Euclid.edm_to_weighted_points(D, m_degenerate)
    print >> out
    # return the response without the trailing blank line
    return out.getvalue().strip()
Ejemplo n.º 17
 def test_felsenstein(self):
     tree = NewickIO.parse(g_felsenstein_tree_string, FelTree.NewickTree)
     ordered_names = ('a', 'b', 'c', 'd', 'e')
     C_expected = np.dot(g_contrast_matrix, np.diag(1/np.sqrt(g_contrast_variances)))
     contrasts, variances = get_contrasts_and_variances(tree, ordered_names)
     C_observed = np.dot(np.array(contrasts).T, np.diag(1/np.sqrt(np.array(variances))))
     print 'felsenstein variances:'
     print g_contrast_variances
     print 'observed variances:'
     print variances
     print 'felsenstein contrast matrix:'
     print C_expected
     print 'observed contrast matrix:'
     print C_observed
     L_expected = np.dot(C_expected, C_expected.T)
     L_observed = np.dot(C_observed, C_observed.T)
     print 'felsenstein L matrix:'
     print L_expected
     print 'observed L matrix:'
     print L_observed
     D = np.array(tree.get_distance_matrix(ordered_names))
     L = Euclid.edm_to_laplacian(D)
     print 'L matrix derived from the D matrix:'
     print L
Ejemplo n.º 18
def get_form():
    # Build the list of form objects for this script: a tree field,
    # taxon fields for the two sides of a split, and output options.
    # NOTE(review): the @return line below reads like a docstring that lost
    # its quotes, and several lines of the form_objects literal are missing
    # (constructor calls are left open, the closing brackets are gone).
    # The block does not parse as shown; restore it from the original.
    @return: the body of a form
    # define the default tree string
    # (the default tree is re-serialized with a width argument of 60)
    tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
    # define the form objects
    form_objects = [
        Form.MultiLine('tree', 'newick tree with branch lengths',
        Form.SingleLine('lhs_a', 'the first taxon on one side of the split',
        Form.SingleLine('lhs_b', 'the second taxon on one side of the split',
                        'the first taxon on the other side of the split', 'x'),
                        'the second taxon on the other side of the split',
        Form.CheckGroup('options', 'output options', [
                           'show the Laplacian response matrix'),
            Form.CheckItem('show_reduced_response', 'show the 2x2 submatrix'),
                           'show the branch length implied by the split')
    return form_objects
Ejemplo n.º 19
def get_response_content(fs):
    """Write a contrast matrix derived from the tip distances of a tree.

    @param fs: a FieldStorage object containing the cgi arguments;
        tree, labels, epsilon and the three format flags are read
    @return: the response text; it is empty when none of plain_format,
        matlab_format and r_format is set
    @raise HandlingError: if the labels are not exactly the tip names
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # validate the input
    observed_label_set = set(node.get_name() for node in tree.gen_tips())
    if set(ordered_labels) != observed_label_set:
        msg = 'the labels should match the labels of the leaves of the tree'
        raise HandlingError(msg)
    # get the matrix of pairwise distances among the tips
    D = np.array(tree.get_distance_matrix(ordered_labels))
    L = Euclid.edm_to_laplacian(D)
    w, v = get_eigendecomposition(L)
    C = get_contrast_matrix(w, v)
    # set elements with small absolute value to zero
    C[abs(C) < fs.epsilon] = 0
    # start to prepare the response in the requested format
    out = StringIO()
    if fs.plain_format:
        print >> out, MatrixUtil.m_to_string(C)
    elif fs.matlab_format:
        print >> out, MatrixUtil.m_to_matlab_string(C)
    elif fs.r_format:
        print >> out, MatrixUtil.m_to_R_string(C)
    # write the response
    return out.getvalue()
Ejemplo n.º 20
def main():
    # Run the infinite alleles sampler on a fixed tree for about two
    # seconds and print its counters.
    # NOTE(review): this listing appears to be truncated: the sampler
    # constructor call is left open, and the if inside the sampling loop
    # has no body (presumably a break -- TODO confirm).  The block does
    # not parse as shown.
    # use the default sequence length
    sequence_length = 100
    # use the default tree
    tree_string = '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, (((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get arbitrarily ordered leaf names
    ordered_names = list(node.name for node in tree.gen_tips())
    # create the sampler
    # NOTE(review): remaining constructor arguments are missing
    sampler = DMSampler.InfiniteAllelesSampler(tree, ordered_names,
    # do some sampling, saving a summary but discarding the samples
    allocated_seconds = 2
    start_time = time.clock()
    run_seconds = 0
    for result in sampler.gen_samples_or_none():
        # elapsed time according to time.clock since the loop started
        run_seconds = time.clock() - start_time
        # NOTE(review): body of this if is missing
        if run_seconds > allocated_seconds:
    # define the response
    print 'these are the results for a', run_seconds, 'second run:'
    print sampler.proposed, 'samples were proposed'
    print sampler.accepted, 'samples were accepted'
    msg = 'proposals had a distance estimate of zero'
    print sampler.proposals_with_zero, msg
    msg = 'proposals had a distance estimate of infinity'
    print sampler.proposals_with_inf, msg
Ejemplo n.º 21
def get_response_content(fs):
    """Evaluate a split of the tips of a tree into selected and unselected.

    The split is encoded as a vector Y with +1 for each selected tip and
    -1 for every other tip, in the order in which the tree yields its
    tips.  The returned value is Y R Y', where R is computed from the
    tip distance matrix by Clustering.get_R_balaji.

    @param fs: form data; fs.tree and fs.selection are read
    @return: the value of the quadratic form followed by a newline
    @raise HandlingError: if a selected name is not a tip, or if the
        selection or its complement is empty
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # get the selected names
    selection = Util.get_stripped_lines(fs.selection.splitlines())
    selected_name_set = set(selection)
    possible_name_set = set(node.get_name() for node in tree.gen_tips())
    extra_names = selected_name_set - possible_name_set
    if extra_names:
        msg_a = 'the following selected names '
        msg_b = 'are not valid tips: %s' % str(tuple(extra_names))
        raise HandlingError(msg_a + msg_b)
    complement_name_set = possible_name_set - selected_name_set
    # assert that neither the selected name set nor its complement is empty
    if not selected_name_set or not complement_name_set:
        raise HandlingError('the selection is degenerate')
    # define an ordering on the tips
    ordered_names = [node.get_name() for node in tree.gen_tips()]
    # convert the selected names to a Y vector with one entry per tip;
    # the earlier code overwrote +1 with -1 and never stored the value,
    # which left Y empty
    Y = np.array([
        1 if name in selected_name_set else -1
        for name in ordered_names])
    # get the distance matrix
    D = tree.get_distance_matrix(ordered_names)
    # get the R matrix
    R = Clustering.get_R_balaji(D)
    value = np.dot(np.dot(Y, R), Y.T)
    # return the taxon split evaluation
    return str(value) + '\n'
Ejemplo n.º 22
def get_response_content(fs):
    """Compare two augmented matrices computed from a tree's distance matrix.

    @param fs: form data; fs.tree_string and fs.N are read
    @return: a multi-line string showing the results of get_aug_a and
        get_aug_b and whether np.allclose considers them equal
    """
    # get the tree
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # get information about the tree topology
    internal = [id(node) for node in tree.gen_internal_nodes()]
    tips = [id(node) for node in tree.gen_tips()]
    # the vertex ordering puts the internal nodes first, then the tips
    ordered_ids = internal + tips
    # get the full distance matrix in that ordering
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # compute the two matrices to be compared;
    # p counts the internal nodes and q counts the tips
    p = len(internal)
    q = len(tips)
    N = fs.N
    aug_a = get_aug_a(D, p, q, N)
    aug_b = get_aug_b(D, p, q, N)
    # show the output
    out = StringIO()
    print >> out, "-(1/2)MEDE'M':"
    print >> out, aug_a
    print >> out
    print >> out, "-(1/2)HMDM'H:"
    print >> out, aug_b
    print >> out
    print >> out, 'allclose:', np.allclose(aug_a, aug_b)
    return out.getvalue()
Ejemplo n.º 23
 def test_get_weighted_split_count(self):
     """Test the function that gets the weighted number of nontrivial splits.

     Three six-tip trees are parsed and their weighted split counts
     are compared with fixed expected values.
     """
     # define some trees
     tree_string_a = '((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);'
     tree_string_b = '(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);'
     tree_string_c = '(((A:1, B:1):1, C:1):1, (D:1, (E:1, F:1):1):1);'
     tree_a = NewickIO.parse(tree_string_a, FelTree.NewickTree)
     tree_b = NewickIO.parse(tree_string_b, FelTree.NewickTree)
     tree_c = NewickIO.parse(tree_string_c, FelTree.NewickTree)
     # the weighted split counts are different,
     # even though both trees have internal nodes of order 3
     # and have the same number of leaves
     self.assertEqual(get_weighted_split_count(tree_a), 45)
     self.assertEqual(get_weighted_split_count(tree_b), 50)
     self.assertEqual(get_weighted_split_count(tree_c), 50)
Ejemplo n.º 24
def get_response_content(fs):
    """Draw an image of augmented spectra for a tree.

    @param fs: form data; tree_string, imageformat, hticks, border,
        slow, fast and denom are read
    @return: whatever create_image returns for the prepared arguments
    @raise HandlingError: if fewer than two horizontal ticks are requested
    """
    # define the requested physical size of the images (in pixels)
    physical_size = (640, 480)
    # build the newick tree from the string
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    nleaves = len(list(tree.gen_tips()))
    # Get ordered ids with the leaves first,
    # and get the corresponding distance matrix.
    ordered_ids = get_ordered_ids(tree)
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # get the image extension
    ext = Form.g_imageformat_to_ext[fs.imageformat]
    # get the scaling factors and offsets
    if fs.hticks < 2:
        msg = 'expected at least two ticks on the horizontal axis'
        raise HandlingError(msg)
    width, height = physical_size
    xoffset = fs.border
    yoffset = fs.border
    # the drawable area excludes the border on both sides; the width is
    # divided into one interval per pair of adjacent ticks
    yscale = float(height - 2 * fs.border)
    xscale = (width - 2 * fs.border) / float(fs.hticks - 1)
    # define the eigendecomposition function
    # NOTE(review): fn stays unbound when neither fs.slow nor fs.fast is
    # set -- presumably the form guarantees that one of them is; confirm
    if fs.slow:
        fn = get_augmented_spectrum
    elif fs.fast:
        fn = get_augmented_spectrum_fast
    # define the target eigenvalues from the tip-only distance matrix,
    # scaled by fs.denom
    tip_ids = [id(node) for node in tree.gen_tips()]
    D_tips = np.array(tree.get_partial_distance_matrix(tip_ids))
    G_tips = Euclid.edm_to_dccov(D_tips)
    target_ws = scipy.linalg.eigh(G_tips, eigvals_only=True) * fs.denom
    # draw the image
    return create_image(ext, physical_size, xscale, yscale, xoffset, yoffset,
                        D, nleaves, fs.hticks, fs.denom, fn, target_ws)
Ejemplo n.º 25
 def test_get_split_branch(self):
     # Exercise tree.get_split_branch with several selections of tips of
     # a small four-tip tree.
     # NOTE(review): this listing appears to be truncated: the three
     # tip_selection list literals are never closed, and the two lines
     # that end in a branch length (3 and 5) look like the tails of
     # assert calls whose first lines are missing.  The block does not
     # parse as shown.
     # set up the tree
     tree_string = '((a:1, b:2):3, c:4, d:5);'
     tree = NewickIO.parse(tree_string, NewickTree)
     # look for the branch that separates tips named 'a' and 'b' from the rest of the tree
     tip_selection = [
         tip for tip in tree.gen_tips() if tip.get_name() in ('a', 'b')
     node, directed_branch = tree.get_split_branch(tip_selection)
         directed_branch.get_undirected_branch().get_branch_length(), 3)
     # look for the branch that separates tips named 'a' and 'c' from the rest of the tree
     # (no result is expected for this selection)
     tip_selection = [
         tip for tip in tree.gen_tips() if tip.get_name() in ('a', 'c')
     result = tree.get_split_branch(tip_selection)
     self.assertEqual(result, None)
     # look for the branch that separates all tips from the rest of the tree
     tip_selection = list(tree.gen_tips())
     result = tree.get_split_branch(tip_selection)
     self.assertEqual(result, None)
     # look for the branch that separates no tips from the rest of the tree
     tip_selection = []
     result = tree.get_split_branch(tip_selection)
     self.assertEqual(result, None)
     # look for the branch that separates the single tip named 'd' from the rest of the tree
     tip_selection = [
         tip for tip in tree.gen_tips() if tip.get_name() == 'd'
     node, directed_branch = tree.get_split_branch(tip_selection)
         directed_branch.get_undirected_branch().get_branch_length(), 5)
Ejemplo n.º 26
def get_form():
    # Build the list of form objects for this script.
    # NOTE(review): the @return line below reads like a docstring that lost
    # its quotes, and the first line of most entries of form_objects (the
    # constructor name and field name) is missing, so the literal is not
    # valid Python as shown.  Restore it from the original.
    @return: the body of a form
    # define the default tree string
    # (the default tree is re-serialized with a width argument of 60)
    tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
    # define the form objects
    form_objects = [
                'newick tree with branch lengths', formatted_tree_string),
                'the first taxon on one side of the split', 'a'),
                'the second taxon on one side of the split', 'b'),
                'the first taxon on the other side of the split', 'x'),
                'the second taxon on the other side of the split', 'y'),
            Form.CheckGroup('options', 'output options', [
                    'show the full Laplacian matrix'),
                    'show the 2x2 submatrix'),
                    'show the branch length implied by the split')])]
    return form_objects
Ejemplo n.º 27
def get_default_original_tree():
    """Return the default tree with every branch length halved.

    @return: a FelTree.NewickTree parsed from g_default_string in which
        each node that has a branch length has had it multiplied by 0.5
    """
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    for vertex in default_tree.preorder():
        length = vertex.get_branch_length()
        # nodes without a branch length are left untouched
        if length is None:
            continue
        vertex.set_branch_length(length * 0.5)
    return default_tree
Ejemplo n.º 28
def get_form():
    # Build the list of form objects for this script: a tree field, a
    # radio group choosing which nodes enter the distance matrix, and a
    # group of output options.
    # NOTE(review): the @return line below reads like a docstring that lost
    # its quotes, and lines are missing from the form_objects literal (the
    # groups are never closed and two CheckItem calls lost their first
    # line).  The block does not parse as shown.
    @return: the body of a form
    # define the default tree string
    # (the default tree is re-serialized with a width argument of 60)
    tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
    # define the form objects
    form_objects = [
        Form.MultiLine('tree', 'newick tree', formatted_tree_string),
        Form.RadioGroup('matrix', 'nodes used for the distance matrix', [
            RadioItem('standard', 'tips only', True),
            RadioItem('augmented', 'all nodes'),
            RadioItem('named', 'all named nodes')
        Form.CheckGroup('output_options', 'output options', [
            CheckItem('show_split', 'exact criterion partition', True),
            CheckItem('show_value', 'exact criterion value', True),
                      'exact criterion value minus trace', True),
            CheckItem('show_fiedler_split', 'show the spectral sign partition',
                      'show the eigenvector of interest', True),
            CheckItem('show_labels', 'ordered labels', True),
            CheckItem('show_distance_matrix', 'distance matrix', True),
            CheckItem('show_M_matrix', 'M matrix', True)
    return form_objects
Ejemplo n.º 29
def get_response_content(fs):
    """
    Report the Fiedler vector computed from the tips alone next to
    the tip entries of the Fiedler vector computed from all nodes.
    @param fs: an object whose tree attribute is a newick tree string
    @return: the text of the response
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # order the tips by name, using the node id to break ties
    tip_pairs = sorted(set(
        (tip.get_name(), id(tip)) for tip in tree.gen_tips()))
    tip_names, tip_ids = zip(*tip_pairs)
    # the internal node ids follow the tip ids
    internal_ids = []
    for node in tree.preorder():
        if not node.is_tip():
            internal_ids.append(id(node))
    all_ids = list(tip_ids) + internal_ids
    # distance matrices over all of the nodes and over the tips only
    full_D = tree.get_partial_distance_matrix(all_ids)
    partial_D = tree.get_partial_distance_matrix(tip_ids)
    # the corresponding balaji matrices
    full_R = Clustering.get_R_balaji(full_D)
    partial_R = Clustering.get_R_balaji(partial_D)
    # only the first vector of each returned pair is reported
    full_va, full_vb = get_eigenvectors(full_R)
    partial_va, partial_vb = get_eigenvectors(partial_R)
    # assemble the response one line at a time
    lines = [
        'Fiedler vector associated with the graph',
        'for which the internal nodes are hidden:',
        str(tuple(partial_va)),
        '',
        'The tip subvector of the Fiedler vector',
        'associated with the graph of the full tree:',
        str(tuple(full_va[:len(tip_ids)])),
    ]
    return '\n'.join(lines) + '\n'
Ejemplo n.º 30
def get_form():
    """
    Define a form with a single tree field
    whose default value is the formatted default tree.
    @return: a list of form objects
    """
    tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    # NOTE(review): 60 is presumably the maximum line width -- confirm
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
    return [Form.MultiLine('tree', 'tree', formatted_tree_string)]
Ejemplo n.º 31
def process(tree_string):
    """
    Summarize the points embedded from the full distance matrix of a tree
    under two different distributions of mass over the vertices.
    @param tree_string: a newick string
    @return: a multi-line string that summarizes the results
    """
    out = StringIO()
    # build the newick tree from the string
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get ordered names and ids
    ordered_ids, ordered_names = get_ordered_ids_and_names(tree)
    # get the distance matrix with ordered indices
    # including all nodes in the tree
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # define mass vectors
    m_uniform_unscaled = [1]*nvertices
    # NOTE(review): this assumes that the leaves come first in ordered_ids
    # -- confirm against get_ordered_ids_and_names
    m_degenerate_unscaled = [1]*nleaves + [0]*(nvertices-nleaves)
    # normalize each mass vector so that it sums to one
    m_uniform = (
            np.array(m_uniform_unscaled, dtype=float) /
            sum(m_uniform_unscaled))
    m_degenerate = (
            np.array(m_degenerate_unscaled, dtype=float) /
            sum(m_degenerate_unscaled))
    # show the names and the embedded points
    print >> out, 'ordered names:'
    print >> out, ordered_names
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among all vertices:'
    print >> out, Euclid.edm_to_weighted_points(D, m_uniform)
    print >> out
    print >> out, 'embedded points with mass uniformly distributed among the leaves:'
    print >> out, Euclid.edm_to_weighted_points(D, m_degenerate)
    print >> out
    # return the response without surrounding whitespace
    return out.getvalue().strip()
Ejemplo n.º 32
def get_response_content(fs):
    """
    Compute a contrast matrix from the tip distances of a tree
    and write it in the selected format.
    If none of the format flags is set then the response is empty.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    @raise HandlingError: if the labels do not match the tips of the tree
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(StringIO(fs.labels))
    # validate the input
    observed_label_set = set(node.get_name() for node in tree.gen_tips())
    if set(ordered_labels) != observed_label_set:
        msg = 'the labels should match the labels of the leaves of the tree'
        raise HandlingError(msg)
    # get the matrix of pairwise distances among the tips
    D = np.array(tree.get_distance_matrix(ordered_labels))
    L = Euclid.edm_to_laplacian(D)
    w, v = get_eigendecomposition(L)
    C = get_contrast_matrix(w, v)
    # set elements with small absolute value to zero
    C[abs(C) < fs.epsilon] = 0
    # start to prepare the response
    out = StringIO()
    if fs.plain_format:
        print >> out, MatrixUtil.m_to_string(C)
    elif fs.matlab_format:
        print >> out, MatrixUtil.m_to_matlab_string(C)
    elif fs.r_format:
        print >> out, MatrixUtil.m_to_R_string(C)
    # write the response
    return out.getvalue()
Ejemplo n.º 33
def get_form():
    """
    Define a tree field, a sequence length, and four radio groups
    (sampling model, infinite distances, zero distances, criterion).
    @return: the body of a form
    """
    # define the tree string
    tree_string = '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, (((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
    # define the object list
    form_objects = [
            Form.MultiLine('tree', 'tree',
                formatted_tree_string),
            Form.Integer('sequence_length', 'use sequences that are this long',
                100, low=1),
            Form.RadioGroup('assumption', 'distance matrix sampling model', [
                RadioItem('infinite_alleles', 'infinite alleles', True),
                RadioItem('jukes_cantor', 'Jukes-Cantor model (4 alleles)')]),
            Form.RadioGroup('infinity', 'matrices with infinite distances', [
                RadioItem('reject_infinity', 'reject these matrices', True),
                RadioItem('replace_infinity', 'use 20 instead')]),
            Form.RadioGroup('zero', 'matrices with zero distances', [
                RadioItem('reject_zero', 'reject these matrices'),
                RadioItem('replace_zero', 'use .00001 instead'),
                RadioItem('remain_zero', 'use 0 unmodified', True)]),
            Form.RadioGroup('criterion', 'tree reconstruction criterion', [
                RadioItem('sign', 'spectral sign approximation', True),
                RadioItem('nj', 'neighbor joining'),
                RadioItem('random', 'random bipartition')])]
    # return the object list
    return form_objects
Ejemplo n.º 34
def get_response_content(fs):
    """
    Evaluate a split defined by two taxa on each side using the
    determinant of a 2x2 submatrix of the response matrix.
    @param fs: an object with the attributes tree, lhs_a, lhs_b, rhs_a,
    rhs_b, show_response, show_reduced_response and show_blen
    @return: the response text as paragraphs separated by blank lines
    @raise HandlingError: if one of the four labels is not a tip of the tree
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # assert that the given labels are tips of the tree
    tip_name_set = set(node.get_name() for node in tree.gen_tips())
    user_name_set = set([fs.lhs_a, fs.lhs_b, fs.rhs_a, fs.rhs_b])
    bad_names = user_name_set - tip_name_set
    if bad_names:
        msg = 'these labels are not valid tips: %s' % ', '.join(bad_names)
        raise HandlingError(msg)
    # get the distance matrix among the alphabetically ordered tips
    ordered_names = list(sorted(node.get_name() for node in tree.gen_tips()))
    D = np.array(tree.get_distance_matrix(ordered_names))
    # get the response matrix
    R = Clustering.get_R_stone(D)
    # get the two by two matrix:
    # rows are the left hand taxa and columns are the right hand taxa
    name_to_index = dict((name, i) for i, name in enumerate(ordered_names))
    R_reduced = np.zeros((2, 2))
    la = name_to_index[fs.lhs_a]
    lb = name_to_index[fs.lhs_b]
    ra = name_to_index[fs.rhs_a]
    rb = name_to_index[fs.rhs_b]
    R_reduced[0][0] = R[la][ra]
    R_reduced[0][1] = R[la][rb]
    R_reduced[1][0] = R[lb][ra]
    R_reduced[1][1] = R[lb][rb]
    # treat a determinant that is within rounding error of zero as zero
    epsilon = 1e-13
    criterion = np.linalg.det(R_reduced)
    if abs(criterion) < epsilon:
        criterion = 0
    # in analogy to the four point condition,
    # use two different ways of calculating the distance
    blen_a = (D[la][rb] + D[lb][ra] - D[la][lb] - D[ra][rb]) / 2.0
    blen_b = (D[la][ra] + D[lb][rb] - D[la][lb] - D[ra][rb]) / 2.0
    blen = min(blen_a, blen_b)
    # define the response
    # NOTE(review): the second element of the first three paragraphs was
    # reconstructed; MatrixUtil.m_to_string is how other handlers in this
    # file format matrices -- confirm against the upstream source
    paragraphs = []
    if fs.show_response:
        paragraphs.append([
            'response matrix with rows ordered alphabetically by leaf label:',
            MatrixUtil.m_to_string(R)])
    if fs.show_reduced_response:
        paragraphs.append([
            '2x2 submatrix of the response matrix:',
            MatrixUtil.m_to_string(R_reduced)])
    # the determinant is always reported
    paragraphs.append([
        'determinant of the 2x2 submatrix of the response matrix:',
        str(criterion)])
    if fs.show_blen:
        paragraphs.append(['branch length defined by the split:', str(blen)])
    # return the response
    return '\n\n'.join('\n'.join(p) for p in paragraphs) + '\n'
Ejemplo n.º 35
def get_form():
    """
    Define a tree field, a radio group that selects the nodes used for
    the distance matrix, and a check group of output options.
    @return: the body of a form
    """
    # define the default tree string
    tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
    # define the form objects
    form_objects = [
        Form.MultiLine("tree", "newick tree", formatted_tree_string),
        Form.RadioGroup(
            "matrix",
            "nodes used for the distance matrix",
            [
                RadioItem("standard", "tips only", True),
                RadioItem("augmented", "all nodes"),
                RadioItem("named", "all named nodes"),
            ],
        ),
        Form.CheckGroup(
            "output_options",
            "output options",
            [
                CheckItem("show_split", "exact criterion partition", True),
                CheckItem("show_value", "exact criterion value", True),
                CheckItem("show_value_minus_trace", "exact criterion value minus trace", True),
                CheckItem("show_fiedler_split", "show the spectral sign partition", True),
                CheckItem("show_fiedler_eigenvector", "show the eigenvector of interest", True),
                CheckItem("show_labels", "ordered labels", True),
                CheckItem("show_distance_matrix", "distance matrix", True),
                CheckItem("show_M_matrix", "M matrix", True),
            ],
        ),
    ]
    return form_objects
Ejemplo n.º 36
def get_form():
    """
    Define a tree field, a sequence length, and three radio groups
    (sampling model, infinite distance estimates, zero distance estimates).
    @return: the body of a form
    """
    # define the default tree string
    tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
    # define the form objects
    form_objects = [
            Form.MultiLine('tree', 'newick tree',
                formatted_tree_string),
            Form.Integer('length', 'use sequences that are this long',
                100, low=1),
            Form.RadioGroup('assumption', 'distance matrix sampling model', [
                Form.RadioItem('infinite_alleles', 'infinite alleles', True),
                Form.RadioItem('jukes_cantor',
                    'Jukes-Cantor model (4 alleles)')]),
            Form.RadioGroup('infinity', 'infinite distance estimates', [
                Form.RadioItem('reject_infinity', 'reject these matrices'),
                Form.RadioItem('replace_infinity',
                    'replace inf with 20', True)]),
            Form.RadioGroup('zero', 'distance estimates of zero', [
                Form.RadioItem('reject_zero', 'reject these matrices'),
                Form.RadioItem('replace_zero', 'use .00001 instead of zero'),
                Form.RadioItem('remain_zero', 'use 0 unmodified', True)])]
    return form_objects
Ejemplo n.º 37
def get_form():
    # Build the form objects: a tree field, an ordered label field,
    # a perturbation strength, and a group of output check boxes.
    @return: the body of a form
    # define the default tree string and ordered tip labels
    tree_string = "(a:1, (b:2, d:5):1, c:4);"
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
    # the default labels are the tip names in sorted order
    labels = list(sorted(tip.name for tip in tree.gen_tips()))
    # define the form objects
    form_objects = [
        Form.MultiLine("tree", "newick tree", formatted_tree_string),
        Form.MultiLine("inlabels", "ordered labels", "\n".join(labels)),
        Form.Float("strength", "perturbation strength", 0.1, low_inclusive=0),
        # NOTE(review): the lines that open the check group (the constructor
        # call and the group identifier) and the closing brackets are absent
        # from this excerpt, and so are the docstring delimiters above;
        # restore them from the upstream source before running this.
            "output options",
                CheckItem("perturbed", "a perturbed distance matrix", True),
                CheckItem("distance", "the original distance matrix"),
                CheckItem("outlabels", "ordered labels"),
    return form_objects
Ejemplo n.º 38
def get_form():
    """
    Define a tree field, a sequence length, and three radio groups
    (sampling model, infinite distance estimates, zero distance estimates).
    @return: the body of a form
    """
    # define the default tree string
    tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
    # define the form objects
    form_objects = [
        Form.MultiLine('tree', 'newick tree', formatted_tree_string),
        Form.Integer('length', 'use sequences that are this long', 100, low=1),
        Form.RadioGroup('assumption', 'distance matrix sampling model', [
            Form.RadioItem('infinite_alleles', 'infinite alleles', True),
            Form.RadioItem('jukes_cantor', 'Jukes-Cantor model (4 alleles)')
        ]),
        Form.RadioGroup('infinity', 'infinite distance estimates', [
            Form.RadioItem('reject_infinity', 'reject these matrices'),
            Form.RadioItem('replace_infinity', 'replace inf with 20', True)
        ]),
        Form.RadioGroup('zero', 'distance estimates of zero', [
            Form.RadioItem('reject_zero', 'reject these matrices'),
            Form.RadioItem('replace_zero', 'use .00001 instead of zero'),
            Form.RadioItem('remain_zero', 'use 0 unmodified', True)
        ])
    ]
    return form_objects
Ejemplo n.º 39
def get_response_content(fs):
    """
    Compute two matrices from the full distance matrix of a tree
    and report both of them together with whether they are close.
    @param fs: an object with a newick string in fs.tree_string and a value fs.N
    @return: the text of the response
    """
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # collect the node ids with the internal nodes ahead of the tips
    internal_ids = [id(node) for node in tree.gen_internal_nodes()]
    tip_ids = [id(node) for node in tree.gen_tips()]
    ordered_ids = internal_ids + tip_ids
    # get the full distance matrix
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # compute the two matrices to be compared
    p, q = len(internal_ids), len(tip_ids)
    N = fs.N
    aug_a = get_aug_a(D, p, q, N)
    aug_b = get_aug_b(D, p, q, N)
    # write each matrix under its caption, followed by a blank line
    out = StringIO()
    captioned = (("-(1/2)MEDE'M':", aug_a), ("-(1/2)HMDM'H:", aug_b))
    for caption, matrix in captioned:
        print >> out, caption
        print >> out, matrix
        print >> out
    print >> out, 'allclose:', np.allclose(aug_a, aug_b)
    return out.getvalue()
Ejemplo n.º 40
def get_default_original_tree():
    """
    Parse g_default_string and halve each branch length that is not None.
    @return: the modified tree
    """
    halved = NewickIO.parse(g_default_string, FelTree.NewickTree)
    for current in halved.preorder():
        old_length = current.get_branch_length()
        if old_length is None:
            # nodes without a branch length keep None
            continue
        current.set_branch_length(old_length * 0.5)
    return halved
Ejemplo n.º 41
 def test_felsenstein(self):
     tree = NewickIO.parse(g_felsenstein_tree_string, FelTree.NewickTree)
     ordered_names = ('a', 'b', 'c', 'd', 'e')
     C_expected = np.dot(g_contrast_matrix,
                         np.diag(1 / np.sqrt(g_contrast_variances)))
     contrasts, variances = get_contrasts_and_variances(tree, ordered_names)
     C_observed = np.dot(
         np.array(contrasts).T, np.diag(1 / np.sqrt(np.array(variances))))
     print 'felsenstein variances:'
     print g_contrast_variances
     print 'observed variances:'
     print variances
     print 'felsenstein contrast matrix:'
     print C_expected
     print 'observed contrast matrix:'
     print C_observed
     L_expected = np.dot(C_expected, C_expected.T)
     L_observed = np.dot(C_observed, C_observed.T)
     print 'felsenstein L matrix:'
     print L_expected
     print 'observed L matrix:'
     print L_observed
     D = np.array(tree.get_distance_matrix(ordered_names))
     L = Euclid.edm_to_laplacian(D)
     print 'L matrix derived from the D matrix:'
     print L
Ejemplo n.º 42
def get_response_content(fs):
    """
    Show the eigendecomposition of G = -(1/2) * double_centered(D) as an
    html table, where D holds the distances among the sorted tip names.
    The eigenvalues are listed in decreasing order.
    @param fs: an object whose tree attribute is a newick tree string
    @return: the html text of the response
    @raise HandlingError: if the tree has fewer than two leaves
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_names = list(sorted(node.name for node in tree.gen_tips()))
    n = len(ordered_names)
    if n < 2:
        raise HandlingError('the newick tree should have at least two leaves')
    # get the eigendecomposition
    D = np.array(tree.get_distance_matrix(ordered_names))
    G = (-0.5) * MatrixUtil.double_centered(D)
    eigenvalues, eigenvector_transposes = np.linalg.eigh(G)
    # eigh returns the eigenvectors as columns, so transpose to get rows
    eigenvectors = eigenvector_transposes.T
    # Sort on the eigenvalue alone.  Sorting the (value, vector) tuples
    # directly compares the numpy vectors whenever two eigenvalues tie,
    # and that comparison raises a ValueError.
    sorted_eigensystem = sorted(
            zip(eigenvalues, eigenvectors),
            key=lambda pair: pair[0], reverse=True)
    sorted_eigenvalues, sorted_eigenvectors = zip(*sorted_eigensystem)
    # each row of M holds the loadings of one tip on the sorted eigenvectors
    M = zip(*sorted_eigenvectors)
    # write the html
    out = StringIO()
    print >> out, '<html>'
    print >> out, '<body>'
    print >> out, HtmlTable.get_labeled_table_string(
            sorted_eigenvalues, ordered_names, M)
    print >> out, '</body>'
    print >> out, '</html>'
    # write the response
    return out.getvalue()
Ejemplo n.º 43
def get_response_content(fs):
    """
    Report the tip distance matrix of a tree and the dissimilarity matrix
    derived from it, each followed by its principal coordinate projections.
    @param fs: an object whose tree attribute is a newick tree string
    @return: the text of the response
    """
    # the size of the alphabet is arbitrary
    alphabet_size = 4
    out = StringIO()
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # the tips are reported in sorted name order
    tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # pairwise distances among the tips and their principal coordinate
    D = np.array(tree.get_distance_matrix(tip_names))
    D_vector = get_principal_coordinate(D)
    # convert every distance to a dissimilarity
    dissimilarity = np.array([
        [distance_to_dissimilarity(d, alphabet_size) for d in row]
        for row in D])
    dissimilarity_vector = get_principal_coordinate(dissimilarity)
    # each section has a matrix and then one projection per tip
    sections = (
        ('original distance matrix:',
            'projections onto the principal coordinate using the original distance matrix:',
            D, D_vector),
        ('dissimilarity matrix:',
            'projections onto the principal coordinate using the dissimilarity matrix:',
            dissimilarity, dissimilarity_vector),
    )
    for matrix_title, projection_title, matrix, vector in sections:
        print >> out, matrix_title
        print >> out, MatrixUtil.m_to_string(matrix)
        print >> out
        print >> out, projection_title
        for name, value in zip(tip_names, vector):
            print >> out, '\t'.join((name, str(value)))
        print >> out
    return out.getvalue()
Ejemplo n.º 44
 def test_get_weighted_split_count(self):
     """
     Test the function that gets the weighted number of nontrivial splits
     """
     # define some trees
     tree_string_a = '((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);'
     tree_string_b = '(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);'
     tree_string_c = '(((A:1, B:1):1, C:1):1, (D:1, (E:1, F:1):1):1);'
     tree_a = NewickIO.parse(tree_string_a, FelTree.NewickTree)
     tree_b = NewickIO.parse(tree_string_b, FelTree.NewickTree)
     tree_c = NewickIO.parse(tree_string_c, FelTree.NewickTree)
     # the weighted split counts are different,
     # even though both trees have internal nodes of order 3
     # and have the same number of leaves
     self.assertEqual(get_weighted_split_count(tree_a), 45)
     self.assertEqual(get_weighted_split_count(tree_b), 50)
     self.assertEqual(get_weighted_split_count(tree_c), 50)
Ejemplo n.º 45
def get_response_content(fs):
    """
    Analyze each newick tree and report the selected results.
    @param fs: an object with the attributes trees (one newick string
    per line), epsilon and options
    @return: the text of the response
    @raise HandlingError: if a tree has fewer than four tips, has an
    unlabeled or duplicated tip label, or if epsilon is outside [0, 1)
    """
    # get the newick trees.
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        # parse each tree
        # and make sure that it conforms to various requirements
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        if len(tip_names) < 4:
            msg = 'expected at least 4 tips but found ' + str(len(tip_names))
            raise HandlingError(msg)
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != len(tip_names):
            raise HandlingError('each terminal node label must be unique')
        # keep the validated tree; without this nothing is ever analyzed
        trees.append(tree)
    # get the threshold for negligibility of an eigenvector loading
    epsilon = fs.epsilon
    if not (0 <= epsilon < 1):
        raise HandlingError('invalid threshold for negligibility')
    # get the set of selected options
    selected_options = fs.options
    # analyze each tree
    results = [AnalysisResult(tree, epsilon) for tree in trees]
    # create the response, with a blank line after each result
    out = StringIO()
    for result in results:
        for line in result.get_response_lines(selected_options):
            print >> out, line
        print >> out
    # return the response
    return out.getvalue()
Ejemplo n.º 46
def get_response_content(fs):
    """
    Write two reports, one for the tip distance matrix of a tree and one
    for the dissimilarity matrix derived from it.  Each report shows the
    matrix and the projection of every tip onto the principal coordinate.
    @param fs: an object whose tree attribute is a newick tree string
    @return: the text of the response
    """
    # the alphabet size is an arbitrary choice
    k = 4
    out = StringIO()
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # sort the tip names to fix the order of rows and columns
    names = [node.get_name() for node in tree.gen_tips()]
    names.sort()

    def write_report(matrix_heading, matrix, projection_heading, projection):
        # write a matrix and then one tab separated line per tip
        print >> out, matrix_heading
        print >> out, MatrixUtil.m_to_string(matrix)
        print >> out
        print >> out, projection_heading
        for label, coordinate in zip(names, projection):
            print >> out, '\t'.join((label, str(coordinate)))
        print >> out

    # get the distance matrix and the matching dissimilarity matrix
    D = np.array(tree.get_distance_matrix(names))
    D_vector = get_principal_coordinate(D)
    dissimilarity_rows = []
    for row in D:
        dissimilarity_rows.append(
            [distance_to_dissimilarity(d, k) for d in row])
    dissimilarity = np.array(dissimilarity_rows)
    dissimilarity_vector = get_principal_coordinate(dissimilarity)
    # report the original matrix first and then the derived one
    write_report(
        'original distance matrix:', D,
        'projections onto the principal coordinate using the original distance matrix:',
        D_vector)
    write_report(
        'dissimilarity matrix:', dissimilarity,
        'projections onto the principal coordinate using the dissimilarity matrix:',
        dissimilarity_vector)
    return out.getvalue()
Ejemplo n.º 47
def main():
    # use the default sequence length
    sequence_length = 100
    # use the default tree
    tree_string = '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, (((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);'
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get arbitrarily ordered leaf names
    ordered_names = list(node.name for node in tree.gen_tips())
    # create the sampler
    sampler = DMSampler.InfiniteAllelesSampler(
            tree, ordered_names, sequence_length)
    # do some sampling, saving a summary but discarding the samples
    allocated_seconds = 2
    start_time = time.clock()
    run_seconds = 0
    for result in sampler.gen_samples_or_none():
        run_seconds = time.clock() - start_time
        if run_seconds > allocated_seconds:
    # define the response
    print 'these are the results for a', run_seconds, 'second run:'
    print sampler.proposed, 'samples were proposed'
    print sampler.accepted, 'samples were accepted'
    msg = 'proposals had a distance estimate of zero'
    print sampler.proposals_with_zero, msg
    msg = 'proposals had a distance estimate of infinity'
    print sampler.proposals_with_inf, msg
Ejemplo n.º 48
def get_response_content(fs):
    """
    Report the Fiedler vector computed from the tips alone next to
    the tip entries of the Fiedler vector computed from all nodes.
    @param fs: an object whose tree attribute is a newick tree string
    @return: the text of the response
    """
    # read the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # get ordered identifiers
    ordered_tip_name_id_pairs = list(
        sorted(set((node.get_name(), id(node)) for node in tree.gen_tips())))
    ordered_tip_names, ordered_tip_ids = zip(*ordered_tip_name_id_pairs)
    ordered_internal_ids = [
        id(node) for node in tree.preorder() if not node.is_tip()
    ]
    # the tip ids come first so that a prefix slice selects the tips
    ordered_ids = list(ordered_tip_ids) + ordered_internal_ids
    # get the distance matrices
    full_D = tree.get_partial_distance_matrix(ordered_ids)
    partial_D = tree.get_partial_distance_matrix(ordered_tip_ids)
    # get the balaji matrices
    full_R = Clustering.get_R_balaji(full_D)
    partial_R = Clustering.get_R_balaji(partial_D)
    # Get the fiedler eigenvector and another eigenvector
    # for the full and the partial balaji matrices.
    full_va, full_vb = get_eigenvectors(full_R)
    partial_va, partial_vb = get_eigenvectors(partial_R)
    # create the response
    out = StringIO()
    print >> out, 'Fiedler vector associated with the graph'
    print >> out, 'for which the internal nodes are hidden:'
    print >> out, str(tuple(partial_va))
    print >> out
    print >> out, 'The tip subvector of the Fiedler vector'
    print >> out, 'associated with the graph of the full tree:'
    print >> out, str(tuple(full_va[:len(ordered_tip_ids)]))
    # write the response
    return out.getvalue()
Ejemplo n.º 49
def get_form():
    """
    Define a form with one tree field that defaults to the default tree.
    @return: a list of form objects
    """
    # format the default tree for display in the form
    tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 60)
    return [Form.MultiLine('tree', 'tree', formatted_tree_string)]
Ejemplo n.º 50
def get_response_content(fs):
    """
    Evaluate a split of the tips of a tree.
    Selected tips are coded as 1 and the remaining tips as -1 in a vector Y,
    and the reported value is the product Y R Y' for the balaji matrix R.
    @param fs: an object with a newick string in fs.tree and
    one selected tip name per line in fs.selection
    @return: the value of the split followed by a newline
    @raise HandlingError: if a selected name is not a tip,
    or if the selection or its complement is empty
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # get the selected names
    selection = Util.get_stripped_lines(fs.selection.splitlines())
    selected_name_set = set(selection)
    possible_name_set = set(node.get_name() for node in tree.gen_tips())
    extra_names = selected_name_set - possible_name_set
    if extra_names:
        msg_a = 'the following selected names '
        msg_b = 'are not valid tips: %s' % str(tuple(extra_names))
        raise HandlingError(msg_a + msg_b)
    complement_name_set = possible_name_set - selected_name_set
    # assert that neither the selected name set nor its complement is empty
    if not selected_name_set or not complement_name_set:
        raise HandlingError('the selection is degenerate')
    # define an ordering on the tips
    ordered_names = [node.get_name() for node in tree.gen_tips()]
    # convert the selected names to a Y vector with one entry per tip
    Y = np.array([
        1 if name in selected_name_set else -1
        for name in ordered_names])
    # get the distance matrix
    D = tree.get_distance_matrix(ordered_names)
    # get the R matrix
    R = Clustering.get_R_balaji(D)
    value = np.dot(np.dot(Y, R), Y.T)
    # return the taxon split evaluation
    return str(value) + '\n'
Ejemplo n.º 51
def get_response_content(fs):
    # get the newick trees.
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        # parse each tree
        # and make sure that it conforms to various requirements
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        if len(tip_names) < 4:
            msg = 'expected at least 4 tips but found ' + str(len(tip_names))
            raise HandlingError(msg)
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != len(tip_names):
            raise HandlingError('each terminal node label must be unique')
    # get the threshold for negligibility of an eigenvector loading
    epsilon = fs.epsilon
    if not (0 <= epsilon < 1):
        raise HandlingError('invalid threshold for negligibility')
    # get the set of selected options
    selected_options = fs.options
    # analyze each tree
    results = []
    for tree in trees:
        results.append(AnalysisResult(tree, epsilon))
    # create the response
    out = StringIO()
    for result in results:
        for line in result.get_response_lines(selected_options):
            print >> out, line
        print >> out
    # return the response
    return out.getvalue()
Ejemplo n.º 52
def get_response_content(fs):
    """
    Report several sets of points derived from the distance matrices
    of a tree: from the leaves alone, from the augmented matrix,
    from two mass weightings, and the limiting points.
    This also changes the global numpy print options.
    @param fs: an object with the attributes tree_string (a newick
    string), ndups and show_aug
    @return: the text of the response
    """
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # count the vertices by kind
    nvertices = len(list(tree.preorder()))
    nleaves = len(list(tree.gen_tips()))
    ninternal = nvertices - nleaves
    # get ordered ids with the internal nodes first
    ordered_ids = get_ordered_ids(tree)
    leaf_ids = [id(node) for node in tree.gen_tips()]

    def get_masses(leaf_mass):
        # unit mass on each internal node and leaf_mass on each leaf,
        # normalized so that the masses sum to one
        raw = [1] * ninternal + [leaf_mass] * nleaves
        return np.array(raw, dtype=float) / sum(raw)

    # the leaf, full and augmented distance matrices
    D_leaf = np.array(tree.get_partial_distance_matrix(leaf_ids))
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    D_aug = get_augmented_distance(D, nleaves, fs.ndups)
    # points from the leaf matrix and from the augmented matrix
    X_leaf = Euclid.edm_to_points(D_leaf)
    X_aug = Euclid.edm_to_points(D_aug, nvertices - 1)
    # weighted points for the given number of dups and for ten times as many
    X_weighted = Euclid.edm_to_weighted_points(D, get_masses(1 + fs.ndups))
    X_weighted_10x = Euclid.edm_to_weighted_points(
        D, get_masses(1 + fs.ndups * 10))
    # limiting points: center on the leaf rows, then rotate by the
    # right singular vectors of those rows
    X = Euclid.edm_to_points(D)
    X -= np.mean(X[-nleaves:], axis=0)
    XL = X[-nleaves:]
    U, s, Vt = np.linalg.svd(XL)
    Z = np.dot(X, Vt.T)
    # report the results
    np.set_printoptions(linewidth=300, threshold=10000)
    out = StringIO()

    def emit(*items):
        # write each item on its own line and finish with a blank line
        for item in items:
            print >> out, item
        print >> out

    emit('leaf distance matrix:', D_leaf)
    emit(
        'points derived from the leaf distance matrix',
        '(the first column is proportional to the Fiedler vector):',
        X_leaf)
    if fs.show_aug:
        emit('augmented distance matrix:', D_aug)
    emit(
        'points derived from the augmented distance matrix',
        '(the first column is proportional to the Fiedler vector):',
        get_ugly_matrix(X_aug, ninternal, nleaves))
    emit('points computed using masses:', X_weighted)
    emit('points computed using masses with 10x dups:', X_weighted_10x)
    emit('limiting points:', Z)
    return out.getvalue()
Ejemplo n.º 53
def get_response_content(fs):
    """
    Evaluate a split defined by two taxa on each side using the
    determinant of a 2x2 submatrix of the response matrix.
    @param fs: an object with the attributes tree, lhs_a, lhs_b, rhs_a,
    rhs_b, show_response, show_reduced_response and show_blen
    @return: the response text as paragraphs separated by blank lines
    @raise HandlingError: if one of the four labels is not a tip of the tree
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # assert that the given labels are tips of the tree
    tip_name_set = set(node.get_name() for node in tree.gen_tips())
    user_name_set = set([fs.lhs_a, fs.lhs_b, fs.rhs_a, fs.rhs_b])
    bad_names = user_name_set - tip_name_set
    if bad_names:
        msg = 'these labels are not valid tips: %s' % ', '.join(bad_names)
        raise HandlingError(msg)
    # get the distance matrix among the alphabetically ordered tips
    ordered_names = list(sorted(node.get_name() for node in tree.gen_tips()))
    D = np.array(tree.get_distance_matrix(ordered_names))
    # get the response matrix
    R = Clustering.get_R_stone(D)
    # get the two by two matrix:
    # rows are the left hand taxa and columns are the right hand taxa
    name_to_index = dict((name, i) for i, name in enumerate(ordered_names))
    R_reduced = np.zeros((2,2))
    la = name_to_index[fs.lhs_a]
    lb = name_to_index[fs.lhs_b]
    ra = name_to_index[fs.rhs_a]
    rb = name_to_index[fs.rhs_b]
    R_reduced[0][0] = R[la][ra]
    R_reduced[0][1] = R[la][rb]
    R_reduced[1][0] = R[lb][ra]
    R_reduced[1][1] = R[lb][rb]
    # treat a determinant that is within rounding error of zero as zero
    epsilon = 1e-13
    criterion = np.linalg.det(R_reduced)
    if abs(criterion) < epsilon:
        criterion = 0
    # in analogy to the four point condition,
    # use two different ways of calculating the distance
    blen_a = (D[la][rb] + D[lb][ra] - D[la][lb] - D[ra][rb]) / 2.0
    blen_b = (D[la][ra] + D[lb][rb] - D[la][lb] - D[ra][rb]) / 2.0
    blen = min(blen_a, blen_b)
    # define the response
    # NOTE(review): the second element of each paragraph was reconstructed;
    # MatrixUtil.m_to_string is how other handlers in this file format
    # matrices -- confirm against the upstream source
    paragraphs = []
    if fs.show_response:
        paragraph = [
                'response matrix with rows ordered alphabetically by leaf label:',
                MatrixUtil.m_to_string(R)]
        paragraphs.append(paragraph)
    if fs.show_reduced_response:
        paragraph = [
                '2x2 submatrix of the response matrix:',
                MatrixUtil.m_to_string(R_reduced)]
        paragraphs.append(paragraph)
    # the determinant is always reported
    paragraph = [
            'determinant of the 2x2 submatrix of the response matrix:',
            str(criterion)]
    paragraphs.append(paragraph)
    if fs.show_blen:
        paragraph = [
                'branch length defined by the split:',
                str(blen)]
        paragraphs.append(paragraph)
    # return the response
    return '\n\n'.join('\n'.join(p) for p in paragraphs) + '\n'
Ejemplo n.º 54
def get_response_content(fs):
    """
    Report point configurations derived from the distance matrices of a tree.
    The report shows the leaf distance matrix, optionally the augmented
    distance matrix, and several sets of points computed from them.
    @param fs: an object providing tree_string, ndups, and show_aug
    @return: the text of the report
    """
    # parse the newick string and count the vertices of each kind
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    vertex_count = sum(1 for _ in tree.preorder())
    leaf_count = sum(1 for _ in tree.gen_tips())
    internal_count = vertex_count - leaf_count
    # the ordered ids list the internal vertices before the leaves
    all_ids = get_ordered_ids(tree)
    tip_ids = [id(tip) for tip in tree.gen_tips()]
    # distances among the leaves, among all vertices, and the augmented form
    leaf_distances = np.array(tree.get_partial_distance_matrix(tip_ids))
    full_distances = np.array(tree.get_partial_distance_matrix(all_ids))
    augmented_distances = get_augmented_distance(
            full_distances, leaf_count, fs.ndups)
    # points from the leaf matrix and from the augmented matrix
    leaf_points = Euclid.edm_to_points(leaf_distances)
    augmented_points = Euclid.edm_to_points(
            augmented_distances, vertex_count - 1)

    def get_masses(leaf_mass):
        # unit mass per internal vertex, leaf_mass per leaf, scaled to sum to 1
        raw_masses = [1] * internal_count + [leaf_mass] * leaf_count
        return np.array(raw_masses, dtype=float) / sum(raw_masses)

    # weighted points for the requested number of dups and for ten times that
    weighted_points = Euclid.edm_to_weighted_points(
            full_distances, get_masses(1 + fs.ndups))
    weighted_points_10x = Euclid.edm_to_weighted_points(
            full_distances, get_masses(1 + fs.ndups * 10))
    # limiting points: center all points on the mean of the leaf rows,
    # then rotate by the right singular vectors of the leaf rows
    all_points = Euclid.edm_to_points(full_distances)
    all_points -= np.mean(all_points[-leaf_count:], axis=0)
    _, _, right_vectors_t = np.linalg.svd(all_points[-leaf_count:])
    limiting_points = np.dot(all_points, right_vectors_t.T)
    # widen the numpy output before anything is formatted
    np.set_printoptions(linewidth=300, threshold=10000)
    fiedler_note = '(the first column is proportional to the Fiedler vector):'
    # each section is a sequence of heading lines followed by one value
    sections = [
            (('leaf distance matrix:',), leaf_distances),
            (('points derived from the leaf distance matrix', fiedler_note),
                leaf_points),
            ]
    if fs.show_aug:
        sections.append(
                (('augmented distance matrix:',), augmented_distances))
    sections.extend([
            (('points derived from the augmented distance matrix',
                fiedler_note),
                get_ugly_matrix(augmented_points, internal_count, leaf_count)),
            (('points computed using masses:',), weighted_points),
            (('points computed using masses with 10x dups:',),
                weighted_points_10x),
            (('limiting points:',), limiting_points),
            ])
    # write every section followed by a blank line
    out = StringIO()
    for heading_lines, value in sections:
        for heading in heading_lines:
            print >> out, heading
        print >> out, value
        print >> out
    return out.getvalue()
Ejemplo n.º 55
def get_response_content(fs):
    """
    Simulate distance matrices from a tree and summarize partition errors.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    @raise HandlingError: if no bipartition function is selected or if
    the requested work is predicted to exceed the step budget
    """
    # read the criterion flags, creating the splitter object
    if fs.exact:
        splitter = Clustering.StoneExactDMS()
    elif fs.sign:
        splitter = Clustering.StoneSpectralSignDMS()
    elif fs.nj:
        splitter = Clustering.NeighborJoiningDMS()
    elif fs.random:
        splitter = Clustering.RandomDMS()
    else:
        # without this branch the splitter name would be unbound below
        raise HandlingError("no bipartition function was selected")
    # read the original tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # define the maximum number of steps we want
    max_steps = 1000000
    # Make sure that the splitter object is appropriate
    # for the number of taxa and the number of tree reconstructions.
    ntaxa = len(list(tree.gen_tips()))
    if splitter.get_complexity(ntaxa) * fs.iterations > max_steps:
        msg_a = "use a faster bipartition function, "
        msg_b = "fewer taxa, or fewer tree reconstructions"
        raise HandlingError(msg_a + msg_b)
    # define the simulation parameters
    sim = Simulation(splitter, "nj", "cgi tree building simulation")
    # define an arbitrary but consistent ordering of the taxa
    ordered_names = [node.name for node in tree.gen_tips()]
    # attempt to simulate a bunch of distance matrices
    sampler = DMSampler.DMSampler(tree, ordered_names, fs.length)
    distance_matrices = []
    for result in sampler.gen_samples_or_none():
        # if a proposal was accepted then add it to the list
        if result:
            _, distance_matrix = result
            distance_matrices.append(distance_matrix)
        # if enough accepted samples have been generated then stop sampling
        remaining_acceptances = fs.iterations - len(distance_matrices)
        if remaining_acceptances <= 0:
            break
        # If the remaining number of computrons is predicted
        # to be too much then stop.
        if sampler.get_remaining_computrons(remaining_acceptances) > max_steps:
            msg_a = "this combination of parameters "
            msg_b = "is predicted to take too long"
            raise HandlingError(msg_a + msg_b)
    sim.run(distance_matrices, ordered_names)
    # define the response
    out = StringIO()
    print >> out, "partition error count frequencies:"
    print >> out, sim.get_histogram_string()
    print >> out, ""
    print >> out, "weighted partition errors:", sim.get_deep_loss()
    # return the response
    return out.getvalue()
Ejemplo n.º 56
def get_response_content(fs):
    """
    Report the splits of a tree and of its pruning to the selected tips.
    @param fs: an object providing names, tree, show_newick, and show_art
    @return: the text of the report
    @raise HandlingError: if a selected name is not a tip of the tree
    """
    # the selected names arrive as one name per line
    requested_names = set(Util.get_stripped_lines(StringIO(fs.names)))
    full_tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # every selected name must label some tip of the tree
    tip_names = set(tip.get_name() for tip in full_tree.gen_tips())
    unknown_names = requested_names - tip_names
    if unknown_names:
        raise HandlingError(
                "the following selected names "
                + "are not valid tips: %s" % str(tuple(unknown_names)))
    # the pruned tree is built from a second parse of the same string
    pruned_tree = get_pruned_tree(
            NewickIO.parse(fs.tree, Newick.NewickTree), requested_names)
    # describe the original tree first and the pruned tree second
    labeled_trees = (
            ("the original tree", full_tree),
            ("the pruned tree", pruned_tree),
            )
    out = StringIO()
    for label, current_tree in labeled_trees:
        print >> out, "calculating splits of %s:" % label
        print >> out, process_tree(
                current_tree, label, fs.show_newick, fs.show_art)
    return out.getvalue()
Ejemplo n.º 57
def get_response_content(fs):
    """
    Normalize a newick tree and report each change that was made.
    The root branch length is cleared, the number of children of the root
    is reported, and the names of internal nodes are removed.
    @param fs: an object providing the newick string as its tree attribute
    @return: the report followed by the formatted newick string
    """
    # read the tree
    tree = NewickIO.parse(fs.tree, Newick.NewickTree)
    # begin the response
    out = StringIO()
    # remove the branch length associated with the root
    if tree.get_root().blen is not None:
        print >> out, 'the root originally had a branch length of', tree.get_root().blen
        tree.get_root().blen = None
    else:
        print >> out, 'the root did not originally have a branch length'
    # force a trifurcation at the root
    if tree.get_root().get_child_count() < 3:
        print >> out, 'the original root had', tree.get_root().get_child_count(), 'children'
        # pick the child of the root that has the most children of its own
        max_children, best_child = max((child.get_child_count(), child) for child in tree.get_root().gen_children())
        old_root = tree.get_root()
        # NOTE(review): no statement here moves the root to best_child or
        # removes old_root, so the tree is unchanged and the message below
        # describes the same root; the rerooting step appears to be missing
        # and should be restored using the tree API.
        print >> out, 'the new root has', tree.get_root().get_child_count(), 'children'
    else:
        print >> out, 'the root has', tree.get_root().get_child_count(), 'children'
    # remove names of internal nodes
    nremoved_names = 0
    for node in tree.preorder():
        if node.has_children() and node.name is not None:
            node.name = None
            nremoved_names += 1
    print >> out, 'removed', nremoved_names, 'internal node names'
    # draw the new formatted newick string after a break
    print >> out
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 120)
    print >> out, formatted_tree_string
    # return the response
    return out.getvalue()