Example #1
0
def get_form():
    """
    Build the form for this script.
    @return: a list of form objects
    """
    # parse the module-level default newick string
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    # re-serialize it in narrow newick form; 60 is forwarded as the second argument
    narrow_newick = NewickIO.get_narrow_newick_string(default_tree, 60)
    tree_field = Form.MultiLine('tree', 'tree', narrow_newick)
    return [tree_field]
Example #2
0
def get_form():
    """
    Build the form: a tree, a sequence length, and three radio groups.
    @return: the body of a form
    """
    # format the default tree for display
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    narrow_newick = NewickIO.get_narrow_newick_string(default_tree, 60)
    # the free-form fields
    tree_field = Form.MultiLine('tree', 'newick tree', narrow_newick)
    length_field = Form.Integer(
        'length', 'use sequences that are this long', 100, low=1)
    # the sampling model used for the distance matrix
    assumption_group = Form.RadioGroup(
        'assumption', 'distance matrix sampling model', [
            Form.RadioItem('infinite_alleles', 'infinite alleles', True),
            Form.RadioItem('jukes_cantor', 'Jukes-Cantor model (4 alleles)'),
        ])
    # what to do with infinite distance estimates
    infinity_group = Form.RadioGroup(
        'infinity', 'infinite distance estimates', [
            Form.RadioItem('reject_infinity', 'reject these matrices'),
            Form.RadioItem('replace_infinity', 'replace inf with 20', True),
        ])
    # what to do with distance estimates of zero
    zero_group = Form.RadioGroup(
        'zero', 'distance estimates of zero', [
            Form.RadioItem('reject_zero', 'reject these matrices'),
            Form.RadioItem('replace_zero', 'use .00001 instead of zero'),
            Form.RadioItem('remain_zero', 'use 0 unmodified', True),
        ])
    return [
        tree_field,
        length_field,
        assumption_group,
        infinity_group,
        zero_group,
    ]
Example #3
0
 def get_response_lines(self, options):
     """
     Get the lines that form the result of the analysis.
     The lines are collected eagerly and returned as a list.
     @param options: a subset of strings specifying what to show
     @return: a list of preamble lines followed by error lines
     """
     # each entry is (enabling option, name of the flag attribute, message);
     # the flag is looked up by name so it is only read when its option is set
     checks = (
         ('show_incomplete', 'is_incomplete',
          'the sequential splits defined by the eigenvectors were insufficient to reconstruct the tree'),
         ('show_conflicting', 'is_conflicting',
          'the reconstructed tree has a split that is incompatible with the original tree'),
         ('show_negligible', 'is_negligible',
          'during reconstruction a negligible eigenvector loading was encountered'),
     )
     error_lines = [
         message for option, flag_name, message in checks
         if option in options and getattr(self, flag_name)
     ]
     # without 'show_all' and without errors there is nothing to report
     if 'show_all' not in options and not error_lines:
         return []
     # otherwise the trees are shown ahead of any error messages
     preamble_lines = [
         'original tree:',
         NewickIO.get_newick_string(self.tree),
     ]
     if self.reconstructed_tree:
         preamble_lines.append('reconstructed tree:')
         preamble_lines.append(
             NewickIO.get_newick_string(self.reconstructed_tree))
     return preamble_lines + error_lines
Example #4
0
def get_response_content(fs):
    """
    Compute a split-based loss between a query tree and a reference tree.
    @param fs: an object with the attributes query, reference, uniform, weighted, and normalize
    @return: the loss followed by a newline, or a message if normalization failed
    @raise ValueError: if neither the uniform nor the weighted loss is selected
    """
    # Fail early with a clear message instead of hitting an unbound
    # local variable later when no loss function has been selected.
    if not (fs.uniform or fs.weighted):
        raise ValueError(
            'either the uniform or the weighted loss function must be selected')
    # read the query tree
    query_tree = NewickIO.parse(fs.query, FelTree.NewickTree)
    # read the reference tree
    reference_tree = NewickIO.parse(fs.reference, FelTree.NewickTree)
    # calculate the loss using the requested loss function;
    # the uniform choice takes precedence when both are set
    if fs.uniform:
        loss_numerator = TreeComparison.get_split_distance(
                query_tree, reference_tree)
    else:
        loss_numerator = TreeComparison.get_weighted_split_distance(
                query_tree, reference_tree)
    # do the normalization if requested
    if not fs.normalize:
        loss_denominator = 1
    elif fs.uniform:
        loss_denominator = float(
                TreeComparison.get_nontrivial_split_count(reference_tree))
    else:
        loss_denominator = float(
                TreeComparison.get_weighted_split_count(reference_tree))
    # a zero denominator means that the loss cannot be normalized
    if not loss_denominator:
        return 'normalization failed\n'
    return str(loss_numerator / loss_denominator) + '\n'
Example #5
0
 def test_get_split_distance(self):
     """
     Check the number of missing nontrivial partitions for several tree pairs.
     """
     # define some trees, keyed by a short name
     newick_strings = (
         ('a', '((A:1, B:1):1, C:1, (D:1, E:1):1);'),
         ('b', '((A:1, B:1):1, D:1, (C:1, E:1):1);'),
         ('c', '((A:1, D:1):1, C:1, (B:1, E:1):1);'),
         ('d', '((A:1, D:1):1, (C:1, B:1, E:1):1);'),
     )
     trees = {}
     for name, newick in newick_strings:
         trees[name] = NewickIO.parse(newick, FelTree.NewickTree)
     # each entry is (first tree, second tree, expected distance)
     expectations = (
         # the distance from a tree to itself should be zero
         ('a', 'a', 0),
         ('b', 'b', 0),
         ('c', 'c', 0),
         ('d', 'd', 0),
         # some of the distances are symmetric
         ('a', 'b', 1),
         ('b', 'a', 1),
         ('b', 'c', 2),
         ('c', 'b', 2),
         ('a', 'c', 2),
         ('c', 'a', 2),
         # the distance can be asymmetric if internal nodes are not order 3
         ('a', 'd', 1),
         ('d', 'a', 2),
     )
     for first, second, expected in expectations:
         observed = get_split_distance(trees[first], trees[second])
         self.assertEqual(observed, expected)
Example #6
0
def get_form():
    """
    Build the form: a tree, a node selection, and a set of output options.
    @return: the body of a form
    """
    # format the default tree for display
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    narrow_newick = NewickIO.get_narrow_newick_string(default_tree, 60)
    tree_field = Form.MultiLine("tree", "newick tree", narrow_newick)
    # choose which nodes contribute to the distance matrix
    matrix_items = [
        RadioItem("standard", "tips only", True),
        RadioItem("augmented", "all nodes"),
        RadioItem("named", "all named nodes"),
    ]
    matrix_group = Form.RadioGroup(
        "matrix", "nodes used for the distance matrix", matrix_items)
    # every output option is checked by default
    output_choices = (
        ("show_split", "exact criterion partition"),
        ("show_value", "exact criterion value"),
        ("show_value_minus_trace", "exact criterion value minus trace"),
        ("show_fiedler_split", "show the spectral sign partition"),
        ("show_fiedler_eigenvector", "show the eigenvector of interest"),
        ("show_labels", "ordered labels"),
        ("show_distance_matrix", "distance matrix"),
        ("show_M_matrix", "M matrix"),
    )
    output_items = [
        CheckItem(name, label, True) for name, label in output_choices]
    output_group = Form.CheckGroup(
        "output_options", "output options", output_items)
    return [tree_field, matrix_group, output_group]
Example #7
0
def get_form():
    """
    Assemble the form objects: tree input, matrix node choice, output options.
    @return: the body of a form
    """
    # the default tree, re-serialized in narrow newick form
    parsed_default = NewickIO.parse(g_default_string, FelTree.NewickTree)
    default_text = NewickIO.get_narrow_newick_string(parsed_default, 60)
    # build the list one field at a time
    form_objects = []
    form_objects.append(Form.MultiLine('tree', 'newick tree', default_text))
    # radio group: which nodes are used for the distance matrix
    node_choices = [
        RadioItem('standard', 'tips only', True),
        RadioItem('augmented', 'all nodes'),
        RadioItem('named', 'all named nodes'),
    ]
    form_objects.append(Form.RadioGroup(
        'matrix', 'nodes used for the distance matrix', node_choices))
    # check group: all output options start out selected
    output_items = []
    for name, label in (
            ('show_split', 'exact criterion partition'),
            ('show_value', 'exact criterion value'),
            ('show_value_minus_trace', 'exact criterion value minus trace'),
            ('show_fiedler_split', 'show the spectral sign partition'),
            ('show_fiedler_eigenvector', 'show the eigenvector of interest'),
            ('show_labels', 'ordered labels'),
            ('show_distance_matrix', 'distance matrix'),
            ('show_M_matrix', 'M matrix')):
        output_items.append(CheckItem(name, label, True))
    form_objects.append(Form.CheckGroup(
        'output_options', 'output options', output_items))
    return form_objects
Example #8
0
def get_form():
    """
    Build the form: a tree, a sequence length, and four radio groups.
    @return: the body of a form
    """
    # the default tree is hard-coded here rather than read from a module constant
    tree_string = '(((a:0.05, b:0.05):0.15, c:0.2):0.8, x:1.0, (((m:0.05, n:0.05):0.15, p:0.2):0.8, y:1.0):1.0);'
    default_tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    narrow_newick = NewickIO.get_narrow_newick_string(default_tree, 60)
    # the free-form fields
    tree_field = Form.MultiLine('tree', 'tree', narrow_newick)
    length_field = Form.Integer(
        'sequence_length', 'use sequences that are this long', 100, low=1)
    # the sampling model used for the distance matrix
    assumption_group = Form.RadioGroup(
        'assumption', 'distance matrix sampling model', [
            RadioItem('infinite_alleles', 'infinite alleles', True),
            RadioItem('jukes_cantor', 'Jukes-Cantor model (4 alleles)'),
        ])
    # how to treat matrices with infinite distances
    infinity_group = Form.RadioGroup(
        'infinity', 'matrices with infinite distances', [
            RadioItem('reject_infinity', 'reject these matrices', True),
            RadioItem('replace_infinity', 'use 20 instead'),
        ])
    # how to treat matrices with zero distances
    zero_group = Form.RadioGroup(
        'zero', 'matrices with zero distances', [
            RadioItem('reject_zero', 'reject these matrices'),
            RadioItem('replace_zero', 'use .00001 instead'),
            RadioItem('remain_zero', 'use 0 unmodified', True),
        ])
    # the criterion used to reconstruct the tree
    criterion_group = Form.RadioGroup(
        'criterion', 'tree reconstruction criterion', [
            RadioItem('sign', 'spectral sign approximation', True),
            RadioItem('nj', 'neighbor joining'),
            RadioItem('random', 'random bipartition'),
        ])
    return [
        tree_field,
        length_field,
        assumption_group,
        infinity_group,
        zero_group,
        criterion_group,
    ]
Example #9
0
def get_form():
    """
    Build the form: a tree, its sorted tip labels, a strength, and output options.
    @return: the body of a form
    """
    # parse the hard-coded default tree and format it for display
    tree_string = "(a:1, (b:2, d:5):1, c:4);"
    default_tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    narrow_newick = NewickIO.get_narrow_newick_string(default_tree, 60)
    # the default label ordering is the sorted list of tip names
    tip_names = sorted(tip.name for tip in default_tree.gen_tips())
    label_text = "\n".join(tip_names)
    # the free-form fields
    tree_field = Form.MultiLine("tree", "newick tree", narrow_newick)
    label_field = Form.MultiLine("inlabels", "ordered labels", label_text)
    strength_field = Form.Float(
        "strength", "perturbation strength", 0.1, low_inclusive=0)
    # only the perturbed matrix is selected by default
    output_group = Form.CheckGroup("options", "output options", [
        CheckItem("perturbed", "a perturbed distance matrix", True),
        CheckItem("distance", "the original distance matrix"),
        CheckItem("outlabels", "ordered labels"),
    ])
    return [tree_field, label_field, strength_field, output_group]
Example #10
0
def get_form():
    """
    Describe the single input of this script, a tree.
    @return: a list of form objects
    """
    # the default value of the field is the narrow rendering of the default tree
    default_text = NewickIO.get_narrow_newick_string(
        NewickIO.parse(g_default_string, FelTree.NewickTree), 60)
    form_objects = []
    form_objects.append(Form.MultiLine('tree', 'tree', default_text))
    return form_objects
Example #11
0
def get_response_content(fs):
    """
    Report how far a query tree is from a reference tree in terms of splits.
    @param fs: an object with the attributes query, reference, uniform, weighted, and normalize
    @return: the loss followed by a newline, or a message if normalization failed
    @raise ValueError: if neither the uniform nor the weighted loss is selected
    """
    # Without a selected loss function the numerator would never be
    # assigned, so reject that request explicitly before doing any work.
    if not fs.uniform and not fs.weighted:
        raise ValueError(
            'either the uniform or the weighted loss function must be selected')
    # read the query tree and the reference tree
    query_tree = NewickIO.parse(fs.query, FelTree.NewickTree)
    reference_tree = NewickIO.parse(fs.reference, FelTree.NewickTree)
    # calculate the loss using the requested loss function;
    # the uniform choice wins when both flags are set
    if fs.uniform:
        loss_numerator = TreeComparison.get_split_distance(
            query_tree, reference_tree)
    else:
        loss_numerator = TreeComparison.get_weighted_split_distance(
            query_tree, reference_tree)
    # the denominator is 1 unless normalization is requested
    loss_denominator = 1
    if fs.normalize:
        if fs.uniform:
            loss_denominator = float(
                TreeComparison.get_nontrivial_split_count(reference_tree))
        else:
            loss_denominator = float(
                TreeComparison.get_weighted_split_count(reference_tree))
    # return the response; a zero denominator cannot be divided by
    if loss_denominator:
        return str(loss_numerator / loss_denominator) + '\n'
    return 'normalization failed\n'
Example #12
0
def do_distance_analysis(X):
    """
    Compare neighbor joining trees built from plain and from squared distances.
    @param X: a sequence of points that support subtraction; presumably four of them, matching the four labels
    @return: a multi-line report string
    """
    # one label per point
    labels = list("0123")

    def round_trip(distances):
        # build a neighbor joining tree from the distances,
        # then read back the distance matrix implied by that tree
        nj_tree = NeighborJoining.make_tree(distances, labels)
        newick = NewickIO.get_newick_string(nj_tree)
        parsed = NewickIO.parse(newick, FelTree.NewickTree)
        return newick, np.array(parsed.get_distance_matrix(labels))

    # pairwise Euclidean distances and their reconstruction from a tree
    D_sqrt = np.array([[np.linalg.norm(y - x) for x in X] for y in X])
    sqrt_tree_string, D_sqrt_reconstructed = round_trip(D_sqrt)
    # squared Euclidean distances and their reconstruction from a tree
    D = D_sqrt ** 2
    tree_string, D_reconstructed = round_trip(D)
    # the report alternates between captions and values
    report_items = (
        "matrix of Euclidean distances between tetrahedron vertices:",
        D_sqrt,
        "neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):",
        sqrt_tree_string,
        "distance matrix implied by this tree:",
        D_sqrt_reconstructed,
        "matrix of squared distances between tetrahedron vertices:",
        D,
        "neighbor joining tree constructed from D = squared Euclidean distances (normal):",
        tree_string,
        "distance matrix implied by this tree:",
        D_reconstructed,
    )
    out = StringIO()
    for item in report_items:
        print >> out, item
    return out.getvalue().strip()
Example #13
0
def do_distance_analysis(X):
    """
    Report neighbor joining reconstructions for two kinds of distance matrix.
    The first matrix holds Euclidean distances between the rows of X
    and the second holds the squares of those distances.
    @param X: a sequence of points that support subtraction; presumably four of them, matching the four labels
    @return: a multi-line report string
    """
    # one label per point
    labels = list('0123')
    # compute both distance matrices up front
    D_sqrt = np.array([[np.linalg.norm(y - x) for x in X] for y in X])
    D = D_sqrt**2
    # each section is (matrix caption, matrix, tree caption)
    sections = (
        ('matrix of Euclidean distances between tetrahedron vertices:',
         D_sqrt,
         'neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):'),
        ('matrix of squared distances between tetrahedron vertices:',
         D,
         'neighbor joining tree constructed from D = squared Euclidean distances (normal):'),
    )
    out = StringIO()
    for matrix_caption, matrix, tree_caption in sections:
        # build the tree, serialize it, and recover the implied distances
        nj_tree = NeighborJoining.make_tree(matrix, labels)
        newick = NewickIO.get_newick_string(nj_tree)
        parsed = NewickIO.parse(newick, FelTree.NewickTree)
        implied = np.array(parsed.get_distance_matrix(labels))
        # write this section of the report
        print >> out, matrix_caption
        print >> out, matrix
        print >> out, tree_caption
        print >> out, newick
        print >> out, 'distance matrix implied by this tree:'
        print >> out, implied
    return out.getvalue().strip()
Example #14
0
def get_response_content(fs):
    """
    Show the splits of a tree before and after pruning it to selected tips.
    @param fs: an object with the attributes names, tree, show_newick, and show_art
    @return: the text of the response
    @raise HandlingError: if a selected name is not the name of a tip
    """
    # collect the selected names and parse the tree
    selected_name_set = set(Util.get_stripped_lines(StringIO(fs.names)))
    full_tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # every selected name must be the name of some tip
    possible_name_set = set(tip.get_name() for tip in full_tree.gen_tips())
    extra_names = selected_name_set - possible_name_set
    if extra_names:
        template = (
            'the following selected names '
            'are not valid tips: %s')
        raise HandlingError(template % str(tuple(extra_names)))
    # the pruned tree is derived from a second parse that uses Newick.NewickTree
    simple_tree = NewickIO.parse(fs.tree, Newick.NewickTree)
    pruned_tree = get_pruned_tree(simple_tree, selected_name_set)
    # write one section per tree
    out = StringIO()
    labeled_trees = (
        (full_tree, 'the original tree'),
        (pruned_tree, 'the pruned tree'),
    )
    for current_tree, tree_name in labeled_trees:
        print >> out, 'calculating splits of %s:' % tree_name
        print >> out, process_tree(
            current_tree, tree_name, fs.show_newick, fs.show_art)
    return out.getvalue()
Example #15
0
def get_form():
    """
    Assemble the form: tree, sequence length, and three radio groups.
    @return: the body of a form
    """
    # the default tree, re-serialized in narrow newick form
    parsed_default = NewickIO.parse(g_default_string, FelTree.NewickTree)
    default_text = NewickIO.get_narrow_newick_string(parsed_default, 60)
    # build the list one field at a time
    form_objects = []
    form_objects.append(Form.MultiLine('tree', 'newick tree', default_text))
    form_objects.append(Form.Integer(
        'length', 'use sequences that are this long', 100, low=1))
    # the sampling model used for the distance matrix
    assumption_items = [
        Form.RadioItem('infinite_alleles', 'infinite alleles', True),
        Form.RadioItem('jukes_cantor', 'Jukes-Cantor model (4 alleles)'),
    ]
    form_objects.append(Form.RadioGroup(
        'assumption', 'distance matrix sampling model', assumption_items))
    # what to do with infinite distance estimates
    infinity_items = [
        Form.RadioItem('reject_infinity', 'reject these matrices'),
        Form.RadioItem('replace_infinity', 'replace inf with 20', True),
    ]
    form_objects.append(Form.RadioGroup(
        'infinity', 'infinite distance estimates', infinity_items))
    # what to do with distance estimates of zero
    zero_items = [
        Form.RadioItem('reject_zero', 'reject these matrices'),
        Form.RadioItem('replace_zero', 'use .00001 instead of zero'),
        Form.RadioItem('remain_zero', 'use 0 unmodified', True),
    ]
    form_objects.append(Form.RadioGroup(
        'zero', 'distance estimates of zero', zero_items))
    return form_objects
Example #16
0
 def test_update_generalized_nj_big(self):
     """
     Check the successor distance matrices of a six-taxon distance matrix.
     """
     def distance_matrix(newick, taxa):
         # distance matrix of the parsed tree, with rows ordered like the taxa
         parsed = NewickIO.parse(newick, FelTree.NewickTree)
         return np.array(parsed.get_distance_matrix(taxa))

     # The initial ordering of the taxa is arbitrary,
     # and the orderings of the two subtrees depend on it.
     initial_order = ['1', '4', '2', '5', '3', '6']
     # the distance matrix of the initial tree
     D_initial = distance_matrix(
         '(((3:9, 2:2):4, 1:2):1, (4:1, 5:3):7, 6:2);', initial_order)
     # the expected distance matrices of the two subtrees
     expected_a = distance_matrix(
         '((3:9, 2:2):4, 1:2, B:0.5);', ['1', 'B', '2', '3'])
     expected_b = distance_matrix(
         '((4:1, 5:3):7, 6:2, A:0.5);', ['A', '4', '5', '6'])
     # indices 1, 3, 5 of the initial ordering are taxa 4, 5, 6
     # and indices 0, 2, 4 are taxa 1, 2, 3
     observed_a = update_generalized_nj(D_initial, set([1, 3, 5]))
     observed_b = update_generalized_nj(D_initial, set([0, 2, 4]))
     # assert that the correct distance matrices are created
     self.assertTrue(np.allclose(expected_a, observed_a))
     self.assertTrue(np.allclose(expected_b, observed_b))
Example #17
0
def get_form():
    """
    Build the form: a tree, four taxon names defining a split, and output options.
    @return: the body of a form
    """
    # format the default tree for display
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    narrow_newick = NewickIO.get_narrow_newick_string(default_tree, 60)
    form_objects = [
        Form.MultiLine(
            'tree', 'newick tree with branch lengths', narrow_newick),
    ]
    # two taxa on each side of the split: (field name, description, default)
    taxon_fields = (
        ('lhs_a', 'the first taxon on one side of the split', 'a'),
        ('lhs_b', 'the second taxon on one side of the split', 'b'),
        ('rhs_a', 'the first taxon on the other side of the split', 'x'),
        ('rhs_b', 'the second taxon on the other side of the split', 'y'),
    )
    for name, description, default in taxon_fields:
        form_objects.append(Form.SingleLine(name, description, default))
    # no output option is passed a default flag
    output_choices = (
        ('show_response', 'show the Laplacian response matrix'),
        ('show_reduced_response', 'show the 2x2 submatrix'),
        ('show_blen', 'show the branch length implied by the split'),
    )
    output_items = [
        Form.CheckItem(name, label) for name, label in output_choices]
    form_objects.append(
        Form.CheckGroup('options', 'output options', output_items))
    return form_objects
Example #18
0
def get_form():
    """
    Assemble the form: tree, the four taxa that define a split, output options.
    @return: the body of a form
    """
    # the default tree, re-serialized in narrow newick form
    parsed_default = NewickIO.parse(g_default_string, FelTree.NewickTree)
    default_text = NewickIO.get_narrow_newick_string(parsed_default, 60)
    # the tree and the taxa on each side of the split
    tree_field = Form.MultiLine(
        'tree', 'newick tree with branch lengths', default_text)
    lhs_a_field = Form.SingleLine(
        'lhs_a', 'the first taxon on one side of the split', 'a')
    lhs_b_field = Form.SingleLine(
        'lhs_b', 'the second taxon on one side of the split', 'b')
    rhs_a_field = Form.SingleLine(
        'rhs_a', 'the first taxon on the other side of the split', 'x')
    rhs_b_field = Form.SingleLine(
        'rhs_b', 'the second taxon on the other side of the split', 'y')
    # the optional parts of the output
    output_items = [
        Form.CheckItem('show_response', 'show the full Laplacian matrix'),
        Form.CheckItem('show_reduced_response', 'show the 2x2 submatrix'),
        Form.CheckItem(
            'show_blen', 'show the branch length implied by the split'),
    ]
    output_group = Form.CheckGroup('options', 'output options', output_items)
    return [
        tree_field,
        lhs_a_field,
        lhs_b_field,
        rhs_a_field,
        rhs_b_field,
        output_group,
    ]
Example #19
0
 def test_get_split_distance(self):
     """
     Test the split distance on four small trees that share the tips A through E.
     """
     # parse the trees in a fixed order
     tree_a, tree_b, tree_c, tree_d = [
         NewickIO.parse(newick, FelTree.NewickTree)
         for newick in (
             '((A:1, B:1):1, C:1, (D:1, E:1):1);',
             '((A:1, B:1):1, D:1, (C:1, E:1):1);',
             '((A:1, D:1):1, C:1, (B:1, E:1):1);',
             '((A:1, D:1):1, (C:1, B:1, E:1):1);',
         )
     ]
     # the distance from a tree to itself should be zero
     for tree in (tree_a, tree_b, tree_c, tree_d):
         self.assertEqual(get_split_distance(tree, tree), 0)
     # some of the distances are symmetric
     symmetric_cases = (
         (tree_a, tree_b, 1),
         (tree_b, tree_c, 2),
         (tree_a, tree_c, 2),
     )
     for first, second, expected in symmetric_cases:
         self.assertEqual(get_split_distance(first, second), expected)
         self.assertEqual(get_split_distance(second, first), expected)
     # the distance can be asymmetric if internal nodes are not order 3
     self.assertEqual(get_split_distance(tree_a, tree_d), 1)
     self.assertEqual(get_split_distance(tree_d, tree_a), 2)
Example #20
0
def get_response_content(fs):
    """
    Check the sign partitions of principal coordinates against the splits of each tree.
    For every tree, one partition is computed from the leaf distance matrix
    and another from the matrix of elementwise squared distances.
    @param fs: an object whose trees attribute holds one newick tree per line
    @return: the text of the response
    @raise HandlingError: if a tree has too few, unlabeled, or duplicated tips
    """
    out = StringIO()

    def sign_partition(tip_names, matrix):
        # split the tips by the sign of their principal coordinate loading
        pairs = list(zip(tip_names, get_principal_coordinate(matrix)))
        nonneg_leaf_set = frozenset(tip for tip, v in pairs if v >= 0)
        neg_leaf_set = frozenset(tip for tip, v in pairs if v < 0)
        return frozenset([nonneg_leaf_set, neg_leaf_set])

    def report(headline, tree, part):
        # describe an invalid partition, followed by a blank line
        print >> out, headline
        print >> out, 'tree:', NewickIO.get_newick_string(tree)
        print >> out, 'invalid partition:', partition_to_string(part)
        print >> out

    # parse each tree and make sure that it conforms to various requirements
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        tip_count = len(tip_names)
        if tip_count < 4:
            raise HandlingError(
                'expected at least four tips but found ' + str(tip_count))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != tip_count:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # look at each tree
    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        # get the set of valid partitions implied by the tree
        valid_parts = TreeComparison.get_partitions(tree)
        ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()]
        D = np.array(tree.get_distance_matrix(ordered_tip_names))
        # the partition implied by the correct formula is expected to be valid
        part = sign_partition(ordered_tip_names, D)
        if part not in valid_parts:
            nerrors += 1
            report(
                'error: a partition that was supposed to be valid was found to be invalid',
                tree, part)
        # the incorrect formula uses the elementwise squared distances
        part = sign_partition(ordered_tip_names, D * D)
        if part not in valid_parts:
            ncounterexamples += 1
            report('found a counterexample!', tree, part)
    # finish with the totals
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
Example #21
0
def main():
    filename = 'counterexamples.out'
    fout = open(filename, 'wt')
    print 'Does monotonically transforming the pairwise leaf distances affect the compatibility'
    print 'of the split found using principal coordinate analysis?'
    print 'I am looking through random trees for a tree that is split incompatibly'
    print 'when distances are squared.'
    print 'Use control-c to stop the program when you get bored.'
    try:
        count = 0
        ncounterexamples = 0
        nerrors = 0
        while True:
            count += 1
            # get a random tree
            n_base_leaves = 4
            n_expected_extra_leaves = 1
            expected_branch_length = 1
            tree = TreeSampler.sample_tree(n_base_leaves,
                                           n_expected_extra_leaves,
                                           expected_branch_length)
            # get the set of valid partitions implied by the tree
            valid_parts = TreeComparison.get_partitions(tree)
            ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()]
            # assert that the partition implied by the correct formula is valid
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            loadings = get_principal_coordinate(D)
            nonneg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                nerrors += 1
                print >> fout, 'error: a partition that was supposed to be valid was found to be invalid'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
            # check the validity of the partition implied by the incorrect formula
            Q = D * D
            loadings = get_principal_coordinate(Q)
            nonneg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(
                tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                ncounterexamples += 1
                print >> fout, 'found a counterexample!'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
    except KeyboardInterrupt, e:
        print 'trees examined:', count
        print 'errors:', nerrors
        print 'counterexamples:', ncounterexamples
Example #22
0
def get_form():
    """
    Describe the single input of this script, a tree with branch lengths.
    @return: a list of form objects
    """
    # format the default tree for display
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    narrow_newick = NewickIO.get_narrow_newick_string(default_tree, 60)
    tree_field = Form.MultiLine(
        "tree", "newick tree with branch lengths", narrow_newick)
    return [tree_field]
Example #23
0
 def test_contrast_matrix_to_tree(self):
     original_tree = NewickIO.parse(g_felsenstein_tree_string, FelTree.NewickTree)
     ordered_names = ('a', 'b', 'c', 'd', 'e')
     C = get_contrast_matrix(original_tree, ordered_names)
     assert_contrast_matrix(C)
     reconstructed_tree = contrast_matrix_to_tree(C, ordered_names)
     newick_string = NewickIO.get_newick_string(reconstructed_tree)
     print
     print newick_string
     pass
Example #24
0
def get_form():
    """
    Describe the single input of this script, a newick tree.
    @return: the body of a form
    """
    # this script parses its default tree with Newick.NewickTree
    default_tree = NewickIO.parse(g_tree_data, Newick.NewickTree)
    narrow_newick = NewickIO.get_narrow_newick_string(default_tree, 60)
    tree_field = Form.MultiLine('tree', 'newick tree', narrow_newick)
    return [tree_field]
Example #25
0
 def test_contrast_matrix_to_tree(self):
     original_tree = NewickIO.parse(g_felsenstein_tree_string,
                                    FelTree.NewickTree)
     ordered_names = ('a', 'b', 'c', 'd', 'e')
     C = get_contrast_matrix(original_tree, ordered_names)
     assert_contrast_matrix(C)
     reconstructed_tree = contrast_matrix_to_tree(C, ordered_names)
     newick_string = NewickIO.get_newick_string(reconstructed_tree)
     print
     print newick_string
     pass
Example #26
0
def get_form():
    """
    Build the one-field form of this script.
    @return: the body of a form
    """
    # the default value is the narrow rendering of the module-level tree data
    default_text = NewickIO.get_narrow_newick_string(
        NewickIO.parse(g_tree_data, Newick.NewickTree), 60)
    form_objects = []
    form_objects.append(Form.MultiLine('tree', 'newick tree', default_text))
    return form_objects
Example #27
0
def get_form():
    """
    Build the form: a tree and an image format selection.
    @return: a list of form objects
    """
    # format the default tree for display
    default_tree = NewickIO.parse(g_tree_string, FelTree.NewickTree)
    narrow_newick = NewickIO.get_narrow_newick_string(default_tree, 60)
    # the tree field comes first, followed by the image format field
    tree_field = Form.MultiLine('tree', 'tree', narrow_newick)
    image_format_field = Form.ImageFormat()
    return [tree_field, image_format_field]
Example #28
0
def get_form():
    """
    Build the form, which consists of a single tree input.
    @return: a list of form objects
    """
    # re-serialize the default newick string for display
    parsed_default = NewickIO.parse(g_default_string, FelTree.NewickTree)
    display_text = NewickIO.get_narrow_newick_string(parsed_default, 60)
    label = 'newick tree with branch lengths'
    return [Form.MultiLine('tree', label, display_text)]
Example #29
0
 def __init__(self, tree, epsilon):
     """
     Try to reconstruct a tree from the eigensystem of its Gower matrix.
     The outcome is recorded in three flags:
     is_negligible is set when a NegligibleError is raised,
     is_incomplete is set when an IncompleteError is raised,
     and is_conflicting is set when the reconstructed tree has
     a nontrivial partition that the original tree does not have.
     When no error is raised, self.reconstructed_tree holds the
     reconstruction parsed as a FelTree.NewickTree;
     otherwise it remains None.
     @param tree: a newick tree in the felsenstein-inspired format
     @param epsilon: determines whether loadings are considered negligible
     """
     # clear some flags that describe events that occur during reconstruction
     self.is_negligible = False
     self.is_incomplete = False
     self.is_conflicting = False
     # define the trees
     self.tree = tree
     self.reconstructed_tree = None
     # set the threshold for loading negligibility
     self.epsilon = epsilon
     # define some arbitrary ordering of tip names
     self.ordered_names = [node.get_name() for node in tree.gen_tips()]
     # get the distance matrix with respect to this ordering
     D = tree.get_distance_matrix(self.ordered_names)
     # get the Gower doubly centered matrix
     G = MatrixUtil.double_centered(np.array(D))
     # get the eigendecomposition of the Gower matrix
     eigenvalues, eigenvector_transposes = np.linalg.eigh(G)
     eigenvectors = eigenvector_transposes.T
     # Order the eigenpairs by decreasing absolute eigenvalue.
     # Sort on the magnitude alone: sorting the (magnitude, vector) tuples
     # directly would fall back to comparing the numpy vectors whenever
     # two magnitudes tie exactly, and that comparison raises ValueError.
     eigenpairs = [(abs(w), v) for w, v in zip(eigenvalues, eigenvectors)]
     eigenpairs.sort(key=lambda pair: pair[0])
     eigenpairs.reverse()
     self.sorted_eigensystem = eigenpairs
     # build the tree recursively using the sorted eigensystem
     indices = set(range(len(self.ordered_names)))
     try:
         # try to reconstruct the tree
         root = self._build_tree(indices, 0)
         root.set_branch_length(None)
         output_tree = Newick.NewickTree(root)
         # convert the tree to the FelTree format
         newick_string = NewickIO.get_newick_string(output_tree)
         self.reconstructed_tree = NewickIO.parse(newick_string,
                                                  FelTree.NewickTree)
     except NegligibleError:
         self.is_negligible = True
     except IncompleteError:
         self.is_incomplete = True
     else:
         # compare the splits defined by the reconstructed tree
         # to splits in the original tree
         expected_partitions = TreeComparison.get_nontrivial_partitions(
             self.tree)
         observed_partitions = TreeComparison.get_nontrivial_partitions(
             self.reconstructed_tree)
         invalid_partitions = observed_partitions - expected_partitions
         if invalid_partitions:
             self.is_conflicting = True
Example #30
0
def get_form():
    """
    Build the form: a tree input and an annotation input.
    @return: the body of a form
    """
    # the tree default is g_tree_data after a parse / re-serialize pass
    tree_text = NewickIO.get_narrow_newick_string(
        NewickIO.parse(g_tree_data, FelTree.NewickTree), 60)
    tree_field = Form.MultiLine('tree', 'tree', tree_text)
    # the annotation default is used verbatim
    annotation_field = Form.MultiLine(
        'annotation', 'SNP annotations', g_annotation_data)
    return [tree_field, annotation_field]
Example #31
0
def get_form():
    """
    Build the form: a tree input and an integer 'precision' input.
    @return: a list of form objects
    """
    # re-serialize the default newick string for display
    parsed_default = NewickIO.parse(g_default_string, FelTree.NewickTree)
    display_text = NewickIO.get_narrow_newick_string(parsed_default, 60)
    tree_field = Form.MultiLine(
        'tree', 'newick tree with branch lengths', display_text)
    # the precision defaults to 4 and is bounded by low=2 and high=17
    precision_field = Form.Integer('precision', 'precision', 4, low=2, high=17)
    return [tree_field, precision_field]
Example #32
0
def get_form():
    """
    @return: a list of form objects (tree input, then image format)
    """
    # prepare the text that pre-populates the tree input
    narrow_text = NewickIO.get_narrow_newick_string(
        NewickIO.parse(g_tree_string, FelTree.NewickTree), 60)
    return [Form.MultiLine('tree', 'tree', narrow_text), Form.ImageFormat()]
Example #33
0
def get_art(tree):
    """
    Draw a tree as ascii art.
    @param tree: a FelTree
    @return: a multi-line ascii art
    """
    # go through a newick string to obtain a Newick.NewickTree for drawing
    plain_tree = NewickIO.parse(
        NewickIO.get_newick_string(tree), Newick.NewickTree)
    # configure the drawer, applying the settings in a fixed order
    artist = DrawTree.DrawTree()
    settings = (
        ('use_branch_lengths', True),
        ('force_ultrametric', False),
        ('vertical_spacing', 1),
        ('horizontal_spacing', 1),
    )
    for attribute, setting in settings:
        setattr(artist, attribute, setting)
    return artist.draw(plain_tree)
Example #34
0
def get_form():
    """
    @return: a list of form objects (tree input, then precision input)
    """
    # the tree default is g_default_string after a parse / re-serialize pass
    narrow_text = NewickIO.get_narrow_newick_string(
        NewickIO.parse(g_default_string, FelTree.NewickTree), 60)
    fields = []
    fields.append(Form.MultiLine(
        'tree', 'newick tree with branch lengths', narrow_text))
    fields.append(Form.Integer('precision', 'precision', 4, low=2, high=17))
    return fields
Example #35
0
def get_art(tree):
    """
    Render a tree as a block of ascii art.
    @param tree: a FelTree
    @return: a multi-line ascii art
    """
    # The drawer is given a Newick.NewickTree,
    # so the input is serialized and parsed again as that type.
    serialized = NewickIO.get_newick_string(tree)
    drawable = NewickIO.parse(serialized, Newick.NewickTree)
    # draw with real branch lengths, no forced ultrametricity,
    # and unit spacing in both directions
    pen = DrawTree.DrawTree()
    pen.use_branch_lengths = True
    pen.force_ultrametric = False
    pen.vertical_spacing = pen.horizontal_spacing = 1
    return pen.draw(drawable)
Example #36
0
def get_form():
    """
    Build the form: a tree input and an integer 'precision' input.
    @return: a list of form objects
    """
    # prepare the text that pre-populates the tree input
    default_tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    default_text = NewickIO.get_narrow_newick_string(default_tree, 60)
    tree_label = "newick tree with branch lengths"
    form_objects = [
        Form.MultiLine("tree", tree_label, default_text),
        Form.Integer("precision", "precision", 4, low=2, high=17),
    ]
    return form_objects
Example #37
0
def get_form():
    """
    @return: the body of a form (tree input, then annotation input)
    """
    # re-serialize the default tree data for display
    parsed_default = NewickIO.parse(g_tree_data, FelTree.NewickTree)
    tree_text = NewickIO.get_narrow_newick_string(parsed_default, 60)
    # pair each multi-line field with its label and default text
    fields = []
    fields.append(Form.MultiLine('tree', 'tree', tree_text))
    fields.append(
        Form.MultiLine('annotation', 'SNP annotations', g_annotation_data))
    return fields
Example #38
0
 def _create_trees(self):
     """
     Set self.full_tree and self.pruned_tree from self.newick_string.
     The full tree is a Newick.NewickTree,
     and the pruned tree is a FelTree.NewickTree object.
     """
     self.full_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
     # Prune a separately parsed copy so that the full tree is untouched,
     # then re-parse the pruned newick string as a FelTree.
     scratch_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
     remove_redundant_nodes(scratch_tree)
     self.pruned_tree = NewickIO.parse(
         NewickIO.get_newick_string(scratch_tree), FelTree.NewickTree)
Example #39
0
def get_response_content(fs):
    """
    Check principal coordinate splits for each tree listed in fs.trees.
    Each tree is tested twice: once with its distance matrix,
    where an invalid split is reported as an error,
    and once with the elementwise squared distance matrix,
    where an invalid split is reported as a counterexample.
    @param fs: an object whose 'trees' attribute has one newick tree per line
    @return: the text of the report
    @raise HandlingError: a tree has fewer than four tips,
    an unlabeled tip, or duplicate tip labels
    """

    def get_sign_split(matrix, names):
        # split the names according to the sign of the principal coordinate
        loadings = get_principal_coordinate(matrix)
        nonneg = frozenset(
            name for name, v in zip(names, loadings) if v >= 0)
        neg = frozenset(
            name for name, v in zip(names, loadings) if v < 0)
        return frozenset([nonneg, neg])

    # parse every tree and validate it before doing any analysis
    trees = []
    for tree_string in iterutils.stripped_lines(StringIO(fs.trees)):
        candidate = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in candidate.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError('expected at least four tips but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(candidate)
    # accumulate the report
    out = StringIO()
    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        # these are the partitions that the tree itself implies
        valid_parts = TreeComparison.get_partitions(tree)
        ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()]
        # the split from the plain distances is supposed to be valid
        D = np.array(tree.get_distance_matrix(ordered_tip_names))
        part = get_sign_split(D, ordered_tip_names)
        if part not in valid_parts:
            nerrors += 1
            print >> out, 'error: a partition that was supposed to be valid was found to be invalid'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
        # the split from the squared distances may or may not be valid
        part = get_sign_split(D * D, ordered_tip_names)
        if part not in valid_parts:
            ncounterexamples += 1
            print >> out, 'found a counterexample!'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
    # finish with the tallies
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
Example #40
0
def get_form():
    """
    Build the form, which consists of a single tree input.
    @return: the body of a form
    """
    # The default tree has branch lengths and named internal nodes.
    default_newick = '(a:2, (b:2, c:9)g:4, ((d:1, e:3)i:7, f:2)j:1)h;'
    parsed_default = NewickIO.parse(default_newick, FelTree.NewickTree)
    display_text = NewickIO.get_narrow_newick_string(parsed_default, 60)
    tree_field = Form.MultiLine(
        'tree', 'newick tree with branch lengths', display_text)
    return [tree_field]
Example #41
0
def get_form():
    """
    Build the form: a tree, a chromosome, a position, and an amino acid.
    @return: the body of a form
    """
    # prepare the text that pre-populates the tree input
    tree_text = NewickIO.get_narrow_newick_string(
        NewickIO.parse(g_default_string, FelTree.NewickTree), 60)
    # create the fields in the order in which they are returned
    tree_field = Form.MultiLine('tree', 'newick tree', tree_text)
    chromosome_field = Form.SingleLine('chromosome', 'chromosome', 'chr17')
    position_field = Form.Integer('position', 'position', 70360012, low=0)
    residue_field = Form.SingleLine(
        'aminoacid', 'the amino acid of interest', 'P')
    return [tree_field, chromosome_field, position_field, residue_field]
Example #42
0
 def _create_trees(self):
     """
     Build both trees from self.newick_string.
     self.full_tree becomes a Newick.NewickTree,
     and self.pruned_tree becomes a FelTree.NewickTree object.
     """
     tree_type = Newick.NewickTree
     # the full tree is parsed directly
     self.full_tree = NewickIO.parse(self.newick_string, tree_type)
     # the pruned tree is derived from a second, disposable parse
     # whose redundant nodes are removed before it is serialized again
     disposable = NewickIO.parse(self.newick_string, tree_type)
     remove_redundant_nodes(disposable)
     pruned_text = NewickIO.get_newick_string(disposable)
     self.pruned_tree = NewickIO.parse(pruned_text, FelTree.NewickTree)
Example #43
0
def main():
    filename = 'counterexamples.out'
    fout = open(filename, 'wt')
    print 'Does monotonically transforming the pairwise leaf distances affect the compatibility'
    print 'of the split found using principal coordinate analysis?'
    print 'I am looking through random trees for a tree that is split incompatibly'
    print 'when distances are squared.'
    print 'Use control-c to stop the program when you get bored.'
    try:
        count = 0
        ncounterexamples = 0
        nerrors = 0
        while True:
            count += 1
            # get a random tree
            n_base_leaves = 4
            n_expected_extra_leaves = 1
            expected_branch_length = 1
            tree = TreeSampler.sample_tree(n_base_leaves, n_expected_extra_leaves, expected_branch_length)
            # get the set of valid partitions implied by the tree
            valid_parts = TreeComparison.get_partitions(tree)
            ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()]
            # assert that the partition implied by the correct formula is valid
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            loadings = get_principal_coordinate(D)
            nonneg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                nerrors += 1
                print >> fout, 'error: a partition that was supposed to be valid was found to be invalid'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
            # check the validity of the partition implied by the incorrect formula
            Q = D * D
            loadings = get_principal_coordinate(Q)
            nonneg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0)
            neg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v < 0)
            part = frozenset([nonneg_leaf_set, neg_leaf_set])
            if part not in valid_parts:
                ncounterexamples += 1
                print >> fout, 'found a counterexample!'
                print >> fout, 'tree:', NewickIO.get_newick_string(tree)
                print >> fout, 'invalid partition:', partition_to_string(part)
                print >> fout
    except KeyboardInterrupt, e:
        print 'trees examined:', count
        print 'errors:', nerrors
        print 'counterexamples:', ncounterexamples
Example #44
0
def get_form():
    """
    Build the form: a tree, a taxon selection, and an image format.
    @return: the body of a form
    """
    # the default tree is the example tree provided by NewickIO
    example_tree = NewickIO.parse(
        NewickIO.daylight_example_tree, FelTree.NewickTree)
    tree_text = NewickIO.get_narrow_newick_string(example_tree, 60)
    # the default selection puts each of four taxon names on its own line
    selection_text = '\n'.join('ABFG')
    fields = []
    fields.append(Form.MultiLine('tree', 'newick tree', tree_text))
    fields.append(Form.MultiLine('selection', 'selected taxa', selection_text))
    fields.append(Form.ImageFormat())
    return fields
Example #45
0
 def __init__(self, tree, epsilon):
     """
     Try to reconstruct a tree from the eigensystem of its Gower matrix.
     The outcome is recorded in three flags:
     is_negligible is set when a NegligibleError is raised,
     is_incomplete is set when an IncompleteError is raised,
     and is_conflicting is set when the reconstructed tree has
     a nontrivial partition that the original tree does not have.
     When no error is raised, self.reconstructed_tree holds the
     reconstruction parsed as a FelTree.NewickTree;
     otherwise it remains None.
     @param tree: a newick tree in the felsenstein-inspired format
     @param epsilon: determines whether loadings are considered negligible
     """
     # clear some flags that describe events that occur during reconstruction
     self.is_negligible = False
     self.is_incomplete = False
     self.is_conflicting = False
     # define the trees
     self.tree = tree
     self.reconstructed_tree = None
     # set the threshold for loading negligibility
     self.epsilon = epsilon
     # define some arbitrary ordering of tip names
     self.ordered_names = [node.get_name() for node in tree.gen_tips()]
     # get the distance matrix with respect to this ordering
     D = tree.get_distance_matrix(self.ordered_names)
     # get the Gower doubly centered matrix
     G = MatrixUtil.double_centered(np.array(D))
     # get the eigendecomposition of the Gower matrix
     eigenvalues, eigenvector_transposes = np.linalg.eigh(G)
     eigenvectors = eigenvector_transposes.T
     # Order the eigenpairs by decreasing absolute eigenvalue.
     # Sort on the magnitude alone: sorting the (magnitude, vector) tuples
     # directly would fall back to comparing the numpy vectors whenever
     # two magnitudes tie exactly, and that comparison raises ValueError.
     eigenpairs = [(abs(w), v) for w, v in zip(eigenvalues, eigenvectors)]
     eigenpairs.sort(key=lambda pair: pair[0])
     eigenpairs.reverse()
     self.sorted_eigensystem = eigenpairs
     # build the tree recursively using the sorted eigensystem
     indices = set(range(len(self.ordered_names)))
     try:
         # try to reconstruct the tree
         root = self._build_tree(indices, 0)
         root.set_branch_length(None)
         output_tree = Newick.NewickTree(root)
         # convert the tree to the FelTree format
         newick_string = NewickIO.get_newick_string(output_tree)
         self.reconstructed_tree = NewickIO.parse(
                 newick_string, FelTree.NewickTree)
     except NegligibleError:
         self.is_negligible = True
     except IncompleteError:
         self.is_incomplete = True
     else:
         # compare the splits defined by the reconstructed tree
         # to splits in the original tree
         expected_partitions = TreeComparison.get_nontrivial_partitions(
                 self.tree)
         observed_partitions = TreeComparison.get_nontrivial_partitions(
                 self.reconstructed_tree)
         invalid_partitions = observed_partitions - expected_partitions
         if invalid_partitions:
             self.is_conflicting = True
Example #46
0
def get_form():
    """
    Build the form: a tree, an alignment column, and a group of options
    that select which intermediate values are shown.
    @return: the body of a form
    """
    # prepare the text that pre-populates the tree input
    parsed_default = NewickIO.parse(g_default_string, FelTree.NewickTree)
    tree_text = NewickIO.get_narrow_newick_string(parsed_default, 60)
    # The default column lines
    # tell the amino acid of each taxon in the column of the alignment.
    column_text = '\n'.join((
        'Hg18 F',
        'bosTau3 V',
        'canFam2 V',
        'danRer5 L',
        'galGal3 A',
        'mm9 L',
        'monDom4 H',
        'panTro2 F',
        'rheMac2 F',
        'rn4 L',
    ))
    # Each pair is the name and the label of one check item;
    # every item is created with True as its third argument.
    option_specs = (
        ('show_raw_pc_table',
            'the raw physicochemical property table'),
        ('show_standardized_pc_table',
            'the standardized physicochemical property table'),
        ('show_pc_correlation_matrix',
            'the physicochemical property correlation matrix'),
        ('show_tree',
            'the pruned phylogenetic tree'),
        ('show_weights',
            'the taxon weights'),
        ('show_aa_distribution',
            'the estimated amino acid distribution'),
        ('show_pc_distribution',
            'the estimated physicochemical property distn'),
        ('show_deviations',
            'the aa physicochemical property deviations'),
        ('show_impact_scores',
            'the impact score for each amino acid'),
        ('show_p_values',
            'the p-value for each amino acid'),
    )
    return [
        Form.MultiLine('tree', 'tree', tree_text),
        Form.MultiLine('column', 'amino acid of each taxon', column_text),
        Form.CheckGroup('options', 'show these intermediate values', [
            Form.CheckItem(name, label, True)
            for name, label in option_specs]),
    ]
Example #47
0
 def test_get_weighted_split_distance(self):
     """
     Test the weighted split distance between two six-leaf trees.
     """
     # parse the two trees that are compared
     tree_a = NewickIO.parse(
         '((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);', FelTree.NewickTree)
     tree_b = NewickIO.parse(
         '(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);', FelTree.NewickTree)
     # each (first tree, second tree, expected distance) triple is checked;
     # a tree is at distance zero from itself,
     # and the distance is not necessarily symmetric
     expectations = (
         (tree_a, tree_a, 0),
         (tree_b, tree_b, 0),
         (tree_a, tree_b, 20),
         (tree_b, tree_a, 15),
     )
     for first, second, expected in expectations:
         self.assertEqual(get_weighted_split_distance(first, second), expected)
Example #48
0
 def test_get_weighted_split_distance(self):
     """
     Check get_weighted_split_distance on a pair of six-leaf trees.
     """
     newick_a = '((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);'
     newick_b = '(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);'
     tree_a, tree_b = [
         NewickIO.parse(text, FelTree.NewickTree)
         for text in (newick_a, newick_b)]
     # comparing a tree with itself should give zero
     for tree in (tree_a, tree_b):
         self.assertEqual(get_weighted_split_distance(tree, tree), 0)
     # the two directions give different values
     a_to_b = get_weighted_split_distance(tree_a, tree_b)
     self.assertEqual(a_to_b, 20)
     b_to_a = get_weighted_split_distance(tree_b, tree_a)
     self.assertEqual(b_to_a, 15)
Example #49
0
def get_form():
    """
    @return: the body of a form with four inputs:
    tree, chromosome, position, and amino acid
    """
    # re-serialize the default newick string for display
    parsed_default = NewickIO.parse(g_default_string, FelTree.NewickTree)
    display_text = NewickIO.get_narrow_newick_string(parsed_default, 60)
    # assemble the inputs one at a time
    fields = []
    fields.append(Form.MultiLine('tree', 'newick tree', display_text))
    fields.append(Form.SingleLine('chromosome', 'chromosome', 'chr17'))
    fields.append(Form.Integer('position', 'position', 70360012, low=0))
    fields.append(
        Form.SingleLine('aminoacid', 'the amino acid of interest', 'P'))
    return fields
Example #50
0
def get_form():
    """
    @return: the body of a form with a tree input,
    a taxon selection input, and an image format selector
    """
    # the default tree text comes from the example tree held by NewickIO
    tree_text = NewickIO.get_narrow_newick_string(
        NewickIO.parse(NewickIO.daylight_example_tree, FelTree.NewickTree),
        60)
    tree_field = Form.MultiLine('tree', 'newick tree', tree_text)
    # by default four single-letter taxon names are selected, one per line
    selection_field = Form.MultiLine(
        'selection', 'selected taxa', '\n'.join('ABFG'))
    return [tree_field, selection_field, Form.ImageFormat()]
Example #51
0
 def test_get_weighted_split_count(self):
     """
     Check the weighted number of nontrivial splits of three trees.
     """
     newick_strings = (
         '((A:1, B:1):1, (C:1, D:1):1, (E:1, F:1):1);',
         '(((A:1, B:1):1, C:1):1, D:1, (E:1, F:1):1);',
         '(((A:1, B:1):1, C:1):1, (D:1, (E:1, F:1):1):1);',
     )
     # parse all of the trees before checking any of them
     trees = [
         NewickIO.parse(text, FelTree.NewickTree)
         for text in newick_strings]
     # The weighted split counts of the first two trees are different,
     # even though both trees have internal nodes of order 3
     # and have the same number of leaves.
     expected_counts = (45, 50, 50)
     for tree, expected in zip(trees, expected_counts):
         self.assertEqual(get_weighted_split_count(tree), expected)
Example #52
0
def get_response_content(fs):
    """
    Report the principal coordinate projections of the tips of a tree,
    first using the distance matrix and then using a dissimilarity matrix
    derived from it entry by entry with distance_to_dissimilarity.
    @param fs: an object whose 'tree' attribute is a newick string
    @return: the text of the report
    """
    # arbitrarily define the size of the alphabet
    k = 4
    out = StringIO()
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # the tips are ordered by name
    ordered_tip_names = sorted(node.get_name() for node in tree.gen_tips())
    # analyze the matrix of pairwise distances among the tips
    D = np.array(tree.get_distance_matrix(ordered_tip_names))
    D_vector = get_principal_coordinate(D)
    # analyze the dissimilarity matrix derived from the distance matrix
    dissimilarity = np.array([
        [distance_to_dissimilarity(d, k) for d in row] for row in D])
    dissimilarity_vector = get_principal_coordinate(dissimilarity)
    # each section shows a matrix followed by one projection per tip
    sections = (
        ('original distance matrix:',
            D,
            'projections onto the principal coordinate using the original distance matrix:',
            D_vector),
        ('dissimilarity matrix:',
            dissimilarity,
            'projections onto the principal coordinate using the dissimilarity matrix:',
            dissimilarity_vector),
    )
    for matrix_title, matrix, vector_title, vector in sections:
        print >> out, matrix_title
        print >> out, MatrixUtil.m_to_string(matrix)
        print >> out
        print >> out, vector_title
        for name, value in zip(ordered_tip_names, vector):
            print >> out, '\t'.join((name, str(value)))
        print >> out
    return out.getvalue()
Example #53
0
def get_response_content(fs):
    """
    Compare two vectors obtained through Clustering.get_R_balaji:
    one computed from the tips alone,
    and one computed from all nodes and then restricted to the tips.
    @param fs: an object whose 'tree' attribute is a newick string
    @return: the text of the report
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # order the tips by (name, id) and keep the ids in that order
    name_id_pairs = sorted(
        set((node.get_name(), id(node)) for node in tree.gen_tips()))
    _tip_names, tip_ids = zip(*name_id_pairs)
    ntips = len(tip_ids)
    # the internal ids follow the tip ids, in preorder
    internal_ids = [id(node) for node in tree.preorder() if not node.is_tip()]
    all_ids = list(tip_ids) + internal_ids
    # get the distance matrices
    full_D = tree.get_partial_distance_matrix(all_ids)
    partial_D = tree.get_partial_distance_matrix(tip_ids)
    # get the balaji matrices
    full_R = Clustering.get_R_balaji(full_D)
    partial_R = Clustering.get_R_balaji(partial_D)
    # get_eigenvectors gives two vectors per matrix;
    # only the first of each pair is reported
    full_va, _full_vb = get_eigenvectors(full_R)
    partial_va, _partial_vb = get_eigenvectors(partial_R)
    # create the response
    out = StringIO()
    print >> out, 'Fiedler vector associated with the graph'
    print >> out, 'for which the internal nodes are hidden:'
    print >> out, str(tuple(partial_va))
    print >> out
    print >> out, 'The tip subvector of the Fiedler vector'
    print >> out, 'associated with the graph of the full tree:'
    print >> out, str(tuple(full_va[:ntips]))
    return out.getvalue()
Example #54
0
def get_response_content(fs):
    """
    Show the matrices computed by get_aug_a and get_aug_b
    and report whether they are numerically close.
    @param fs: an object with 'tree_string' and 'N' attributes
    @return: the text of the report
    """
    tree = NewickIO.parse(fs.tree_string, FelTree.NewickTree)
    # collect the node ids, with the internal nodes before the tips
    internal_ids = [id(node) for node in tree.gen_internal_nodes()]
    tip_ids = [id(node) for node in tree.gen_tips()]
    ordered_ids = internal_ids + tip_ids
    # get the full distance matrix with respect to this ordering
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # compute the two matrices to be compared;
    # p counts the internal nodes and q counts the tips
    p = len(internal_ids)
    q = len(tip_ids)
    N = fs.N
    aug_a = get_aug_a(D, p, q, N)
    aug_b = get_aug_b(D, p, q, N)
    # write each labeled matrix followed by a blank line
    out = StringIO()
    for title, matrix in (("-(1/2)MEDE'M':", aug_a), ("-(1/2)HMDM'H:", aug_b)):
        print >> out, title
        print >> out, matrix
        print >> out
    print >> out, 'allclose:', np.allclose(aug_a, aug_b)
    return out.getvalue()
Example #55
0
def get_default_original_tree():
    """
    @return: the tree parsed from g_default_string,
    with every branch length that is not None multiplied by one half
    """
    tree = NewickIO.parse(g_default_string, FelTree.NewickTree)
    for node in tree.preorder():
        original_length = node.get_branch_length()
        if original_length is None:
            # nodes without a branch length are left alone
            continue
        node.set_branch_length(original_length * 0.5)
    return tree
Example #56
0
def get_response_content(fs):
    """
    Write an html table of the eigensystem of a matrix derived from a tree.
    The matrix is -1/2 times the doubly centered distance matrix
    of the tips, and the eigenpairs are listed by decreasing eigenvalue.
    @param fs: an object whose 'tree' attribute is a newick string
    @return: the html text of the response
    @raise HandlingError: the tree has fewer than two leaves
    """
    # get the tree
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_names = list(sorted(node.name for node in tree.gen_tips()))
    n = len(ordered_names)
    if n < 2:
        raise HandlingError('the newick tree should have at least two leaves')
    # get the eigendecomposition
    D = np.array(tree.get_distance_matrix(ordered_names))
    G = (-0.5) * MatrixUtil.double_centered(D)
    eigenvalues, eigenvector_transposes = np.linalg.eigh(G)
    eigenvectors = eigenvector_transposes.T
    # Order the eigenpairs by decreasing eigenvalue.
    # Sort on the eigenvalue alone: sorting the (value, vector) tuples
    # directly would fall back to comparing the numpy vectors whenever
    # two eigenvalues tie exactly, and that comparison raises ValueError.
    sorted_eigensystem = list(zip(eigenvalues, eigenvectors))
    sorted_eigensystem.sort(key=lambda pair: pair[0])
    sorted_eigensystem.reverse()
    sorted_eigenvalues, sorted_eigenvectors = zip(*sorted_eigensystem)
    # transpose so that each row of M corresponds to one tip
    M = zip(*sorted_eigenvectors)
    # write the html
    out = StringIO()
    print >> out, '<html>'
    print >> out, '<body>'
    print >> out, HtmlTable.get_labeled_table_string(
            sorted_eigenvalues, ordered_names, M)
    print >> out, '</body>'
    print >> out, '</html>'
    # write the response
    return out.getvalue()
Example #57
0
def get_response_content(fs):
    """
    Evaluate a split of the taxa into selected and unselected names.
    The value is Y R Y', where Y has +1 for each selected tip
    and -1 for each other tip, and R is Clustering.get_R_balaji
    applied to the distance matrix of the tips.
    @param fs: an object with 'tree' and 'selection' attributes
    @return: the value as a string followed by a newline
    @raise HandlingError: a selected name is not a tip,
    or either side of the split is empty
    """
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # the tips are used in the order in which the tree generates them
    ordered_names = [node.get_name() for node in tree.gen_tips()]
    # validate the selection against the tip names
    selected_name_set = set(Util.get_stripped_lines(fs.selection.splitlines()))
    possible_name_set = set(node.get_name() for node in tree.gen_tips())
    extra_names = selected_name_set - possible_name_set
    if extra_names:
        raise HandlingError(
            'the following selected names '
            + 'are not valid tips: %s' % str(tuple(extra_names)))
    complement_name_set = possible_name_set - selected_name_set
    # neither the selected name set nor its complement may be empty
    if not (selected_name_set and complement_name_set):
        raise HandlingError('the selection is degenerate')
    # encode the split as a vector of signs
    Y = np.array([
        1 if name in selected_name_set else -1 for name in ordered_names])
    # evaluate the quadratic form
    D = tree.get_distance_matrix(ordered_names)
    R = Clustering.get_R_balaji(D)
    evaluation = np.dot(np.dot(Y, R), Y.T)
    return str(evaluation) + '\n'
Example #58
-1
def get_response_content(fs):
    """
    Normalize a newick tree and describe what was changed.
    The branch length of the root is removed,
    a root with fewer than three children is replaced by rerooting
    at its child that has the most children,
    and the names of the internal nodes are removed.
    @param fs: an object whose 'tree' attribute is a newick string
    @return: a description of the changes followed by the new newick string
    """
    # read the tree
    tree = NewickIO.parse(fs.tree, Newick.NewickTree)
    # begin the response
    out = StringIO()
    # remove the branch length associated with the root
    if tree.get_root().blen is not None:
        print >> out, 'the root originally had a branch length of', tree.get_root().blen
        tree.get_root().blen = None
    else:
        print >> out, 'the root did not originally have a branch length'
    # force a trifurcation at the root
    if tree.get_root().get_child_count() < 3:
        print >> out, 'the original root had', tree.get_root().get_child_count(), 'children'
        old_root = tree.get_root()
        # Compare the children by their child counts only, so that a tie
        # never falls back to comparing the node objects themselves;
        # among tied children the first one generated is chosen.
        best_child = max(
                old_root.gen_children(),
                key=lambda child: child.get_child_count())
        tree.reroot(best_child)
        tree.remove_node(old_root)
        print >> out, 'the new root has', tree.get_root().get_child_count(), 'children'
    else:
        print >> out, 'the root has', tree.get_root().get_child_count(), 'children'
    # remove names of internal nodes
    nremoved_names = 0
    for node in tree.preorder():
        if node.has_children() and node.name is not None:
            node.name = None
            nremoved_names += 1
    print >> out, 'removed', nremoved_names, 'internal node names'
    # draw the new formatted newick string after a break
    print >> out
    formatted_tree_string = NewickIO.get_narrow_newick_string(tree, 120)
    print >> out, formatted_tree_string
    # return the response
    return out.getvalue()
Example #59
-1
def get_response_content(fs):
    """
    Report the output of process_tree for a tree and for its pruned version.
    @param fs: an object with 'names', 'tree', 'show_newick',
    and 'show_art' attributes
    @return: the text of the report
    @raise HandlingError: a selected name is not a tip of the tree
    """
    # read the selected names and the tree
    selected_name_set = set(Util.get_stripped_lines(StringIO(fs.names)))
    original_tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    # every selected name must be the name of a tip
    tip_name_set = set(node.get_name() for node in original_tree.gen_tips())
    unknown_names = selected_name_set - tip_name_set
    if unknown_names:
        raise HandlingError(
            "the following selected names "
            + "are not valid tips: %s" % str(tuple(unknown_names)))
    # the pruning works on a Newick.NewickTree parsed from the same string
    pruned_tree = get_pruned_tree(
        NewickIO.parse(fs.tree, Newick.NewickTree), selected_name_set)
    # process the original tree first and the pruned tree second
    out = StringIO()
    labeled_trees = (
        (original_tree, "the original tree"),
        (pruned_tree, "the pruned tree"),
    )
    for current_tree, tree_name in labeled_trees:
        print >> out, "calculating splits of %s:" % tree_name
        print >> out, process_tree(current_tree, tree_name, fs.show_newick, fs.show_art)
    return out.getvalue()