Beispiel #1
0
def sample_agglomerated_tree(ntaxa):
    """
    Sample a random weighted phylogenetic xtree by agglomeration.
    Every branch keeps whatever length a fresh vertex gets by default.
    The procedure mirrors the simulation described in
    "Why neighbor-joining works": random pairs of subtrees are joined
    under a new vertex until only three subtrees are left,
    and those three are then hung from a common root.
    @param ntaxa: the number of leaves in the tree; must exceed two
    @return: the root of a weighted phylogenetic xtree
    """
    # fewer than three leaves cannot form the final three-way join
    assert ntaxa > 2
    # start with one labeled leaf per taxon
    pool = []
    for label in range(ntaxa):
        leaf = Xtree.WPXVertex()
        leaf.label = label
        pool.append(leaf)
    # each pass replaces two randomly chosen subtrees with their parent
    while len(pool) > 3:
        parent = Xtree.WPXVertex()
        for _ in range(2):
            pick = random.randrange(len(pool))
            # move the chosen subtree to the end so it can be popped in O(1)
            pool[pick], pool[-1] = pool[-1], pool[pick]
            parent.add_child(pool.pop())
        pool.append(parent)
    # the three remaining subtrees become the children of the root
    root = Xtree.WPXVertex()
    for subtree in pool:
        root.add_child(subtree)
    return root
Beispiel #2
0
 def evaluate(self, true_splits, D_estimated, atteson, use_nj, use_modified_nj, use_all_spectral, use_one_spectral):
     """
     Score each enabled reconstruction method and record one scatter point.
     An enabled method rebuilds splits from the estimated distance matrix
     and is scored against the true splits with Xtree.splits_to_rf_distance.
     A disabled method contributes None.
     @param true_splits: the set of all full splits implied by the true tree
     @param D_estimated: the estimated distance matrix
     @param atteson: True iff the distance matrix is Atteson
     @param use_nj: evaluate split_nj paired with update_nj
     @param use_modified_nj: evaluate split_nj paired with update_using_laplacian
     @param use_all_spectral: evaluate the eigenvector splitter with nj fallback
     @param use_one_spectral: evaluate the SplitFunctor and UpdateFunctor pair
     """
     def rf_error(splitter, updater):
         # rebuild the splits with this strategy pair and score them
         rebuilt = BuildTreeTopology.get_splits(D_estimated, splitter, updater)
         return Xtree.splits_to_rf_distance(rebuilt, true_splits)

     # the conditional expressions keep disabled methods from touching anything
     nj_error = (
         rf_error(BuildTreeTopology.split_nj, BuildTreeTopology.update_nj)
         if use_nj else None)
     modified_nj_error = (
         rf_error(BuildTreeTopology.split_nj, BuildTreeTopology.update_using_laplacian)
         if use_modified_nj else None)
     all_spectral_error = (
         rf_error(
             BuildTreeTopology.split_using_eigenvector_with_nj_fallback,
             BuildTreeTopology.update_using_laplacian)
         if use_all_spectral else None)
     one_spectral_error = (
         rf_error(SplitFunctor(len(D_estimated)), UpdateFunctor(len(D_estimated)))
         if use_one_spectral else None)
     # store the outcome of this evaluation
     point = ScatterPoint(atteson, nj_error, modified_nj_error, all_spectral_error, one_spectral_error)
     self.scatter_points.append(point)
Beispiel #3
0
 def setUp(self):
     """
     Build the fixtures: two 8x8 distance matrices, a tree, and its splits.
     According to the original author the matrices are a perturbed and a
     true distance matrix taken from the paper Why Neighbor Joining Works.
     """
     perturbed_rows = [
         [0.0, 2.7, 2.6, 2.6, 2.6, 4.4, 4.4, 4.4],
         [2.7, 0.0, 4.4, 4.4, 4.4, 2.6, 2.6, 2.6],
         [2.6, 4.4, 0.0, 0.1, 0.4, 2.7, 2.7, 2.7],
         [2.6, 4.4, 0.1, 0.0, 0.4, 2.7, 2.7, 2.7],
         [2.6, 4.4, 0.4, 0.4, 0.0, 2.7, 2.7, 2.7],
         [4.4, 2.6, 2.7, 2.7, 2.7, 0.0, 0.1, 0.4],
         [4.4, 2.6, 2.7, 2.7, 2.7, 0.1, 0.0, 0.4],
         [4.4, 2.6, 2.7, 2.7, 2.7, 0.4, 0.4, 0.0],
     ]
     self.D_perturbed = np.array(perturbed_rows)
     true_rows = [
         [0.0, 3.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0],
         [3.0, 0.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0],
         [2.0, 3.0, 0.0, 0.1, 0.4, 3.0, 3.0, 3.0],
         [2.0, 3.0, 0.1, 0.0, 0.4, 3.0, 3.0, 3.0],
         [2.0, 3.0, 0.4, 0.4, 0.0, 3.0, 3.0, 3.0],
         [3.0, 2.0, 3.0, 3.0, 3.0, 0.0, 0.1, 0.4],
         [3.0, 2.0, 3.0, 3.0, 3.0, 0.1, 0.0, 0.4],
         [3.0, 2.0, 3.0, 3.0, 3.0, 0.4, 0.4, 0.0],
     ]
     self.D = np.array(true_rows)
     # nested (subtree, branch length) pairs; integers are leaf labels
     abc = ((4, 0.2), (((2, 0.05), (3, 0.05)), 0.15))
     mnp = ((7, 0.2), (((5, 0.05), (6, 0.05)), 0.15))
     upper = (((abc, 0.8), (0, 1.0)), 1.0)
     self.tree = Xtree.list_to_weighted_tree((upper, (1, 1.0), (mnp, 0.8)))
     # each entry lists the two leaf groups of one expected split
     partitions = (
         ((2, 3), (0, 1, 4, 5, 6, 7)),
         ((2, 3, 4), (0, 1, 5, 6, 7)),
         ((2, 3, 4, 0), (1, 5, 6, 7)),
         ((2, 3, 4, 0, 1), (5, 6, 7)),
         ((2, 3, 4, 0, 1, 7), (5, 6)),
     )
     self.true_splits = {make_split(near, far) for near, far in partitions}
Beispiel #4
0
 def evaluate(self, true_splits, D_estimated, atteson, use_nj,
              use_modified_nj, use_all_spectral, use_one_spectral):
     """
     Compute the error of every requested method and store a scatter point.
     Each error comes from Xtree.splits_to_rf_distance applied to the
     splits a method reconstructs and to the true splits.
     The error of a method that was not requested is None.
     @param true_splits: the set of all full splits implied by the true tree
     @param D_estimated: the estimated distance matrix
     @param atteson: True iff the distance matrix is Atteson
     @param use_nj: request the split_nj and update_nj combination
     @param use_modified_nj: request split_nj with update_using_laplacian
     @param use_all_spectral: request the eigenvector splitter with nj fallback
     @param use_one_spectral: request the SplitFunctor and UpdateFunctor pair
     """
     # plain neighbor joining
     if use_nj:
         rebuilt = BuildTreeTopology.get_splits(
             D_estimated,
             BuildTreeTopology.split_nj,
             BuildTreeTopology.update_nj)
         nj_error = Xtree.splits_to_rf_distance(rebuilt, true_splits)
     else:
         nj_error = None
     # neighbor joining splits with the laplacian update
     if use_modified_nj:
         rebuilt = BuildTreeTopology.get_splits(
             D_estimated,
             BuildTreeTopology.split_nj,
             BuildTreeTopology.update_using_laplacian)
         modified_nj_error = Xtree.splits_to_rf_distance(rebuilt, true_splits)
     else:
         modified_nj_error = None
     # eigenvector splits with nj fallback and the laplacian update
     if use_all_spectral:
         rebuilt = BuildTreeTopology.get_splits(
             D_estimated,
             BuildTreeTopology.split_using_eigenvector_with_nj_fallback,
             BuildTreeTopology.update_using_laplacian)
         all_spectral_error = Xtree.splits_to_rf_distance(rebuilt, true_splits)
     else:
         all_spectral_error = None
     # functor pair sized by the number of rows of the distance matrix
     if use_one_spectral:
         size = len(D_estimated)
         rebuilt = BuildTreeTopology.get_splits(
             D_estimated, SplitFunctor(size), UpdateFunctor(size))
         one_spectral_error = Xtree.splits_to_rf_distance(rebuilt, true_splits)
     else:
         one_spectral_error = None
     # record the data point
     point = ScatterPoint(
         atteson, nj_error, modified_nj_error,
         all_spectral_error, one_spectral_error)
     self.scatter_points.append(point)
Beispiel #5
0
 def __init__(self):
     """
     Set up the fixed six-leaf topology whose branch lengths are searched.
     Besides the tree this stores the expected primary split and a map
     from id(node) to an integer index for every node of the tree.
     """
     TreeSearch.__init__(self)
     # the topology never changes; only its weights are of interest
     root = Xtree.list_to_uniformly_weighted_tree([0, [1, 2], [[3, 4], 5]])
     self.tree = root
     # the split that is expected to be the primary one
     near_side = frozenset((0, 1, 2))
     far_side = frozenset((3, 4, 5))
     self.desired_primary_split = frozenset((near_side, far_side))
     # labeled vertices are indexed by their own label
     index_of = {id(vertex): vertex.label for vertex in root.get_labeled_vertices()}
     self.id_to_index = index_of
     # internal nodes on the left hand side get indices 6 and 7
     index_of[id(root.children[1])] = 6
     index_of[id(root)] = 7
     # internal nodes on the right hand side get indices 8 and 9
     right = root.children[2]
     index_of[id(right.children[0])] = 8
     index_of[id(right)] = 9
Beispiel #6
0
 def __init__(self):
     """
     Prepare the fixed tree topology used when searching for branch lengths.
     The constructor also records the expected primary split and assigns
     an integer index to every node, keyed by the id() of the node object.
     """
     TreeSearch.__init__(self)
     # build the uniformly weighted tree from its nested-list topology
     nested_topology = [0, [1, 2], [[3, 4], 5]]
     self.tree = Xtree.list_to_uniformly_weighted_tree(nested_topology)
     # leaves 0, 1, 2 against leaves 3, 4, 5
     halves = [frozenset([0, 1, 2]), frozenset([3, 4, 5])]
     self.desired_primary_split = frozenset(halves)
     # a labeled vertex uses its label as its index
     self.id_to_index = {
         id(node): node.label
         for node in self.tree.get_labeled_vertices()
     }
     # the remaining nodes are numbered by hand:
     # 6 and 7 on the left hand side, 8 and 9 on the right hand side
     self.id_to_index.update({
         id(self.tree.children[1]): 6,
         id(self.tree): 7,
         id(self.tree.children[2].children[0]): 8,
         id(self.tree.children[2]): 9,
     })
Beispiel #7
0
 def setUp(self):
     """
     Create the test fixtures: distance matrices, a weighted tree, splits.
     The original author describes the matrices as a perturbed and a true
     distance matrix from the paper Why Neighbor Joining Works.
     """
     # the perturbed 8x8 distance matrix
     self.D_perturbed = np.array([
         [0, 2.7, 2.6, 2.6, 2.6, 4.4, 4.4, 4.4],
         [2.7, 0, 4.4, 4.4, 4.4, 2.6, 2.6, 2.6],
         [2.6, 4.4, 0, 0.1, 0.4, 2.7, 2.7, 2.7],
         [2.6, 4.4, 0.1, 0, 0.4, 2.7, 2.7, 2.7],
         [2.6, 4.4, 0.4, 0.4, 0, 2.7, 2.7, 2.7],
         [4.4, 2.6, 2.7, 2.7, 2.7, 0, 0.1, 0.4],
         [4.4, 2.6, 2.7, 2.7, 2.7, 0.1, 0, 0.4],
         [4.4, 2.6, 2.7, 2.7, 2.7, 0.4, 0.4, 0],
     ])
     # the true 8x8 distance matrix
     self.D = np.array([
         [0.0, 3.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0],
         [3.0, 0.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0],
         [2.0, 3.0, 0.0, 0.1, 0.4, 3.0, 3.0, 3.0],
         [2.0, 3.0, 0.1, 0.0, 0.4, 3.0, 3.0, 3.0],
         [2.0, 3.0, 0.4, 0.4, 0.0, 3.0, 3.0, 3.0],
         [3.0, 2.0, 3.0, 3.0, 3.0, 0.0, 0.1, 0.4],
         [3.0, 2.0, 3.0, 3.0, 3.0, 0.1, 0.0, 0.4],
         [3.0, 2.0, 3.0, 3.0, 3.0, 0.4, 0.4, 0.0],
     ])
     # the tree as nested (subtree, branch length) pairs over leaf labels
     abc = (4, 0.2), (((2, 0.05), (3, 0.05)), 0.15)
     mnp = (7, 0.2), (((5, 0.05), (6, 0.05)), 0.15)
     first_child = (((abc, 0.8), (0, 1.0)), 1.0)
     second_child = (1, 1.0)
     third_child = (mnp, 0.8)
     self.tree = Xtree.list_to_weighted_tree((first_child, second_child, third_child))
     # leaf groups on the two sides of every expected split
     split_sides = [
         ((2, 3), (0, 1, 4, 5, 6, 7)),
         ((2, 3, 4), (0, 1, 5, 6, 7)),
         ((2, 3, 4, 0), (1, 5, 6, 7)),
         ((2, 3, 4, 0, 1), (5, 6, 7)),
         ((2, 3, 4, 0, 1, 7), (5, 6)),
     ]
     self.true_splits = set(make_split(one, other) for one, other in split_sides)