def sample_agglomerated_tree(ntaxa):
    """
    Sample a weighted phylogenetic xtree by random agglomeration.
    Every branch keeps its default length.
    The procedure mirrors the simulation used in
    "Why neighbor-joining works": randomly chosen subtrees are
    repeatedly joined under a freshly created internal vertex.
    @param ntaxa: the number of leaves in the tree
    @return: the root of a weighted phylogenetic xtree
    """
    # with fewer than three leaves there is no room for an internal node
    assert ntaxa > 2
    # start from one labeled single-vertex subtree per taxon
    pool = []
    for label in range(ntaxa):
        leaf = Xtree.WPXVertex()
        leaf.label = label
        pool.append(leaf)
    # join two randomly chosen subtrees at a time until three remain
    while len(pool) > 3:
        parent = Xtree.WPXVertex()
        for _ in range(2):
            pick = random.randrange(len(pool))
            parent.add_child(pool[pick])
            # constant-time removal: overwrite the chosen slot with the
            # last entry and then drop the tail of the list
            pool[pick] = pool[-1]
            pool.pop()
        # the merged subtree goes back into the pool
        pool.append(parent)
    # the three surviving subtrees hang off a common root
    root = Xtree.WPXVertex()
    for subtree in pool:
        root.add_child(subtree)
    return root
def evaluate(self, true_splits, D_estimated, atteson,
        use_nj, use_modified_nj, use_all_spectral, use_one_spectral):
    """
    Score each enabled reconstruction method and record one data point.
    A method that is not enabled contributes None as its error.
    @param true_splits: the set of all full splits implied by the true tree
    @param D_estimated: the estimated distance matrix
    @param atteson: True iff the distance matrix is Atteson
    @param use_nj: True to evaluate neighbor joining
    @param use_modified_nj: True to evaluate the nj split
    combined with the laplacian update
    @param use_all_spectral: True to evaluate the eigenvector split
    with nj fallback combined with the laplacian update
    @param use_one_spectral: True to evaluate the SplitFunctor
    and UpdateFunctor pair
    """
    def rf_error(splitter, updater):
        # build the splits with the given strategy pair and compare
        # them to the true splits
        estimated_splits = BuildTreeTopology.get_splits(
                D_estimated, splitter, updater)
        return Xtree.splits_to_rf_distance(estimated_splits, true_splits)

    # plain neighbor joining
    nj_error = rf_error(
            BuildTreeTopology.split_nj,
            BuildTreeTopology.update_nj) if use_nj else None
    # neighbor joining split with the laplacian update
    modified_nj_error = rf_error(
            BuildTreeTopology.split_nj,
            BuildTreeTopology.update_using_laplacian) if use_modified_nj else None
    # eigenvector split with nj fallback and the laplacian update
    all_spectral_error = rf_error(
            BuildTreeTopology.split_using_eigenvector_with_nj_fallback,
            BuildTreeTopology.update_using_laplacian) if use_all_spectral else None
    # functor based strategies sized by the distance matrix
    one_spectral_error = rf_error(
            SplitFunctor(len(D_estimated)),
            UpdateFunctor(len(D_estimated))) if use_one_spectral else None
    # add the data point
    point = ScatterPoint(atteson, nj_error, modified_nj_error,
            all_spectral_error, one_spectral_error)
    self.scatter_points.append(point)
def setUp(self):
    """
    Build the fixtures shared by the tests.
    The perturbed and the true distance matrices are taken from the
    paper Why Neighbor Joining Works; both are 8x8.
    The fixture also holds the weighted tree and the set of
    splits that the tests treat as the true splits.
    """
    # the perturbed distance matrix
    perturbed_rows = [
        [0, 2.7, 2.6, 2.6, 2.6, 4.4, 4.4, 4.4],
        [2.7, 0, 4.4, 4.4, 4.4, 2.6, 2.6, 2.6],
        [2.6, 4.4, 0, 0.1, 0.4, 2.7, 2.7, 2.7],
        [2.6, 4.4, 0.1, 0, 0.4, 2.7, 2.7, 2.7],
        [2.6, 4.4, 0.4, 0.4, 0, 2.7, 2.7, 2.7],
        [4.4, 2.6, 2.7, 2.7, 2.7, 0, 0.1, 0.4],
        [4.4, 2.6, 2.7, 2.7, 2.7, 0.1, 0, 0.4],
        [4.4, 2.6, 2.7, 2.7, 2.7, 0.4, 0.4, 0],
    ]
    self.D_perturbed = np.array(perturbed_rows)
    # the true distance matrix
    true_rows = [
        [0.0, 3.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0],
        [3.0, 0.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0],
        [2.0, 3.0, 0.0, 0.1, 0.4, 3.0, 3.0, 3.0],
        [2.0, 3.0, 0.1, 0.0, 0.4, 3.0, 3.0, 3.0],
        [2.0, 3.0, 0.4, 0.4, 0.0, 3.0, 3.0, 3.0],
        [3.0, 2.0, 3.0, 3.0, 3.0, 0.0, 0.1, 0.4],
        [3.0, 2.0, 3.0, 3.0, 3.0, 0.1, 0.0, 0.4],
        [3.0, 2.0, 3.0, 3.0, 3.0, 0.4, 0.4, 0.0],
    ]
    self.D = np.array(true_rows)
    # nested description of the weighted tree;
    # the two three-leaf groups are defined first
    group_234 = (4, 0.2), (((2, 0.05), (3, 0.05)), 0.15)
    group_567 = (7, 0.2), (((5, 0.05), (6, 0.05)), 0.15)
    nested_tree = (
        (((group_234, 0.8), (0, 1.0)), 1.0),
        (1, 1.0),
        (group_567, 0.8),
    )
    self.tree = Xtree.list_to_weighted_tree(nested_tree)
    # the five splits that the tests treat as true
    self.true_splits = {
        make_split((2, 3), (0, 1, 4, 5, 6, 7)),
        make_split((2, 3, 4), (0, 1, 5, 6, 7)),
        make_split((2, 3, 4, 0), (1, 5, 6, 7)),
        make_split((2, 3, 4, 0, 1), (5, 6, 7)),
        make_split((2, 3, 4, 0, 1, 7), (5, 6)),
    }
def evaluate(
        self, true_splits, D_estimated, atteson,
        use_nj, use_modified_nj, use_all_spectral, use_one_spectral):
    """
    Compute the error of every requested method and store the result.
    The error of a method that was not requested is recorded as None.
    @param true_splits: the set of all full splits implied by the true tree
    @param D_estimated: the estimated distance matrix
    @param atteson: True iff the distance matrix is Atteson
    @param use_nj: True to run neighbor joining
    @param use_modified_nj: True to run the nj split
    with the laplacian update
    @param use_all_spectral: True to run the eigenvector split
    with nj fallback and the laplacian update
    @param use_one_spectral: True to run the SplitFunctor
    and UpdateFunctor pair
    """
    # every error stays None unless its method is requested
    nj_error = modified_nj_error = None
    all_spectral_error = one_spectral_error = None
    # neighbor joining
    if use_nj:
        found = BuildTreeTopology.get_splits(
                D_estimated,
                BuildTreeTopology.split_nj,
                BuildTreeTopology.update_nj)
        nj_error = Xtree.splits_to_rf_distance(found, true_splits)
    # neighbor joining split with the laplacian update
    if use_modified_nj:
        found = BuildTreeTopology.get_splits(
                D_estimated,
                BuildTreeTopology.split_nj,
                BuildTreeTopology.update_using_laplacian)
        modified_nj_error = Xtree.splits_to_rf_distance(found, true_splits)
    # eigenvector split with nj fallback and the laplacian update
    if use_all_spectral:
        found = BuildTreeTopology.get_splits(
                D_estimated,
                BuildTreeTopology.split_using_eigenvector_with_nj_fallback,
                BuildTreeTopology.update_using_laplacian)
        all_spectral_error = Xtree.splits_to_rf_distance(found, true_splits)
    # functor based split and update, each sized by the distance matrix
    if use_one_spectral:
        split_functor = SplitFunctor(len(D_estimated))
        update_functor = UpdateFunctor(len(D_estimated))
        found = BuildTreeTopology.get_splits(
                D_estimated, split_functor, update_functor)
        one_spectral_error = Xtree.splits_to_rf_distance(found, true_splits)
    # add the data point
    self.scatter_points.append(ScatterPoint(
        atteson,
        nj_error,
        modified_nj_error,
        all_spectral_error,
        one_spectral_error))
def __init__(self):
    """
    Set up the fixed tree topology for which branch lengths will be sought.
    Besides the tree itself this defines the expected primary split
    and a map from vertex id to vertex index.
    """
    TreeSearch.__init__(self)
    # the fixed topology, given as nested lists of leaf labels
    self.tree = Xtree.list_to_uniformly_weighted_tree(
            [0, [1, 2], [[3, 4], 5]])
    # the primary split is expected to separate these two leaf sets
    first_side = frozenset([0, 1, 2])
    second_side = frozenset([3, 4, 5])
    self.desired_primary_split = frozenset([first_side, second_side])
    # labeled vertices are indexed by their own label
    self.id_to_index = {
            id(vertex): vertex.label
            for vertex in self.tree.get_labeled_vertices()}
    # the remaining vertices get the indices 6 through 9
    root = self.tree
    left_internal = root.children[1]
    right_internal = root.children[2]
    # internal nodes in the left hand subtree
    self.id_to_index[id(left_internal)] = 6
    self.id_to_index[id(root)] = 7
    # internal nodes in the right hand subtree
    self.id_to_index[id(right_internal.children[0])] = 8
    self.id_to_index[id(right_internal)] = 9
def setUp(self):
    """
    Prepare the matrices, the tree and the splits used by the tests.
    The perturbed and the true distance matrix both come from the
    paper Why Neighbor Joining Works.
    """
    # perturbed distances
    self.D_perturbed = np.array([
        [0, 2.7, 2.6, 2.6, 2.6, 4.4, 4.4, 4.4],
        [2.7, 0, 4.4, 4.4, 4.4, 2.6, 2.6, 2.6],
        [2.6, 4.4, 0, 0.1, 0.4, 2.7, 2.7, 2.7],
        [2.6, 4.4, 0.1, 0, 0.4, 2.7, 2.7, 2.7],
        [2.6, 4.4, 0.4, 0.4, 0, 2.7, 2.7, 2.7],
        [4.4, 2.6, 2.7, 2.7, 2.7, 0, 0.1, 0.4],
        [4.4, 2.6, 2.7, 2.7, 2.7, 0.1, 0, 0.4],
        [4.4, 2.6, 2.7, 2.7, 2.7, 0.4, 0.4, 0]])
    # true distances
    self.D = np.array([
        [0.0, 3.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0],
        [3.0, 0.0, 3.0, 3.0, 3.0, 2.0, 2.0, 2.0],
        [2.0, 3.0, 0.0, 0.1, 0.4, 3.0, 3.0, 3.0],
        [2.0, 3.0, 0.1, 0.0, 0.4, 3.0, 3.0, 3.0],
        [2.0, 3.0, 0.4, 0.4, 0.0, 3.0, 3.0, 3.0],
        [3.0, 2.0, 3.0, 3.0, 3.0, 0.0, 0.1, 0.4],
        [3.0, 2.0, 3.0, 3.0, 3.0, 0.1, 0.0, 0.4],
        [3.0, 2.0, 3.0, 3.0, 3.0, 0.4, 0.4, 0.0]])
    # assemble the nested weighted tree description piece by piece
    pair_23 = ((2, 0.05), (3, 0.05))
    pair_56 = ((5, 0.05), (6, 0.05))
    triple_234 = ((4, 0.2), (pair_23, 0.15))
    triple_567 = ((7, 0.2), (pair_56, 0.15))
    first_child = (((triple_234, 0.8), (0, 1.0)), 1.0)
    second_child = (1, 1.0)
    third_child = (triple_567, 0.8)
    self.tree = Xtree.list_to_weighted_tree(
            (first_child, second_child, third_child))
    # both sides of each split that the tests treat as true
    split_sides = [
        ((2, 3), (0, 1, 4, 5, 6, 7)),
        ((2, 3, 4), (0, 1, 5, 6, 7)),
        ((2, 3, 4, 0), (1, 5, 6, 7)),
        ((2, 3, 4, 0, 1), (5, 6, 7)),
        ((2, 3, 4, 0, 1, 7), (5, 6)),
    ]
    self.true_splits = set(
            make_split(one_side, other_side)
            for one_side, other_side in split_sides)