def get_response_lines(self, options):
    """
    Build the lines that form the result of the analysis.
    @param options: a subset of strings specifying what to show
    @return: a list of preamble lines followed by the error lines
    """
    # (option name, flag attribute, message) for each reportable event.
    # The flag attribute is only read when its option was requested.
    reportable = (
        ('show_incomplete', 'is_incomplete',
            'the sequential splits defined by the eigenvectors were insufficient to reconstruct the tree'),
        ('show_conflicting', 'is_conflicting',
            'the reconstructed tree has a split that is incompatible with the original tree'),
        ('show_negligible', 'is_negligible',
            'during reconstruction a negligible eigenvector loading was encountered'),
    )
    error_lines = [
        message for option, flag_name, message in reportable
        if option in options and getattr(self, flag_name)]
    preamble_lines = []
    # The trees are shown on request, or whenever an error is reported.
    if 'show_all' in options or error_lines:
        preamble_lines += [
            'original tree:',
            NewickIO.get_newick_string(self.tree)]
        if self.reconstructed_tree:
            preamble_lines += [
                'reconstructed tree:',
                NewickIO.get_newick_string(self.reconstructed_tree)]
    return preamble_lines + error_lines
def do_distance_analysis(X):
    """
    Compare neighbor joining on plain and on squared Euclidean distances.
    @param X: a sequence of points; pairwise differences are passed to np.linalg.norm
    @return: a multi-line report string with surrounding whitespace stripped
    """
    # The labels are fixed; presumably X holds four points -- TODO confirm.
    labels = list('0123')

    def fit_tree(distances):
        # Build a neighbor joining tree, then round-trip it through its
        # newick string to get the distance matrix implied by the tree.
        nj_tree = NeighborJoining.make_tree(distances, labels)
        newick = NewickIO.get_newick_string(nj_tree)
        feltree = NewickIO.parse(newick, FelTree.NewickTree)
        return newick, np.array(feltree.get_distance_matrix(labels))

    # Euclidean distances and their reconstruction from a tree
    D_sqrt = np.array([[np.linalg.norm(y - x) for x in X] for y in X])
    sqrt_tree_string, D_sqrt_reconstructed = fit_tree(D_sqrt)
    # squared Euclidean distances and their reconstruction from a tree
    D = D_sqrt ** 2
    tree_string, D_reconstructed = fit_tree(D)
    # each report item is written on its own line
    report_items = [
        'matrix of Euclidean distances between tetrahedron vertices:',
        D_sqrt,
        'neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):',
        sqrt_tree_string,
        'distance matrix implied by this tree:',
        D_sqrt_reconstructed,
        'matrix of squared distances between tetrahedron vertices:',
        D,
        'neighbor joining tree constructed from D = squared Euclidean distances (normal):',
        tree_string,
        'distance matrix implied by this tree:',
        D_reconstructed,
    ]
    out = StringIO()
    for item in report_items:
        print >> out, item
    return out.getvalue().strip()
def do_distance_analysis(X):
    """
    Compare neighbor joining on plain and on squared Euclidean distances.
    @param X: a sequence of points; pairwise differences are passed to np.linalg.norm
    @return: a multi-line report string with surrounding whitespace stripped
    """
    # The labels are fixed; presumably X holds four points -- TODO confirm.
    labels = list("0123")

    def fit_tree(distances):
        # Build a neighbor joining tree, then round-trip it through its
        # newick string to get the distance matrix implied by the tree.
        nj_tree = NeighborJoining.make_tree(distances, labels)
        newick = NewickIO.get_newick_string(nj_tree)
        feltree = NewickIO.parse(newick, FelTree.NewickTree)
        return newick, np.array(feltree.get_distance_matrix(labels))

    # Euclidean distances and their reconstruction from a tree
    D_sqrt = np.array([[np.linalg.norm(y - x) for x in X] for y in X])
    sqrt_tree_string, D_sqrt_reconstructed = fit_tree(D_sqrt)
    # squared Euclidean distances and their reconstruction from a tree
    D = D_sqrt ** 2
    tree_string, D_reconstructed = fit_tree(D)
    # each report item is written on its own line
    report_items = [
        "matrix of Euclidean distances between tetrahedron vertices:",
        D_sqrt,
        "neighbor joining tree constructed from D = non-squared Euclidean distances (unusual):",
        sqrt_tree_string,
        "distance matrix implied by this tree:",
        D_sqrt_reconstructed,
        "matrix of squared distances between tetrahedron vertices:",
        D,
        "neighbor joining tree constructed from D = squared Euclidean distances (normal):",
        tree_string,
        "distance matrix implied by this tree:",
        D_reconstructed,
    ]
    out = StringIO()
    for item in report_items:
        print >> out, item
    return out.getvalue().strip()
def get_response_content(fs):
    """
    Check the principal coordinate splits of the user supplied trees.
    Each tree is split by the sign of the loadings computed from its
    distance matrix and again from the elementwise squared distances;
    splits that are not partitions of the tree are reported.
    @param fs: an object whose trees attribute holds one newick tree per line
    @return: the report as a string
    @raise HandlingError: when a tree has fewer than four tips,
    an unlabeled tip, or repeated tip labels
    """

    def get_sign_partition(names, loadings):
        # Split the names by the sign of the corresponding loading;
        # a zero loading goes with the nonnegative side.
        nonneg = frozenset(
            name for name, v in zip(names, loadings) if v >= 0)
        neg = frozenset(
            name for name, v in zip(names, loadings) if v < 0)
        return frozenset([nonneg, neg])

    # parse and validate every tree before producing any output
    trees = []
    for line in iterutils.stripped_lines(StringIO(fs.trees)):
        candidate = NewickIO.parse(line, FelTree.NewickTree)
        names = [tip.get_name() for tip in candidate.gen_tips()]
        ntips = len(names)
        if ntips < 4:
            raise HandlingError('expected at least four tips but found ' + str(ntips))
        if any(name is None for name in names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(candidate)
    out = StringIO()
    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        valid_parts = TreeComparison.get_partitions(tree)
        ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()]
        D = np.array(tree.get_distance_matrix(ordered_tip_names))
        # the split from the plain distances is expected to be valid
        part = get_sign_partition(
            ordered_tip_names, get_principal_coordinate(D))
        if part not in valid_parts:
            nerrors += 1
            print >> out, 'error: a partition that was supposed to be valid was found to be invalid'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
        # the split from the squared distances may be invalid
        Q = D * D
        part = get_sign_partition(
            ordered_tip_names, get_principal_coordinate(Q))
        if part not in valid_parts:
            ncounterexamples += 1
            print >> out, 'found a counterexample!'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
def main(): filename = 'counterexamples.out' fout = open(filename, 'wt') print 'Does monotonically transforming the pairwise leaf distances affect the compatibility' print 'of the split found using principal coordinate analysis?' print 'I am looking through random trees for a tree that is split incompatibly' print 'when distances are squared.' print 'Use control-c to stop the program when you get bored.' try: count = 0 ncounterexamples = 0 nerrors = 0 while True: count += 1 # get a random tree n_base_leaves = 4 n_expected_extra_leaves = 1 expected_branch_length = 1 tree = TreeSampler.sample_tree(n_base_leaves, n_expected_extra_leaves, expected_branch_length) # get the set of valid partitions implied by the tree valid_parts = TreeComparison.get_partitions(tree) ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()] # assert that the partition implied by the correct formula is valid D = np.array(tree.get_distance_matrix(ordered_tip_names)) loadings = get_principal_coordinate(D) nonneg_leaf_set = frozenset( tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0) neg_leaf_set = frozenset( tip for tip, v in zip(ordered_tip_names, loadings) if v < 0) part = frozenset([nonneg_leaf_set, neg_leaf_set]) if part not in valid_parts: nerrors += 1 print >> fout, 'error: a partition that was supposed to be valid was found to be invalid' print >> fout, 'tree:', NewickIO.get_newick_string(tree) print >> fout, 'invalid partition:', partition_to_string(part) print >> fout # check the validity of the partition implied by the incorrect formula Q = D * D loadings = get_principal_coordinate(Q) nonneg_leaf_set = frozenset( tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0) neg_leaf_set = frozenset( tip for tip, v in zip(ordered_tip_names, loadings) if v < 0) part = frozenset([nonneg_leaf_set, neg_leaf_set]) if part not in valid_parts: ncounterexamples += 1 print >> fout, 'found a counterexample!' 
print >> fout, 'tree:', NewickIO.get_newick_string(tree) print >> fout, 'invalid partition:', partition_to_string(part) print >> fout except KeyboardInterrupt, e: print 'trees examined:', count print 'errors:', nerrors print 'counterexamples:', ncounterexamples
def get_response_content(fs):
    """
    Check the principal coordinate splits of the user supplied trees.
    Each tree is split by the sign of the loadings computed from its
    distance matrix and again from the elementwise squared distances;
    splits that are not partitions of the tree are reported.
    @param fs: an object whose trees attribute holds one newick tree per line
    @return: the report as a string
    @raise HandlingError: when a tree has fewer than four tips,
    an unlabeled tip, or repeated tip labels
    """

    def get_sign_partition(names, loadings):
        # Split the names by the sign of the corresponding loading;
        # a zero loading goes with the nonnegative side.
        nonneg = frozenset(
            name for name, v in zip(names, loadings) if v >= 0)
        neg = frozenset(
            name for name, v in zip(names, loadings) if v < 0)
        return frozenset([nonneg, neg])

    # parse and validate every tree before producing any output
    trees = []
    for line in iterutils.stripped_lines(StringIO(fs.trees)):
        candidate = NewickIO.parse(line, FelTree.NewickTree)
        names = [tip.get_name() for tip in candidate.gen_tips()]
        ntips = len(names)
        if ntips < 4:
            raise HandlingError('expected at least four tips but found ' + str(ntips))
        if any(name is None for name in names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(candidate)
    out = StringIO()
    nerrors = 0
    ncounterexamples = 0
    for tree in trees:
        valid_parts = TreeComparison.get_partitions(tree)
        ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()]
        D = np.array(tree.get_distance_matrix(ordered_tip_names))
        # the split from the plain distances is expected to be valid
        part = get_sign_partition(
            ordered_tip_names, get_principal_coordinate(D))
        if part not in valid_parts:
            nerrors += 1
            print >> out, 'error: a partition that was supposed to be valid was found to be invalid'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
        # the split from the squared distances may be invalid
        Q = D * D
        part = get_sign_partition(
            ordered_tip_names, get_principal_coordinate(Q))
        if part not in valid_parts:
            ncounterexamples += 1
            print >> out, 'found a counterexample!'
            print >> out, 'tree:', NewickIO.get_newick_string(tree)
            print >> out, 'invalid partition:', partition_to_string(part)
            print >> out
    print >> out, 'errors found:', nerrors
    print >> out, 'counterexamples found:', ncounterexamples
    return out.getvalue()
def main(): filename = 'counterexamples.out' fout = open(filename, 'wt') print 'Does monotonically transforming the pairwise leaf distances affect the compatibility' print 'of the split found using principal coordinate analysis?' print 'I am looking through random trees for a tree that is split incompatibly' print 'when distances are squared.' print 'Use control-c to stop the program when you get bored.' try: count = 0 ncounterexamples = 0 nerrors = 0 while True: count += 1 # get a random tree n_base_leaves = 4 n_expected_extra_leaves = 1 expected_branch_length = 1 tree = TreeSampler.sample_tree(n_base_leaves, n_expected_extra_leaves, expected_branch_length) # get the set of valid partitions implied by the tree valid_parts = TreeComparison.get_partitions(tree) ordered_tip_names = [tip.get_name() for tip in tree.gen_tips()] # assert that the partition implied by the correct formula is valid D = np.array(tree.get_distance_matrix(ordered_tip_names)) loadings = get_principal_coordinate(D) nonneg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0) neg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v < 0) part = frozenset([nonneg_leaf_set, neg_leaf_set]) if part not in valid_parts: nerrors += 1 print >> fout, 'error: a partition that was supposed to be valid was found to be invalid' print >> fout, 'tree:', NewickIO.get_newick_string(tree) print >> fout, 'invalid partition:', partition_to_string(part) print >> fout # check the validity of the partition implied by the incorrect formula Q = D * D loadings = get_principal_coordinate(Q) nonneg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v >= 0) neg_leaf_set = frozenset(tip for tip, v in zip(ordered_tip_names, loadings) if v < 0) part = frozenset([nonneg_leaf_set, neg_leaf_set]) if part not in valid_parts: ncounterexamples += 1 print >> fout, 'found a counterexample!' 
print >> fout, 'tree:', NewickIO.get_newick_string(tree) print >> fout, 'invalid partition:', partition_to_string(part) print >> fout except KeyboardInterrupt, e: print 'trees examined:', count print 'errors:', nerrors print 'counterexamples:', ncounterexamples
def get_response_content(fs):
    """
    Sample trees for five seconds, looking for one that rejects itself.
    Each sampled tree is reduced to the signs of its harmonic valuations
    at the leaves and passed to SeekEigenLacing.rec_eigen; the search
    stops at the first tree for which that call returns a false value.
    @param fs: an object with an nleaves attribute, also passed to get_flags
    @return: the report as a string
    """
    flags = get_flags(fs)
    nseconds = 5
    start_time = time.time()
    rejected_s = None
    nerrors = 0
    nchecked = 0
    while time.time() < start_time + nseconds and not rejected_s:
        nchecked += 1
        # Sample a tree and re-parse it as a plain Newick tree.
        sampled = TreeSampler.sample_tree(fs.nleaves, 0, 1.0)
        true_s = NewickIO.get_newick_string(sampled)
        true_tree = Newick.parse(true_s, Newick.NewickTree)
        # Vertices are identified by the id of their node object.
        internal_ids = set(id(x) for x in true_tree.gen_internal_nodes())
        leaf_ids = set(id(x) for x in true_tree.gen_tips())
        nleaves = len(leaf_ids)
        # One valuation map per index in 1..nleaves-1.
        full_valuations = [
            Harmonic.get_harmonic_valuations(true_tree, i)
            for i in range(1, nleaves)]
        # Skip trees that have a leaf valuation too close to zero.
        try:
            for id_to_full_val in full_valuations:
                if any(abs(id_to_full_val[x]) < 1e-8 for x in leaf_ids):
                    raise CheckTreeError('the true tree is too symmetric')
        except CheckTreeError:
            nerrors += 1
            continue
        # Keep only the sign at each leaf; internal vertices get None.
        id_to_val_list = []
        for id_to_full_val in full_valuations:
            signs = dict(
                (x, -1 if id_to_full_val[x] < 0 else 1) for x in leaf_ids)
            signs.update((x, None) for x in internal_ids)
            id_to_val_list.append(signs)
        # Define the topology in a different format.
        id_to_adj = get_id_to_adj(true_tree)
        # A false result means the tree was rejected.
        if not SeekEigenLacing.rec_eigen(id_to_adj, id_to_val_list, flags):
            rejected_s = true_s
    # make the report
    out = StringIO()
    if rejected_s:
        print >> out, 'rejected a true tree:'
        print >> out, rejected_s
    else:
        print >> out, 'no true tree was rejected'
    print >> out
    print >> out, nchecked, 'trees were sampled total'
    print >> out, nerrors, 'trees were too symmetric'
    return out.getvalue()
def __call__(self, tree): # get the partitions implied by the tree valid_partitions = TreeComparison.get_partitions(tree) # Get the partition implied by the Fiedler split # of the graph derived from the tree. tip_nodes = list(tree.gen_tips()) D = tree.get_partial_distance_matrix([id(node) for node in tip_nodes]) y = get_vector(D).tolist() name_selection = frozenset(node.get_name() for node, elem in zip(tip_nodes, y) if elem > 0) name_complement = frozenset(node.get_name() for node, elem in zip(tip_nodes, y) if elem <= 0) name_partition = frozenset((name_selection, name_complement)) if name_partition not in valid_partitions: msg = '\n'.join([ 'invalid partition found:', 'tree:', NewickIO.get_newick_string(tree), 'invalid partition:', name_partition ]) if not self.fout: self.fout = open(self.counterexample_filename, 'wt') print >> self.fout, msg print msg self.ncounterexamples += 1 # do not stop looking, even if a counterexample is found return False
def get_response_content(fs):
    """
    Build a tree from a distance matrix and return its newick string.
    @param fs: an object with a matrix attribute that supports len() and
    tolist(), a labels string with one label per line, and the
    njrecourse and halvingrecourse flags
    @return: the newick string of the tree followed by a newline
    @raise HandlingError: when the matrix has fewer than three rows,
    the label count does not match the row count,
    or the computation would take too long
    """
    # read the matrix
    D = fs.matrix
    if len(D) < 3:
        raise HandlingError('the matrix should have at least three rows')
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    if len(ordered_labels) != len(D):
        msg_a = 'the number of ordered labels should be the same '
        msg_b = 'as the number of rows in the matrix'
        raise HandlingError(msg_a + msg_b)
    # create the tree building object
    splitter = Clustering.StoneExactDMS()
    tree_builder = NeighborhoodJoining.TreeBuilder(
        D.tolist(), ordered_labels, splitter)
    # Set the fallback method in the tree builder.
    # The recourse is read from the flags on fs; the former lookup of a
    # 'recourse' string was never used and has been removed.
    if fs.njrecourse:
        tree_builder.set_fallback_name('nj')
    elif fs.halvingrecourse:
        tree_builder.set_fallback_name('halving')
    # assert that the computation will not take too long
    if tree_builder.get_complexity() > 1000000:
        raise HandlingError('this computation would take too long')
    # build the tree
    tree = tree_builder.build()
    # return the response
    return NewickIO.get_newick_string(tree) + '\n'
def __call__(self, tree): # get the partitions implied by the tree valid_partitions = TreeComparison.get_partitions(tree) # Get the partition implied by the Fiedler split # of the graph derived from the tree. tip_nodes = list(tree.gen_tips()) D = tree.get_partial_distance_matrix( [id(node) for node in tip_nodes]) y = get_vector(D).tolist() name_selection = frozenset(node.get_name() for node, elem in zip(tip_nodes, y) if elem > 0) name_complement = frozenset(node.get_name() for node, elem in zip(tip_nodes, y) if elem <= 0) name_partition = frozenset((name_selection, name_complement)) if name_partition not in valid_partitions: msg = '\n'.join([ 'invalid partition found:', 'tree:', NewickIO.get_newick_string(tree), 'invalid partition:', name_partition]) if not self.fout: self.fout = open(self.counterexample_filename, 'wt') print >> self.fout, msg print msg self.ncounterexamples += 1 # do not stop looking, even if a counterexample is found return False
def get_response_content(fs):
    """
    Build a tree from a distance matrix and return its newick string.
    @param fs: an object with a matrix attribute that supports len() and
    tolist(), a labels string with one label per line, and the
    njrecourse and halvingrecourse flags
    @return: the newick string of the tree followed by a newline
    @raise HandlingError: when the matrix has fewer than three rows,
    the label count does not match the row count,
    or the computation would take too long
    """
    # read the matrix
    D = fs.matrix
    if len(D) < 3:
        raise HandlingError('the matrix should have at least three rows')
    # read the ordered labels
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    if len(ordered_labels) != len(D):
        msg_a = 'the number of ordered labels should be the same '
        msg_b = 'as the number of rows in the matrix'
        raise HandlingError(msg_a + msg_b)
    # create the tree building object
    splitter = Clustering.StoneExactDMS()
    tree_builder = NeighborhoodJoining.TreeBuilder(
        D.tolist(), ordered_labels, splitter)
    # Set the fallback method in the tree builder.
    # The recourse is read from the flags on fs; the former lookup of a
    # 'recourse' string was never used and has been removed.
    if fs.njrecourse:
        tree_builder.set_fallback_name('nj')
    elif fs.halvingrecourse:
        tree_builder.set_fallback_name('halving')
    # assert that the computation will not take too long
    if tree_builder.get_complexity() > 1000000:
        raise HandlingError('this computation would take too long')
    # build the tree
    tree = tree_builder.build()
    # return the response
    return NewickIO.get_newick_string(tree) + '\n'
def get_tikz_lines(newick, eigenvector_index, yaw, pitch):
    """
    Lay out a tree in the plane and lift it by one harmonic valuation.
    @param newick: a newick tree string
    @param eigenvector_index: 1 is Fiedler
    @param yaw: passed through to get_v_to_xyz
    @param pitch: passed through to xyz_to_tikz_lines
    @return: the lines produced by xyz_to_tikz_lines
    """
    tree = Newick.parse(newick, SpatialTree.SpatialTree)
    # Rename every node after its object id, then regenerate the string
    # so that layout locations can later be matched to vertices by name.
    for node in tree.preorder():
        node.name = 'n' + str(id(node))
    newick = NewickIO.get_newick_string(tree)
    # do the layout and scale it to the display area
    layout = FastDaylightLayout.StraightBranchLayout()
    layout.do_layout(tree)
    tree.fit((g_xy_scale, g_xy_scale))
    name_to_location = {}
    for node in tree.preorder():
        name_to_location[node.name] = tree._layout_to_display(node.location)
    T, B, N = FtreeIO.newick_to_TBN(newick)
    # leaves come first in the combined vertex order
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    vertices = leaves + internal
    # map each vertex to its location through its name
    v_to_location = {}
    for v in vertices:
        v_to_location[v] = name_to_location[N[v]]
    # pick the requested column of valuations and scale it
    w, V = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    selected_column = V[:, eigenvector_index - 1]
    v_to_val = {}
    for i, val in enumerate(selected_column):
        v_to_val[vertices[i]] = g_z_scale * val
    # get the coordinates
    v_to_xyz = get_v_to_xyz(yaw, v_to_location, v_to_val)
    # any vertex added here is an intersection vertex
    add_intersection_vertices(T, B, v_to_xyz)
    intersection_vertices = sorted(
        v for v in v_to_xyz if v not in vertices)
    # get lines of the tikz file
    return xyz_to_tikz_lines(
        T, B, pitch, v_to_xyz, leaves, internal, intersection_vertices)
def get_tikz_lines(newick, eigenvector_index, yaw, pitch):
    """
    Lay out a tree in the plane and lift it by one harmonic valuation.
    @param newick: a newick tree string
    @param eigenvector_index: 1 is Fiedler
    @param yaw: passed through to get_v_to_xyz
    @param pitch: passed through to xyz_to_tikz_lines
    @return: the lines produced by xyz_to_tikz_lines
    """
    tree = Newick.parse(newick, SpatialTree.SpatialTree)
    # Rename every node after its object id, then regenerate the string
    # so that layout locations can later be matched to vertices by name.
    for node in tree.preorder():
        node.name = 'n' + str(id(node))
    newick = NewickIO.get_newick_string(tree)
    # do the layout and scale it to the display area
    layout = FastDaylightLayout.StraightBranchLayout()
    layout.do_layout(tree)
    tree.fit((g_xy_scale, g_xy_scale))
    name_to_location = {}
    for node in tree.preorder():
        name_to_location[node.name] = tree._layout_to_display(node.location)
    T, B, N = FtreeIO.newick_to_TBN(newick)
    # leaves come first in the combined vertex order
    leaves = Ftree.T_to_leaves(T)
    internal = Ftree.T_to_internal_vertices(T)
    vertices = leaves + internal
    # map each vertex to its location through its name
    v_to_location = {}
    for v in vertices:
        v_to_location[v] = name_to_location[N[v]]
    # pick the requested column of valuations and scale it
    w, V = Ftree.TB_to_harmonic_extension(T, B, leaves, internal)
    selected_column = V[:, eigenvector_index - 1]
    v_to_val = {}
    for i, val in enumerate(selected_column):
        v_to_val[vertices[i]] = g_z_scale * val
    # get the coordinates
    v_to_xyz = get_v_to_xyz(yaw, v_to_location, v_to_val)
    # any vertex added here is an intersection vertex
    add_intersection_vertices(T, B, v_to_xyz)
    intersection_vertices = sorted(
        v for v in v_to_xyz if v not in vertices)
    # get lines of the tikz file
    return xyz_to_tikz_lines(
        T, B, pitch, v_to_xyz, leaves, internal, intersection_vertices)
def get_distance_matrix(self, ordered_names=None):
    """
    Compute the path length between every pair of tips.
    @param ordered_names: the requested order of the names
    @return: a row major distance matrix
    """
    tips = list(self.gen_tips())
    # map the id of each tip to its row and column index
    if ordered_names:
        name_to_index = dict(
            (name, i) for i, name in enumerate(ordered_names))
        tip_id_to_index = dict(
            (id(tip), name_to_index[tip.name]) for tip in tips)
    else:
        tip_id_to_index = dict((id(tip), i) for i, tip in enumerate(tips))
    n = len(tips)
    distance_matrix = [[0] * n for _ in range(n)]
    # walk away from each tip, accumulating branch lengths along the way
    for tip in tips:
        row = distance_matrix[tip_id_to_index[id(tip)]]
        pending = []
        for first_branch in tip.gen_directed_branches():
            first_target = first_branch.get_target()
            assert first_target
            first_length = first_branch.get_undirected_branch().get_branch_length()
            pending.append((tip, first_target, first_length))
        while pending:
            source, target, distance = pending.pop()
            if target.is_tip():
                # the walk ends at a tip, where the distance is recorded
                row[tip_id_to_index[id(target)]] = distance
                continue
            # leave the internal node through every exit except the entry
            for next_branch in target.gen_exits(source):
                branch_length = next_branch.get_undirected_branch().get_branch_length()
                next_target = next_branch.get_target()
                assert next_target, NewickIO.get_newick_string(self)
                pending.append((target, next_target, distance + branch_length))
    return distance_matrix
def get_full_distance_matrix(self, ordered_ids=None):
    """
    Compute the path length between every pair of nodes.
    @param ordered_ids: the requested row order by node id
    @return: a row major distance matrix
    """
    nodes = list(self.preorder())
    # map the id of each node to its row and column index
    if ordered_ids:
        id_to_index = dict(
            (node_id, i) for i, node_id in enumerate(ordered_ids))
    else:
        id_to_index = dict((id(node), i) for i, node in enumerate(nodes))
    n = len(nodes)
    distance_matrix = [[0] * n for _ in range(n)]
    # walk away from each node, accumulating branch lengths along the way
    for node in nodes:
        row = distance_matrix[id_to_index[id(node)]]
        pending = []
        for first_branch in node.gen_directed_branches():
            first_target = first_branch.get_target()
            assert first_target
            first_length = first_branch.get_undirected_branch().get_branch_length()
            pending.append((node, first_target, first_length))
        while pending:
            source, target, distance = pending.pop()
            # every reached node gets an entry, then the walk continues
            row[id_to_index[id(target)]] = distance
            for next_branch in target.gen_exits(source):
                branch_length = next_branch.get_undirected_branch().get_branch_length()
                next_target = next_branch.get_target()
                assert next_target, NewickIO.get_newick_string(self)
                pending.append((target, next_target, distance + branch_length))
    return distance_matrix
def get_full_distance_matrix(self, ordered_ids=None):
    """
    Compute the path length between every pair of nodes.
    @param ordered_ids: the requested row order by node id
    @return: a row major distance matrix
    """
    nodes = list(self.preorder())
    # map the id of each node to its row and column index
    if ordered_ids:
        id_to_index = dict(
            (node_id, i) for i, node_id in enumerate(ordered_ids))
    else:
        id_to_index = dict((id(node), i) for i, node in enumerate(nodes))
    n = len(nodes)
    distance_matrix = [[0] * n for _ in range(n)]
    # walk away from each node, accumulating branch lengths along the way
    for node in nodes:
        row = distance_matrix[id_to_index[id(node)]]
        pending = []
        for first_branch in node.gen_directed_branches():
            first_target = first_branch.get_target()
            assert first_target
            first_length = first_branch.get_undirected_branch().get_branch_length()
            pending.append((node, first_target, first_length))
        while pending:
            source, target, distance = pending.pop()
            # every reached node gets an entry, then the walk continues
            row[id_to_index[id(target)]] = distance
            for next_branch in target.gen_exits(source):
                branch_length = next_branch.get_undirected_branch().get_branch_length()
                next_target = next_branch.get_target()
                assert next_target, NewickIO.get_newick_string(self)
                pending.append((target, next_target, distance + branch_length))
    return distance_matrix
def get_response_content(fs):
    """
    Sample trees for five seconds, looking for one that rejects itself.
    Each sampled tree is reduced to the signs of its harmonic valuations
    at the leaves and passed to SeekEigenLacing.rec_eigen; the search
    stops at the first tree for which that call returns a false value.
    @param fs: an object with an nleaves attribute, also passed to get_flags
    @return: the report as a string
    """
    flags = get_flags(fs)
    nseconds = 5
    start_time = time.time()
    rejected_s = None
    nerrors = 0
    nchecked = 0
    while time.time() < start_time + nseconds and not rejected_s:
        nchecked += 1
        # Sample a tree and re-parse it as a plain Newick tree.
        sampled = TreeSampler.sample_tree(fs.nleaves, 0, 1.0)
        true_s = NewickIO.get_newick_string(sampled)
        true_tree = Newick.parse(true_s, Newick.NewickTree)
        # Vertices are identified by the id of their node object.
        internal_ids = set(id(x) for x in true_tree.gen_internal_nodes())
        leaf_ids = set(id(x) for x in true_tree.gen_tips())
        nleaves = len(leaf_ids)
        # One valuation map per index in 1..nleaves-1.
        full_valuations = [
            Harmonic.get_harmonic_valuations(true_tree, i)
            for i in range(1, nleaves)]
        # Skip trees that have a leaf valuation too close to zero.
        try:
            for id_to_full_val in full_valuations:
                if any(abs(id_to_full_val[x]) < 1e-8 for x in leaf_ids):
                    raise CheckTreeError('the true tree is too symmetric')
        except CheckTreeError:
            nerrors += 1
            continue
        # Keep only the sign at each leaf; internal vertices get None.
        id_to_val_list = []
        for id_to_full_val in full_valuations:
            signs = dict(
                (x, -1 if id_to_full_val[x] < 0 else 1) for x in leaf_ids)
            signs.update((x, None) for x in internal_ids)
            id_to_val_list.append(signs)
        # Define the topology in a different format.
        id_to_adj = get_id_to_adj(true_tree)
        # A false result means the tree was rejected.
        if not SeekEigenLacing.rec_eigen(id_to_adj, id_to_val_list, flags):
            rejected_s = true_s
    # make the report
    out = StringIO()
    if rejected_s:
        print >> out, 'rejected a true tree:'
        print >> out, rejected_s
    else:
        print >> out, 'no true tree was rejected'
    print >> out
    print >> out, nchecked, 'trees were sampled total'
    print >> out, nerrors, 'trees were too symmetric'
    return out.getvalue()
def get_response_lines(self, options):
    """
    Build the lines that form the result of the analysis.
    @param options: a subset of strings specifying what to show
    @return: a list of preamble lines followed by the error lines
    """
    # (option name, flag attribute, message) for each reportable event.
    # The flag attribute is only read when its option was requested.
    reportable = (
        ('show_incomplete', 'is_incomplete',
            'the sequential splits defined by the eigenvectors were insufficient to reconstruct the tree'),
        ('show_conflicting', 'is_conflicting',
            'the reconstructed tree has a split that is incompatible with the original tree'),
        ('show_negligible', 'is_negligible',
            'during reconstruction a negligible eigenvector loading was encountered'),
    )
    error_lines = [
        message for option, flag_name, message in reportable
        if option in options and getattr(self, flag_name)]
    preamble_lines = []
    # The trees are shown on request, or whenever an error is reported.
    if 'show_all' in options or error_lines:
        preamble_lines += [
            'original tree:',
            NewickIO.get_newick_string(self.tree)]
        if self.reconstructed_tree:
            preamble_lines += [
                'reconstructed tree:',
                NewickIO.get_newick_string(self.reconstructed_tree)]
    return preamble_lines + error_lines
def test_contrast_matrix_to_tree(self): original_tree = NewickIO.parse(g_felsenstein_tree_string, FelTree.NewickTree) ordered_names = ('a', 'b', 'c', 'd', 'e') C = get_contrast_matrix(original_tree, ordered_names) assert_contrast_matrix(C) reconstructed_tree = contrast_matrix_to_tree(C, ordered_names) newick_string = NewickIO.get_newick_string(reconstructed_tree) print print newick_string pass
def test_contrast_matrix_to_tree(self): original_tree = NewickIO.parse(g_felsenstein_tree_string, FelTree.NewickTree) ordered_names = ('a', 'b', 'c', 'd', 'e') C = get_contrast_matrix(original_tree, ordered_names) assert_contrast_matrix(C) reconstructed_tree = contrast_matrix_to_tree(C, ordered_names) newick_string = NewickIO.get_newick_string(reconstructed_tree) print print newick_string pass
def __init__(self, tree, epsilon):
    """
    Try to reconstruct the tree from the eigensystem of its Gower matrix.
    The outcome is recorded in the is_negligible, is_incomplete and
    is_conflicting flags and in reconstructed_tree, which stays None
    when the reconstruction fails.
    @param tree: a newick tree in the felsenstein-inspired format
    @param epsilon: determines whether loadings are considered negligible
    """
    # clear some flags that describe events that occur during reconstruction
    self.is_negligible = False
    self.is_incomplete = False
    self.is_conflicting = False
    # define the trees
    self.tree = tree
    self.reconstructed_tree = None
    # set the threshold for loading negligibility
    self.epsilon = epsilon
    # define some arbitrary ordering of tip names
    self.ordered_names = [node.get_name() for node in tree.gen_tips()]
    # get the distance matrix with respect to this ordering
    D = tree.get_distance_matrix(self.ordered_names)
    # get the Gower doubly centered matrix
    G = MatrixUtil.double_centered(np.array(D))
    # get the eigendecomposition of the Gower matrix
    eigenvalues, eigenvector_transposes = np.linalg.eigh(G)
    eigenvectors = eigenvector_transposes.T
    # Order the eigenpairs by decreasing eigenvalue magnitude.
    # Only the magnitude is used as the sort key: sorting the raw
    # (magnitude, vector) tuples would compare the numpy vectors whenever
    # two magnitudes tie, and that comparison raises ValueError.
    self.sorted_eigensystem = sorted(
        ((abs(w), v) for w, v in zip(eigenvalues, eigenvectors)),
        key=lambda pair: pair[0],
        reverse=True)
    # build the tree recursively using the sorted eigensystem
    indices = set(range(len(self.ordered_names)))
    try:
        # try to reconstruct the tree
        root = self._build_tree(indices, 0)
        root.set_branch_length(None)
        output_tree = Newick.NewickTree(root)
        # convert the tree to the FelTree format
        newick_string = NewickIO.get_newick_string(output_tree)
        self.reconstructed_tree = NewickIO.parse(
            newick_string, FelTree.NewickTree)
    except NegligibleError:
        self.is_negligible = True
    except IncompleteError:
        self.is_incomplete = True
    else:
        # compare the splits defined by the reconstructed tree
        # to splits in the original tree
        expected_partitions = TreeComparison.get_nontrivial_partitions(
            self.tree)
        observed_partitions = TreeComparison.get_nontrivial_partitions(
            self.reconstructed_tree)
        invalid_partitions = observed_partitions - expected_partitions
        if invalid_partitions:
            self.is_conflicting = True
def get_response_content(fs):
    """
    Compare the rejection power of two flag sets on random tree pairs.
    Pairs of trees are sampled for five seconds and each pair is passed
    to check_tree_pair, which accumulates its findings in a shared
    CheckTreeData object.
    @param fs: an object with nleaves and halt_on_difference attributes,
    also passed to get_flags_a and get_flags_b
    @return: the report as a string
    """
    flags_a = get_flags_a(fs)
    flags_b = get_flags_b(fs)
    data = CheckTreeData(flags_a, flags_b)
    nseconds = 5
    tm = time.time()
    while time.time() < tm + nseconds:
        # Sample a pair of Newick trees.
        true_f = TreeSampler.sample_tree(fs.nleaves, 0, 1.0)
        test_f = TreeSampler.sample_tree(fs.nleaves, 0, 1.0)
        true_s = NewickIO.get_newick_string(true_f)
        test_s = NewickIO.get_newick_string(test_f)
        true_tree = Newick.parse(true_s, Newick.NewickTree)
        test_tree = Newick.parse(test_s, Newick.NewickTree)
        # Add the pairwise check to the data borg.
        # The flag is reset for every pair so that a failed check leaves
        # it bound and never reuses the result of an earlier pair.
        found_difference = False
        try:
            found_difference = check_tree_pair(true_tree, test_tree, data)
        except CheckTreeError as e:
            data.add_error(e)
        # Check to see if we should stop early.
        if found_difference and fs.halt_on_difference:
            break
    # make the report
    out = StringIO()
    if data.report:
        print >> out, 'found a difference in rejection power'
        print >> out
        print >> out, data.report
        print >> out
    else:
        print >> out, 'failed to find a difference in rejection power'
        print >> out
    print >> out, 'search summary:'
    m = data.acceptance_matrix
    print >> out, 'A reject, B reject:', m[0, 0]
    print >> out, 'A reject, B accept:', m[0, 1]
    print >> out, 'A accept, B reject:', m[1, 0]
    print >> out, 'A accept, B accept:', m[1, 1]
    print >> out, data.nerrors, 'tree symmetry errors'
    return out.getvalue()
def get_response_content(fs):
    """
    Sample random pairs of trees and report on rejection power.
    Pairs are sampled and checked until five seconds have elapsed,
    or until a difference is found if fs.halt_on_difference is set.
    @param fs: provides nleaves and halt_on_difference,
    and is passed to get_flags_a and get_flags_b
    @return: a multi-line report string
    """
    flags_a = get_flags_a(fs)
    flags_b = get_flags_b(fs)
    data = CheckTreeData(flags_a, flags_b)
    nseconds = 5
    tm = time.time()
    while time.time() < tm + nseconds:
        # Sample a pair of Newick trees.
        true_f = TreeSampler.sample_tree(fs.nleaves, 0, 1.0)
        test_f = TreeSampler.sample_tree(fs.nleaves, 0, 1.0)
        true_s = NewickIO.get_newick_string(true_f)
        test_s = NewickIO.get_newick_string(test_f)
        true_tree = Newick.parse(true_s, Newick.NewickTree)
        test_tree = Newick.parse(test_s, Newick.NewickTree)
        # Add the pairwise check to the data borg.
        # The flag is reset for every pair so that a failed check
        # neither leaves it unbound on the first iteration
        # nor reuses the value computed for the previous pair.
        found_difference = False
        try:
            found_difference = check_tree_pair(true_tree, test_tree, data)
        except CheckTreeError as e:
            data.add_error(e)
        # Check to see if we should stop early.
        if found_difference and fs.halt_on_difference:
            break
    # make the report
    out = StringIO()
    if data.report:
        print >> out, 'found a difference in rejection power'
        print >> out
        print >> out, data.report
        print >> out
    else:
        print >> out, 'failed to find a difference in rejection power'
        print >> out
    print >> out, 'search summary:'
    m = data.acceptance_matrix
    print >> out, 'A reject, B reject:', m[0, 0]
    print >> out, 'A reject, B accept:', m[0, 1]
    print >> out, 'A accept, B reject:', m[1, 0]
    print >> out, 'A accept, B accept:', m[1, 1]
    print >> out, data.nerrors, 'tree symmetry errors'
    return out.getvalue()
def _create_trees(self):
    """
    Build self.full_tree and self.pruned_tree from self.newick_string.
    The full tree is a Newick.NewickTree and
    the pruned tree is a FelTree.NewickTree.
    """
    # the full tree is parsed straight from the newick string
    self.full_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
    # the pruned tree starts as a scratch copy that is modified in place
    scratch_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
    remove_redundant_nodes(scratch_tree)
    # round-trip through a newick string to get the FelTree format
    self.pruned_tree = NewickIO.parse(
            NewickIO.get_newick_string(scratch_tree), FelTree.NewickTree)
def get_art(tree):
    """
    Draw a tree as ascii art that reflects its branch lengths.
    @param tree: a FelTree
    @return: a multi-line ascii art
    """
    # round-trip through a newick string to get a plain Newick tree
    simple_tree = NewickIO.parse(
            NewickIO.get_newick_string(tree), Newick.NewickTree)
    # configure the drawer, applying the settings in a fixed order
    drawer = DrawTree.DrawTree()
    settings = (
            ('use_branch_lengths', True),
            ('force_ultrametric', False),
            ('vertical_spacing', 1),
            ('horizontal_spacing', 1))
    for attribute_name, value in settings:
        setattr(drawer, attribute_name, value)
    return drawer.draw(simple_tree)
def get_art(tree):
    """
    Draw a tree as ascii art that reflects its branch lengths.
    @param tree: a FelTree
    @return: a multi-line ascii art
    """
    # round-trip through a newick string to get a plain Newick tree
    simple_tree = NewickIO.parse(
            NewickIO.get_newick_string(tree), Newick.NewickTree)
    # configure the drawer, applying the settings in a fixed order
    drawer = DrawTree.DrawTree()
    settings = (
            ('use_branch_lengths', True),
            ('force_ultrametric', False),
            ('vertical_spacing', 1),
            ('horizontal_spacing', 1))
    for attribute_name, value in settings:
        setattr(drawer, attribute_name, value)
    return drawer.draw(simple_tree)
def get_response_content(fs):
    """
    Sample fs.ntrees trees and write each one as a newick string.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text with one newick string per line
    """
    out = StringIO()
    for _ in range(fs.ntrees):
        # sample a tree and write it on its own line
        sampled_tree = TreeSampler.sample_tree(
                fs.leafbase, fs.leafmean, fs.branchmean)
        newick_string = NewickIO.get_newick_string(sampled_tree)
        print >> out, newick_string
    return out.getvalue()
def get_response_content(fs):
    """
    Sample fs.ntrees trees and write each one as a newick string.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text with one newick string per line
    """
    out = StringIO()
    for _ in range(fs.ntrees):
        # sample a tree and write it on its own line
        sampled_tree = TreeSampler.sample_tree(
                fs.leafbase, fs.leafmean, fs.branchmean)
        newick_string = NewickIO.get_newick_string(sampled_tree)
        print >> out, newick_string
    return out.getvalue()
def get_response_content(fs):
    """
    Reconstruct a tree from a contrast matrix and its row labels.
    @param fs: provides contrast_matrix and labels
    @return: the newick string of the reconstructed tree plus a newline
    @raise HandlingError: if the row count and label count differ
    """
    contrast_matrix = fs.contrast_matrix
    # one label per non-empty stripped line
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    # each row of the matrix needs its own label
    if len(contrast_matrix) != len(ordered_labels):
        raise HandlingError(
                'the number of rows in the contrast matrix '
                'should match the number of labels')
    # reconstruct the tree and return it as the response
    tree = Contrasts.contrast_matrix_to_tree(contrast_matrix, ordered_labels)
    newick_string = NewickIO.get_newick_string(tree)
    return newick_string + '\n'
def _create_trees(self):
    """
    Build self.full_tree and self.pruned_tree from self.newick_string.
    The full tree is a Newick.NewickTree and
    the pruned tree is a FelTree.NewickTree.
    """
    # the full tree is parsed straight from the newick string
    self.full_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
    # the pruned tree starts as a scratch copy that is modified in place
    scratch_tree = NewickIO.parse(self.newick_string, Newick.NewickTree)
    remove_redundant_nodes(scratch_tree)
    # round-trip through a newick string to get the FelTree format
    self.pruned_tree = NewickIO.parse(
            NewickIO.get_newick_string(scratch_tree), FelTree.NewickTree)
def get_response_content(fs):
    """
    Reconstruct a tree from a contrast matrix and its row labels.
    @param fs: provides contrast_matrix and labels
    @return: the newick string of the reconstructed tree plus a newline
    @raise HandlingError: if the row count and label count differ
    """
    contrast_matrix = fs.contrast_matrix
    # one label per non-empty stripped line
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    # each row of the matrix needs its own label
    if len(contrast_matrix) != len(ordered_labels):
        raise HandlingError(
                'the number of rows in the contrast matrix '
                'should match the number of labels')
    # reconstruct the tree and return it as the response
    tree = Contrasts.contrast_matrix_to_tree(contrast_matrix, ordered_labels)
    newick_string = NewickIO.get_newick_string(tree)
    return newick_string + '\n'
def __init__(self, tree, epsilon):
    """
    Attempt a spectral reconstruction of the tree and record the outcome.
    The tip distance matrix is doubly centered, its eigenvectors are
    ordered by decreasing absolute eigenvalue, and self._build_tree is
    asked to rebuild the tree from them.
    The result is stored in self.reconstructed_tree and the flags
    is_negligible, is_incomplete and is_conflicting.
    @param tree: a newick tree in the felsenstein-inspired format
    @param epsilon: determines whether loadings are considered negligible
    """
    # clear some flags that describe events that occur during reconstruction
    self.is_negligible = False
    self.is_incomplete = False
    self.is_conflicting = False
    # define the trees
    self.tree = tree
    self.reconstructed_tree = None
    # set the threshold for loading negligibility
    self.epsilon = epsilon
    # define some arbitrary ordering of tip names
    self.ordered_names = [node.get_name() for node in tree.gen_tips()]
    # get the distance matrix with respect to this ordering
    D = tree.get_distance_matrix(self.ordered_names)
    # get the Gower doubly centered matrix
    G = MatrixUtil.double_centered(np.array(D))
    # eigh returns the eigenvectors as columns,
    # so transpose to iterate over them one at a time
    eigenvalues, eigenvector_transposes = np.linalg.eigh(G)
    eigenvectors = eigenvector_transposes.T
    # Order the eigenpairs by decreasing absolute eigenvalue.
    # The sort uses an explicit key so that the eigenvector arrays are
    # never compared; sorting bare (abs(w), v) tuples falls through to
    # the arrays when two absolute eigenvalues tie, and numpy refuses to
    # reduce an elementwise comparison of arrays to a single truth value.
    eigenpairs = [(abs(w), v) for w, v in zip(eigenvalues, eigenvectors)]
    self.sorted_eigensystem = sorted(
            eigenpairs, key=lambda pair: pair[0], reverse=True)
    # build the tree recursively using the sorted eigensystem
    indices = set(range(len(self.ordered_names)))
    try:
        # try to reconstruct the tree
        root = self._build_tree(indices, 0)
        root.set_branch_length(None)
        output_tree = Newick.NewickTree(root)
        # convert the tree to the FelTree format
        newick_string = NewickIO.get_newick_string(output_tree)
        self.reconstructed_tree = NewickIO.parse(
                newick_string, FelTree.NewickTree)
    except NegligibleError:
        self.is_negligible = True
    except IncompleteError:
        self.is_incomplete = True
    else:
        # compare the splits defined by the reconstructed tree
        # to splits in the original tree
        expected_partitions = TreeComparison.get_nontrivial_partitions(
                self.tree)
        observed_partitions = TreeComparison.get_nontrivial_partitions(
                self.reconstructed_tree)
        invalid_partitions = observed_partitions - expected_partitions
        if invalid_partitions:
            self.is_conflicting = True
def get_response_content(fs):
    """
    Build a neighbor joining tree from a distance matrix and labels.
    @param fs: provides matrix and labels
    @return: the newick string of the tree plus a newline
    @raise HandlingError: if the matrix has fewer than three rows
    or if the label count differs from the row count
    """
    distance_matrix = fs.matrix
    nrows = len(distance_matrix)
    if nrows < 3:
        raise HandlingError('the matrix should have at least three rows')
    # one label per non-empty stripped line
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    if len(ordered_labels) != nrows:
        raise HandlingError(
                'the number of ordered labels should be the same '
                'as the number of rows in the matrix')
    # build the tree from a plain list of rows and return it as the response
    nj_tree = NeighborJoining.make_tree(
            distance_matrix.tolist(), ordered_labels)
    newick_string = NewickIO.get_newick_string(nj_tree)
    return newick_string + '\n'
def get_response_content(fs):
    """
    Build a neighbor joining tree from a distance matrix and labels.
    @param fs: provides matrix and labels
    @return: the newick string of the tree plus a newline
    @raise HandlingError: if the matrix has fewer than three rows
    or if the label count differs from the row count
    """
    distance_matrix = fs.matrix
    nrows = len(distance_matrix)
    if nrows < 3:
        raise HandlingError('the matrix should have at least three rows')
    # one label per non-empty stripped line
    ordered_labels = Util.get_stripped_lines(fs.labels.splitlines())
    if len(ordered_labels) != nrows:
        raise HandlingError(
                'the number of ordered labels should be the same '
                'as the number of rows in the matrix')
    # build the tree from a plain list of rows and return it as the response
    nj_tree = NeighborJoining.make_tree(
            distance_matrix.tolist(), ordered_labels)
    newick_string = NewickIO.get_newick_string(nj_tree)
    return newick_string + '\n'
def main(): """ Run some tree reconstructions from the command line. """ # initialize the simulation objects sims = [ Simulation(Clustering.NeighborJoiningDMS(), 'nj', 'neighbor joining'), Simulation(Clustering.RandomDMS(), 'nj', 'random partitioning'), Simulation(Clustering.StoneExactDMS(), 'nj', 'exact criterion with neighbor joining fallback'), #Simulation(Clustering.StoneExactDMS(), #'halving', 'exact criterion with stem halving fallback'), Simulation(Clustering.StoneSpectralSignDMS(), 'nj', 'spectral sign cut with neighbor joining fallback') #Simulation(Clustering.StoneSpectralSignDMS(), #'halving', 'spectral sign cut with stem halving fallback') ] # define the simulation parameters tree = get_default_original_tree() reconstruction_count = 1000 sequence_length = 100 step_limit_per_method = 10000000 # set the simulation parameters for sim in sims: sim.set_original_tree(get_default_original_tree()) sim.set_reconstruction_count(reconstruction_count) sim.set_step_limit(step_limit_per_method) sim.set_sequence_length(sequence_length) # show the simulation parameters print 'simulation parameters:' print 'original tree:', NewickIO.get_newick_string(tree) print 'reconstruction count:', reconstruction_count print 'sequence length:', sequence_length # run the simulations print 'running the simulations...' for sim in sims: print 'running "%s"...' % sim.description try: sim.run() except HandlingError as e: print 'Error:', e # print the simulation data print 'simulation results:' for sim in sims: print sim.description + ':' print sim.get_histogram_string()
def main(): """ Run some tree reconstructions from the command line. """ # initialize the simulation objects sims = [ Simulation(Clustering.NeighborJoiningDMS(), 'nj', 'neighbor joining'), Simulation(Clustering.RandomDMS(), 'nj', 'random partitioning'), Simulation(Clustering.StoneExactDMS(), 'nj', 'exact criterion with neighbor joining fallback'), #Simulation(Clustering.StoneExactDMS(), #'halving', 'exact criterion with stem halving fallback'), Simulation(Clustering.StoneSpectralSignDMS(), 'nj', 'spectral sign cut with neighbor joining fallback') #Simulation(Clustering.StoneSpectralSignDMS(), #'halving', 'spectral sign cut with stem halving fallback') ] # define the simulation parameters tree = get_default_original_tree() reconstruction_count = 1000 sequence_length = 100 step_limit_per_method = 10000000 # set the simulation parameters for sim in sims: sim.set_original_tree(get_default_original_tree()) sim.set_reconstruction_count(reconstruction_count) sim.set_step_limit(step_limit_per_method) sim.set_sequence_length(sequence_length) # show the simulation parameters print 'simulation parameters:' print 'original tree:', NewickIO.get_newick_string(tree) print 'reconstruction count:', reconstruction_count print 'sequence length:', sequence_length # run the simulations print 'running the simulations...' for sim in sims: print 'running "%s"...' % sim.description try: sim.run() except HandlingError as e: print 'Error:', e # print the simulation data print 'simulation results:' for sim in sims: print sim.description + ':' print sim.get_histogram_string()
def process(nseconds=None):
    """
    Repeatedly sample branch lengths and test the MDS sign patterns.
    @param nseconds: allow this many seconds to run or None to run forever
    @raise CounterexampleError: if every coordinate of a sample matches
    its target sign pattern; the message is the newick string of the tree
    NOTE(review): the summary string promised by the earlier docstring
    is not built in this block, which returns None; confirm with callers.
    """
    def matches_up_to_sign(observed, target):
        # true when every entry of the elementwise product is positive
        # or every entry is negative
        product = observed * target
        if all(x > 0 for x in product):
            return True
        return all(x < 0 for x in product)

    # load the tree
    tree = NewickIO.parse(g_tree_string, FelTree.NewickTree)
    # get the alphabetically ordered tip names
    ordered_tip_names = sorted(tip.get_name() for tip in tree.gen_tips())
    # initialize the search
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    # not read anywhere within this block
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample some random branch lengths
            sample_branch_lengths(tree)
            # project the leaves onto the MDS axes via the distance matrix
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # a counterexample matches the target pattern on every axis
            axis_pairs = zip(X.T, g_target_sign_patterns)
            if all(matches_up_to_sign(v, t) for v, t in axis_pairs):
                raise CounterexampleError(NewickIO.get_newick_string(tree))
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt:
        pass
def process(nseconds=None):
    """
    Repeatedly sample branch lengths and test the MDS sign patterns.
    @param nseconds: allow this many seconds to run or None to run forever
    @raise CounterexampleError: if every coordinate of a sample matches
    its target sign pattern; the message is the newick string of the tree
    NOTE(review): the summary string promised by the earlier docstring
    is not built in this block, which returns None; confirm with callers.
    """
    def matches_up_to_sign(observed, target):
        # true when every entry of the elementwise product is positive
        # or every entry is negative
        product = observed * target
        if all(x > 0 for x in product):
            return True
        return all(x < 0 for x in product)

    # load the tree
    tree = NewickIO.parse(g_tree_string, FelTree.NewickTree)
    # get the alphabetically ordered tip names
    ordered_tip_names = sorted(tip.get_name() for tip in tree.gen_tips())
    # initialize the search
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    # not read anywhere within this block
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample some random branch lengths
            sample_branch_lengths(tree)
            # project the leaves onto the MDS axes via the distance matrix
            D = np.array(tree.get_distance_matrix(ordered_tip_names))
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # a counterexample matches the target pattern on every axis
            axis_pairs = zip(X.T, g_target_sign_patterns)
            if all(matches_up_to_sign(v, t) for v, t in axis_pairs):
                raise CounterexampleError(NewickIO.get_newick_string(tree))
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt:
        pass
def process(ntaxa):
    """
    Sample a random weighted tree and show matrices derived from it.
    The report shows the tree, a random mass vector m that sums to one,
    the distance matrix D, the matrix S computed by edm_to_S,
    and the pseudoinverse of S.
    @param ntaxa: use this many taxa per tree
    @return: a multi-line string that summarizes the results
    """
    # widen numpy output so that matrix rows are less likely to wrap;
    # note that this changes the process-wide numpy print options
    np.set_printoptions(linewidth=200)
    # sample an xtree topology
    xtree = TreeSampler.sample_agglomerated_tree(ntaxa)
    # convert the xtree to a FelTree,
    # although I guess this might not be necessary
    tree_string = xtree.get_newick_string()
    tree = NewickIO.parse(tree_string, FelTree.NewickTree)
    # get the ordered ids that define the rows of the distance matrix
    ordered_ids = get_ordered_ids(tree)
    # sample random branch lengths
    sample_branch_lengths(tree)
    # get the weighted tree string
    weighted_tree_string = NewickIO.get_newick_string(tree)
    # get the distance matrix relating the ordered ids
    D = np.array(tree.get_partial_distance_matrix(ordered_ids))
    # create a mass vector that sums to one
    m = np.array(
            [random.randrange(1, 10) for i in range(len(D))], dtype=float)
    m /= sum(m)
    # get the S matrix
    S = edm_to_S(D, m)
    # get the pseudoinverse of S
    S_pinv = np.linalg.pinv(S)
    # make the response
    out = StringIO()
    print >> out, 'newick tree:', weighted_tree_string
    print >> out
    print >> out, 'm:'
    print >> out, m
    print >> out
    print >> out, 'D:'
    print >> out, D
    print >> out
    print >> out, 'S:'
    print >> out, S
    print >> out
    print >> out, 'pseudoinverse of S:'
    print >> out, S_pinv
    print >> out
    return out.getvalue().strip()
def get_pruned_tree(tree, names_to_remove):
    """
    Prune named leaves and then remove the single-child nodes.
    The input tree is modified in place by the pruning.
    @param tree: a Newick tree (not a FelTree)
    @param names_to_remove: a set of names of leaves to remove from the tree
    @return: a FelTree
    """
    # collect the tips to prune before modifying the tree
    doomed_tips = []
    for tip in tree.gen_tips():
        if tip.name in names_to_remove:
            doomed_tips.append(tip)
    for tip in doomed_tips:
        tree.prune(tip)
    # merge segmented branches by removing nodes that have a single child
    single_child_nodes = []
    for node in tree.preorder():
        if node.get_child_count() == 1:
            single_child_nodes.append(node)
    for node in single_child_nodes:
        tree.remove_node(node)
    # round-trip through a newick string to get the FelTree format
    return NewickIO.parse(
            NewickIO.get_newick_string(tree), FelTree.NewickTree)
def get_pruned_tree(tree, names_to_remove):
    """
    Prune named leaves and then remove the single-child nodes.
    The input tree is modified in place by the pruning.
    @param tree: a Newick tree (not a FelTree)
    @param names_to_remove: a set of names of leaves to remove from the tree
    @return: a FelTree
    """
    # collect the tips to prune before modifying the tree
    doomed_tips = []
    for tip in tree.gen_tips():
        if tip.name in names_to_remove:
            doomed_tips.append(tip)
    for tip in doomed_tips:
        tree.prune(tip)
    # merge segmented branches by removing nodes that have a single child
    single_child_nodes = []
    for node in tree.preorder():
        if node.get_child_count() == 1:
            single_child_nodes.append(node)
    for node in single_child_nodes:
        tree.remove_node(node)
    # round-trip through a newick string to get the FelTree format
    return NewickIO.parse(
            NewickIO.get_newick_string(tree), FelTree.NewickTree)
def get_root_augmented_distance_matrix(tree_in, first_taxa, second_taxa):
    """
    Get distances among two groups of tips followed by the root.
    The rows are ordered as the tips named in first_taxa,
    then the tips named in second_taxa, then the root.
    @param tree_in: a newick tree
    @param first_taxa: a set of tip names
    @param second_taxa: another set of tip names
    @return: a distance matrix
    """
    # first convert the tree to the appropriate data structure
    newick_string = NewickIO.get_newick_string(tree_in)
    tree = NewickIO.parse(newick_string, FelTree.NewickTree)
    # order the tip ids group by group, each in tip traversal order
    ordered_ids = [
            id(tip)
            for taxa in (first_taxa, second_taxa)
            for tip in tree.gen_tips()
            if tip.get_name() in taxa]
    # the root goes last
    ordered_ids.append(id(tree.get_root()))
    return tree.get_partial_distance_matrix(ordered_ids)
def get_distance_matrix(self, ordered_names=None):
    """
    Compute the path length between every pair of tips.
    Without ordered_names the rows follow the order of self.gen_tips().
    @param ordered_names: the requested order of the names
    @return: a row major distance matrix
    """
    tips = list(self.gen_tips())
    # map the id of each tip to its row index
    if ordered_names:
        index_of_name = {}
        for position, name in enumerate(ordered_names):
            index_of_name[name] = position
        row_of_tip = dict((id(tip), index_of_name[tip.name]) for tip in tips)
    else:
        row_of_tip = dict(
                (id(tip), position) for position, tip in enumerate(tips))
    ntips = len(tips)
    matrix = [[0] * ntips for _ in range(ntips)]
    # walk outward from each tip, accumulating branch lengths
    for origin in tips:
        row = matrix[row_of_tip[id(origin)]]
        # each pending entry is (previous node, current node, distance so far)
        pending = []
        for branch in origin.gen_directed_branches():
            neighbor = branch.get_target()
            assert neighbor
            length = branch.get_undirected_branch().get_branch_length()
            pending.append((origin, neighbor, length))
        while pending:
            came_from, here, traveled = pending.pop()
            if here.is_tip():
                # reaching another tip completes one entry of the row
                row[row_of_tip[id(here)]] = traveled
                continue
            # keep walking, but never back along the way we came
            for branch in here.gen_exits(came_from):
                length = branch.get_undirected_branch().get_branch_length()
                neighbor = branch.get_target()
                assert neighbor, NewickIO.get_newick_string(self)
                pending.append((here, neighbor, traveled + length))
    return matrix
def get_response_content(fs):
    """
    Compare the splits given by two distance matrices for each tree.
    One split comes from the distance matrix over all nodes and the
    other from the distance matrix over the tips only; in both cases
    the tips are divided by the sign of their entry in get_vector.
    @param fs: provides trees, with one newick tree per line
    @return: a multi-line report string
    @raise HandlingError: if a tree has fewer than four tips,
    an unlabeled tip, or a repeated tip label
    """
    def split_by_sign(nodes, loadings):
        # names with a positive entry versus names with an entry <= 0
        pairs = zip(nodes, loadings)
        positive = frozenset(
                node.get_name() for node, elem in pairs if elem > 0)
        nonpositive = frozenset(
                node.get_name() for node, elem in pairs if elem <= 0)
        return frozenset((positive, nonpositive))

    # parse each tree and make sure that it conforms to the requirements
    trees = []
    for tree_string in iterutils.stripped_lines(fs.trees.splitlines()):
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError(
                    'expected at least four tips '
                    'but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # create the response
    out = StringIO()
    same_count = 0
    diff_count = 0
    for tree in trees:
        # make the local paragraph that will be shown if there is an event
        local_out = StringIO()
        has_event = False
        print >> local_out, NewickIO.get_newick_string(tree)
        # get the tip nodes followed by the internal nodes
        nodes = list(tree.preorder())
        tip_nodes = [node for node in nodes if node.is_tip()]
        internal_nodes = [node for node in nodes if not node.is_tip()]
        all_nodes = tip_nodes + internal_nodes
        # get all tip name partitions implied by the tree topology
        valid_partitions = TreeComparison.get_partitions(tree)
        # get results from the augmented distance matrix,
        # keeping only the leading entries that belong to the tips
        D_full = tree.get_partial_distance_matrix(
                [id(node) for node in all_nodes])
        y_full = get_vector(D_full).tolist()
        name_partition_a = split_by_sign(tip_nodes, y_full[:len(tip_nodes)])
        if name_partition_a not in valid_partitions:
            print >> local_out, 'augmented distance matrix split fail:',
            print >> local_out, name_partition_a
            has_event = True
        # get results from the not-augmented distance matrix
        D = tree.get_partial_distance_matrix(
                [id(node) for node in tip_nodes])
        name_partition_b = split_by_sign(tip_nodes, get_vector(D).tolist())
        if name_partition_b not in valid_partitions:
            print >> local_out, 'not-augmented distance matrix split fail:',
            print >> local_out, name_partition_b
            has_event = True
        # compare the name partitions
        if name_partition_a != name_partition_b:
            diff_count += 1
            print >> local_out, 'this tree was split differently '
            print >> local_out, 'by the different methods:'
            print >> local_out, 'augmented distance matrix split:',
            print >> local_out, name_partition_a
            print >> local_out, 'not-augmented distance matrix split:',
            print >> local_out, name_partition_b
            has_event = True
        else:
            same_count += 1
        # only trees with an event are shown, separated by a blank line
        if has_event:
            print >> out, local_out.getvalue()
    # write the summary
    print >> out, 'for this many trees the same split was found:',
    print >> out, same_count
    print >> out, 'for this many trees different splits were found:',
    print >> out, diff_count
    return out.getvalue()
def get_response_content(fs):
    """
    Compare the splits given by two distance matrices for each tree.
    One split comes from the distance matrix over all nodes and the
    other from the distance matrix over the tips only; in both cases
    the tips are divided by the sign of their entry in get_vector.
    @param fs: provides trees, with one newick tree per line
    @return: a multi-line report string
    @raise HandlingError: if a tree has fewer than four tips,
    an unlabeled tip, or a repeated tip label
    """
    def split_by_sign(nodes, loadings):
        # names with a positive entry versus names with an entry <= 0
        pairs = zip(nodes, loadings)
        positive = frozenset(
                node.get_name() for node, elem in pairs if elem > 0)
        nonpositive = frozenset(
                node.get_name() for node, elem in pairs if elem <= 0)
        return frozenset((positive, nonpositive))

    # parse each tree and make sure that it conforms to the requirements
    trees = []
    for tree_string in iterutils.stripped_lines(fs.trees.splitlines()):
        tree = NewickIO.parse(tree_string, FelTree.NewickTree)
        tip_names = [tip.get_name() for tip in tree.gen_tips()]
        ntips = len(tip_names)
        if ntips < 4:
            raise HandlingError(
                    'expected at least four tips '
                    'but found ' + str(ntips))
        if any(name is None for name in tip_names):
            raise HandlingError('each terminal node must be labeled')
        if len(set(tip_names)) != ntips:
            raise HandlingError('each terminal node label must be unique')
        trees.append(tree)
    # create the response
    out = StringIO()
    same_count = 0
    diff_count = 0
    for tree in trees:
        # make the local paragraph that will be shown if there is an event
        local_out = StringIO()
        has_event = False
        print >> local_out, NewickIO.get_newick_string(tree)
        # get the tip nodes followed by the internal nodes
        nodes = list(tree.preorder())
        tip_nodes = [node for node in nodes if node.is_tip()]
        internal_nodes = [node for node in nodes if not node.is_tip()]
        all_nodes = tip_nodes + internal_nodes
        # get all tip name partitions implied by the tree topology
        valid_partitions = TreeComparison.get_partitions(tree)
        # get results from the augmented distance matrix,
        # keeping only the leading entries that belong to the tips
        D_full = tree.get_partial_distance_matrix(
                [id(node) for node in all_nodes])
        y_full = get_vector(D_full).tolist()
        name_partition_a = split_by_sign(tip_nodes, y_full[:len(tip_nodes)])
        if name_partition_a not in valid_partitions:
            print >> local_out, 'augmented distance matrix split fail:',
            print >> local_out, name_partition_a
            has_event = True
        # get results from the not-augmented distance matrix
        D = tree.get_partial_distance_matrix(
                [id(node) for node in tip_nodes])
        name_partition_b = split_by_sign(tip_nodes, get_vector(D).tolist())
        if name_partition_b not in valid_partitions:
            print >> local_out, 'not-augmented distance matrix split fail:',
            print >> local_out, name_partition_b
            has_event = True
        # compare the name partitions
        if name_partition_a != name_partition_b:
            diff_count += 1
            print >> local_out, 'this tree was split differently '
            print >> local_out, 'by the different methods:'
            print >> local_out, 'augmented distance matrix split:',
            print >> local_out, name_partition_a
            print >> local_out, 'not-augmented distance matrix split:',
            print >> local_out, name_partition_b
            has_event = True
        else:
            same_count += 1
        # only trees with an event are shown, separated by a blank line
        if has_event:
            print >> out, local_out.getvalue()
    # write the summary
    print >> out, 'for this many trees the same split was found:',
    print >> out, same_count
    print >> out, 'for this many trees different splits were found:',
    print >> out, diff_count
    return out.getvalue()
def process(ntaxa, nseconds):
    """
    Look for two topologies that share a canonical MDS sign pattern.
    Each sampled tree is summarized by its unit branch length distance
    matrix, which stands in for its topology, and by the sign pattern
    of its MDS coordinates under random branch lengths.
    @param ntaxa: passed to the tree sampler for every sample
    @param nseconds: allow this many seconds to run or None to run forever
    @raise CounterexampleError: if a sign pattern seen earlier recurs
    with a different topology surrogate
    NOTE(review): the summary string promised by the earlier docstring
    is not built in this block, which returns None; confirm with callers.
    """
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    pattern_to_topo_surrogate = {}
    pattern_to_tree_string = {}
    # not read anywhere within this block
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample an xtree topology
            xtree = TreeSampler.sample_agglomerated_tree(ntaxa)
            # convert the xtree to a FelTree,
            # although I guess this might not be necessary
            tree_string = xtree.get_newick_string()
            tree = NewickIO.parse(tree_string, FelTree.NewickTree)
            # get the ordered ids that define the distance matrix rows
            ordered_ids = get_ordered_ids(tree)
            # force every branch length to be the unit length
            reset_branch_lengths(tree)
            # get the unweighted distance matrix in hashable form
            D_unit = np.array(tree.get_partial_distance_matrix(ordered_ids))
            topo_surrogate = tuple(tuple(row.tolist()) for row in D_unit)
            # sample random branch lengths
            sample_branch_lengths(tree)
            # get the weighted tree string
            weighted_tree_string = NewickIO.get_newick_string(tree)
            # get the weighted distance matrix
            D = np.array(tree.get_partial_distance_matrix(ordered_ids))
            # get the projections onto the MDS axes
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # do an orthogonal transformation that puts
            # the first point in the positive orthant
            canonizing_vector = np.array(point_to_orthant(X[0]))
            X *= canonizing_vector
            # get the canonical sign pattern
            sign_pattern = tuple(point_to_orthant(row) for row in X)
            # Compare the topo surrogate of this sign pattern to the one
            # in memory.  The lookup is tested against None explicitly so
            # that a remembered surrogate is recognized even if it is empty.
            expected_topo_surrogate = pattern_to_topo_surrogate.get(
                    sign_pattern)
            if expected_topo_surrogate is not None:
                if topo_surrogate != expected_topo_surrogate:
                    remembered_tree_string = pattern_to_tree_string[
                            sign_pattern]
                    msg = 'these trees have the same sign pattern but different topologies: {%s, %s}' % (
                            weighted_tree_string, remembered_tree_string)
                    raise CounterexampleError(msg)
            else:
                pattern_to_topo_surrogate[sign_pattern] = topo_surrogate
                pattern_to_tree_string[sign_pattern] = weighted_tree_string
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt:
        pass
def process(ntaxa, nseconds):
    """
    Look for two topologies that share a canonical MDS sign pattern.
    Each sampled tree is summarized by its unit branch length distance
    matrix, which stands in for its topology, and by the sign pattern
    of its MDS coordinates under random branch lengths.
    @param ntaxa: passed to the tree sampler for every sample
    @param nseconds: allow this many seconds to run or None to run forever
    @raise CounterexampleError: if a sign pattern seen earlier recurs
    with a different topology surrogate
    NOTE(review): the summary string promised by the earlier docstring
    is not built in this block, which returns None; confirm with callers.
    """
    start_time = time.time()
    nsamples_rejected = 0
    nsamples_accepted = 0
    pattern_to_topo_surrogate = {}
    pattern_to_tree_string = {}
    # not read anywhere within this block
    counterexample_message = 'no counterexample was found'
    try:
        while True:
            elapsed_time = time.time() - start_time
            if nseconds and elapsed_time > nseconds:
                break
            # sample an xtree topology
            xtree = TreeSampler.sample_agglomerated_tree(ntaxa)
            # convert the xtree to a FelTree,
            # although I guess this might not be necessary
            tree_string = xtree.get_newick_string()
            tree = NewickIO.parse(tree_string, FelTree.NewickTree)
            # get the ordered ids that define the distance matrix rows
            ordered_ids = get_ordered_ids(tree)
            # force every branch length to be the unit length
            reset_branch_lengths(tree)
            # get the unweighted distance matrix in hashable form
            D_unit = np.array(tree.get_partial_distance_matrix(ordered_ids))
            topo_surrogate = tuple(tuple(row.tolist()) for row in D_unit)
            # sample random branch lengths
            sample_branch_lengths(tree)
            # get the weighted tree string
            weighted_tree_string = NewickIO.get_newick_string(tree)
            # get the weighted distance matrix
            D = np.array(tree.get_partial_distance_matrix(ordered_ids))
            # get the projections onto the MDS axes
            X = Euclid.edm_to_points(D)
            # if any coordinate is near zero then reject the sample
            if np.min(np.abs(X)) < g_epsilon:
                nsamples_rejected += 1
                continue
            # do an orthogonal transformation that puts
            # the first point in the positive orthant
            canonizing_vector = np.array(point_to_orthant(X[0]))
            X *= canonizing_vector
            # get the canonical sign pattern
            sign_pattern = tuple(point_to_orthant(row) for row in X)
            # Compare the topo surrogate of this sign pattern to the one
            # in memory.  The lookup is tested against None explicitly so
            # that a remembered surrogate is recognized even if it is empty.
            expected_topo_surrogate = pattern_to_topo_surrogate.get(
                    sign_pattern)
            if expected_topo_surrogate is not None:
                if topo_surrogate != expected_topo_surrogate:
                    remembered_tree_string = pattern_to_tree_string[
                            sign_pattern]
                    msg = 'these trees have the same sign pattern but different topologies: {%s, %s}' % (
                            weighted_tree_string, remembered_tree_string)
                    raise CounterexampleError(msg)
            else:
                pattern_to_topo_surrogate[sign_pattern] = topo_surrogate
                pattern_to_tree_string[sign_pattern] = weighted_tree_string
            # increment the count of accepted samples
            nsamples_accepted += 1
    except KeyboardInterrupt:
        pass
def get_response_content(fs):
    """
    Reconstruct trees from sampled distance matrices and tally the errors.

    A splitter object is chosen from the form flags and the tree in fs.tree
    is parsed.  For every distance matrix yielded by the sampler, a
    validating tree builder reconstructs a tree and its mismatch count is
    added to a histogram.  When fs.showtrees is set, the reconstructed
    trees are also listed with their branch lengths removed.
    @param fs: form data; the attributes read are exact, sign, threshold,
    nj, random, tree, iterations, length, njrecourse, halvingrecourse
    and showtrees
    @return: the response text
    @raise HandlingError: if no splitter flag is set, if the splitter
    complexity times fs.iterations exceeds 1000000, or if the sampler
    accepted fewer samples than fs.iterations
    """
    # read the criterion string, creating the splitter object
    if fs.exact:
        splitter = Clustering.StoneExactDMS()
    elif fs.sign:
        splitter = Clustering.StoneSpectralSignDMS()
    elif fs.threshold:
        splitter = Clustering.StoneSpectralThresholdDMS()
    elif fs.nj:
        splitter = Clustering.NeighborJoiningDMS()
    elif fs.random:
        splitter = Clustering.RandomDMS()
    else:
        # Without this branch a missing selection would surface below
        # as an unbound local variable instead of a readable error.
        raise HandlingError('no bipartition function was selected')
    # read the original tree and walk its tips once
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_names = [node.name for node in tree.gen_tips()]
    ntaxa = len(ordered_names)
    # Make sure that the splitter object is appropriate for the number
    # of taxa and the number of tree reconstructions.
    if splitter.get_complexity(ntaxa) * fs.iterations > 1000000:
        msg_a = 'use a faster bipartition function, fewer taxa, '
        msg_b = 'or fewer tree reconstructions'
        raise HandlingError(msg_a + msg_b)
    # sample a bunch of sequences
    sampler = DMSampler(tree, ordered_names, fs.length)
    # simulate a bunch of distance matrices and reconstruct the trees
    mismatch_count_tree_pairs = []
    error_count_histogram = {}
    max_steps = 1000000
    for _sequence_list, distance_matrix in sampler.gen_distance_matrices(
            fs.iterations, max_steps):
        # create the tree builder
        tree_builder = NeighborhoodJoining.ValidatingTreeBuilder(
                distance_matrix, ordered_names, splitter)
        # Read the recourse string and set the corresponding method
        # in the tree builder.
        if fs.njrecourse:
            tree_builder.set_fallback_name('nj')
        elif fs.halvingrecourse:
            tree_builder.set_fallback_name('halving')
        # the builder validates against the original tree
        tree_builder.set_original_tree(tree)
        reconstructed_tree = tree_builder.build()
        # note the number of partition errors during the reconstruction
        mismatch_count = tree_builder.get_mismatch_count()
        error_count_histogram[mismatch_count] = (
                error_count_histogram.get(mismatch_count, 0) + 1)
        # If we are saving the reconstructed trees
        # then remove branch lengths and add to the tree list.
        if fs.showtrees:
            for node in reconstructed_tree.preorder():
                node.set_branch_length(None)
            mismatch_count_tree_pairs.append(
                    (mismatch_count, reconstructed_tree))
    # See if we bailed early because
    # the sampling was predicted to take too long.
    if sampler.accepted_sample_count < fs.iterations:
        raise HandlingError(sampler.get_sampling_error_message())
    # define the response
    out = StringIO()
    print >> out, 'partition error count frequencies:'
    # max() of an empty histogram would raise, so skip the table
    # when no distance matrix was produced.
    if error_count_histogram:
        max_mismatch_count = max(error_count_histogram)
        for i in range(max_mismatch_count + 1):
            frequency = error_count_histogram.get(i, 0)
            print >> out, i, ':', frequency
    if fs.showtrees:
        print >> out, ''
        print >> out, 'reconstructed tree topologies with mismatch counts:'
        # Sort on the count alone so that ties keep their sampling order
        # and tree objects are never compared with each other.
        ranked_pairs = sorted(
                mismatch_count_tree_pairs, key=lambda pair: pair[0])
        for mismatch_count, shown_tree in ranked_pairs:
            print >> out, NewickIO.get_newick_string(shown_tree), mismatch_count
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Reconstruct trees from sampled distance matrices and tally the errors.

    A splitter object is chosen from the form flags and the tree in fs.tree
    is parsed.  For every distance matrix yielded by the sampler, a
    validating tree builder reconstructs a tree and its mismatch count is
    added to a histogram.  When fs.showtrees is set, the reconstructed
    trees are also listed with their branch lengths removed.
    @param fs: form data; the attributes read are exact, sign, threshold,
    nj, random, tree, iterations, length, njrecourse, halvingrecourse
    and showtrees
    @return: the response text
    @raise HandlingError: if no splitter flag is set, if the splitter
    complexity times fs.iterations exceeds 1000000, or if the sampler
    accepted fewer samples than fs.iterations
    """
    # read the criterion string, creating the splitter object
    if fs.exact:
        splitter = Clustering.StoneExactDMS()
    elif fs.sign:
        splitter = Clustering.StoneSpectralSignDMS()
    elif fs.threshold:
        splitter = Clustering.StoneSpectralThresholdDMS()
    elif fs.nj:
        splitter = Clustering.NeighborJoiningDMS()
    elif fs.random:
        splitter = Clustering.RandomDMS()
    else:
        # Without this branch a missing selection would surface below
        # as an unbound local variable instead of a readable error.
        raise HandlingError('no bipartition function was selected')
    # read the original tree and walk its tips once
    tree = NewickIO.parse(fs.tree, FelTree.NewickTree)
    ordered_names = [node.name for node in tree.gen_tips()]
    ntaxa = len(ordered_names)
    # Make sure that the splitter object is appropriate for the number
    # of taxa and the number of tree reconstructions.
    if splitter.get_complexity(ntaxa) * fs.iterations > 1000000:
        msg_a = 'use a faster bipartition function, fewer taxa, '
        msg_b = 'or fewer tree reconstructions'
        raise HandlingError(msg_a + msg_b)
    # sample a bunch of sequences
    sampler = DMSampler(tree, ordered_names, fs.length)
    # simulate a bunch of distance matrices and reconstruct the trees
    mismatch_count_tree_pairs = []
    error_count_histogram = {}
    max_steps = 1000000
    for _sequence_list, distance_matrix in sampler.gen_distance_matrices(
            fs.iterations, max_steps):
        # create the tree builder
        tree_builder = NeighborhoodJoining.ValidatingTreeBuilder(
                distance_matrix, ordered_names, splitter)
        # Read the recourse string and set the corresponding method
        # in the tree builder.
        if fs.njrecourse:
            tree_builder.set_fallback_name('nj')
        elif fs.halvingrecourse:
            tree_builder.set_fallback_name('halving')
        # the builder validates against the original tree
        tree_builder.set_original_tree(tree)
        reconstructed_tree = tree_builder.build()
        # note the number of partition errors during the reconstruction
        mismatch_count = tree_builder.get_mismatch_count()
        error_count_histogram[mismatch_count] = (
                error_count_histogram.get(mismatch_count, 0) + 1)
        # If we are saving the reconstructed trees
        # then remove branch lengths and add to the tree list.
        if fs.showtrees:
            for node in reconstructed_tree.preorder():
                node.set_branch_length(None)
            mismatch_count_tree_pairs.append(
                    (mismatch_count, reconstructed_tree))
    # See if we bailed early because
    # the sampling was predicted to take too long.
    if sampler.accepted_sample_count < fs.iterations:
        raise HandlingError(sampler.get_sampling_error_message())
    # define the response
    out = StringIO()
    print >> out, 'partition error count frequencies:'
    # max() of an empty histogram would raise, so skip the table
    # when no distance matrix was produced.
    if error_count_histogram:
        max_mismatch_count = max(error_count_histogram)
        for i in range(max_mismatch_count + 1):
            frequency = error_count_histogram.get(i, 0)
            print >> out, i, ':', frequency
    if fs.showtrees:
        print >> out, ''
        print >> out, 'reconstructed tree topologies with mismatch counts:'
        # Sort on the count alone so that ties keep their sampling order
        # and tree objects are never compared with each other.
        ranked_pairs = sorted(
                mismatch_count_tree_pairs, key=lambda pair: pair[0])
        for mismatch_count, shown_tree in ranked_pairs:
            print >> out, NewickIO.get_newick_string(shown_tree), mismatch_count
    # return the response
    return out.getvalue()
def get_response_content(fs):
    """
    Validate two subtrees with their taxon groupings and report three methods.

    The form supplies two newick subtrees, two lists of tip names for each
    subtree, and the length of the branch that connects the subtrees.
    After validation the output of do_first_method, do_second_method and
    do_third_method is written to the response, followed by the matrix
    from Clustering.get_R_balaji and its sums within the four blocks.
    @param fs: form data; the attributes read are subtree_a, subtree_b,
    taxa_a1, taxa_a2, taxa_b1, taxa_b2 and blen
    @return: the response text
    @raise HandlingError: if a taxon list has duplicates, a subtree has
    fewer than two tips or duplicate tip names, a listed taxon is not a
    tip of its subtree, the two lists of a subtree overlap, or some tip
    is in neither list
    """
    def read_names(text):
        # one stripped taxon name per line of the form field
        return Util.get_stripped_lines(StringIO(text))

    def parse_subtrees():
        # fresh copies of both subtrees, straight from the form text
        return (
                NewickIO.parse(fs.subtree_a, Newick.NewickTree),
                NewickIO.parse(fs.subtree_b, Newick.NewickTree))

    def to_feltree(newick_tree):
        # round trip through a newick string to change the tree class
        return NewickIO.parse(
                NewickIO.get_newick_string(newick_tree), FelTree.NewickTree)

    # pull everything out of the form
    subtree_a = NewickIO.parse(fs.subtree_a, Newick.NewickTree)
    taxa_a1 = read_names(fs.taxa_a1)
    taxa_a2 = read_names(fs.taxa_a2)
    subtree_b = NewickIO.parse(fs.subtree_b, Newick.NewickTree)
    taxa_b1 = read_names(fs.taxa_b1)
    taxa_b2 = read_names(fs.taxa_b2)
    connecting_branch_length = fs.blen
    # no taxon list may repeat a name
    for names in (taxa_a1, taxa_a2, taxa_b1, taxa_b2):
        if len(names) != len(set(names)):
            raise HandlingError('one of the lists of taxa contains duplicates')
    # every subtree needs two or more tips, all named differently
    for subtree in (subtree_a, subtree_b):
        tip_list = [tip.get_name() for tip in subtree.gen_tips()]
        if len(tip_list) < 2:
            raise HandlingError('each subtree should have at least two tips')
        if len(tip_list) != len(set(tip_list)):
            raise HandlingError('a subtree has duplicate tip names')
    # the two lists of each subtree must partition its tips
    labeled_groups = (
            ('A', subtree_a, taxa_a1, taxa_a2),
            ('B', subtree_b, taxa_b1, taxa_b2))
    for tree_name, subtree, taxa_1, taxa_2 in labeled_groups:
        tip_set = set(tip.get_name() for tip in subtree.gen_tips())
        members_1 = set(taxa_1)
        members_2 = set(taxa_2)
        for group_name, members in (('1', members_1), ('2', members_2)):
            nonsense_names = list(members - tip_set)
            if nonsense_names:
                raise HandlingError(
                        'the following taxa in group %s ' % group_name
                        + 'of subtree %s ' % tree_name
                        + 'are not valid tips: %s' % str(nonsense_names))
        if members_1 & members_2:
            raise HandlingError(
                    'the taxon lists for subtree %s ' % tree_name
                    + 'are not disjoint')
        # a proper subset here means that some tip was left out
        if (members_1 | members_2) < tip_set:
            raise HandlingError(
                    'a tip in subtree %s ' % tree_name
                    + 'is not represented in either of the groups')
    # start the response
    out = StringIO()
    # The first method must see the subtrees before they are joined below.
    do_first_method(
            subtree_a, subtree_b,
            taxa_a1, taxa_a2, taxa_b1, taxa_b2,
            connecting_branch_length, out)
    # join the subtrees by hanging the root of B under the root of A
    root_b = subtree_b.get_root()
    root_b.set_branch_length(connecting_branch_length)
    subtree_a.get_root().add_child(root_b)
    # fix the order and block membership of the matrix rows
    block_structure = get_block_structure(taxa_a1, taxa_a2, taxa_b1, taxa_b2)
    name_order = taxa_a1 + taxa_a2 + taxa_b1 + taxa_b2
    # distance matrix of the joined tree and the matrix derived from it
    fel_tree = to_feltree(subtree_a)
    D = fel_tree.get_distance_matrix(name_order)
    R = Clustering.get_R_balaji(D)
    # add up the entries of R within each pair of blocks
    block_R = [[0] * 4 for _ in range(4)]
    for row, row_block in enumerate(block_structure):
        for col, col_block in enumerate(block_structure):
            block_R[row_block][col_block] += R[row][col]
    # second method
    do_second_method(fel_tree, taxa_a1, taxa_a2, taxa_b1, taxa_b2, out)
    # NOTE(review): in Python 2 without true division this would floor
    # if fs.blen were an int -- confirm that the form field is a float.
    branch_d2 = connecting_branch_length / 2
    # third method, part one: each subtree gets an extra child named
    # 'special' on half of the connecting branch
    tree_m3_a, tree_m3_b = parse_subtrees()
    for augmented in (tree_m3_a, tree_m3_b):
        neo = Newick.NewickNode()
        neo.name = 'special'
        neo.blen = branch_d2
        augmented.get_root().add_child(neo)
    feltree_m3_a = to_feltree(tree_m3_a)
    feltree_m3_b = to_feltree(tree_m3_b)
    # third method, part two: fresh copies of the subtrees hang from a new
    # root, each on half of the connecting branch
    tree_m3_a, tree_m3_b = parse_subtrees()
    new_root = Newick.NewickNode()
    tree_m3_a.get_root().blen = branch_d2
    tree_m3_b.get_root().blen = branch_d2
    new_root.add_child(tree_m3_a.get_root())
    new_root.add_child(tree_m3_b.get_root())
    feltree_m3 = to_feltree(Newick.NewickTree(new_root))
    do_third_method(
            feltree_m3_a, feltree_m3_b, feltree_m3, branch_d2,
            taxa_a1, taxa_a2, taxa_b1, taxa_b2, out)
    # finish with the matrix and its block sums
    print >> out, 'M:'
    print >> out, MatrixUtil.m_to_string(R)
    print >> out, 'M summed within blocks:'
    print >> out, MatrixUtil.m_to_string(block_R)
    return out.getvalue()