def newick_to_TB(s, name_type=None):
    """
    Read a newick string as a topology plus branch lengths keyed by name.
    The validated vertex names themselves play the role of vertices.
    This is mostly for testing.
    @param s: newick string
    @param name_type: optional argument forwarded to get_validated_name_map
    @return: undirected topology, branch lengths
    """
    raw_T, raw_B, raw_N = newick_to_TBN(s)
    names = get_validated_name_map(raw_N, name_type)
    # Re-express every edge in terms of the vertex names.
    T = set()
    for a, b in raw_T:
        T.add(mkedge(names[a], names[b]))
    # Carry each branch length over to its renamed edge.
    B = {}
    for (a, b), blen in raw_B.items():
        B[mkedge(names[a], names[b])] = blen
    return T, B
def _BNv_to_newick(v_to_source, v_to_sinks, B, N, v): """ This is part of writing a newick string with branch lengths. Note that this function does not add the semicolon termination. @param v_to_source: a map from a vertex to its source @param v_to_sinks: a map from a vertex to a set of sinks @param B: branch lengths @param N: map from vertices to names @param v: a subtree root vertex @return: a chunk of a newick string """ if v in v_to_source: # a vertex that has a source should record its distance to its source blen = B[mkedge(v, v_to_source[v])] suffix = ':' + str(blen) else: suffix = '' if v not in v_to_sinks: if v not in N: raise ValueError('expected leaf vertices to be named') return str(N[v]) + suffix sinks = sorted(v_to_sinks[v]) arr = [_BNv_to_newick(v_to_source, v_to_sinks, B, N, x) for x in sinks] internal_vertex_name = str(N.get(v, '')) return '(' + ', '.join(arr) + ')' + internal_vertex_name + suffix
def set_branch_length(self, v, blen):
    """
    Record the length of the branch leading to vertex v.
    A length may also be given to a root: while the tree is still being
    built, a subtree may not yet be attached to the rest of the tree.
    In that case the length is kept in v_to_hanging_length.
    @param v: the vertex at the sink end of the branch
    @param blen: the branch length to record
    """
    # No source yet: remember the length as a hanging length.
    if v not in self.v_to_source:
        self.v_to_hanging_length[v] = blen
        return
    # Otherwise store the length on the edge joining v to its source.
    source = self.v_to_source[v]
    self.B[mkedge(v, source)] = blen
def newick_to_T(s, name_type=None):
    """
    Read a newick string as a topology keyed by vertex name.
    Everything to do with branch lengths is ignored.
    The validated vertex names themselves play the role of vertices.
    This is mostly for testing.
    @param s: newick string
    @param name_type: optional argument forwarded to get_validated_name_map
    @return: undirected topology
    """
    raw_T, raw_N = newick_to_TN(s)
    names = get_validated_name_map(raw_N, name_type)
    # Re-express every edge in terms of the vertex names.
    T = set()
    for a, b in raw_T:
        T.add(mkedge(names[a], names[b]))
    return T
def _Bv_to_newick(v_to_source, v_to_sinks, B, v): """ This is part of writing a newick string with branch lengths. Note that this function does not add the semicolon termination. @param v_to_source: a map from a vertex to its source @param v_to_sinks: a map from a vertex to a set of sinks @param B: branch lengths @param v: a subtree root vertex @return: a chunk of a newick string """ if v in v_to_source: # a vertex that has a source should record its distance to its source blen = B[mkedge(v, v_to_source[v])] suffix = ':' + str(blen) else: suffix = '' if v not in v_to_sinks: return str(v) + suffix sinks = sorted(v_to_sinks[v]) arr = [_Bv_to_newick(v_to_source, v_to_sinks, B, x) for x in sinks] return '(' + ', '.join(arr) + ')' + str(v) + suffix
if name_type: N = dict((v, name_type(n)) for v, n in N.items()) nvertices = len(N) names = N.values() if any(n is None for n in names): raise ValueError( 'expected a name for each vertex, including internal vertices') if len(set(names)) < nvertices: raise ValueError('expected unique vertex names') return N # Testing g_example_T = set([ mkedge(2,1), mkedge(2,3), mkedge(2,4), mkedge(3,5), mkedge(3,6), mkedge(6,7)]) g_example_B = { mkedge(2,1) : 1, mkedge(2,3) : 2, mkedge(2,4) : 2, mkedge(3,5) : 3, mkedge(3,6) : 3, mkedge(6,7) : 3} class TestFtreeIO(unittest.TestCase):