def sample_b_to_rate(R):
    """
    Sample a rate for every branch of a directed topology.
    The b in this function name means branch.
    Vertices are visited in the order given by Ftree.R_to_preorder.
    Each branch rate is drawn from an exponential distribution
    whose mean is the rate already sampled for the branch directly above it;
    a branch whose upper endpoint has no source uses a mean of 1.0.
    @param R: directed topology
    @return: a sampled map from branch to rate, where a branch is
    the frozenset of its two endpoint vertices
    """
    parent_of = Ftree.R_to_v_to_source(R)
    rates = {}
    for child in Ftree.R_to_preorder(R):
        parent = parent_of.get(child, None)
        if parent is not None:
            grandparent = parent_of.get(parent, None)
            if grandparent is not None:
                # the branch above must already have been sampled,
                # which relies on the parent being visited before the child
                mean_rate = rates[frozenset([parent, grandparent])]
            else:
                mean_rate = 1.0
            # expovariate takes the inverse of the desired mean
            rates[frozenset([child, parent])] = random.expovariate(1/mean_rate)
    return rates
def equal_arc_layout(T, B):
    """
    Compute an equal arc layout of a tree.
    @param T: tree topology
    @param B: branch lengths
    @return: a map from vertex to location
    """
    # the layout needs a direction, so pick the canonical rooting
    rooted = Ftree.T_to_R_canonical(T)
    root = Ftree.R_to_root(rooted)
    # count the tips below each vertex; a vertex without sinks counts as one tip
    children_of = Ftree.R_to_v_to_sinks(rooted)
    tip_counts = {}
    for vertex in Ftree.R_to_postorder(rooted):
        children = children_of.get(vertex, [])
        tip_counts[vertex] = (
                sum(tip_counts[c] for c in children) if children else 1)
    # the helper is handed an empty map and the full circle of angles;
    # presumably it fills the map with one angle per vertex
    angles = {}
    _force_equal_arcs(
            children_of, tip_counts, angles,
            root, -math.pi, math.pi)
    # the helper is handed an empty map to hold the coordinates,
    # with the root placed at the origin
    parent_of = Ftree.R_to_v_to_source(rooted)
    locations = {}
    _update_locations(
            rooted, B,
            parent_of, children_of, angles, locations,
            root, (0, 0), 0)
    return locations
def RB_to_newick(R, B):
    """
    Build a newick string with branch lengths.
    @param R: a directed topology
    @param B: branch lengths
    @return: a newick string
    """
    root = Ftree.R_to_root(R)
    parent_of = Ftree.R_to_v_to_source(R)
    children_of = Ftree.R_to_v_to_sinks(R)
    # the helper is called on the root; the terminating semicolon is added here
    body = _Bv_to_newick(parent_of, children_of, B, root)
    return body + ';'
def RBN_to_newick(R, B, N):
    """
    Build a newick string with branch lengths and vertex names.
    @param R: a directed topology
    @param B: branch lengths
    @param N: map from vertices to names
    @return: a newick string
    """
    root = Ftree.R_to_root(R)
    parent_of = Ftree.R_to_v_to_source(R)
    children_of = Ftree.R_to_v_to_sinks(R)
    # the helper is called on the root; the terminating semicolon is added here
    body = _BNv_to_newick(parent_of, children_of, B, N, root)
    return body + ';'
def get_paths_to_root(R):
    """
    Trace every leaf back up the directed topology.
    A leaf is a vertex that appears as a sink in R but never as a source.
    @param R: directed topology, iterated as (source, sink) pairs
    @return: a list of paths, one per leaf; each path is a list of vertices
    that starts at the leaf and follows the vertex-to-source map
    until it reaches a vertex that has no source
    """
    sources, sinks = zip(*R)
    leaves = set(sinks) - set(sources)
    parent_of = Ftree.R_to_v_to_source(R)
    paths = []
    for leaf in leaves:
        path = [leaf]
        current = leaf
        # climb until the current vertex has no recorded source
        while current in parent_of:
            current = parent_of[current]
            path.append(current)
        paths.append(path)
    return paths
def RB_to_v_to_age(R, B):
    """
    Compute vertex ages, with every leaf at age zero.
    The age of a source vertex is the age of a sink plus the length
    of the branch joining them; when a vertex has several sinks,
    the one visited last by Ftree.R_to_postorder determines the stored age.
    @param R: directed topology
    @param B: branch lengths in time units
    @return: map from vertex to age
    """
    sources, sinks = zip(*R)
    # leaves are the sinks that are never used as a source
    leaves = set(sinks) - set(sources)
    ages = dict.fromkeys(leaves, 0)
    parent_of = Ftree.R_to_v_to_source(R)
    for child in Ftree.R_to_postorder(R):
        parent = parent_of.get(child, None)
        if parent is None:
            # nothing above this vertex to assign an age to
            continue
        branch = frozenset([child, parent])
        ages[parent] = ages[child] + B[branch]
    return ages
def get_correlation(R, b_to_rate):
    """
    Compute the correlation between the rates of adjacent branches.
    This tries to exactly replicate the BEAST statistic.
    Each directed branch (p, v) whose upper vertex p has a source gp
    contributes one observation: the rate of branch {gp, p} paired
    with the rate of branch {p, v}.
    @param R: directed topology, iterated as (source, sink) pairs
    @param b_to_rate: map from branch (frozenset of two vertices) to rate
    @return: the correlation between upper and lower branch rates
    @raise ZeroDivisionError: if there are fewer than two observations
    or if either set of rates has zero variance
    """
    X = []
    Y = []
    v_to_source = Ftree.R_to_v_to_source(R)
    for p, v in R:
        gp = v_to_source.get(p, None)
        if gp is not None:
            X.append(b_to_rate[frozenset([gp, p])])
            Y.append(b_to_rate[frozenset([p, v])])
    # X and Y are appended to in lockstep so they share one length;
    # len(zip(X, Y)) gave the same number but fails on Python 3,
    # where zip returns an iterator
    n = len(X)
    xbar = sum(X) / n
    ybar = sum(Y) / n
    xvar = sum((x - xbar)**2 for x in X) / (n - 1)
    yvar = sum((y - ybar)**2 for y in Y) / (n - 1)
    xstd = math.sqrt(xvar)
    ystd = math.sqrt(yvar)
    # NOTE(review): the cross term is normalised by n while the variances
    # use n - 1; left unchanged because this function aims to replicate
    # the BEAST statistic -- confirm against BEAST before altering
    xycorr_num = sum((x - xbar) * (y - ybar) for x, y in zip(X, Y))
    xycorr_den = xstd * ystd * n
    xycorr = xycorr_num / xycorr_den
    return xycorr
def sample_jc_column(R, B):
    """
    Sample a column of a Jukes-Cantor alignment.
    Vertices are visited in the order given by Ftree.R_to_preorder.
    A vertex without a source gets a uniformly chosen nucleotide.
    Every other vertex is either redrawn uniformly, with probability
    1 - exp(-(4/3) * d) for branch length d, or copies its source's nucleotide.
    @param R: Ftree directed topology
    @param B: branch lengths in expected number of substitutions
    @return: a map from vertex to nucleotide
    """
    nucleotides = 'ACGT'
    parent_of = Ftree.R_to_v_to_source(R)
    column = {}
    for vertex in Ftree.R_to_preorder(R):
        parent = parent_of.get(vertex, None)
        if parent is None:
            redraw = True
        else:
            branch_length = B[frozenset([vertex, parent])]
            p_randomize = 1.0 - math.exp(-(4.0 / 3.0) * branch_length)
            redraw = random.random() < p_randomize
        if redraw:
            column[vertex] = random.choice(nucleotides)
        else:
            # the source's nucleotide must already be in the column,
            # which relies on the source being visited before this vertex
            column[vertex] = column[parent]
    return column
def set_root(self, v):
    """
    Rebuild the directed topology from its undirected form,
    passing v to Ftree.T_to_R_specific as the requested root,
    and refresh the cached vertex-to-source map to match.
    This is slow, probably as a result of the design.
    @param v: the vertex handed to Ftree.T_to_R_specific
    """
    undirected = Ftree.R_to_T(self.R)
    self.R = Ftree.T_to_R_specific(undirected, v)
    # the cached map is derived from R, so recompute it after every change
    self.v_to_source = Ftree.R_to_v_to_source(self.R)