def __call__(self):
    """
    Look for a counterexample.
    @return: True if a counterexample was found
    """
    # Sample a reversible rate matrix from a symmetric matrix and a
    # stationary distribution, using Robert Kern's trick of multiplying
    # on the left and on the right by diagonal matrices.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    sym = MatrixUtil.sample_pos_sym_matrix(self.nstates)
    distn = mrate.sample_distn(self.nstates)
    R = (distn**-0.5)[:, np.newaxis] * sym * (distn**0.5)
    R -= np.diag(np.sum(R, axis=1))
    # Simplify the sampled process.
    Q = self.simplification(R)
    # Sample a random amount of time.
    t = random.expovariate(1)
    # Compare the mutual informations of the two processes
    # at the sampled time.
    mi_R = ctmcmi.get_expected_ll_ratio(R, t)
    mi_Q = ctmcmi.get_expected_ll_ratio(Q, t)
    if np.allclose(mi_R, mi_Q):
        # Too close to call numerically.
        self.n_too_close += 1
        return False
    if mi_R < mi_Q:
        # The simplified process is the more informative one,
        # so record a report of the counterexample.
        report = [
            'found a counterexample',
            '',
            'sampled symmetric matrix S:',
            str(sym),
            '',
            'sampled stationary distribution v:',
            str(distn),
            '',
            'implied rate matrix R:',
            str(R),
            '',
            'parent independent process Q:',
            str(Q),
            '',
            'sampled time t: ' + str(t),
            '',
            'mutual information of sampled process: ' + str(mi_R),
            'mutual information of p.i. process: ' + str(mi_Q),
        ]
        self.counterexample = '\n'.join(report).rstrip()
        return True
def __call__(self): """ Look for a counterexample. """ # Sample a rate matrix. # Use a trick by Robert Kern to left and right multiply by diagonals. # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/ # 026809.html S = MatrixUtil.sample_pos_sym_matrix(self.nstates) v = mrate.sample_distn(self.nstates) R = (v**-0.5)[:, np.newaxis] * S * (v**0.5) R -= np.diag(np.sum(R, axis=1)) # Construct a parent-independent process # with the same max rate and stationary distribution # as the sampled process. #max_rate = max(-np.diag(R)) #expected_rate = np.dot(v, -np.diag(R)) #logical_entropy = np.dot(v, 1-v) #randomization_rate = expected_rate / logical_entropy Q = self.simplification(R) #Q = np.outer(np.ones(self.nstates), v) #Q -= np.diag(np.sum(Q, axis=1)) #Q *= max(np.diag(R) / np.diag(Q)) # sample a random time t = random.expovariate(1) # Check that the mutual information of the # parent independent process is smaller. mi_R = ctmcmi.get_expected_ll_ratio(R, t) mi_Q = ctmcmi.get_expected_ll_ratio(Q, t) if np.allclose(mi_R, mi_Q): self.n_too_close += 1 return False if mi_R < mi_Q: out = StringIO() print >> out, 'found a counterexample' print >> out print >> out, 'sampled symmetric matrix S:' print >> out, S print >> out print >> out, 'sampled stationary distribution v:' print >> out, v print >> out print >> out, 'implied rate matrix R:' print >> out, R print >> out print >> out, 'parent independent process Q:' print >> out, Q print >> out print >> out, 'sampled time t:', t print >> out print >> out, 'mutual information of sampled process:', mi_R print >> out, 'mutual information of p.i. process:', mi_Q print >> out self.counterexample = out.getvalue().rstrip() return True
def __call__(self):
    """
    Look for a counterexample.
    @return: True if a counterexample was found
    """
    # Sample a reversible rate matrix from a symmetric matrix and a
    # stationary distribution, using Robert Kern's trick of multiplying
    # on the left and on the right by diagonal matrices.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    sym = MatrixUtil.sample_pos_sym_matrix(self.nstates)
    distn = mrate.sample_distn(self.nstates)
    R = (distn**-0.5)[:, np.newaxis] * sym * (distn**0.5)
    R -= np.diag(np.sum(R, axis=1))
    # Sample a random time with expectation self.etime.
    t = random.expovariate(1.0 / self.etime)
    # Sample one side of a bipartition of the states and compute the
    # mutual information of the lumped (non-markov) process.
    k = random.randrange(1, self.nstates)
    A = random.sample(range(self.nstates), k)
    mi_non_markov = get_mutual_information(R, A, t)
    # Compute the mutual information of the reduced markov process.
    Q = msimpl.get_fast_two_state(R, A)
    mi_markov = ctmcmi.get_expected_ll_ratio(Q, t)
    if np.allclose(mi_non_markov, mi_markov):
        # Too close to call numerically.
        self.n_too_close += 1
        return False
    if mi_non_markov < mi_markov:
        # The markov process is the more informative one,
        # so record a report of the counterexample.
        report = [
            'found a counterexample',
            '',
            'sampled symmetric matrix S:',
            str(sym),
            '',
            'sampled stationary distribution v:',
            str(distn),
            '',
            'implied rate matrix R:',
            str(R),
            '',
            'reduced rate matrix Q',
            str(Q),
            '',
            'sampled time t: ' + str(t),
            '',
            'non-markov mutual information: ' + str(mi_non_markov),
            'markov mutual information: ' + str(mi_markov),
        ]
        self.counterexample = '\n'.join(report).rstrip()
        return True
def __call__(self): """ Look for a counterexample. """ # Sample a rate matrix. # Use a trick by Robert Kern to left and right multiply by diagonals. # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/ # 026809.html S = MatrixUtil.sample_pos_sym_matrix(self.nstates) v = mrate.sample_distn(self.nstates) R = (v**-0.5)[:, np.newaxis] * S * (v**0.5) R -= np.diag(np.sum(R, axis=1)) # sample a random time rate = 1.0 / self.etime t = random.expovariate(rate) # sample one side of the bipartition and get the mutual information k = random.randrange(1, self.nstates) A = random.sample(range(self.nstates), k) mi_non_markov = get_mutual_information(R, A, t) # get summary statistics of the non-markov process Q = msimpl.get_fast_two_state(R, A) mi_markov = ctmcmi.get_expected_ll_ratio(Q, t) # check if the mutual informations are indistinguishable if np.allclose(mi_non_markov, mi_markov): self.n_too_close += 1 return False if mi_non_markov < mi_markov: out = StringIO() print >> out, 'found a counterexample' print >> out print >> out, 'sampled symmetric matrix S:' print >> out, S print >> out print >> out, 'sampled stationary distribution v:' print >> out, v print >> out print >> out, 'implied rate matrix R:' print >> out, R print >> out print >> out, 'reduced rate matrix Q' print >> out, Q print >> out print >> out, 'sampled time t:', t print >> out print >> out, 'non-markov mutual information:', mi_non_markov print >> out, 'markov mutual information:', mi_markov print >> out self.counterexample = out.getvalue().rstrip() return True
def make_table(args, distn_modes):
    """
    Make outputs to pass to RUtil.get_table_string.
    @param args: user args
    @param distn_modes: ordered distribution modes
    @return: matrix, headers
    """
    # Unpack and validate the time interval.
    t_low = args.t_low
    t_high = args.t_high
    if t_high <= t_low:
        raise ValueError('low time must be smaller than high time')
    ntimes = 100
    incr = (t_high - t_low) / (ntimes - 1)
    n = args.nstates
    # Map each mode name to the function that implements it.
    distn_mode_to_f = {
            UNIFORM : get_distn_uniform,
            ONE_INC : get_distn_one_inc,
            TWO_INC : get_distn_two_inc,
            ONE_DEC : get_distn_one_dec,
            TWO_DEC : get_distn_two_dec}
    selection_mode_to_f = {
            BALANCED : mrate.to_gtr_balanced,
            HALPERN_BRUNO : mrate.to_gtr_halpern_bruno}
    # Look up the selection calculator and the distribution calculators.
    selection_f = selection_mode_to_f[args.selection]
    distn_fs = [distn_mode_to_f[m] for m in distn_modes]
    # The header row is the time column followed by one distribution
    # mode name per column, with underscores turned into dots.
    headers = ['t'] + [m.replace('_', '.') for m in distn_modes]
    # The mutation process has unit rates between all pairs of states.
    S = np.ones((n, n), dtype=float)
    S -= np.diag(np.sum(S, axis=1))
    def row_for_time(t):
        # one expected log likelihood ratio column per distribution mode
        mis = []
        for distn_f in distn_fs:
            v = distn_f(n, args.sel_surr)
            mis.append(ctmcmi.get_expected_ll_ratio(selection_f(S, v), t))
        return [t] + mis
    # Build one table row per evenly spaced time point.
    times = [t_low + i * incr for i in range(ntimes)]
    arr = [row_for_time(t) for t in times]
    return np.array(arr), headers
def make_table(args, distn_modes): """ Make outputs to pass to RUtil.get_table_string. @param args: user args @param distn_modes: ordered distribution modes @return: matrix, headers """ # define some variables t_low = args.t_low t_high = args.t_high if t_high <= t_low: raise ValueError("low time must be smaller than high time") ntimes = 100 incr = (t_high - t_low) / (ntimes - 1) n = args.nstates # define some tables distn_mode_to_f = { UNIFORM: get_distn_uniform, ONE_INC: get_distn_one_inc, TWO_INC: get_distn_two_inc, ONE_DEC: get_distn_one_dec, TWO_DEC: get_distn_two_dec, } selection_mode_to_f = {BALANCED: mrate.to_gtr_balanced, HALPERN_BRUNO: mrate.to_gtr_halpern_bruno} # define the selection modes and calculators selection_f = selection_mode_to_f[args.selection] distn_fs = [distn_mode_to_f[m] for m in distn_modes] # define the headers headers = ["t"] + [s.replace("_", ".") for s in distn_modes] # define the numbers in the table S = np.ones((n, n), dtype=float) S -= np.diag(np.sum(S, axis=1)) arr = [] for i in range(ntimes): t = t_low + i * incr row = [t] for distn_f in distn_fs: v = distn_f(n, args.sel_surr) R = selection_f(S, v) expected_log_ll_ratio = ctmcmi.get_expected_ll_ratio(R, t) row.append(expected_log_ll_ratio) arr.append(row) return np.array(arr), headers
def get_response_content(fs):
    """
    Build a text report comparing equivalent computations.
    The rate matrices, transition matrices, and expected log likelihood
    ratios are each computed in more than one way, the values are all
    reported, and the redundant computations are cross-checked.
    @param fs: user form values
    @return: the report text
    """
    np.set_printoptions(linewidth=200)
    # Define the scalar parameters of the comparison.
    R_jc = get_jc_rate_matrix()
    t = 0.1
    x = 1.6
    w = 0.5 * log(x)
    v = x_to_distn(x)
    # Compute the mutation-selection rate matrix in two ways.
    R_hb_easy = mrate.to_gtr_halpern_bruno(R_jc, v)
    y, z = mrate.x_to_halpern_bruno_yz(x)
    yz_ratio = y / z
    R_hb_tedious = get_mut_sel_rate_matrix(y, z)
    # Compute the transition matrix in several ways.
    P_hb_easy = get_trans_mat_expm(R_hb_easy, t)
    P_hb_tedious = get_trans_mat_tediously(y, z, t)
    P_hb_tedious_c = get_trans_mat_tediously_c(y, z, t)
    P_hb_from_x = get_trans_mat_from_x(x, t)
    # Compute the expected log likelihood ratios in several ways.
    e_ll_jc = ctmcmi.get_expected_ll_ratio(R_jc, t)
    e_ll_jc_tedious = get_jc_e_ll(t)
    e_ll_hb = ctmcmi.get_expected_ll_ratio(R_hb_easy, t)
    e_ll_hb_from_x = get_e_ll_from_x(x, t)
    e_ll_hb_from_x_b = get_e_ll_from_x_b(x, t)
    e_ll_hb_from_x_htrig = get_e_ll_from_x_htrig(x, t)
    # Lay out each labeled value as a blank-line-terminated paragraph.
    labeled_values = [
            ('Jukes-Cantor mutation matrix:', R_jc),
            ('ratio of common to uncommon probabilities:', x),
            ('1/2 log ratio:', w),
            ('fast rate:', y),
            ('slow rate:', z),
            ('reciprocal of fast rate:', 1.0 / y),
            ('ratio of fast to slow rates (should be x):', yz_ratio),
            ('mutation-selection rate matrix (easy):', R_hb_easy),
            ('mutation-selection rate matrix (tedious):', R_hb_tedious),
            ('time:', t),
            ('mutation-selection transition matrix (easy):', P_hb_easy),
            ('mutation-selection transition matrix (tedious):', P_hb_tedious),
            ('mutation-selection transition matrix (tedious c):', P_hb_tedious_c),
            ('mutation-selection transition matrix (from x):', P_hb_from_x),
            ('expected Jukes-Cantor log likelihood ratio:', e_ll_jc),
            ('expected Jukes-Cantor log likelihood ratio (tedious):', e_ll_jc_tedious),
            ('expected mutation-selection log likelihood ratio:', e_ll_hb),
            ('expected mutation-selection ll ratio from x:', e_ll_hb_from_x),
            ('expected mutation-selection ll ratio from x (impl b):', e_ll_hb_from_x_b),
            ('expected mutation-selection ll ratio from x (htrig):', e_ll_hb_from_x_htrig),
            ]
    lines = []
    for label, value in labeled_values:
        lines.extend([label, str(value), ''])
    # Cross-check the redundant computations, complaining loudly
    # about any mismatch.
    invariant_checks = [
            (R_hb_easy, R_hb_tedious,
                'halpern-bruno rate matrices are equal as expected',
                '*** halpern-bruno rate matrices are not equal!'),
            (P_hb_easy, P_hb_tedious,
                'halpern-bruno transition matrices are equal as expected',
                '*** halpern-bruno transition matrices are not equal!'),
            (P_hb_easy, P_hb_tedious_c,
                'halpern-bruno transition matrices are equal as expected',
                '*** halpern-bruno transition matrices are not equal!'),
            (P_hb_easy, P_hb_from_x,
                'halpern-bruno transition matrices are equal as expected',
                '*** halpern-bruno trans. mat. from x is not equal!'),
            ]
    for a, b, ok_msg, bad_msg in invariant_checks:
        lines.append(ok_msg if np.allclose(a, b) else bad_msg)
    # return the results
    return '\n'.join(lines) + '\n'
def process(fs):
    """
    Compare a sampled reversible process to a simplified process.
    Many summaries of both processes are written into a report.
    @param fs: user options with nstates, t, and the flags
        parent_indep, child_indep, and bipartitioned
    @return: a multi-line report string
    """
    nstates = fs.nstates
    np.set_printoptions(linewidth=200)
    t = fs.t
    ### sample a random time
    ##time_mu = 0.01
    ##t = random.expovariate(1 / time_mu)
    # Sample a rate matrix.
    # Use a trick by Robert Kern to left and right multiply by diagonals.
    # http://mail.scipy.org/pipermail/numpy-discussion/2007-March/
    # 026809.html
    S = MatrixUtil.sample_pos_sym_matrix(nstates)
    v = mrate.sample_distn(nstates)
    R = (v**-0.5)[:,np.newaxis] * S * (v**0.5)
    # force each row of the rate matrix to sum to zero
    R -= np.diag(np.sum(R, axis=1))
    # Construct a parent-independent process
    # with the same max rate and stationary distribution
    # as the sampled process.
    if fs.parent_indep:
        Q = np.outer(np.ones(nstates), v)
        Q -= np.diag(np.sum(Q, axis=1))
        pi_rescaling_factor = max(np.diag(R) / np.diag(Q))
        Q *= pi_rescaling_factor
        Z = msimpl.get_fast_meta_f81_autobarrier(Q)
    # Construct a child-independent process
    # with the same expected rate
    # as the sampled process
    if fs.child_indep:
        C = np.outer(1/v, np.ones(nstates))
        C -= np.diag(np.sum(C, axis=1))
        ci_rescaling_factor = np.max(R / C)
        #expected_rate = -ndot(np.diag(R), v)
        #ci_rescaling_factor = expected_rate / (nstates*(nstates-1))
        #ci_rescaling_factor = expected_rate / (nstates*nstates)
        C *= ci_rescaling_factor
        Q = C
    if fs.bipartitioned:
        Q = msimpl.get_fast_meta_f81_autobarrier(R)
    # NOTE(review): if none of the three flags is set then Q is never
    # defined and the report below raises NameError — confirm that the
    # form logic upstream guarantees at least one flag.
    # Check that the mutual information of the
    # parent independent process is smaller.
    out = StringIO()
    print >> out, 'sampled symmetric part of the rate matrix S:'
    print >> out, S
    print >> out
    print >> out, 'sampled stationary distribution v:'
    print >> out, v
    print >> out
    print >> out, 'shannon entropy of stationary distribution v:'
    print >> out, -np.dot(np.log(v), v)
    print >> out
    print >> out, 'sqrt stationary distribution:'
    print >> out, np.sqrt(v)
    print >> out
    print >> out, 'implied rate matrix R:'
    print >> out, R
    print >> out
    print >> out, 'eigenvalues of R:', scipy.linalg.eigvals(R)
    print >> out
    # the trailing comma keeps the value on the same report line
    print >> out, 'relaxation rate of R:',
    print >> out, sorted(np.abs(scipy.linalg.eigvals(R)))[1]
    print >> out
    print >> out, 'expected rate of R:', mrate.Q_to_expected_rate(R)
    print >> out
    print >> out, 'cheeger bounds of R:', get_cheeger_bounds(R, v)
    print >> out
    print >> out, 'randomization rate of R:', get_randomization_rate(R, v)
    print >> out
    candidates = [get_randomization_candidate(R, v, i) for i in range(nstates)]
    if np.allclose(get_randomization_rate(R, v), candidates):
        print >> out, 'all candidates are equal to this rate'
    else:
        print >> out, 'not all candidates are equal to this rate'
    print >> out
    print >> out, 'simplified rate matrix Q:'
    print >> out, Q
    print >> out
    qv = mrate.R_to_distn(Q)
    print >> out, 'stationary distribution of Q:'
    print >> out, qv
    print >> out
    print >> out, 'ratio qv/v:'
    print >> out, qv / v
    print >> out
    print >> out, 'shannon entropy of stationary distribution of Q:'
    print >> out, -np.dot(np.log(qv), qv)
    print >> out
    if fs.parent_indep:
        print >> out, 'parent independent rescaling factor:'
        print >> out, pi_rescaling_factor
        print >> out
    if fs.child_indep:
        print >> out, 'child independent rescaling factor:'
        print >> out, ci_rescaling_factor
        print >> out
    print >> out, 'eigenvalues of Q:', scipy.linalg.eigvals(Q)
    print >> out
    print >> out, 'relaxation rate of Q:',
    print >> out, sorted(np.abs(scipy.linalg.eigvals(Q)))[1]
    print >> out
    print >> out, 'expected rate of Q:', mrate.Q_to_expected_rate(Q)
    print >> out
    print >> out, 'cheeger bounds of Q:', get_cheeger_bounds(Q, v)
    print >> out
    print >> out, 'randomization rate of Q:', get_randomization_rate(Q, v)
    print >> out
    candidates = [get_randomization_candidate(Q, v, i) for i in range(nstates)]
    if np.allclose(get_randomization_rate(Q, v), candidates):
        print >> out, 'all candidates are equal to this rate'
    else:
        print >> out, 'warning: not all candidates are equal to this rate'
    print >> out
    print >> out, 'E(rate) of Q divided by logical entropy:',
    print >> out, mrate.Q_to_expected_rate(Q) / ndot(v, 1-v)
    print >> out
    print >> out, 'symmetric matrix similar to Q:'
    # NOTE(review): this rebinds S, shadowing the sampled symmetric matrix
    S = ndot(np.diag(np.sqrt(v)), Q, np.diag(1/np.sqrt(v)))
    print >> out, S
    print >> out
    print >> out, 'eigendecomposition of the similar matrix:'
    W, V = scipy.linalg.eigh(S)
    print >> out, V
    print >> out, np.diag(W)
    print >> out, V.T
    print >> out
    #
    print >> out, 'time:', t
    print >> out
    print >> out, 'stationary distn logical entropy:', ndot(v, 1-v)
    print >> out
    #
    P_by_hand = get_pi_transition_matrix(Q, v, t)
    print >> out, 'simplified-process transition matrix computed by hand:'
    print >> out, P_by_hand
    print >> out
    print >> out, 'simplified-process transition matrix computed by expm:'
    print >> out, scipy.linalg.expm(Q*t)
    print >> out
    #
    print >> out, 'simplified-process m.i. by hand:'
    print >> out, get_pi_mi(Q, v, t)
    print >> out
    print >> out, 'simplified-process m.i. by expm:'
    print >> out, ctmcmi.get_expected_ll_ratio(Q, t)
    print >> out
    #
    print >> out, 'original process m.i. by expm:'
    print >> out, ctmcmi.get_expected_ll_ratio(R, t)
    print >> out
    #
    print >> out, 'stationary distn Shannon entropy:'
    print >> out, -ndot(v, np.log(v))
    print >> out
    #
    # Z is defined only in the parent_indep branch above,
    # so this whole report section is conditional on that flag.
    if fs.parent_indep:
        print >> out, 'approximate simplified process m.i. 2nd order approx:'
        print >> out, get_pi_mi_t2_approx(Q, v, t)
        print >> out
        print >> out, 'approximate simplified process m.i. "better" approx:'
        print >> out, get_pi_mi_t2_diag_approx(Q, v, t)
        print >> out
        print >> out, '"f81-ization plus barrier" of pure f81-ization:'
        print >> out, Z
        print >> out
    #
    return out.getvalue().rstrip()