def mytrans(d):
    """
    Run arbplf_trans on a plain dict and return the result as a DataFrame.

    The dict is serialized to JSON for arbplf_trans, and the JSON text that
    comes back is parsed as a 'split'-oriented pandas.DataFrame.
    """
    request = json.dumps(d)
    response = arbplf_trans(request)
    return pd.read_json(StringIO(response), orient='split', precise_float=True)
def test_heterogeneous_edge_rates():
    """
    Check every arbplf entry point when the edge rate coefficients differ.

    The marginal, trans, em_update and dwell outputs are compared against
    the desired_* values defined outside this function; only the log
    likelihood expectation is specific to this test.
    """
    # try changing one of the edge rate coefficients
    model = {
        "edges": [[0, 1], [1, 2]],
        "edge_rate_coefficients": [1, 2],
        "rate_matrix": [[0, 1], [0, 0]],
        "probability_array": [[[1, 0], [1, 1], [1, 0]]],
    }
    d = {
        "model_and_data": model,
        "site_reduction": {"aggregation": "only"},
    }
    # The base request is never mutated below, so one serialization is shared.
    payload = json.dumps(d)

    assert_equal(json.loads(arbplf_marginal(payload)), desired_marginal)

    # The transition query works on a copy that adds a trans_reduction.
    trans_request = copy.deepcopy(d)
    trans_request['trans_reduction'] = {'selection': [[0, 1], [1, 0]]}
    trans_result = json.loads(arbplf_trans(json.dumps(trans_request)))
    assert_equal(trans_result, desired_trans)

    ll_result = json.loads(arbplf_ll(payload))
    assert_equal(ll_result, {"columns": ["value"], "data": [[-3.0]]})

    assert_equal(json.loads(arbplf_em_update(payload)), desired_em_update)

    assert_equal(json.loads(arbplf_dwell(payload)), desired_dwell)
def test_edges_are_not_preordered():
    """
    Check every arbplf entry point when the edges are listed out of order.

    The marginal, trans, em_update and dwell outputs are compared against
    the desired_* values defined outside this function; only the log
    likelihood expectation is specific to this test.
    """
    # Try switching the order of the edges in the input
    # and increasing the birth rate in the rate matrix.
    model = {
        "edges": [[1, 2], [0, 1]],
        "edge_rate_coefficients": [1, 2],
        "rate_matrix": [[0, 2], [0, 0]],
        "probability_array": [[[1, 0], [1, 1], [1, 0]]],
    }
    d = {
        "model_and_data": model,
        "site_reduction": {"aggregation": "only"},
    }
    # The base request is never mutated below, so one serialization is shared.
    payload = json.dumps(d)

    assert_equal(json.loads(arbplf_marginal(payload)), desired_marginal)

    # The transition query works on a copy that adds a trans_reduction.
    trans_request = copy.deepcopy(d)
    trans_request['trans_reduction'] = {'selection': [[0, 1], [1, 0]]}
    trans_result = json.loads(arbplf_trans(json.dumps(trans_request)))
    assert_equal(trans_result, desired_trans)

    ll_result = json.loads(arbplf_ll(payload))
    assert_equal(ll_result, {"columns": ["value"], "data": [[-6.0]]})

    assert_equal(json.loads(arbplf_em_update(payload)), desired_em_update)

    assert_equal(json.loads(arbplf_dwell(payload)), desired_dwell)
def test_trans_10():
    """
    The [1, 0] transition expectation must be exactly zero for each
    trans_reduction aggregation mode ('sum', 'avg', 'only').
    """
    for aggregation in ('sum', 'avg', 'only'):
        request = copy.deepcopy(D)
        request['trans_reduction'] = {
            "selection": [[1, 0]],
            "aggregation": aggregation,
        }
        response = arbplf_trans(json.dumps(request))
        frame = pd.read_json(
            StringIO(response), orient='split', precise_float=True)
        actual = frame.set_index('edge')['value'].values
        # The desired closed form solution is identically zero, so the
        # comparison is exact rather than approximate.
        expected = np.zeros_like(actual)
        assert_equal(actual, expected)
def test_truncated_trans_10():
    """
    The summed [1, 0] transition expectation must be exactly zero after the
    last row of the first site's probability array is replaced by [0, 1].
    """
    request = copy.deepcopy(D)
    first_site = request['model_and_data']['probability_array'][0]
    first_site[-1] = [0, 1]
    request['trans_reduction'] = {
        "selection": [[1, 0]],
        "aggregation": "sum",
    }
    response = arbplf_trans(json.dumps(request))
    frame = pd.read_json(
        StringIO(response), orient='split', precise_float=True)
    actual = frame.set_index('edge')['value'].values
    # The desired closed form solution is identically zero, so the
    # comparison is exact rather than approximate.
    expected = np.zeros_like(actual)
    assert_equal(actual, expected)
def main():
    """
    Demo: update edge rate coefficients by EM, refine them with Newton's
    method, then print conditionally expected transition counts.

    A reversible rate matrix is sampled with a fixed seed, divided by the
    rate ``-p.dot(diag(Q))`` (p being the result of ``equilibrium(Q)``), and
    its diagonal is zeroed before it is placed in the request. The request
    uses three weighted sites on a tree whose three edges all start at
    node 0. All results are printed; nothing is returned.
    """
    def response_values(response):
        # Parse a 'split'-oriented JSON response; return its 'value' column
        # as a plain list so it can be put back into a JSON request.
        frame = pd.read_json(
            StringIO(response), orient='split', precise_float=True)
        return frame.value.values.tolist()

    np.random.seed(123475)

    # sample a random rate matrix
    state_count = 3
    # Alternative sampler left disabled by the original author:
    # Q = sample_rate_matrix(state_count)
    Q = sample_reversible_rate_matrix(state_count)
    p = equilibrium(Q)
    expected_rate = -p.dot(np.diag(Q))
    print('expected rate:', expected_rate)
    Q = Q / expected_rate
    np.fill_diagonal(Q, 0)

    # use ad hoc data
    probability_array = [
        [[1, 1, 1], [1, 0, 0], [1, 0, 0], [1, 0, 0]],
        [[1, 1, 1], [0, 1, 0], [1, 0, 0], [1, 0, 0]],
        [[1, 1, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]],
    ]
    site_weights = [.7, .2, .1]
    edges = [[0, 1], [0, 2], [0, 3]]
    coefficients = [.01, .01, .01]

    d = {
        "model_and_data": {
            "edges": edges,
            "edge_rate_coefficients": coefficients,
            "rate_matrix": Q.tolist(),
            "probability_array": probability_array,
        },
        "site_reduction": {
            "aggregation": site_weights,
        },
    }
    print(d)

    # Each EM update is fed back into the request for the next iteration.
    for _ in range(100):
        y = response_values(arbplf_em_update(json.dumps(d)))
        d['model_and_data']['edge_rate_coefficients'] = y
        print('coefficients updated by EM:', y)

    # Newton refinement starts from the last EM update.
    y = response_values(arbplf_newton_refine(json.dumps(d)))
    print('coefficients updated by newton refinement:', y)

    # Query all six off-diagonal state pairs, summed, at the refined
    # coefficients.
    d['trans_reduction'] = {
        'selection': [[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]],
        'aggregation': 'sum',
    }
    d['model_and_data']['edge_rate_coefficients'] = y
    y = response_values(arbplf_trans(json.dumps(d)))
    print('conditionally expected transition counts:', y)
def test_trans_01():
    """
    Compare the [0, 1] transition expectation against its closed form for
    each trans_reduction aggregation mode ('sum', 'avg', 'only').

    With u the cumulative sums of ``rates`` (prefixed by 0), the desired
    value for consecutive entries (a, b) of u is exp(-a) - exp(-b).
    """
    for aggregation in ('sum', 'avg', 'only'):
        request = copy.deepcopy(D)
        request['trans_reduction'] = {
            "selection": [[0, 1]],
            "aggregation": aggregation,
        }
        response = arbplf_trans(json.dumps(request))
        frame = pd.read_json(
            StringIO(response), orient='split', precise_float=True)
        actual = frame.set_index('edge')['value'].values

        # compute the desired closed form solution
        boundaries = np.cumsum([0] + rates)
        starts = boundaries[:-1]
        stops = boundaries[1:]
        expected = exp(-starts) - exp(-stops)

        # compare actual and desired result
        assert_allclose(actual, expected)
def main():
    """
    Demo: update edge rate coefficients by EM, refine them with Newton's
    method, then print conditionally expected transition counts.

    A reversible rate matrix is sampled with a fixed seed, divided by the
    rate ``-p.dot(diag(Q))`` (p being the result of ``equilibrium(Q)``), and
    its diagonal is zeroed before it is placed in the request. The request
    uses three weighted sites on a tree whose three edges all start at
    node 0. All results are printed; nothing is returned.
    """
    def response_values(response):
        # Parse a "split"-oriented JSON response; return its "value" column
        # as a plain list so it can be put back into a JSON request.
        frame = pd.read_json(
            StringIO(response), orient="split", precise_float=True)
        return frame.value.values.tolist()

    np.random.seed(123475)

    # sample a random rate matrix
    state_count = 3
    # Alternative sampler left disabled by the original author:
    # Q = sample_rate_matrix(state_count)
    Q = sample_reversible_rate_matrix(state_count)
    p = equilibrium(Q)
    expected_rate = -p.dot(np.diag(Q))
    print("expected rate:", expected_rate)
    Q = Q / expected_rate
    np.fill_diagonal(Q, 0)

    # use ad hoc data
    probability_array = [
        [[1, 1, 1], [1, 0, 0], [1, 0, 0], [1, 0, 0]],
        [[1, 1, 1], [0, 1, 0], [1, 0, 0], [1, 0, 0]],
        [[1, 1, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]],
    ]
    site_weights = [0.7, 0.2, 0.1]
    edges = [[0, 1], [0, 2], [0, 3]]
    coefficients = [0.01, 0.01, 0.01]

    d = {
        "model_and_data": {
            "edges": edges,
            "edge_rate_coefficients": coefficients,
            "rate_matrix": Q.tolist(),
            "probability_array": probability_array,
        },
        "site_reduction": {"aggregation": site_weights},
    }
    print(d)

    # Each EM update is fed back into the request for the next iteration.
    for _ in range(100):
        y = response_values(arbplf_em_update(json.dumps(d)))
        d["model_and_data"]["edge_rate_coefficients"] = y
        print("coefficients updated by EM:", y)

    # Newton refinement starts from the last EM update.
    y = response_values(arbplf_newton_refine(json.dumps(d)))
    print("coefficients updated by newton refinement:", y)

    # Query all six off-diagonal state pairs, summed, at the refined
    # coefficients.
    d["trans_reduction"] = {
        "selection": [[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]],
        "aggregation": "sum",
    }
    d["model_and_data"]["edge_rate_coefficients"] = y
    y = response_values(arbplf_trans(json.dumps(d)))
    print("conditionally expected transition counts:", y)
def test_truncated_trans_01():
    """
    Compare the summed [0, 1] transition expectation against its closed
    form after the last row of the first site's probability array is
    replaced by [0, 1].

    With u the cumulative sums of ``rates`` (prefixed by 0), T = u[-1] and
    f(x) = (exp(-T) - exp(-x)) / expm1(-T), the desired value for
    consecutive entries (a, b) of u is f(a) - f(b).
    """
    request = copy.deepcopy(D)
    first_site = request['model_and_data']['probability_array'][0]
    first_site[-1] = [0, 1]
    request['trans_reduction'] = {
        "selection": [[0, 1]],
        "aggregation": "sum",
    }
    response = arbplf_trans(json.dumps(request))
    frame = pd.read_json(
        StringIO(response), orient='split', precise_float=True)
    actual = frame.set_index('edge')['value'].values

    # compute the desired closed form solution
    boundaries = np.cumsum([0] + rates)
    total = boundaries[-1]

    def scaled(x):
        return (exp(-total) - exp(-x)) / expm1(-total)

    starts = boundaries[:-1]
    stops = boundaries[1:]
    expected = scaled(starts) - scaled(stops)

    # compare actual and desired result
    assert_allclose(actual, expected)
def run(assumed_kappa):
    """
    Print prior and conditional ts/tv expectations under an assumed kappa.

    Site log likelihoods are first computed with the rate matrix built from
    ``true_kappa = 4`` and the "true" edge coefficients. Their normalized
    exponentials are then used as site weights when arbplf_trans is run
    with the rate matrix built from ``assumed_kappa`` and the "assumed"
    edge coefficients.

    Four results are printed (value and raw response for each): the prior
    and the conditional expectation for the ts pairs, then the same for the
    tv pairs. The pairs come from get_ts_tv_pairs(); presumably they are
    the transition and transversion state pairs, but that is defined
    outside this function.

    Parameters
    ----------
    assumed_kappa
        Forwarded to get_rate_matrix to build the rate matrix used for the
        arbplf_trans queries.

    Returns
    -------
    None
    """
    state_count = 4
    node_count = 5
    true_kappa = 4
    assumed_m, assumed_denom = get_rate_matrix(assumed_kappa)
    true_m, true_denom = get_rate_matrix(true_kappa)
    edges = [[0, 2], [0, 1], [1, 3], [1, 4]]
    assumed_coeffs = [28, 21, 12, 9]
    true_coeffs = [30, 20, 10, 10]

    # There are five nodes.
    # Two of them (nodes 0 and 1) have unobserved states and the other
    # three have observed states.
    # Use one site for each of the 4^3 = 64 possible observations.
    X = [-1]
    U = range(4)
    all_site_patterns = list(itertools.product(X, X, U, U, U))

    # A single site in which every state is allowed at every node.
    prior_array = [[[1] * state_count for _ in range(node_count)]]

    # -1 marks an unobserved node (all states allowed); any other entry is
    # an observed state, encoded as an indicator row.
    probability_array = [
        [
            [1] * state_count if p == -1
            else [int(state == p) for state in range(state_count)]
            for p in pattern
        ]
        for pattern in all_site_patterns
    ]

    ll_request = {
        "model_and_data": {
            "edges": edges,
            "edge_rate_coefficients": true_coeffs,
            "root_prior": "equilibrium_distribution",
            "rate_matrix": true_m,
            "rate_divisor": true_denom * 100,
            "probability_array": probability_array,
        },
    }
    s = arbplf_ll(json.dumps(ll_request))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    log_likelihoods = df.value.values
    print('log likelihood sum:', sum(log_likelihoods))

    # compute ts and tv using the likelihoods as observation weights
    weights = [math.exp(ll) for ll in log_likelihoods]
    total = sum(weights)
    weights = [w / total for w in weights]

    ts_pairs, tv_pairs = get_ts_tv_pairs()

    def report(label, pairs, site_aggregation, array):
        # Query arbplf_trans under the assumed model for one selection of
        # state pairs, then print the label, the first value and the raw
        # response.
        request = {
            "model_and_data": {
                "edges": edges,
                "edge_rate_coefficients": assumed_coeffs,
                "root_prior": "equilibrium_distribution",
                "rate_matrix": assumed_m,
                "rate_divisor": assumed_denom * 100,
                "probability_array": array,
            },
            "site_reduction": {"aggregation": site_aggregation},
            "edge_reduction": {"aggregation": "sum"},
            "trans_reduction": {
                # Every selected pair gets the same weight of 1000.
                "aggregation": [1000] * len(pairs),
                "selection": pairs,
            },
        }
        response = arbplf_trans(json.dumps(request))
        frame = pd.read_json(
            StringIO(response), orient='split', precise_float=True)
        print(label)
        print(frame.value.values[0])
        print(response)

    # The prior queries use the single all-ones site with "sum" over sites;
    # the conditional queries use every site pattern with the
    # likelihood-derived weights.
    report("prior ts expectation:", ts_pairs, "sum", prior_array)
    report("conditional ts expectation:", ts_pairs, weights,
           probability_array)
    report("prior tv expectation:", tv_pairs, "sum", prior_array)
    report("conditional tv expectation:", tv_pairs, weights,
           probability_array)
def run():
    """
    Print prior and conditional expected transition counts on a five-edge
    tree with a GTR rate matrix.

    Site log likelihoods are computed with ``simulation_rates``. Their
    exponentials, normalized and scaled to total 20000, are then used as
    site weights for an arbplf_trans query that uses ``inference_rates``.
    The prior query uses a single site in which every state is allowed at
    every node, and its result is multiplied by 20000 before printing.

    Both queries select every ordered pair of distinct states and sum over
    them. Results are printed; nothing is returned.
    """
    state_count = 4

    # Define the tree used in the phyl transition mapping example.
    edges = [[4, 0], [4, 1], [5, 4], [5, 2], [5, 3]]
    node_count = len(edges) + 1
    inference_rates = [0.001, 0.002, 0.008, 0.01, 0.1]
    simulation_rates = [0.001 * (9 / 20), 0.002, 0.008, 0.01, 0.1]

    # Disabled alternative kept from the original (it was parked in a bare
    # string literal):
    #
    #   # Define the poisson rate matrix with expected exit rate 1
    #   rate_divisor = 3
    #   rate_matrix = [
    #       [0, 1, 1, 1],
    #       [1, 0, 1, 1],
    #       [1, 1, 0, 1],
    #       [1, 1, 1, 0]]

    # use a GTR rate matrix
    a, b, c, d, e, pA, pC, pG, pT = (
        1, 0.2, 0.3, 0.4, 0.4,
        0.1, 0.35, 0.35, 0.2)
    rate_matrix = make_rate_matrix(a, b, c, d, e, pA, pC, pG, pT)

    # Nodes 0-3 are observed; nodes 4 and 5 are unobserved.
    # Use one site for each of the 4^4 = 256 possible observations.
    X = [-1]
    U = range(4)
    all_site_patterns = list(itertools.product(U, U, U, U, X, X))

    # A single site in which every state is allowed at every node.
    prior_array = [[[1] * state_count for _ in range(node_count)]]

    # -1 marks an unobserved node (all states allowed); any other entry is
    # an observed state, encoded as an indicator row.
    probability_array = [
        [
            [1] * state_count if p == -1
            else [int(state == p) for state in range(state_count)]
            for p in pattern
        ]
        for pattern in all_site_patterns
    ]

    ll_request = {
        "model_and_data": {
            "edges": edges,
            "edge_rate_coefficients": simulation_rates,
            "rate_divisor": "equilibrium_exit_rate",
            "root_prior": "equilibrium_distribution",
            "rate_matrix": rate_matrix,
            "probability_array": probability_array,
        },
    }
    s = arbplf_ll(json.dumps(ll_request))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    log_likelihoods = df.value.values

    # compute expectations using the likelihoods as observation weights
    weights = [math.exp(ll) for ll in log_likelihoods]
    total = sum(weights)
    weights = [(20000 * w) / total for w in weights]

    # Every ordered pair of distinct states.
    selection = [
        [i, j]
        for i in range(state_count)
        for j in range(state_count)
        if i != j
    ]

    def expected_counts(array, site_aggregation):
        # Query arbplf_trans with the inference rates and return the
        # 'value' column of the response.
        request = {
            "model_and_data": {
                "edges": edges,
                "edge_rate_coefficients": inference_rates,
                "rate_divisor": "equilibrium_exit_rate",
                "root_prior": "equilibrium_distribution",
                "rate_matrix": rate_matrix,
                "probability_array": array,
            },
            "site_reduction": {"aggregation": site_aggregation},
            "trans_reduction": {
                "aggregation": "sum",
                "selection": selection,
            },
        }
        response = arbplf_trans(json.dumps(request))
        frame = pd.read_json(
            StringIO(response), orient='split', precise_float=True)
        return frame.value.values

    # The prior query sees only one site, so it is scaled by 20000 here;
    # the conditional weights already total 20000.
    prior = expected_counts(prior_array, "sum")
    print("prior expectation:")
    print(20000 * prior)

    conditional = expected_counts(probability_array, weights)
    print("conditional expectation:")
    print(conditional)