예제 #1
0
def mytrans(d):
    """
    Evaluate arbplf_trans for a query given as a dict.

    The dict is serialized with json.dumps, handed to arbplf_trans, and the
    JSON text that comes back is parsed into a pandas.DataFrame using the
    'split' orientation.
    """
    response = arbplf_trans(json.dumps(d))
    return pd.read_json(
            StringIO(response), orient='split', precise_float=True)
예제 #2
0
def test_heterogeneous_edge_rates():
    """
    Run each arbplf query on a two-edge path whose edge rates differ.

    The marginal, trans, em_update and dwell results are compared with the
    desired_* values defined outside this function, and the log likelihood
    is compared with -3.0.
    """
    # The two edges carry different rate coefficients (1 and 2).
    model_and_data = {
        "edges": [[0, 1], [1, 2]],
        "edge_rate_coefficients": [1, 2],
        "rate_matrix": [[0, 1], [0, 0]],
        "probability_array": [[[1, 0], [1, 1], [1, 0]]],
    }
    d = {
        "model_and_data": model_and_data,
        "site_reduction": {"aggregation": "only"},
    }

    marginal = json.loads(arbplf_marginal(json.dumps(d)))
    assert_equal(marginal, desired_marginal)

    # The trans query additionally selects the state pairs [0, 1] and [1, 0].
    trans_query = copy.deepcopy(d)
    trans_query['trans_reduction'] = {"selection": [[0, 1], [1, 0]]}
    trans = json.loads(arbplf_trans(json.dumps(trans_query)))
    assert_equal(trans, desired_trans)

    log_likelihood = json.loads(arbplf_ll(json.dumps(d)))
    assert_equal(log_likelihood, {"columns": ["value"], "data": [[-3.0]]})

    em_update = json.loads(arbplf_em_update(json.dumps(d)))
    assert_equal(em_update, desired_em_update)

    dwell = json.loads(arbplf_dwell(json.dumps(d)))
    assert_equal(dwell, desired_dwell)
예제 #3
0
def test_edges_are_not_preordered():
    """
    Run each arbplf query with the edges listed out of order.

    The edge list is [[1, 2], [0, 1]] and the single nonzero entry of the
    rate matrix is 2.  The marginal, trans, em_update and dwell results are
    compared with the desired_* values defined outside this function, and
    the log likelihood is compared with -6.0.
    """
    edges_out_of_order = [[1, 2], [0, 1]]
    d = {
        "model_and_data": {
            "edges": edges_out_of_order,
            "edge_rate_coefficients": [1, 2],
            "rate_matrix": [[0, 2], [0, 0]],
            "probability_array": [[[1, 0], [1, 1], [1, 0]]],
        },
        "site_reduction": {"aggregation": "only"},
    }

    marginal_result = json.loads(arbplf_marginal(json.dumps(d)))
    assert_equal(marginal_result, desired_marginal)

    # Same query plus a selection of the state pairs [0, 1] and [1, 0].
    with_selection = copy.deepcopy(d)
    with_selection['trans_reduction'] = {"selection": [[0, 1], [1, 0]]}
    trans_result = json.loads(arbplf_trans(json.dumps(with_selection)))
    assert_equal(trans_result, desired_trans)

    ll_result = json.loads(arbplf_ll(json.dumps(d)))
    expected_ll = {"columns": ["value"], "data": [[-6.0]]}
    assert_equal(ll_result, expected_ll)

    em_result = json.loads(arbplf_em_update(json.dumps(d)))
    assert_equal(em_result, desired_em_update)

    dwell_result = json.loads(arbplf_dwell(json.dumps(d)))
    assert_equal(dwell_result, desired_dwell)
예제 #4
0
def mytrans(d):
    """
    Dict-in, DataFrame-out convenience wrapper around arbplf_trans.

    arbplf_trans itself consumes and produces JSON text; this wrapper
    serializes `d` on the way in and parses the 'split'-oriented table on
    the way out.
    """
    json_in = json.dumps(d)
    json_out = arbplf_trans(json_in)
    table = pd.read_json(
            StringIO(json_out), orient='split', precise_float=True)
    return table
def test_trans_10():
    """
    Check that the values reported for the state pair [1, 0] are all zero.

    The query is a copy of the shared model D, and the check is repeated
    for the 'sum', 'avg' and 'only' aggregations of the trans reduction.
    """
    for agg in ('sum', 'avg', 'only'):
        query = copy.deepcopy(D)
        query['trans_reduction'] = {
            "selection": [[1, 0]],
            "aggregation": agg,
        }
        response = arbplf_trans(json.dumps(query))
        frame = pd.read_json(
                StringIO(response), orient='split', precise_float=True)
        observed = frame.set_index('edge').value.values
        # The expected value is exactly zero in every row
        # (presumably D has no 1 -> 0 rate; D is defined elsewhere).
        assert_equal(observed, np.zeros_like(observed))
def test_truncated_trans_10():
    """
    Check that the [1, 0] values stay zero when the last row is pinned.

    The last row of the first site's probability array in a copy of D is
    replaced by [0, 1] before querying arbplf_trans with 'sum' aggregation.
    """
    query = copy.deepcopy(D)
    first_site = query['model_and_data']['probability_array'][0]
    first_site[-1] = [0, 1]
    query['trans_reduction'] = {
        "selection": [[1, 0]],
        "aggregation": "sum",
    }
    response = arbplf_trans(json.dumps(query))
    frame = pd.read_json(
            StringIO(response), orient='split', precise_float=True)
    observed = frame.set_index('edge').value.values
    # The expected value is exactly zero in every row.
    assert_equal(observed, np.zeros_like(observed))
예제 #7
0
def main():
    """
    Estimate edge rate coefficients with EM and a Newton refinement, then
    print the conditionally expected transition counts.

    The rate matrix comes from sample_reversible_rate_matrix under a fixed
    numpy seed; it is divided by its expected rate and its diagonal is set
    to zero before use.  The data are three weighted sites on a tree whose
    three edges all start at node 0.  Progress is printed to stdout and
    nothing is returned.
    """
    def values_of(json_text):
        # Parse a 'split'-oriented JSON table and return its value column.
        df = pd.read_json(
                StringIO(json_text), orient='split', precise_float=True)
        return df.value.values.tolist()

    np.random.seed(123475)

    # sample a random rate matrix
    state_count = 3
    Q = sample_reversible_rate_matrix(state_count)
    p = equilibrium(Q)
    expected_rate = -p.dot(np.diag(Q))
    print('expected rate:', expected_rate)
    Q = Q / expected_rate
    np.fill_diagonal(Q, 0)

    # use ad hoc data
    probability_array = [[[1, 1, 1], [1, 0, 0], [1, 0, 0], [1, 0, 0]],
                         [[1, 1, 1], [0, 1, 0], [1, 0, 0], [1, 0, 0]],
                         [[1, 1, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]]]
    site_weights = [.7, .2, .1]
    edges = [[0, 1], [0, 2], [0, 3]]
    coefficients = [.01, .01, .01]
    model_and_data = {
        "edges": edges,
        "edge_rate_coefficients": coefficients,
        "rate_matrix": Q.tolist(),
        "probability_array": probability_array
    }
    d = {
        "model_and_data": model_and_data,
        "site_reduction": {
            "aggregation": site_weights
        }
    }
    print(d)

    # Run a fixed number of EM updates, feeding each estimate back in.
    for _ in range(100):
        y = values_of(arbplf_em_update(json.dumps(d)))
        model_and_data['edge_rate_coefficients'] = y
        print('coefficients updated by EM:', y)

    # Refine the final EM estimate with arbplf_newton_refine.
    y = values_of(arbplf_newton_refine(json.dumps(d)))
    print('coefficients updated by newton refinement:', y)

    # Query every ordered pair of distinct states, summed together,
    # at the refined coefficients.
    d['trans_reduction'] = {
        'selection': [[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]],
        'aggregation': 'sum'
    }
    model_and_data['edge_rate_coefficients'] = y

    y = values_of(arbplf_trans(json.dumps(d)))
    print('conditionally expected transition counts:', y)
def test_trans_01():
    """
    Compare the values reported for the state pair [0, 1] with a closed form.

    The query is a copy of the shared model D, and the check is repeated
    for the 'sum', 'avg' and 'only' aggregations of the trans reduction.
    """
    for agg in ('sum', 'avg', 'only'):
        query = copy.deepcopy(D)
        query['trans_reduction'] = {
            "selection": [[0, 1]],
            "aggregation": agg,
        }
        response = arbplf_trans(json.dumps(query))
        frame = pd.read_json(
                StringIO(response), orient='split', precise_float=True)
        observed = frame.set_index('edge').value.values

        # Closed form: exp(-start) - exp(-end), where start and end are
        # consecutive partial sums of `rates` (with a leading zero).
        partial_sums = np.cumsum([0] + rates)
        start = partial_sums[:-1]
        end = partial_sums[1:]
        expected = exp(-start) - exp(-end)

        assert_allclose(observed, expected)
예제 #9
0
def main():
    """
    Fit edge rate coefficients by repeated EM updates followed by a Newton
    refinement, then print the conditionally expected transition counts.

    A rate matrix is drawn by sample_reversible_rate_matrix with a fixed
    numpy seed, divided by its expected rate, and given a zero diagonal.
    The data are three weighted sites on a tree whose three edges all start
    at node 0.  Every intermediate result is printed to stdout; nothing is
    returned.
    """
    def value_column(json_text):
        # Parse a "split"-oriented JSON table and return its value column.
        df = pd.read_json(
            StringIO(json_text), orient="split", precise_float=True)
        return df.value.values.tolist()

    np.random.seed(123475)

    # sample a random rate matrix
    state_count = 3
    Q = sample_reversible_rate_matrix(state_count)
    p = equilibrium(Q)
    expected_rate = -p.dot(np.diag(Q))
    print("expected rate:", expected_rate)
    Q = Q / expected_rate
    np.fill_diagonal(Q, 0)

    # use ad hoc data
    probability_array = [
        [[1, 1, 1], [1, 0, 0], [1, 0, 0], [1, 0, 0]],
        [[1, 1, 1], [0, 1, 0], [1, 0, 0], [1, 0, 0]],
        [[1, 1, 1], [1, 0, 0], [0, 1, 0], [0, 0, 1]],
    ]
    site_weights = [0.7, 0.2, 0.1]
    edges = [[0, 1], [0, 2], [0, 3]]
    coefficients = [0.01, 0.01, 0.01]
    model_and_data = {
        "edges": edges,
        "edge_rate_coefficients": coefficients,
        "rate_matrix": Q.tolist(),
        "probability_array": probability_array,
    }
    d = {
        "model_and_data": model_and_data,
        "site_reduction": {"aggregation": site_weights},
    }
    print(d)

    # Each EM update replaces the coefficients used by the next update.
    for _ in range(100):
        y = value_column(arbplf_em_update(json.dumps(d)))
        model_and_data["edge_rate_coefficients"] = y
        print("coefficients updated by EM:", y)

    # Refine the final EM estimate with arbplf_newton_refine.
    y = value_column(arbplf_newton_refine(json.dumps(d)))
    print("coefficients updated by newton refinement:", y)

    # Query every ordered pair of distinct states, summed together,
    # at the refined coefficients.
    d["trans_reduction"] = {
        "selection": [[0, 1], [0, 2], [1, 0], [1, 2], [2, 0], [2, 1]],
        "aggregation": "sum",
    }
    model_and_data["edge_rate_coefficients"] = y

    y = value_column(arbplf_trans(json.dumps(d)))
    print("conditionally expected transition counts:", y)
def test_truncated_trans_01():
    """
    Compare the [0, 1] values with a closed form when the last row is pinned.

    The last row of the first site's probability array in a copy of D is
    replaced by [0, 1] before querying arbplf_trans with 'sum' aggregation.
    """
    query = copy.deepcopy(D)
    first_site = query['model_and_data']['probability_array'][0]
    first_site[-1] = [0, 1]
    query['trans_reduction'] = {
        "selection": [[0, 1]],
        "aggregation": "sum",
    }
    response = arbplf_trans(json.dumps(query))
    frame = pd.read_json(
            StringIO(response), orient='split', precise_float=True)
    observed = frame.set_index('edge').value.values

    # Closed form: differences of `scaled` over consecutive partial sums
    # of `rates` (with a leading zero); `total` is the last partial sum.
    partial_sums = np.cumsum([0] + rates)
    total = partial_sums[-1]

    def scaled(x):
        return (exp(-total) - exp(-x)) / expm1(-total)

    expected = scaled(partial_sums[:-1]) - scaled(partial_sums[1:])
    assert_allclose(observed, expected)
예제 #11
0
def run(assumed_kappa):
    """
    Print prior and conditional "ts" and "tv" expectations for a given kappa.

    Site log likelihoods are first computed with the rate matrix for
    kappa = 4 and the "true" edge rate coefficients; their normalized
    exponentials become the site weights.  The expectations are then
    queried from arbplf_trans using the rate matrix for `assumed_kappa`
    and the "assumed" coefficients: once on a single site with no
    observations (prior) and once on the weighted site patterns
    (conditional), for each of the two pair lists returned by
    get_ts_tv_pairs (presumably transitions and transversions).

    Parameters
    ----------
    assumed_kappa
        Passed to get_rate_matrix to build the rate matrix used for the
        expectation queries.

    Returns
    -------
    None
        All results, including the raw JSON responses, go to stdout.
    """
    state_count = 4
    node_count = 5
    true_kappa = 4
    assumed_m, assumed_denom = get_rate_matrix(assumed_kappa)
    true_m, true_denom = get_rate_matrix(true_kappa)
    edges = [[0, 2], [0, 1], [1, 3], [1, 4]]
    assumed_coeffs = [28, 21, 12, 9]
    true_coeffs = [30, 20, 10, 10]

    def indicator_row(state):
        # -1 marks an unobserved node: every state is allowed.
        if state == -1:
            return [1] * state_count
        row = [0] * state_count
        row[state] = 1
        return row

    # There are five nodes.  Nodes 0 and 1 are never observed; one site is
    # used for each of the 4^3 = 64 joint observations at nodes 2, 3 and 4.
    X = [-1]
    U = range(state_count)
    probability_array = [
        [indicator_row(state) for state in pattern]
        for pattern in itertools.product(X, X, U, U, U)]
    # A single site with no observation at any node.
    prior_array = [[[1] * state_count for _ in range(node_count)]]

    # Log likelihood of every site pattern under the "true" model.
    d = {
        "model_and_data": {
            "edges": edges,
            "edge_rate_coefficients": true_coeffs,
            "root_prior": "equilibrium_distribution",
            "rate_matrix": true_m,
            "rate_divisor": true_denom * 100,
            "probability_array": probability_array
        }
    }
    s = arbplf_ll(json.dumps(d))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    log_likelihoods = df.value.values
    print('log likelihood sum:', sum(log_likelihoods))

    # compute ts and tv using the likelihoods as observation weights
    weights = [math.exp(ll) for ll in log_likelihoods]
    total = sum(weights)
    weights = [w / total for w in weights]
    ts_pairs, tv_pairs = get_ts_tv_pairs()

    def report(label, pairs, site_aggregation, array):
        # Query arbplf_trans under the "assumed" model, then print the
        # label, the first reported value and the raw JSON response.
        # NOTE(review): the weight of 1000 per selected pair and the 100x
        # rate divisor look like unit scalings -- confirm with the author.
        query = {
            "model_and_data": {
                "edges": edges,
                "edge_rate_coefficients": assumed_coeffs,
                "root_prior": "equilibrium_distribution",
                "rate_matrix": assumed_m,
                "rate_divisor": assumed_denom * 100,
                "probability_array": array
            },
            "site_reduction": {
                "aggregation": site_aggregation
            },
            "edge_reduction": {
                "aggregation": "sum"
            },
            "trans_reduction": {
                "aggregation": [1000] * len(pairs),
                "selection": pairs
            }
        }
        s = arbplf_trans(json.dumps(query))
        df = pd.read_json(StringIO(s), orient='split', precise_float=True)
        print(label)
        print(df.value.values[0])
        print(s)

    report("prior ts expectation:", ts_pairs, "sum", prior_array)
    report("conditional ts expectation:", ts_pairs, weights,
           probability_array)
    report("prior tv expectation:", tv_pairs, "sum", prior_array)
    report("conditional tv expectation:", tv_pairs, weights,
           probability_array)
예제 #12
0
def run():
    """
    Print prior and conditional expectations summed over all state changes.

    Site log likelihoods are computed with `simulation_rates`; their
    exponentials, rescaled to sum to 20000, become the site weights.  The
    expectations are then queried from arbplf_trans with `inference_rates`
    for all twelve ordered pairs of distinct states: once on a single site
    with no observations (prior, printed after multiplying by 20000) and
    once on the weighted site patterns (conditional).

    Returns nothing; the two result arrays are printed to stdout.
    """
    state_count = 4
    # NOTE(review): 20000 presumably stands for a nominal number of sites;
    # it scales both the site weights and the printed prior -- confirm.
    scale = 20000

    # Define the tree used in the phyl transition mapping example.
    edges = [[4, 0], [4, 1], [5, 4], [5, 2], [5, 3]]
    node_count = len(edges) + 1
    inference_rates = [0.001, 0.002, 0.008, 0.01, 0.1]
    simulation_rates = [0.001 * (9 / 20), 0.002, 0.008, 0.01, 0.1]

    # use a GTR rate matrix
    # Argument order: a, b, c, d, e, pA, pC, pG, pT.
    gtr_params = (1, 0.2, 0.3, 0.4, 0.4, 0.1, 0.35, 0.35, 0.2)
    rate_matrix = make_rate_matrix(*gtr_params)

    def indicator_row(state):
        # -1 marks an unobserved node: every state is allowed.
        if state == -1:
            return [1] * state_count
        row = [0] * state_count
        row[state] = 1
        return row

    def model(rates, array):
        # Assemble the model_and_data section for the given edge rates.
        return {
            "edges": edges,
            "edge_rate_coefficients": rates,
            "rate_divisor": "equilibrium_exit_rate",
            "root_prior": "equilibrium_distribution",
            "rate_matrix": rate_matrix,
            "probability_array": array}

    # Use one site for each of the 4^4 = 256 possible observations.
    # Nodes 0-3 are observed; nodes 4 and 5 are not.
    X = [-1]
    U = range(state_count)
    probability_array = [
        [indicator_row(state) for state in pattern]
        for pattern in itertools.product(U, U, U, U, X, X)]
    # A single site with no observation at any node.
    prior_array = [[[1] * state_count for _ in range(node_count)]]

    # Log likelihood of every site pattern under the simulation rates.
    query = {"model_and_data": model(simulation_rates, probability_array)}
    s = arbplf_ll(json.dumps(query))
    df = pd.read_json(StringIO(s), orient='split', precise_float=True)
    log_likelihoods = df.value.values

    # compute expectations using the likelihoods as observation weights
    weights = [math.exp(ll) for ll in log_likelihoods]
    total = sum(weights)
    weights = [(scale * w) / total for w in weights]

    # Every ordered pair of distinct states.
    selection = [
        [i, j]
        for i in range(state_count)
        for j in range(state_count)
        if i != j]

    def expected_values(array, site_aggregation):
        # Query arbplf_trans under the inference rates; return the values.
        query = {
            "model_and_data": model(inference_rates, array),
            "site_reduction": {"aggregation": site_aggregation},
            "trans_reduction": {
                "aggregation": "sum",
                "selection": selection}}
        s = arbplf_trans(json.dumps(query))
        df = pd.read_json(StringIO(s), orient='split', precise_float=True)
        return df.value.values

    prior = expected_values(prior_array, "sum")
    print("prior expectation:")
    print(scale * prior)

    conditional = expected_values(probability_array, weights)
    print("conditional expectation:")
    print(conditional)