예제 #1
0
def test_log_comb_factor():
    """Compare ``_log_comb_factor`` with a brute-force count on 2x2 tables.

    For each table the expected value is the log multinomial coefficient of
    its four entries plus the log of the number of distinct rearrangements
    enumerated by ``log_num_swaps``.
    """
    def log_num_swaps(n00, n01, n10, n11):
        # The eight rearrangements of the four cells; duplicates collapse
        # inside the set, so its size is the number of distinct tables.
        distinct = {
            (n00, n01, n10, n11), (n00, n10, n01, n11),
            (n11, n10, n01, n00), (n11, n01, n10, n00),
            (n01, n00, n11, n10), (n01, n11, n00, n10),
            (n10, n00, n11, n01), (n10, n11, n00, n01),
        }
        return np.log(len(distinct))

    def expected(n00, n01, n10, n11):
        # Reference value: multinomial term plus the rearrangement count.
        cells = np.array([n00, n01, n10, n11])
        return log_mult_coef(cells) + log_num_swaps(n00, n01, n10, n11)

    # All four cells equal: only the multinomial term is expected.
    observed = _log_comb_factor(np.array([[2, 2], [2, 2]]))
    reference = log_mult_coef(np.array([2, 2, 2, 2]))
    assert np.allclose(observed, reference)

    # One cell differs from the other three.
    observed = _log_comb_factor(np.array([[3, 2], [2, 2]]))
    reference = expected(3, 2, 2, 2)
    assert np.allclose(observed, reference)

    # Equal values on each diagonal.
    observed = _log_comb_factor(np.array([[3, 2], [2, 3]]))
    reference = expected(3, 2, 2, 3)
    assert np.allclose(observed, reference), \
        np.exp(log_num_swaps(3, 2, 2, 3))

    # Four distinct values.
    observed = _log_comb_factor(np.array([[1, 2], [3, 4]]))
    reference = expected(1, 2, 3, 4)
    assert np.allclose(observed, reference)

    # Equal values within each row.
    observed = _log_comb_factor(np.array([[2, 2], [3, 3]]))
    reference = expected(2, 2, 3, 3)
    assert np.allclose(observed, reference)
예제 #2
0
def _get_dip_likelihood(table, subtable_sizes, config, sample_size,
                        fast_missing):
    """Combine haplotype log-likelihoods for one flat configuration.

    ``config`` is unpacked row by row into a 4x4 integer grid ``gconf``.
    The count in ``gconf[1, 1]`` is split in every possible way into ``k``
    and ``gconf[1, 1] - k``; each split yields an 8-entry haplotype
    configuration whose log-likelihood is weighted by the log multinomial
    coefficient of the split, and the splits are accumulated with
    ``np.logaddexp``.

    NOTE(review): the indexing suggests rows/columns 0 and 2 are the two
    homozygous genotypes, 1 is heterozygous and the last row/column is
    missing data -- confirm against the caller.

    Parameters
    ----------
    table : array with at least two dimensions
        Passed through to the haplotype likelihood helper; the result has
        one entry per ``table.shape[1]``.
    subtable_sizes :
        Forwarded to ``_get_hap_likelihood_fast_missing``; only used when
        ``fast_missing`` is true.
    config : sequence of int
        Flat configuration. Entries 0-2, 4-6 and 8-10 are read when
        ``fast_missing`` is true; entries 0-14 are read otherwise.
    sample_size :
        Forwarded to ``_get_hap_likelihood``; only used when
        ``fast_missing`` is false.
    fast_missing : bool
        Selects both the ``gconf`` layout (last row and column zeroed) and
        the haplotype likelihood helper.

    Returns
    -------
    numpy.ndarray
        Accumulated log-likelihoods plus ``swaps * log(2)``.
    """
    if fast_missing:
        # Last row and column are forced to zero in this mode.
        gconf = np.array([[config[0], config[1], config[2], 0],
                          [config[4], config[5], config[6], 0],
                          [config[8], config[9], config[10], 0],
                          [0, 0, 0, 0]])

    else:
        gconf = np.array([[config[0], config[1], config[2], config[3]],
                          [config[4], config[5], config[6], config[7]],
                          [config[8], config[9], config[10], config[11]],
                          [config[12], config[13], config[14], 0]])
    # ``np.NINF`` was removed in NumPy 2.0; ``-np.inf`` is the same value
    # and exists in every NumPy release.
    to_return = np.full(table.shape[1], -np.inf)
    for k in range(gconf[1, 1] + 1):
        # ``k`` of the gconf[1, 1] entries go to hap_conf[0] and hap_conf[4],
        # the remaining ``k_inv`` go to hap_conf[1] and hap_conf[3].
        k_inv = gconf[1, 1] - k
        hap_conf = np.array([2*gconf[0, 0] + gconf[0, 1] + gconf[1, 0] + k,
                             2*gconf[0, 2] + gconf[0, 1] + gconf[1, 2] + k_inv,
                             2*gconf[0, -1] + gconf[1, -1],
                             2*gconf[2, 0] + gconf[2, 1] + gconf[1, 0] + k_inv,
                             2*gconf[2, 2] + gconf[2, 1] + gconf[1, 2] + k,
                             2*gconf[2, -1] + gconf[1, -1],
                             2*gconf[-1, 0] + gconf[-1, 1],
                             2*gconf[-1, 2] + gconf[-1, 1]])
        # Log number of ways to choose this particular split.
        comb = log_mult_coef(np.array([k, k_inv]))
        if fast_missing:
            this_ll = _get_hap_likelihood_fast_missing(table, subtable_sizes,
                                                       hap_conf)
        else:
            this_ll = _get_hap_likelihood(table, hap_conf, sample_size)
        this_ll += comb
        to_return = np.logaddexp(to_return, this_ll)
    # Every entry in row 1 or column 1 of gconf contributes a factor of two;
    # gconf[1, 1] lies in both and is therefore subtracted once.
    swaps = gconf[1, :].sum() + gconf[:, 1].sum() - gconf[1, 1]
    return swaps*np.log(2) + to_return
예제 #3
0
def _log_comb_factor(config):
    """Return the log combinatorial factor of a 2x2 configuration.

    The result is the log multinomial coefficient of the four entries (via
    ``log_mult_coef``) plus the log of the number of distinct tables that
    can be obtained from ``config`` by swapping its rows, swapping its
    columns and/or transposing it.

    The distinct tables are counted directly instead of being derived from
    the number of unique entries: with three unique values the count is 4
    when the repeated value sits on a diagonal but 8 when it sits in
    adjacent cells (e.g. ``[[1, 1], [2, 3]]``), which a formula based only
    on the number of unique values cannot distinguish.

    Parameters
    ----------
    config : numpy.ndarray
        Array indexed as ``config[i, j]`` for ``i, j`` in ``{0, 1}``;
        expected to be 2x2.

    Returns
    -------
    float
        ``log_mult_coef(config.flatten()) + log(number of distinct tables)``.
    """
    n00, n01 = config[0, 0], config[0, 1]
    n10, n11 = config[1, 0], config[1, 1]
    # All eight images of the table under row swap, column swap and
    # transpose; identical images collapse inside the set.
    images = {
        (n00, n01, n10, n11), (n00, n10, n01, n11),
        (n11, n10, n01, n00), (n11, n01, n10, n00),
        (n01, n00, n11, n10), (n01, n11, n00, n10),
        (n10, n00, n11, n01), (n10, n11, n00, n01),
    }
    log_mult = log_mult_coef(config.flatten())
    return log_mult + np.log(len(images))
예제 #4
0
def _get_hap_comb(hconf, to_add00, to_add01, to_add10, to_add11):
    """Return a log-sum of products of multinomial coefficients.

    The function sums, over all admissible pairs ``(i, j)``, the product of
    five multinomial coefficients built from the ``to_add*`` counts and the
    last row / last column of ``hconf``. Everything is carried out in log
    space with ``np.logaddexp``.

    NOTE(review): this looks like a count of the ways to distribute the
    entries of the last row and column of ``hconf`` (presumably partially
    missing haplotypes) over the four ``to_add*`` categories -- confirm
    against the caller.

    Parameters
    ----------
    hconf : numpy.ndarray
        Two-dimensional array; only ``hconf[-1, 0]``, ``hconf[-1, 1]``,
        ``hconf[0, -1]`` and ``hconf[1, -1]`` are read.
    to_add00, to_add01, to_add10, to_add11 : int
        Counts for the four categories.

    Returns
    -------
    float
        Log of the accumulated sum; ``-inf`` when no ``(i, j)`` pair is
        admissible.
    """
    # Rough iteration-count estimates for the double loop below, for the
    # table as given and for its transpose.
    runtime = ((1 + min(to_add10, to_add00, hconf[-1, 0],
                        to_add00 + to_add10 - hconf[-1, 0]))
               * (1 + min(to_add01, to_add11, hconf[-1, 1],
                          to_add01 + to_add11 - hconf[-1, 1])))
    transpose_runtime = (
        (1 + min(to_add01, to_add00, hconf[0, -1],
                 to_add00 + to_add01 - hconf[0, -1]))
        * (1 + min(to_add10, to_add11, hconf[1, -1],
                   to_add10 + to_add11 - hconf[1, -1]))
    )
    if runtime > transpose_runtime:
        # Work on the transposed problem when its estimate is smaller;
        # the off-diagonal counts swap roles accordingly.
        hconf = hconf.transpose()
        to_add01, to_add10 = to_add10, to_add01
    # ``np.NINF`` was removed in NumPy 2.0; ``-np.inf`` is the same value
    # and exists in every NumPy release.
    to_return = -np.inf
    # ``i`` is the part of hconf[-1, 0] paired with to_add10; the remainder
    # hconf[-1, 0] - i must fit into to_add00.
    imin = max(0, hconf[-1, 0] - to_add00)
    imax = min(to_add10, hconf[-1, 0])
    for i in range(imin, imax + 1):
        i_coef = -np.inf
        i_comb = log_mult_coef(np.array([i, hconf[-1, 0] - i]))
        # Bounds on ``j`` keep every argument passed to log_mult_coef
        # below non-negative.
        jmin = max(0, hconf[-1, 1] - to_add11,
                   i + hconf[-1, 1] + hconf[1, -1] - to_add11 - to_add10)
        jmax = min(to_add00 + to_add01 + i - hconf[-1, 0] - hconf[0, -1],
                   to_add01, hconf[-1, 1])
        for j in range(jmin, jmax + 1):
            this_coef = log_mult_coef(
                np.array([to_add00 - hconf[-1, 0] + i, to_add01 - j])
            )
            this_coef += log_mult_coef(
                np.array([to_add11 - hconf[-1, 1] + j, to_add10 - i])
            )
            this_coef += log_mult_coef(np.array([
                to_add00 + to_add01 + i - j - hconf[-1, 0] - hconf[0, -1],
                to_add11 + to_add10 + j - i - hconf[-1, 1] - hconf[1, -1]])
            )
            this_coef += log_mult_coef(np.array([j, hconf[-1, 1] - j]))
            i_coef = np.logaddexp(i_coef, this_coef)
        to_return = np.logaddexp(to_return, i_coef + i_comb)
    return to_return
예제 #5
0
def test_log_mult_coef():
    """Spot-check ``log_mult_coef`` on small count vectors."""
    # Two categories: the multinomial coefficient is a binomial coefficient.
    binomial_log = np.log(binom(10, 2))
    assert np.allclose(binomial_log, log_mult_coef(np.array([2, 8])))

    # Three singletons: 3! = 6 orderings.
    three_ones = log_mult_coef(np.array([1, 1, 1]))
    assert np.allclose(three_ones, np.log(6))

    # All-zero counts: coefficient is 1, so the log is 0.
    all_zero = log_mult_coef(np.array([0, 0, 0, 0]))
    assert np.allclose(all_zero, 0)

    # Negative counts are rejected.
    with pytest.raises(ValueError):
        log_mult_coef(np.array([-1]))