def test_log_comb_factor():
    """Compare ``_log_comb_factor`` with a brute-force symmetry count.

    For each 2x2 table the expected value is the log multinomial coefficient
    of the four counts plus the log of the number of distinct rearrangements
    enumerated by ``log_num_swaps``.
    """

    def log_num_swaps(n00, n01, n10, n11):
        # Log of the number of distinct tuples among the eight listed
        # rearrangements of the four counts (a set drops the duplicates).
        images = {
            (n00, n01, n10, n11),
            (n00, n10, n01, n11),
            (n11, n10, n01, n00),
            (n11, n01, n10, n00),
            (n01, n00, n11, n10),
            (n01, n11, n00, n10),
            (n10, n00, n11, n01),
            (n10, n11, n00, n01),
        }
        return np.log(len(images))

    def reference(n00, n01, n10, n11):
        # Expected factor: multinomial term plus the symmetry term.
        return (log_mult_coef(np.array([n00, n01, n10, n11]))
                + log_num_swaps(n00, n01, n10, n11))

    # All four counts equal: only the multinomial term is expected.
    observed = _log_comb_factor(np.array([[2, 2], [2, 2]]))
    assert np.allclose(observed, log_mult_coef(np.array([2, 2, 2, 2])))

    # One count differs from the other three.
    observed = _log_comb_factor(np.array([[3, 2], [2, 2]]))
    assert np.allclose(observed, reference(3, 2, 2, 2))

    # Two distinct values, equal along both diagonals.
    observed = _log_comb_factor(np.array([[3, 2], [2, 3]]))
    assert np.allclose(observed, reference(3, 2, 2, 3)), (
        np.exp(log_num_swaps(3, 2, 2, 3))
    )

    # Four distinct values.
    observed = _log_comb_factor(np.array([[1, 2], [3, 4]]))
    assert np.allclose(observed, reference(1, 2, 3, 4))

    # Two distinct values, equal within each row.
    observed = _log_comb_factor(np.array([[2, 2], [3, 3]]))
    assert np.allclose(observed, reference(2, 2, 3, 3))
def _get_dip_likelihood(table, subtable_sizes, config, sample_size,
                        fast_missing):
    """Return log-likelihoods for a flattened 4x4 configuration.

    ``config`` is read as a row-major 4x4 grid (presumably diploid genotype
    counts at two loci, with index 1 the heterozygous class and the last
    index the missing class -- TODO confirm against the caller).  Every
    split of the ``gconf[1, 1]`` entry into ``k`` and ``k_inv`` is turned
    into an 8-entry configuration, scored by one of the haplotype-likelihood
    helpers, weighted by the log binomial coefficient of the split, and the
    results are combined with ``np.logaddexp``.

    Args:
        table: Array-like whose ``shape[1]`` fixes the length of the result;
            forwarded unchanged to the haplotype-likelihood helper.
        subtable_sizes: Forwarded to ``_get_hap_likelihood_fast_missing``;
            unused when ``fast_missing`` is false.
        config: Flat indexable of counts.  Indices 0-14 are read when
            ``fast_missing`` is false; only 0-2, 4-6 and 8-10 otherwise.
        sample_size: Forwarded to ``_get_hap_likelihood``; unused when
            ``fast_missing`` is true.
        fast_missing: If true, zero the last row and column of the grid and
            use ``_get_hap_likelihood_fast_missing``.

    Returns:
        The accumulated log-likelihood array plus ``swaps * log(2)``, where
        ``swaps`` is the total of row 1 and column 1 of the grid with the
        shared ``[1, 1]`` entry counted once.
    """
    if fast_missing:
        # Keep only the upper-left 3x3 block; last row/column are zeroed.
        gconf = np.array([[config[0], config[1], config[2], 0],
                          [config[4], config[5], config[6], 0],
                          [config[8], config[9], config[10], 0],
                          [0, 0, 0, 0]])
    else:
        # Full grid; the bottom-right corner is always zero.
        gconf = np.array([[config[0], config[1], config[2], config[3]],
                          [config[4], config[5], config[6], config[7]],
                          [config[8], config[9], config[10], config[11]],
                          [config[12], config[13], config[14], 0]])

    # Start the log-space accumulator at log(0).  ``-np.inf`` is used instead
    # of the ``np.NINF`` alias, which was removed in NumPy 2.0.
    to_return = np.full(table.shape[1], -np.inf)

    for k in range(gconf[1, 1] + 1):
        k_inv = gconf[1, 1] - k
        # k of the gconf[1, 1] individuals contribute to entries 0 and 4,
        # the remaining k_inv to entries 1 and 3 (presumably the two possible
        # phasings of double heterozygotes).
        hap_conf = np.array([
            2*gconf[0, 0] + gconf[0, 1] + gconf[1, 0] + k,
            2*gconf[0, 2] + gconf[0, 1] + gconf[1, 2] + k_inv,
            2*gconf[0, -1] + gconf[1, -1],
            2*gconf[2, 0] + gconf[2, 1] + gconf[1, 0] + k_inv,
            2*gconf[2, 2] + gconf[2, 1] + gconf[1, 2] + k,
            2*gconf[2, -1] + gconf[1, -1],
            2*gconf[-1, 0] + gconf[-1, 1],
            2*gconf[-1, 2] + gconf[-1, 1],
        ])
        # Log number of ways to choose which k individuals go which way.
        comb = log_mult_coef(np.array([k, k_inv]))
        if fast_missing:
            this_ll = _get_hap_likelihood_fast_missing(table,
                                                       subtable_sizes,
                                                       hap_conf)
        else:
            this_ll = _get_hap_likelihood(table, hap_conf, sample_size)
        this_ll += comb
        to_return = np.logaddexp(to_return, this_ll)

    # Row 1 plus column 1 of the grid, with the shared cell counted once.
    swaps = gconf[1, :].sum() + gconf[:, 1].sum() - gconf[1, 1]
    return swaps*np.log(2) + to_return
def _log_comb_factor(config):
    """Return the log combinatorial factor of a 2x2 count table.

    The factor is the log multinomial coefficient of the four counts (from
    ``log_mult_coef``) plus the log of the number of distinct tables that can
    be obtained from ``config`` by swapping its rows, swapping its columns
    and/or transposing it.

    The distinct tables are enumerated explicitly.  A closed form based only
    on the number of distinct counts is not sufficient: with three distinct
    values the answer is 4 when the repeated value sits on a diagonal
    (e.g. ``[[1, 2], [2, 3]]``) but 8 when it shares a row or column
    (e.g. ``[[1, 1], [2, 3]]``).

    Args:
        config: 2x2 NumPy array of counts.

    Returns:
        The log combinatorial factor as a float.
    """
    n00, n01 = config[0, 0], config[0, 1]
    n10, n11 = config[1, 0], config[1, 1]

    # The eight images of the table under row swap, column swap and
    # transpose; collecting them in a set keeps only the distinct ones.
    images = {
        (n00, n01, n10, n11),
        (n00, n10, n01, n11),
        (n11, n10, n01, n00),
        (n11, n01, n10, n00),
        (n01, n00, n11, n10),
        (n01, n11, n00, n10),
        (n10, n00, n11, n01),
        (n10, n11, n00, n01),
    }

    log_mult = log_mult_coef(config.flatten())
    return log_mult + np.log(len(images))
def _get_hap_comb(hconf, to_add00, to_add01, to_add10, to_add11):
    """Return a log-space sum of products of binomial coefficients.

    The double loop over ``i`` and ``j`` splits the counts ``hconf[-1, 0]``
    and ``hconf[-1, 1]`` in every way allowed by the ``to_add*`` totals and
    by ``hconf[0, -1]`` / ``hconf[1, -1]``, and adds up the corresponding
    products of binomial coefficients with ``np.logaddexp``.  (Presumably
    this counts the ways partially-missing haplotypes can be completed into
    the four fully-typed classes -- TODO confirm against the caller.)

    Args:
        hconf: 2-D NumPy array; only its last-row entries ``[-1, 0]``,
            ``[-1, 1]`` and last-column entries ``[0, -1]``, ``[1, -1]``
            are read.  The caller's array is not modified.
        to_add00: Count associated with cell (0, 0).
        to_add01: Count associated with cell (0, 1).
        to_add10: Count associated with cell (1, 0).
        to_add11: Count associated with cell (1, 1).

    Returns:
        The log of the summed coefficient, or ``-inf`` when no admissible
        ``(i, j)`` pair exists.
    """
    # Rough iteration-count estimates for looping over the configuration as
    # given versus its transpose; the cheaper orientation is used.
    runtime = (
        (1 + min(to_add10, to_add00, hconf[-1, 0],
                 to_add00 + to_add10 - hconf[-1, 0]))
        * (1 + min(to_add01, to_add11, hconf[-1, 1],
                   to_add01 + to_add11 - hconf[-1, 1]))
    )
    transpose_runtime = (
        (1 + min(to_add01, to_add00, hconf[0, -1],
                 to_add00 + to_add01 - hconf[0, -1]))
        * (1 + min(to_add10, to_add11, hconf[1, -1],
                   to_add10 + to_add11 - hconf[1, -1]))
    )
    if runtime > transpose_runtime:
        # Transposing swaps the roles of the two off-diagonal totals.
        hconf = hconf.transpose()
        to_add01, to_add10 = to_add10, to_add01

    # Log-space accumulators start at log(0).  ``-np.inf`` replaces the
    # ``np.NINF`` alias, which was removed in NumPy 2.0.
    to_return = -np.inf
    imin = max(0, hconf[-1, 0] - to_add00)
    imax = min(to_add10, hconf[-1, 0])
    for i in range(imin, imax + 1):
        i_coef = -np.inf
        # Ways to split hconf[-1, 0] into i and the remainder.
        i_comb = log_mult_coef(np.array([i, hconf[-1, 0] - i]))
        jmin = max(0,
                   hconf[-1, 1] - to_add11,
                   i + hconf[-1, 1] + hconf[1, -1] - to_add11 - to_add10)
        jmax = min(to_add00 + to_add01 + i - hconf[-1, 0] - hconf[0, -1],
                   to_add01,
                   hconf[-1, 1])
        for j in range(jmin, jmax + 1):
            this_coef = log_mult_coef(
                np.array([to_add00 - hconf[-1, 0] + i, to_add01 - j])
            )
            this_coef += log_mult_coef(
                np.array([to_add11 - hconf[-1, 1] + j, to_add10 - i])
            )
            this_coef += log_mult_coef(np.array([
                to_add00 + to_add01 + i - j - hconf[-1, 0] - hconf[0, -1],
                to_add11 + to_add10 + j - i - hconf[-1, 1] - hconf[1, -1],
            ]))
            # Ways to split hconf[-1, 1] into j and the remainder.
            this_coef += log_mult_coef(np.array([j, hconf[-1, 1] - j]))
            i_coef = np.logaddexp(i_coef, this_coef)
        to_return = np.logaddexp(to_return, i_coef + i_comb)
    return to_return
def test_log_mult_coef():
    """Check ``log_mult_coef`` on small inputs and on a negative count."""
    # With two categories the coefficient equals a binomial coefficient.
    binomial_log = np.log(binom(10, 2))
    assert np.allclose(binomial_log, log_mult_coef(np.array([2, 8])))

    # Three singletons: 3! / (1! 1! 1!) = 6.
    three_singletons = log_mult_coef(np.array([1, 1, 1]))
    assert np.allclose(three_singletons, np.log(6))

    # All-zero counts give a coefficient of 1, i.e. log value 0.
    all_zero = log_mult_coef(np.array([0, 0, 0, 0]))
    assert np.allclose(all_zero, 0)

    # A negative count must be rejected.  The array is built outside the
    # ``with`` so that only the call under test is guarded.
    negative_counts = np.array([-1])
    with pytest.raises(ValueError):
        log_mult_coef(negative_counts)