def test__mean_interhomolog_counts_ambig():
    lengths = np.array([10, 20])
    ploidy = 2
    seed = 42
    alpha, beta = -3., 1.
    mean_chrom_coords = np.array([[0, 3, 0], [-4, 0, 0], [0, -3, 0],
                                  [4, 0, 0]])

    random_state = np.random.RandomState(seed=seed)
    n = lengths.sum()
    X_true = random_state.rand(n * ploidy, 3)

    # Center each chromosome copy on its prescribed mean position.
    begin = end = 0
    for i in range(len(lengths) * ploidy):
        end += np.tile(lengths, ploidy)[i]
        X_true[begin:end] -= X_true[begin:end].mean(axis=0)
        X_true[begin:end] += mean_chrom_coords[i]
        begin = end

    dis = euclidean_distances(X_true)
    dis[dis == 0] = np.inf
    counts = beta * dis ** alpha
    counts[np.isnan(counts) | np.isinf(counts)] = 0
    counts = np.triu(counts, 1)

    # Approximate the inter-homolog distance for ambiguous data from a
    # low-resolution structure with one bead per chromosome copy.
    nchrom = lengths.shape[0]
    X_true_lowres = decrease_struct_res(
        X_true, multiscale_factor=lengths.max(), lengths=lengths)
    dis_lowres = euclidean_distances(X_true_lowres)
    dis_lowres[np.tril_indices(dis_lowres.shape[0])] = np.nan
    np.fill_diagonal(dis_lowres[:nchrom, nchrom:], np.nan)
    approx_interhomo_dis_ambig = np.nanmean(dis_lowres)

    # Ambiguous counts pool all four homolog-homolog blocks.
    ambig_counts_raw = counts[:n, :n] + counts[n:, n:] + counts[:n, n:] + \
        counts[n:, :n]
    ambig_counts = _format_counts(
        counts=sparse.coo_matrix(ambig_counts_raw), lengths=lengths,
        ploidy=ploidy, beta=beta)

    bias = 0.1 + random_state.rand(n)
    ambig_counts_biased = ambig_counts_raw * bias.reshape(-1, 1) * \
        bias.reshape(-1, 1).T
    ambig_counts_biased = _format_counts(
        counts=sparse.coo_matrix(ambig_counts_biased), lengths=lengths,
        ploidy=ploidy, beta=beta)

    mhs_k_ambig = constraints._mean_interhomolog_counts(
        ambig_counts, lengths=lengths)
    mhs_k_ambig_biased = constraints._mean_interhomolog_counts(
        ambig_counts_biased, lengths=lengths, bias=bias)

    assert_allclose(mhs_k_ambig, mhs_k_ambig_biased)
    assert_allclose(mhs_k_ambig ** (1 / alpha), approx_interhomo_dis_ambig,
                    rtol=1e-1)


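# The assertions above and below compare mhs_k ** (1 / alpha) to a distance.
# This works because the simulated counts follow c = beta * d ** alpha with
# beta = 1, so the power law inverts to d = c ** (1 / alpha). A minimal
# illustrative check of that inversion (hypothetical values; not part of the
# original suite):
def test__power_law_inversion_example():
    alpha, d = -3., 10.
    c = d ** alpha  # simulated count for a pair at distance d, with beta = 1
    assert_allclose(c ** (1 / alpha), d)

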
def test__mean_interhomolog_counts_unambig():
    lengths = np.array([10, 20])
    ploidy = 2
    seed = 42
    alpha, beta = -3., 1.
    mean_chrom_coords = np.array([[0, 3, 0], [-4, 0, 0], [0, -3, 0],
                                  [4, 0, 0]])

    random_state = np.random.RandomState(seed=seed)
    n = lengths.sum()
    X_true = random_state.rand(n * ploidy, 3)

    # Center each chromosome copy on its prescribed mean position.
    begin = end = 0
    for i in range(len(lengths) * ploidy):
        end += np.tile(lengths, ploidy)[i]
        X_true[begin:end] -= X_true[begin:end].mean(axis=0)
        X_true[begin:end] += mean_chrom_coords[i]
        begin = end

    dis = euclidean_distances(X_true)
    dis[dis == 0] = np.inf
    counts = beta * dis ** alpha
    counts[np.isnan(counts) | np.isinf(counts)] = 0
    counts = np.triu(counts, 1)

    true_interhomo_dis = constraints._inter_homolog_dis(X_true,
                                                        lengths=lengths)

    bias = 0.1 + random_state.rand(n)
    counts_biased = counts * np.tile(bias, 2).reshape(-1, 1) * \
        np.tile(bias, 2).reshape(-1, 1).T

    ua_counts = _format_counts(counts=sparse.coo_matrix(counts),
                               lengths=lengths, ploidy=ploidy, beta=beta)
    ua_counts_biased = _format_counts(counts=sparse.coo_matrix(counts_biased),
                                      lengths=lengths, ploidy=ploidy,
                                      beta=beta)

    mhs_k_ua = constraints._mean_interhomolog_counts(ua_counts,
                                                     lengths=lengths)
    mhs_k_ua_biased = constraints._mean_interhomolog_counts(
        ua_counts_biased, lengths=lengths, bias=bias)

    assert_allclose(mhs_k_ua, mhs_k_ua_biased)
    assert_allclose(mhs_k_ua ** (1 / alpha), true_interhomo_dis, rtol=1e-2)


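# Every test in this module simulates counts from a structure with the same
# recipe: counts_ij = beta * dis_ij ** alpha, zero-filled where undefined and
# restricted to the upper triangle. The helper below is an illustrative
# sketch of that shared recipe only; the name and signature are hypothetical
# and it is not part of the pastis API.
def _simulate_counts_from_structure(X, alpha=-3., beta=1.):
    """Illustrative sketch: dense simulated counts for a structure X."""
    dis = euclidean_distances(X)
    dis[dis == 0] = np.inf  # avoid zero distances on the diagonal
    counts = beta * dis ** alpha
    counts[np.isnan(counts) | np.isinf(counts)] = 0
    return np.triu(counts, 1)

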
def test_bcc_constraint():
    lengths = np.array([20])
    ploidy = 2
    alpha, beta = -3., 1.

    # Beads spaced evenly along a straight line should incur essentially no
    # bead-chain connectivity ('bcc') penalty.
    n = lengths.sum()
    X_true = np.concatenate([
        np.arange(n * ploidy).reshape(-1, 1),
        np.zeros((n * ploidy, 1)),
        np.zeros((n * ploidy, 1))], axis=1)

    dis = euclidean_distances(X_true)
    dis[dis == 0] = np.inf
    counts = beta * dis ** alpha
    counts[np.isnan(counts) | np.isinf(counts)] = 0
    counts = np.triu(counts, 1)
    counts = sparse.coo_matrix(counts)
    counts = _format_counts(counts=counts, lengths=lengths, ploidy=ploidy,
                            beta=beta)

    constraint = constraints.Constraints(counts, lengths=lengths,
                                         ploidy=ploidy, multiscale_factor=1,
                                         constraint_lambdas={'bcc': 1},
                                         constraint_params=None)
    constraint.check()
    obj = constraint.apply(X_true)['obj_bcc']

    assert obj < 1e-6


def test_estimate_alpha_beta_diploid_unambig():
    lengths = np.array([20])
    ploidy = 2
    seed = 42
    alpha_true, beta_true = -3., 2.

    random_state = np.random.RandomState(seed=seed)
    n = lengths.sum()
    X_true = random_state.rand(n * ploidy, 3)

    dis = euclidean_distances(X_true)
    dis[dis == 0] = np.inf
    counts = beta_true * dis ** alpha_true
    counts[np.isnan(counts) | np.isinf(counts)] = 0
    counts = np.triu(counts, 1)
    counts = sparse.coo_matrix(counts)
    counts = _format_counts(counts=counts, lengths=lengths, ploidy=ploidy,
                            beta=beta_true)

    alpha, obj, converged, _ = estimate_alpha_beta.estimate_alpha(
        X=X_true, counts=counts, alpha_init=alpha_true, lengths=lengths)
    beta = list(estimate_alpha_beta._estimate_beta(
        X=X_true, counts=counts, alpha=alpha, lengths=lengths,
        verbose=False).values())[0]

    assert converged
    assert obj < -1e4
    assert_array_almost_equal(alpha_true, alpha, decimal=5)
    assert_array_almost_equal(beta_true, beta, decimal=3)


def test_estimate_alpha_beta_diploid_combo():
    lengths = np.array([20])
    ploidy = 2
    seed = 42
    alpha_true, beta_true_single = -3., 4.
    ratio_ambig, ratio_pa, ratio_ua = [1 / 3] * 3
    bias = None

    random_state = np.random.RandomState(seed=seed)
    n = lengths.sum()
    X_true = random_state.rand(n * ploidy, 3)

    dis = euclidean_distances(X_true)
    dis[dis == 0] = np.inf
    poisson_intensity = dis ** alpha_true

    # Ambiguous counts: neither end is assigned to a homolog, so all four
    # homolog-homolog blocks are pooled.
    ambig_counts = ratio_ambig * beta_true_single * poisson_intensity
    ambig_counts[np.isnan(ambig_counts) | np.isinf(ambig_counts)] = 0
    ambig_counts = ambig_counts[:n, :n] + ambig_counts[n:, n:] + \
        ambig_counts[:n, n:] + ambig_counts[n:, :n]
    ambig_counts = np.triu(ambig_counts, 1)
    ambig_counts = sparse.coo_matrix(ambig_counts)

    # Partially ambiguous counts: only one end is assigned to a homolog.
    pa_counts = ratio_pa * beta_true_single * poisson_intensity
    pa_counts[np.isnan(pa_counts) | np.isinf(pa_counts)] = 0
    pa_counts = pa_counts[:, :n] + pa_counts[:, n:]
    np.fill_diagonal(pa_counts[:n, :], 0)
    np.fill_diagonal(pa_counts[n:, :], 0)
    pa_counts = sparse.coo_matrix(pa_counts)

    # Unambiguous counts: both ends are assigned to homologs.
    ua_counts = ratio_ua * beta_true_single * poisson_intensity
    ua_counts[np.isnan(ua_counts) | np.isinf(ua_counts)] = 0
    ua_counts = np.triu(ua_counts, 1)
    ua_counts = sparse.coo_matrix(ua_counts)

    counts_raw = [ambig_counts, pa_counts, ua_counts]
    beta_true = np.array([ratio_ambig, ratio_pa, ratio_ua]) * beta_true_single
    counts = _format_counts(counts=counts_raw, lengths=lengths, ploidy=ploidy,
                            beta=beta_true)

    alpha, obj, converged, _ = estimate_alpha_beta.estimate_alpha(
        X=X_true, counts=counts, alpha_init=alpha_true, lengths=lengths,
        bias=bias)
    beta = list(estimate_alpha_beta._estimate_beta(
        X=X_true, counts=counts, alpha=alpha, lengths=lengths, bias=bias,
        verbose=False).values())[0]

    assert converged
    assert obj < -1e4
    assert_array_almost_equal(alpha_true, alpha, decimal=5)
    assert_array_almost_equal(beta_true, beta, decimal=3)


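# The combo test above builds the three ambiguity levels used for diploid
# data: ambiguous counts collapse both homologs (n x n), partially ambiguous
# counts resolve the homolog on one end only (2n x n), and unambiguous counts
# resolve both ends (2n x 2n). A minimal shape check on a toy matrix
# (illustrative only; values are arbitrary and not part of the original
# suite):
def test__ambiguity_block_shapes_example():
    n = 2
    full = np.arange((2 * n) ** 2, dtype=float).reshape(2 * n, 2 * n)
    ambig = full[:n, :n] + full[n:, n:] + full[:n, n:] + full[n:, :n]
    pa = full[:, :n] + full[:, n:]
    assert full.shape == (2 * n, 2 * n)
    assert pa.shape == (2 * n, n)
    assert ambig.shape == (n, n)

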
def test_poisson_objective_diploid_unambig():
    lengths = np.array([20])
    ploidy = 2
    seed = 42
    alpha, beta = -3., 1.

    random_state = np.random.RandomState(seed=seed)
    n = lengths.sum()
    X_true = random_state.rand(n * ploidy, 3)

    dis = euclidean_distances(X_true)
    dis[dis == 0] = np.inf
    counts = beta * dis ** alpha
    counts[np.isnan(counts) | np.isinf(counts)] = 0
    counts = np.triu(counts, 1)
    counts = sparse.coo_matrix(counts)
    counts = _format_counts(counts=counts, lengths=lengths, ploidy=ploidy,
                            beta=beta)

    obj = poisson.objective(X=X_true, counts=counts, alpha=alpha,
                            lengths=lengths)

    assert obj < -1e4


def test_poisson_objective_diploid_partially_ambig_biased():
    lengths = np.array([20])
    ploidy = 2
    seed = 42
    alpha, beta = -3., 1.

    random_state = np.random.RandomState(seed=seed)
    n = lengths.sum()
    X_true = random_state.rand(n * ploidy, 3)

    dis = euclidean_distances(X_true)
    dis[dis == 0] = np.inf
    counts = beta * dis ** alpha
    counts[np.isnan(counts) | np.isinf(counts)] = 0
    counts = counts[:, :n] + counts[:, n:]
    np.fill_diagonal(counts[:n, :], 0)
    np.fill_diagonal(counts[n:, :], 0)

    bias = 0.1 + random_state.rand(n)
    counts *= np.tile(bias, 2).reshape(-1, 1)
    counts *= bias.reshape(-1, 1).T

    counts = sparse.coo_matrix(counts)
    counts = _format_counts(counts=counts, lengths=lengths, ploidy=ploidy,
                            beta=beta)

    obj = poisson.objective(X=X_true, counts=counts, alpha=alpha,
                            lengths=lengths, bias=bias)

    assert obj < -1e4


def test_estimate_alpha_beta_diploid_partially_ambig_biased():
    lengths = np.array([20])
    ploidy = 2
    seed = 42
    alpha_true, beta_true = -3., 4.

    random_state = np.random.RandomState(seed=seed)
    n = lengths.sum()
    X_true = random_state.rand(n * ploidy, 3)

    dis = euclidean_distances(X_true)
    dis[dis == 0] = np.inf
    counts = beta_true * dis ** alpha_true
    counts[np.isnan(counts) | np.isinf(counts)] = 0
    counts = counts[:, :n] + counts[:, n:]
    np.fill_diagonal(counts[:n, :], 0)
    np.fill_diagonal(counts[n:, :], 0)

    bias = 0.1 + random_state.rand(n)
    counts *= np.tile(bias, 2).reshape(-1, 1)
    counts *= bias.reshape(-1, 1).T

    counts = sparse.coo_matrix(counts)
    counts = _format_counts(counts=counts, lengths=lengths, ploidy=ploidy,
                            beta=beta_true)

    alpha, obj, converged, _ = estimate_alpha_beta.estimate_alpha(
        X=X_true, counts=counts, alpha_init=alpha_true, lengths=lengths,
        bias=bias)
    beta = list(estimate_alpha_beta._estimate_beta(
        X=X_true, counts=counts, alpha=alpha, lengths=lengths, bias=bias,
        verbose=False).values())[0]

    assert converged
    assert obj < -1e4
    assert_array_almost_equal(alpha_true, alpha, decimal=5)
    assert_array_almost_equal(beta_true, beta, decimal=3)


def test_mhs_constraint():
    lengths = np.array([30])
    ploidy = 2
    seed = 42
    true_interhomo_dis = np.array([10.])
    alpha, beta = -3., 1.

    random_state = np.random.RandomState(seed=seed)
    n = lengths.sum()

    # Build a random-walk structure: each step adds a unit move along a
    # random axis to all subsequent beads.
    X_true = np.zeros((n * ploidy, 3), dtype=float)
    for i in range(X_true.shape[0]):
        X_true[i:, random_state.choice([0, 1, 2])] += 1

    # Center each homolog, then shift the first homolog of each chromosome
    # along x by the target inter-homolog distance.
    X_true[n:] -= X_true[n:].mean(axis=0)
    X_true[:n] -= X_true[:n].mean(axis=0)
    begin = end = 0
    for i in range(len(lengths)):
        end += lengths[i]
        X_true[begin:end, 0] += true_interhomo_dis[i]
        begin = end

    dis = euclidean_distances(X_true)
    dis[dis == 0] = np.inf
    counts = beta * dis ** alpha
    counts[np.isnan(counts) | np.isinf(counts)] = 0
    counts = np.triu(counts, 1)
    counts = sparse.coo_matrix(counts)
    counts = _format_counts(counts=counts, lengths=lengths, ploidy=ploidy,
                            beta=beta)

    mhs_k = constraints._mean_interhomolog_counts(counts, lengths=lengths)
    constraint = constraints.Constraints(counts, lengths=lengths,
                                         ploidy=ploidy, multiscale_factor=1,
                                         constraint_lambdas={'mhs': 1},
                                         constraint_params={'mhs': mhs_k})
    constraint.check()
    obj = constraint.apply(X_true, alpha=alpha)['obj_mhs']

    assert obj < 1e-3


def test_estimate_alpha_beta_diploid_mhs_constraint():
    lengths = np.array([30])
    ploidy = 2
    seed = 42
    true_interhomo_dis = np.array([5.])
    alpha_true, beta_true = -3., 4.
    mhs_lambda = 1

    random_state = np.random.RandomState(seed=seed)
    n = lengths.sum()
    X_true = random_state.rand(n * ploidy, 3)

    # Center each homolog, then shift the first homolog of each chromosome
    # along x by the target inter-homolog distance.
    X_true[n:] -= X_true[n:].mean(axis=0)
    X_true[:n] -= X_true[:n].mean(axis=0)
    begin = end = 0
    for i in range(len(lengths)):
        end += lengths[i]
        X_true[begin:end, 0] += true_interhomo_dis[i]
        begin = end

    dis = euclidean_distances(X_true)
    dis[dis == 0] = np.inf
    counts = beta_true * dis ** alpha_true
    counts[np.isnan(counts) | np.isinf(counts)] = 0
    counts = np.triu(counts, 1)
    counts = sparse.coo_matrix(counts)
    counts = _format_counts(counts=counts, lengths=lengths, ploidy=ploidy,
                            beta=beta_true)
    null_counts = [NullCountsMatrix(counts=counts, lengths=lengths,
                                    ploidy=ploidy, multiscale_factor=1)]

    mhs_k = constraints._mean_interhomolog_counts(counts, lengths=lengths)
    constraint = constraints.Constraints(
        counts, lengths=lengths, ploidy=ploidy, multiscale_factor=1,
        constraint_lambdas={'mhs': mhs_lambda},
        constraint_params={'mhs': mhs_k})
    constraint.check()

    alpha, obj, converged, _ = estimate_alpha_beta.estimate_alpha(
        X=X_true, counts=null_counts, alpha_init=alpha_true, lengths=lengths,
        constraints=constraint)
    beta = list(estimate_alpha_beta._estimate_beta(
        X=X_true, counts=counts, alpha=alpha, lengths=lengths,
        verbose=False).values())[0]

    assert converged
    assert obj < 1e-6
    assert_allclose(alpha_true, alpha, rtol=1e-2)
    assert_allclose(beta_true, beta, rtol=1e-2)