def test_compute_mean_variance_sparse_without_zeros():
    """Per-distance mean/variance must agree between the dense and the
    sparse (upper-triangular COO) code paths when zero counts are ignored.
    """
    n = 100
    # Seeded RNG for reproducibility, consistent with the other tests in
    # this module (the original used the unseeded global np.random).
    random_state = np.random.RandomState(42)
    counts = random_state.rand(n, n)
    counts = np.triu(counts)
    counts = counts.T + counts  # symmetrize the contact map
    lengths = np.array([n])

    _, mean_dense, var_dense, _ = compute_mean_variance(
        counts, lengths, use_zero_counts=False)

    # The sparse path is fed only the upper triangle of the same matrix.
    counts = np.triu(counts)
    _, mean_sparse, var_sparse, _ = compute_mean_variance(
        sparse.coo_matrix(counts), lengths, use_zero_counts=False)

    assert_array_almost_equal(mean_dense, mean_sparse)
    assert_array_almost_equal(var_dense, var_sparse)
def test_negative_binomial_obj_sparse_dispersion_biased():
    """Sparse and dense NB objectives must match, and the true structure
    must score a lower objective than a random structure.
    """
    n = 10
    random_state = np.random.RandomState(42)
    X = random_state.rand(n, 3)
    distances = euclidean_distances(X)
    alpha, beta = -3, 1
    counts = beta * distances ** alpha

    # Fit an exponential dispersion model on the squared expected counts.
    _, mean, variance, _ = dispersion.compute_mean_variance(
        counts ** 2, np.array([counts.shape[0]]))
    mean, variance = mean[:-1], variance[:-1]
    dispersion_model = dispersion.ExponentialDispersion()
    dispersion_model.fit(mean, variance)

    # Keep only the upper triangle and zero out the diagonal before
    # converting to the sparse representation.
    counts = np.triu(counts)
    diag = np.arange(len(counts))
    counts[diag, diag] = 0
    counts = sparse.coo_matrix(counts)

    obj_sparse = negative_binomial_structure.negative_binomial_obj(
        X, counts, dispersion=dispersion_model, alpha=alpha, beta=beta)
    obj_dense = negative_binomial_structure.negative_binomial_obj(
        X, counts.toarray(), dispersion=dispersion_model,
        alpha=alpha, beta=beta)
    obj_random = negative_binomial_structure.negative_binomial_obj(
        random_state.rand(*X.shape), counts,
        dispersion=dispersion_model, alpha=alpha, beta=beta)

    assert obj_sparse < obj_random
    assert_almost_equal(obj_sparse, obj_dense, 6)
def test_estimate_X_biased_dispersion():
    """Smoke test: estimate_X runs on counts sampled from a negative
    binomial, with a fitted exponential dispersion model.
    """
    n = 50
    random_state = np.random.RandomState(42)
    X_true = random_state.rand(n, 3) * 10
    dis = euclidean_distances(X_true)
    alpha, beta = -3, 1
    fdis = beta * dis ** alpha
    fdis[np.isinf(fdis)] = 1  # diagonal distances are 0, so 0 ** -3 -> inf
    disp = fdis + fdis ** 2
    p = fdis / (fdis + disp)
    counts = random_state.negative_binomial(disp, 1 - p)
    counts = np.triu(counts)
    counts[np.arange(len(counts)), np.arange(len(counts))] = 0
    # BUG FIX: `np.float` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `float` is the equivalent dtype.
    counts = sparse.coo_matrix(counts, dtype=float)
    lengths = np.array([counts.shape[0]])

    _, mean, variance, _ = dispersion.compute_mean_variance(counts, lengths)
    mean, variance = mean[:-1], variance[:-1]
    d = dispersion.ExponentialDispersion()
    d.fit(mean, variance)

    X = negative_binomial_structure.estimate_X(
        counts, alpha, beta, dispersion=d, random_state=random_state)
def test_negative_binomial_gradient_sparse_dispersed():
    """The NB gradient must be identical for sparse and dense counts, and
    close to zero when counts equal the noiseless expected values
    beta * d ** alpha evaluated at the true structure X.

    Cleanup: removed dead code — `dispersion_estimated` and `p` were
    computed but never used (the NB sampling that consumed them was
    commented out), along with the commented-out sampling itself.
    """
    n = 10
    random_state = np.random.RandomState(42)
    X = random_state.rand(n, 3)
    dis = euclidean_distances(X)
    alpha, beta = -3, 1
    fdis = beta * dis ** alpha
    fdis[np.isinf(fdis)] = 0  # diagonal distances are 0 -> inf

    # Use the noiseless expected counts so the gradient at X is ~0.
    counts = np.triu(fdis)
    counts[np.arange(len(counts)), np.arange(len(counts))] = 0
    counts = sparse.coo_matrix(counts, dtype=float)

    _, mean, variance, _ = dispersion.compute_mean_variance(
        counts, np.array([counts.shape[0]]))
    mean, variance = mean[:-1], variance[:-1]
    d = dispersion.ExponentialDispersion()
    d.fit(mean, variance)

    gradient_sparse = negative_binomial_structure.negative_binomial_gradient(
        X, counts, dispersion=d)
    gradient_dense = negative_binomial_structure.negative_binomial_gradient(
        X, counts.toarray(), dispersion=d)

    assert_array_almost_equal(gradient_dense, gradient_sparse)
    assert_array_almost_equal(
        np.zeros(gradient_sparse.shape), gradient_sparse, -5)
def test_exponential_dispersion():
    """Fitting ExponentialDispersion and evaluating the dispersion (and
    its derivative) must preserve the shape of the mean vector for every
    polynomial degree.
    """
    n = 100
    # Seeded RNG for reproducibility, consistent with the other tests.
    random_state = np.random.RandomState(42)
    counts = random_state.rand(n, n)
    counts = np.triu(counts)
    counts = counts.T + counts
    lengths = np.array([n])
    _, mean_dense, var_dense, _ = compute_mean_variance(
        counts, lengths, use_zero_counts=True)

    for degree in [0, 1, 2]:
        dispersion_ = ExponentialDispersion(degree=degree)
        dispersion_.fit(mean_dense, var_dense)

        disp = dispersion_.predict(mean_dense)
        assert disp.shape == mean_dense.shape

        # BUG FIX: the original called predict() a second time, making this
        # assertion a duplicate of the one above. The variable name
        # `disp_der` indicates the derivative of the dispersion was
        # intended (ExponentialDispersion.derivate in this package).
        disp_der = dispersion_.derivate(mean_dense)
        assert disp_der.shape == mean_dense.shape
############################################################################### # Normalize the contact count data, but keep the biases to estimate the # dispersion counts = iced.filter.filter_low_counts(counts, percentage=0.06) normed_counts, biases = iced.normalization.ICE_normalization(counts, output_bias=True) ############################################################################### # Now, estimate the variance and mean for every genomic distance # # Note that in order to have an unbiased estimation of the variance, you need # to provide the bias vector. _, mean, variance, _ = dispersion.compute_mean_variance(counts, lengths, bias=biases) ############################################################################### # And now plot the resulting mean versus variance fig, ax = plt.subplots() s = ax.scatter(mean, variance, linewidth=0, marker="o", s=20) ax.set_xscale("log") ax.set_yscale("log") ax.set_xlabel("Mean", fontweight="bold") ax.set_ylabel("Variance", fontweight="bold") xmin, xmax = ax.get_xlim() ymin, ymax = ax.get_ylim() ax.plot(np.arange(1e-1, 1e7, 1e6),