# Example #1
def test_compute_mean_variance_sparse_without_zeros():
    """Dense and sparse (upper-triangular COO) inputs must yield the
    same per-distance means and variances when zeros are excluded."""
    size = 100
    upper = np.triu(np.random.rand(size, size))
    dense_counts = upper + upper.T
    lengths = np.array([size])

    _, mean_dense, var_dense, _ = compute_mean_variance(
        dense_counts, lengths, use_zero_counts=False)

    # Sparse path: only the upper triangle is stored.
    sparse_counts = sparse.coo_matrix(np.triu(dense_counts))
    _, mean_sparse, var_sparse, _ = compute_mean_variance(
        sparse_counts, lengths, use_zero_counts=False)

    assert_array_almost_equal(mean_dense, mean_sparse)
    assert_array_almost_equal(var_dense, var_sparse)
def test_negative_binomial_obj_sparse_dispersion_biased():
    """Sparse and dense NB objectives must agree, and the true structure
    must score better (lower) than a random one."""
    n = 10
    random_state = np.random.RandomState(42)
    X = random_state.rand(n, 3)
    distances = euclidean_distances(X)
    alpha, beta = -3, 1

    counts = beta * distances ** alpha

    # Fit an exponential dispersion model on the squared counts;
    # drop the last (longest-distance) entry before fitting.
    _, mean, variance, _ = dispersion.compute_mean_variance(
        counts ** 2,
        np.array([counts.shape[0]]))
    d = dispersion.ExponentialDispersion()
    d.fit(mean[:-1], variance[:-1])

    # Keep the upper triangle, zero the diagonal, and go sparse.
    counts = np.triu(counts)
    diag = np.arange(len(counts))
    counts[diag, diag] = 0
    counts = sparse.coo_matrix(counts)

    obj_sparse = negative_binomial_structure.negative_binomial_obj(
        X, counts, dispersion=d, alpha=alpha, beta=beta)
    obj_dense = negative_binomial_structure.negative_binomial_obj(
        X, counts.toarray(), dispersion=d, alpha=alpha, beta=beta)
    obj_random = negative_binomial_structure.negative_binomial_obj(
        random_state.rand(*X.shape),
        counts, dispersion=d, alpha=alpha, beta=beta)

    assert obj_sparse < obj_random
    assert_almost_equal(obj_sparse, obj_dense, 6)
def test_estimate_X_biased_dispersion():
    """Smoke test: estimate_X runs end-to-end with a fitted dispersion
    model on negative-binomially sampled counts."""
    n = 50
    random_state = np.random.RandomState(42)
    X_true = random_state.rand(n, 3) * 10
    dis = euclidean_distances(X_true)
    alpha, beta = -3, 1

    fdis = beta * dis ** alpha
    # The diagonal of `dis` is 0, so dis ** -3 is inf there; clamp it.
    fdis[np.isinf(fdis)] = 1
    disp = fdis + fdis ** 2
    p = fdis / (fdis + disp)

    counts = random_state.negative_binomial(disp, 1 - p)
    counts = np.triu(counts)
    counts[np.arange(len(counts)), np.arange(len(counts))] = 0
    # BUGFIX: `np.float` was deprecated in NumPy 1.20 and removed in
    # 1.24; the builtin `float` is the documented equivalent alias.
    counts = sparse.coo_matrix(counts, dtype=float)

    lengths = np.array([counts.shape[0]])
    _, mean, variance, _ = dispersion.compute_mean_variance(counts, lengths)
    # Drop the last (longest-distance) entry before fitting.
    mean, variance = mean[:-1], variance[:-1]
    d = dispersion.ExponentialDispersion()
    d.fit(mean, variance)

    X = negative_binomial_structure.estimate_X(counts, alpha, beta,
                                               dispersion=d,
                                               random_state=random_state)
def test_negative_binomial_gradient_sparse_dispersed():
    """Sparse and dense NB gradients must agree; at the true structure
    (counts taken as the expected values) the gradient is near zero."""
    n = 10
    random_state = np.random.RandomState(42)
    X = random_state.rand(n, 3)
    dis = euclidean_distances(X)
    alpha, beta = -3, 1

    fdis = beta * dis ** alpha
    # The diagonal of `dis` is 0, so dis ** -3 is inf there; zero it.
    fdis[np.isinf(fdis)] = 0

    # Use the expected counts directly (no NB sampling), so X is the
    # optimum and its gradient should vanish.  The dead intermediates
    # `dispersion_estimated` and `p` from the removed sampling code
    # have been dropped (they were computed but never used).
    counts = np.triu(fdis)
    counts[np.arange(len(counts)), np.arange(len(counts))] = 0
    counts = sparse.coo_matrix(counts, dtype=float)

    _, mean, variance, _ = dispersion.compute_mean_variance(
        counts,
        np.array([counts.shape[0]]))
    # Drop the last (longest-distance) entry before fitting.
    mean, variance = mean[:-1], variance[:-1]
    d = dispersion.ExponentialDispersion()
    d.fit(mean, variance)

    gradient_sparse = negative_binomial_structure.negative_binomial_gradient(
        X, counts, dispersion=d)
    gradient_dense = negative_binomial_structure.negative_binomial_gradient(
        X, counts.toarray(), dispersion=d)

    assert_array_almost_equal(gradient_dense, gradient_sparse)
    assert_array_almost_equal(
        np.zeros(gradient_sparse.shape), gradient_sparse, -5)
# Example #5
def test_exponential_dispersion():
    """ExponentialDispersion.fit/predict preserve the input shape for
    every supported polynomial degree."""
    n = 100
    upper = np.triu(np.random.rand(n, n))
    counts = upper + upper.T

    lengths = np.array([n])
    _, mean_dense, var_dense, _ = compute_mean_variance(
        counts, lengths, use_zero_counts=True)

    for degree in (0, 1, 2):
        model = ExponentialDispersion(degree=degree)
        model.fit(mean_dense, var_dense)

        predicted = model.predict(mean_dense)
        assert predicted.shape == mean_dense.shape

        # NOTE(review): the original bound this second call to a name
        # suggesting a derivative (`disp_der`) but still called
        # predict(); it was probably meant to exercise the derivative
        # API — confirm against the dispersion module and update.
        predicted_again = model.predict(mean_dense)
        assert predicted_again.shape == mean_dense.shape
# Example #6
###############################################################################
# Normalize the contact count data, but keep the biases to estimate the
# dispersion

# Filter out the lowest-count bins (6%), then ICE-normalize with
# output_bias=True so the per-bin bias vector is returned alongside
# the normalized matrix.
counts = iced.filter.filter_low_counts(counts, percentage=0.06)
normed_counts, biases = iced.normalization.ICE_normalization(counts,
                                                             output_bias=True)

###############################################################################
# Now, estimate the variance and mean for every genomic distance
#
# Note that in order to have an unbiased estimation of the variance, you need
# to provide the bias vector.

# Mean/variance are computed on the RAW counts, with the ICE biases
# supplied so the estimates are debiased.
_, mean, variance, _ = dispersion.compute_mean_variance(counts,
                                                        lengths,
                                                        bias=biases)

###############################################################################
# And now plot the resulting mean versus variance
fig, ax = plt.subplots()
s = ax.scatter(mean, variance, linewidth=0, marker="o", s=20)

# Mean-variance relationships are best inspected on log-log axes.
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("Mean", fontweight="bold")
ax.set_ylabel("Variance", fontweight="bold")
# Capture the autoscaled limits before overlaying reference curves.
# NOTE(review): presumably restored after the ax.plot below — confirm.
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()

ax.plot(np.arange(1e-1, 1e7, 1e6),