def test_filter_genes():
    """
    Ensure genes with no expression are pre-filtered
    """
    # Simulate some data
    N_CELLS = 100
    N_DIM = 10
    N_GENES = 10
    N_GENES_ZERO = 5

    latent = sim_data.sim_latent(N_CELLS, N_DIM)
    latent = pd.DataFrame(latent)

    umi_counts = sim_data.sim_umi_counts(N_CELLS, 2000, 200)
    umi_counts = pd.Series(umi_counts)

    gene_exp = np.random.rand(N_GENES + N_GENES_ZERO, N_CELLS)
    gene_exp[N_GENES:] = 0
    gene_exp = pd.DataFrame(
        gene_exp,
        index=['Gene{}'.format(i + 1) for i in range(gene_exp.shape[0])],
        columns=latent.index)

    hs = Hotspot(gene_exp,
                 model='normal',
                 latent=latent,
                 umi_counts=umi_counts)

    assert hs.counts.shape[0] == N_GENES
Exemple #2
0
def test_local_correlation():
    N_CELLS = 1000
    N_DIM = 10

    latent = sim_data.sim_latent(N_CELLS, N_DIM)
    latent = pd.DataFrame(latent)

    umi_counts = sim_data.sim_umi_counts(N_CELLS, 2000, 200)
    umi_counts = pd.Series(umi_counts)

    neighbors, weights = neighbors_and_weights(latent,
                                               n_neighbors=30,
                                               neighborhood_factor=3)
    neighbors = neighbors.values
    weights = weights.values
    weights = make_weights_non_redundant(neighbors, weights)

    counts_i = np.random.randn(N_CELLS)

    gxy = local_cov_pair(counts_i, counts_i, neighbors, weights)

    g = local_cov_weights(counts_i, neighbors, weights)

    assert math.isclose(
        g, gxy, rel_tol=1e-10
    ), "Pairwise covariance on (x, x) should be same as local covariance on (x)"
def test_local_autocorrelation_centered():
    """
    Test if the expected moment calculation is correct
    """

    # Simulate some data
    N_CELLS = 1000
    N_DIM = 10

    latent = sim_data.sim_latent(N_CELLS, N_DIM)
    latent = pd.DataFrame(latent)

    umi_counts = sim_data.sim_umi_counts(N_CELLS, 2000, 200)
    umi_counts = pd.Series(umi_counts)

    neighbors, weights = neighbors_and_weights(latent,
                                               n_neighbors=30,
                                               neighborhood_factor=3)
    neighbors = neighbors.values
    weights = weights.values
    weights = make_weights_non_redundant(neighbors, weights)

    Wtot2 = (weights**2).sum()

    # Simulate counts
    for gene_p in [.2, 1, 5, 10]:
        N_REPS = 10000

        mu, var, x2 = bernoulli_model.true_params_scaled(
            gene_p, umi_counts.values)

        Gs = []
        for i in range(N_REPS):

            counts_i = sim_data.sim_counts_bernoulli(N_CELLS,
                                                     umi_counts.values, gene_p)

            counts_i = center_values(counts_i, mu, var)

            g = local_stats.local_cov_weights(counts_i, neighbors, weights)
            Gs.append(g)

        Gs = np.array(Gs)

        EG = 0
        EG2 = Wtot2
        EstdG = (EG2 - EG**2)**0.5

        Gmean = Gs.mean()
        Gstd = Gs.std()

        assert math.isclose(
            0, abs(Gmean / Gstd), abs_tol=5e-2
        ), "EG is off for gene_p={}, Actual={:.2f}, Expected={:.2f}".format(
            gene_p, Gmean, EG)
        assert math.isclose(
            EstdG, Gstd, rel_tol=5e-2
        ), "stdG is off for gene_p={}, Actual={:.2f}, Expected={:.2f}".format(
            gene_p, Gstd, EstdG)
def test_models():
    """
    Ensure each model runs
    """

    # Simulate some data
    N_CELLS = 100
    N_DIM = 10
    N_GENES = 10

    latent = sim_data.sim_latent(N_CELLS, N_DIM)
    latent = pd.DataFrame(
        latent, index=['Cell{}'.format(i + 1) for i in range(N_CELLS)])

    umi_counts = sim_data.sim_umi_counts(N_CELLS, 2000, 200)
    umi_counts = pd.Series(umi_counts)

    gene_exp = np.random.rand(N_GENES, N_CELLS)
    gene_exp = pd.DataFrame(
        gene_exp,
        index=['Gene{}'.format(i + 1) for i in range(gene_exp.shape[0])],
        columns=latent.index)

    for model in ['danb', 'bernoulli', 'normal', 'none']:
        hs = Hotspot(gene_exp,
                     model=model,
                     latent=latent,
                     umi_counts=umi_counts)
        hs.create_knn_graph(False, n_neighbors=30)
        hs.compute_hotspot()

        assert isinstance(hs.results, pd.DataFrame)
        assert hs.results.shape[0] == N_GENES

        hs.compute_autocorrelations()

        assert isinstance(hs.results, pd.DataFrame)
        assert hs.results.shape[0] == N_GENES

        hs.compute_local_correlations(gene_exp.index)

        assert isinstance(hs.local_correlation_z, pd.DataFrame)
        assert hs.local_correlation_z.shape[0] == N_GENES
        assert hs.local_correlation_z.shape[1] == N_GENES

        hs.create_modules(min_gene_threshold=2, fdr_threshold=1)

        assert isinstance(hs.modules, pd.Series)
        assert (hs.modules.index & gene_exp.index).size == N_GENES

        assert isinstance(hs.linkage, np.ndarray)
        assert hs.linkage.shape == (N_GENES - 1, 4)

        hs.calculate_module_scores()

        assert isinstance(hs.module_scores, pd.DataFrame)
        assert (hs.module_scores.index == gene_exp.columns).all()