def test_rank_genes_groups_use_raw():
    """Check `use_raw=True` with and without a populated `.raw`.

    Regression test for https://github.com/scverse/scanpy/issues/1929
    """
    # With `.raw` present the call must simply go through.
    adata_with_raw = pbmc68k_reduced()
    assert adata_with_raw.raw is not None
    sc.tl.rank_genes_groups(adata_with_raw, groupby="bulk_labels", use_raw=True)

    # After deleting `.raw` the same call has to raise a ValueError.
    adata_without_raw = pbmc68k_reduced()
    del adata_without_raw.raw
    assert adata_without_raw.raw is None
    expected_msg = "Received `use_raw=True`, but `adata.raw` is empty"
    with pytest.raises(ValueError, match=expected_msg):
        sc.tl.rank_genes_groups(
            adata_without_raw, groupby="bulk_labels", use_raw=True
        )
def test_tsne_metric_warning():
    """A cosine-metric t-SNE warns while sklearn's version is patched to 0.23.0."""
    adata = pbmc68k_reduced()
    import sklearn

    # Both context managers are only entered by the `with` statement below.
    patched_version = patch.object(sklearn, "__version__", "0.23.0")
    expected_warning = pytest.warns(
        UserWarning, match="Results for non-euclidean metrics changed"
    )
    with patched_version, expected_warning:
        sc.tl.tsne(adata, metric="cosine")
def test_matrixplot_obj(image_comparer):
    """Style a matrixplot through its returned figure object and compare the image."""
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=15)
    adata = pbmc68k_reduced()

    # Marker genes keyed by group label.
    marker_genes_dict = {
        "3": ["GNLY", "NKG7"],
        "1": ["FCER1A"],
        "2": ["CD3D"],
        "0": ["FCGR3A"],
        "4": ["CD79A", "MS4A1"],
    }
    plot_kwargs = dict(use_raw=False, title='added totals', return_fig=True)
    plot = sc.pl.matrixplot(adata, marker_genes_dict, 'bulk_labels', **plot_kwargs)

    with_totals = plot.add_totals(sort='descending')
    with_totals.style(edge_color='white', edge_lw=0.5).show()
    save_and_compare_images('master_matrixplot_with_totals')

    # The axes dict returned by the plot object must expose the main plot.
    axes = plot.get_axes()
    assert 'mainplot_ax' in axes, 'mainplot_ax not found in returned axes dict'
def test_graph_metrics_w_constant_values(metric, array_type):
    """Constant variables give nan (with a warning) and leave the others untouched.

    Regression test for https://github.com/scverse/scanpy/issues/1806
    """
    adata = pbmc68k_reduced()
    values = array_type(adata.raw.X.T.copy())
    graph = adata.obsp["connectivities"].copy()
    n_rows = values.shape[0]

    # Choose 10 distinct rows that get overwritten with a constant value.
    const_rows = np.random.choice(n_rows, 10, replace=False)
    with warnings.catch_warnings():
        # SparseEfficiencyWarning is silenced while the rows are overwritten.
        warnings.simplefilter("ignore", sparse.SparseEfficiencyWarning)
        values_zeroed = values.copy()
        values_zeroed[const_rows, :] = 0
        values_const = values.copy()
        values_const[const_rows, :] = 42

    baseline = metric(graph, values)
    warning_pattern = r"10 variables were constant, will return nan for these"
    with pytest.warns(UserWarning, match=warning_pattern):
        result_zeroed = metric(graph, values_zeroed)
    with pytest.warns(UserWarning, match=warning_pattern):
        result_const = metric(graph, values_const)

    # Unmodified input: no nan anywhere.
    assert not np.isnan(baseline).any()
    # The constant's value (0 vs 42) must not matter.
    np.testing.assert_array_equal(result_zeroed, result_const)
    # The overwritten rows come back as nan.
    np.testing.assert_array_equal(np.nan, result_zeroed[const_rows])
    np.testing.assert_array_equal(np.nan, result_const[const_rows])

    # All remaining rows agree with the baseline.
    untouched = ~np.isin(np.arange(n_rows), const_rows)
    np.testing.assert_array_equal(baseline[untouched], result_zeroed[untouched])
def test_wilcoxon_symmetry():
    """Swapping group and reference keeps wilcoxon statistics equal in absolute value."""
    adata = pbmc68k_reduced()
    shared_kwargs = dict(groupby="bulk_labels", method='wilcoxon', rankby_abs=True)

    # Monocytes tested against dendritic cells.
    rank_genes_groups(
        adata,
        groups=["CD14+ Monocyte", "Dendritic"],
        reference="Dendritic",
        **shared_kwargs,
    )
    assert adata.uns["rank_genes_groups"]["params"]["use_raw"] is True
    mono_df = rank_genes_groups_df(adata, group="CD14+ Monocyte")
    stats_mono = mono_df.drop(columns="names").to_numpy()

    # Dendritic cells tested against monocytes.
    rank_genes_groups(
        adata,
        groups=["CD14+ Monocyte", "Dendritic"],
        reference="CD14+ Monocyte",
        **shared_kwargs,
    )
    dend_df = rank_genes_groups_df(adata, group="Dendritic")
    stats_dend = dend_df.drop(columns="names").to_numpy()

    assert np.allclose(np.abs(stats_mono), np.abs(stats_dend))
def test_emptycat():
    """`rank_genes_groups` raises when the grouping has an added, unused category."""
    adata = pbmc68k_reduced()
    louvain_extended = adata.obs['louvain'].cat.add_categories(['11'])
    adata.obs['louvain'] = louvain_extended

    # The error message is expected to mention the added category.
    expected_pattern = rf"Could not calculate statistics.*{'11'}"
    with pytest.raises(ValueError, match=expected_pattern):
        rank_genes_groups(adata, groupby='louvain')
def test_morans_i_consistency():
    """`morans_i` gives matching results across its different calling conventions."""
    adata = pbmc68k_reduced()
    adata.layers["raw"] = adata.raw.X.copy()
    graph = adata.obsp["connectivities"]

    # Explicit graph vs. AnnData object plus `vals`
    from_graph = sc.metrics.morans_i(graph, adata.obs["percent_mito"])
    from_adata = sc.metrics.morans_i(adata, vals=adata.obs["percent_mito"])
    assert eq(from_graph, from_adata)

    # Test that series and vectors return same value
    from_series = sc.metrics.morans_i(graph, adata.obs["percent_mito"])
    from_vector = sc.metrics.morans_i(graph, adata.obs["percent_mito"].values)
    assert eq(from_series, from_vector)

    # `obsm` key vs. passing the transposed matrix directly
    np.testing.assert_array_equal(
        sc.metrics.morans_i(adata, obsm="X_pca"),
        sc.metrics.morans_i(graph, adata.obsm["X_pca"].T),
    )

    # Whole-layer result vs. the single-gene result for the first gene
    all_genes = sc.metrics.morans_i(adata, layer="raw")
    first_gene_vals = adata.obs_vector(adata.var_names[0], layer="raw")
    first_gene = sc.metrics.morans_i(adata, vals=first_gene_vals)
    np.testing.assert_allclose(all_genes[0], first_gene, rtol=1e-5)

    # Test that results are similar for sparse and dense reps of same data
    sparse_result = sc.metrics.morans_i(adata, layer="raw")
    dense_result = sc.metrics.morans_i(adata, vals=adata.layers["raw"].T.toarray())
    np.testing.assert_allclose(sparse_result, dense_result)
def test_recipe_weinreb():
    """Run `recipe_weinreb17` with `copy=True` and check the input stays unchanged."""
    # Just tests for failure for now
    adata = pbmc68k_reduced().raw.to_adata()
    dense_X = adata.X.toarray()
    adata.X = dense_X
    snapshot = adata.copy()

    sc.pp.recipe_weinreb17(adata, log=False, copy=True)

    assert_equal(snapshot, adata)
def test_umap_init_dtype():
    """UMAP embeddings should not depend on the float dtype of `init_pos`.

    Runs `sc.tl.umap` twice on the first 100 cells, initialised from the first
    two PCA components cast to float32 and to float64 respectively, and checks
    that the two resulting embeddings are almost equal.
    """
    pbmc = pbmc68k_reduced()
    pbmc = pbmc[:100, :].copy()

    sc.tl.umap(pbmc, init_pos=pbmc.obsm["X_pca"][:, :2].astype(np.float32))
    # Copy, because the next call overwrites `X_umap` on the same object.
    embed1 = pbmc.obsm["X_umap"].copy()

    sc.tl.umap(pbmc, init_pos=pbmc.obsm["X_pca"][:, :2].astype(np.float64))
    embed2 = pbmc.obsm["X_umap"].copy()

    # One comparison is sufficient; repeating the identical assertion adds nothing.
    assert_array_almost_equal(embed1, embed2)
def test_scatter_no_basis_per_var(image_comparer):
    """Test scatterplot of per-var points with no basis"""
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=15)
    adata = pbmc68k_reduced()

    # Both axes are given as observation (cell) names.
    x_name = "AAAGCCTGGCTAAC-1"
    y_name = "AAATTCGATGCACA-1"
    sc.pl.scatter(adata, x=x_name, y=y_name, use_raw=False)

    save_and_compare_images("scatter_AAAGCCTGGCTAAC-1_vs_AAATTCGATGCACA-1")
def test_scatter_no_basis_per_obs(image_comparer):
    """Test scatterplot of per-obs points with no basis"""
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=15)
    adata = pbmc68k_reduced()

    scatter_kwargs = dict(x="HES4", y="percent_mito", color="n_genes", use_raw=False)
    sc.pl.scatter(adata, **scatter_kwargs)

    save_and_compare_images("scatter_HES_percent_mito_n_genes")
def _pbmc_scatterplots():
    """Build the pbmc68k_reduced dataset with extra layers, a var column and embeddings.

    Adds the layers "sparse" and "test", the var column "numbers", and runs
    neighbors, t-SNE and diffmap before returning the object.
    """
    # Wrapped in another fixture to avoid mutation
    adata = pbmc68k_reduced()

    adata.layers["sparse"] = adata.raw.X / 2
    adata.layers["test"] = adata.X.copy() + 100
    # One string label per variable: "0", "1", ...
    adata.var["numbers"] = list(map(str, range(adata.shape[1])))

    sc.pp.neighbors(adata)
    sc.tl.tsne(adata, random_state=0, n_pcs=30)
    sc.tl.diffmap(adata)
    return adata
def adatas():
    """Split pbmc68k_reduced into a reference and a new AnnData.

    The reference holds the first 500 cells (with their obs) and gets PCA,
    neighbors and UMAP computed; the new object holds the remaining cells.

    Returns
    -------
    Tuple ``(adata_ref, adata_new)``.
    """
    source = pbmc68k_reduced()
    n_split = 500

    ref_obs = source.obs.iloc[:n_split]
    adata_ref = sc.AnnData(source.X[:n_split, :], obs=ref_obs)
    adata_new = sc.AnnData(source.X[n_split:, :])

    # Only the reference is processed.
    for step in (sc.pp.pca, sc.pp.neighbors, sc.tl.umap):
        step(adata_ref)

    return adata_ref, adata_new
def test_scale():
    """Scaling a view warns, turns it into a copy, and standardises its columns."""
    adata = pbmc68k_reduced()
    adata.X = adata.raw.X
    half = adata.shape[1] // 2
    view = adata[:, 0:half]

    # Should turn view to copy https://github.com/scverse/anndata/issues/171#issuecomment-508689965
    assert view.is_view
    with pytest.warns(Warning, match="view"):
        sc.pp.scale(view)
    assert not view.is_view

    # Per-column variance close to one, mean close to zero.
    n_cols = view.shape[1]
    assert_allclose(view.X.var(axis=0), np.ones(n_cols), atol=0.01)
    assert_allclose(view.X.mean(axis=0), np.zeros(n_cols), atol=0.00001)
def test_rank_genes_groups(image_comparer, name, fn):
    """Plot rank_genes_groups results with `fn` and compare against `master_{name}`."""
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=15)
    adata = pbmc68k_reduced()

    # Rank as many genes as `.raw` has columns.
    n_raw_genes = adata.raw.shape[1]
    sc.tl.rank_genes_groups(adata, 'louvain', n_genes=n_raw_genes)

    # add gene symbol
    adata.var['symbol'] = adata.var.index + "__"

    rc_overrides = {"axes.grid": True, "figure.figsize": (4, 4)}
    with plt.rc_context(rc_overrides):
        fn(adata)
        save_and_compare_images(f"master_{name}")
        plt.close()
def test_diffmap():
    """Diffmap is reproducible by default and changes with a different `random_state`."""
    adata = pbmc68k_reduced()

    def run_diffmap(**kwargs):
        # Recompute the embedding in place and hand back an independent copy.
        sc.tl.diffmap(adata, **kwargs)
        return adata.obsm['X_diffmap'].copy()

    first = run_diffmap()
    second = run_diffmap()
    assert_array_equal(first, second)

    # Checking if specifying random_state works, arrays shouldn't be equal
    seeded = run_diffmap(random_state=1234)
    assert_raises(AssertionError, assert_array_equal, first, seeded)
def test_deprecate_multicore_tsne():
    """Check the warnings emitted for `n_jobs=2` and for `use_fast_tsne=True`."""
    adata = pbmc68k_reduced()

    # (warning category, message pattern, keyword arguments for `sc.tl.tsne`)
    expectations = (
        (
            UserWarning,
            "calling tsne with n_jobs > 1 would use MulticoreTSNE",
            {"n_jobs": 2},
        ),
        (
            FutureWarning,
            "Argument `use_fast_tsne` is deprecated",
            {"use_fast_tsne": True},
        ),
        (
            UserWarning,
            "Falling back to scikit-learn",
            {"use_fast_tsne": True},
        ),
    )
    for category, pattern, tsne_kwargs in expectations:
        with pytest.warns(category, match=pattern):
            sc.tl.tsne(adata, **tsne_kwargs)
def test_scatter_raw(tmp_path):
    """Check that `sc.pl.scatter` output depends on `use_raw`.

    The same scatter plot is rendered once with `use_raw=True` and once with
    `use_raw=False` on the first 100 cells, saved to `tmp_path`, and the two
    images are required to differ.
    """
    pbmc = pbmc68k_reduced()[:100].copy()
    raw_pth = tmp_path / "raw.png"
    x_pth = tmp_path / "X.png"

    sc.pl.scatter(pbmc, color="HES4", basis="umap", use_raw=True)
    plt.savefig(raw_pth, dpi=60)
    plt.close()

    sc.pl.scatter(pbmc, color="HES4", basis="umap", use_raw=False)
    plt.savefig(x_pth, dpi=60)
    plt.close()

    comp = compare_images(str(raw_pth), str(x_pth), tol=5)
    # NOTE(review): assuming this is matplotlib.testing.compare.compare_images,
    # it returns None when the images match within `tol`. Guard against None so
    # identical plots fail with the message below instead of a TypeError.
    assert comp is not None and "Error" in comp, (
        "Plots should change depending on use_raw."
    )
def test_scatter_embedding_groups_and_size(image_comparer):
    """Embedding plot with both `groups` and a per-cell `size` vector.

    Test that the 'groups' parameter sorts cells, such that the cells
    belonging to the groups are plotted on top. This new ordering requires
    that the size vector is also ordered (if given).
    """
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=15)
    adata = pbmc68k_reduced()

    # One size value per cell, growing with the cell's position.
    n_cells = adata.shape[0]
    point_sizes = (np.arange(n_cells) / 40)**1.7
    highlighted = ['CD14+ Monocyte', 'Dendritic']

    sc.pl.embedding(
        adata,
        'umap',
        color=['bulk_labels'],
        groups=highlighted,
        size=point_sizes,
    )
    save_and_compare_images('master_embedding_groups_size')
def test_paga_positions_reproducible():
    """Check exact reproducibility and effect of random_state on paga positions"""
    # https://github.com/scverse/scanpy/issues/1859
    base = pbmc68k_reduced()
    sc.tl.paga(base, "bulk_labels")

    # Three independent copies: two plotted with seed 42, one with seed 13.
    seed42_first = base.copy()
    seed42_second = base.copy()
    seed13 = base.copy()
    runs = ((seed42_first, 42), (seed42_second, 42), (seed13, 13))
    for adata, seed in runs:
        sc.pl.paga(adata, show=False, random_state=seed)

    pos_first = seed42_first.uns["paga"]["pos"]
    pos_second = seed42_second.uns["paga"]["pos"]
    pos_other = seed13.uns["paga"]["pos"]

    # Same seed gives identical positions, a different seed does not.
    np.testing.assert_array_equal(pos_first, pos_second)
    assert pos_first.tolist() != pos_other.tolist()
def test_highly_variable_genes_batches():
    """Compare batch-aware `highly_variable_genes` with per-batch results.

    The first 100 cells form batch '0' and the remaining cells batch '1'; the
    block `[:100, :100]` is overwritten with zeros beforehand. The normalized
    dispersions computed with `batch_key='batch'` are checked against
    equal-weight (0.5 / 0.5) combinations of the values computed separately
    on each batch.
    """
    adata = pbmc68k_reduced()
    adata[:100, :100].X = np.zeros((100, 100))

    adata.obs['batch'] = ['0' if i < 100 else '1' for i in range(adata.n_obs)]
    adata_1 = adata[adata.obs.batch.isin(['0']), :]
    adata_2 = adata[adata.obs.batch.isin(['1']), :]

    sc.pp.highly_variable_genes(
        adata,
        batch_key='batch',
        flavor='cell_ranger',
        n_top_genes=200,
    )

    sc.pp.filter_genes(adata_1, min_cells=1)
    sc.pp.filter_genes(adata_2, min_cells=1)
    hvg1 = sc.pp.highly_variable_genes(
        adata_1, flavor='cell_ranger', n_top_genes=200, inplace=False
    )
    hvg2 = sc.pp.highly_variable_genes(
        adata_2, flavor='cell_ranger', n_top_genes=200, inplace=False
    )

    # Use explicit positional access (`.iloc`): integer keys on a
    # string-labelled Series otherwise rely on pandas' deprecated
    # positional fallback.
    combined = adata.var['dispersions_norm']
    disp_1 = hvg1['dispersions_norm']
    disp_2 = hvg2['dispersions_norm']

    # NOTE(review): positions in hvg1 are offset by 100 relative to `adata`,
    # presumably because `filter_genes` drops the 100 zeroed genes from
    # batch '0' - confirm.
    assert np.isclose(
        combined.iloc[100],
        0.5 * disp_1.iloc[0] + 0.5 * disp_2.iloc[100],
    )
    assert np.isclose(
        combined.iloc[101],
        0.5 * disp_1.iloc[1] + 0.5 * disp_2.iloc[101],
    )
    # For the first gene only the batch '1' value enters the combination.
    assert np.isclose(combined.iloc[0], 0.5 * disp_2.iloc[0])

    colnames = [
        'means',
        'dispersions',
        'dispersions_norm',
        'highly_variable',
    ]
    assert np.all(np.isin(colnames, hvg1.columns))
def test_violin(image_comparer):
    """Compare multi-panel violin plots: plain, grouped, and drawn from a layer."""
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=40)

    # Options that every violin call in this test shares.
    shared = dict(stripplot=True, multi_panel=True, jitter=True, show=False)
    obs_keys = ('n_genes', 'percent_mito', 'n_counts')

    with plt.rc_context():
        sc.pl.set_rcParams_defaults()
        sc.set_figure_params(dpi=50, color_map='viridis')

        adata = pbmc68k_reduced()

        sc.pl.violin(adata, list(obs_keys), **shared)
        save_and_compare_images('master_violin_multi_panel')

        sc.pl.violin(
            adata,
            list(obs_keys),
            ylabel=["foo", "bar", "baz"],
            groupby='bulk_labels',
            rotation=90,
            **shared,
        )
        save_and_compare_images('master_violin_multi_panel_with_groupby')

        # test use of layer
        adata.layers['negative'] = adata.X * -1
        sc.pl.violin(
            adata,
            'CST3',
            groupby='bulk_labels',
            layer='negative',
            use_raw=False,
            rotation=90,
            **shared,
        )
        save_and_compare_images('master_violin_multi_panel_with_layer')
def test_color_cycler(caplog):
    """Plotting under a seaborn colour cycler must not log anything at WARNING level.

    Regression test for https://github.com/scverse/scanpy/issues/1885
    """
    import logging

    adata = pbmc68k_reduced()
    palette = sns.color_palette("deep")
    cycler = sns.rcmod.cycler('color', sns.color_palette("deep"))
    rc_overrides = {
        'axes.prop_cycle': cycler,
        "patch.facecolor": palette[0],
    }

    with caplog.at_level(logging.WARNING), plt.rc_context(rc_overrides):
        sc.pl.umap(adata, color="phase")
        plt.show()
        plt.close()

    assert caplog.text == ""
def test_rank_genes_groups_plots_n_genes_vs_var_names(tmpdir, func, check_same_image):
    """Check negative `n_genes` and dict-valued `var_names` for the plot `func`.

    Plotting with `n_genes=N` must give the same image as passing the top N
    genes per group as a dict, and `n_genes=-N` the same image as passing the
    bottom N genes. Passing both arguments together must raise.
    """
    N = 3
    full = pbmc68k_reduced().raw.to_adata()
    groups = full.obs["louvain"].cat.categories[:3]
    # Keep every third cell of the first three louvain groups.
    adata = full[full.obs["louvain"].isin(groups)][::3].copy()

    sc.tl.rank_genes_groups(adata, groupby="louvain")

    top_genes = {}
    bottom_genes = {}
    ranked = sc.get.rank_genes_groups_df(adata, group=groups)
    for label, rows in ranked.groupby("group"):
        names = rows["names"]
        top_genes[label] = list(names.head(N))
        bottom_genes[label] = list(names.tail(N))

    prefix = func.__name__
    positive_n_pth = tmpdir / f"{prefix}_positive_n.png"
    top_genes_pth = tmpdir / f"{prefix}_top_genes.png"
    negative_n_pth = tmpdir / f"{prefix}_negative_n.png"
    bottom_genes_pth = tmpdir / f"{prefix}_bottom_genes.png"

    def render(pth, **kwargs):
        # Draw with `func`, write the current figure to `pth`, then close it.
        func(adata, groupby="louvain", dendrogram=False, **kwargs)
        plt.savefig(pth)
        plt.close()

    render(positive_n_pth, n_genes=N)
    render(top_genes_pth, var_names=top_genes)
    check_same_image(positive_n_pth, top_genes_pth, tol=1)

    render(negative_n_pth, n_genes=-N)
    render(bottom_genes_pth, var_names=bottom_genes)
    check_same_image(negative_n_pth, bottom_genes_pth, tol=1)

    # Shouldn't be able to pass these together
    with pytest.raises(
        ValueError, match="n_genes and var_names are mutually exclusive"
    ):
        render(tmpdir / "not_written.png", n_genes=N, var_names=top_genes)
def test_wilcoxon_tie_correction(reference):
    """Tie-corrected wilcoxon p-values of `_RankGenes` match scipy's `mannwhitneyu`.

    With a truthy `reference` the first group is compared against the second
    group; otherwise it is compared against all cells outside the first group.
    """
    adata = pbmc68k_reduced()
    groups = ['CD14+ Monocyte', 'Dendritic']
    groupby = 'bulk_labels'
    _, masks = select_groups(adata, groups, groupby)

    X = adata.raw.X[masks[0]].toarray()
    if reference:
        mask_rest = masks[1]
    else:
        mask_rest = ~masks[0]
    Y = adata.raw.X[mask_rest].toarray()

    # Handle scipy versions
    has_vectorized_mwu = version.parse(scipy.__version__) >= version.parse("1.7.0")
    if has_vectorized_mwu:
        mwu = mannwhitneyu(X, Y, use_continuity=False, alternative='two-sided')
        pvals = mwu.pvalue
        pvals[np.isnan(pvals)] = 1.0
    else:
        # Backwards compat, to drop once we drop scipy < 1.7
        n_genes = X.shape[1]
        pvals = np.zeros(n_genes)
        for gene_idx in range(n_genes):
            try:
                _stat, pvals[gene_idx] = mannwhitneyu(
                    X[:, gene_idx],
                    Y[:, gene_idx],
                    use_continuity=False,
                    alternative='two-sided',
                )
            except ValueError:
                pvals[gene_idx] = 1

    if reference:
        ref = groups[1]
    else:
        ref = 'rest'
        # Only the first group is tested when comparing against the rest.
        groups = groups[:1]

    ranker = _RankGenes(adata, groups, groupby, reference=ref)
    ranker.compute_statistics('wilcoxon', tie_correct=True)

    np.testing.assert_allclose(ranker.stats[groups[0]]['pvals'], pvals)
def test_column_content():
    """Use a larger dataset to test column order and content.

    Checks `sc.get.obs_df` and `sc.get.var_df`: the returned columns must
    come back in query order, and each column must equal the matching
    `obs_vector` / `var_vector` of the AnnData object.
    """
    adata = pbmc68k_reduced()

    # test that columns content is correct for obs_df
    query = ['CST3', 'NKG7', 'GNLY', 'louvain', 'n_counts', 'n_genes']
    df = sc.get.obs_df(adata, query)
    # Column order does not depend on the loop variable, so check it once
    # up front (same structure as the var_df check further down).
    np.testing.assert_array_equal(query, df.columns)
    for col in query:
        assert col in df
        np.testing.assert_array_equal(df[col].values, adata.obs_vector(col))

    # test that columns content is correct for var_df
    # Five randomly sampled cell ids plus three var columns.
    cell_ids = list(adata.obs.sample(5).index)
    query = cell_ids + ['highly_variable', 'dispersions_norm', 'dispersions']
    df = sc.get.var_df(adata, query)
    np.testing.assert_array_equal(query, df.columns)
    for col in query:
        np.testing.assert_array_equal(df[col].values, adata.var_vector(col))
def test_rankings(image_comparer):
    """Compare `pca_loadings` plots for several argument combinations."""
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=15)
    adata = pbmc68k_reduced()
    sc.pp.pca(adata)

    # (keyword arguments, expected baseline image); the first three variants
    # are compared against the same baseline.
    variants = (
        ({}, 'master_pca_loadings'),
        ({'components': '1,2,3'}, 'master_pca_loadings'),
        ({'components': [1, 2, 3]}, 'master_pca_loadings'),
        ({'include_lowest': False}, 'master_pca_loadings_without_lowest'),
        ({'n_points': 10}, 'master_pca_loadings_10_points'),
    )
    for loadings_kwargs, baseline in variants:
        sc.pl.pca_loadings(adata, **loadings_kwargs)
        save_and_compare_images(baseline)
def test_stacked_violin_obj(image_comparer, plt):
    """Style a stacked violin plot through its returned figure object and compare it."""
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=26)
    adata = pbmc68k_reduced()

    # Marker genes keyed by cell type.
    markers = {
        'T-cell': ['CD3D', 'CD3E', 'IL32'],
        'B-cell': ['CD79A', 'CD79B', 'MS4A1'],
        'myeloid': ['CST3', 'LYZ'],
    }
    plot_kwargs = dict(
        use_raw=False,
        title="return_fig. add_totals",
        return_fig=True,
    )
    plot = sc.pl.stacked_violin(adata, markers, 'bulk_labels', **plot_kwargs)

    with_totals = plot.add_totals()
    with_totals.style(row_palette='tab20').show()
    save_and_compare_images('master_stacked_violin_return_fig')
def test_multiple_plots(image_comparer):
    """Draw stacked_violin, dotplot and matrixplot side by side into one figure."""
    # only testing stacked_violin, matrixplot and dotplot
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=15)
    adata = pbmc68k_reduced()
    markers = {
        'T-cell': ['CD3D', 'CD3E', 'IL32'],
        'B-cell': ['CD79A', 'CD79B', 'MS4A1'],
        'myeloid': ['CST3', 'LYZ'],
    }

    _fig, axes = plt.subplots(
        1, 3, figsize=(20, 5), gridspec_kw={'wspace': 0.7}
    )
    ax1, ax2, ax3 = axes

    # One panel per plot type, each drawn into its own pre-created axis.
    panels = (
        (sc.pl.stacked_violin, ax1, 'stacked_violin'),
        (sc.pl.dotplot, ax2, 'dotplot'),
        (sc.pl.matrixplot, ax3, 'matrixplot'),
    )
    for plot_fn, axis, title in panels:
        plot_fn(
            adata,
            markers,
            groupby='bulk_labels',
            ax=axis,
            title=title,
            dendrogram=True,
            show=False,
        )

    save_and_compare_images('master_multiple_plots')
def test_filter_genes_dispersion_compare_to_seurat():
    """Compare `filter_genes_dispersion(flavor='seurat')` with the reference table in FILE."""
    seurat_hvg_info = pd.read_csv(FILE, sep=' ')

    adata = pbmc68k_reduced()
    adata.X = adata.raw.X
    adata.var_names_make_unique()

    sc.pp.normalize_per_cell(adata, counts_per_cell_after=1e4)
    dispersion_kwargs = dict(
        flavor='seurat',
        log=True,
        subset=False,
        min_mean=0.0125,
        max_mean=3,
        min_disp=0.5,
    )
    sc.pp.filter_genes_dispersion(adata, **dispersion_kwargs)

    # The boolean selection must match exactly.
    np.testing.assert_array_equal(
        seurat_hvg_info['highly_variable'], adata.var['highly_variable']
    )

    # (still) Not equal to tolerance rtol=2e-05, atol=2e-05:
    # np.testing.assert_allclose(4, 3.9999, rtol=2e-05, atol=2e-05)
    for column in ('means', 'dispersions', 'dispersions_norm'):
        np.testing.assert_allclose(
            seurat_hvg_info[column],
            adata.var[column],
            rtol=2e-05,
            atol=2e-05,
        )