def cell_cycle():
    """Cell-cycle conservation of identical data: every score must be exactly 1."""
    adata = utils.create_adata_dummy()
    adata_int = adata.copy()

    # only final score implementation (aggregated with np.mean)
    final_score = me.cell_cycle(adata, adata_int, batch_key='batch',
                                organism='mouse', agg_func=np.mean, verbose=True)
    print(f"score: {final_score}")
    assert final_score == 1

    # get all intermediate scores as a DataFrame (agg_func=None)
    per_batch = me.cell_cycle(adata, adata_int, batch_key='batch',
                              organism='mouse', agg_func=None, verbose=True)
    print(f"score: {per_batch}")
    assert isinstance(per_batch, pd.DataFrame)
    assert all(value == 1 for value in per_batch['score'])
def silhouette():
    """Scaled silhouette on cell-type labels must lie within [0, 1]."""
    adata = utils.create_adata_dummy(pca=True, n_top_genes=2000)
    result = me.silhouette(adata, group_key='celltype', embed='X_pca', scale=True)
    print(f"score: {result}")
    assert 0 <= result <= 1
def test_all_metrics():
    """Run the full metrics script for every output type on unintegrated data."""
    adata = utils.create_adata_dummy()
    adata_int = adata.copy()
    for output_type in ("full", "embed", "knn"):
        all_metrics(adata, adata_int, script="metrics.py",
                    type_=output_type, method="orig")
def pcr_comparison():
    """PC-regression comparison of a dataset with its own copy is (near-)zero."""
    verbose = True

    # no PCA precomputed
    adata = utils.create_adata_dummy()
    adata_int = adata.copy()
    result = me.pcr_comparison(adata, adata_int, covariate='batch',
                               n_comps=50, scale=True, verbose=verbose)
    print(f"no PCA precomputed: {result}")
    assert result < 1e-6

    # use different embedding
    adata = utils.create_adata_dummy()
    adata_int = adata.copy()
    utils.add_emb(adata_int, type_='full')
    result = me.pcr_comparison(adata, adata_int, covariate='batch', embed='X_emb',
                               n_comps=50, scale=True, verbose=verbose)
    print(f"using embedding: {result}")
    assert 0 <= result <= 1
    assert result < 1e-6

    # precomputed PCA
    adata = utils.create_adata_dummy(pca=True, n_top_genes=2000)
    adata_int = adata.copy()
    result = me.pcr_comparison(adata, adata_int, covariate='batch',
                               scale=True, verbose=verbose)
    print(f"precomputed PCA: {result}")
    assert result == 0  # same PCA values -> difference should be 0
def silhouette_batch():
    """Mean per-group batch-silhouette must be a score within [0, 1]."""
    adata = utils.create_adata_dummy(pca=True, n_top_genes=2000)
    _, per_group = me.silhouette_batch(adata, batch_key='batch',
                                       group_key='celltype', embed='X_pca',
                                       scale=True, verbose=False)
    mean_score = per_group['silhouette_score'].mean()
    print(f"score: {mean_score}")
    assert 0 <= mean_score <= 1
def test_cluster():
    """Louvain optimisation returns per-resolution scores and an assignment."""
    adata = utils.create_adata_dummy(pca=True, n_top_genes=2000, neighbors=True)
    _, _, score_all, clustering = cl.opt_louvain(
        adata, label_key='celltype', cluster_key='cluster',
        plot=True, inplace=False)
    assert isinstance(score_all, pd.DataFrame)
    assert isinstance(clustering, pd.Series)
def metrics_all_methods():
    """Build a mapping of integration-method names to their runner functions.

    NOTE(review): the ``methods`` dict is constructed but never used or
    returned, and the dummy ``adata`` is never passed anywhere — this function
    looks unfinished (presumably it was meant to run each method and compute
    metrics); confirm intent before relying on it.
    """
    adata = utils.create_adata_dummy()
    # method name -> runner callable (runners are defined elsewhere in the project)
    methods = {
        'scanorama': runScanorama,
        'trvae': runTrVae,
        'seurat': runSeurat,
        'harmony': runHarmony,
        'mnn': runMNN,
        'bbknn': runBBKNN,
        'conos': runConos,
        'scvi': runScvi
    }
def ari():
    """ARI: a labeling vs. itself scores 1; cluster vs. cell type is in [0, 1]."""
    adata = utils.create_adata_dummy(pca=True, n_top_genes=2000, neighbors=True)

    # trivial score: compare the cell-type labeling against itself
    trivial = scIB.me.ari(adata, 'celltype', 'celltype')
    assert trivial == 1

    # on cell type
    cluster(adata, cluster_key='cluster', label_key='celltype')
    result = me.ari(adata, group1='cluster', group2='celltype')
    print(f"score: {result}")
    assert 0 <= result <= 1
def isolated_labels():
    """Both isolated-label implementations must yield a score within [0, 1]."""
    adata = utils.create_adata_dummy(pca=True, n_top_genes=2000, neighbors=True)
    # test 2 different implementations of score
    for use_cluster in (True, False):
        result = me.isolated_labels(adata, label_key='celltype',
                                    batch_key='batch', cluster=use_cluster,
                                    n=4, verbose=True)
        print(f"score: {result}")
        assert 0 <= result <= 1
def nmi():
    """NMI: a labeling vs. itself scores 1; all clustering scores are in [0, 1]."""
    adata = utils.create_adata_dummy(pca=True, n_top_genes=2000, neighbors=True)

    # trivial score: compare the cell-type labeling against itself
    trivial = scIB.me.nmi(adata, 'celltype', 'celltype')
    assert trivial == 1

    # on cell type
    _, _, nmi_all = cluster(adata, cluster_key='cluster',
                            label_key='celltype', verbose=True)
    for value in nmi_all['score']:
        print(value)
        assert 0 <= value <= 1
def hvg_overlap():
    """HVG overlap between a dataset and its own copy must be perfect (1)."""
    adata = utils.create_adata_dummy()
    adata_int = adata.copy()
    result = me.hvg_overlap(adata_int, adata, batch='batch', n_hvg=500)
    print(f"score: {result}")
    assert result == 1
def setup_test_directory(methods):
    """
    Create the files needed for a self-contained pipeline test directory:
    input/output subdirectories, a dummy AnnData input file, and a JSON
    config; returns the relevant paths and the config.

    TODO: use fixtures
    TODO: create environments
    :params methods: list of method names to be used
    """
    if isinstance(methods, str):
        methods = [methods]

    data_dir = os.path.abspath(f"./pipeline-{'_'.join(methods)}")
    create_if_missing(data_dir)
    print(f"created {data_dir}")

    # create input and output directories
    input_dir = os.path.join(data_dir, "input")
    output_dir = os.path.join(data_dir, "output")
    create_if_missing(input_dir)
    create_if_missing(output_dir)

    # write data files (skip if the dummy input already exists)
    input_adata_file = os.path.join(input_dir, "adata_raw.h5ad")
    if not os.path.isfile(input_adata_file):
        dummy = utils.create_adata_dummy(pca=True, n_top_genes=2000,
                                         neighbors=True)
        dummy.write(input_adata_file)

    # write config file
    config = {
        "ROOT": output_dir,
        "r_env": "benchmarking_data_integration_dev",
        "py_env": "benchmarking_data_integration_dev",
        "conv_env": "benchmarking_data_integration_dev",
        "timing": False,
        "FEATURE_SELECTION": {"hvg": 2000, "full_feature": 0},
        "SCALING": ["unscaled", "scaled"],
        "METHODS": {name: METHODS[name] for name in methods},
        "DATA_SCENARIOS": {
            "test_data": {
                "batch_key": "batch",
                "label_key": "celltype",
                "organism": "mouse",
                "assay": "expression",
                "file": input_adata_file,
            }
        },
    }
    config_file = os.path.join(data_dir, "config.json")
    with open(config_file, 'w') as f:
        f.write(json.dumps(config, indent=4))

    # repository root: two levels above the installed scIB package file
    workdir = pathlib.Path(scIB.__file__).parent.parent

    return {
        "workdir": workdir,
        "config": config,
        "configfile": config_file,
        "data_dir": data_dir,
        "input_dir": input_dir,
        "output_dir": output_dir,
    }