Exemple #1
0
def cell_cycle():
    """Exercise ``me.cell_cycle`` on a dummy dataset and an unmodified copy.

    Two call modes are checked:

    * ``agg_func=np.mean`` -- the returned value is asserted to equal 1.
    * ``agg_func=None`` -- the result is asserted to be a ``pd.DataFrame``
      whose ``'score'`` column contains only values equal to 1.
    """
    reference = utils.create_adata_dummy()
    integrated = reference.copy()

    # arguments shared by both invocations below
    shared_kwargs = {
        'batch_key': 'batch',
        'organism': 'mouse',
        'verbose': True,
    }

    # aggregated mode: a single final score
    score = me.cell_cycle(reference,
                          integrated,
                          agg_func=np.mean,
                          **shared_kwargs)
    print(f"score: {score}")
    assert score == 1

    # non-aggregated mode: table of intermediate scores
    scores_df = me.cell_cycle(reference,
                              integrated,
                              agg_func=None,
                              **shared_kwargs)
    print(f"score: {scores_df}")
    assert isinstance(scores_df, pd.DataFrame)
    assert all(value == 1 for value in scores_df['score'])
Exemple #2
0
def silhouette():
    """Check that the scaled ``me.silhouette`` score lies within [0, 1].

    The score is computed on the ``'X_pca'`` embedding of a dummy dataset,
    grouped by the ``'celltype'`` key.
    """
    dummy = utils.create_adata_dummy(pca=True, n_top_genes=2000)
    silhouette_kwargs = {
        'group_key': 'celltype',
        'embed': 'X_pca',
        'scale': True,
    }
    score = me.silhouette(dummy, **silhouette_kwargs)
    print(f"score: {score}")
    # lower and upper bound checked in one chained comparison
    assert 0 <= score <= 1
Exemple #3
0
def test_all_metrics():
    """Run ``all_metrics`` once for each output type on an unintegrated copy.

    The output types exercised are ``"full"``, ``"embed"`` and ``"knn"``;
    the test passes as long as none of the calls raises.
    """
    reference = utils.create_adata_dummy()
    integrated = reference.copy()

    output_types = ["full", "embed", "knn"]
    for output_type in output_types:
        all_metrics(
            reference,
            integrated,
            script="metrics.py",
            type_=output_type,
            method="orig",
        )
Exemple #4
0
def pcr_comparison():
    """Exercise ``me.pcr_comparison`` in three configurations.

    In every scenario the "integrated" object starts as a copy of the
    reference object, and the covariate is ``'batch'``:

    1. no precomputed PCA, ``n_comps=50`` -- score must be below 1e-6;
    2. an embedding added via ``utils.add_emb`` and selected with
       ``embed='X_emb'`` -- score must be in [0, 1] and below 1e-6;
    3. PCA precomputed by the dummy factory -- score must be exactly 0.
    """
    verbose = True

    # scenario 1: no PCA precomputed
    raw = utils.create_adata_dummy()
    raw_copy = raw.copy()
    score = me.pcr_comparison(
        raw,
        raw_copy,
        covariate='batch',
        n_comps=50,
        scale=True,
        verbose=verbose,
    )
    print(f"no PCA precomputed: {score}")
    assert score < 1e-6

    # scenario 2: use a different embedding on the integrated object
    raw = utils.create_adata_dummy()
    with_embedding = raw.copy()
    utils.add_emb(with_embedding, type_='full')
    score = me.pcr_comparison(
        raw,
        with_embedding,
        covariate='batch',
        embed='X_emb',
        n_comps=50,
        scale=True,
        verbose=verbose,
    )
    print(f"using embedding: {score}")
    assert 0 <= score <= 1
    assert score < 1e-6

    # scenario 3: precomputed PCA
    with_pca = utils.create_adata_dummy(pca=True, n_top_genes=2000)
    with_pca_copy = with_pca.copy()
    score = me.pcr_comparison(
        with_pca,
        with_pca_copy,
        covariate='batch',
        scale=True,
        verbose=verbose,
    )
    print(f"precomputed PCA: {score}")
    # identical PCA values on both sides -> difference should be 0
    assert score == 0
Exemple #5
0
def silhouette_batch():
    """Check that the mean of the batch silhouette scores lies in [0, 1].

    ``me.silhouette_batch`` is unpacked as a pair; only the second element
    is used, and the mean of its ``'silhouette_score'`` entries is the
    value under test.
    """
    dummy = utils.create_adata_dummy(pca=True, n_top_genes=2000)
    _unused, per_group = me.silhouette_batch(
        dummy,
        batch_key='batch',
        group_key='celltype',
        embed='X_pca',
        scale=True,
        verbose=False,
    )
    score = per_group['silhouette_score'].mean()
    print(f"score: {score}")
    assert 0 <= score <= 1
Exemple #6
0
def test_cluster():
    """Check the types of the last two values returned by ``cl.opt_louvain``.

    The call is made with ``plot=True`` and ``inplace=False`` and is
    unpacked into four values; the third must be a ``pd.DataFrame`` and the
    fourth a ``pd.Series``.
    """
    dummy = utils.create_adata_dummy(
        pca=True,
        n_top_genes=2000,
        neighbors=True,
    )

    louvain_result = cl.opt_louvain(
        dummy,
        label_key='celltype',
        cluster_key='cluster',
        plot=True,
        inplace=False,
    )
    # the first two returned values are not inspected by this test
    _first, _second, score_all, clustering = louvain_result

    assert isinstance(score_all, pd.DataFrame)
    assert isinstance(clustering, pd.Series)
Exemple #7
0
def metrics_all_methods():
    """Create a dummy dataset and a name -> runner mapping of methods.

    NOTE(review): in the code visible here neither the dataset nor the
    mapping is used afterwards and nothing is returned -- the snippet may
    be truncated; confirm against the full source.
    """
    adata = utils.create_adata_dummy()

    # integration method name -> callable that runs it
    methods = dict(
        scanorama=runScanorama,
        trvae=runTrVae,
        seurat=runSeurat,
        harmony=runHarmony,
        mnn=runMNN,
        bbknn=runBBKNN,
        conos=runConos,
        scvi=runScvi,
    )
Exemple #8
0
def ari():
    """Exercise the ARI metric on a dummy dataset.

    First the trivial case is checked: comparing ``'celltype'`` with itself
    must give exactly 1.  Then ``cluster`` is run to populate the
    ``'cluster'`` key and the ARI between ``'cluster'`` and ``'celltype'``
    is asserted to lie in [0, 1].
    """
    dummy = utils.create_adata_dummy(
        pca=True,
        n_top_genes=2000,
        neighbors=True,
    )

    # trivial score: a labelling compared against itself
    trivial_score = scIB.me.ari(dummy, 'celltype', 'celltype')
    assert trivial_score == 1

    # clustering vs. cell type annotation
    cluster(dummy, cluster_key='cluster', label_key='celltype')
    score = me.ari(dummy, group1='cluster', group2='celltype')
    print(f"score: {score}")
    assert 0 <= score <= 1
Exemple #9
0
def isolated_labels():
    """Check ``me.isolated_labels`` for both values of its ``cluster`` flag.

    The metric is evaluated with ``cluster=True`` and then ``cluster=False``
    (two different implementations of the score); each result must lie in
    [0, 1].
    """
    dummy = utils.create_adata_dummy(
        pca=True,
        n_top_genes=2000,
        neighbors=True,
    )

    # True first, then False -- same order as the flag values are listed
    for use_cluster in (True, False):
        score = me.isolated_labels(
            dummy,
            label_key='celltype',
            batch_key='batch',
            cluster=use_cluster,
            n=4,
            verbose=True,
        )
        print(f"score: {score}")
        assert 0 <= score <= 1
Exemple #10
0
def nmi():
    """Exercise the NMI metric on a dummy dataset.

    The trivial comparison of ``'celltype'`` with itself must give exactly
    1.  Afterwards ``cluster`` is run and every entry of the ``'score'``
    column of its third return value is printed and asserted to lie in
    [0, 1].
    """
    dummy = utils.create_adata_dummy(
        pca=True,
        n_top_genes=2000,
        neighbors=True,
    )

    # trivial score: a labelling compared against itself
    trivial_score = scIB.me.nmi(dummy, 'celltype', 'celltype')
    assert trivial_score == 1

    # scores collected while clustering against the cell type labels
    _first, _second, nmi_all = cluster(
        dummy,
        cluster_key='cluster',
        label_key='celltype',
        verbose=True,
    )
    for candidate in nmi_all['score']:
        print(candidate)
        assert 0 <= candidate <= 1
Exemple #11
0
def hvg_overlap():
    """Check that ``me.hvg_overlap`` of a dataset with its own copy is 1.

    The copy is passed first and the original second, with ``batch='batch'``
    and ``n_hvg=500``.
    """
    original = utils.create_adata_dummy()
    duplicate = original.copy()
    score = me.hvg_overlap(
        duplicate,
        original,
        batch='batch',
        n_hvg=500,
    )
    print(f"score: {score}")
    assert score == 1
Exemple #12
0
def setup_test_directory(methods):
    """
    Create the files and folders needed for a pipeline test directory.

    The directory ``./pipeline-<methods joined by '_'>`` (made absolute) is
    created together with ``input`` and ``output`` subdirectories.  A dummy
    dataset is written to ``input/adata_raw.h5ad`` unless that file already
    exists, and a ``config.json`` describing the run is written into the
    test directory.

    TODO: use fixtures
    TODO: create environments

    :param methods: a method name or a list of method names to be used;
        each name is looked up in ``METHODS``
    :return: dict with the keys ``workdir``, ``config``, ``configfile``,
        ``data_dir``, ``input_dir`` and ``output_dir``
    """
    # accept a single method name as well as a list of names
    if isinstance(methods, str):
        methods = [methods]

    data_dir = os.path.abspath(f"./pipeline-{'_'.join(methods)}")
    create_if_missing(data_dir)
    print(f"created {data_dir}")

    # input and output directories live directly below the test directory
    input_dir = os.path.join(data_dir, "input")
    create_if_missing(input_dir)
    output_dir = os.path.join(data_dir, "output")
    create_if_missing(output_dir)

    # the dummy input data is only generated when no file is present yet
    input_adata_file = os.path.join(input_dir, "adata_raw.h5ad")
    if not os.path.isfile(input_adata_file):
        dummy = utils.create_adata_dummy(
            pca=True,
            n_top_genes=2000,
            neighbors=True,
        )
        dummy.write(input_adata_file)

    # assemble the configuration piece by piece
    env_name = "benchmarking_data_integration_dev"
    feature_selection = {"hvg": 2000, "full_feature": 0}
    selected_methods = {name: METHODS[name] for name in methods}
    test_scenario = {
        "batch_key": "batch",
        "label_key": "celltype",
        "organism": "mouse",
        "assay": "expression",
        "file": input_adata_file,
    }
    config = {
        "ROOT": output_dir,
        "r_env": env_name,
        "py_env": env_name,
        "conv_env": env_name,
        "timing": False,
        "FEATURE_SELECTION": feature_selection,
        "SCALING": ["unscaled", "scaled"],
        "METHODS": selected_methods,
        "DATA_SCENARIOS": {"test_data": test_scenario},
    }

    config_file = os.path.join(data_dir, "config.json")
    with open(config_file, 'w') as handle:
        json.dump(config, handle, indent=4)

    # two levels above the scIB package's __init__ file
    workdir = pathlib.Path(scIB.__file__).parent.parent

    return {
        "workdir": workdir,
        "config": config,
        "configfile": config_file,
        "data_dir": data_dir,
        "input_dir": input_dir,
        "output_dir": output_dir,
    }