コード例 #1
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_extract_edge_weights(create_testfolder):
    """Check that ``extract_edge_weights`` returns ``None`` for the stored data.

    The object is read from ``test.h5`` in the test folder and queried twice:
    once with default arguments and once with ``expanded_only=True``.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    default_weights = ddl.tl.extract_edge_weights(dandelion)
    assert default_weights is None
    expanded_weights = ddl.tl.extract_edge_weights(dandelion,
                                                   expanded_only=True)
    assert expanded_weights is None
コード例 #2
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_diversity_gini2(create_testfolder, metric):
    """Run resampled ``clone_diversity`` for ``metric`` and check its columns.

    ``sample_id`` and ``contig_QC_pass`` are overwritten with constant values
    and pushed into the metadata before the diversity call. Which metadata
    columns must be non-empty afterwards depends on ``metric``.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    dandelion.data['sample_id'] = 'sample_test'
    dandelion.data['contig_QC_pass'] = '******'
    ddl.update_metadata(dandelion,
                        retrieve=['sample_id', 'contig_QC_pass'],
                        split=False)
    ddl.tl.clone_diversity(dandelion,
                           groupby='sample_id',
                           resample=True,
                           downsample=6,
                           key='sequence',
                           n_resample=5,
                           metric=metric)

    # Pick the columns to verify for this metric; unknown metrics check none.
    if metric is None or metric == 'clone_network':
        expected_columns = ('clone_network_cluster_size_gini',
                            'clone_network_vertex_size_gini')
    elif metric == 'clone_degree':
        expected_columns = ('clone_degree', 'clone_size_gini',
                            'clone_degree_gini')
    elif metric == 'clone_centrality':
        expected_columns = ('clone_centrality', 'clone_centrality_gini')
    else:
        expected_columns = ()

    for column in expected_columns:
        assert not getattr(dandelion.metadata, column).empty
コード例 #3
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_clone_size(create_testfolder):
    """Check that ``clone_size`` fills ``clone_id_size`` in the metadata.

    The call is made once with defaults and once with ``max_size=3``.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    for size_kwargs in ({}, {'max_size': 3}):
        ddl.tl.clone_size(dandelion, **size_kwargs)
        assert not dandelion.metadata.clone_id_size.empty
コード例 #4
0
def test_diversity_shannon(create_testfolder, resample, normalize):
    """Run ``clone_diversity`` with ``method='shannon'`` and check the output.

    ``downsample=6`` is passed only when ``resample`` is truthy. The metadata
    column that is checked depends on ``normalize``. A final call with
    ``update_obs_meta=False`` must return a ``pandas.DataFrame``.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")

    diversity_kwargs = {
        'groupby': 'sample_id',
        'method': 'shannon',
        'resample': resample,
        'normalize': normalize,
    }
    if resample:
        diversity_kwargs['downsample'] = 6
    ddl.tl.clone_diversity(dandelion, **diversity_kwargs)

    result_column = ('clone_size_normalized_shannon'
                     if normalize else 'clone_size_shannon')
    assert not getattr(dandelion.metadata, result_column).empty

    table = ddl.tl.clone_diversity(dandelion,
                                   groupby='sample_id',
                                   method='shannon',
                                   update_obs_meta=False)
    assert isinstance(table, pd.DataFrame)
コード例 #5
0
ファイル: test_plotting.py プロジェクト: zktuong/dandelion
def test_plot_stackedbar(create_testfolder, norm):
    """Check that ``stackedbarplot`` returns something for the stored data.

    Colours by ``v_call_genotyped_VDJ``, groups by ``isotype`` and forwards
    ``norm`` as the ``normalize`` argument.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    plot_kwargs = {
        'color': 'v_call_genotyped_VDJ',
        'groupby': 'isotype',
        'normalize': norm,
    }
    axes = ddl.pl.stackedbarplot(dandelion, **plot_kwargs)
    assert axes is not None
コード例 #6
0
ファイル: test_plotting.py プロジェクト: zktuong/dandelion
def test_plot_spectratype(create_testfolder):
    """Check that ``spectratype`` returns something for the stored data.

    Uses ``junction_length`` as colour, ``c_call`` as grouping and the
    ``IGH`` locus.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    plot_kwargs = {
        'color': 'junction_length',
        'groupby': 'c_call',
        'locus': 'IGH',
    }
    axes = ddl.pl.spectratype(dandelion, **plot_kwargs)
    assert axes is not None
コード例 #7
0
ファイル: test_airr_input.py プロジェクト: grst/dandelion
def test_filter():
    """Run ``filter_bcr`` on the stored test data and write the results back.

    Reads ``tests/sctest.h5`` and ``tests/test.h5``, sets ``filter_rna`` to
    ``False`` for every cell, filters, then writes ``tests/sctest.h5ad`` and
    overwrites ``tests/test.h5``.
    """
    rna = sc.read_10x_h5("tests/sctest.h5")
    vdj = ddl.read_h5("tests/test.h5")
    rna.obs["filter_rna"] = False
    vdj, rna = ddl.pp.filter_bcr(vdj, rna)
    rna.write("tests/sctest.h5ad", compression="gzip")
    vdj.write_h5("tests/test.h5", compression="bzip2")
    print(vdj)
コード例 #8
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_find_clones(create_testfolder):
    """Run ``find_clones`` and check that four distinct clone ids result.

    ``clone_id`` must be non-empty in both ``data`` and ``metadata``. The
    updated object is written back to the same ``test.h5`` file.
    """
    h5_path = create_testfolder / "test.h5"
    dandelion = ddl.read_h5(h5_path)
    ddl.tl.find_clones(dandelion)
    assert not dandelion.data.clone_id.empty
    assert not dandelion.metadata.clone_id.empty
    # Count distinct clone ids, ignoring null entries.
    distinct_clones = {
        clone
        for clone in dandelion.metadata['clone_id'] if pd.notnull(clone)
    }
    assert len(distinct_clones) == 4
    dandelion.write_h5(h5_path)
コード例 #9
0
def test_diversity_gini(create_testfolder):
    """Run default ``clone_diversity`` and check the returned table type.

    The first call uses defaults; the second passes
    ``update_obs_meta=False`` and must return a ``pandas.DataFrame``.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    ddl.tl.clone_diversity(dandelion, groupby='sample_id')
    table = ddl.tl.clone_diversity(dandelion,
                                   groupby='sample_id',
                                   update_obs_meta=False)
    assert isinstance(table, pd.DataFrame)
コード例 #10
0
ファイル: test_airr_input.py プロジェクト: grst/dandelion
def test_create_germlines():
    """Update germlines, run ``create_germlines`` and save the result.

    Reads ``tests/test.h5``, uses the germline folder under ``database/``
    for both steps, then overwrites ``tests/test.h5``.
    """
    germline_dir = "database/germlines/imgt/human/vdj/"
    vdj = ddl.read_h5("tests/test.h5")
    vdj.update_germline(germline=germline_dir)
    ddl.pp.create_germlines(vdj,
                            germline=germline_dir,
                            v_field="v_call",
                            germ_types="dmask")
    vdj.write_h5("tests/test.h5", compression="bzip2")
    print(vdj)
コード例 #11
0
ファイル: test_mouse.py プロジェクト: ktpolanski/dandelion
def test_generate_network(create_testfolder):
    """Run ``find_clones`` then ``generate_network`` and check the outputs.

    ``distance``, ``edges``, ``layout`` and ``graph`` must all be set, and
    ``n_obs`` must equal 392.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    ddl.tl.find_clones(dandelion)
    ddl.tl.generate_network(dandelion)
    for attribute in ('distance', 'edges'):
        assert getattr(dandelion, attribute) is not None
    assert dandelion.n_obs == 392
    for attribute in ('layout', 'graph'):
        assert getattr(dandelion, attribute) is not None
コード例 #12
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_diversity_gini(create_testfolder):
    """Check ``clone_diversity`` columns before and after ``generate_network``.

    The default call must fill the two ``clone_network_*_gini`` columns.
    After ``generate_network``, the ``clone_centrality`` metric must fill
    ``clone_centrality_gini`` and ``clone_size_gini``.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")

    ddl.tl.clone_diversity(dandelion, groupby='sample_id')
    for column in ('clone_network_vertex_size_gini',
                   'clone_network_cluster_size_gini'):
        assert not getattr(dandelion.metadata, column).empty

    ddl.tl.generate_network(dandelion)
    ddl.tl.clone_diversity(dandelion,
                           groupby='sample_id',
                           metric='clone_centrality')
    for column in ('clone_centrality_gini', 'clone_size_gini'):
        assert not getattr(dandelion.metadata, column).empty
コード例 #13
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_transfer(create_testfolder, dummy_adata):
    """Check that ``transfer`` adds ``clone_id`` and ``X_vdj`` to the AnnData.

    ``clone_id`` must appear in ``obs`` after the first transfer, and
    ``X_vdj`` in ``obsm`` after ``generate_network`` and a second transfer.
    The AnnData is then written to ``test.h5ad`` in the test folder.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    # NOTE(review): the filtered AnnData returned here is not used below; the
    # transfers act on ``dummy_adata`` — confirm this is intended.
    dandelion, adata = ddl.pp.filter_contigs(dandelion, dummy_adata)

    ddl.tl.transfer(dummy_adata, dandelion)
    assert 'clone_id' in dummy_adata.obs

    ddl.tl.generate_network(dandelion)
    ddl.tl.transfer(dummy_adata, dandelion)
    assert 'X_vdj' in dummy_adata.obsm

    h5ad_path = create_testfolder / "test.h5ad"
    dummy_adata.write_h5ad(h5ad_path)
コード例 #14
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_setup(create_testfolder, airr_reannotated, dummy_adata):
    """Filter contigs, check the resulting sizes and save ``test.h5``.

    After writing, the test folder must contain exactly one entry, and the
    file must read back with non-``None`` metadata.
    """
    filtered_vdj, filtered_adata = ddl.pp.filter_contigs(
        airr_reannotated, dummy_adata)
    assert airr_reannotated.shape[0] == 9
    assert filtered_vdj.data.shape[0] == 7
    assert filtered_vdj.metadata.shape[0] == 4
    assert filtered_adata.n_obs == 5

    h5_path = create_testfolder / "test.h5"
    filtered_vdj.write_h5(h5_path)
    folder_entries = list(create_testfolder.iterdir())
    assert len(folder_entries) == 1

    reloaded = ddl.read_h5(h5_path)
    assert reloaded.metadata is not None
コード例 #15
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_find_clones_key(create_testfolder):
    """Run ``find_clones`` with a custom ``key_added`` and build a network.

    The ``test_clone`` column must be non-empty in the metadata and have
    ``object`` dtype in the data. After ``generate_network`` with
    ``clone_key='test_clone'``, ``edges`` is expected to be ``None`` while
    ``distance``, ``layout`` and ``graph`` are set.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")

    ddl.tl.find_clones(dandelion, key_added='test_clone')
    assert not dandelion.metadata.test_clone.empty
    assert dandelion.data.test_clone.dtype == 'object'

    ddl.tl.generate_network(dandelion, clone_key='test_clone')
    assert dandelion.distance is not None
    assert dandelion.edges is None
    for attribute in ('layout', 'graph'):
        assert getattr(dandelion, attribute) is not None
コード例 #16
0
ファイル: test_plotting.py プロジェクト: zktuong/dandelion
def test_plot_bar(create_testfolder, sort, norm):
    """Check that ``barplot`` returns something for three argument sets.

    The plot is always coloured by ``v_call_genotyped_VDJ``. It is called
    with defaults, then with ``sort_descending=sort``, then with
    ``normalize=norm``.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    option_sets = (
        {},
        {'sort_descending': sort},
        {'normalize': norm},
    )
    for options in option_sets:
        axes = ddl.pl.barplot(dandelion,
                              color='v_call_genotyped_VDJ',
                              **options)
        assert axes is not None
コード例 #17
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_diversity_rarefaction3(create_testfolder):
    """Check the rarefaction tool returns a dict and the plot returns a value.

    ``sample_id`` and ``contig_QC_pass`` are overwritten with constant values
    and pushed into the metadata before either call.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    dandelion.data['sample_id'] = 'sample_test'
    dandelion.data['contig_QC_pass'] = '******'
    ddl.update_metadata(dandelion,
                        retrieve=['sample_id', 'contig_QC_pass'],
                        split=False)

    rarefaction = ddl.tl.clone_rarefaction(dandelion, groupby='sample_id')
    assert isinstance(rarefaction, dict)

    plot = ddl.pl.clone_rarefaction(dandelion, color='sample_id')
    assert plot is not None
コード例 #18
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_diversity2c(create_testfolder):
    """Check ``clone_diversity`` returns a DataFrame with ``key='sequence'``.

    ``sample_id`` and ``contig_QC_pass`` are overwritten with constant values
    and pushed into the metadata first. The call passes
    ``update_obs_meta=False``.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    dandelion.data['sample_id'] = 'sample_test'
    dandelion.data['contig_QC_pass'] = '******'
    ddl.update_metadata(dandelion,
                        retrieve=['sample_id', 'contig_QC_pass'],
                        split=False)

    diversity_kwargs = {
        'groupby': 'sample_id',
        'key': 'sequence',
        'update_obs_meta': False,
    }
    table = ddl.tl.clone_diversity(dandelion, **diversity_kwargs)
    assert isinstance(table, pd.DataFrame)
コード例 #19
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_diversity2b(create_testfolder):
    """Run ``clone_diversity`` with ``use_contracted=True`` and check columns.

    ``sample_id`` and ``contig_QC_pass`` are overwritten with constant values
    and pushed into the metadata first. Both ``clone_network_*_gini`` columns
    must be non-empty afterwards.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    dandelion.data['sample_id'] = 'sample_test'
    dandelion.data['contig_QC_pass'] = '******'
    ddl.update_metadata(dandelion,
                        retrieve=['sample_id', 'contig_QC_pass'],
                        split=False)

    ddl.tl.clone_diversity(dandelion,
                           groupby='sample_id',
                           use_contracted=True,
                           key='sequence')
    for column in ('clone_network_cluster_size_gini',
                   'clone_network_vertex_size_gini'):
        assert not getattr(dandelion.metadata, column).empty
コード例 #20
0
def test_diversity2a(create_testfolder):
    """Run ``clone_diversity`` with ``reconstruct_network=False``.

    ``sample_id`` and ``contig_QC_pass`` are overwritten with constant values
    and retrieved into the metadata with the ``'merge and unique only'``
    mode. Both ``clone_network_*_gini`` columns must be non-empty afterwards.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    dandelion.data['sample_id'] = 'sample_test'
    dandelion.data['contig_QC_pass'] = '******'

    columns_to_retrieve = ['sample_id', 'contig_QC_pass']
    retrieve_modes = ['merge and unique only', 'merge and unique only']
    ddl.update_metadata(dandelion,
                        retrieve=columns_to_retrieve,
                        retrieve_mode=retrieve_modes)

    ddl.tl.clone_diversity(dandelion,
                           groupby='sample_id',
                           reconstruct_network=False,
                           key='sequence')
    for column in ('clone_network_cluster_size_gini',
                   'clone_network_vertex_size_gini'):
        assert not getattr(dandelion.metadata, column).empty
コード例 #21
0
def test_diversity_gini(create_testfolder):
    """Check ``clone_diversity`` columns and its DataFrame return value.

    The default call must fill the two ``clone_network_*_gini`` columns.
    After ``generate_network``, the ``clone_centrality`` metric must fill
    ``clone_centrality_gini`` and ``clone_size_gini``. With
    ``update_obs_meta=False`` the call must return a ``pandas.DataFrame``.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")

    ddl.tl.clone_diversity(dandelion, groupby='sample_id')
    for column in ('clone_network_vertex_size_gini',
                   'clone_network_cluster_size_gini'):
        assert not getattr(dandelion.metadata, column).empty

    ddl.tl.generate_network(dandelion)
    centrality_kwargs = {'groupby': 'sample_id', 'metric': 'clone_centrality'}
    ddl.tl.clone_diversity(dandelion, **centrality_kwargs)
    for column in ('clone_centrality_gini', 'clone_size_gini'):
        assert not getattr(dandelion.metadata, column).empty

    table = ddl.tl.clone_diversity(dandelion,
                                   update_obs_meta=False,
                                   **centrality_kwargs)
    assert isinstance(table, pd.DataFrame)
コード例 #22
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_diversity_chao(create_testfolder, resample):
    """Run ``clone_diversity`` with ``method='chao1'`` and check its column.

    ``downsample=6`` is passed only when ``resample`` is truthy. The
    ``clone_size_chao1`` metadata column must be non-empty afterwards.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    diversity_kwargs = {
        'groupby': 'sample_id',
        'method': 'chao1',
        'resample': resample,
    }
    if resample:
        diversity_kwargs['downsample'] = 6
    ddl.tl.clone_diversity(dandelion, **diversity_kwargs)
    assert not dandelion.metadata.clone_size_chao1.empty
コード例 #23
0
ファイル: test_io.py プロジェクト: ktpolanski/dandelion
def test_readwrite_h5(create_testfolder):
    """Round-trip a Dandelion object through ``write_h5`` / ``read_h5``.

    The object is built from ``test_airr_reannotated.tsv`` and written three
    times: with defaults, with ``complib='blosc:lz4'`` and with
    ``compression='blosc:lz4'``. Each time the length columns must be
    non-empty after reading back. Passing both ``complib`` and
    ``compression`` must raise ``ValueError``.
    """
    tsv_path = f"{create_testfolder}/test_airr_reannotated.tsv"
    h5_path = f"{create_testfolder}/test_airr_reannotated.h5"
    length_columns = ('np1_length', 'np2_length', 'junction_length')

    original = ddl.Dandelion(tsv_path)
    for column in length_columns:
        assert not getattr(original.data, column).empty

    write_variants = (
        {},
        {'complib': 'blosc:lz4'},
        {'compression': 'blosc:lz4'},
    )
    for write_kwargs in write_variants:
        original.write_h5(h5_path, **write_kwargs)
        reloaded = ddl.read_h5(h5_path)
        for column in length_columns:
            assert not getattr(reloaded.data, column).empty

    with pytest.raises(ValueError):
        original.write_h5(h5_path,
                          complib='blosc:lz4',
                          compression='blosc:lz4')
コード例 #24
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_generate_network(create_testfolder, resample, expected):
    """Check ``generate_network`` with and without downsampling.

    When ``resample`` is not ``None`` it is passed as ``downsample`` and the
    returned object replaces the loaded one; otherwise the call's return
    value is ignored. ``n_obs`` must equal ``expected`` and ``edges`` is
    expected to be ``None``. After every contig is given clone id ``'1'`` and
    the object is rebuilt, ``edges`` must be set.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    if resample is None:
        ddl.tl.generate_network(dandelion)
    else:
        dandelion = ddl.tl.generate_network(dandelion, downsample=resample)

    assert dandelion.distance is not None
    assert dandelion.edges is None
    assert dandelion.n_obs == expected
    for attribute in ('layout', 'graph'):
        assert getattr(dandelion, attribute) is not None

    # Assign the same clone id to every contig and rebuild the object.
    dandelion.data['clone_id'] = '1'
    dandelion = ddl.Dandelion(dandelion.data)
    assert dandelion.data.clone_id.dtype == 'object'
    ddl.tl.generate_network(dandelion)
    assert dandelion.edges is not None
コード例 #25
0
ファイル: test_preprocess.py プロジェクト: grst/dandelion
def test_transfer():
    """Preprocess the stored AnnData with scanpy, then run ``transfer``.

    Reads ``tests/test2.h5`` and ``tests/sctest2.h5ad``, applies filtering,
    normalisation, highly-variable-gene selection, scaling, PCA and
    neighbours, transfers the VDJ data and overwrites ``tests/sctest2.h5ad``.
    """
    vdj = ddl.read_h5("tests/test2.h5")
    rna = sc.read_h5ad("tests/sctest2.h5ad")

    sc.pp.filter_cells(rna, min_genes=200)
    sc.pp.filter_genes(rna, min_cells=3)
    sc.pp.normalize_total(rna, target_sum=1e4)
    sc.pp.log1p(rna)

    hvg_settings = {'min_mean': 0.0125, 'max_mean': 3, 'min_disp': 0.5}
    sc.pp.highly_variable_genes(rna, **hvg_settings)
    # Keep only the genes flagged in the "highly_variable" column.
    variable_genes = rna.var["highly_variable"]
    rna = rna[:, variable_genes].copy()

    sc.pp.scale(rna, max_value=10)
    sc.tl.pca(rna, svd_solver="arpack")
    sc.pp.neighbors(rna)

    ddl.tl.transfer(rna, vdj)
    rna.write("tests/sctest2.h5ad", compression="gzip")
    print(rna)
コード例 #26
0
def test_update_germlines(create_testfolder, processed_files, database_paths,
                          fasta_10x):
    """Check ``update_germline`` with several kinds of ``corrected`` input.

    The germline must be non-empty after a plain update, after an h5
    round-trip, and after updates with ``corrected`` given as a fasta path
    and as ``fasta_10x``. Passing a list as ``corrected`` must raise
    ``TypeError``.
    """
    germline_db = database_paths['germline']
    contig_path = create_testfolder / str('dandelion/' +
                                          processed_files['filtered'])
    dandelion = ddl.Dandelion(contig_path)

    dandelion.update_germline(germline=germline_db)
    assert len(dandelion.germline) > 0

    h5_path = f"{create_testfolder}/test_airr_reannotated.h5"
    dandelion.write_h5(h5_path)
    reloaded = ddl.read_h5(h5_path)
    assert len(reloaded.germline) > 0

    fasta_path = f"{create_testfolder}/filtered_contig.fasta"
    for corrected_input in (fasta_path, fasta_10x):
        dandelion.update_germline(germline=germline_db,
                                  corrected=corrected_input)
        assert len(dandelion.germline) > 0

    with pytest.raises(TypeError):
        dandelion.update_germline(germline=germline_db, corrected=[])
コード例 #27
0
def test_diversity_chao(create_testfolder, resample):
    """Run ``clone_diversity`` with ``method='chao1'`` and check the outputs.

    ``downsample=6`` is passed only when ``resample`` is truthy. The
    ``clone_size_chao1`` metadata column must be non-empty afterwards, and a
    call with ``update_obs_meta=False`` must return a ``pandas.DataFrame``.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")

    diversity_kwargs = {
        'groupby': 'sample_id',
        'method': 'chao1',
        'resample': resample,
    }
    if resample:
        diversity_kwargs['downsample'] = 6
    ddl.tl.clone_diversity(dandelion, **diversity_kwargs)
    assert not dandelion.metadata.clone_size_chao1.empty

    table = ddl.tl.clone_diversity(dandelion,
                                   groupby='sample_id',
                                   method='chao1',
                                   update_obs_meta=False)
    assert isinstance(table, pd.DataFrame)
コード例 #28
0
ファイル: test_airr_input.py プロジェクト: grst/dandelion
def test_setup():
    """Download the 10x example data and write the files under ``tests/``.

    The AIRR rearrangement table is downloaded, given ``locus``,
    ``umi_count`` and ``sample_id`` columns, wrapped in a ``Dandelion``
    object and written as both h5 and pickle, then both are read back. The
    filtered feature matrix is downloaded to ``tests/sctest.h5``, read with
    scanpy and written as ``tests/sctest.h5ad``.

    Raises:
        requests.HTTPError: if either download returns an error status.
    """
    file = "https://cf.10xgenomics.com/samples/cell-vdj/5.0.0/sc5p_v2_hs_B_1k_multi_5gex_b/sc5p_v2_hs_B_1k_multi_5gex_b_vdj_b_airr_rearrangement.tsv"
    r = requests.get(file)
    # Fail on an HTTP error instead of parsing an error page as a TSV table.
    r.raise_for_status()
    test_data = pd.read_csv(StringIO(r.text), sep="\t")
    # Derive the locus from the first matching chain prefix in the V call.
    test_data["locus"] = [
        "IGH" if "IGH" in i else
        "IGK" if "IGK" in i else "IGL" if "IGL" in i else None
        for i in test_data.v_call
    ]
    test_data["umi_count"] = test_data["duplicate_count"]
    test_data["sample_id"] = "test"
    test_ddl = ddl.Dandelion(test_data)
    test_ddl.write_h5("tests/test.h5", compression="bzip2")
    test_ddl.write_pkl("tests/test.pkl.pbz2")
    test = ddl.read_h5("tests/test.h5")
    _ = ddl.read_pkl("tests/test.pkl.pbz2")
    scfile = "https://cf.10xgenomics.com/samples/cell-vdj/5.0.0/sc5p_v2_hs_B_1k_multi_5gex_b/sc5p_v2_hs_B_1k_multi_5gex_b_count_filtered_feature_bc_matrix.h5"
    r = requests.get(scfile)
    r.raise_for_status()
    # The context manager closes (and flushes) the file before it is re-read.
    with open("tests/sctest.h5", "wb") as handle:
        handle.write(r.content)
    adata = sc.read_10x_h5("tests/sctest.h5")
    adata.write("tests/sctest.h5ad", compression="gzip")
    print(test)
    print(adata)
コード例 #29
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_diversity_gini(create_testfolder):
    """Run default ``clone_diversity`` grouped by ``sample_id``.

    This test has no assertions; it only checks that the call completes.
    """
    dandelion = ddl.read_h5(create_testfolder / "test.h5")
    ddl.tl.clone_diversity(dandelion, groupby='sample_id')
コード例 #30
0
ファイル: test_tools.py プロジェクト: zktuong/dandelion
def test_diversity_gini_simple(create_testfolder):
    f = create_testfolder / "test.h5"
    vdj = ddl.read_h5(f)