def test_extract_edge_weights(create_testfolder):
    """Check that ``extract_edge_weights`` returns ``None`` for the stored object.

    The object is read from ``test.h5`` in the test folder and the function is
    called twice: once with defaults and once with ``expanded_only=True``.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    for options in ({}, {"expanded_only": True}):
        weights = ddl.tl.extract_edge_weights(vdj, **options)
        assert weights is None
def test_diversity_gini2(create_testfolder, metric):
    """Run resampled ``clone_diversity`` and check the columns written for ``metric``.

    ``metric`` is not defined in this function; presumably it is supplied by a
    parametrize decorator or fixture outside this view.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    # Overwrite both columns with constants before pulling them into metadata.
    vdj.data['sample_id'] = 'sample_test'
    vdj.data['contig_QC_pass'] = '******'
    ddl.update_metadata(
        vdj,
        retrieve=['sample_id', 'contig_QC_pass'],
        split=False,
    )
    ddl.tl.clone_diversity(
        vdj,
        groupby='sample_id',
        resample=True,
        downsample=6,
        key='sequence',
        n_resample=5,
        metric=metric,
    )
    # Each entry pairs the metric values it applies to with the metadata
    # columns that must be non-empty afterwards.
    expectations = (
        (
            ('clone_network', None),
            (
                'clone_network_cluster_size_gini',
                'clone_network_vertex_size_gini',
            ),
        ),
        (
            ('clone_degree',),
            ('clone_degree', 'clone_size_gini', 'clone_degree_gini'),
        ),
        (
            ('clone_centrality',),
            ('clone_centrality', 'clone_centrality_gini'),
        ),
    )
    for metrics, columns in expectations:
        if metric in metrics:
            for column in columns:
                assert not getattr(vdj.metadata, column).empty
def test_clone_size(create_testfolder):
    """Check that ``clone_size`` fills ``clone_id_size`` with and without ``max_size``."""
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    for options in ({}, {"max_size": 3}):
        ddl.tl.clone_size(vdj, **options)
        assert not vdj.metadata.clone_id_size.empty
def test_diversity_shannon(create_testfolder, resample, normalize):
    """Check Shannon diversity output with and without resampling/normalisation.

    ``resample`` and ``normalize`` are not defined here; presumably they come
    from parametrization outside this view.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    # ``downsample`` is only passed along when resampling is requested.
    extra = {'downsample': 6} if resample else {}
    ddl.tl.clone_diversity(
        vdj,
        groupby='sample_id',
        method='shannon',
        resample=resample,
        normalize=normalize,
        **extra,
    )
    if normalize:
        column = 'clone_size_normalized_shannon'
    else:
        column = 'clone_size_shannon'
    assert not getattr(vdj.metadata, column).empty
    # With ``update_obs_meta=False`` the result is returned as a DataFrame.
    result = ddl.tl.clone_diversity(
        vdj,
        groupby='sample_id',
        method='shannon',
        update_obs_meta=False,
    )
    assert isinstance(result, pd.DataFrame)
def test_plot_stackedbar(create_testfolder, norm):
    """Check that ``stackedbarplot`` returns something for the given ``norm``.

    ``norm`` is forwarded as ``normalize``; presumably it is parametrized
    outside this view.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    plot_options = {
        'color': 'v_call_genotyped_VDJ',
        'groupby': 'isotype',
        'normalize': norm,
    }
    axes = ddl.pl.stackedbarplot(vdj, **plot_options)
    assert axes is not None
def test_plot_spectratype(create_testfolder):
    """Check that ``spectratype`` returns something for IGH junction lengths by ``c_call``."""
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    plot_options = {
        'color': 'junction_length',
        'groupby': 'c_call',
        'locus': 'IGH',
    }
    axes = ddl.pl.spectratype(vdj, **plot_options)
    assert axes is not None
def test_filter():
    """Run ``filter_bcr`` on the stored inputs and write both results back to disk.

    Contains no assertions; it passes as long as no call raises.
    """
    rna = sc.read_10x_h5("tests/sctest.h5")
    bcr = ddl.read_h5("tests/test.h5")
    # Set the ``filter_rna`` column to False for every cell before filtering.
    rna.obs["filter_rna"] = False
    bcr, rna = ddl.pp.filter_bcr(bcr, rna)
    rna.write("tests/sctest.h5ad", compression="gzip")
    bcr.write_h5("tests/test.h5", compression="bzip2")
    print(bcr)
def test_find_clones(create_testfolder):
    """Check that ``find_clones`` fills ``clone_id`` and yields four distinct clones.

    The updated object is written back to ``test.h5`` at the end.
    """
    h5_path = create_testfolder / "test.h5"
    vdj = ddl.read_h5(h5_path)
    ddl.tl.find_clones(vdj)
    assert not vdj.data.clone_id.empty
    assert not vdj.metadata.clone_id.empty
    # Count distinct clone ids, ignoring null entries.
    distinct_clones = {
        clone for clone in vdj.metadata['clone_id'] if pd.notnull(clone)
    }
    assert len(distinct_clones) == 4
    vdj.write_h5(h5_path)
def test_diversity_gini(create_testfolder):
    """Run default ``clone_diversity`` and check the non-updating call returns a DataFrame."""
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    ddl.tl.clone_diversity(vdj, groupby='sample_id')
    # With ``update_obs_meta=False`` the result is returned instead of stored.
    result = ddl.tl.clone_diversity(
        vdj,
        groupby='sample_id',
        update_obs_meta=False,
    )
    assert isinstance(result, pd.DataFrame)
def test_create_germlines():
    """Update germlines, run ``create_germlines`` and write the object back to disk.

    Contains no assertions; it passes as long as no call raises.
    """
    germline_dir = "database/germlines/imgt/human/vdj/"
    vdj = ddl.read_h5("tests/test.h5")
    vdj.update_germline(germline=germline_dir)
    ddl.pp.create_germlines(
        vdj,
        germline=germline_dir,
        v_field="v_call",
        germ_types="dmask",
    )
    vdj.write_h5("tests/test.h5", compression="bzip2")
    print(vdj)
def test_generate_network(create_testfolder):
    """Check the attributes set by ``generate_network`` after clone calling.

    Expects ``distance``, ``edges``, ``layout`` and ``graph`` to be set and
    ``n_obs`` to equal 392.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    ddl.tl.find_clones(vdj)
    ddl.tl.generate_network(vdj)
    for attribute in ('distance', 'edges'):
        assert getattr(vdj, attribute) is not None
    assert vdj.n_obs == 392
    for attribute in ('layout', 'graph'):
        assert getattr(vdj, attribute) is not None
def test_diversity_gini(create_testfolder):
    """Check the Gini columns written by the default and ``clone_centrality`` metrics."""
    vdj = ddl.read_h5(create_testfolder / "test.h5")

    # Default metric: network-based Gini columns.
    ddl.tl.clone_diversity(vdj, groupby='sample_id')
    for column in (
        'clone_network_vertex_size_gini',
        'clone_network_cluster_size_gini',
    ):
        assert not getattr(vdj.metadata, column).empty

    # Centrality metric, computed after the network is rebuilt.
    ddl.tl.generate_network(vdj)
    ddl.tl.clone_diversity(
        vdj,
        groupby='sample_id',
        metric='clone_centrality',
    )
    for column in ('clone_centrality_gini', 'clone_size_gini'):
        assert not getattr(vdj.metadata, column).empty
def test_transfer(create_testfolder, dummy_adata):
    """Check that ``transfer`` adds ``clone_id`` to ``obs`` and ``X_vdj`` to ``obsm``.

    ``X_vdj`` is only checked after ``generate_network`` has run. The AnnData
    object is written to ``test.h5ad`` in the test folder at the end.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    # The AnnData returned by the filter is discarded; ``dummy_adata`` itself
    # is used for every later call.
    vdj, _ = ddl.pp.filter_contigs(vdj, dummy_adata)

    ddl.tl.transfer(dummy_adata, vdj)
    assert 'clone_id' in dummy_adata.obs

    ddl.tl.generate_network(vdj)
    ddl.tl.transfer(dummy_adata, vdj)
    assert 'X_vdj' in dummy_adata.obsm

    dummy_adata.write_h5ad(create_testfolder / "test.h5ad")
def test_setup(create_testfolder, airr_reannotated, dummy_adata):
    """Filter contigs, check the resulting sizes and round-trip through ``test.h5``.

    After writing, the test folder must contain exactly one entry.
    """
    vdj, adata = ddl.pp.filter_contigs(airr_reannotated, dummy_adata)
    assert airr_reannotated.shape[0] == 9
    assert vdj.data.shape[0] == 7
    assert vdj.metadata.shape[0] == 4
    assert adata.n_obs == 5

    h5_path = create_testfolder / "test.h5"
    vdj.write_h5(h5_path)
    n_entries = sum(1 for _ in create_testfolder.iterdir())
    assert n_entries == 1

    reloaded = ddl.read_h5(h5_path)
    assert reloaded.metadata is not None
def test_find_clones_key(create_testfolder):
    """Check clone calling under a custom key and network generation from that key.

    After ``generate_network`` with ``clone_key='test_clone'``, ``edges`` is
    expected to be ``None`` while ``distance``, ``layout`` and ``graph`` are set.
    """
    custom_key = 'test_clone'
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    ddl.tl.find_clones(vdj, key_added=custom_key)
    assert not vdj.metadata.test_clone.empty
    assert vdj.data.test_clone.dtype == 'object'

    ddl.tl.generate_network(vdj, clone_key=custom_key)
    assert vdj.distance is not None
    assert vdj.edges is None
    for attribute in ('layout', 'graph'):
        assert getattr(vdj, attribute) is not None
def test_plot_bar(create_testfolder, sort, norm):
    """Check ``barplot`` returns something for default, sorted and normalised calls.

    ``sort`` and ``norm`` are forwarded as ``sort_descending`` and
    ``normalize``; presumably they are parametrized outside this view.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    option_sets = (
        {},
        {'sort_descending': sort},
        {'normalize': norm},
    )
    for options in option_sets:
        axes = ddl.pl.barplot(vdj, color='v_call_genotyped_VDJ', **options)
        assert axes is not None
def test_diversity_rarefaction3(create_testfolder):
    """Check ``tl.clone_rarefaction`` returns a dict and ``pl.clone_rarefaction`` a plot."""
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    # Overwrite both columns with constants before pulling them into metadata.
    vdj.data['sample_id'] = 'sample_test'
    vdj.data['contig_QC_pass'] = '******'
    ddl.update_metadata(
        vdj,
        retrieve=['sample_id', 'contig_QC_pass'],
        split=False,
    )

    rarefaction = ddl.tl.clone_rarefaction(vdj, groupby='sample_id')
    assert isinstance(rarefaction, dict)

    plot = ddl.pl.clone_rarefaction(vdj, color='sample_id')
    assert plot is not None
def test_diversity2c(create_testfolder):
    """Check ``clone_diversity`` with ``key='sequence'`` returns a DataFrame when not updating."""
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    # Overwrite both columns with constants before pulling them into metadata.
    vdj.data['sample_id'] = 'sample_test'
    vdj.data['contig_QC_pass'] = '******'
    ddl.update_metadata(
        vdj,
        retrieve=['sample_id', 'contig_QC_pass'],
        split=False,
    )
    result = ddl.tl.clone_diversity(
        vdj,
        groupby='sample_id',
        key='sequence',
        update_obs_meta=False,
    )
    assert isinstance(result, pd.DataFrame)
def test_diversity2b(create_testfolder):
    """Check the network Gini columns when ``clone_diversity`` uses ``use_contracted=True``."""
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    # Overwrite both columns with constants before pulling them into metadata.
    vdj.data['sample_id'] = 'sample_test'
    vdj.data['contig_QC_pass'] = '******'
    ddl.update_metadata(
        vdj,
        retrieve=['sample_id', 'contig_QC_pass'],
        split=False,
    )
    ddl.tl.clone_diversity(
        vdj,
        groupby='sample_id',
        use_contracted=True,
        key='sequence',
    )
    for column in (
        'clone_network_cluster_size_gini',
        'clone_network_vertex_size_gini',
    ):
        assert not getattr(vdj.metadata, column).empty
def test_diversity2a(create_testfolder):
    """Check the network Gini columns when ``reconstruct_network=False``.

    Metadata is refreshed using the ``'merge and unique only'`` retrieve mode
    for both retrieved columns.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    # Overwrite both columns with constants before pulling them into metadata.
    vdj.data['sample_id'] = 'sample_test'
    vdj.data['contig_QC_pass'] = '******'
    columns_to_retrieve = ['sample_id', 'contig_QC_pass']
    ddl.update_metadata(
        vdj,
        retrieve=columns_to_retrieve,
        retrieve_mode=['merge and unique only'] * 2,
    )
    ddl.tl.clone_diversity(
        vdj,
        groupby='sample_id',
        reconstruct_network=False,
        key='sequence',
    )
    for column in (
        'clone_network_cluster_size_gini',
        'clone_network_vertex_size_gini',
    ):
        assert not getattr(vdj.metadata, column).empty
def test_diversity_gini(create_testfolder):
    """Check Gini columns for both metrics and the DataFrame return path.

    Covers the default metric, ``metric='clone_centrality'`` after rebuilding
    the network, and a final call with ``update_obs_meta=False``.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")

    # Default metric: network-based Gini columns.
    ddl.tl.clone_diversity(vdj, groupby='sample_id')
    for column in (
        'clone_network_vertex_size_gini',
        'clone_network_cluster_size_gini',
    ):
        assert not getattr(vdj.metadata, column).empty

    # Centrality metric, computed after the network is rebuilt.
    centrality_options = {
        'groupby': 'sample_id',
        'metric': 'clone_centrality',
    }
    ddl.tl.generate_network(vdj)
    ddl.tl.clone_diversity(vdj, **centrality_options)
    for column in ('clone_centrality_gini', 'clone_size_gini'):
        assert not getattr(vdj.metadata, column).empty

    # With ``update_obs_meta=False`` the result is returned instead of stored.
    result = ddl.tl.clone_diversity(
        vdj,
        update_obs_meta=False,
        **centrality_options,
    )
    assert isinstance(result, pd.DataFrame)
def test_diversity_chao(create_testfolder, resample):
    """Check that Chao1 diversity fills ``clone_size_chao1``.

    ``resample`` is not defined here; presumably it is parametrized outside
    this view.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    # ``downsample`` is only passed along when resampling is requested.
    extra = {'downsample': 6} if resample else {}
    ddl.tl.clone_diversity(
        vdj,
        groupby='sample_id',
        method='chao1',
        resample=resample,
        **extra,
    )
    assert not vdj.metadata.clone_size_chao1.empty
def test_readwrite_h5(create_testfolder):
    """Round-trip a Dandelion object through ``write_h5``/``read_h5``.

    The object is written with default settings, with ``complib`` and with
    ``compression``; after each write the reloaded data must still have
    non-empty length columns. Passing both ``complib`` and ``compression``
    must raise ``ValueError``.
    """

    def _assert_length_columns(obj):
        # The three length columns must be non-empty on ``obj.data``.
        for column in ('np1_length', 'np2_length', 'junction_length'):
            assert not getattr(obj.data, column).empty

    tsv_path = f"{create_testfolder}/test_airr_reannotated.tsv"
    h5_path = f"{create_testfolder}/test_airr_reannotated.h5"

    vdj = ddl.Dandelion(tsv_path)
    _assert_length_columns(vdj)

    write_variants = (
        {},
        {'complib': 'blosc:lz4'},
        {'compression': 'blosc:lz4'},
    )
    for options in write_variants:
        vdj.write_h5(h5_path, **options)
        reloaded = ddl.read_h5(h5_path)
        _assert_length_columns(reloaded)

    with pytest.raises(ValueError):
        vdj.write_h5(h5_path, complib='blosc:lz4', compression='blosc:lz4')
def test_generate_network(create_testfolder, resample, expected):
    """Check ``generate_network`` with and without downsampling.

    ``resample`` and ``expected`` are not defined here; presumably they are
    parametrized outside this view. When ``resample`` is given, the return
    value of ``generate_network`` replaces the object. ``edges`` is expected
    to be ``None`` at first and set once every contig shares one clone id.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    if resample is None:
        ddl.tl.generate_network(vdj)
    else:
        vdj = ddl.tl.generate_network(vdj, downsample=resample)

    assert vdj.distance is not None
    assert vdj.edges is None
    assert vdj.n_obs == expected
    for attribute in ('layout', 'graph'):
        assert getattr(vdj, attribute) is not None

    # Put every contig into clone '1' and rebuild the object from its table.
    vdj.data['clone_id'] = '1'
    vdj = ddl.Dandelion(vdj.data)
    assert vdj.data.clone_id.dtype == 'object'
    ddl.tl.generate_network(vdj)
    assert vdj.edges is not None
def test_transfer():
    """Preprocess the stored AnnData with scanpy, then transfer the VDJ data onto it.

    Contains no assertions; it passes as long as no call raises. The result is
    written back to ``tests/sctest2.h5ad``.
    """
    vdj = ddl.read_h5("tests/test2.h5")
    rna = sc.read_h5ad("tests/sctest2.h5ad")

    # Filtering and normalisation.
    sc.pp.filter_cells(rna, min_genes=200)
    sc.pp.filter_genes(rna, min_cells=3)
    sc.pp.normalize_total(rna, target_sum=1e4)
    sc.pp.log1p(rna)

    # Keep only the genes flagged as highly variable.
    sc.pp.highly_variable_genes(
        rna,
        min_mean=0.0125,
        max_mean=3,
        min_disp=0.5,
    )
    hvg_mask = rna.var["highly_variable"]
    rna = rna[:, hvg_mask].copy()

    # Scale, reduce and build the neighbour graph.
    sc.pp.scale(rna, max_value=10)
    sc.tl.pca(rna, svd_solver="arpack")
    sc.pp.neighbors(rna)

    ddl.tl.transfer(rna, vdj)
    rna.write("tests/sctest2.h5ad", compression="gzip")
    print(rna)
def test_update_germlines(create_testfolder, processed_files, database_paths, fasta_10x):
    """Check ``update_germline`` with different ``corrected`` inputs.

    Covers no ``corrected`` argument (including an h5 round trip), a path
    string, the ``fasta_10x`` fixture, and an empty list, which must raise
    ``TypeError``.
    """
    germline_db = database_paths['germline']
    airr_path = create_testfolder / ('dandelion/' + processed_files['filtered'])
    vdj = ddl.Dandelion(airr_path)

    vdj.update_germline(germline=germline_db)
    assert len(vdj.germline) > 0

    # The germline must survive a write/read round trip.
    h5_path = f"{create_testfolder}/test_airr_reannotated.h5"
    vdj.write_h5(h5_path)
    reloaded = ddl.read_h5(h5_path)
    assert len(reloaded.germline) > 0

    corrected_fasta = f"{create_testfolder}/filtered_contig.fasta"
    vdj.update_germline(germline=germline_db, corrected=corrected_fasta)
    assert len(vdj.germline) > 0

    vdj.update_germline(germline=germline_db, corrected=fasta_10x)
    assert len(vdj.germline) > 0

    with pytest.raises(TypeError):
        vdj.update_germline(germline=germline_db, corrected=[])
def test_diversity_chao(create_testfolder, resample):
    """Check Chao1 diversity fills ``clone_size_chao1`` and can return a DataFrame.

    ``resample`` is not defined here; presumably it is parametrized outside
    this view.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    # ``downsample`` is only passed along when resampling is requested.
    extra = {'downsample': 6} if resample else {}
    ddl.tl.clone_diversity(
        vdj,
        groupby='sample_id',
        method='chao1',
        resample=resample,
        **extra,
    )
    assert not vdj.metadata.clone_size_chao1.empty

    # With ``update_obs_meta=False`` the result is returned instead of stored.
    result = ddl.tl.clone_diversity(
        vdj,
        groupby='sample_id',
        method='chao1',
        update_obs_meta=False,
    )
    assert isinstance(result, pd.DataFrame)
def test_setup():
    """Download the 10x example data and write the files used by later tests.

    Fetches the AIRR rearrangement table, adds ``locus``, ``umi_count`` and
    ``sample_id`` columns, wraps it in a ``Dandelion`` object and writes it as
    both h5 and pickle, reading each back. Then downloads the filtered
    feature matrix, stores it as ``tests/sctest.h5`` and re-saves it as
    ``tests/sctest.h5ad``. Contains no assertions.

    Raises:
        requests.HTTPError: if either download returns an error status.
    """
    file = "https://cf.10xgenomics.com/samples/cell-vdj/5.0.0/sc5p_v2_hs_B_1k_multi_5gex_b/sc5p_v2_hs_B_1k_multi_5gex_b_vdj_b_airr_rearrangement.tsv"
    r = requests.get(file)
    # Fail here on an HTTP error instead of parsing an error page as TSV.
    r.raise_for_status()
    test_data = pd.read_csv(StringIO(r.text), sep="\t")
    # Derive the locus from the V gene call; None when no known locus matches.
    test_data["locus"] = [
        "IGH"
        if "IGH" in i
        else "IGK"
        if "IGK" in i
        else "IGL"
        if "IGL" in i
        else None
        for i in test_data.v_call
    ]
    test_data["umi_count"] = test_data["duplicate_count"]
    test_data["sample_id"] = "test"
    test_ddl = ddl.Dandelion(test_data)
    test_ddl.write_h5("tests/test.h5", compression="bzip2")
    test_ddl.write_pkl("tests/test.pkl.pbz2")
    test = ddl.read_h5("tests/test.h5")
    _ = ddl.read_pkl("tests/test.pkl.pbz2")

    scfile = "https://cf.10xgenomics.com/samples/cell-vdj/5.0.0/sc5p_v2_hs_B_1k_multi_5gex_b/sc5p_v2_hs_B_1k_multi_5gex_b_count_filtered_feature_bc_matrix.h5"
    r = requests.get(scfile)
    r.raise_for_status()
    # Close the file before scanpy reads it, so the content is fully flushed.
    with open("tests/sctest.h5", "wb") as handle:
        handle.write(r.content)
    adata = sc.read_10x_h5("tests/sctest.h5")
    adata.write("tests/sctest.h5ad", compression="gzip")
    print(test)
    print(adata)
def test_diversity_gini(create_testfolder):
    """Run default ``clone_diversity`` grouped by ``sample_id``.

    Contains no assertions; it passes as long as no call raises.
    """
    vdj = ddl.read_h5(create_testfolder / "test.h5")
    diversity_options = {'groupby': 'sample_id'}
    ddl.tl.clone_diversity(vdj, **diversity_options)
def test_diversity_gini_simple(create_testfolder):
    """Load ``test.h5`` from the test folder.

    Contains no assertions; it passes as long as the read does not raise.
    """
    h5_path = create_testfolder / "test.h5"
    loaded = ddl.read_h5(h5_path)