예제 #1
0
파일: test_io.py 프로젝트: jsleescgl/scirpy
def test_read_10x():
    anndata = read_10x_vdj("tests/data/10x/all_contig_annotations.json")
    obs = anndata.obs
    # this has `is_cell=false` and should be filtered out
    assert "AAACCTGAGACCTTTG-1" not in anndata.obs_names
    assert obs.shape[0] == 2
    cell1 = obs.iloc[0, :]
    cell2 = obs.iloc[1, :]

    assert cell1.name == "AAACCTGAGACCTTTG-2"
    assert cell1["TRB_1_cdr3"] == "CASSPPSQGLSTGELFF"
    assert (
        cell1["TRB_1_cdr3_nt"] == "TGTGCCAGCTCACCACCGAGCCAGGGCCTTTCTACCGGGGAGCTGTTTTTT"
    )
    assert cell1["TRB_1_junction_ins"] == 4 + 7
    assert cell1["TRB_1_expr"] == 1
    assert cell1["TRB_1_v_gene"] == "TRBV18"
    assert cell1["TRB_1_d_gene"] == "TRBD1"
    assert cell1["TRB_1_j_gene"] == "TRBJ2-2"
    assert cell1["TRB_1_c_gene"] == "TRBC2"
    assert _is_false(cell1["multi_chain"])
    assert np.all(_is_na(cell1[["TRA_1_cdr3", "TRB_2_cdr3", "TRA_1_junction_ins"]]))

    assert cell2.name == "AAACCTGAGTACGCCC-1"
    assert cell2["TRA_1_cdr3"] == "CAMRVGGSQGNLIF"
    assert cell2["TRA_2_cdr3"] == "CATDAKDSNYQLIW"
    assert cell2["TRA_1_expr"] == 9
    assert cell2["TRA_2_expr"] == 4
    assert np.all(_is_na(cell2[["TRB_1_cdr3", "TRB_2_cdr3"]]))
    assert cell2["TRA_1_junction_ins"] == 4
    assert cell2["TRA_2_junction_ins"] == 4
예제 #2
0
def test_read_10x_csv():
    anndata = read_10x_vdj(TESTDATA / "10x/filtered_contig_annotations.csv")
    obs = anndata.obs
    assert obs.shape[0] == 5
    cell1 = obs.iloc[1, :]
    cell2 = obs.iloc[3, :]
    cell3 = obs.iloc[4, :]

    assert cell1.name == "AAACCTGAGTACGCCC-1"
    assert cell1["IR_VDJ_1_cdr3"] == "CASSLGPSTDTQYF"
    assert cell1[
        "IR_VDJ_1_cdr3_nt"] == "TGTGCCAGCAGCTTGGGACCTAGCACAGATACGCAGTATTTT"
    assert _is_na(cell1["IR_VDJ_1_junction_ins"])
    assert cell1["IR_VDJ_1_expr"] == 55
    assert cell1["IR_VDJ_1_v_gene"] == "TRBV7-2"
    assert cell1["IR_VDJ_1_d_gene"] == "TRBD2"
    assert cell1["IR_VDJ_1_j_gene"] == "TRBJ2-3"
    assert cell1["IR_VDJ_1_c_gene"] == "TRBC2"
    assert _is_false(cell1["multi_chain"])
    assert cell1["IR_VJ_1_locus"] == "TRA"
    assert cell1["IR_VDJ_1_locus"] == "TRB"

    assert cell2.name == "AAACCTGGTCCGTTAA-1"
    assert cell2["IR_VJ_1_cdr3"] == "CALNTGGFKTIF"
    assert cell2["IR_VJ_2_cdr3"] == "CAVILDARLMF"
    assert cell2["IR_VJ_1_expr"] == 5
    assert cell2["IR_VJ_2_expr"] == 5
    assert cell2["IR_VJ_1_locus"] == "TRA"
    assert cell2["IR_VDJ_1_locus"] == "TRB"
    assert cell2["IR_VJ_2_locus"] == "TRA"
    assert _is_na(cell2["IR_VDJ_2_cdr3"])

    assert cell3.name == "AAACTTGGTCCGTTAA-1"
    assert cell3["IR_VJ_1_locus"] == "IGK"
    assert cell3["IR_VDJ_1_locus"] == "IGH"
예제 #3
0
def test_is_na():
    warnings.filterwarnings("error")
    assert _is_na(None)
    assert _is_na(np.nan)
    assert _is_na("nan")
    assert not _is_na(42)
    assert not _is_na("Foobar")
    assert not _is_na(dict())
    array_test = np.array(["None", "nan", None, np.nan, "foobar"])
    array_expect = np.array([True, True, True, True, False])
    array_test_bool = np.array([True, False, True])
    array_expect_bool = np.array([False, False, False])

    npt.assert_equal(_is_na(array_test), array_expect)
    npt.assert_equal(_is_na(pd.Series(array_test)), array_expect)

    npt.assert_equal(_is_na(array_test_bool), array_expect_bool)
    npt.assert_equal(_is_na(pd.Series(array_test_bool)), array_expect_bool)
예제 #4
0
def test_read_10x():
    anndata = read_10x_vdj(TESTDATA / "10x/all_contig_annotations.json",
                           include_fields=None)
    obs = anndata.obs
    # this has `is_cell=false` and should be filtered out
    assert "AAACCTGAGACCTTTG-1" not in anndata.obs_names
    assert obs.shape[0] == 3
    cell1 = obs.iloc[0, :]
    cell2 = obs.iloc[1, :]
    cell3 = obs.iloc[2, :]

    assert cell1.name == "AAACCTGAGACCTTTG-2"
    assert cell1["IR_VDJ_1_junction_aa"] == "CASSPPSQGLSTGELFF"
    assert (cell1["IR_VDJ_1_junction"] ==
            "TGTGCCAGCTCACCACCGAGCCAGGGCCTTTCTACCGGGGAGCTGTTTTTT")
    assert cell1["IR_VDJ_1_np1_length"] == 4
    assert cell1["IR_VDJ_1_np2_length"] == 7
    assert cell1["IR_VDJ_1_duplicate_count"] == 1
    assert cell1["IR_VDJ_1_consensus_count"] == 494
    assert cell1["IR_VDJ_1_v_call"] == "TRBV18"
    assert cell1["IR_VDJ_1_d_call"] == "TRBD1"
    assert cell1["IR_VDJ_1_j_call"] == "TRBJ2-2"
    assert cell1["IR_VDJ_1_c_call"] == "TRBC2"
    assert _is_false(cell1["multi_chain"])
    assert np.all(
        _is_na(cell1[[
            "IR_VJ_1_junction_aa", "IR_VDJ_2_junction_aa", "IR_VJ_1_np1_length"
        ]]))

    assert cell2.name == "AAACCTGAGTACGCCC-1"
    assert cell2["IR_VJ_1_junction_aa"] == "CAMRVGGSQGNLIF"
    assert cell2["IR_VJ_2_junction_aa"] == "CATDAKDSNYQLIW"
    assert cell2["IR_VJ_1_duplicate_count"] == 9
    assert cell2["IR_VJ_2_duplicate_count"] == 4
    assert np.all(
        _is_na(cell2[["IR_VDJ_1_junction_aa", "IR_VDJ_2_junction_aa"]]))
    assert cell2["IR_VJ_1_np1_length"] == 4
    assert _is_na(cell2["IR_VJ_1_np2_length"])
    assert cell2["IR_VJ_2_np1_length"] == 4
    assert _is_na(cell2["IR_VJ_2_np2_length"])

    assert cell3.name == "CAGGTGCTCGTGGTCG-1"
    assert cell3["IR_VJ_1_locus"] == "IGK"
    assert _is_na(cell3["IR_VJ_2_locus"])  # non-productive
    assert cell3["IR_VDJ_1_locus"] == "IGH"
    assert _is_na(cell3["IR_VDJ_2_locus"])  # non-productive
예제 #5
0
def test_read_bracer():
    anndata = read_bracer(TESTDATA / "bracer/changeodb.tab")
    assert "SRR10788834" in anndata.obs.index
    assert anndata.obs.shape[0] == 6

    cell1 = anndata.obs.loc["SRR10779208", :]
    cell2 = anndata.obs.loc["SRR10788834", :]

    assert cell1.name == "SRR10779208"
    assert cell1["IR_VJ_1_locus"] == "IGK"
    assert cell1["IR_VDJ_1_locus"] == "IGH"
    assert cell1["IR_VDJ_1_j_call"] == "IGHJ4"
    assert cell1["IR_VDJ_1_junction"] == "TGTGCGACGATGACGGGGGGTGACCTTGACTACTGG"
    assert cell1["IR_VDJ_1_junction_aa"] == "CATMTGGDLDYW"
    assert cell1["IR_VJ_1_np1_length"] == 1
    assert _is_na(cell1["IR_VJ_1_np2_length"])

    assert cell2.name == "SRR10788834"
    assert cell2["IR_VDJ_1_junction_aa"] == "CARDHIVVLEPTPKRYGMDVW"
    assert (cell2["IR_VDJ_1_junction"] ==
            "TGTGCGAGAGATCATATTGTAGTCTTGGAACCTACCCCTAAGAGATACGGTATGGACGTCTGG")
    assert cell2["IR_VDJ_1_np1_length"] == 2
    assert cell2["IR_VDJ_1_np2_length"] == 22