Ejemplo n.º 1
0
def test_read_airr():
    """Check `read_airr` on single files against a combined multi-file read.

    The TRA, TRB and IG rearrangement files are read one at a time and once
    more as a single list; the per-locus rows of the combined result must
    match the corresponding single-file result. A few hard-coded values of
    the combined result are verified afterwards.
    """
    tra_path = TESTDATA / "airr/rearrangement_tra.tsv"
    trb_path = TESTDATA / "airr/rearrangement_trb.tsv"
    ig_path = TESTDATA / "airr/rearrangement_ig.tsv"

    # Read each file on its own first, then all of them in one call.
    adata_tra = read_airr(tra_path)
    adata_trb = read_airr(trb_path)
    adata_ig = read_airr(ig_path)
    adata_all = read_airr([tra_path, trb_path, ig_path])

    vj_cols = [
        "IR_VJ_1_junction_aa",
        "IR_VJ_1_junction",
        "IR_VJ_1_v_call",
        "IR_VJ_1_d_call",
        "IR_VJ_1_j_call",
        "IR_VJ_1_c_call",
        "IR_VJ_1_consensus_count",
    ]
    vdj_cols = [col.replace("IR_VJ", "IR_VDJ") for col in vj_cols]
    vj_and_vdj_cols = vj_cols + vdj_cols

    # (locus column, locus value, columns to compare, single-file result)
    comparisons = (
        ("IR_VJ_1_locus", "TRA", vj_cols, adata_tra),
        ("IR_VDJ_1_locus", "TRB", vdj_cols, adata_trb),
        ("IR_VDJ_1_locus", "IGH", vj_and_vdj_cols, adata_ig),
    )
    for locus_col, locus, cols, adata_single in comparisons:
        has_locus = adata_all.obs[locus_col] == locus
        pdt.assert_frame_equal(
            adata_all.obs.loc[has_locus, cols],
            adata_single.obs.loc[:, cols],
            # Categories differ between the combined and the single-file
            # objects, so categorical and dtype checks are switched off.
            check_categorical=False,
            check_dtype=False,
        )

    # Spot-check some fundamental values of the combined result.
    assert adata_all.obs.shape[0] == 5

    cell1 = adata_all.obs.loc["cell1", :]
    cell2 = adata_all.obs.loc["cell2", :]
    cell3 = adata_all.obs.loc["AAACCTGCAGCGTAAG-1", :]

    assert cell1.name == "cell1"
    expected_cell1 = {
        "IR_VJ_1_junction_aa": "CTRPKWESPMVDAFDIW",
        "IR_VDJ_2_junction_aa": "CQQYDNLQITF",
        "IR_VDJ_1_junction_aa": "CQQYYHTPYSF",
        "IR_VJ_1_locus": "TRA",
        "IR_VDJ_1_locus": "TRB",
    }
    for column, expected in expected_cell1.items():
        assert cell1[column] == expected

    assert cell2.name == "cell2"

    # The locus name of this cell has to be inferred from the gene names.
    assert cell3["IR_VJ_1_locus"] == "IGL"
    assert cell3["IR_VDJ_1_locus"] == "IGH"
Ejemplo n.º 2
0
def test_airr_roundtrip_conversion(anndata_from_10x_sample, tmp_path):
    """Check that an AIRR write followed by an AIRR read is the identity.

    The fixture object is written to a temporary AIRR file and read back;
    the `obs` tables of the original and the re-read object must be equal
    (ignoring dtype and categorical differences).
    """
    airr_file = tmp_path / "test.airr.tsv"
    write_airr(anndata_from_10x_sample, airr_file)
    roundtripped = read_airr(airr_file, include_fields=None)

    # The return value is discarded, so the helper presumably normalizes
    # the data frames in place -- verify against its definition.
    _normalize_df_types(anndata_from_10x_sample.obs)
    _normalize_df_types(roundtripped.obs)

    pdt.assert_frame_equal(
        anndata_from_10x_sample.obs,
        roundtripped.obs,
        check_dtype=False,
        check_categorical=False,
    )
Ejemplo n.º 3
0
def test_read_airr():
    """Check `read_airr` on single files against a combined two-file read.

    The TRA and TRB rearrangement files are read separately and once more as
    a list; the chain columns of the combined result must equal those of the
    matching single-file result. A few hard-coded values of the combined
    result are verified afterwards.
    """
    tra_path = "tests/data/airr/rearrangement_tra.tsv"
    trb_path = "tests/data/airr/rearrangement_trb.tsv"

    # Read each file on its own first, then both in one call.
    adata_tra = read_airr(tra_path)
    adata_trb = read_airr(trb_path)
    adata_all = read_airr([tra_path, trb_path])

    alpha_cols = [
        "TRA_1_cdr3",
        "TRA_1_cdr3_nt",
        "TRA_1_v_gene",
        "TRA_1_d_gene",
        "TRA_1_j_gene",
        "TRA_1_c_gene",
        "TRA_1_expr",
    ]
    beta_cols = [col.replace("TRA", "TRB") for col in alpha_cols]

    for cols, adata_single in ((alpha_cols, adata_tra), (beta_cols, adata_trb)):
        pdt.assert_frame_equal(adata_all.obs[cols], adata_single.obs[cols])

    # Spot-check some fundamental values of the combined result.
    for cell_id in ("cell1", "cell2"):
        assert cell_id in adata_all.obs_names
    assert adata_all.obs.shape[0] == 3

    cell1 = adata_all.obs.loc["cell1", :]
    cell2 = adata_all.obs.loc["cell2", :]

    assert cell1.name == "cell1"
    expected_cdr3 = {
        "TRA_1_cdr3": "CTRPKWESPMVDAFDIW",
        "TRB_2_cdr3": "CQQYDNLQITF",
        "TRB_1_cdr3": "CQQYYHTPYSF",
    }
    for column, expected in expected_cdr3.items():
        assert cell1[column] == expected

    assert cell2.name == "cell2"
Ejemplo n.º 4
0
def test_read_airr_issue280():
    """Test that reading the example shown in issue #280 works.

    Reads the `tra_issue_280.tsv` test file and checks the junction amino
    acid sequences of the first cell for both the VDJ and the VJ chain.
    """
    anndata = read_airr(TESTDATA / "airr" / "tra_issue_280.tsv")
    # Use explicit positional access: `series[0]` on a non-integer index
    # relies on the positional fallback of `Series.__getitem__`, which is
    # deprecated in recent pandas versions.
    assert anndata.obs["IR_VDJ_1_junction_aa"].iloc[0] == "CASSLGGESQNTLYF"
    assert anndata.obs["IR_VJ_1_junction_aa"].iloc[0] == "CAARGNRIFF"