def test_read_airr():
    """Check `read_airr` on single files versus a list of files.

    The TRA, TRB and IG rearrangement files are each read on their own and
    once more together in a single call. For every locus, the matching rows
    of the combined ``obs`` table must equal the table obtained from the
    corresponding single-file read. A handful of known cell values are
    verified afterwards.
    """
    tra_path = TESTDATA / "airr/rearrangement_tra.tsv"
    trb_path = TESTDATA / "airr/rearrangement_trb.tsv"
    ig_path = TESTDATA / "airr/rearrangement_ig.tsv"

    # Test that reading the files one-by-one or at once yields the same results
    adata_tra = read_airr(tra_path)
    adata_trb = read_airr(trb_path)
    adata_ig = read_airr(ig_path)
    combined = read_airr([tra_path, trb_path, ig_path])

    tra_cols = [
        "IR_VJ_1_junction_aa",
        "IR_VJ_1_junction",
        "IR_VJ_1_v_call",
        "IR_VJ_1_d_call",
        "IR_VJ_1_j_call",
        "IR_VJ_1_c_call",
        "IR_VJ_1_consensus_count",
    ]
    trb_cols = [col.replace("IR_VJ", "IR_VDJ") for col in tra_cols]
    ig_cols = [*tra_cols, *trb_cols]

    # (locus column used for row selection, locus value, single-file result, columns to compare)
    per_locus_cases = (
        ("IR_VJ_1_locus", "TRA", adata_tra, tra_cols),
        ("IR_VDJ_1_locus", "TRB", adata_trb, trb_cols),
        ("IR_VDJ_1_locus", "IGH", adata_ig, ig_cols),
    )
    for locus_col, locus, single_file, cols in per_locus_cases:
        row_mask = combined.obs[locus_col] == locus
        pdt.assert_frame_equal(
            combined.obs.loc[row_mask, cols],
            single_file.obs.loc[:, cols],
            check_categorical=False,  # categories differ, obviously
            check_dtype=False,
        )

    # test some fundamental values
    assert combined.obs.shape[0] == 5

    cell1 = combined.obs.loc["cell1", :]
    cell2 = combined.obs.loc["cell2", :]
    cell3 = combined.obs.loc["AAACCTGCAGCGTAAG-1", :]

    assert cell1.name == "cell1"
    expected_cell1 = {
        "IR_VJ_1_junction_aa": "CTRPKWESPMVDAFDIW",
        "IR_VDJ_2_junction_aa": "CQQYDNLQITF",
        "IR_VDJ_1_junction_aa": "CQQYYHTPYSF",
        "IR_VJ_1_locus": "TRA",
        "IR_VDJ_1_locus": "TRB",
    }
    for column, expected in expected_cell1.items():
        assert cell1[column] == expected

    assert cell2.name == "cell2"

    # check that inferring the locus name from genes works
    assert cell3["IR_VJ_1_locus"] == "IGL"
    assert cell3["IR_VDJ_1_locus"] == "IGH"
def test_airr_roundtrip_conversion(anndata_from_10x_sample, tmp_path):
    """Check that an AIRR write followed by an AIRR read reproduces ``obs``.

    The sample object is written to a temporary AIRR file, read back with
    ``include_fields=None``, and the ``obs`` tables of the original and the
    re-read object are compared (ignoring dtype and categorical details).
    """
    original = anndata_from_10x_sample
    airr_path = tmp_path / "test.airr.tsv"

    write_airr(original, airr_path)
    roundtripped = read_airr(airr_path, include_fields=None)

    # The helper's return value is discarded, so it is presumably expected to
    # normalise each ``obs`` frame in place before the comparison.
    for adata in (original, roundtripped):
        _normalize_df_types(adata.obs)

    pdt.assert_frame_equal(
        original.obs,
        roundtripped.obs,
        check_dtype=False,
        check_categorical=False,
    )
def test_read_airr():
    """Check `read_airr` on single files versus a list of files (TRA/TRB columns).

    The TRA and TRB rearrangement files are read individually and together;
    the per-chain columns of the combined ``obs`` table must equal those of
    the single-file reads. A few known cell values are verified afterwards.

    NOTE(review): this file contains another function with the same name;
    whichever definition comes later rebinds ``test_read_airr``, so only one
    of them is visible at module level -- confirm which one is intended.
    """
    tra_path = "tests/data/airr/rearrangement_tra.tsv"
    trb_path = "tests/data/airr/rearrangement_trb.tsv"

    # Test that reading the files one-by-one or at once yields the same results
    adata_tra = read_airr(tra_path)
    adata_trb = read_airr(trb_path)
    combined = read_airr([tra_path, trb_path])

    tra_cols = [
        "TRA_1_cdr3",
        "TRA_1_cdr3_nt",
        "TRA_1_v_gene",
        "TRA_1_d_gene",
        "TRA_1_j_gene",
        "TRA_1_c_gene",
        "TRA_1_expr",
    ]
    trb_cols = [col.replace("TRA", "TRB") for col in tra_cols]

    for single_file, cols in ((adata_tra, tra_cols), (adata_trb, trb_cols)):
        pdt.assert_frame_equal(combined.obs[cols], single_file.obs[cols])

    # test some fundamental values
    assert all(name in combined.obs_names for name in ("cell1", "cell2"))
    assert combined.obs.shape[0] == 3

    cell1 = combined.obs.loc["cell1", :]
    cell2 = combined.obs.loc["cell2", :]

    assert cell1.name == "cell1"
    expected_cell1 = {
        "TRA_1_cdr3": "CTRPKWESPMVDAFDIW",
        "TRB_2_cdr3": "CQQYDNLQITF",
        "TRB_1_cdr3": "CQQYYHTPYSF",
    }
    for column, expected in expected_cell1.items():
        assert cell1[column] == expected

    assert cell2.name == "cell2"
def test_read_airr_issue280():
    """Test that reading the example shown in issue #280 works.

    Reads ``airr/tra_issue_280.tsv`` from the test data directory and checks
    the junction amino-acid sequences of the first row of ``obs``.
    """
    anndata = read_airr(TESTDATA / "airr" / "tra_issue_280.tsv")

    # Select the first row explicitly by position. Plain ``series[0]`` on a
    # Series that is not integer-indexed relies on pandas' deprecated
    # fallback of treating integer keys as positions.
    assert anndata.obs["IR_VDJ_1_junction_aa"].iloc[0] == "CASSLGGESQNTLYF"
    assert anndata.obs["IR_VJ_1_junction_aa"].iloc[0] == "CAARGNRIFF"