# Example #1
0
def test_ng_tcrs_BOnly():
    """Check the background-set keys produced for a beta-chain-only subset.

    Takes the first 20 clones (and the matching 20x20 distance blocks) from
    the module-level fixtures, builds a ``TCRsubset`` limited to chain "B",
    wraps it in a ``TCRMotif`` that uses ``alphabeta_db.tsv``, and generates
    one background set per chain in ``tm.chains``.  The 'B' entry must be a
    dict whose keys are exactly the expected TRBV gene names.
    """
    n_clones = 20
    clones = clone_df_subset.iloc[0:n_clones, :].copy()
    beta_dist = dist_b_subset.iloc[0:n_clones, 0:n_clones]
    alpha_dist = dist_a_subset.iloc[0:n_clones, 0:n_clones]

    ts = TCRsubset(clone_df=clones,
                   organism="mouse",
                   epitopes=["PA"],
                   epitope="PA",
                   chains=["B"],
                   dist_a=alpha_dist,
                   dist_b=beta_dist)
    tm = TCRMotif(clones_df=ts.tcr_motif_clones_df(),
                  organism=ts.organism,
                  chains=ts.chains,
                  epitopes=ts.epitopes,
                  db_file="alphabeta_db.tsv")

    # Default behavior: one background set per chain held by the motif object.
    ng_tcrs = {
        chain: tm.generate_background_set(
            chain=chain,
            ng_log_path=paths.path_to_current_db_files(db_file=tm.db_file),
            ng_log_file='new_nextgen_chains_{}_{}.tsv'.format(
                tm.organism, chain))
        for chain in tm.chains
    }

    expected_v_genes = {
        'TRBV29*01', 'TRBV16*01', 'TRBV13-2*01', 'TRBV13-1*01', 'TRBV3*01',
        'TRBV3*02', 'TRBV17*01', 'TRBV15*01', 'TRBV19*01', 'TRBV19*03',
        'TRBV4*01', 'TRBV4*02', 'TRBV23*01', 'TRBV13-2*05', 'TRBV13-3*01',
        'TRBV12-1*01', 'TRBV12-2*01', 'TRBV2*01', 'TRBV21*01', 'TRBV5*01',
        'TRBV8*01', 'TRBV1*01', 'TRBV1*02', 'TRBV26*02', 'TRBV24*01',
        'TRBV26*01', 'TRBV14*01', 'TRBV24*02', 'TRBV31*01', 'TRBV5*05',
        'TRBV9*01', 'TRBV20*01', 'TRBV30*01',
    }
    beta_background = ng_tcrs['B']
    assert isinstance(beta_background, dict)
    assert set(beta_background.keys()) == expected_v_genes
# Example #2
0
def test_complete_example_without_motifs_step():
    """Run the full pipeline from a clones file to a PWM plot.

    Steps exercised, in order:

    1. Load the tcrdist clones file and keep the "PA" and "F2" epitopes.
    2. Map it to the tcrdist2 column layout and build a ``TCRrep``.
    3. Infer CDRs, deduplicate, and run the legacy alpha/beta distance
       method; the paired distance must equal ``dist_a + dist_b``.
    4. Subset clones and distances to epitope "PA" and build a ``TCRsubset``.
    5. Read a previously generated motif log instead of discovering motifs,
       turn its first row into a ``StoreIOMotif``, and pass it through
       ``analyze_motif``, ``analyze_matches`` and ``plot_pwm``.

    Only the distance identity and ``_validate_attrs`` are asserted; the
    remaining calls are checked solely for not raising.
    """
    import numpy as np
    import pandas as pd

    from tcrdist import mappers
    from tcrdist.cdr3_motif import TCRMotif
    from tcrdist.plotting import plot_pwm
    from tcrdist.repertoire import TCRrep
    from tcrdist.storage import StoreIOMotif
    from tcrdist.subset import TCRsubset

    # --- Load the clones file and keep the two epitopes of interest ---------
    tcrdist_clone_fn = 'tcrdist/test_files/mouse_pairseqs_v1_parsed_seqs_probs_mq20_clones.tsv'
    tcrdist_clone_df = pd.read_csv(tcrdist_clone_fn, sep="\t")
    keep = tcrdist_clone_df.epitope.isin(["PA", "F2"])
    tcrdist_clone_df = tcrdist_clone_df[keep].copy()

    # --- Convert to the tcrdist2 column layout and build the repertoire -----
    tcrdist2_df = mappers.generic_pandas_mapper(
        df=tcrdist_clone_df,
        mapping=mappers.tcrdist_clone_df_to_tcrdist2_mapping)
    tr = TCRrep(cell_df=tcrdist2_df, organism="mouse")

    tr.infer_cdrs_from_v_gene(chain='alpha', imgt_aligned=True)
    tr.infer_cdrs_from_v_gene(chain='beta', imgt_aligned=True)

    tr.index_cols = [
        'clone_id', 'subject', 'epitope', 'v_a_gene', 'j_a_gene', 'v_b_gene',
        'j_b_gene', 'cdr3_a_aa', 'cdr3_b_aa', 'cdr1_a_aa', 'cdr2_a_aa',
        'pmhc_a_aa', 'cdr1_b_aa', 'cdr2_b_aa', 'pmhc_b_aa', 'cdr3_b_nucseq',
        'cdr3_a_nucseq', 'va_countreps', 'ja_countreps', 'vb_countreps',
        'jb_countreps', 'va_gene', 'vb_gene', 'ja_gene', 'jb_gene'
    ]
    tr.deduplicate()
    tr._tcrdist_legacy_method_alpha_beta()

    # The paired distance must be the sum of the per-chain distances.
    distA = tr.dist_a
    distB = tr.dist_b
    assert np.all(((distA + distB) - tr.paired_tcrdist) == 0)

    # --- Restrict clones and both distance matrices to epitope "PA" ---------
    clone_df_subset = tr.clone_df[tr.clone_df.epitope == "PA"]
    pa_ids = clone_df_subset.clone_id
    distA_subset = distA.loc[pa_ids, pa_ids].copy()
    distB_subset = distB.loc[pa_ids, pa_ids].copy()

    ts = TCRsubset(clone_df_subset,
                   organism="mouse",
                   epitopes=["PA"],
                   epitope="PA",
                   chains=["A", "B"],
                   dist_a=distA_subset,
                   dist_b=distB_subset)

    # The motif object is constructed but not used afterwards: the motifs
    # themselves come from the log file read below.
    TCRMotif(ts.tcr_motif_clones_df(),
             organism="mouse",
             chains=["A"],
             epitopes=["PA"])

    # --- Load precomputed motifs from the log file --------------------------
    cnames = [
        "file_type", "count", "expect_random", "expect_nextgen", "chi_squared",
        "nfixed", "showmotif", "num", "othernum", "overlap", "ep", "ab",
        "nseqs", "v_rep_counts", "j_rep_counts"
    ]
    motif_fn = 'tcrdist/test_files/mouse_pairseqs_v1_parsed_seqs_probs_mq20_clones_cdr3_motifs_PA.log'
    # Use a context manager so the file handle is closed deterministically.
    with open(motif_fn, "r") as motif_file:
        motif_rows = [line.split() for line in motif_file]
    motif_df = pd.DataFrame(motif_rows, columns=cnames)
    row = motif_df.iloc[0, :].to_dict()

    # --- Analyze and plot the first motif -----------------------------------
    motif = StoreIOMotif(**row)
    motif._coerce_attrs()
    assert motif._validate_attrs()
    motif = ts.analyze_motif(s=motif)
    motif = ts.analyze_matches(s=motif)
    # Smoke check only: the returned value is not inspected.
    plot_pwm(motif,
             create_file=False,
             my_height=200,
             my_width=600)
# Example #3
0
def test_ng_tcrs_AOnly():
    """Check the background-set keys produced for an alpha-chain-only subset.

    Takes the first 20 clones and the matching 20x20 alpha distance block
    from the module-level fixtures, builds a ``TCRsubset`` limited to chain
    "A" (``dist_b`` is passed as ``None``), wraps it in a ``TCRMotif`` that
    uses ``alphabeta_db.tsv``, and generates one background set per chain in
    ``tm.chains``.  The 'A' entry must be a dict whose keys are exactly the
    expected TRAV gene names.
    """
    n_clones = 20
    df = clone_df_subset.iloc[0:n_clones, :].copy()
    da = dist_a_subset.iloc[0:n_clones, 0:n_clones]
    ts = TCRsubset(clone_df=df,
                   organism="mouse",
                   epitopes=["PA"],
                   epitope="PA",
                   chains=["A"],
                   dist_a=da,
                   dist_b=None)
    tm = TCRMotif(clones_df=ts.tcr_motif_clones_df(),
                  organism=ts.organism,
                  chains=ts.chains,
                  epitopes=ts.epitopes,
                  db_file="alphabeta_db.tsv")

    # Default behavior: one background set per chain held by the motif object.
    ng_tcrs = {}
    for chain in tm.chains:
        ng_tcrs[chain] = tm.generate_background_set(
            chain=chain,
            ng_log_path=paths.path_to_current_db_files(db_file=tm.db_file),
            ng_log_file='new_nextgen_chains_{}_{}.tsv'.format(
                tm.organism, chain))

    expected_v_genes = {
        'TRAV14D-3/DV8*02', 'TRAV14-1*02', 'TRAV14-2*01', 'TRAV14D-1*01',
        'TRAV14D-2*01', 'TRAV14D-2*02', 'TRAV14-3*01', 'TRAV14-3*02',
        'TRAV14D-3/DV8*01', 'TRAV6-7/DV9*02', 'TRAV6-6*01', 'TRAV6-7/DV9*01',
        'TRAV6-7/DV9*04', 'TRAV6D-6*01', 'TRAV6-5*01', 'TRAV6D-7*01',
        'TRAV13-1*01', 'TRAV13D-4*01', 'TRAV13-3*01', 'TRAV10*01', 'TRAV2*01',
        'TRAV6-4*01', 'TRAV3D-3*02', 'TRAV3-1*01', 'TRAV3-4*01', 'TRAV4D-4*03',
        'TRAV4D-3*01', 'TRAV4D-4*01', 'TRAV4-3*01', 'TRAV4-2*01',
        'TRAV4-4/DV10*01', 'TRAV4D-3*03', 'TRAV5-4*01', 'TRAV5D-4*02',
        'TRAV3-3*01', 'TRAV12-3*04', 'TRAV4D-4*04', 'TRAV13-4/DV7*03',
        'TRAV16*01', 'TRAV9N-3*01', 'TRAV9N-2*01', 'TRAV9D-4*03',
        'TRAV9D-3*01', 'TRAV9-1*01', 'TRAV9D-2*01', 'TRAV9D-4*04',
        'TRAV6-1*01', 'TRAV14-1*01', 'TRAV7-2*01', 'TRAV12D-1*01',
        'TRAV12-2*01', 'TRAV12N-3*01', 'TRAV16D/DV11*03', 'TRAV12-1*01',
        'TRAV12-3*01', 'TRAV12D-3*02', 'TRAV12D-2*01', 'TRAV8-2*01',
        'TRAV13D-2*01', 'TRAV13-4/DV7*02', 'TRAV19*01', 'TRAV21/DV12*01',
        'TRAV7-5*01', 'TRAV7-5*03', 'TRAV11*01', 'TRAV11N*01', 'TRAV11*02',
        'TRAV12D-1*05', 'TRAV4D-4*02', 'TRAV7-6*02', 'TRAV7-3*01',
        'TRAV6D-6*03', 'TRAV7-3*02', 'TRAV15D-2/DV6D-2*04',
        'TRAV15-2/DV6-2*02', 'TRAV15-2/DV6-2*01', 'TRAV15D-2/DV6D-2*01',
        'TRAV15D-2/DV6D-2*02', 'TRAV15D-1/DV6D-1*04', 'TRAV15-1/DV6-1*01',
        'TRAV17*02', 'TRAV17*01', 'TRAV12-1*05', 'TRAV7N-5*01',
        'TRAV15D-1/DV6D-1*01', 'TRAV7-4*01', 'TRAV12D-2*02', 'TRAV9-4*01',
        'TRAV9-3*01', 'TRAV6-2*01', 'TRAV8-1*01', 'TRAV13D-1*03',
        'TRAV13-2*01', 'TRAV6-7/DV9*08', 'TRAV12D-1*03', 'TRAV12-3*02',
        'TRAV7-6*01', 'TRAV4-2*02', 'TRAV1*01', 'TRAV5-1*01', 'TRAV8-1*02',
        'TRAV15D-1/DV6D-1*05', 'TRAV7-1*01', 'TRAV9-2*01', 'TRAV13-5*01',
        'TRAV6-6*03', 'TRAV12-1*02', 'TRAV12D-3*01', 'TRAV6-4*02',
        'TRAV6-1*02', 'TRAV20*01', 'TRAV14D-3/DV8*05', 'TRAV18*01',
        'TRAV7D-2*03', 'TRAV16*04', 'TRAV7-5*02', 'TRAV4D-2*01',
        'TRAV13D-2*02', 'TRAV5D-2*01', 'TRAV5-2*01', 'TRAV16*03',
    }
    assert isinstance(ng_tcrs['A'], dict)
    assert set(ng_tcrs['A'].keys()) == expected_v_genes
# Example #4
0
def test_ng_tcrs_DOnly():
    """Check motif input and background-set keys for a delta-only subset.

    Builds a human ``TCRsubset`` restricted to the delta chain from the
    module-level fixtures, verifies that ``tcr_motif_clones_df(gdmode=True)``
    reproduces the expected 13-row frame (index labels 53 through 65), then
    builds a ``TCRMotif`` backed by ``gammadelta_db.tsv`` and asserts the
    gene names keyed under 'B' in the generated background sets.
    """
    ts = TCRsubset(clone_df=clone_df_subset_d,
                   organism="human",
                   epitopes=["X"],
                   epitope="X",
                   chains=["delta"],
                   dist_d=dist_d_subset)

    # Expected motif-input frame.  Every column except 'jb_rep' and 'cdr3b'
    # holds a single repeated value, so the columns are spelled as lists over
    # an explicit index instead of one {row_label: value} dict per column.
    row_labels = list(range(53, 66))
    n_rows = len(row_labels)
    tcr_motif_delta_input = pd.DataFrame(
        {
            'subject': ['SRR5130260.1'] * n_rows,
            'epitope': ['X'] * n_rows,
            'va_rep': ['TRGV1*01'] * n_rows,
            'ja_rep': ['TRGJ1*01'] * n_rows,
            'vb_rep': ['TRDV2*01'] * n_rows,
            # Rows 53-56 use TRDJ1*01, rows 57-65 use TRDJ2*01.
            'jb_rep': ['TRDJ1*01'] * 4 + ['TRDJ2*01'] * 9,
            'cdr3a': ['CATWAKNYYKKLF'] * n_rows,
            'cdr3b': [
                'CACHRGTDTDKLIF',
                'CACDKNGGYVRYTDKLIF',
                'CACDTVGIPDKLIF',
                'CACVRLPLRGRPYTDKLIF',
                'CACDNWGALTAQLFF',
                'CACDTILGDITLTAQLFF',
                'CACDTGRGTLTAQLFF',
                'CACDTWGMTAQLFF',
                'CACDTGGALTAQLFF',
                'CACDIRDTRVLTAQLFF',
                'CACDIVLGDPSLTAQLFF',
                'CACDHLLGDTAQLFF',
                'CACDPVTGGSLTAQLFF',
            ],
        },
        index=row_labels)
    assert np.all(ts.tcr_motif_clones_df(gdmode=True) == tcr_motif_delta_input)

    tm = TCRMotif(clones_df=ts.tcr_motif_clones_df(gdmode=True),
                  organism=ts.organism,
                  chains=ts.chains,
                  epitopes=ts.epitopes,
                  db_file="gammadelta_db.tsv")

    # Notice that 'B' represents 'delta'
    # Notice that 'A' represents 'gamma'
    # The background sets are generated once per chain, as in the
    # alpha-only and beta-only tests.
    ng_tcrs = {}
    for chain in tm.chains:
        ng_tcrs[chain] = tm.generate_background_set(
            chain=chain,
            ng_log_path=paths.path_to_current_db_files(db_file=tm.db_file),
            ng_log_file='new_nextgen_chains_{}_{}.tsv'.format(
                tm.organism, chain))

    expected_v_genes = {
        'TRDV2*01', 'TRDV3*01', 'TRDV1*01', 'TRAV38-2/DV8*01', 'TRAV22*01',
        'TRAV29/DV5*01', 'TRAV41*01', 'TRAV39*01', 'TRAV14/DV4*02',
        'TRAV40*01', 'TRAV23/DV6*01', 'TRAV34*01', 'TRAV38-1*01',
        'TRAV26-2*01', 'TRAV26-1*01', 'TRAV19*01', 'TRAV35*01', 'TRAV17*01',
        'TRAV20*01', 'TRAV21*01', 'TRAV36/DV7*01', 'TRAV14/DV4*01',
        'TRAV9-2*01', 'TRAV24*01', 'TRAV30*01', 'TRAV38-1*03', 'TRAV38-1*02',
        'TRAV36/DV7*02', 'TRAV12-3*01', 'TRAV27*01', 'TRAV8-3*01', 'TRAV16*01',
        'TRAV13-1*01', 'TRAV30*03', 'TRAV8-4*01', 'TRAV8-4*06', 'TRAV8-2*01',
        'TRAV10*01', 'TRAV8-4*07', 'TRAV8-6*01', 'TRAV25*01', 'TRAV9-1*01',
        'TRAV36/DV7*03', 'TRAV6*01', 'TRAV13-2*01', 'TRAV8-7*01',
    }
    assert set(ng_tcrs['B'].keys()) == expected_v_genes