Beispiel #1
0
def test_ng_tcrs_AOnly():
    """Check the next-gen background set generated for an alpha-only subset.

    Builds a ``TCRsubset`` from the first 20 clones with ``chains = ["A"]``
    and no beta distance matrix, wraps it in a ``TCRMotif`` backed by
    ``alphabeta_db.tsv``, and verifies that the background set produced for
    chain ``'A'`` is a dict keyed by the expected mouse TRAV alleles.
    """
    df = clone_df_subset.iloc[0:20, :].copy()
    da = dist_a_subset.iloc[0:20, 0:20]
    ts = TCRsubset(clone_df = df,
            organism = "mouse",
            epitopes = ["PA"] ,
            epitope = "PA",
            chains = ["A"],
            dist_a = da,
            dist_b = None)
    tm = TCRMotif(  clones_df = ts.tcr_motif_clones_df(),
                    organism = ts.organism,
                    chains = ts.chains,
                    epitopes = ts.epitopes,
                    db_file = "alphabeta_db.tsv")
    ng_tcrs = dict()
    # Default behavior: one background set per chain held by the TCRMotif.
    for chain in tm.chains:
        next_gen_ref = tm.generate_background_set(chain = chain,
                            ng_log_path = paths.path_to_current_db_files(db_file = tm.db_file),
                            ng_log_file = 'new_nextgen_chains_{}_{}.tsv'.format(tm.organism,chain) )
        ng_tcrs[chain] = next_gen_ref

    expected_v_genes = {
        'TRAV14D-3/DV8*02', 'TRAV14-1*02', 'TRAV14-2*01', 'TRAV14D-1*01',
        'TRAV14D-2*01', 'TRAV14D-2*02', 'TRAV14-3*01', 'TRAV14-3*02',
        'TRAV14D-3/DV8*01', 'TRAV6-7/DV9*02', 'TRAV6-6*01', 'TRAV6-7/DV9*01',
        'TRAV6-7/DV9*04', 'TRAV6D-6*01', 'TRAV6-5*01', 'TRAV6D-7*01',
        'TRAV13-1*01', 'TRAV13D-4*01', 'TRAV13-3*01', 'TRAV10*01',
        'TRAV2*01', 'TRAV6-4*01', 'TRAV3D-3*02', 'TRAV3-1*01',
        'TRAV3-4*01', 'TRAV4D-4*03', 'TRAV4D-3*01', 'TRAV4D-4*01',
        'TRAV4-3*01', 'TRAV4-2*01', 'TRAV4-4/DV10*01', 'TRAV4D-3*03',
        'TRAV5-4*01', 'TRAV5D-4*02', 'TRAV3-3*01', 'TRAV12-3*04',
        'TRAV4D-4*04', 'TRAV13-4/DV7*03', 'TRAV16*01', 'TRAV9N-3*01',
        'TRAV9N-2*01', 'TRAV9D-4*03', 'TRAV9D-3*01', 'TRAV9-1*01',
        'TRAV9D-2*01', 'TRAV9D-4*04', 'TRAV6-1*01', 'TRAV14-1*01',
        'TRAV7-2*01', 'TRAV12D-1*01', 'TRAV12-2*01', 'TRAV12N-3*01',
        'TRAV16D/DV11*03', 'TRAV12-1*01', 'TRAV12-3*01', 'TRAV12D-3*02',
        'TRAV12D-2*01', 'TRAV8-2*01', 'TRAV13D-2*01', 'TRAV13-4/DV7*02',
        'TRAV19*01', 'TRAV21/DV12*01', 'TRAV7-5*01', 'TRAV7-5*03',
        'TRAV11*01', 'TRAV11N*01', 'TRAV11*02', 'TRAV12D-1*05',
        'TRAV4D-4*02', 'TRAV7-6*02', 'TRAV7-3*01', 'TRAV6D-6*03',
        'TRAV7-3*02', 'TRAV15D-2/DV6D-2*04', 'TRAV15-2/DV6-2*02',
        'TRAV15-2/DV6-2*01', 'TRAV15D-2/DV6D-2*01', 'TRAV15D-2/DV6D-2*02',
        'TRAV15D-1/DV6D-1*04', 'TRAV15-1/DV6-1*01', 'TRAV17*02', 'TRAV17*01',
        'TRAV12-1*05', 'TRAV7N-5*01', 'TRAV15D-1/DV6D-1*01', 'TRAV7-4*01',
        'TRAV12D-2*02', 'TRAV9-4*01', 'TRAV9-3*01', 'TRAV6-2*01',
        'TRAV8-1*01', 'TRAV13D-1*03', 'TRAV13-2*01', 'TRAV6-7/DV9*08',
        'TRAV12D-1*03', 'TRAV12-3*02', 'TRAV7-6*01', 'TRAV4-2*02',
        'TRAV1*01', 'TRAV5-1*01', 'TRAV8-1*02', 'TRAV15D-1/DV6D-1*05',
        'TRAV7-1*01', 'TRAV9-2*01', 'TRAV13-5*01', 'TRAV6-6*03',
        'TRAV12-1*02', 'TRAV12D-3*01', 'TRAV6-4*02', 'TRAV6-1*02',
        'TRAV20*01', 'TRAV14D-3/DV8*05', 'TRAV18*01', 'TRAV7D-2*03',
        'TRAV16*04', 'TRAV7-5*02', 'TRAV4D-2*01', 'TRAV13D-2*02',
        'TRAV5D-2*01', 'TRAV5-2*01', 'TRAV16*03',
    }
    assert isinstance(ng_tcrs['A'], dict)
    assert set(ng_tcrs['A'].keys()) == expected_v_genes
Beispiel #2
0
def test_ng_tcrs_BOnly():
    """Check the next-gen background set generated for a beta-only subset.

    A ``TCRsubset`` is built from the first 20 clones with ``chains = ["B"]``
    (both distance matrices are still supplied), then a ``TCRMotif`` backed
    by ``alphabeta_db.tsv`` generates one background set per chain. The set
    for chain ``'B'`` must be a dict keyed by the expected mouse TRBV alleles.
    """
    clones = clone_df_subset.iloc[0:20, :].copy()
    beta_dist = dist_b_subset.iloc[0:20, 0:20]
    alpha_dist = dist_a_subset.iloc[0:20, 0:20]

    subset = TCRsubset(
        clone_df = clones,
        organism = "mouse",
        epitopes = ["PA"],
        epitope = "PA",
        chains = ["B"],
        dist_a = alpha_dist,
        dist_b = beta_dist,
    )
    motif = TCRMotif(
        clones_df = subset.tcr_motif_clones_df(),
        organism = subset.organism,
        chains = subset.chains,
        epitopes = subset.epitopes,
        db_file = "alphabeta_db.tsv",
    )

    # Default behavior: one background set per chain held by the TCRMotif.
    ng_tcrs = {
        chain: motif.generate_background_set(
            chain = chain,
            ng_log_path = paths.path_to_current_db_files(db_file = motif.db_file),
            ng_log_file = 'new_nextgen_chains_{}_{}.tsv'.format(motif.organism, chain),
        )
        for chain in motif.chains
    }

    expected_v_genes = {
        'TRBV29*01', 'TRBV16*01', 'TRBV13-2*01', 'TRBV13-1*01', 'TRBV3*01',
        'TRBV3*02', 'TRBV17*01', 'TRBV15*01', 'TRBV19*01', 'TRBV19*03',
        'TRBV4*01', 'TRBV4*02', 'TRBV23*01', 'TRBV13-2*05', 'TRBV13-3*01',
        'TRBV12-1*01', 'TRBV12-2*01', 'TRBV2*01', 'TRBV21*01', 'TRBV5*01',
        'TRBV8*01', 'TRBV1*01', 'TRBV1*02', 'TRBV26*02', 'TRBV24*01',
        'TRBV26*01', 'TRBV14*01', 'TRBV24*02', 'TRBV31*01', 'TRBV5*05',
        'TRBV9*01', 'TRBV20*01', 'TRBV30*01',
    }
    assert isinstance(ng_tcrs['B'], dict)
    assert set(ng_tcrs['B'].keys()) == expected_v_genes
Beispiel #3
0
def test_ng_tcrs_DOnly():
    """Check the motif input frame and background set for a delta-only subset.

    Builds a human ``TCRsubset`` with ``chains = ["delta"]``, verifies that
    ``tcr_motif_clones_df(gdmode=True)`` matches the expected 13-row frame
    (index 53..65), then generates the background set through a ``TCRMotif``
    backed by ``gammadelta_db.tsv`` and checks its V-gene keys.
    """
    ts = TCRsubset(clone_df = clone_df_subset_d,
            organism = "human",
            epitopes = ["X"] ,
            epitope = "X",
            chains = ["delta"],
            dist_d = dist_d_subset)

    # Expected frame, spelled out as a dict-of-dicts keyed by row label.
    # Most columns hold one constant value for every row.
    rows = range(53, 66)
    cdr3b_by_row = {
        53: 'CACHRGTDTDKLIF',
        54: 'CACDKNGGYVRYTDKLIF',
        55: 'CACDTVGIPDKLIF',
        56: 'CACVRLPLRGRPYTDKLIF',
        57: 'CACDNWGALTAQLFF',
        58: 'CACDTILGDITLTAQLFF',
        59: 'CACDTGRGTLTAQLFF',
        60: 'CACDTWGMTAQLFF',
        61: 'CACDTGGALTAQLFF',
        62: 'CACDIRDTRVLTAQLFF',
        63: 'CACDIVLGDPSLTAQLFF',
        64: 'CACDHLLGDTAQLFF',
        65: 'CACDPVTGGSLTAQLFF',
    }
    tcr_motif_delta_input = pd.DataFrame({
        'subject': {i: 'SRR5130260.1' for i in rows},
        'epitope': {i: 'X' for i in rows},
        'va_rep': {i: 'TRGV1*01' for i in rows},
        'ja_rep': {i: 'TRGJ1*01' for i in rows},
        'vb_rep': {i: 'TRDV2*01' for i in rows},
        # Rows 53-56 use TRDJ1*01; rows 57-65 use TRDJ2*01.
        'jb_rep': {i: ('TRDJ1*01' if i <= 56 else 'TRDJ2*01') for i in rows},
        'cdr3a': {i: 'CATWAKNYYKKLF' for i in rows},
        'cdr3b': cdr3b_by_row,
    })
    assert np.all( ts.tcr_motif_clones_df(gdmode=True)   == tcr_motif_delta_input)

    tm = TCRMotif(  clones_df = ts.tcr_motif_clones_df(gdmode = True),
                    organism = ts.organism,
                    chains = ts.chains,
                    epitopes = ts.epitopes,
                    db_file = "gammadelta_db.tsv")

    # Notice that 'B' represents 'delta'
    # Notice that 'A' represents 'gamma'
    ng_tcrs = dict()
    for chain in tm.chains:
        next_gen_ref = tm.generate_background_set(chain = chain,
                            ng_log_path = paths.path_to_current_db_files(db_file = tm.db_file),
                            ng_log_file = 'new_nextgen_chains_{}_{}.tsv'.format(tm.organism,chain) )
        ng_tcrs[chain] = next_gen_ref

    expected_v_genes = {
        'TRDV2*01', 'TRDV3*01', 'TRDV1*01', 'TRAV38-2/DV8*01', 'TRAV22*01',
        'TRAV29/DV5*01', 'TRAV41*01', 'TRAV39*01', 'TRAV14/DV4*02',
        'TRAV40*01', 'TRAV23/DV6*01', 'TRAV34*01', 'TRAV38-1*01',
        'TRAV26-2*01', 'TRAV26-1*01', 'TRAV19*01', 'TRAV35*01', 'TRAV17*01',
        'TRAV20*01', 'TRAV21*01', 'TRAV36/DV7*01', 'TRAV14/DV4*01',
        'TRAV9-2*01', 'TRAV24*01', 'TRAV30*01', 'TRAV38-1*03', 'TRAV38-1*02',
        'TRAV36/DV7*02', 'TRAV12-3*01', 'TRAV27*01', 'TRAV8-3*01',
        'TRAV16*01', 'TRAV13-1*01', 'TRAV30*03', 'TRAV8-4*01', 'TRAV8-4*06',
        'TRAV8-2*01', 'TRAV10*01', 'TRAV8-4*07', 'TRAV8-6*01', 'TRAV25*01',
        'TRAV9-1*01', 'TRAV36/DV7*03', 'TRAV6*01', 'TRAV13-2*01',
        'TRAV8-7*01',
    }
    assert set(ng_tcrs['B'].keys()) == expected_v_genes
Beispiel #4
0
def test_initialization_of_TCRsubset_alpha_beta_case_plus_motif_finding():
    """
    Build a paired alpha/beta TCRsubset from the first 20 clones and run
    motif finding on it; the result must be a DataFrame that is also stored
    on the subset as ``motif_df``.
    """
    import pytest
    import pandas as pd
    from tcrregex.subset import TCRsubset
    from tcrregex.tests.my_test_subset import dist_a_subset, dist_b_subset, clone_df_subset 
    from tcrregex.cdr3_motif import TCRMotif

    # The imported fixtures must all be DataFrames before they are sliced.
    assert isinstance(dist_a_subset, pd.DataFrame)
    assert isinstance(dist_b_subset, pd.DataFrame)
    assert isinstance(clone_df_subset, pd.DataFrame)

    clones = clone_df_subset.iloc[0:20, :].copy()
    beta_dist = dist_b_subset.iloc[0:20, 0:20]
    alpha_dist = dist_a_subset.iloc[0:20, 0:20]

    subset = TCRsubset(
        clone_df = clones,
        organism = "mouse",
        epitopes = ["PA"],
        epitope = "PA",
        chains = ["A", "B"],
        dist_a = alpha_dist,
        dist_b = beta_dist,
    )

    found = subset.find_motif()
    assert isinstance(found, pd.DataFrame)
    assert isinstance(subset.motif_df, pd.DataFrame)
Beispiel #5
0
def test_initialization_of_TCRsubset_beta_only_find_motifs():
    """
    Build a TCRsubset from beta-chain columns only (first 20 clones) and
    run motif finding; the result must be a DataFrame that is also stored
    on the subset as ``motif_df``.
    """
    assert isinstance(dist_a_subset, pd.DataFrame)
    assert isinstance(dist_b_subset, pd.DataFrame)
    assert isinstance(clone_df_subset, pd.DataFrame)

    # Keep the shared columns plus the beta-chain columns only.
    beta_columns = [
        'clone_id', 'subject', 'epitope',
        'v_b_gene', 'j_b_gene',
        'cdr3_b_aa', 'cdr1_b_aa', 'cdr2_b_aa',
        'pmhc_b_aa', 'cdr3_b_nucseq', 'count',
        'vb_countreps', 'jb_countreps', 'vb_gene', 'jb_gene',
    ]
    beta_clones = clone_df_subset[beta_columns]
    beta_clones = beta_clones.iloc[0:20, :]

    beta_dist = dist_b_subset.iloc[0:20, 0:20]

    subset = TCRsubset(
        clone_df = beta_clones,
        organism = "mouse",
        epitopes = ["PA"],
        epitope = "PA",
        chains = ["beta"],
        dist_b = beta_dist,
    )

    found = subset.find_motif()
    assert isinstance(found, pd.DataFrame)
    assert isinstance(subset.motif_df, pd.DataFrame)
Beispiel #6
0
def test_initialization_of_TCRsubset_alpha_beta_case():
    """
    Test that a TCRsubset can be constructed from the full paired
    alpha/beta fixtures without raising.
    """
    for fixture in (dist_a_subset, dist_b_subset, clone_df_subset):
        assert isinstance(fixture, pd.DataFrame)

    # Construction succeeding is the whole test; the instance is not inspected.
    TCRsubset(
        clone_df = clone_df_subset,
        organism = "mouse",
        epitopes = ["PA"],
        epitope = "PA",
        chains = ["A", "B"],
        dist_a = dist_a_subset,
        dist_b = dist_b_subset,
    )
Beispiel #7
0
def test_initialization_of_TCRsubset_alpha_case():
    """
    Test that a TCRsubset can be created from alpha-chain columns only.
    The resulting ``clone_df`` must still expose a ``vb_gene`` column whose
    first entry is "TRBV1*01" (presumably a placeholder beta gene filled in
    by TCRsubset -- confirm against its implementation).
    """
    assert isinstance(dist_a_subset, pd.DataFrame)
    assert isinstance(dist_b_subset, pd.DataFrame)
    assert isinstance(clone_df_subset, pd.DataFrame)

    # Keep the shared columns plus the alpha-chain columns only.
    alpha_columns = [
        'clone_id', 'subject', 'epitope',
        'v_a_gene', 'j_a_gene',
        'cdr3_a_aa', 'cdr1_a_aa', 'cdr2_a_aa',
        'pmhc_a_aa', 'cdr3_a_nucseq', 'count',
        'va_countreps', 'ja_countreps', 'va_gene', 'ja_gene',
    ]
    alpha_clones = clone_df_subset[alpha_columns]

    subset = TCRsubset(
        clone_df = alpha_clones,
        organism = "mouse",
        epitopes = ["PA"],
        epitope = "PA",
        chains = ["alpha"],
        dist_a = dist_a_subset,
    )

    assert isinstance(subset.clone_df.vb_gene, pd.Series)
    assert subset.clone_df.vb_gene.iloc[0] == "TRBV1*01"
Beispiel #8
0
def test_initialization_of_TCRsubset_beta_case():
    """
    Test that a TCRsubset can be created from beta-chain columns only.
    For this to work, fake alpha sequences have to be created: the
    resulting ``clone_df`` must expose a ``va_gene`` column whose first
    entry is "TRAV10*01".
    """
    assert isinstance(dist_a_subset, pd.DataFrame)
    assert isinstance(dist_b_subset, pd.DataFrame)
    assert isinstance(clone_df_subset, pd.DataFrame)

    # Keep the shared columns plus the beta-chain columns only.
    beta_columns = [
        'clone_id', 'subject', 'epitope',
        'v_b_gene', 'j_b_gene',
        'cdr3_b_aa', 'cdr1_b_aa', 'cdr2_b_aa',
        'pmhc_b_aa', 'cdr3_b_nucseq', 'count',
        'vb_countreps', 'jb_countreps', 'vb_gene', 'jb_gene',
    ]
    beta_clones = clone_df_subset[beta_columns]

    subset = TCRsubset(
        clone_df = beta_clones,
        organism = "mouse",
        epitopes = ["PA"],
        epitope = "PA",
        chains = ["beta"],
        dist_b = dist_b_subset,
    )

    assert isinstance(subset.clone_df.va_gene, pd.Series)
    assert subset.clone_df.va_gene.iloc[0] == "TRAV10*01"
Beispiel #9
0
def test_non_default_manual_generation_of_ng_tcrsAB():
    """
    Run motif finding with ``default_mode = False``, where the background
    sets for chains B and A are generated manually from explicitly named
    log files before ``find_motif`` is called. More than one motif row is
    expected.
    """
    clones = clone_df_subset.iloc[0:20, :].copy()
    beta_dist = dist_b_subset.iloc[0:20, 0:20]
    alpha_dist = dist_a_subset.iloc[0:20, 0:20]

    subset = TCRsubset(
        clone_df = clones,
        organism = "mouse",
        epitopes = ["PA"],
        epitope = "PA",
        chains = ["A", "B"],
        dist_a = alpha_dist,
        dist_b = beta_dist,
        default_mode = False,
    )

    # Background sets are populated by hand, B first and then A; the log
    # directory is given relative to the working directory.
    log_dir = 'tcrregex/db/alphabeta_db_tsv_files'
    subset.ng_tcrs['B'] = subset.generate_background_set(
        chain = ['B'],
        ng_log_path = log_dir,
        ng_log_file = 'new_nextgen_chains_mouse_B.tsv',
    )
    subset.ng_tcrs['A'] = subset.generate_background_set(
        chain = ['A'],
        ng_log_path = log_dir,
        ng_log_file = 'new_nextgen_chains_mouse_A.tsv',
    )

    found = subset.find_motif()
    assert isinstance(found, pd.DataFrame)
    assert isinstance(subset.motif_df, pd.DataFrame)
    assert found.shape[0] > 1