def test_TCRcodon_single_example2():
    """Reverse-translate one mouse CDR3 (TRBV29*01 / TRBJ2-2*01, 'CASSPTGQLYF').

    The guessed nucleotide sequence is compared against the real sequence
    at both edges, and then against the exact string the method is
    expected to produce for this input.
    """
    # Real nucleotide sequence for this CDR3, upper-cased once so both edge
    # checks compare against the same value.
    real_nucseq = 'tgtgctagcagccccaccgggcagctctacttt'.upper()

    tc = TCRcodon(organism="mouse", db_file="alphabeta_db.tsv")
    r = tc.guess_reverse_translation(
        v_gene_name='TRBV29*01',
        j_gene_name='TRBJ2-2*01',
        cdr3_aa='CASSPTGQLYF')

    # Only the edges are guaranteed to match the real sequence, as the
    # insertion codons are unknown and degenerate.
    assert r[:10] == real_nucseq[:10]
    # Slice to the end of the string: the previous [-10:-1] slice silently
    # skipped the final base and compared only nine characters.
    assert r[-10:] == real_nucseq[-10:]
    assert r == 'TGTGCTAGCAGTCCTACCGGGCAGCTCTACTTT'
def test_TCRcodon_small_dataframe_beta():
    """Guessed CDR3 nucleotide sequences have the same lengths as the real ones.

    Runs the reverse translation row by row over the `*_b_*` columns of
    `clone_df_subset` and compares the length of each guessed sequence to
    the length of the matching `cdr3_b_nucseq` entry.
    """
    tc = TCRcodon(organism="mouse", db_file="alphabeta_db.tsv")
    wanted_columns = ['v_b_gene', 'j_b_gene', 'cdr3_b_aa', 'cdr3_b_nucseq']
    df = clone_df_subset[wanted_columns]

    def reverse_translate(row):
        # One guessed nucleotide sequence per dataframe row.
        return tc.guess_reverse_translation(
            row['v_b_gene'], row['j_b_gene'], row['cdr3_b_aa'],
            verbose=False)

    syn_nucs = df.apply(reverse_translate, axis=1)

    len_syn = list(map(len, syn_nucs))
    len_real = list(map(len, df['cdr3_b_nucseq']))
    assert np.all(len_syn == len_real)
def test_TCRcodon_smal_dataframe_alpha_beta_lots():
    """Bigger example: length check on dash.csv for the `_b_` and `_a_` columns.

    For each column group, every row is reverse-translated and the length
    of the guessed sequence is compared with the length of the real
    nucleotide sequence stored in the same row.
    """
    tc = TCRcodon(organism="mouse", db_file="alphabeta_db.tsv")
    df = pd.read_csv("tcrdist/test_files_compact/dash.csv")

    # (V gene, J gene, CDR3 amino acids, real CDR3 nucleotides) column names;
    # the `_b_` group is checked first, then the `_a_` group.
    column_groups = (
        ('v_b_gene', 'j_b_gene', 'cdr3_b_aa', 'cdr3_b_nucseq'),
        ('v_a_gene', 'j_a_gene', 'cdr3_a_aa', 'cdr3_a_nucseq'),
    )

    for v_col, j_col, aa_col, nuc_col in column_groups:
        syn_nucs = df.apply(
            lambda row: tc.guess_reverse_translation(
                row[v_col], row[j_col], row[aa_col], verbose=False),
            axis=1)

        # Check that synthetic and real seqs are the same length
        len_syn = [len(nucseq) for nucseq in syn_nucs]
        len_real = [len(nucseq) for nucseq in df[nuc_col]]
        assert np.all(len_syn == len_real)
def test_TCRcodon_smal_dataframe_delta_lots():
    """Length check on sant.csv using the `_g_` columns and a fixed J gene.

    Each guessed nucleotide sequence must be three times as long as the
    CDR3 amino-acid string it was generated from.

    NOTE(review): the test name says "delta", but the columns used here are
    the `_g_` ones together with a 'TRGJ1*01' J gene -- confirm whether the
    name is intentional.
    """
    tc = TCRcodon(organism="human", db_file="gammadelta_db.tsv")
    df = pd.read_csv("tcrdist/test_files_compact/sant.csv")

    # The Sant data doesn't provide a J gene, so we are handicapped in that
    # regard; for testing we just guess one and use it for every row.
    df['j_g_gene'] = 'TRGJ1*01'

    def reverse_translate(row):
        return tc.guess_reverse_translation(
            row['v_g_gene'], row['j_g_gene'], row['cdr3_g_aa'],
            verbose=False)

    syn_nucs = df.apply(reverse_translate, axis=1)

    # Check that synthetic seqs have the expected length (3 bases per residue)
    len_syn = list(map(len, syn_nucs))
    len_real = [3 * len(aa_seq) for aa_seq in df['cdr3_g_aa']]
    assert np.all(len_syn == len_real)
def test_TCRcodon_smal_dataframe_gama_lots():
    """Length check on sant.csv using the `_d_` columns and a guessed J gene.

    The J gene of every row is chosen with `get_best_j_gene` from the CDR3
    amino-acid string, rows without a V gene are dropped, and each guessed
    nucleotide sequence must be three times as long as its CDR3
    amino-acid string.

    NOTE(review): the test name says "gama", but the columns used here are
    the `_d_` ones -- confirm whether the name is intentional.
    """
    tc = TCRcodon(organism="human", db_file="gammadelta_db.tsv")
    df = pd.read_csv("tcrdist/test_files_compact/sant.csv")

    # The J gene is guessed for every row before the V-gene filter below.
    df['j_d_gene'] = [
        tc.get_best_j_gene(aa_seq=x, verbose=False) for x in df['cdr3_d_aa']
    ]
    # Keep only rows that have a V gene; copy so the result is independent
    # of the original frame.
    df = df[df['v_d_gene'].notna()].copy()

    syn_nucs = df.apply(
        lambda r: tc.guess_reverse_translation(
            r['v_d_gene'], r['j_d_gene'], r['cdr3_d_aa'], verbose=False),
        axis=1)

    # Check that synthetic seqs have the expected length (3 bases per residue).
    # The original repeated this assertion twice verbatim; once is enough.
    len_syn = [len(x) for x in syn_nucs]
    len_real = [3 * len(x) for x in df['cdr3_d_aa']]
    assert np.all(len_syn == len_real)
def test_TCRcodon_single_example():
    """Reverse-translate one mouse CDR3 (TRBV29*01 / TRBJ1-5*01, 'CASSEGEAPLF').

    The result must equal the exact nucleotide string expected for this
    input.
    """
    expected_nucseq = 'TGTGCTAGCAGTGAGGGAGAGGCTCCGCTTTTT'
    query = dict(
        v_gene_name='TRBV29*01',
        j_gene_name='TRBJ1-5*01',
        cdr3_aa='CASSEGEAPLF',
    )

    tc = TCRcodon(organism="mouse", db_file="alphabeta_db.tsv")
    r = tc.guess_reverse_translation(**query)

    assert r == expected_nucseq