def test_initialization_of_TCRsubset_beta_only_find_motifs():
    """
    Build a TCRsubset from beta-chain columns only and run motif finding.

    Only the first 20 clones (and the first 20 rows/columns of the beta
    distance matrix) are used. The test checks that ``find_motif()``
    returns a DataFrame and that ``motif_df`` on the instance is one too.
    """
    # Sanity-check the module-level fixtures before using them.
    for fixture in (dist_a_subset, dist_b_subset, clone_df_subset):
        assert isinstance(fixture, pd.DataFrame)

    beta_columns = [
        'clone_id', 'subject', 'epitope',
        'v_b_gene', 'j_b_gene',
        'cdr3_b_aa', 'cdr1_b_aa', 'cdr2_b_aa', 'pmhc_b_aa',
        'cdr3_b_nucseq', 'count',
        'vb_countreps', 'jb_countreps',
        'vb_gene', 'jb_gene',
    ]
    beta_clones = clone_df_subset[beta_columns].iloc[0:20, :]
    beta_dist = dist_b_subset.iloc[0:20, 0:20]

    subset = TCRsubset(
        clone_df=beta_clones,
        organism="mouse",
        epitopes=["PA"],
        epitope="PA",
        chains=["beta"],
        dist_b=beta_dist,
    )

    found = subset.find_motif()
    assert isinstance(found, pd.DataFrame)
    assert isinstance(subset.motif_df, pd.DataFrame)
def test_initialization_of_TCRsubset_alpha_beta_case_plus_motif_finding():
    """
    Build a paired (A and B chain) TCRsubset and run motif finding.

    The fixtures are imported locally, trimmed to the first 20 clones,
    and the test checks that ``find_motif()`` returns a DataFrame and
    that ``motif_df`` on the instance is one too.
    """
    import pytest
    import pandas as pd
    from tcrdist.subset import TCRsubset
    from tcrdist.tests.my_test_subset import dist_a_subset, dist_b_subset, clone_df_subset
    from tcrdist.cdr3_motif import TCRMotif

    for fixture in (dist_a_subset, dist_b_subset, clone_df_subset):
        assert isinstance(fixture, pd.DataFrame)

    first_clones = clone_df_subset.iloc[0:20, :].copy()
    beta_dist = dist_b_subset.iloc[0:20, 0:20]
    alpha_dist = dist_a_subset.iloc[0:20, 0:20]

    subset = TCRsubset(
        clone_df=first_clones,
        organism="mouse",
        epitopes=["PA"],
        epitope="PA",
        chains=["A", "B"],
        dist_a=alpha_dist,
        dist_b=beta_dist,
    )

    found = subset.find_motif()
    assert isinstance(found, pd.DataFrame)
    assert isinstance(subset.motif_df, pd.DataFrame)
def test_non_default_manual_generation_of_ng_tcrsAB():
    """
    Exercise the non-default mode with hand-specified background files.

    The TCRsubset is created with ``default_mode=False``; the background
    sets for chains B and A are then generated explicitly from named
    files and stored in ``ng_tcrs`` before motif finding is run. More
    than one motif row is expected.
    """
    first_clones = clone_df_subset.iloc[0:20, :].copy()
    beta_dist = dist_b_subset.iloc[0:20, 0:20]
    alpha_dist = dist_a_subset.iloc[0:20, 0:20]

    subset = TCRsubset(
        clone_df=first_clones,
        organism="mouse",
        epitopes=["PA"],
        epitope="PA",
        chains=["A", "B"],
        dist_a=alpha_dist,
        dist_b=beta_dist,
        default_mode=False,
    )

    # Chain B is populated first, then chain A.
    background_files = (
        ('B', 'new_nextgen_chains_mouse_B.tsv'),
        ('A', 'new_nextgen_chains_mouse_A.tsv'),
    )
    for chain_label, log_file in background_files:
        subset.ng_tcrs[chain_label] = subset.generate_background_set(
            chain=[chain_label],
            ng_log_path='tcrdist/db/alphabeta_db.tsv_files',
            ng_log_file=log_file,
        )

    found = subset.find_motif()
    assert isinstance(found, pd.DataFrame)
    assert isinstance(subset.motif_df, pd.DataFrame)
    assert found.shape[0] > 1
def test_initialization_of_TCRsubset_alpha_beta_case():
    """
    Check that a paired (A and B chain) TCRsubset can be constructed.

    The full fixture frames are passed straight through; the test passes
    as long as construction does not raise.
    """
    for fixture in (dist_a_subset, dist_b_subset, clone_df_subset):
        assert isinstance(fixture, pd.DataFrame)

    TCRsubset(
        clone_df=clone_df_subset,
        organism="mouse",
        epitopes=["PA"],
        epitope="PA",
        chains=["A", "B"],
        dist_a=dist_a_subset,
        dist_b=dist_b_subset,
    )
def test_ng_tcrs_BOnly():
    """
    Generate the background set for a B-chain-only TCRsubset.

    A TCRMotif is built from the subset's motif clone table, one
    background set is generated per chain in ``tm.chains``, and the keys
    of the 'B' set are compared against the expected mouse TRBV genes.
    """
    first_clones = clone_df_subset.iloc[0:20, :].copy()
    beta_dist = dist_b_subset.iloc[0:20, 0:20]
    alpha_dist = dist_a_subset.iloc[0:20, 0:20]

    subset = TCRsubset(
        clone_df=first_clones,
        organism="mouse",
        epitopes=["PA"],
        epitope="PA",
        chains=["B"],
        dist_a=alpha_dist,
        dist_b=beta_dist,
    )
    tm = TCRMotif(
        clones_df=subset.tcr_motif_clones_df(),
        organism=subset.organism,
        chains=subset.chains,
        epitopes=subset.epitopes,
        db_file="alphabeta_db.tsv",
    )

    # Default behavior: one background set per chain, read from the
    # per-organism, per-chain file under the current db directory.
    ng_tcrs = {
        chain: tm.generate_background_set(
            chain=chain,
            ng_log_path=paths.path_to_current_db_files(db_file=tm.db_file),
            ng_log_file='new_nextgen_chains_{}_{}.tsv'.format(tm.organism, chain),
        )
        for chain in tm.chains
    }

    expected_v_genes = {
        'TRBV29*01', 'TRBV16*01', 'TRBV13-2*01', 'TRBV13-1*01',
        'TRBV3*01', 'TRBV3*02', 'TRBV17*01', 'TRBV15*01',
        'TRBV19*01', 'TRBV19*03', 'TRBV4*01', 'TRBV4*02',
        'TRBV23*01', 'TRBV13-2*05', 'TRBV13-3*01', 'TRBV12-1*01',
        'TRBV12-2*01', 'TRBV2*01', 'TRBV21*01', 'TRBV5*01',
        'TRBV8*01', 'TRBV1*01', 'TRBV1*02', 'TRBV26*02',
        'TRBV24*01', 'TRBV26*01', 'TRBV14*01', 'TRBV24*02',
        'TRBV31*01', 'TRBV5*05', 'TRBV9*01', 'TRBV20*01',
        'TRBV30*01',
    }
    assert isinstance(ng_tcrs['B'], dict)
    assert set(ng_tcrs['B'].keys()) == expected_v_genes
def test_initialization_of_TCRsubset_alpha_case():
    """
    Build a TCRsubset from alpha-chain columns only.

    Although only alpha columns are supplied, the test expects the
    resulting ``clone_df`` to expose a ``vb_gene`` Series whose first
    entry is "TRBV1*01".
    """
    for fixture in (dist_a_subset, dist_b_subset, clone_df_subset):
        assert isinstance(fixture, pd.DataFrame)

    alpha_columns = [
        'clone_id', 'subject', 'epitope',
        'v_a_gene', 'j_a_gene',
        'cdr3_a_aa', 'cdr1_a_aa', 'cdr2_a_aa', 'pmhc_a_aa',
        'cdr3_a_nucseq', 'count',
        'va_countreps', 'ja_countreps',
        'va_gene', 'ja_gene',
    ]
    alpha_clones = clone_df_subset[alpha_columns]

    subset = TCRsubset(
        clone_df=alpha_clones,
        organism="mouse",
        epitopes=["PA"],
        epitope="PA",
        chains=["alpha"],
        dist_a=dist_a_subset,
    )

    assert isinstance(subset.clone_df.vb_gene, pd.Series)
    assert subset.clone_df.vb_gene.iloc[0] == "TRBV1*01"
def test_initialization_of_TCRsubset_beta_case():
    """
    Build a TCRsubset from beta-chain columns only.

    For this to work, placeholder alpha sequences have to be created;
    the test therefore expects ``clone_df`` to expose a ``va_gene``
    Series whose first entry is "TRAV10*01".
    """
    for fixture in (dist_a_subset, dist_b_subset, clone_df_subset):
        assert isinstance(fixture, pd.DataFrame)

    beta_columns = [
        'clone_id', 'subject', 'epitope',
        'v_b_gene', 'j_b_gene',
        'cdr3_b_aa', 'cdr1_b_aa', 'cdr2_b_aa', 'pmhc_b_aa',
        'cdr3_b_nucseq', 'count',
        'vb_countreps', 'jb_countreps',
        'vb_gene', 'jb_gene',
    ]
    beta_clones = clone_df_subset[beta_columns]

    subset = TCRsubset(
        clone_df=beta_clones,
        organism="mouse",
        epitopes=["PA"],
        epitope="PA",
        chains=["beta"],
        dist_b=dist_b_subset,
    )

    assert isinstance(subset.clone_df.va_gene, pd.Series)
    assert subset.clone_df.va_gene.iloc[0] == "TRAV10*01"
def test_CompleteExample_with_TCRMotif_Invoked_From_within_TCRsubset():
    """
    End-to-end example: legacy clone file -> TCRrep -> TCRsubset -> motif logos.

    Steps:
      1. Read the legacy clones file and keep the "PA" and "F2" epitopes.
      2. Map it to tcrdist2 column names and build a TCRrep.
      3. Infer CDRs, deduplicate and compute legacy paired distances.
      4. Subset to "PA" clones and build a TCRsubset with matching
         alpha/beta distance matrices.
      5. Load a precomputed motif log (instead of calling ``find_motif``)
         and evaluate/plot the first few motifs.
    """
    import pandas as pd
    import numpy as np
    import tcrdist as td
    from tcrdist import mappers
    from tcrdist.repertoire import TCRrep
    from tcrdist.cdr3_motif import TCRMotif
    from tcrdist.subset import TCRsubset
    from tcrdist.storage import StoreIOMotif, StoreIOEntropy
    from tcrdist.plotting import plot_pwm

    # --- Load the legacy clones file and restrict to two epitopes -------
    tcrdist_clone_fn = 'tcrdist/test_files/mouse_pairseqs_v1_parsed_seqs_probs_mq20_clones.tsv'
    tcrdist_clone_df = pd.read_csv(tcrdist_clone_fn, sep="\t")
    ind = (tcrdist_clone_df.epitope == "PA") | (tcrdist_clone_df.epitope == "F2")
    tcrdist_clone_df = tcrdist_clone_df[ind].copy()

    # --- Translate legacy column names to tcrdist2 names ----------------
    mapping = mappers.tcrdist_clone_df_to_tcrdist2_mapping
    tcrdist2_df = mappers.generic_pandas_mapper(df=tcrdist_clone_df,
                                                mapping=mapping)

    # --- Build the repertoire and compute distances ---------------------
    tr = TCRrep(cell_df=tcrdist2_df, organism="mouse")
    tr.infer_cdrs_from_v_gene(chain='alpha', imgt_aligned=True)
    tr.infer_cdrs_from_v_gene(chain='beta', imgt_aligned=True)
    tr.index_cols = [
        'clone_id', 'subject', 'epitope',
        'v_a_gene', 'j_a_gene', 'v_b_gene', 'j_b_gene',
        'cdr3_a_aa', 'cdr3_b_aa',
        'cdr1_a_aa', 'cdr2_a_aa', 'pmhc_a_aa',
        'cdr1_b_aa', 'cdr2_b_aa', 'pmhc_b_aa',
        'cdr3_b_nucseq', 'cdr3_a_nucseq',
        'va_countreps', 'ja_countreps', 'vb_countreps', 'jb_countreps',
        'va_gene', 'vb_gene', 'ja_gene', 'jb_gene',
    ]
    tr.deduplicate()
    tr._tcrdist_legacy_method_alpha_beta()

    # The per-chain matrices must add up to the paired distance matrix.
    distA = tr.dist_a
    distB = tr.dist_b
    assert np.all(((distA + distB) - tr.paired_tcrdist) == 0)

    # --- Subset clones and distances to the "PA" epitope ----------------
    criteria = tr.clone_df.epitope == "PA"
    clone_df_subset = tr.clone_df[criteria]
    distA_subset = distA.loc[clone_df_subset.clone_id,
                             clone_df_subset.clone_id].copy()
    distB_subset = distB.loc[clone_df_subset.clone_id,
                             clone_df_subset.clone_id].copy()

    ts = TCRsubset(clone_df_subset,
                   organism="mouse",
                   epitopes=["PA"],
                   epitope="PA",
                   chains=["A", "B"],
                   dist_a=distA_subset,
                   dist_b=distB_subset)

    # --- Load precomputed motifs rather than running ts.find_motif() ----
    cnames = [
        "file_type", "count", "expect_random", "expect_nextgen", "chi_squared",
        "nfixed", "showmotif", "num", "othernum", "overlap", "ep", "ab",
        "nseqs", "v_rep_counts", "j_rep_counts",
    ]
    motif_fn = 'tcrdist/test_files/mouse_pairseqs_v1_parsed_seqs_probs_mq20_clones_cdr3_motifs_PA.log'
    # Use a context manager so the log file handle is closed promptly
    # (the previous bare open(...).readlines() left it to the GC).
    with open(motif_fn, "r") as motif_log:
        motif_lines = motif_log.readlines()
    ts.motif_df = pd.DataFrame([line.split() for line in motif_lines],
                               columns=cnames)

    # An empty motif table would make the loop below a silent no-op;
    # fail loudly instead (the old code did so implicitly via iloc[0]).
    assert ts.motif_df.shape[0] > 0

    # --- Evaluate and plot the first few motifs -------------------------
    motif_list = list()
    motif_logo = list()
    for i, row in ts.motif_df.iterrows():
        StoreIOMotif_instance = ts.eval_motif(row)
        motif_list.append(StoreIOMotif_instance)
        motif_logo.append(
            plot_pwm(StoreIOMotif_instance,
                     create_file=False,
                     my_height=200,
                     my_width=600))
        # Stop once the row labelled 2 has been handled to keep the test fast.
        if i > 1:
            break
def test_hot_start_example_in_full():
    """
    Run the code that makes up the HotStart example in the docs.

    Loads the compact Dash data set, restricts it to the "PA" epitope,
    computes legacy paired distances, clusters the clones, builds a
    TCRsubset from cluster 5, obtains motifs (from a cached CSV when
    present, otherwise via ``find_motif``) and renders a logo for every
    A-chain and B-chain motif.
    """
    # basic imports
    import os
    import pandas as pd
    import numpy as np

    # tcrdist classes
    from tcrdist.repertoire import TCRrep
    from tcrdist.subset import TCRsubset
    from tcrdist.cdr3_motif import TCRMotif
    from tcrdist.storage import StoreIOMotif, StoreIOEntropy

    # tcrdist functions
    from tcrdist import plotting
    from tcrdist.mappers import populate_legacy_fields

    # scipy functions for clustering
    from scipy.spatial import distance
    from scipy.cluster.hierarchy import linkage, dendrogram, fcluster

    # sklearn functions for low-dimensional embeddings
    from sklearn.manifold import TSNE, MDS

    # 1. Load data and keep only receptors recognizing the "PA" epitope.
    dash_csv = os.path.join("tcrdist", "test_files_compact", "dash.csv")
    tcrdist2_df = pd.read_csv(dash_csv)
    tcrdist2_df = tcrdist2_df[tcrdist2_df.epitope == "PA"].copy()

    # 2. Create the TCRrep instance; the input becomes tr.cell_df.
    tr = TCRrep(cell_df=tcrdist2_df,
                chains=['alpha', 'beta'],
                organism="mouse")

    # 3. Infer CDR1, CDR2 and CDR2.5 (a.k.a. pmhc) from germline V genes.
    for chain_name in ('alpha', 'beta'):
        tr.infer_cdrs_from_v_gene(chain=chain_name, imgt_aligned=True)

    # 4. Define the index columns that determine unique clones, then
    #    deduplicate on them (creates tr.clone_df).
    tr.index_cols = [
        'clone_id', 'subject', 'epitope',
        'v_a_gene', 'j_a_gene', 'v_b_gene', 'j_b_gene',
        'cdr3_a_aa', 'cdr3_b_aa',
        'cdr1_a_aa', 'cdr2_a_aa', 'pmhc_a_aa',
        'cdr1_b_aa', 'cdr2_b_aa', 'pmhc_b_aa',
        'cdr3_b_nucseq', 'cdr3_a_nucseq',
    ]
    tr.deduplicate()

    # 5. Calculate tcrdists by the method in Dash et al.
    tr._tcrdist_legacy_method_alpha_beta()

    # 6. The alpha and beta distance matrices must sum to paired_tcrdist.
    alpha_dist = tr.dist_a
    beta_dist = tr.dist_b
    assert np.all(((alpha_dist + beta_dist) - tr.paired_tcrdist) == 0)

    # Cluster on the paired distances.
    condensed = distance.squareform(tr.paired_tcrdist, force="vector")
    Z = linkage(condensed, method="complete")
    den = dendrogram(Z, color_threshold=np.inf, no_plot=True)
    cluster_index = fcluster(Z, t=20, criterion="maxclust")
    assert len(cluster_index) == tr.clone_df.shape[0]
    assert len(cluster_index) == tr.paired_tcrdist.shape[0]
    tr.clone_df['cluster_index'] = cluster_index

    # Subset clones and both distance matrices to cluster 5 / epitope "PA".
    in_cluster_5 = (cluster_index == 5)
    clone_df_subset = tr.clone_df[in_cluster_5]
    clone_df_subset = clone_df_subset[clone_df_subset.epitope == "PA"].copy()
    subset_ids = clone_df_subset.clone_id
    dist_a_subset = tr.dist_a.loc[subset_ids, subset_ids].copy()
    dist_b_subset = tr.dist_b.loc[subset_ids, subset_ids].copy()

    clone_df_subset = populate_legacy_fields(df=clone_df_subset,
                                             chains=['alpha', 'beta'])

    ts = TCRsubset(clone_df_subset,
                   organism="mouse",
                   epitopes=["PA"],
                   epitope="PA",
                   chains=["A", "B"],
                   dist_a=dist_a_subset,
                   dist_b=dist_b_subset)

    # Find motifs: reuse the cached CSV when it exists, else compute them.
    motif_csv = os.path.join("tcrdist", "test_files_compact",
                             "dash_PA_cluster_5_motifs.csv")
    if os.path.isfile(motif_csv):
        ts.motif_df = pd.read_csv(motif_csv)
    else:
        ts.find_motif()

    # Save motifs.
    # NOTE(review): written here as an unconditional step after either
    # branch; the collapsed original does not show whether it belonged
    # inside the else branch -- confirm.
    ts.motif_df.to_csv(motif_csv, index=False)

    def _motifs_and_logos(chain_label):
        """Evaluate every motif of one chain and render its logo."""
        motifs = list()
        logos = list()
        chain_rows = ts.motif_df[ts.motif_df.ab == chain_label]
        for _, motif_row in chain_rows.iterrows():
            stored_motif = ts.eval_motif(motif_row)
            motifs.append(stored_motif)
            logos.append(
                plotting.plot_pwm(stored_motif,
                                  create_file=False,
                                  my_height=200,
                                  my_width=600))
        return motifs, logos

    # Preprocess motifs: chain A first, then chain B.
    motif_list_a, motif_logos_a = _motifs_and_logos("A")
    motif_list_b, motif_logos_b = _motifs_and_logos("B")
def test_ng_tcrs_AOnly():
    """
    Generate the background set for an A-chain-only TCRsubset.

    A TCRMotif is built from the subset's motif clone table, one
    background set is generated per chain in ``tm.chains``, and the keys
    of the 'A' set are compared against the expected mouse TRAV genes.
    """
    first_clones = clone_df_subset.iloc[0:20, :].copy()
    beta_dist = dist_b_subset.iloc[0:20, 0:20]  # sliced but not passed on
    alpha_dist = dist_a_subset.iloc[0:20, 0:20]

    subset = TCRsubset(
        clone_df=first_clones,
        organism="mouse",
        epitopes=["PA"],
        epitope="PA",
        chains=["A"],
        dist_a=alpha_dist,
        dist_b=None,
    )
    tm = TCRMotif(
        clones_df=subset.tcr_motif_clones_df(),
        organism=subset.organism,
        chains=subset.chains,
        epitopes=subset.epitopes,
        db_file="alphabeta_db.tsv",
    )

    # Default behavior: one background set per chain, read from the
    # per-organism, per-chain file under the current db directory.
    ng_tcrs = {
        chain: tm.generate_background_set(
            chain=chain,
            ng_log_path=paths.path_to_current_db_files(db_file=tm.db_file),
            ng_log_file='new_nextgen_chains_{}_{}.tsv'.format(tm.organism, chain),
        )
        for chain in tm.chains
    }

    expected_v_genes = {
        'TRAV14D-3/DV8*02', 'TRAV14-1*02', 'TRAV14-2*01', 'TRAV14D-1*01',
        'TRAV14D-2*01', 'TRAV14D-2*02', 'TRAV14-3*01', 'TRAV14-3*02',
        'TRAV14D-3/DV8*01', 'TRAV6-7/DV9*02', 'TRAV6-6*01', 'TRAV6-7/DV9*01',
        'TRAV6-7/DV9*04', 'TRAV6D-6*01', 'TRAV6-5*01', 'TRAV6D-7*01',
        'TRAV13-1*01', 'TRAV13D-4*01', 'TRAV13-3*01', 'TRAV10*01',
        'TRAV2*01', 'TRAV6-4*01', 'TRAV3D-3*02', 'TRAV3-1*01',
        'TRAV3-4*01', 'TRAV4D-4*03', 'TRAV4D-3*01', 'TRAV4D-4*01',
        'TRAV4-3*01', 'TRAV4-2*01', 'TRAV4-4/DV10*01', 'TRAV4D-3*03',
        'TRAV5-4*01', 'TRAV5D-4*02', 'TRAV3-3*01', 'TRAV12-3*04',
        'TRAV4D-4*04', 'TRAV13-4/DV7*03', 'TRAV16*01', 'TRAV9N-3*01',
        'TRAV9N-2*01', 'TRAV9D-4*03', 'TRAV9D-3*01', 'TRAV9-1*01',
        'TRAV9D-2*01', 'TRAV9D-4*04', 'TRAV6-1*01', 'TRAV14-1*01',
        'TRAV7-2*01', 'TRAV12D-1*01', 'TRAV12-2*01', 'TRAV12N-3*01',
        'TRAV16D/DV11*03', 'TRAV12-1*01', 'TRAV12-3*01', 'TRAV12D-3*02',
        'TRAV12D-2*01', 'TRAV8-2*01', 'TRAV13D-2*01', 'TRAV13-4/DV7*02',
        'TRAV19*01', 'TRAV21/DV12*01', 'TRAV7-5*01', 'TRAV7-5*03',
        'TRAV11*01', 'TRAV11N*01', 'TRAV11*02', 'TRAV12D-1*05',
        'TRAV4D-4*02', 'TRAV7-6*02', 'TRAV7-3*01', 'TRAV6D-6*03',
        'TRAV7-3*02', 'TRAV15D-2/DV6D-2*04', 'TRAV15-2/DV6-2*02',
        'TRAV15-2/DV6-2*01', 'TRAV15D-2/DV6D-2*01', 'TRAV15D-2/DV6D-2*02',
        'TRAV15D-1/DV6D-1*04', 'TRAV15-1/DV6-1*01', 'TRAV17*02', 'TRAV17*01',
        'TRAV12-1*05', 'TRAV7N-5*01', 'TRAV15D-1/DV6D-1*01', 'TRAV7-4*01',
        'TRAV12D-2*02', 'TRAV9-4*01', 'TRAV9-3*01', 'TRAV6-2*01',
        'TRAV8-1*01', 'TRAV13D-1*03', 'TRAV13-2*01', 'TRAV6-7/DV9*08',
        'TRAV12D-1*03', 'TRAV12-3*02', 'TRAV7-6*01', 'TRAV4-2*02',
        'TRAV1*01', 'TRAV5-1*01', 'TRAV8-1*02', 'TRAV15D-1/DV6D-1*05',
        'TRAV7-1*01', 'TRAV9-2*01', 'TRAV13-5*01', 'TRAV6-6*03',
        'TRAV12-1*02', 'TRAV12D-3*01', 'TRAV6-4*02', 'TRAV6-1*02',
        'TRAV20*01', 'TRAV14D-3/DV8*05', 'TRAV18*01', 'TRAV7D-2*03',
        'TRAV16*04', 'TRAV7-5*02', 'TRAV4D-2*01', 'TRAV13D-2*02',
        'TRAV5D-2*01', 'TRAV5-2*01', 'TRAV16*03',
    }
    assert isinstance(ng_tcrs['A'], dict)
    assert set(ng_tcrs['A'].keys()) == expected_v_genes
def test_ng_tcrs_DOnly():
    """
    Generate the background set for a delta-chain-only human TCRsubset.

    First checks that ``tcr_motif_clones_df(gdmode=True)`` reproduces the
    expected 13-row motif input table, then builds a TCRMotif against the
    gamma/delta database and compares the keys of the 'B' background set
    with the expected V genes.
    """
    subset = TCRsubset(
        clone_df=clone_df_subset_d,
        organism="human",
        epitopes=["X"],
        epitope="X",
        chains=["delta"],
        dist_d=dist_d_subset,
    )

    # Expected motif input: rows are labelled 53..65 (13 rows).
    row_labels = list(range(53, 66))
    n_rows = len(row_labels)

    def _column(values):
        """Map the row labels onto one column's values."""
        return dict(zip(row_labels, values))

    tcr_motif_delta_input = pd.DataFrame({
        'subject': _column(['SRR5130260.1'] * n_rows),
        'epitope': _column(['X'] * n_rows),
        'va_rep': _column(['TRGV1*01'] * n_rows),
        'ja_rep': _column(['TRGJ1*01'] * n_rows),
        'vb_rep': _column(['TRDV2*01'] * n_rows),
        # Rows 53-56 use TRDJ1, rows 57-65 use TRDJ2.
        'jb_rep': _column(['TRDJ1*01'] * 4 + ['TRDJ2*01'] * 9),
        'cdr3a': _column(['CATWAKNYYKKLF'] * n_rows),
        'cdr3b': _column([
            'CACHRGTDTDKLIF',
            'CACDKNGGYVRYTDKLIF',
            'CACDTVGIPDKLIF',
            'CACVRLPLRGRPYTDKLIF',
            'CACDNWGALTAQLFF',
            'CACDTILGDITLTAQLFF',
            'CACDTGRGTLTAQLFF',
            'CACDTWGMTAQLFF',
            'CACDTGGALTAQLFF',
            'CACDIRDTRVLTAQLFF',
            'CACDIVLGDPSLTAQLFF',
            'CACDHLLGDTAQLFF',
            'CACDPVTGGSLTAQLFF',
        ]),
    })
    assert np.all(subset.tcr_motif_clones_df(gdmode=True) == tcr_motif_delta_input)

    tm = TCRMotif(
        clones_df=subset.tcr_motif_clones_df(gdmode=True),
        organism=subset.organism,
        chains=subset.chains,
        epitopes=subset.epitopes,
        db_file="gammadelta_db.tsv",
    )

    def _background_sets():
        """Generate one background set per chain in tm.chains."""
        return {
            chain: tm.generate_background_set(
                chain=chain,
                ng_log_path=paths.path_to_current_db_files(db_file=tm.db_file),
                ng_log_file='new_nextgen_chains_{}_{}.tsv'.format(tm.organism, chain),
            )
            for chain in tm.chains
        }

    ng_tcrs = _background_sets()

    # Notice that 'B' represents 'delta'
    # Notice that 'A' represents 'gamma'
    # NOTE(review): the background sets are generated a second time and
    # the first result is discarded; this looks like a redundant repeat
    # -- confirm before removing.
    ng_tcrs = _background_sets()

    expected_v_genes = {
        'TRDV2*01', 'TRDV3*01', 'TRDV1*01', 'TRAV38-2/DV8*01',
        'TRAV22*01', 'TRAV29/DV5*01', 'TRAV41*01', 'TRAV39*01',
        'TRAV14/DV4*02', 'TRAV40*01', 'TRAV23/DV6*01', 'TRAV34*01',
        'TRAV38-1*01', 'TRAV26-2*01', 'TRAV26-1*01', 'TRAV19*01',
        'TRAV35*01', 'TRAV17*01', 'TRAV20*01', 'TRAV21*01',
        'TRAV36/DV7*01', 'TRAV14/DV4*01', 'TRAV9-2*01', 'TRAV24*01',
        'TRAV30*01', 'TRAV38-1*03', 'TRAV38-1*02', 'TRAV36/DV7*02',
        'TRAV12-3*01', 'TRAV27*01', 'TRAV8-3*01', 'TRAV16*01',
        'TRAV13-1*01', 'TRAV30*03', 'TRAV8-4*01', 'TRAV8-4*06',
        'TRAV8-2*01', 'TRAV10*01', 'TRAV8-4*07', 'TRAV8-6*01',
        'TRAV25*01', 'TRAV9-1*01', 'TRAV36/DV7*03', 'TRAV6*01',
        'TRAV13-2*01', 'TRAV8-7*01',
    }
    assert set(ng_tcrs['B'].keys()) == expected_v_genes