예제 #1
0
def test_initialization_of_TCRsubset_beta_only_find_motifs():
    """
    Build a beta-chain-only TCRsubset from the first 20 clones and check
    that motif finding yields a DataFrame, both as the return value and
    as the ``motif_df`` attribute.
    """
    # Sanity-check the shared fixtures before using them.
    for fixture in (dist_a_subset, dist_b_subset, clone_df_subset):
        assert isinstance(fixture, pd.DataFrame)

    # Only beta-chain columns (plus clone metadata) are handed over.
    beta_columns = [
        'clone_id', 'subject', 'epitope', 'v_b_gene', 'j_b_gene', 'cdr3_b_aa',
        'cdr1_b_aa', 'cdr2_b_aa', 'pmhc_b_aa', 'cdr3_b_nucseq', 'count',
        'vb_countreps', 'jb_countreps', 'vb_gene', 'jb_gene'
    ]
    beta_clones = clone_df_subset[beta_columns].iloc[0:20, :]
    beta_dists = dist_b_subset.iloc[0:20, 0:20]

    subset = TCRsubset(clone_df=beta_clones,
                       organism="mouse",
                       epitopes=["PA"],
                       epitope="PA",
                       chains=["beta"],
                       dist_b=beta_dists)

    found_motifs = subset.find_motif()
    assert isinstance(found_motifs, pd.DataFrame)
    assert isinstance(subset.motif_df, pd.DataFrame)
예제 #2
0
def test_initialization_of_TCRsubset_alpha_beta_case_plus_motif_finding():
    """
    Build a paired alpha/beta TCRsubset from the first 20 clones and check
    that motif finding yields a DataFrame, both as the return value and
    as the ``motif_df`` attribute.
    """
    # Function-local imports keep this example self-contained.
    import pytest
    import pandas as pd
    from tcrdist.subset import TCRsubset
    from tcrdist.tests.my_test_subset import dist_a_subset, dist_b_subset, clone_df_subset
    from tcrdist.cdr3_motif import TCRMotif

    # Sanity-check the fixtures before slicing them.
    for fixture in (dist_a_subset, dist_b_subset, clone_df_subset):
        assert isinstance(fixture, pd.DataFrame)

    paired_clones = clone_df_subset.iloc[0:20, :].copy()
    beta_dists = dist_b_subset.iloc[0:20, 0:20]
    alpha_dists = dist_a_subset.iloc[0:20, 0:20]

    subset = TCRsubset(clone_df=paired_clones,
                       organism="mouse",
                       epitopes=["PA"],
                       epitope="PA",
                       chains=["A", "B"],
                       dist_a=alpha_dists,
                       dist_b=beta_dists)

    found_motifs = subset.find_motif()
    assert isinstance(found_motifs, pd.DataFrame)
    assert isinstance(subset.motif_df, pd.DataFrame)
예제 #3
0
def test_non_default_manual_generation_of_ng_tcrsAB():
    """
    Construct a TCRsubset with ``default_mode=False``, fill in the beta and
    alpha background sets by hand from explicitly named files, and check
    that motif finding then returns more than one row.
    """
    paired_clones = clone_df_subset.iloc[0:20, :].copy()
    beta_dists = dist_b_subset.iloc[0:20, 0:20]
    alpha_dists = dist_a_subset.iloc[0:20, 0:20]

    subset = TCRsubset(clone_df=paired_clones,
                       organism="mouse",
                       epitopes=["PA"],
                       epitope="PA",
                       chains=["A", "B"],
                       dist_a=alpha_dists,
                       dist_b=beta_dists,
                       default_mode=False)

    # Background sets are generated per chain, beta first and then alpha,
    # each from its own next-gen chains file in the same directory.
    background_dir = 'tcrdist/db/alphabeta_db.tsv_files'
    background_files = (
        ('B', 'new_nextgen_chains_mouse_B.tsv'),
        ('A', 'new_nextgen_chains_mouse_A.tsv'),
    )
    for chain_label, chain_file in background_files:
        subset.ng_tcrs[chain_label] = subset.generate_background_set(
            chain=[chain_label],
            ng_log_path=background_dir,
            ng_log_file=chain_file)

    found_motifs = subset.find_motif()
    assert isinstance(found_motifs, pd.DataFrame)
    assert isinstance(subset.motif_df, pd.DataFrame)
    assert found_motifs.shape[0] > 1
예제 #4
0
def test_initialization_of_TCRsubset_alpha_beta_case():
    """
    Check that a TCRsubset can be constructed from the full paired
    alpha/beta fixtures without raising.
    """
    # Sanity-check the shared fixtures before using them.
    for fixture in (dist_a_subset, dist_b_subset, clone_df_subset):
        assert isinstance(fixture, pd.DataFrame)

    paired_inputs = dict(clone_df=clone_df_subset,
                         organism="mouse",
                         epitopes=["PA"],
                         epitope="PA",
                         chains=["A", "B"],
                         dist_a=dist_a_subset,
                         dist_b=dist_b_subset)
    TCRsubset(**paired_inputs)
예제 #5
0
def test_ng_tcrs_BOnly():
    """
    Generate the background set for a beta-only subset through TCRMotif
    and check that it is a dict keyed by the expected mouse TRBV genes.
    """
    paired_clones = clone_df_subset.iloc[0:20, :].copy()
    beta_dists = dist_b_subset.iloc[0:20, 0:20]
    alpha_dists = dist_a_subset.iloc[0:20, 0:20]

    # The alpha distances are still passed even though only "B" is requested.
    subset = TCRsubset(clone_df=paired_clones,
                       organism="mouse",
                       epitopes=["PA"],
                       epitope="PA",
                       chains=["B"],
                       dist_a=alpha_dists,
                       dist_b=beta_dists)
    motif_finder = TCRMotif(clones_df=subset.tcr_motif_clones_df(),
                            organism=subset.organism,
                            chains=subset.chains,
                            epitopes=subset.epitopes,
                            db_file="alphabeta_db.tsv")

    # Default behavior: one background set per chain, read from the
    # organism- and chain-specific file next to the current db file.
    ng_tcrs = {
        chain: motif_finder.generate_background_set(
            chain=chain,
            ng_log_path=paths.path_to_current_db_files(
                db_file=motif_finder.db_file),
            ng_log_file='new_nextgen_chains_{}_{}.tsv'.format(
                motif_finder.organism, chain))
        for chain in motif_finder.chains
    }

    expected_v_genes = {
        'TRBV29*01', 'TRBV16*01', 'TRBV13-2*01', 'TRBV13-1*01', 'TRBV3*01',
        'TRBV3*02', 'TRBV17*01', 'TRBV15*01', 'TRBV19*01', 'TRBV19*03',
        'TRBV4*01', 'TRBV4*02', 'TRBV23*01', 'TRBV13-2*05', 'TRBV13-3*01',
        'TRBV12-1*01', 'TRBV12-2*01', 'TRBV2*01', 'TRBV21*01', 'TRBV5*01',
        'TRBV8*01', 'TRBV1*01', 'TRBV1*02', 'TRBV26*02', 'TRBV24*01',
        'TRBV26*01', 'TRBV14*01', 'TRBV24*02', 'TRBV31*01', 'TRBV5*05',
        'TRBV9*01', 'TRBV20*01', 'TRBV30*01'
    }
    assert isinstance(ng_tcrs['B'], dict)
    assert set(ng_tcrs['B'].keys()) == expected_v_genes
예제 #6
0
def test_initialization_of_TCRsubset_alpha_case():
    """
    Build a TCRsubset from alpha-chain columns only.

    The resulting ``clone_df`` is still expected to expose a ``vb_gene``
    column whose first entry is "TRBV1*01" (presumably a placeholder beta
    chain filled in by TCRsubset -- confirm against its implementation).
    """
    # Sanity-check the shared fixtures before using them.
    for fixture in (dist_a_subset, dist_b_subset, clone_df_subset):
        assert isinstance(fixture, pd.DataFrame)

    alpha_columns = [
        'clone_id', 'subject', 'epitope', 'v_a_gene', 'j_a_gene', 'cdr3_a_aa',
        'cdr1_a_aa', 'cdr2_a_aa', 'pmhc_a_aa', 'cdr3_a_nucseq', 'count',
        'va_countreps', 'ja_countreps', 'va_gene', 'ja_gene'
    ]
    subset = TCRsubset(clone_df=clone_df_subset[alpha_columns],
                       organism="mouse",
                       epitopes=["PA"],
                       epitope="PA",
                       chains=["alpha"],
                       dist_a=dist_a_subset)

    beta_v_genes = subset.clone_df.vb_gene
    assert isinstance(beta_v_genes, pd.Series)
    assert beta_v_genes.iloc[0] == "TRBV1*01"
예제 #7
0
def test_initialization_of_TCRsubset_beta_case():
    """
    Build a TCRsubset from beta-chain columns only.

    The original note on this test says fake alpha sequences must be
    created for this to work; accordingly ``clone_df`` is expected to
    expose a ``va_gene`` column whose first entry is "TRAV10*01".
    """
    # Sanity-check the shared fixtures before using them.
    for fixture in (dist_a_subset, dist_b_subset, clone_df_subset):
        assert isinstance(fixture, pd.DataFrame)

    beta_columns = [
        'clone_id', 'subject', 'epitope', 'v_b_gene', 'j_b_gene', 'cdr3_b_aa',
        'cdr1_b_aa', 'cdr2_b_aa', 'pmhc_b_aa', 'cdr3_b_nucseq', 'count',
        'vb_countreps', 'jb_countreps', 'vb_gene', 'jb_gene'
    ]
    subset = TCRsubset(clone_df=clone_df_subset[beta_columns],
                       organism="mouse",
                       epitopes=["PA"],
                       epitope="PA",
                       chains=["beta"],
                       dist_b=dist_b_subset)

    alpha_v_genes = subset.clone_df.va_gene
    assert isinstance(alpha_v_genes, pd.Series)
    assert alpha_v_genes.iloc[0] == "TRAV10*01"
예제 #8
0
def test_CompleteExample_with_TCRMotif_Invoked_From_within_TCRsubset():
    """
    End-to-end example: legacy clones file -> TCRrep -> TCRsubset -> logos.

    Loads the legacy tcrdist clones file, keeps the "PA" and "F2" epitopes,
    maps it to the tcrdist2 column layout, computes legacy alpha/beta
    distances, builds a TCRsubset for the "PA" clones, loads precomputed
    motifs from a log file and renders a logo for the first few of them.
    """
    import pandas as pd
    import numpy as np
    import tcrdist as td
    #import IPython

    from tcrdist import mappers
    from tcrdist.repertoire import TCRrep
    from tcrdist.cdr3_motif import TCRMotif
    from tcrdist.subset import TCRsubset
    from tcrdist.storage import StoreIOMotif, StoreIOEntropy
    from tcrdist.plotting import plot_pwm

    # Load the legacy clones file and keep only the PA and F2 epitopes.
    tcrdist_clone_fn = 'tcrdist/test_files/mouse_pairseqs_v1_parsed_seqs_probs_mq20_clones.tsv'
    tcrdist_clone_df = pd.read_csv(tcrdist_clone_fn, sep="\t")

    ind = (tcrdist_clone_df.epitope == "PA") | (tcrdist_clone_df.epitope
                                                == "F2")
    tcrdist_clone_df = tcrdist_clone_df[ind].copy()

    # Rename legacy columns to the tcrdist2 layout.
    mapping = mappers.tcrdist_clone_df_to_tcrdist2_mapping
    tcrdist2_df = mappers.generic_pandas_mapper(df=tcrdist_clone_df,
                                                mapping=mapping)

    # 1. Create the repertoire object.
    tr = TCRrep(cell_df=tcrdist2_df, organism="mouse")

    # 2. Infer the germline-encoded CDRs from the V genes.
    tr.infer_cdrs_from_v_gene(chain='alpha', imgt_aligned=True)
    tr.infer_cdrs_from_v_gene(chain='beta', imgt_aligned=True)

    # 3. Columns that define a unique clone.
    tr.index_cols = [
        'clone_id', 'subject', 'epitope', 'v_a_gene', 'j_a_gene', 'v_b_gene',
        'j_b_gene', 'cdr3_a_aa', 'cdr3_b_aa', 'cdr1_a_aa', 'cdr2_a_aa',
        'pmhc_a_aa', 'cdr1_b_aa', 'cdr2_b_aa', 'pmhc_b_aa', 'cdr3_b_nucseq',
        'cdr3_a_nucseq', 'va_countreps', 'ja_countreps', 'vb_countreps',
        'jb_countreps', 'va_gene', 'vb_gene', 'ja_gene', 'jb_gene'
    ]

    # 4. Deduplicate on the index columns.
    tr.deduplicate()

    # 5. Compute distances with the legacy method.
    tr._tcrdist_legacy_method_alpha_beta()

    # 6. The per-chain matrices must sum to the paired distance matrix.
    distA = tr.dist_a
    distB = tr.dist_b
    assert np.all(((distA + distB) - tr.paired_tcrdist) == 0)

    # Restrict clones and both distance matrices to the PA epitope.
    criteria = tr.clone_df.epitope == "PA"
    clone_df_subset = tr.clone_df[criteria]

    distA_subset = distA.loc[clone_df_subset.clone_id,
                             clone_df_subset.clone_id].copy()
    distB_subset = distB.loc[clone_df_subset.clone_id,
                             clone_df_subset.clone_id].copy()

    ts = TCRsubset(clone_df_subset,
                   organism="mouse",
                   epitopes=["PA"],
                   epitope="PA",
                   chains=["A", "B"],
                   dist_a=distA_subset,
                   dist_b=distB_subset)

    # ts.find_motif() is skipped here; precomputed motifs are loaded from
    # a whitespace-delimited log file instead.
    cnames = [
        "file_type", "count", "expect_random", "expect_nextgen", "chi_squared",
        "nfixed", "showmotif", "num", "othernum", "overlap", "ep", "ab",
        "nseqs", "v_rep_counts", "j_rep_counts"
    ]
    motif_fn = 'tcrdist/test_files/mouse_pairseqs_v1_parsed_seqs_probs_mq20_clones_cdr3_motifs_PA.log'
    # Use a context manager so the file handle is closed deterministically.
    with open(motif_fn, "r") as motif_file:
        motif_lines = motif_file.readlines()
    ts.motif_df = pd.DataFrame([line.split() for line in motif_lines],
                               columns=cnames)

    # Both names are rebound by the loop below; this only exercises the
    # conversion of the first motif row to a dict.
    i = 0
    row = ts.motif_df.iloc[i, :].to_dict()

    motif_list = list()
    motif_logo = list()
    for i, row in ts.motif_df.iterrows():
        StoreIOMotif_instance = ts.eval_motif(row)
        motif_list.append(StoreIOMotif_instance)
        motif_logo.append(
            plot_pwm(StoreIOMotif_instance,
                     create_file=False,
                     my_height=200,
                     my_width=600))
        # Stop once the row labelled 2 has been handled to keep the test fast.
        if i > 1:
            break
예제 #9
0
def test_hot_start_example_in_full():
    """
    Run the HotStart example from the docs from start to finish.

    Loads the compact Dash data set, keeps the "PA" epitope, computes the
    legacy alpha/beta tcrdists, clusters the paired distances, builds a
    TCRsubset for cluster 5, loads (or finds) its motifs, writes them to
    CSV and renders a logo for every alpha- and beta-chain motif.
    """
    # basic imports
    import os
    import pandas as pd
    import numpy as np
    #import IPython

    # tcrdist classes
    from tcrdist.repertoire import TCRrep
    from tcrdist.subset import TCRsubset
    from tcrdist.cdr3_motif import TCRMotif
    from tcrdist.storage import StoreIOMotif, StoreIOEntropy

    # tcrdist functions
    from tcrdist import plotting
    from tcrdist.mappers import populate_legacy_fields

    # scipy functions for clustering
    from scipy.spatial import distance
    from scipy.cluster.hierarchy import linkage, dendrogram, fcluster

    # sklearn functions for low-dimensional embeddings
    from sklearn.manifold import TSNE, MDS

    # plotnine to allow grammar of graphics plotting akin to R's ggplot2
    #import plotnine as gg

    # 1. Load data and keep the receptors recognizing the "PA" epitope.
    dash_csv = os.path.join("tcrdist", "test_files_compact", "dash.csv")
    tcrdist2_df = pd.read_csv(dash_csv)
    tcrdist2_df = tcrdist2_df[tcrdist2_df.epitope == "PA"].copy()

    # 2. Create the TCRrep instance from the loaded cell table.
    tr = TCRrep(cell_df=tcrdist2_df,
                chains=['alpha', 'beta'],
                organism="mouse")

    # 3. Infer CDR1, CDR2 and CDR2.5 (a.k.a. pmhc) from germline V genes.
    for chain_name in ('alpha', 'beta'):
        tr.infer_cdrs_from_v_gene(chain=chain_name, imgt_aligned=True)

    # 4. Define the index columns that determine unique clones.
    tr.index_cols = [
        'clone_id', 'subject', 'epitope', 'v_a_gene', 'j_a_gene', 'v_b_gene',
        'j_b_gene', 'cdr3_a_aa', 'cdr3_b_aa', 'cdr1_a_aa', 'cdr2_a_aa',
        'pmhc_a_aa', 'cdr1_b_aa', 'cdr2_b_aa', 'pmhc_b_aa', 'cdr3_b_nucseq',
        'cdr3_a_nucseq'
    ]

    # 5. Deduplicate on the index columns, then compute tcrdists by the
    #    method in Dash et al.
    tr.deduplicate()
    tr._tcrdist_legacy_method_alpha_beta()

    # 6. The alpha-chain and beta-chain distance matrices must sum to the
    #    paired distance matrix.
    distA = tr.dist_a
    distB = tr.dist_b
    assert np.all(((distA + distB) - tr.paired_tcrdist) == 0)

    # Hierarchical clustering of the paired distances into at most 20 clusters.
    compressed_dmat = distance.squareform(tr.paired_tcrdist, force="vector")
    Z = linkage(compressed_dmat, method="complete")
    den = dendrogram(Z, color_threshold=np.inf, no_plot=True)
    cluster_index = fcluster(Z, t=20, criterion="maxclust")
    assert len(cluster_index) == tr.clone_df.shape[0]
    assert len(cluster_index) == tr.paired_tcrdist.shape[0]
    tr.clone_df['cluster_index'] = cluster_index

    # Subset clones and both distance matrices to cluster 5.
    in_cluster_5 = (cluster_index == 5)
    clone_df_subset = tr.clone_df[in_cluster_5]
    clone_df_subset = clone_df_subset[clone_df_subset.epitope == "PA"].copy()
    cluster_ids = clone_df_subset.clone_id
    dist_a_subset = tr.dist_a.loc[cluster_ids, cluster_ids].copy()
    dist_b_subset = tr.dist_b.loc[cluster_ids, cluster_ids].copy()

    clone_df_subset = populate_legacy_fields(df=clone_df_subset,
                                             chains=['alpha', 'beta'])

    ts = TCRsubset(clone_df_subset,
                   organism="mouse",
                   epitopes=["PA"],
                   epitope="PA",
                   chains=["A", "B"],
                   dist_a=dist_a_subset,
                   dist_b=dist_b_subset)

    # Find motifs: reuse the stored motif table when it exists, otherwise
    # run the motif search.
    motif_csv = os.path.join("tcrdist", "test_files_compact",
                             "dash_PA_cluster_5_motifs.csv")
    if os.path.isfile(motif_csv):
        ts.motif_df = pd.read_csv(motif_csv)
    else:
        ts.find_motif()

    # Save motifs (written in both branches above).
    ts.motif_df.to_csv(motif_csv, index=False)

    # Preprocess motifs: evaluate and draw every alpha-chain motif, then
    # every beta-chain motif.
    motifs_by_chain = {}
    logos_by_chain = {}
    for chain_label in ("A", "B"):
        chain_motifs = []
        chain_logos = []
        chain_rows = ts.motif_df[ts.motif_df.ab == chain_label]
        for _, motif_row in chain_rows.iterrows():
            stored_motif = ts.eval_motif(motif_row)
            chain_motifs.append(stored_motif)
            chain_logos.append(
                plotting.plot_pwm(stored_motif,
                                  create_file=False,
                                  my_height=200,
                                  my_width=600))
        motifs_by_chain[chain_label] = chain_motifs
        logos_by_chain[chain_label] = chain_logos
예제 #10
0
def test_ng_tcrs_AOnly():
    """
    Generate the background set for an alpha-only subset through TCRMotif
    and check that it is a dict keyed by the expected mouse TRAV genes.
    """
    paired_clones = clone_df_subset.iloc[0:20, :].copy()
    # The beta slice is computed but not passed on: dist_b is None below.
    beta_dists = dist_b_subset.iloc[0:20, 0:20]
    alpha_dists = dist_a_subset.iloc[0:20, 0:20]

    subset = TCRsubset(clone_df=paired_clones,
                       organism="mouse",
                       epitopes=["PA"],
                       epitope="PA",
                       chains=["A"],
                       dist_a=alpha_dists,
                       dist_b=None)
    motif_finder = TCRMotif(clones_df=subset.tcr_motif_clones_df(),
                            organism=subset.organism,
                            chains=subset.chains,
                            epitopes=subset.epitopes,
                            db_file="alphabeta_db.tsv")

    # Default behavior: one background set per chain, read from the
    # organism- and chain-specific file next to the current db file.
    ng_tcrs = {
        chain: motif_finder.generate_background_set(
            chain=chain,
            ng_log_path=paths.path_to_current_db_files(
                db_file=motif_finder.db_file),
            ng_log_file='new_nextgen_chains_{}_{}.tsv'.format(
                motif_finder.organism, chain))
        for chain in motif_finder.chains
    }

    expected_v_genes = {
        'TRAV14D-3/DV8*02', 'TRAV14-1*02', 'TRAV14-2*01', 'TRAV14D-1*01',
        'TRAV14D-2*01', 'TRAV14D-2*02', 'TRAV14-3*01', 'TRAV14-3*02',
        'TRAV14D-3/DV8*01', 'TRAV6-7/DV9*02', 'TRAV6-6*01', 'TRAV6-7/DV9*01',
        'TRAV6-7/DV9*04', 'TRAV6D-6*01', 'TRAV6-5*01', 'TRAV6D-7*01',
        'TRAV13-1*01', 'TRAV13D-4*01', 'TRAV13-3*01', 'TRAV10*01', 'TRAV2*01',
        'TRAV6-4*01', 'TRAV3D-3*02', 'TRAV3-1*01', 'TRAV3-4*01', 'TRAV4D-4*03',
        'TRAV4D-3*01', 'TRAV4D-4*01', 'TRAV4-3*01', 'TRAV4-2*01',
        'TRAV4-4/DV10*01', 'TRAV4D-3*03', 'TRAV5-4*01', 'TRAV5D-4*02',
        'TRAV3-3*01', 'TRAV12-3*04', 'TRAV4D-4*04', 'TRAV13-4/DV7*03',
        'TRAV16*01', 'TRAV9N-3*01', 'TRAV9N-2*01', 'TRAV9D-4*03',
        'TRAV9D-3*01', 'TRAV9-1*01', 'TRAV9D-2*01', 'TRAV9D-4*04',
        'TRAV6-1*01', 'TRAV14-1*01', 'TRAV7-2*01', 'TRAV12D-1*01',
        'TRAV12-2*01', 'TRAV12N-3*01', 'TRAV16D/DV11*03', 'TRAV12-1*01',
        'TRAV12-3*01', 'TRAV12D-3*02', 'TRAV12D-2*01', 'TRAV8-2*01',
        'TRAV13D-2*01', 'TRAV13-4/DV7*02', 'TRAV19*01', 'TRAV21/DV12*01',
        'TRAV7-5*01', 'TRAV7-5*03', 'TRAV11*01', 'TRAV11N*01', 'TRAV11*02',
        'TRAV12D-1*05', 'TRAV4D-4*02', 'TRAV7-6*02', 'TRAV7-3*01',
        'TRAV6D-6*03', 'TRAV7-3*02', 'TRAV15D-2/DV6D-2*04',
        'TRAV15-2/DV6-2*02', 'TRAV15-2/DV6-2*01', 'TRAV15D-2/DV6D-2*01',
        'TRAV15D-2/DV6D-2*02', 'TRAV15D-1/DV6D-1*04', 'TRAV15-1/DV6-1*01',
        'TRAV17*02', 'TRAV17*01', 'TRAV12-1*05', 'TRAV7N-5*01',
        'TRAV15D-1/DV6D-1*01', 'TRAV7-4*01', 'TRAV12D-2*02', 'TRAV9-4*01',
        'TRAV9-3*01', 'TRAV6-2*01', 'TRAV8-1*01', 'TRAV13D-1*03',
        'TRAV13-2*01', 'TRAV6-7/DV9*08', 'TRAV12D-1*03', 'TRAV12-3*02',
        'TRAV7-6*01', 'TRAV4-2*02', 'TRAV1*01', 'TRAV5-1*01', 'TRAV8-1*02',
        'TRAV15D-1/DV6D-1*05', 'TRAV7-1*01', 'TRAV9-2*01', 'TRAV13-5*01',
        'TRAV6-6*03', 'TRAV12-1*02', 'TRAV12D-3*01', 'TRAV6-4*02',
        'TRAV6-1*02', 'TRAV20*01', 'TRAV14D-3/DV8*05', 'TRAV18*01',
        'TRAV7D-2*03', 'TRAV16*04', 'TRAV7-5*02', 'TRAV4D-2*01',
        'TRAV13D-2*02', 'TRAV5D-2*01', 'TRAV5-2*01', 'TRAV16*03'
    }
    assert isinstance(ng_tcrs['A'], dict)
    assert set(ng_tcrs['A'].keys()) == expected_v_genes
예제 #11
0
def test_ng_tcrs_DOnly():
    """
    Check the delta-only path of TCRsubset and TCRMotif.

    First verifies the legacy-format clone table produced by
    ``tcr_motif_clones_df(gdmode=True)``, then verifies the V genes that
    key the background set generated from the gamma/delta database.
    """
    ts = TCRsubset(clone_df=clone_df_subset_d,
                   organism="human",
                   epitopes=["X"],
                   epitope="X",
                   chains=["delta"],
                   dist_d=dist_d_subset)

    # Expected clone table: 13 rows labelled 53..65. Every column except
    # jb_rep and cdr3b holds one value repeated on each row.
    row_labels = list(range(53, 66))
    n_rows = len(row_labels)
    tcr_motif_delta_input = pd.DataFrame(
        {
            'subject': ['SRR5130260.1'] * n_rows,
            'epitope': ['X'] * n_rows,
            'va_rep': ['TRGV1*01'] * n_rows,
            'ja_rep': ['TRGJ1*01'] * n_rows,
            'vb_rep': ['TRDV2*01'] * n_rows,
            # Rows 53-56 use TRDJ1*01, rows 57-65 use TRDJ2*01.
            'jb_rep': ['TRDJ1*01'] * 4 + ['TRDJ2*01'] * 9,
            'cdr3a': ['CATWAKNYYKKLF'] * n_rows,
            'cdr3b': [
                'CACHRGTDTDKLIF',
                'CACDKNGGYVRYTDKLIF',
                'CACDTVGIPDKLIF',
                'CACVRLPLRGRPYTDKLIF',
                'CACDNWGALTAQLFF',
                'CACDTILGDITLTAQLFF',
                'CACDTGRGTLTAQLFF',
                'CACDTWGMTAQLFF',
                'CACDTGGALTAQLFF',
                'CACDIRDTRVLTAQLFF',
                'CACDIVLGDPSLTAQLFF',
                'CACDHLLGDTAQLFF',
                'CACDPVTGGSLTAQLFF',
            ],
        },
        index=row_labels)
    assert np.all(ts.tcr_motif_clones_df(gdmode=True) == tcr_motif_delta_input)

    tm = TCRMotif(clones_df=ts.tcr_motif_clones_df(gdmode=True),
                  organism=ts.organism,
                  chains=ts.chains,
                  epitopes=ts.epitopes,
                  db_file="gammadelta_db.tsv")

    # Notice that 'B' represents 'delta'
    # Notice that 'A' represents 'gamma'
    # The background set is generated once per chain; an identical second
    # pass that merely rebuilt the same dict has been removed.
    ng_tcrs = dict()
    for chain in tm.chains:
        next_gen_ref = tm.generate_background_set(
            chain=chain,
            ng_log_path=paths.path_to_current_db_files(db_file=tm.db_file),
            ng_log_file='new_nextgen_chains_{}_{}.tsv'.format(
                tm.organism, chain))
        ng_tcrs[chain] = next_gen_ref

    assert set(ng_tcrs['B'].keys()) == set([
        'TRDV2*01', 'TRDV3*01', 'TRDV1*01', 'TRAV38-2/DV8*01', 'TRAV22*01',
        'TRAV29/DV5*01', 'TRAV41*01', 'TRAV39*01', 'TRAV14/DV4*02',
        'TRAV40*01', 'TRAV23/DV6*01', 'TRAV34*01', 'TRAV38-1*01',
        'TRAV26-2*01', 'TRAV26-1*01', 'TRAV19*01', 'TRAV35*01', 'TRAV17*01',
        'TRAV20*01', 'TRAV21*01', 'TRAV36/DV7*01', 'TRAV14/DV4*01',
        'TRAV9-2*01', 'TRAV24*01', 'TRAV30*01', 'TRAV38-1*03', 'TRAV38-1*02',
        'TRAV36/DV7*02', 'TRAV12-3*01', 'TRAV27*01', 'TRAV8-3*01', 'TRAV16*01',
        'TRAV13-1*01', 'TRAV30*03', 'TRAV8-4*01', 'TRAV8-4*06', 'TRAV8-2*01',
        'TRAV10*01', 'TRAV8-4*07', 'TRAV8-6*01', 'TRAV25*01', 'TRAV9-1*01',
        'TRAV36/DV7*03', 'TRAV6*01', 'TRAV13-2*01', 'TRAV8-7*01'
    ])