예제 #1
0
def test_TCRsampler_build():
    """Check that ``build_background`` leaves a dict of DataFrames in ``ref_dict``."""
    sampler = TCRsampler()
    data_path = os.path.join('tcrsampler', 'tests',
                             'pmbc_mixcr_example_data.txt')
    sampler.clean_mixcr(filename=data_path)
    sampler.build_background()

    assert isinstance(sampler.ref_dict, dict)
    # Only one entry is inspected; popitem() removes it from ref_dict, which
    # is fine because the sampler is discarded when the test ends.
    _key, frame = sampler.ref_dict.popitem()
    assert isinstance(frame, pd.DataFrame)
예제 #2
0
def test_prob_sampler_sample_key_warn():
    """Check that sampling the 'TRBV999*01' request returns ``[[None]]``.

    The V gene name is presumably absent from the example data (TODO confirm
    against the fixture); the test pins that ``sample`` answers with a single
    ``None`` placeholder for that request rather than raising.
    """
    # Local import: the file's own import block is not visible from here.
    import warnings

    t = TCRsampler()
    fn = os.path.join('tcrsampler', 'tests', 'pmbc_mixcr_example_data.txt')
    t.clean_mixcr(filename=fn)
    t.build_background()
    # ``pytest.warns(None)`` was deprecated in pytest 7 and is rejected with a
    # TypeError by pytest 8.  Recording with the stdlib keeps the old
    # semantics: any warning raised by ``sample`` is captured (not leaked to
    # the test session) and no warning is *required*.
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("always")
        r = t.sample([['TRBV999*01', 'TRBJ2-7*01', 2]])
    assert r == [[None]]
예제 #3
0
def test_prob_sampler_sample_background():
    """Pin the ten CDR3s drawn by ``sample_background`` for TRBV9*01 / TRBJ2-7*01."""
    expected_cdr3s = [
        'CASSRTGSLADEQYF',
        'CASSATGVVSAQYF',
        'CASSAWGQVYEQYF',
        'CASSVSGSPYEQYF',
        'CASSAWGQVYEQYF',
        'CASSAWGQVYEQYF',
        'CASRWGEQYF',
        'CASSGDDWEQYF',
        'CASSATGTSGPYEQYF',
        'CASSSRTSGSNSEQYF',
    ]

    sampler = TCRsampler()
    data_path = os.path.join('tcrsampler', 'tests',
                             'pmbc_mixcr_example_data.txt')
    sampler.clean_mixcr(filename=data_path)
    sampler.build_background()

    drawn = sampler.sample_background('TRBV9*01', 'TRBJ2-7*01', n=10)
    assert drawn == expected_cdr3s
예제 #4
0
def test_TCRsampler_build_vj_components():
    """Check that every frequency table built by ``build_background`` sums to 1."""
    sampler = TCRsampler()
    data_path = os.path.join('tcrsampler', 'tests',
                             'pmbc_mixcr_example_data.txt')
    sampler.clean_mixcr(filename=data_path)
    sampler.build_background()

    # Attributes are looked up one at a time, in this order, so a failure
    # surfaces at the first offending table.
    table_names = (
        'vj_freq',
        'j_freq',
        'v_freq',
        'vj_occur_freq',
        'v_occur_freq',
        'j_occur_freq',
    )
    for table_name in table_names:
        table = getattr(sampler, table_name)
        total = np.sum([freq for _gene, freq in table.items()])
        assert np.isclose(total, 1.0)
예제 #5
0
def test_prob_sampler_sample():
    """Pin the output of ``sample`` for nested, flattened, and multi-request calls."""
    sampler = TCRsampler()
    data_path = os.path.join('tcrsampler', 'tests',
                             'pmbc_mixcr_example_data.txt')
    sampler.clean_mixcr(filename=data_path)
    sampler.build_background()

    trbv9_cdr3s = ['CASSRTGSLADEQYF', 'CASSATGVVSAQYF']
    trbv7_7_cdr3s = ['CASSLGQAARGIQYF'] * 4

    # One request: the result is a list holding one list of CDR3s.
    nested = sampler.sample([['TRBV9*01', 'TRBJ2-7*01', 2]])
    assert nested == [trbv9_cdr3s]

    # flatten=True: the same CDR3s come back as a single flat list.
    flat = sampler.sample([['TRBV9*01', 'TRBJ2-7*01', 2]], flatten=True)
    assert flat == trbv9_cdr3s

    # Two requests: one inner list per request, in request order.
    both = sampler.sample([
        ['TRBV9*01', 'TRBJ2-7*01', 2],
        ['TRBV7-7*01', 'TRBJ2-4*01', 4],
    ])
    assert both == [trbv9_cdr3s, trbv7_7_cdr3s]
예제 #6
0
mixcr exportClones -cloneId -count -fraction -vGene -jGene -vHit -jHit -vHits -jHits -aaFeature CDR3 -nFeature CDR3 SRR2079522.1.clns SRR2079522.1.clns.best.txt -f
mixcr exportAlignments SRR2079522.1.vdjca  SRR2079522.1.vdjca.txt -f
```


#### Files Available For Download

Beta:  [SRR2079522.1.clns.best.txt](https://www.dropbox.com/s/czcewp7x7auwdsu/SRR2079522.1.clns.best.txt?dl=1)

Alpha: [SRR2079521.1.clns.best.txt](https://www.dropbox.com/s/k4i0mt0cwhcn1h7/SRR2079521.1.clns.best.txt?dl=1)

"""

from tcrsampler.sampler import TCRsampler

# Build a sampler background from each clone table and save its reference
# DataFrame as a tab-separated file.  Pairs are (input file, output file):
# beta chain first, then alpha.
for fn, out_fn in (
        ('SRR2079522.1.clns.best.subject.txt',
         'ruggiero_mouse_beta_t.tsv.sampler.tsv'),
        ('SRR2079521.1.clns.best.subject.txt',
         'ruggiero_mouse_alpha_t.tsv.sampler.tsv'),
):
    t = TCRsampler()
    t.clean_mixcr(fn)
    t.build_background()
    t.ref_df.to_csv(out_fn, sep="\t", index=False)
예제 #7
0
    df['strain'] = 'C57BL6 inbred mouse strain'
    print(df)

    wirasinha = pd.read_csv(
        '/Volumes/Samsung_T5/kmayerbl/tcr_data/wirasinha/Wirasinha.migec.txt',
        sep='\t')
    for i, row in df.iterrows():
        sdf = subset_wirasinha(df=wirasinha,
                               subset=row['subset'],
                               tcr_b=row['tcr_b'],
                               chain=row['chain'])
        sdf[['bestv',
             'bestj']] = sdf[['v', 'j']].apply(lambda x: x.apply(_pick_best))
        sdf[['bestv',
             'bestj']] = sdf[['bestv',
                              'bestj']].apply(lambda x: x.apply(_strip_allele))
        sdf = sdf.rename(columns=wirasinha_to_mixcr_headers)
        sys.stdout.write(f"Writing {row['filename']}\n")
        sdf.to_csv(row['filename'], sep="\t")

        sys.stdout.write(
            f"Testing {row['filename']} for import into TCRsampler\t")
        t = TCRsampler()
        t.clean_mixcr(filename=row['filename'])
        t.build_background()
        print("\n")
        print(t.ref_df.head(3))
        name = f"{row['filename']}.sampler.tsv"
        sys.stdout.write(f"Writing {name} \t")
        t.ref_df.to_csv(name, sep="\t", index=False)
예제 #8
0
def test_TCRsampler_build_stratified():
    """Smoke test: sampling works after a subject-stratified background build.

    Nothing is asserted about the sampled values; the test only requires that
    the build and the sampling call complete without raising.
    """
    data_path = os.path.join('tcrsampler', 'tests',
                             'pmbc_mixcr_example_data.txt')
    sampler = TCRsampler()
    sampler.clean_mixcr(filename=data_path)
    sampler.build_background(stratify_by_subject=True)
    # The return value is deliberately ignored (see docstring).
    sampler.sample_background('TRBV9*01', 'TRBJ2-7*01', n=10)
예제 #9
0
def test_TCRsampler_clean_mixcr():
    """Check that ``clean_mixcr`` stores a pandas DataFrame in ``ref_df``."""
    data_path = os.path.join('tcrsampler', 'tests',
                             'pmbc_mixcr_example_data.txt')
    sampler = TCRsampler()
    sampler.clean_mixcr(filename=data_path)

    cleaned = sampler.ref_df
    assert isinstance(cleaned, pd.DataFrame)