Esempio n. 1
0
def detect_fake_chains(clones_file, Achain='A', Bchain='B'):
    """Report which chains carry one identical sequence in every clone.

    Reads the ``va_gene``/``cdr3a`` and ``vb_gene``/``cdr3b`` columns of
    ``clones_file`` and treats a chain as fake when exactly one distinct
    (V gene, CDR3) pair is seen for it across all rows.

    NOTE: a file holding a single clone therefore reports both chains as
    fake, and an empty file reports none.

    Args:
        clones_file: path handed to ``parse_tsv.parse_tsv_file``.
        Achain: label added to the result when the ``va_gene``/``cdr3a``
            pair is constant.
        Bchain: label added to the result when the ``vb_gene``/``cdr3b``
            pair is constant.

    Returns:
        List of labels of the fake chains (``Achain`` first), possibly empty.
    """
    tcrs = parse_tsv.parse_tsv_file(
        clones_file,
        key_fields=[],
        store_fields=['va_gene', 'cdr3a', 'vb_gene', 'cdr3b'])

    fake_chains = []
    # Column indices follow the store_fields order above.
    for chain, (v_col, cdr3_col) in ((Achain, (0, 1)), (Bchain, (2, 3))):
        if len({(row[v_col], row[cdr3_col]) for row in tcrs}) == 1:
            fake_chains.append(chain)

    if fake_chains:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print('Fake sequence data detected for chains: {}'.format(
            ' '.join(fake_chains)))
    return fake_chains
# Companion file name: clones_file minus its last four characters
# (presumably a '.tsv'-style extension -- confirm) plus '_nbrdists.tsv'.
clones_file_with_nbrdists = '{}_nbrdists.tsv'.format(clones_file[:-4])
# Only the header row is needed; the context manager closes the handle
# instead of leaving it open until garbage collection.
with open(clones_file_with_nbrdists, 'r') as header_file:
    # Strip just the line terminator: empty trailing tab fields are kept and
    # a header without a final newline does not lose its last character.
    header = header_file.readline().rstrip('\r\n').split('\t')

# Header columns that are known rand_nbrdist_tags, in sorted order.  Columns
# without 'wtd' in their name are kept only when include_non_wtd is set.
nbrdist_tags = sorted(
    tag for tag in header
    if tag in rand_nbrdist_tags and (include_non_wtd or 'wtd' in tag))
num_nbrdist_tags = len(nbrdist_tags)

Log('parsing {} for {} nbrdist_tags'.format(
    clones_file_with_nbrdists, num_nbrdist_tags))

# The nbrdist columns come first, followed by the four gene/CDR3 columns.
tcr_fields = list(nbrdist_tags)
tcr_fields.extend(['va_genes', 'vb_genes', 'cdr3a', 'cdr3b'])
all_tcrs = parse_tsv.parse_tsv_file(
    clones_file_with_nbrdists, ['epitope'], tcr_fields)

## look for cross-reactive tcrs
for e in all_tcrs:
    new_tcrs = []
    for l in all_tcrs[e]:
        new_tcrs.append(
            [(nbrdist_rescale * float(x)) for x in l[:num_nbrdist_tags]] + [
                frozenset([
                    cdr3s_human.all_loopseq_representative[organism][y]
                    for y in l[-4].split(';')
                ]),
                frozenset([
                    cdr3s_human.all_loopseq_representative[organism][y]
                    for y in l[-3].split(';')
                ]), l[-2], l[-1], False
Esempio n. 3
0
    p.flag('showmotifs')
    p.flag('use_tsne')
    p.multiword('epitopes').cast(lambda x: x.split())

# Default the image-file prefix to the clones file path with its last four
# characters removed (presumably a '.tsv'-style extension -- confirm).
if pngfile_prefix is None:
    pngfile_prefix = clones_file[:-4]

import matplotlib

matplotlib.rcParams['mathtext.default'] = 'regular'
# Unless `show` is set, select the Agg backend; this is done before pyplot
# is imported below.
if not show: matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Mathtext strings for the 'a' and 'b' chain codes.
greek_ab = {'a': r'$\alpha$', 'b': r'$\beta$'}

# Parse the clones file with ['epitope'] and ['clone_id'] as the field lists.
# NOTE(review): presumably these are key fields / stored fields and the
# trailing True is a parse_tsv_file option; its signature is not visible
# here -- confirm.
all_tcrs = parse_tsv.parse_tsv_file(clones_file, ['epitope'], ['clone_id'],
                                    True)

if showmotifs:
    # The motifs file name is derived from the clones file name: last four
    # characters dropped, '_motifs.tsv' appended.
    motifs_file = clones_file[:-4] + '_motifs.tsv'
    if not exists(motifs_file):
        # Without a motifs file there is nothing to show: switch the option
        # off rather than failing.
        showmotifs = False
    else:
        # The file is known to exist in this branch, so no further existence
        # check is needed before parsing.
        all_motifs = parse_tsv.parse_tsv_file(
            motifs_file, ['epitope', 'chain'], [
                'id', 'showmotif', 'chi_squared', 'matches_with_nbrs',
                'matches_with_nbrs_consensus', 'expected_fraction',
                'cluster_number', 'is_cluster_center', 'cluster_consensus'
            ])
Esempio n. 4
0
# Default the output prefix to the clones file path with its last four
# characters removed (presumably a '.tsv'-style extension -- confirm).
if outfile_prefix is None:
    outfile_prefix = clones_file[:-4]

import matplotlib
matplotlib.rcParams['mathtext.default'] = 'regular'
# Select the Agg backend before pyplot is imported below.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

#recompute_nbrdists = True

clones_file_with_nbrdists = '{}_nbrdists.tsv'.format(clones_file[:-4])
# Name the missing file in the failure instead of a bare AssertionError.
assert exists(clones_file_with_nbrdists), \
    'missing nbrdists file: {}'.format(clones_file_with_nbrdists)

## read the epitope-specific TCRs
all_tcrs = parse_tsv.parse_tsv_file(
    clones_file, ['epitope', 'subject'],
    ['va_genes', 'vb_genes', 'cdr3a', 'cdr3b'],
    save_l=True)  ## last element will be the full parse_tsv_line dictionary

if not epitopes:
    # No epitopes were supplied: fall back to every top-level key of
    # all_tcrs, sorted.  sorted() returns a list on both Python 2 and
    # Python 3, whereas slicing .keys() raises TypeError on Python 3.
    epitopes = sorted(all_tcrs.keys())

Log('reading {}'.format(clones_file_with_nbrdists))

# Column names have the form <epitope>_<chain>_wtd_nbrdist<percentile>,
# generated epitope by epitope with the chains varying fastest.
nbrdist_tag_suffix = '_wtd_nbrdist{}'.format(nbrdist_percentile)
nbrdist_tags = []
for x in epitopes:
    for y in all_chains:
        nbrdist_tags.append('_'.join((x, y)) + nbrdist_tag_suffix)

all_nbrdists = parse_tsv.parse_tsv_file(
    clones_file_with_nbrdists, ['epitope', 'subject'], nbrdist_tags)
    footnotes[ab+'_hydro1'] = "Mean CDR3-{} total hydrophobicity (GES scale)".format(ab)
    #footnotes[ab+'_hydro2'] = "Mean total hydrophobicity (KD scale)"
    footnotes[ab+'_hydro2'] = "Mean CDR3-{} total hydrophobicity (HP scale)".format(ab)

# Typo guard: every footnote key must be one of header_tags.  The message
# names the offending key so a failure can be fixed without a debugger.
for tag in footnotes:
    assert tag in header_tags, \
        'footnote tag {!r} is not in header_tags'.format(tag)

# Maps epitope -> {tag: value}.  add_dat does not create the per-epitope
# dicts; they must exist before it is called.
all_dats = {}


def add_dat(epitope, tag, val):
    """Store ``val`` under ``tag`` in the ``all_dats`` entry for ``epitope``.

    ``tag`` must be one of ``header_tags`` (enforced with an assert) and
    ``all_dats[epitope]`` must already exist, otherwise the lookup raises
    ``KeyError``.
    """
    assert tag in header_tags
    # Only the nested mapping is mutated, never the module-level name, so no
    # ``global`` declaration is required.
    epitope_dats = all_dats[epitope]
    epitope_dats[tag] = val


## parse the clones file
all_tcrs = parse_tsv.parse_tsv_file(
    clones_file, ['epitope', 'subject'], ['cdr3a', 'cdr3b', 'clone_size'])
# Sorted list of the top-level keys of all_tcrs.  sorted() gives the same
# list as copy-then-sort on Python 2 and also works on Python 3, where
# dict.keys() returns a view that cannot be sliced.
epitopes = sorted(all_tcrs.keys())

def get_charge(cdr3):
    """Return the sum of ``aa_charge`` values over the characters of ``cdr3``.

    Characters that are not keys of ``aa_charge`` contribute 0.0.
    """
    return sum(aa_charge.get(residue, 0.0) for residue in cdr3)

def get_hp1(cdr3):
    """Return the sum of negated ``GES`` values over the characters of ``cdr3``.

    Each looked-up value is multiplied by -1 before summing; characters that
    are not keys of ``GES`` contribute zero.
    """
    return sum(-1 * GES.get(residue, 0.0) for residue in cdr3)

def get_hp2(cdr3):
    """Return the sum of ``HP`` values over the characters of ``cdr3``.

    Characters that are not keys of ``HP`` contribute 0.0.
    """
    # Disabled alternative that reads the KD table instead:
    #     sum(KD.get(x, 0.0) for x in cdr3)
    return sum(HP.get(residue, 0.0) for residue in cdr3)


all_scores = {}