예제 #1
0
 def __init__(self, organism, params=None):
     """Record the organism and distance parameters, then precompute V-region distances.

     organism -- stored via ``organism[:]`` (so it must support slicing;
                 presumably a string -- confirm against callers) and passed
                 unchanged to the distance precomputation.
     params   -- distance parameters. When None, a default-constructed
                 ``tcr_distances.DistanceParams()`` is used; otherwise a deep
                 copy is kept so this object does not share the caller's
                 instance.

     The result of ``tcr_distances.compute_all_v_region_distances`` is kept
     on ``self.rep_dists``.
     """
     self.organism = organism[:]
     self.params = (tcr_distances.DistanceParams()
                    if params is None else copy.deepcopy(params))
     self.rep_dists = tcr_distances.compute_all_v_region_distances(
         organism, self.params)
예제 #2
0
    # NOTE(review): `p` is created outside this excerpt; it looks like a
    # command-line option parser -- confirm against the enclosing statement.

    # Boolean switch; its negation is stored as `new_nbrdists` further down.
    p.flag('intrasubject_nbrdists')
    # The cast splits the raw value on whitespace into a list of strings.
    p.multiword('clones_files').cast(lambda x: x.split())
    # Same whitespace split, with 'A B AB' as the declared default value.
    p.multiword('dist_chains').cast(lambda x: x.split(
    )).default('A B AB').described_as(
        'The chains over which the distance calcn will be performed; default is all three ("A B AB")'
    )
    # Expected to line up one-to-one with clones_files (asserted further down).
    p.multiword('epitope_prefixes').cast(lambda x: x.split())
    # The cast splits on whitespace and converts every token to int;
    # the declared default value is "5 10 25".
    p.multiword('nbrdist_percentiles').cast(
        lambda x: [int(val) for val in x.split()]).default(
            "5 10 25")  #"5 10 25 -1 -5 -10")
    # Raw string, later handed to tcr_distances.DistanceParams as config_string.
    p.str('distance_params')

# Internal legacy hack: new_nbrdists is simply the negation of the
# intrasubject_nbrdists flag.
new_nbrdists = not intrasubject_nbrdists

# Replace the raw option string with a DistanceParams object built from it.
distance_params = tcr_distances.DistanceParams(config_string=distance_params)

# Fall back to the single clones_file value when no list was supplied.
if not clones_files:
    assert clones_file
    clones_files = [clones_file]

# Without explicit prefixes, pair every clones file with an empty prefix.
if not epitope_prefixes:
    epitope_prefixes = [''] * len(clones_files)

# Prefixes and clones files are matched up one-to-one.
assert len(epitope_prefixes) == len(clones_files)

# A parenthesised single-argument print produces the same output under
# Python 2 and is also valid syntax under Python 3.
print('precomputing v-region distances')
rep_dists = tcr_distances.compute_all_v_region_distances(
    organism, distance_params)
print('done precomputing v-region distances')
예제 #3
0
def make_default_logo_svg_cmds(upper_left,
                               width,
                               height,
                               organism,
                               tcr_infos,
                               ab,
                               distance_params=None,
                               rep_dists=None,
                               add_fake_alleles=False,
                               show_full_cdr3=False):
    """Prepare the inputs for a single-chain TCR logo and delegate to make_tcr_logo.

    upper_left       -- passed through to make_tcr_logo untouched.
    width, height    -- requested size; divided by default_width /
                        default_height to get the scale factors applied to
                        the default_* layout constants.
    organism         -- used for the gene table lookup, the V-region distance
                        precomputation, label assignment and junction analysis.
    tcr_infos        -- re-iterable collection of dict-like records (it is
                        walked more than once). Keys read here: 'subject',
                        'epitope', 'cdr3a', 'cdr3b', 'va_gene', 'ja_gene',
                        'va_genes', 'vb_gene', 'jb_gene', 'vb_genes' (the
                        *_genes values are ';'-separated), 'clone_id', the
                        four '<v|j><a|b>_label_rep' keys with their '_color'
                        companions, and 'cdr3a_nucseq' / 'cdr3b_nucseq' when
                        junction bars are enabled. The label keys are
                        presumably filled in by the util.assign_label_reps_...
                        call below -- confirm.
    ab               -- chain to draw, 'A' or 'B' (single-chain only).
    distance_params  -- defaults to DistanceParams(config_string=None).
    rep_dists        -- precomputed V-region distances; computed when None.
    add_fake_alleles -- when true, gene names without a '*' get '*01' appended.
    show_full_cdr3   -- passed through to make_tcr_logo untouched.

    Returns whatever make_tcr_logo returns (the drawing commands).
    """
    # NOTE(review): this is a substring test, so '' and 'AB' pass as well --
    # confirm whether that is intended.
    assert ab in 'AB'

    if distance_params is None:
        distance_params = tcr_distances.DistanceParams(config_string=None)
    if rep_dists is None:
        rep_dists = tcr_distances.compute_all_v_region_distances(
            organism, distance_params)

    util.assign_label_reps_and_colors_based_on_most_common_genes_in_repertoire(
        tcr_infos, organism)

    # Map every label rep to its color, visiting va, vb, ja, jb per record.
    label_keys = [segment + chain + '_label_rep'
                  for segment in 'vj' for chain in 'ab']
    rep_colors = {}
    for info in tcr_infos:
        for key in label_keys:
            rep = info[key]
            rep_colors[rep] = info[key + '_color']

    def with_allele(gene):
        # Hacky: tack on a '*01' allele suffix when the name carries none.
        return gene if '*' in gene else gene + '*01'

    tcrs = []       # tuples handed to make_tcr_logo
    dist_tcrs = []  # [va_reps, vb_reps, cdr3a, cdr3b] entries for the distances

    for row in tcr_infos:
        mouse = row['subject']
        epitope = row['epitope']  # read but not used below
        cdr3a = row['cdr3a']
        cdr3b = row['cdr3b']

        # Gene names used for the distance computation.
        va_gene, ja_gene = row['va_gene'], row['ja_gene']
        va_genes = row['va_genes'].split(';')
        vb_gene, jb_gene = row['vb_gene'], row['jb_gene']
        vb_genes = row['vb_genes'].split(';')

        if add_fake_alleles:
            va_genes = [with_allele(name) for name in va_genes]
            vb_genes = [with_allele(name) for name in vb_genes]
            va_gene, ja_gene = with_allele(va_gene), with_allele(ja_gene)
            vb_gene, jb_gene = with_allele(vb_gene), with_allele(jb_gene)

        gene_table = all_genes.all_genes[organism]
        va_reps = frozenset(gene_table[name].rep for name in va_genes)
        vb_reps = frozenset(gene_table[name].rep for name in vb_genes)
        dist_tcrs.append([va_reps, vb_reps, cdr3a, cdr3b])

        # The label reps carry no allele info either.
        va_rep, ja_rep = row['va_label_rep'], row['ja_label_rep']
        vb_rep, jb_rep = row['vb_label_rep'], row['jb_label_rep']

        # Placeholder provenance, three entries per CDR3 residue; only the
        # chain being drawn is replaced by real junction analysis below.
        cdr3a_nucseq_src = ['V'] * (3 * len(cdr3a))
        cdr3b_nucseq_src = ['V'] * (3 * len(cdr3b))

        if junction_bars and ab == 'A':
            (_nucseq, _masked, _counts, _trims, _inserts,
             cdr3a_nucseq_src) = tcr_sampler.analyze_junction(
                 organism,
                 va_gene,
                 ja_gene,
                 cdr3a,
                 row['cdr3a_nucseq'].lower(),
                 return_cdr3_nucseq_src=True)
        elif junction_bars and ab == 'B':
            (_nucseq, _masked, _counts, _trims, _inserts,
             cdr3b_nucseq_src) = tcr_sampler.analyze_junction(
                 organism,
                 vb_gene,
                 jb_gene,
                 cdr3b,
                 row['cdr3b_nucseq'].lower(),
                 return_cdr3_nucseq_src=True)

            # Tell N entries that precede any D ('N1') from later ones ('N2').
            seen_d = False
            for pos, source in enumerate(cdr3b_nucseq_src):
                if source == 'D':
                    seen_d = True
                elif source == 'N':
                    cdr3b_nucseq_src[pos] = 'N2' if seen_d else 'N1'

        assert len(cdr3a_nucseq_src) == 3 * len(cdr3a)
        assert len(cdr3b_nucseq_src) == 3 * len(cdr3b)

        tcrs.append((mouse, va_rep, ja_rep, vb_rep, jb_rep, cdr3a, cdr3b,
                     cdr3a_nucseq_src, cdr3b_nucseq_src, row['clone_id']))

    # Symmetric pairwise distance matrix; it is handed to make_tcr_logo
    # (used there for picking the TCR to align against).
    num_tcrs = len(dist_tcrs)
    all_dists = np.zeros((num_tcrs, num_tcrs))
    for i, first in enumerate(dist_tcrs):
        for j, second in enumerate(dist_tcrs[i + 1:], i + 1):
            all_dists[i, j] = all_dists[j, i] = tcr_distances.compute_distance(
                first, second, ab, rep_dists, distance_params)

    # Every TCR is a member of the logo.
    members = range(len(tcrs))

    # Scale the default layout to the requested width and height.
    x_scale = float(width) / default_width
    y_scale = float(height) / default_height

    return make_tcr_logo(upper_left, tcrs, members, all_dists, ab, rep_colors,
                         x_scale * default_vj_logo_width,
                         x_scale * default_pwmplusgaps_width,
                         x_scale * default_xpad,
                         y_scale * default_pwm_height,
                         y_scale * default_junction_bars_height,
                         y_scale * default_ypad,
                         show_full_cdr3)