Exemple #1
0
    for l in infos:

        mouse = l['subject']
        epitope = l['epitope']
        cdr3a = l['cdr3a']
        cdr3b = l['cdr3b']

        ## note that we are using mm1 reps here that also dont have allele info
        va_rep = l['va_label_rep']
        ja_rep = l['ja_label_rep']
        vb_rep = l['vb_label_rep']
        jb_rep = l['jb_label_rep']

        if junction_bars:
            a_junction_results = tcr_sampler.analyze_junction( organism, l['va_gene'], l['ja_gene'],
                                                               cdr3a, l['cdr3a_nucseq'], return_cdr3_nucseq_src=True )
            b_junction_results = tcr_sampler.analyze_junction( organism, l['vb_gene'], l['jb_gene'],
                                                               cdr3b, l['cdr3b_nucseq'], return_cdr3_nucseq_src=True )

            cdr3a_new_nucseq, cdr3a_protseq_masked, cdr3a_protseq_new_nucleotide_countstring,\
                a_trims, a_inserts, cdr3a_nucseq_src = a_junction_results
            cdr3b_new_nucseq, cdr3b_protseq_masked, cdr3b_protseq_new_nucleotide_countstring,\
                b_trims, b_inserts, cdr3b_nucseq_src = b_junction_results
            ## try to distinguish between N before D and N after D
            for i in range(len(cdr3b_nucseq_src)):
                if cdr3b_nucseq_src[i] == 'N':
                    if cdr3b_nucseq_src[:i].count('D')==0:
                        cdr3b_nucseq_src[i] = 'N1'
                    else:
                        cdr3b_nucseq_src[i] = 'N2'
        else:
Exemple #2
0
def make_default_logo_svg_cmds(upper_left,
                               width,
                               height,
                               organism,
                               tcr_infos,
                               ab,
                               distance_params=None,
                               rep_dists=None,
                               add_fake_alleles=False,
                               show_full_cdr3=False):
    """Return the logo commands for a single chain of a set of TCRs.

    Gathers per-TCR gene and CDR3 information from ``tcr_infos``, computes the
    all-against-all distance matrix for chain ``ab`` and hands everything to
    ``make_tcr_logo``, with the ``default_*`` logo dimensions rescaled to the
    requested ``width`` and ``height``.

    Args:
        upper_left: passed through unchanged to ``make_tcr_logo``.
        width: requested width; ``width / default_width`` scales the
            horizontal default dimensions.
        height: requested height; ``height / default_height`` scales the
            vertical default dimensions.
        organism: key into ``all_genes.all_genes``; also forwarded to the
            distance and junction-analysis helpers.
        tcr_infos: re-iterable collection of dict-like records (it is walked
            more than once).  Keys read here: ``subject``, ``clone_id``,
            ``cdr3a``, ``cdr3b``, ``va_gene``, ``ja_gene``, ``vb_gene``,
            ``jb_gene``, ``va_genes`` and ``vb_genes`` (';'-separated), the
            ``{va,ja,vb,jb}_label_rep`` / ``..._label_rep_color`` entries
            (presumably filled in by the ``util.assign_label_reps_...`` call
            below -- confirm against ``util``), and, when ``junction_bars``
            is set, ``cdr3a_nucseq`` or ``cdr3b_nucseq`` for the chosen chain.
        ab: the chain to draw, either ``'A'`` or ``'B'`` (single-chain only).
        distance_params: distance parameters; when ``None`` a
            ``tcr_distances.DistanceParams(config_string=None)`` is created.
        rep_dists: precomputed V-region distances; when ``None`` they are
            computed with ``tcr_distances.compute_all_v_region_distances``.
        add_fake_alleles: if true, append ``'*01'`` to every gene name that
            carries no ``'*'`` before it is looked up.
        show_full_cdr3: passed through unchanged to ``make_tcr_logo``.

    Returns:
        Whatever ``make_tcr_logo`` returns (the logo "cmds").

    Raises:
        AssertionError: if ``ab`` is not ``'A'`` or ``'B'``, or if a
            nucleotide-source list is not three entries per CDR3 residue.

    Note:
        ``junction_bars`` and the ``default_*`` dimensions are not parameters;
        they are looked up in the enclosing (module) scope.
    """
    # Compare against a tuple, not the string 'AB': substring membership would
    # also accept '' and 'AB', neither of which is handled by the per-chain
    # junction analysis below.
    assert ab in ('A', 'B'), 'ab must be "A" or "B", got %r' % (ab,)

    if distance_params is None:
        distance_params = tcr_distances.DistanceParams(config_string=None)

    if rep_dists is None:
        rep_dists = tcr_distances.compute_all_v_region_distances(
            organism, distance_params)

    util.assign_label_reps_and_colors_based_on_most_common_genes_in_repertoire(
        tcr_infos, organism)

    # Map every label rep to its color.  The segment order (va, vb, ja, jb)
    # is kept so that, should a rep appear twice, the same entry wins.
    rep_colors = {}
    for info in tcr_infos:
        for segment in ('va', 'vb', 'ja', 'jb'):
            rep = info[segment + '_label_rep']
            rep_colors[rep] = info[segment + '_label_rep_color']

    def add_fake_allele_info(gene):
        # hacky: pretend allele-less gene names are the '*01' allele
        return gene if '*' in gene else gene + '*01'

    tcrs = []       # per-TCR tuples handed to make_tcr_logo
    dist_tcrs = []  # per-TCR records handed to compute_distance

    for info in tcr_infos:
        subject = info['subject']
        cdr3a = info['cdr3a']
        cdr3b = info['cdr3b']

        ## for computing distances
        va_gene = info['va_gene']
        ja_gene = info['ja_gene']
        va_genes = info['va_genes'].split(';')

        vb_gene = info['vb_gene']
        jb_gene = info['jb_gene']
        vb_genes = info['vb_genes'].split(';')

        if add_fake_alleles:
            # list comprehensions rather than map(): a real list on both
            # Python 2 and Python 3
            va_genes = [add_fake_allele_info(g) for g in va_genes]
            vb_genes = [add_fake_allele_info(g) for g in vb_genes]
            va_gene = add_fake_allele_info(va_gene)
            ja_gene = add_fake_allele_info(ja_gene)
            vb_gene = add_fake_allele_info(vb_gene)
            jb_gene = add_fake_allele_info(jb_gene)

        gene_table = all_genes.all_genes[organism]
        va_reps = frozenset(gene_table[g].rep for g in va_genes)
        vb_reps = frozenset(gene_table[g].rep for g in vb_genes)
        dist_tcrs.append([va_reps, vb_reps, cdr3a, cdr3b])

        # Placeholder sources (three 'V' entries per residue); only the
        # selected chain is overwritten below, and only if junction_bars.
        cdr3a_nucseq_src = ['V'] * (3 * len(cdr3a))
        cdr3b_nucseq_src = ['V'] * (3 * len(cdr3b))

        if junction_bars:
            if ab == 'A':
                # only the last field of the result is needed here
                (_new_nucseq, _protseq_masked, _countstring, _trims, _inserts,
                 cdr3a_nucseq_src) = tcr_sampler.analyze_junction(
                     organism,
                     va_gene,
                     ja_gene,
                     cdr3a,
                     info['cdr3a_nucseq'].lower(),
                     return_cdr3_nucseq_src=True)
            else:
                (_new_nucseq, _protseq_masked, _countstring, _trims, _inserts,
                 cdr3b_nucseq_src) = tcr_sampler.analyze_junction(
                     organism,
                     vb_gene,
                     jb_gene,
                     cdr3b,
                     info['cdr3b_nucseq'].lower(),
                     return_cdr3_nucseq_src=True)

                ## try to distinguish between N before D and N after D
                # Single pass: an 'N' seen before any 'D' becomes 'N1',
                # one seen after a 'D' becomes 'N2'.
                seen_d = False
                for pos, src in enumerate(cdr3b_nucseq_src):
                    if src == 'D':
                        seen_d = True
                    elif src == 'N':
                        cdr3b_nucseq_src[pos] = 'N2' if seen_d else 'N1'

        assert len(cdr3a_nucseq_src) == 3 * len(cdr3a)
        assert len(cdr3b_nucseq_src) == 3 * len(cdr3b)

        ## note that we are using mm1 reps here that also dont have allele info
        tcrs.append((subject,
                     info['va_label_rep'], info['ja_label_rep'],
                     info['vb_label_rep'], info['jb_label_rep'],
                     cdr3a, cdr3b,
                     cdr3a_nucseq_src, cdr3b_nucseq_src,
                     info['clone_id']))

    ## compute distances, used in logo construction for picking the center
    ## tcr for aligning against
    chains = ab
    num_tcrs = len(dist_tcrs)
    all_dists = np.zeros((num_tcrs, num_tcrs))
    for i, t1 in enumerate(dist_tcrs):
        for j in range(i + 1, num_tcrs):
            dist = tcr_distances.compute_distance(t1, dist_tcrs[j], chains,
                                                  rep_dists, distance_params)
            # the matrix is symmetric; the diagonal stays at zero
            all_dists[i][j] = dist
            all_dists[j][i] = dist

    # now make the logo; every TCR is a member
    members = list(range(len(tcrs)))

    # scale everything by our desired height, width
    scale_w = float(width) / default_width
    scale_h = float(height) / default_height

    return make_tcr_logo(upper_left, tcrs, members, all_dists, ab, rep_colors,
                         scale_w * default_vj_logo_width,
                         scale_w * default_pwmplusgaps_width,
                         scale_w * default_xpad, scale_h * default_pwm_height,
                         scale_h * default_junction_bars_height,
                         scale_h * default_ypad, show_full_cdr3)
    if new_cdr3b_nucseq != cdr3b_nucseq:  ## note note note
        new_cdr3b_protseq = read_sanger_data.get_translation(
            new_cdr3b_nucseq, '+1')[0]
    else:
        new_cdr3b_protseq = cdr3b_protseq[:]
        assert new_cdr3b_protseq == read_sanger_data.get_translation(
            cdr3b_nucseq, '+1')[0]

    bprob_protseq = tcr_sampler.beta_cdr3_protseq_probability(
        organism, vb_gene, jb_gene, new_cdr3b_protseq, verbose=verbose)

    vals = dict(l)  #line.split('\t') + ['']*(len(outfields)-len(infields))

    if add_masked_seqs:
        ## junction analysis
        a_junction_results = tcr_sampler.analyze_junction(
            organism, va_gene, ja_gene, cdr3a_protseq, cdr3a_nucseq)
        b_junction_results = tcr_sampler.analyze_junction(
            organism, vb_gene, jb_gene, cdr3b_protseq, cdr3b_nucseq)

        cdr3a_new_nucseq, cdr3a_protseq_masked, cdr3a_protseq_new_nucleotide_countstring,a_trims,a_inserts \
            = a_junction_results
        cdr3b_new_nucseq, cdr3b_protseq_masked, cdr3b_protseq_new_nucleotide_countstring,b_trims,b_inserts \
            = b_junction_results

        # from tcr_sampler.py:
        # trims = ( v_trim, d0_trim, d1_trim, j_trim )
        # inserts = ( best_d_id, n_vd_insert, n_dj_insert, n_vj_insert )

        assert a_trims[1] + a_trims[2] + a_inserts[0] + a_inserts[
            1] + a_inserts[2] + b_inserts[3] == 0
        assert a_inserts[3] == len(cdr3a_new_nucseq)
        if chain == 'A':
            if correct_cdr3_seqs:
                tmp_results = tcr_sampler.analyze_junction\
                              ( organism, v_gene, j_gene, cdr3_protseq, cdr3_nucseq,
                                return_corrected_cdr3_seqs = True,
                                mismatch_score = mismatch_score_for_correcting_cdr3_seqs )
                corrected_cdr3_nucseq, corrected_cdr3_protseq = list(
                    tmp_results)[-2:]

                if corrected_cdr3_nucseq != cdr3_nucseq:
                    print 'fixing early sequence error', cdr3_nucseq, '==>', corrected_cdr3_nucseq
                    cdr3_nucseq = corrected_cdr3_nucseq[:]
                    cdr3_protseq = corrected_cdr3_protseq[:]

            junction_results = tcr_sampler.analyze_junction(
                organism, v_gene, j_gene, cdr3_protseq, cdr3_nucseq)
            new_nucseq, cdr3_protseq_masked, cdr3_protseq_new_nucleotide_countstring, trims, inserts \
                = junction_results

            (v_trim, d0_trim, d1_trim, j_trim) = trims
            (d_id, n_vd_insert, n_dj_insert, n_vj_insert) = inserts
            if not new_nucseq: new_nucseq = '-'

            assert d0_trim + d1_trim + n_vd_insert + n_dj_insert == 0
            junction_info = '%s %s -%d -%d +%d' % (
                new_nucseq, cdr3_protseq_masked, v_trim, j_trim, n_vj_insert)
        else:
            junction_infos = []

            possible_d_ids = tcr_rearrangement_new.all_trbd_nucseq[
                organism].keys()[:]