Exemple #1
0
def writePredictedRepReadsToFile(p1, rep_reads, fout):
    """Write one tab-separated row per indel of profile ``p1`` to ``fout``.

    Each row holds a running 0-based row number, the entry of ``rep_reads``
    for that indel, and the indel label itself.

    Entries are taken in the order returned by ``getProfileCounts(p1)`` and
    writing stops at the first entry whose count is below 0.5 (presumably the
    entries arrive sorted by decreasing count -- confirm against
    ``getProfileCounts``).

    Args:
        p1: Profile object passed straight to ``getProfileCounts``.
        rep_reads: Mapping indexed by indel label; a missing label raises
            whatever error the mapping raises on lookup.
        fout: Object with a ``write`` method accepting unicode text.
    """
    for row_num, entry in enumerate(getProfileCounts(p1)):
        cnt, indel, _, _ = entry
        if cnt < 0.5:
            break
        fout.write(u'%d\t%s\t%s\n' % (row_num, rep_reads[indel], indel))
Exemple #2
0
def writeMCISummary(fout, id, p1, stats1, oligo_det, more_indels=False):
    """Write tab-separated summary rows for indels of profile ``p1``.

    With ``more_indels`` false a single row is written, for the indel returned
    by ``getHighestIndel(p1)``.  Otherwise one row is written for every entry
    of ``getProfileCounts(p1)`` whose indel label is not ``'-'``.

    Row columns, in order: ``id``, indel label, ``details['L']``,
    ``details['R']``, ``details['C']``, indel type, indel size, ``p1[indel]``,
    ``stats1[0] - stats1[2]``, microhomology sequence, altered sequence.
    The last two columns are empty strings unless the indel is of type ``'D'``
    with no (or a zero) ``'I'`` entry in its details.

    Args:
        fout: Object with a ``write`` method accepting unicode text.
        id: Identifier written in the first column.
        p1: Profile indexed by indel label.
        stats1: Indexable; elements 0 and 2 are subtracted to give the value
            of the total-reads column (their exact meaning is not visible
            here -- confirm with the caller).
        oligo_det: 3-item sequence, also handed unchanged to ``getSequence``.
        more_indels: Select all non-null indels instead of only the top one.
    """
    if more_indels:
        mcis = [entry[1] for entry in getProfileCounts(p1) if entry[1] != '-']
    else:
        mcis = [getHighestIndel(p1)]

    row_template = u'%s\t%s\t%d\t%d\t%d\t%s\t%d\t%d\t%d\t%s\t%s\n'

    for mci in mcis:
        mci_reads = p1[mci]
        total_reads = stats1[0] - stats1[2]
        itype, isize, details, _muts = tokFullIndel(mci)
        # Kept from the original: fails early if oligo_det is not a 3-tuple.
        _pam_loc, _pam_dir, _seq = oligo_det

        mh_seq = ''
        altered_seq = ''
        has_insertion = 'I' in details and details['I'] != 0
        if itype == 'D' and not has_insertion:
            overlap = details['C']
            if overlap > 0:
                # Compare the `overlap`-long stretches at either end of the
                # deleted region; equal stretches are reported as the
                # microhomology sequence.
                left_flank = getSequence(oligo_det, details['L'] + 1,
                                         details['L'] + overlap)
                right_flank = getSequence(oligo_det, details['R'] - overlap,
                                          details['R'] - 1)
                if left_flank == right_flank:
                    mh_seq = left_flank
            # Note: includes the MH sequence at both ends.
            altered_seq = getSequence(oligo_det, details['L'] + 1,
                                      details['R'] - 1)

        fout.write(row_template %
                   (id, mci, details['L'], details['R'], details['C'], itype,
                    isize, mci_reads, total_reads, mh_seq, altered_seq))
Exemple #3
0
def computePredictedProfile(data, theta, feature_columns):
    """Build a percentage profile over the indels listed in ``data``.

    ``calcThetaX`` is applied to every row of ``data`` (``axis=1``) with
    ``theta`` and ``feature_columns`` as extra arguments; the exponential of
    the result is stored in-place in the ``'expThetaX'`` column.  Each indel's
    share is its ``expThetaX`` value times 100 divided by the column sum.

    Args:
        data: DataFrame-like object with an ``'Indel'`` column.  It is
            modified: an ``'expThetaX'`` column is added or overwritten.
        theta: Forwarded to ``calcThetaX``.
        feature_columns: Forwarded to ``calcThetaX``.

    Returns:
        Tuple ``(profile, counts)`` where ``profile`` maps each value of the
        ``'Indel'`` column to its percentage and ``counts`` is
        ``getProfileCounts(profile)``.
    """
    theta_x = data.apply(calcThetaX, axis=1, args=(theta, feature_columns))
    data['expThetaX'] = np.exp(theta_x)

    normaliser = data['expThetaX'].sum()
    profile = {}
    for indel, weight in zip(data['Indel'], data['expThetaX']):
        profile[indel] = weight * 100 / normaliser

    return profile, getProfileCounts(profile)
Exemple #4
0
def writePredictedProfileToSummary(p1, fout):
    """Write ``indel<TAB>-<TAB>count`` rows for profile ``p1`` to ``fout``.

    Entries come from ``getProfileCounts(p1)`` in the order it returns them;
    output stops at the first entry whose count is below 0.5.  The count is
    rounded with ``np.round`` before being formatted as an integer.

    Args:
        p1: Profile object passed straight to ``getProfileCounts``.
        fout: Object with a ``write`` method accepting unicode text.
    """
    row_template = u'%s\t-\t%d\n'
    for cnt, indel, _unused_a, _unused_b in getProfileCounts(p1):
        if cnt < 0.5:
            break
        rounded = np.round(cnt)
        fout.write(row_template % (indel, rounded))
Exemple #5
0
def plotProfiles(profiles,
                 rep_reads,
                 pam_idxs,
                 reverses,
                 labels,
                 title='',
                 max_lines=10):
    """Draw the top indels of one or more profiles with per-profile bars.

    For every displayed indel a text row is drawn (via ``plotSeqLetterwise``)
    for each profile that has a representative read for it, and a horizontal
    bar whose length is proportional to that indel's percentage is drawn to
    the right of the sequences.  The figure is shown non-blocking, saved via
    ``saveFig`` under ``'<title with spaces as underscores>_<n labels>'`` and
    returned.

    Args:
        profiles: Non-empty list of profiles; each is passed to
            ``getProfileCounts``.
        rep_reads: One mapping per profile, from indel label to read sequence.
        pam_idxs: One index per profile (presumably the PAM position within
            the null read -- confirm with the caller).
        reverses: One flag per profile; when true the profile's reads are
            reverse-complemented before display.
        labels: One legend label per profile; ``'(<n> Reads)'`` is appended.
        title: Text placed above the bars and used for the saved file name.
        max_lines: Row budget, divided by the number of profiles.

    Returns:
        The figure object created by ``PL.figure``.

    Raises:
        Exception: if ``profiles`` is empty.
    """
    if len(profiles) == 0: raise Exception('Empty list of profiles')

    # Bar colours, consumed in order by zip() in the bar loop below.
    # NOTE(review): only 13 entries -- with more profiles zip() would silently
    # stop drawing bars after the 13th.
    colors = [
        FORECAST_GREEN, 'C0', 'C2', 'C2', 'C1', 'C1', 'C3', 'C3', 'C4', 'C4',
        'C5', 'C5', 'C6'
    ]

    PL.rcParams['svg.fonttype'] = 'none'
    # ocounts keeps the order returned by getProfileCounts; counts re-indexes
    # the same 4-tuples (cnt, indel, perc1a, perc1b) by indel label.
    ocounts = [getProfileCounts(p1) for p1 in profiles]
    counts = [{
        indel: (cnt, indel, perc1a, perc1b)
        for (cnt, indel, perc1a, perc1b) in x
    } for x in ocounts]

    # Count total non-null reads for each sample (to report in labels)
    nonnull_reads = [
        sum([x[indel][0] for indel in x if indel != '-']) for x in counts
    ]
    labels = [
        '%s(%d Reads)' % (tit, nn) for (tit, nn) in zip(labels, nonnull_reads)
    ]

    # Fetch the indels to display as union of top N indels across profiles
    num_top = 20
    top_indels = [[y[1] for y in x[:num_top]] for x in ocounts]
    union_top_indels = set()
    for x in top_indels:
        union_top_indels = union_top_indels.union(set(x))

    # Give every profile an all-zero entry for indels it does not contain, so
    # the lookups below never miss.
    for indel in union_top_indels:
        for count in counts:
            if indel not in count:
                count[indel] = (0, indel, 0.0, 0.0)
    union_top_indels = [x for x in union_top_indels]
    indel_toks = [tokFullIndel(indel) for indel in union_top_indels]
    # NOTE(review): max_insert (and indel_toks) are not used again in this
    # function.
    max_insert = max([0] + [toks[1] for toks in indel_toks if toks[0] == 'I'])

    # Order indels by decreasing average percentage across profiles (the last
    # tuple element, perc1b); ties fall back to the indel label, descending.
    top_av_percs = [(np.mean([x[indel][-1] for x in counts]), indel)
                    for indel in union_top_indels]
    top_av_percs.sort(reverse=True)
    # NOTE(review): under Python 3 this is true division, so max_indels (and
    # N below) can be fractional; confirm whether floor division was intended.
    max_indels = max_lines / len(profiles)

    # Figure out trims.
    # Null read per profile: the '-' entry when present, otherwise the read of
    # the first indel (in ocounts order) that has one. Raises IndexError if a
    # profile has no usable read at all.
    null_reads = [
        x['-'] if '-' in x else [x[y[1]] for y in ocnt if y[1] in x][0]
        for x, ocnt in zip(rep_reads, ocounts)
    ]
    null_reads = [
        Bio.Seq.reverse_complement(x) if rev else x
        for x, rev in zip(null_reads, reverses)
    ]
    # Mirror the index for reversed reads so it refers to the displayed
    # orientation.
    pam_idxs = [
        len(x) - pam if rev else pam
        for x, pam, rev in zip(null_reads, pam_idxs, reverses)
    ]
    # Tuple min(): shortest null read first, ties broken by the smaller index.
    min_null, pam_idx = min([(len(null), pidx)
                             for (null, pidx) in zip(null_reads, pam_idxs)])
    # Per-profile slice bounds [L:R] applied to each read before plotting.
    Ls = [x - pam_idx for x in pam_idxs]
    Rs = [L + min_null - len(null) for (L, null) in zip(Ls, null_reads)]

    # Plot
    # Bars are scaled so that the largest perc1b found at position 1 of any
    # profile's counts maps to length 10 (presumably position 0 is the null
    # '-' entry -- confirm against getProfileCounts).
    # NOTE(review): needs at least two entries per profile and a non-zero
    # maximum, otherwise IndexError / ZeroDivisionError.
    scale_factor = 10.0 / max([x[1][3] for x in ocounts])
    fig = PL.figure(figsize=(9, 5 * len(labels)))
    fig.patch.set_visible(False)
    ax = PL.gca()
    ax.axis('off')
    N = min(len(union_top_indels), max_indels)
    line_height = 0.8
    min_xloc, max_xloc = MIN_X, MAX_X
    PL.ylim((0, (N + 1.0) * line_height))
    bar_ypos, bar_len = [[] for x in profiles], [[] for x in profiles]
    for i, (av_perc, indel) in enumerate(top_av_percs):
        # Rows 0..max_indels inclusive are drawn (the test is '>', not '>=').
        if i > max_indels: break
        for repr, cnts, rev, L1, R1, j in zip(rep_reads, counts, reverses, Ls,
                                              Rs, range(len(Rs))):
            (cnt1, indel1, perc1a, perc1b) = cnts[indel]
            if indel in repr:
                # A right bound of 0 would give an empty slice; use the full
                # length instead.
                if R1 == 0: R1 = len(repr[indel])
                seq = Bio.Seq.reverse_complement(
                    repr[indel])[L1:R1] if rev else repr[indel][L1:R1]
                padded_seq, red_idxs, green_idxs = padReadForIndel(
                    seq, indel, pam_idx)
                # Profile j is drawn in its own sub-row within row i.
                min_xloc, max_xloc = plotSeqLetterwise(
                    padded_seq,
                    (N - i + (j + 0.3) * 1.0 / len(profiles)) * line_height,
                    pam_idx,
                    red_idxs=red_idxs,
                    green_idxs=green_idxs)
            # No bar is drawn for the null ('-') row.
            if indel != '-':
                bar_ypos[j].append(
                    (N - i + (j + 0.4) * 1.0 / len(profiles)) * line_height)
                bar_len[j].append(perc1b * scale_factor)
    # Bars start 10 units to the right of the last max_xloc reported by
    # plotSeqLetterwise (or MAX_X if nothing was plotted).
    hist_loc = max_xloc + 10
    for bar1_ypos, bar1_len, label1, clr in zip(bar_ypos, bar_len, labels,
                                                colors):
        PL.barh(bar1_ypos,
                bar1_len,
                height=0.8 * line_height / len(profiles),
                left=hist_loc,
                label=label1,
                color=clr)
        # Annotate each bar with its percentage (undoing the bar scaling).
        for (ypos, blen) in zip(bar1_ypos, bar1_len):
            PL.text(hist_loc + blen + 1,
                    ypos - 0.5 / len(profiles) * line_height,
                    '%.1f%%' % (blen / scale_factor))
    xlims = (min_xloc - 10, MAX_X + 20 + (min_xloc - MIN_X))
    PL.xlim(xlims)
    # Row captions on the left edge plus a light separator line per row.
    for i, (av_perc, indel) in enumerate(top_av_percs):
        if i > max_indels: break
        if indel == '-':
            PL.text(xlims[0], (N - i + 0.4) * line_height,
                    'Target:',
                    fontweight='bold')
        else:
            # Only the part of the indel label before the first '_' is shown.
            PL.text(xlims[0], (N - i + 0.4) * line_height,
                    indel.split('_')[0],
                    fontweight='bold')
        PL.plot([min_xloc - 10, max_xloc + 10],
                [(N - i) * line_height, (N - i) * line_height], 'lightgrey')
    # Frame lines: dashed vertical at x=0, solid line at y=N*line_height and
    # a solid vertical at the left edge of the bars.
    PL.plot([0, 0], [0, (N + 1) * line_height], 'k--')
    PL.plot([min_xloc - 10, hist_loc], [N * line_height, N * line_height], 'k')
    PL.plot([hist_loc, hist_loc], [0, N * line_height], 'k')
    PL.xticks([])
    PL.yticks([])
    if len(labels) > 1: PL.legend(loc='upper right')
    PL.text(hist_loc, (N + 0.5) * line_height, title, fontweight='bold')
    PL.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)
    PL.show(block=False)
    PL.axis('off')
    saveFig('%s_%d' % (title.replace(' ', '_'), len(labels)), bbox=False)
    return fig