Example #1
0
def getFastaCount( fasta, intervals ):
    """Count the header lines of a FASTA file per interval bucket.

    A line is treated as a header when its first non-blank character is
    '>'.  The text after the last '=' on a header line is parsed with
    ``int`` and passed, together with ``intervals``, to ``getIntervalIdx``;
    when that returns an index the matching counter is incremented.

    Parameters:
        fasta: path of the file to read.
        intervals: sequence of interval boundaries.  One counter is kept
            per pair of consecutive boundaries, i.e. ``len(intervals) - 1``
            counters in total.

    Returns:
        A list of ``len(intervals) - 1`` integers (empty when fewer than
        two boundaries are given).

    Raises:
        ValueError: a header line whose trailing '=' field is not an integer.
    """
    counts = [0] * (len(intervals) - 1)

    # The context manager guarantees the handle is closed, including when
    # int() or getIntervalIdx() raises part-way through the file.
    with open( fasta ) as f:
        for line in f:
            # startswith() is safe on blank lines, where indexing [0] of the
            # stripped text would raise IndexError.
            if not line.strip().startswith('>'):
                continue
            v = int(line.split('=')[-1])
            idx = getIntervalIdx( intervals, v )
            if idx is not None:
                counts[ idx ] += 1
    return counts
Example #2
0
def main( args ):
    """Tally per-motif, per-interval statistics and emit the report and plots.

    Reads ``args.input``, ``args.intervals``, ``args.fasta``,
    ``args.translate``, ``args.outPrefix`` and ``args.plotsdir``.
    ``args.intervals`` is replaced in place by its de-duplicated, sorted form.

    Each record ``r`` is used as follows: ``r[-1]`` is the motif,
    ``r[0]`` is the key into ``details_poses[motif]`` and ``r[2]`` is the
    value handed to ``getIntervalIdx``.  The loop asserts that records are
    grouped by motif in the same order as ``motifs``.

    Returns None.  Prints a message and returns early when fewer than two
    distinct interval boundaries are given; calls ``sys.exit()`` when there
    are no records.
    """
    records, motifs, details_poses = getRecords( args.input )
    # Drop duplicate boundaries and sort them; N boundaries give N-1 buckets.
    args.intervals = list(set(args.intervals))
    args.intervals.sort()
    if len( args.intervals ) < 2:
        print "Intervals not defined."
        return
    # Accumulators, all indexed by motif position in `motifs`:
    #   result[m][k]       dict: number of detail entries -> number of records
    #   distBtMotifs[m][k] list of (last entry [0] - first entry [0]) distances
    #   leftPoses[m] / rightPoses[m]  centre of first / last entry when a
    #                                 record has more than one entry
    #   singlePoses[m]     centre of the only entry otherwise
    # where k is an interval bucket index.
    result = []
    distBtMotifs = []
    leftPoses = []
    rightPoses = []
    singlePoses = []
    for i in range( len(motifs) ):
        temp = []
        distTemp = []
        for j in range( len(args.intervals) - 1 ):
            temp.append({})
            distTemp.append([])
        result.append(temp)
        distBtMotifs.append( distTemp )
        leftPoses.append([])
        rightPoses.append([])
        singlePoses.append([])

    if len(records) < 1 : sys.exit()
    # Loop state.  currSeqName is written but never read in this function.
    motifIndex = 0
    currMotif = motifs[motifIndex]
    currSeqName = records[0][0]
    currSeqMotifCount = 0
    intervalIndex = getIntervalIdx( args.intervals, records[0][2])
    print args.intervals
    for r in records:
        detail_r = details_poses[r[-1]][r[0]]
        # NOTE(review): motifIndex and intervalIndex are read here before
        # they are refreshed for this record (that happens further down).
        # From the second record on, intervalIndex is the bucket of the most
        # recent record that took the `else` branch below, and the first
        # record of a new motif is filed under the previous motif.
        # Confirm whether this one-record lag is intended.
        if len(detail_r) > 1:
            if intervalIndex != None:
                tempDist = detail_r[-1][0] - detail_r[0][0]
                distBtMotifs[ motifIndex ] [intervalIndex].append( tempDist )
            leftPoses[ motifIndex ].append( getMotifCenterPos( detail_r[0][0], detail_r[0][1] ) )
            rightPoses[ motifIndex ].append( getMotifCenterPos( detail_r[-1][0], detail_r[-1][1] ) )
        else:
            singlePoses[ motifIndex ].append( getMotifCenterPos( detail_r[0][0], detail_r[0][1] ) )
        if r[-1] != currMotif:
            # Motif changed: advance to the next motif.  Nothing is added to
            # `result` for this record and intervalIndex is not refreshed.
            motifIndex += 1
            assert r[-1] == motifs[motifIndex]
            currMotif = motifs[motifIndex]
        else:
            # Histogram of "entries per record" for the current bucket.
            currSeqMotifCount = len( details_poses[r[-1]][r[0]] )
            #print r[2],' ', intervalIndex
            if intervalIndex != None  and currSeqMotifCount in result[motifIndex][intervalIndex]:
                result[motifIndex][intervalIndex][ currSeqMotifCount ] += 1
            elif intervalIndex != None:
                result[motifIndex][intervalIndex][ currSeqMotifCount ] = 1
            # This reset is overwritten before the next read of the variable.
            currSeqMotifCount = 1
            # Bucket of THIS record; it is what the next iteration will use.
            intervalIndex = getIntervalIdx( args.intervals, r[2] )
            currSeqName = r[0]

    # Per-bucket header counts from the FASTA file, passed to the report and
    # pie chart below.
    fastaCount = getFastaCount( args.fasta, args.intervals )
    translate = loadTranslate( args.translate )
    counts = printResult( result, motifs, args.intervals, args.outPrefix, fastaCount, translate)


    printDistBox( distBtMotifs, args.intervals, motifs, translate, args.plotsdir)
    printLocHist( leftPoses, rightPoses,singlePoses, motifs, translate, args.plotsdir )

    # 201 is passed in the seq_length position, args.plotsdir in the savefig
    # position.  NOTE(review): hard-coded sequence length - confirm it
    # matches the input data.
    generateHeatMatrix( motifs, details_poses, records, 201, args.intervals, args.plotsdir)

    plotPieChart(counts, fastaCount, args.plotsdir)
Example #3
0
def generateHeatMatrix( motifs, details, records, seq_length, intervals , savefig=None):
    """Plot column-wise mean motif-position profiles, overall and per interval.

    For every sequence a vector of length ``seq_length`` is built that holds
    -1 everywhere except at the midpoint ``(p[0]+p[1])/2`` of each motif
    occurrence, where it holds the motif's index in ``motifs``.  Two row
    collections are kept: all sequences with at least one occurrence, and
    the subset in which every motif occurs at least once ("exist").  After
    adding 1 to every cell, the column means of each collection are drawn
    as line plots, one subplot per interval bucket plus one for all rows.
    Despite the "Heatmap" names, the image rendering is commented out.

    Parameters:
        motifs: sequence of motif identifiers; ``motifs[0]`` is used in the
            figure titles.
        details: ``details[motif][seq]`` is an iterable of entries whose
            first two items are used as ``p[0]`` and ``p[1]``
            (presumably start/end coordinates - verify against caller).
        records: iterable of records; ``r[0]`` is the sequence key, ``r[2]``
            the value later passed to ``getIntervalIdx`` and ``r[-1]`` the
            motif.
        seq_length: length of the per-sequence vector and of the x axis.
        intervals: interval boundaries; ``len(intervals) - 1`` buckets.
        savefig: forwarded unchanged as the last argument of
            ``saveOrPrint`` (semantics defined in plotDistCat).

    Returns None.  Prints diagnostic counts to stdout.

    NOTE(review): relies on Python 2 behaviour - print statements, ``/`` as
    integer division for index and subplot-row computations, and ``range()``
    returning a list that can be sorted in place.
    """
    import numpy as np
    import pylab as pl
    from plotDistCat import saveOrPrint, getIntervalIdx
    # data[seq] = [r[2], [positions of motif 0], [positions of motif 1], ...]
    data = {}
    # motif -> its index in `motifs`
    motif2value = {}
    for i,m in enumerate(motifs):
        print m
        motif2value[ m ] = i
    print motifs
    for r in records:
        if r[-1] not in motif2value:
            continue
        if r[0] not in data:
            # The first record seen for a sequence fixes the value stored in slot 0.
            data[r[0]] = [r[2],]
            for i in range(len(motifs)):
                data[r[0]].append([])
        for p in details[r[-1]][r[0]]:
            # +1 skips slot 0, which holds r[2].
            data[r[0]][motif2value[r[-1]]+1].append(( p[0],p[1]))

    matrix_all = []
    matrix_exist = []
    # x-axis ticks at both ends and the middle, labelled relative to the centre.
    ticks = [0, seq_length/2, seq_length]
    labels = [-(seq_length/2),0,seq_length/2]

    count_diff = 0
    count_all = 0
    count_all_by_dist = []
    count_exist_by_dist = []
    # The following store, per interval bucket, the indexes of the rows in
    # the complete sets (matrix_all / matrix_exist).
    matrix_by_dist = []
    matrix_exist_by_dist = []
    for i in range(len(intervals) - 1):
        matrix_by_dist.append([])
        matrix_exist_by_dist.append([])
        count_all_by_dist.append(0)
        count_exist_by_dist.append(0)
    # Used for the subplot grid: one plot per bucket plus the overall plot.
    NUMPLOTS = len(intervals)


    for seq in data:
        # `exist` stays True only if every motif has at least one occurrence.
        exist = True
        temp = np.zeros(seq_length) - 1
        for i,p in enumerate(data[seq][1:]):
            for t in p:
                # Mark the occurrence midpoint with the motif index; a later
                # motif overwrites an earlier one at the same position.
                temp[(t[0]+t[1])/2] = i
            if len(p) == 0:
                exist = False
        temp = list(temp)
        # Every cell still -1: no occurrence was recorded, skip the sequence.
        if sum(temp) == -1*seq_length:
            continue
        interIdx = getIntervalIdx(intervals, data[seq][0])

        matrix_all.append(temp)
        if interIdx != None:
            matrix_by_dist[interIdx].append(len(matrix_all)-1)
            count_all_by_dist[interIdx] += 1

        count_all += 1
        if exist:
            matrix_exist.append(temp)
            if interIdx != None:
                matrix_exist_by_dist[interIdx].append(len(matrix_exist)-1)
                count_exist_by_dist[interIdx] += 1
        else:
            # Sequence kept in matrix_all but missing at least one motif.
            count_diff += 1


    print count_diff, ' ', count_all, ' ', len(matrix_all),' ', len(matrix_exist)
    print count_all_by_dist
    print count_exist_by_dist
    pl.close("all")
    fig_all = pl.figure(1)
    fig_exist = pl.figure(2)
    # 1-based index of the next subplot to add in both figures.
    sub_count = 1
    matrix_all_idx = range(len(matrix_all))
    matrix_exist_idx = range(len(matrix_exist))
    # Order row indexes by row average.  NOTE(review): the rows are only
    # summed over axis 0 below, so this ordering does not show in the line
    # plots; it looks like a leftover from the commented-out imshow calls.
    matrix_all_idx.sort(key=lambda k:(np.average(matrix_all[k]),))
    matrix_exist_idx.sort(key=lambda k:(np.average(matrix_exist[k]),))
    for i in range(len(intervals) - 1):
        matrix_by_dist[i].sort(key=lambda k:(np.average(matrix_all[k]),))
        matrix_exist_by_dist[i].sort(key=lambda k:(np.average(matrix_exist[k]),))


    # Titles "[a, b)" for every bucket, "[a, b]" for the last one.
    subTitles = []
    for i in range(len(intervals) - 1):
        t = '[%d, %d'%(intervals[i], intervals[i+1])
        if i != len(intervals) - 2:
            t += ')'
        else:
            t += ']'
        subTitles.append(t)

    #matrix_all.sort(key=lambda k:(np.average(k),)) # np.average(k[0:60]), np.average(k[60:140]),np.average(k[140:])))
    #matrix_exist.sort(key=lambda k:(np.average(k),)) # np.average(k[0:60]), np.average(k[60:140]),np.average(k[140:])))
    matrix_all = np.array(matrix_all)
    matrix_exist = np.array(matrix_exist)
    #matrix_all[matrix_all<0] = None
    #matrix_exist[matrix_exist<0] = None
    # Shift so that "no motif" is 0 and motif i is i+1.
    matrix_all += 1
    matrix_exist += 1
    ncolumns = 2


    def setTickAndLabel(ax, ticks_a, labels_a):
        # Apply the shared x-axis ticks and labels to one axes object.
        ax.set_xticks(ticks_a)
        ax.set_xticklabels(labels_a)

    if len(intervals) > 2:
        # More than one bucket: one subplot per bucket in a two-column grid.
        for i in range(len(intervals) - 1):
            fas = fig_all.add_subplot((NUMPLOTS+1)/2, 2, sub_count)
            fes = fig_exist.add_subplot((NUMPLOTS+1)/2, 2, sub_count)
            #fas.imshow(matrix_all[matrix_by_dist[i]], aspect='auto')
            # Column sums divided by the row count, i.e. the column means of
            # this bucket's rows.  NOTE(review): an empty bucket makes the
            # row count 0 - confirm how that case should be drawn.
            fas.plot(matrix_all[matrix_by_dist[i]].sum(axis=0)/matrix_all[matrix_by_dist[i]].shape[0])
            fas.set_title(subTitles[ sub_count - 1] + ' %d'%len(matrix_by_dist[i]))
            setTickAndLabel(fas, ticks, labels)
            #fes.imshow(matrix_exist[matrix_exist_by_dist[i]], aspect='auto')
            fes.plot(matrix_exist[matrix_exist_by_dist[i]].sum(axis=0)/matrix_exist[matrix_exist_by_dist[i]].shape[0])
            fes.set_title(subTitles[ sub_count - 1] + ' %d'%len(matrix_exist_by_dist[i]))
            setTickAndLabel(fes, ticks, labels)
            sub_count += 1

    else:
        # Single bucket: only the overall plot is drawn, in a 1x1 grid.
        ncolumns = 1
        NUMPLOTS=1

    # Final subplot in each figure: column means over all rows.
    fas = fig_all.add_subplot((NUMPLOTS+1)/2, ncolumns, sub_count)
    fes = fig_exist.add_subplot((NUMPLOTS+1)/2, ncolumns, sub_count)
    matrix_all_sorted = matrix_all[matrix_all_idx]
    matrix_exist_sorted = matrix_exist[matrix_exist_idx]
    fas.plot(matrix_all_sorted.sum(axis=0)/matrix_all_sorted.shape[0])
    fas.set_title("All dist %d" % len(matrix_all_idx))
    setTickAndLabel(fas, ticks, labels)
    # Only the first motif is named in the figure titles.
    fig_all.suptitle("Heatmap all for %s"%motifs[0])
    #pl.colorbar()
    #fig_all.close()
    #pl.close()

    fes.plot(matrix_exist_sorted.sum(axis=0)/matrix_exist_sorted.shape[0])
    fig_exist.suptitle("Heatmap exist for %s"%motifs[0])
    fes.set_title("All dist %d" % len(matrix_exist_idx))
    setTickAndLabel(fes, ticks, labels)

    fig_all.subplots_adjust(right=0.8)
    fig_exist.subplots_adjust(right=0.8)

    #cbar_ax = fig_all.add_axes([0.85,0.15,0.05,0.7])
    #cbar_ax_e = fig_exist.add_axes([0.85,0.15,0.05,0.7])

    #fig_all.colorbar(ima, cax=cbar_ax)
    #fig_all.show()
    #pl.show()
    fig_all.tight_layout()
    fig_exist.tight_layout()
    # Hand both figures to the project helper (presumably saves or shows
    # depending on `savefig` - verify in plotDistCat).
    saveOrPrint(fig_all, "Heatmap_all", '', savefig)
    #fig_exist.colorbar(ime, cax=cbar_ax_e)
    #fig_exist.show()
    saveOrPrint(fig_exist, "Heatmap_exist", '', savefig)