def getFastaCount( fasta, intervals ):
    """Count the header lines of a FASTA file per interval bin.

    Every line whose first non-blank character is '>' is treated as a
    header.  The text after the last '=' of that header is parsed as an
    integer and passed, together with *intervals*, to ``getIntervalIdx``;
    when that call returns an index the matching counter is incremented.

    Args:
        fasta: path of the file to read.
        intervals: sequence handed unchanged to ``getIntervalIdx``; the
            result has ``len(intervals) - 1`` counters.

    Returns:
        A list of ``len(intervals) - 1`` integers.

    Raises:
        ValueError: if the text after the last '=' of a header is not an
            integer.
    """
    counts = [0] * (len(intervals) - 1)
    # The context manager closes the handle even when parsing fails.
    with open(fasta) as handle:
        for line in handle:
            header = line.strip()
            # startswith() is safe on blank lines, where indexing [0] would
            # raise IndexError.
            if not header.startswith('>'):
                continue
            value = int(header.split('=')[-1])
            idx = getIntervalIdx(intervals, value)
            if idx is not None:
                counts[idx] += 1
    return counts
def main( args ):
    """Tabulate motif occurrences per interval and produce reports and plots.

    Reads ``args.input`` through ``getRecords`` and replaces
    ``args.intervals`` in place by its sorted, de-duplicated form.  It then
    walks the records once, collecting:

    * ``result[motif][bin]``: a dict mapping "number of positions listed
      for the record" to how many records had that number;
    * ``distBtMotifs[motif][bin]``: start of the last listed position minus
      start of the first, for records with more than one position;
    * ``leftPoses`` / ``rightPoses`` / ``singlePoses``: per-motif values of
      ``getMotifCenterPos`` for the first, last, or only position.

    Finally the collections are handed to ``printResult``, ``printDistBox``,
    ``printLocHist``, ``generateHeatMatrix`` and ``plotPieChart``.

    Args:
        args: object providing ``input``, ``intervals``, ``fasta``,
            ``translate``, ``outPrefix`` and ``plotsdir``.

    Returns:
        None.  Returns early when fewer than two distinct interval
        boundaries are given, and calls ``sys.exit()`` when there are no
        records.

    NOTE(review): the line structure of this function was lost in the
    source this was restored from, so the nesting inside the record loop
    is a reconstruction -- confirm against version control.
    """
    records, motifs, details_poses = getRecords( args.input )

    args.intervals = sorted(set(args.intervals))
    if len(args.intervals) < 2:
        print("Intervals not defined.")
        return
    numBins = len(args.intervals) - 1

    # One independent container per motif and, where relevant, per bin.
    result = [[{} for _ in range(numBins)] for _ in motifs]
    distBtMotifs = [[[] for _ in range(numBins)] for _ in motifs]
    leftPoses = [[] for _ in motifs]
    rightPoses = [[] for _ in motifs]
    singlePoses = [[] for _ in motifs]

    if len(records) < 1:
        sys.exit()

    motifIndex = 0
    currMotif = motifs[motifIndex]
    intervalIndex = getIntervalIdx( args.intervals, records[0][2] )
    print(args.intervals)

    for r in records:
        detail_r = details_poses[r[-1]][r[0]]
        if len(detail_r) > 1:
            if intervalIndex is not None:
                tempDist = detail_r[-1][0] - detail_r[0][0]
                distBtMotifs[motifIndex][intervalIndex].append( tempDist )
            # NOTE(review): assumed to sit outside the interval check, like
            # the single-position branch below -- confirm.
            leftPoses[motifIndex].append(
                getMotifCenterPos( detail_r[0][0], detail_r[0][1] ) )
            rightPoses[motifIndex].append(
                getMotifCenterPos( detail_r[-1][0], detail_r[-1][1] ) )
        else:
            singlePoses[motifIndex].append(
                getMotifCenterPos( detail_r[0][0], detail_r[0][1] ) )

        if r[-1] != currMotif:
            # The records are expected to list motifs in the order of `motifs`.
            motifIndex += 1
            assert r[-1] == motifs[motifIndex]
            currMotif = motifs[motifIndex]
        else:
            if intervalIndex is not None:
                binCounts = result[motifIndex][intervalIndex]
                occurrences = len(detail_r)
                binCounts[occurrences] = binCounts.get(occurrences, 0) + 1
            # NOTE(review): the index is refreshed only after it has been
            # used, so each record is binned with the interval of the record
            # that last reached this branch (records[0] on the first pass).
            # Kept as found -- confirm this lag is intended.
            intervalIndex = getIntervalIdx( args.intervals, r[2] )

    fastaCount = getFastaCount( args.fasta, args.intervals )
    translate = loadTranslate( args.translate )
    counts = printResult( result, motifs, args.intervals, args.outPrefix,
                          fastaCount, translate )
    printDistBox( distBtMotifs, args.intervals, motifs, translate,
                  args.plotsdir )
    printLocHist( leftPoses, rightPoses, singlePoses, motifs, translate,
                  args.plotsdir )
    generateHeatMatrix( motifs, details_poses, records, 201, args.intervals,
                        args.plotsdir )
    plotPieChart( counts, fastaCount, args.plotsdir )
def generateHeatMatrix( motifs, details, records, seq_length, intervals , savefig=None):
    """Plot per-position mean motif profiles, overall and per interval bin.

    For every key ``r[0]`` found in *records* a vector of length
    *seq_length* is built: it starts at -1 everywhere and, for each
    position pair ``(p[0], p[1])`` listed in ``details[r[-1]][r[0]]``, the
    cell at ``(p[0] + p[1]) // 2`` is set to the index of the motif in
    *motifs*.  Vectors with no cell set are dropped.  Two figures are drawn:
    one over all remaining vectors and one restricted to the vectors in
    which every motif has at least one position.  With more than two
    interval boundaries each figure gets one subplot per bin plus a final
    "All dist" subplot; otherwise only the "All dist" subplot is drawn.
    Each subplot shows the column-wise mean of the vectors after they have
    been shifted by +1 (so an untouched cell contributes 0).

    Args:
        motifs: ordered motif identifiers; ``motifs[0]`` is used in the
            figure titles.
        details: mapping indexed as ``details[motif][key]`` yielding
            indexable position records.
        records: rows where ``r[0]`` is the grouping key, ``r[2]`` the value
            passed to ``getIntervalIdx`` and ``r[-1]`` the motif.
        seq_length: length of every vector (integer).
        intervals: boundaries passed to ``getIntervalIdx``; also used for
            the subplot titles.
        savefig: forwarded unchanged to ``saveOrPrint``.

    NOTE(review): the line structure of this function was lost in the
    source this was restored from.  The nesting of ``print(motifs)``, of
    ``count_all`` and of ``count_diff`` is a reconstruction; these affect
    only the diagnostic output -- confirm against version control.
    """
    import numpy as np
    import pylab as pl
    from plotDistCat import saveOrPrint, getIntervalIdx

    motif2value = {}
    for i, m in enumerate(motifs):
        print(m)
        motif2value[m] = i
    print(motifs)

    # data[key] = [r[2], positions of motif 0, positions of motif 1, ...]
    data = {}
    for r in records:
        if r[-1] not in motif2value:
            continue
        if r[0] not in data:
            data[r[0]] = [r[2]] + [[] for _ in motifs]
        slot = data[r[0]][motif2value[r[-1]] + 1]
        for p in details[r[-1]][r[0]]:
            slot.append((p[0], p[1]))

    # Floor division keeps the tick positions integral on Python 2 and 3.
    half_length = seq_length // 2
    ticks = [0, half_length, seq_length]
    labels = [-half_length, 0, half_length]

    numBins = len(intervals) - 1
    matrix_all = []
    matrix_exist = []
    count_diff = 0
    count_all = 0
    count_all_by_dist = [0] * numBins
    count_exist_by_dist = [0] * numBins
    # Per bin: indexes of the rows of matrix_all / matrix_exist in that bin.
    matrix_by_dist = [[] for _ in range(numBins)]
    matrix_exist_by_dist = [[] for _ in range(numBins)]
    NUMPLOTS = len(intervals)

    for seq in data:
        exist = True
        temp = np.zeros(seq_length) - 1
        for i, p in enumerate(data[seq][1:]):
            for t in p:
                # Array indexes must be integers, hence floor division.
                temp[(t[0] + t[1]) // 2] = i
            if len(p) == 0:
                exist = False
        temp = list(temp)
        # Every marked cell raises the sum above -seq_length, so equality
        # means no position was recorded at all.
        if sum(temp) == -1 * seq_length:
            continue
        interIdx = getIntervalIdx(intervals, data[seq][0])
        matrix_all.append(temp)
        if interIdx is not None:
            matrix_by_dist[interIdx].append(len(matrix_all) - 1)
            count_all_by_dist[interIdx] += 1
        count_all += 1
        if exist:
            matrix_exist.append(temp)
            if interIdx is not None:
                matrix_exist_by_dist[interIdx].append(len(matrix_exist) - 1)
                count_exist_by_dist[interIdx] += 1
        else:
            count_diff += 1

    print("%s   %s   %s   %s" % (count_diff, count_all,
                                 len(matrix_all), len(matrix_exist)))
    print(count_all_by_dist)
    print(count_exist_by_dist)

    pl.close("all")
    fig_all = pl.figure(1)
    fig_exist = pl.figure(2)
    sub_count = 1

    # Row orderings by mean value; sorted() also works where range() is lazy.
    matrix_all_idx = sorted(range(len(matrix_all)),
                            key=lambda k: (np.average(matrix_all[k]),))
    matrix_exist_idx = sorted(range(len(matrix_exist)),
                              key=lambda k: (np.average(matrix_exist[k]),))
    for i in range(numBins):
        matrix_by_dist[i].sort(key=lambda k: (np.average(matrix_all[k]),))
        matrix_exist_by_dist[i].sort(
            key=lambda k: (np.average(matrix_exist[k]),))

    # Bins are half-open except the last one, which is closed.
    subTitles = []
    for i in range(numBins):
        t = '[%d, %d' % (intervals[i], intervals[i + 1])
        if i != len(intervals) - 2:
            t += ')'
        else:
            t += ']'
        subTitles.append(t)

    # Shift so that untouched cells (-1) become 0 and motif i becomes i + 1.
    matrix_all = np.array(matrix_all)
    matrix_exist = np.array(matrix_exist)
    matrix_all += 1
    matrix_exist += 1
    ncolumns = 2

    def setTickAndLabel(ax, ticks_a, labels_a):
        ax.set_xticks(ticks_a)
        ax.set_xticklabels(labels_a)

    def plotMeanProfile(ax, rows, title):
        # Column-wise mean of the selected rows.
        ax.plot(rows.sum(axis=0) / rows.shape[0])
        ax.set_title(title)
        setTickAndLabel(ax, ticks, labels)

    if len(intervals) > 2:
        for i in range(numBins):
            fas = fig_all.add_subplot((NUMPLOTS + 1) // 2, 2, sub_count)
            fes = fig_exist.add_subplot((NUMPLOTS + 1) // 2, 2, sub_count)
            plotMeanProfile(
                fas, matrix_all[matrix_by_dist[i]],
                subTitles[sub_count - 1] + ' %d' % len(matrix_by_dist[i]))
            plotMeanProfile(
                fes, matrix_exist[matrix_exist_by_dist[i]],
                subTitles[sub_count - 1]
                + ' %d' % len(matrix_exist_by_dist[i]))
            sub_count += 1
    else:
        # A single bin: only the overall subplot is drawn.
        ncolumns = 1
        NUMPLOTS = 1

    fas = fig_all.add_subplot((NUMPLOTS + 1) // 2, ncolumns, sub_count)
    fes = fig_exist.add_subplot((NUMPLOTS + 1) // 2, ncolumns, sub_count)
    plotMeanProfile(fas, matrix_all[matrix_all_idx],
                    "All dist %d" % len(matrix_all_idx))
    fig_all.suptitle("Heatmap all for %s"%motifs[0])
    plotMeanProfile(fes, matrix_exist[matrix_exist_idx],
                    "All dist %d" % len(matrix_exist_idx))
    fig_exist.suptitle("Heatmap exist for %s"%motifs[0])

    fig_all.subplots_adjust(right=0.8)
    fig_exist.subplots_adjust(right=0.8)
    fig_all.tight_layout()
    fig_exist.tight_layout()
    saveOrPrint(fig_all, "Heatmap_all", '', savefig)
    saveOrPrint(fig_exist, "Heatmap_exist", '', savefig)