def printLocHist(leftPoses, rightPoses, singlePoses, motifs, translate, savefig = None):
    '''
    Generate a plot of the distribution of the left and right motifs
    within each sequence.

    One figure is created per motif, with two stacked panels:
      * top:    scatter of paired (left, right) positions;
      * bottom: the histograms returned by getHistBinCenters for the left,
                right and single positions (40 bins, normed=1).

    Parameters:
      leftPoses, rightPoses, singlePoses -- sequences indexed like `motifs`;
          entry i holds the positions recorded for motifs[i].  Left and
          right entries are paired element-wise with zip().
      motifs     -- iterable of motif identifiers.
      translate  -- optional mapping motif -> display name (None to skip).
      savefig    -- forwarded unchanged to saveOrPrint.
    '''
    import pylab as pl
    from plotDistCat import getHistBinCenters, saveOrPrint

    for i, m in enumerate(motifs):
        # Lexicographic tuple order is the same as sorting by (left, right).
        pairs = sorted(zip(leftPoses[i], rightPoses[i]))

        # The display name is used for the title and the output file name.
        if translate is not None and m in translate:
            m = translate[m]

        fig = pl.figure()
        scatter_ax = fig.add_subplot(211)
        scatter_ax.set_title(m)
        scatter_ax.plot([p[0] for p in pairs], [p[1] for p in pairs], '.')
        scatter_ax.set_xlabel("Left position")
        scatter_ax.set_ylabel("Right position")

        # Keep the bin centers of every histogram: each curve has to be drawn
        # against its own x values.  Re-using the centers of the last call
        # for all three curves misplaces them whenever the bins differ.
        left_hist, left_centers = getHistBinCenters(leftPoses[i], 40, normed=1)
        right_hist, right_centers = getHistBinCenters(rightPoses[i], 40, normed=1)
        single_hist, single_centers = getHistBinCenters(singlePoses[i], 40, normed=1)

        hist_ax = fig.add_subplot(212)
        hist_ax.plot(left_centers, left_hist, '-',
                     label="Left (%d)" % len(leftPoses[i]))
        hist_ax.plot(right_centers, right_hist, '-',
                     label="Right (%d)" % len(rightPoses[i]))
        hist_ax.plot(single_centers, single_hist, '-',
                     label="Single (%d)" % len(singlePoses[i]))
        hist_ax.legend()
        hist_ax.set_xlabel("Position")
        hist_ax.set_ylabel("Frequency")

        saveOrPrint(fig, "LocHist_" + m, '', savefig)
def plotPieChart(counts, fastaCount, savefig):
    '''
    Draw, for every motif, one pie chart per interval plus a summary pie.

    Parameters:
      counts     -- counts[0] is the list of interval names; every further
                    entry c describes one motif: c[0] is the motif (also
                    passed to saveOrPrint as the figure name) and c[i+1] is
                    a sequence of (label, count) rows for interval i.
      fastaCount -- optional per-interval totals.  When not None, a slice
                    labelled 0 holding "total minus counted" is put in
                    front of every pie (and of the summary pie, using
                    sum(fastaCount)).
      savefig    -- forwarded unchanged to saveOrPrint.

    All open figures are closed before each motif's figure is created.
    '''
    import numpy as np
    import pylab as pl
    from plotDistCat import saveOrPrint
    from collections import defaultdict

    inter_names = counts[0]
    n_plots = len(inter_names) + 1          # one pie per interval + summary
    # Floor division: add_subplot needs an integer row count, and "/" only
    # yields one under Python 2 integer semantics.
    nrows = (n_plots + 1) // 2
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']

    for c in counts[1:]:
        print(c)
        motif = c[0]
        totals = defaultdict(int)           # label -> count over all intervals

        pl.close("all")
        fig = pl.figure(figsize=(10, 14))

        for i, inter_name in enumerate(inter_names):
            ax = fig.add_subplot(nrows, 2, i + 1)
            rows = c[i + 1]
            curr = np.array(rows)
            for row in rows:
                totals[row[0]] += row[1]

            fracs = list(curr[:, 1])
            labels = list(curr[:, 0])
            if fastaCount is not None:
                fracs.insert(0, fastaCount[i] - curr[:, 1].sum())
                labels.insert(0, 0)
            print("{0}   {1}".format(fracs, labels))

            ax.pie(fracs, autopct='%1.1f%%', colors=colors, labels=labels)
            ax.set_title('%d,%s' % (sum(fracs), inter_name))

        # Summary pie over all intervals, slices ordered by label.
        labels = sorted(totals)
        fracs = [totals[label] for label in labels]
        if fastaCount is not None:
            fracs.insert(0, sum(fastaCount) - sum(fracs))
            labels.insert(0, 0)

        # Last slot of the grid; computed from the interval count instead of
        # the leaked loop variable so it also works with no intervals.
        ax = fig.add_subplot(nrows, 2, len(inter_names) + 1)
        ax.pie(fracs, labels=labels, autopct='%1.1f%%', colors=colors)
        ax.set_title('%d, All' % sum(fracs))

        fig.tight_layout()
        saveOrPrint(fig, motif, "pie", savefig)
def _intervalTitles(intervals):
    '''Return one '[lo, hi)' title per consecutive pair of bounds; the last one is closed with ']'.'''
    titles = []
    last = len(intervals) - 2
    for i in range(len(intervals) - 1):
        closing = ']' if i == last else ')'
        titles.append('[%d, %d' % (intervals[i], intervals[i + 1]) + closing)
    return titles


def _plotMeanProfile(ax, matrix, rows, title, ticks, labels):
    '''Plot the column-wise mean of matrix[rows] on ax and set its title and x ticks.'''
    subset = matrix[rows]
    # An empty selection divides by zero here and yields NaNs only.
    ax.plot(subset.sum(axis=0) / subset.shape[0])
    ax.set_title(title)
    ax.set_xticks(ticks)
    ax.set_xticklabels(labels)


def generateHeatMatrix( motifs, details, records, seq_length, intervals , savefig=None):
    '''
    Plot mean motif-occupancy profiles along the sequence.

    For every sequence a vector of length seq_length is built, initialised
    to -1; the midpoint of each motif occurrence is set to the index of
    that motif in `motifs`.  Sequences without any occurrence are dropped.
    The vectors (shifted by +1) are averaged and plotted in two figures:
    one over all kept sequences ("Heatmap_all") and one over the sequences
    in which every motif occurs at least once ("Heatmap_exist").  With more
    than two interval bounds, one extra panel per interval is drawn first.

    Parameters:
      motifs     -- list of motifs; motifs[0] is used in the figure titles.
      details    -- details[motif][seq] is an iterable of items p whose
                    p[0] and p[1] are averaged to get the plotted position
                    (presumably start/end -- confirm against the caller).
      records    -- iterable of records r: r[0] is the sequence key, r[2]
                    the value handed to getIntervalIdx, r[-1] the motif.
                    Records of motifs not in `motifs` are ignored.
      seq_length -- length of the per-sequence vector (integer).
      intervals  -- interval bounds passed to getIntervalIdx.
      savefig    -- forwarded unchanged to saveOrPrint.

    NOTE(review): the nesting of this function was reconstructed from a
    source that had lost its indentation; `count_all` is assumed to count
    every kept sequence, independent of the interval lookup.
    '''
    import numpy as np
    import pylab as pl
    from plotDistCat import saveOrPrint, getIntervalIdx

    motif2value = {}
    for i, m in enumerate(motifs):
        print(m)
        motif2value[m] = i
    print(motifs)

    # data[seq] = [r[2], hits of motif 0, hits of motif 1, ...]
    data = {}
    for r in records:
        motif = r[-1]
        if motif not in motif2value:
            continue
        seq_key = r[0]
        if seq_key not in data:
            data[seq_key] = [r[2]] + [[] for _ in motifs]
        hits = data[seq_key][motif2value[motif] + 1]
        for p in details[motif][seq_key]:
            hits.append((p[0], p[1]))

    # Floor division keeps tick positions, labels and array indices integral
    # regardless of the interpreter's division semantics.
    half = seq_length // 2
    ticks = [0, half, seq_length]
    labels = [-half, 0, half]

    n_intervals = len(intervals) - 1
    matrix_all = []
    matrix_exist = []
    # Per interval: row indexes into matrix_all / matrix_exist.
    matrix_by_dist = [[] for _ in range(n_intervals)]
    matrix_exist_by_dist = [[] for _ in range(n_intervals)]
    count_all_by_dist = [0 for _ in range(n_intervals)]
    count_exist_by_dist = [0 for _ in range(n_intervals)]
    count_diff = 0
    count_all = 0

    for seq in data:
        exist = True
        temp = np.zeros(seq_length) - 1
        for i, p in enumerate(data[seq][1:]):
            for t in p:
                temp[(t[0] + t[1]) // 2] = i
            if len(p) == 0:
                exist = False
        temp = list(temp)
        # Still all -1: no motif was placed on this sequence.
        if sum(temp) == -1 * seq_length:
            continue

        interIdx = getIntervalIdx(intervals, data[seq][0])
        matrix_all.append(temp)
        if interIdx is not None:
            matrix_by_dist[interIdx].append(len(matrix_all) - 1)
            count_all_by_dist[interIdx] += 1
        count_all += 1

        if exist:
            matrix_exist.append(temp)
            if interIdx is not None:
                matrix_exist_by_dist[interIdx].append(len(matrix_exist) - 1)
                count_exist_by_dist[interIdx] += 1
        else:
            count_diff += 1

    print("{0}   {1}   {2}   {3}".format(
        count_diff, count_all, len(matrix_all), len(matrix_exist)))
    print(count_all_by_dist)
    print(count_exist_by_dist)

    pl.close("all")
    fig_all = pl.figure(1)
    fig_exist = pl.figure(2)

    # Rows are ordered by their average value.  The order does not change
    # the mean profiles plotted below; it only matters if the rows are
    # rendered as an image (e.g. with imshow) instead.
    def avgAll(k):
        return np.average(matrix_all[k])

    def avgExist(k):
        return np.average(matrix_exist[k])

    matrix_all_idx = sorted(range(len(matrix_all)), key=avgAll)
    matrix_exist_idx = sorted(range(len(matrix_exist)), key=avgExist)
    for i in range(n_intervals):
        matrix_by_dist[i].sort(key=avgAll)
        matrix_exist_by_dist[i].sort(key=avgExist)

    subTitles = _intervalTitles(intervals)

    # Shift by one so that "no motif" becomes 0 and motif i becomes i + 1.
    matrix_all = np.array(matrix_all) + 1
    matrix_exist = np.array(matrix_exist) + 1

    sub_count = 1
    if len(intervals) > 2:
        # One panel per interval plus the final "all distances" panel.
        ncolumns = 2
        nrows = (len(intervals) + 1) // 2
        for i in range(n_intervals):
            fas = fig_all.add_subplot(nrows, ncolumns, sub_count)
            fes = fig_exist.add_subplot(nrows, ncolumns, sub_count)
            _plotMeanProfile(
                fas, matrix_all, matrix_by_dist[i],
                subTitles[sub_count - 1] + ' %d' % len(matrix_by_dist[i]),
                ticks, labels)
            _plotMeanProfile(
                fes, matrix_exist, matrix_exist_by_dist[i],
                subTitles[sub_count - 1] + ' %d' % len(matrix_exist_by_dist[i]),
                ticks, labels)
            sub_count += 1
    else:
        ncolumns = 1
        nrows = 1

    fas = fig_all.add_subplot(nrows, ncolumns, sub_count)
    fes = fig_exist.add_subplot(nrows, ncolumns, sub_count)

    _plotMeanProfile(fas, matrix_all, matrix_all_idx,
                     "All dist %d" % len(matrix_all_idx), ticks, labels)
    fig_all.suptitle("Heatmap all for %s" % motifs[0])

    _plotMeanProfile(fes, matrix_exist, matrix_exist_idx,
                     "All dist %d" % len(matrix_exist_idx), ticks, labels)
    fig_exist.suptitle("Heatmap exist for %s" % motifs[0])

    fig_all.subplots_adjust(right=0.8)
    fig_exist.subplots_adjust(right=0.8)
    fig_all.tight_layout()
    fig_exist.tight_layout()

    saveOrPrint(fig_all, "Heatmap_all", '', savefig)
    saveOrPrint(fig_exist, "Heatmap_exist", '', savefig)