def tab(f, sample2row):
    """Write one LaTeX table row per sample to the open file object *f*.

    Samples are visited in sorted key order. A row starts with the display
    name returned by iseqlib.properName, followed by one ' & cell ' column
    per entry of sample2row[sample]; cells equal to '+' or '-' are wrapped
    in math mode. Each row is closed with a LaTeX line break.

    NOTE(review): the indentation of the source was lost; the horizontal
    rule is assumed to be written after every row -- confirm.
    """
    mathCells = ('+', '-')
    for name in sorted(sample2row):
        f.write("%s" % iseqlib.properName(name))
        for value in sample2row[name]:
            if value in mathCells:
                value = "$%s$" % value
            f.write(" & %s " % (value))
        f.write("\\\\\n")
        f.write("\\hline\n")
def tab0(f, clones, clone2hits, group2keywords, options):
    """Write the LaTeX rows describing each clone that has keyword hits.

    For every clone, getCloneInfo supplies (vs, js, ds, sample2size). Clones
    without an entry in clone2hits are skipped, as are clones none of whose
    hits satisfy checkKeywords(hit[0], keywords) for a keyword group other
    than "b27" and "pathogen".

    A reported clone spans max(#samples, #matching hits) rows: V, the
    sequence of the first hit (hits[0][2]) and J go into multirow cells,
    then each row lists a sample name with its size and one hit as
    hit[4], hit[3], parsePaperInfo(hit[0]). Missing samples or hits leave
    empty cells. `ds` and `options` are not used.
    """
    ignoredGroups = ("b27", "pathogen")
    for clone in clones:
        vs, js, ds, sample2size = getCloneInfo(clone)
        if clone not in clone2hits:
            continue
        hits = clone2hits[clone]

        # Keep each hit that matches at least one keyword group
        keywordHits = []
        for hit in hits:
            matched = any(
                checkKeywords(hit[0], keywords)
                for group, keywords in group2keywords.iteritems()
                if group not in ignoredGroups
            )
            if matched:
                keywordHits.append(hit)
        if not keywordHits:
            continue

        seq = hits[0][2]
        samples = sorted([iseqlib.properName(name) for name in sample2size.keys()])
        numrow = max(len(samples), len(keywordHits))

        # V, CDR3 and J span all rows of the clone
        f.write(
            "\\multirow{%d}{*}{%s} & \\multirow{%d}{*}{%s} & \\multirow{%d}{*}{%s} & "
            % (numrow, vs, numrow, seq, numrow, js)
        )

        # First row: first sample and first hit
        firstSample = samples[0]
        firstHit = keywordHits[0]
        f.write(
            "%s & %d & %s & %s & %s \\\\\n "
            % (
                firstSample,
                sample2size[iseqlib.properName2name(firstSample)],
                firstHit[4],
                firstHit[3],
                parsePaperInfo(firstHit[0]),
            )
        )

        # Remaining rows: pair up leftover samples and hits, padding with blanks
        for rowIndex in xrange(1, numrow):
            f.write("\\cline{4-8}\n")
            f.write(" & & & ")
            if rowIndex >= len(samples):
                f.write(" & & ")
            else:
                sampleName = samples[rowIndex]
                f.write(" %s & %d &" % (sampleName, sample2size[iseqlib.properName2name(sampleName)]))
            if rowIndex >= len(keywordHits):
                f.write(" & & \\\\\n")
            else:
                hit = keywordHits[rowIndex]
                f.write("%s & %s & %s \\\\\n" % (hit[4], hit[3], parsePaperInfo(hit[0])))
        f.write("\\hline\n")
def tab(f, colnames, sample2row):
    """Write a LaTeX table body of per-sample counts and percentages.

    The 'Total' row is written first, every cell followed by a literal
    100.00, then a horizontal rule. Every other sample except 'controls'
    and 'patients' follows in sorted order: its display name
    (iseqlib.properName) and, per column, the cell plus
    iseqlib.getPc(int(cell), int(total of that column)) with two decimals.
    `colnames` is not used.

    NOTE(review): the indentation of the source was lost; the horizontal
    rule is assumed to follow every sample row -- confirm.
    """
    totals = sample2row['Total']
    totalCells = " & ".join(["%s & 100.00" % cell for cell in totals])
    f.write("%s & %s \\\\\n" % ("Total", totalCells))
    f.write("\\hline\n")

    aggregateRows = ('Total', 'controls', 'patients')
    for name in sorted(sample2row):
        if name in aggregateRows:
            continue
        f.write("%s" % iseqlib.properName(name))
        for column, cell in enumerate(sample2row[name]):
            columnTotal = totals[column]
            percent = iseqlib.getPc(int(cell), int(columnTotal))
            f.write(" & %s & %.2f " % (cell, percent))
        f.write("\\\\\n")
        f.write("\\hline\n")
def _stripTrbPrefix(gene):
    """Return *gene* with a leading 'TRB' removed from each '|'-separated name."""
    shortNames = []
    for name in gene.split('|'):
        # str.lstrip('TRB') would strip any run of the characters T, R, B;
        # only the literal prefix must go.
        if name.startswith('TRB'):
            name = name[len('TRB'):]
        shortNames.append(name)
    return '|'.join(shortNames)


def drawVJdata( fig, axes, sample, vgenes, jgenes, options, minvj, maxvj ):
    """Draw the V-J usage heatmap of *sample* on *axes*, with a colorbar.

    The matrix shown is sample.intersectVJusage when options.abs is set,
    otherwise sample.normIntersectVJusage. Unless options.heatmapNoScale is
    set, colors follow a 20-step 'rainbow' map normalized to minvj..maxvj.
    X ticks are labelled with jgenes and y ticks with vgenes.

    Side effect (kept from the original): the entries of vgenes and jgenes
    are rewritten in place with their 'TRB' prefix removed.
    """
    if options.abs:
        data = sample.intersectVJusage
    else:
        data = sample.normIntersectVJusage

    if options.heatmapNoScale:
        heatmap = axes.imshow(data, interpolation='nearest')
    else:
        norm = mpl.colors.Normalize(vmin=minvj, vmax=maxvj)
        cmap = mpl.cm.get_cmap('rainbow', 20)
        heatmap = axes.imshow(data, interpolation='nearest', cmap=cmap, norm=norm)
    fig.colorbar(heatmap, shrink=0.3)

    immunoseqLib.editSpine(axes)
    axes.set_title("VJ usage of sample %s" % immunoseqLib.properName(sample.name))

    axes.xaxis.set_ticks(list(range(len(jgenes))))
    for i, gene in enumerate(jgenes):
        jgenes[i] = _stripTrbPrefix(gene)
    axes.xaxis.set_ticklabels(jgenes)

    axes.yaxis.set_ticks(list(range(len(vgenes))))
    for i, gene in enumerate(vgenes):
        vgenes[i] = _stripTrbPrefix(gene)
    axes.yaxis.set_ticklabels(vgenes)

    textsize = 'x-small'
    for label in axes.get_xticklabels():
        label.set_fontsize(textsize)
        label.set_rotation(80)
    for label in axes.get_yticklabels():
        label.set_fontsize(textsize)
def _parseMeanStdRow(row, index2colname):
    """Split a row of alternating (mean, std) cells into plottable series.

    Cells equal to 'NA', '' or '-' are skipped. Even-indexed cells are
    means; their x value is the column name as an int, or half the cell
    index when the name is not numeric. Odd-indexed cells are standard
    deviations. Returns (xdata, means, stds, indices of the mean cells).
    """
    missing = ('NA', '', '-')
    xdata, means, stds, meanIndices = [], [], [], []
    for i, cell in enumerate(row):
        if cell in missing:
            continue
        if i % 2 == 0:
            colname = index2colname[i]
            try:
                xdata.append(int(colname))
            except (ValueError, TypeError):
                # Non-numeric column name: fall back to the pair's ordinal
                xdata.append(i // 2)
            meanIndices.append(i)
            means.append(float(cell))
        else:
            stds.append(float(cell))
    return xdata, means, stds, meanIndices


def drawAll(options, outdir, rowname2cells, index2colname):
    """Draw the saturation curves (mean with std error bars) and save them.

    The output basename is 'all' when options.infile is '-', otherwise the
    input file's basename up to its first '.'; options.out is set to that
    name inside *outdir* before the image is created through iseqlib.

    HACK (kept from the original): the plotted samples, the title, axis
    labels, tick positions and axis limits are all hard-coded for one
    specific figure rather than derived from the data.

    Raises KeyError if a hard-coded sample is missing from rowname2cells.
    """
    if options.infile == '-':
        outname = 'all'
    else:
        outname = os.path.basename(options.infile).split('.')[0]
    options.out = os.path.join(outdir, outname)
    fig, pdf = iseqlib.initImage(10.0, 8.0, options)
    axes = fig.add_axes([0.12, 0.15, 0.85, 0.75])

    # HACK: fixed sample list instead of sorted(rowname2cells.keys())
    rownames = ['as11D', 'as16D', 'as1D', 'asBD', 'as20D', 'as15D', 'as8D']
    name2color = sample2color(rownames)
    # HACK: rows with these names are never plotted
    excluded = ('manhattan', 'euclidean', 'binomial', 'kulczynski', 'canberra', 'jaccard')
    markersize = 12.0

    lines = []
    plottedNames = []
    xindices = set()
    for rowname in rownames:
        xdata, means, stds, meanIndices = _parseMeanStdRow(rowname2cells[rowname], index2colname)
        xindices.update(meanIndices)
        if rowname in excluded:
            continue
        color = name2color[rowname]
        axes.errorbar(xdata, means, yerr=stds, color=color, markeredgecolor=color, markersize=markersize, fmt='.')
        # plot() returns a list of lines; keep the Line2D objects themselves
        # so they can serve directly as legend handles.
        lines.extend(axes.plot(xdata, means, color=color, linestyle='-', linewidth=4.0))
        plottedNames.append(rowname)

    iseqlib.editSpine(axes)

    # HACK: figure-specific title and axis labels
    axes.set_title("Sequencing Saturation", size='xx-large', weight='bold')
    axes.set_xlabel("Sampling size (number of sequences, in millions) ", size='x-large', weight='bold')
    axes.set_ylabel("Number of clones (in thousands)", size='x-large', weight='bold')

    fontP = FontProperties()
    fontP.set_size('medium')
    legendNames = [iseqlib.properName(name) for name in plottedNames]
    legend = axes.legend(lines, legendNames, numpoints=1, loc='best', ncol=1, prop=fontP)
    legend.draw_frame(False)

    # Generic ticks from the column names; overridden by the hard-coded
    # ticks just below.
    if xindices:
        xticklabels = [index2colname[i] for i in sorted(xindices)]
        axes.xaxis.set_ticks(list(range(len(xticklabels))))
        axes.xaxis.set_ticklabels(xticklabels)

    # HACK: x ticks every million sequences labelled 0..9, y ticks every
    # 50000 clones labelled in thousands
    xticks = [million * 1000000 for million in range(10)]
    xticklabels = [str(million) for million in range(0, 10)]
    axes.xaxis.set_ticks(xticks)
    axes.xaxis.set_ticklabels(xticklabels)
    yticks = list(range(0, 250000, 50000))
    yticklabels = [str(y) for y in range(0, 250, 50)]
    axes.yaxis.set_ticks(yticks)
    axes.yaxis.set_ticklabels(yticklabels)

    for label in axes.get_xticklabels():
        label.set_fontsize('large')
        label.set_fontweight('bold')
    for label in axes.get_yticklabels():
        label.set_fontsize('large')
        label.set_fontweight('bold')

    # Visibility is passed positionally: the keyword was `b` in old
    # matplotlib and is `visible` in current releases.
    axes.xaxis.grid(True, color='#3F3F3F', linestyle='-', linewidth=0.05)
    axes.yaxis.grid(True, color='#3F3F3F', linestyle='-', linewidth=0.05)

    # HACK: fixed axis limits
    axes.set_xlim(-10, 5100000)
    axes.set_ylim(-10, 205000)
    iseqlib.writeImage(fig, pdf, options)
def drawCloneSizeData( axesList, samples, samplesPerPlot, options, isAbs, yaxisPcReads, yaxisPcClones, cumulative ):
    """Plot clone-size distributions of *samples*, spread over *axesList*.

    Axes number i receives samples[i*samplesPerPlot : (i+1)*samplesPerPlot].
    X positions are the indices of samples[0].countArr (isAbs) or
    samples[0].percentArr (otherwise), which also provide the x tick labels.

    The y series of a sample is chosen by the flags:
      - not yaxisPcReads, isAbs      -> sample.clonesPerCount
      - not yaxisPcReads, not isAbs  -> sample.clonesPerPercent
      - yaxisPcReads, isAbs          -> sample.readsPerCount
      - yaxisPcReads, not isAbs      -> sample.readsPerPercent
    When yaxisPcClones is set, every value is rescaled to
    100*y/sample.totalClones.

    The y axis is log-scaled unless yaxisPcReads is set. A legend is drawn
    only when yaxisPcClones is set and yaxisPcReads is not. `options` is not
    used. Returns None; returns immediately when *samples* is empty.

    Side effect: prints each sample's name and y data to stdout (Python 2
    print statement).

    NOTE(review): the indentation of the source was lost. The nesting below
    is reconstructed; in particular the title if/else and the position of
    the maxy update are assumptions to confirm against the original file.
    """
    if len( samples ) <= 0:
        return
    colors = getColors6()
    # Per-sample color, marker and light color lookups, keyed by sample.name
    s2c, s2m, s2cLight = sample2color( colors )
    textsize = 'medium'
    fontP = FontProperties()
    fontP.set_size(textsize)

    linesDict = {}   # axes index -> list of marker plot() results (legend handles)
    labelsDict = {}  # axes index -> list of display names (legend labels)
    if isAbs:
        xtickLabels = samples[0].countArr
    else:
        xtickLabels = samples[0].percentArr
    #get x location
    xdata = range( 0, len(xtickLabels), 1 )
    # Horizontal shift between samples of one plot; 0.0 disables it
    offset = 0.0
    # Largest y value seen over all samples; used for grid lines and y limits
    maxy = 0
    for i in range( len(axesList) ):
        lines = []
        sampleNames = []
        axes = axesList[i]
        #HACK: log scale for every plot that is not in % of reads
        if not yaxisPcReads:
            axes.set_yscale('log')
        startIndex = i*samplesPerPlot
        endIndex = min( [startIndex + samplesPerPlot, len(samples)] )
        for j in range( startIndex, endIndex ):
            sample = samples[j]
            sampleNames.append( "%s" % (iseqlib.properName(sample.name)))
            ydata = sample.clonesPerCount
            if not yaxisPcReads and not isAbs:
                ydata = sample.clonesPerPercent
            elif yaxisPcReads and isAbs:
                ydata = sample.readsPerCount
            elif yaxisPcReads and not isAbs:
                ydata = sample.readsPerPercent
            if yaxisPcClones:
                ydata = [ 100*y/sample.totalClones for y in ydata ]
            # NOTE(review): raises ValueError if ydata is empty
            maxy = max( [maxy, max(ydata)] )
            currxdata = [x + offset*(j - startIndex) for x in xdata]
            # Both branches currently use the same base marker size
            if isAbs:
                markersize = 8.0
            else:
                markersize = 8.0
            # Adjust the size for '*' and 's' markers
            if s2m[sample.name] == '*':
                markersize += 2.0
            elif s2m[sample.name] == 's':
                markersize -= 2.0
            # NOTE(review): looks like leftover debug output
            print sample.name
            print ydata
            # Markers first (kept as legend handle), then a thin connecting line
            l = axes.plot( currxdata, ydata, color=s2c[sample.name], marker=s2m[sample.name], markeredgecolor=s2c[sample.name], markersize=markersize, linestyle='none' )
            axes.plot( currxdata, ydata, color=s2cLight[sample.name], linestyle='-', linewidth=0.5 )
            lines.append( l )
        linesDict[i] = lines
        labelsDict[i] = sampleNames
        # Panel titles; the "B." title replaces the "A." one for the reads plot
        if cumulative:
            axes.set_title( 'A. Cumulative Distribution of Clones', size="large", weight='bold' )
            if yaxisPcReads:
                axes.set_title( 'B. Cumulative Distribution of Sequences', size="large", weight='bold')
        else:
            axes.set_title( 'Clone Size Distribution', size="xx-large" )

    # Second pass: labels, legend, ticks and hand-drawn grid lines per axes
    for i in range( len(axesList) ):
        axes = axesList[ i ]
        iseqlib.editSpine( axes )
        # Later set_xlabel/set_ylabel calls override the defaults set first
        axes.set_xlabel('Clone size (number of sequences)', size = textsize)
        if not isAbs:
            axes.set_xlabel('Clone size (% of total sequences)', size = textsize, weight='bold')
        axes.set_ylabel('Frequency (number of clones)', size=textsize)
        if yaxisPcReads:
            axes.set_ylabel('Frequency (% of total sequences)', size=textsize, weight='bold')
        if yaxisPcClones:
            axes.set_ylabel('Frequency (% of total clones)', size = textsize, weight='bold')
        #Legend
        if yaxisPcClones and not yaxisPcReads:
            legend = axes.legend( linesDict[i], labelsDict[i], numpoints = 1, loc="best", ncol=1, prop=fontP)
            # NOTE(review): private attribute; presumably meant to hide the legend frame
            legend._drawFrame = False

        #HACK: the first branch (originally "if isAbs:") is disabled
        if False:
            xtickdata = xrange(10, 101, 10)
            #grid:
            for x in xtickdata:
                axes.plot([x,x], [0.0001, maxy], color='#3F3F3F', linestyle='-', linewidth=0.1)
            axes.xaxis.set_ticks(xtickdata)
            minx = 0
            maxx = 101
            axes.set_xlim(minx, maxx)
            axes.xaxis.set_ticklabels([str(x) for x in xtickdata] , size='medium')
        else:
            axes.xaxis.set_ticklabels( xtickLabels, size='medium' )
            # Ticks sit half a unit left of the data points
            axes.xaxis.set_ticks([x + 0.5*offset*(samplesPerPlot-1) - 0.5 for x in xdata] )
            minx = -0.5
            maxx = len(xtickLabels) - 0.5
            xticks = [x + 0.5*offset*(samplesPerPlot-1) - 0.5 for x in xdata]
            # Vertical grid line at every tick except the first
            for xi, x in enumerate(xticks):
                if xi == 0:
                    continue
                axes.plot([x,x], [0.0001, maxy], color='#3F3F3F', linestyle='-', linewidth=0.1)
            axes.set_xlim(minx, maxx)
        #END HACK

        if not yaxisPcReads and not yaxisPcClones:
            # Absolute clone counts
            yticks = [1, 10, 100, 1000, 10000, 50000, 100000, 150000]
            ytickLabels = ["1", "10", "100", "1k", "10k", "50k", "100k", "150k"]
            axes.yaxis.set_ticklabels( ytickLabels )
            axes.yaxis.set_ticks( yticks )
            # Solid line at powers of ten, dash-dot line at the other ticks
            for y in yticks:
                if y in [1, 10, 100, 1000, 10000, 100000]:
                    axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-', linewidth=0.1)
                else:
                    axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-.', linewidth=0.1)
            axes.set_ylim(0.8, maxy + 15000)
        elif yaxisPcClones:
            # Percent of total clones
            yticks = [0.001, 0.01, 0.1, 1, 10, 100]
            ytickLabels = [str(y) for y in yticks]
            axes.yaxis.set_ticks(yticks)
            axes.yaxis.set_ticklabels( ytickLabels )
            for y in yticks:
                # Every current tick is in this list, so the else branch is not taken
                if y in [0.001, 0.01, 0.1, 1, 10, 100]:
                    axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-', linewidth=0.1)
                else:
                    axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-.', linewidth=0.1)
            axes.set_ylim(0.0005, maxy + 25)
        else:
            # Percent of total reads, linear 0..100 axis
            yticks = xrange(0, 101, 20)
            ytickLabels = [str(y) for y in yticks]
            axes.yaxis.set_ticks(yticks)
            axes.yaxis.set_ticklabels( ytickLabels )
            for y in yticks:
                axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-', linewidth=0.1)
            axes.set_ylim(-1, maxy + 1)

        for label in axes.get_xticklabels():
            label.set_fontsize( 'small' )
        for label in axes.get_yticklabels():
            label.set_fontsize( 'small' )
    return
def drawPcaData(axes, rownames, transformedM, options):
    """Scatter the first two principal components on *axes*, one series per group.

    Row i of transformedM supplies the point (r[0], r[1]) and belongs to the
    group rownames[i][1]. Groups are drawn in sorted order. Colors and
    markers come from options.g2c / options.g2m when options.groupDrawInfo
    is set, otherwise from immunoseqLib.getColors6() and a fixed marker
    list. Axis limits are padded by 10% of the data range on each side, and
    the legend uses immunoseqLib.properName of each group.

    Raises ValueError when there are more groups than available colors.
    """
    group2xdata = {}
    group2ydata = {}
    for i, r in enumerate(transformedM):
        group = rownames[i][1]
        group2xdata.setdefault(group, []).append(r[0])
        group2ydata.setdefault(group, []).append(r[1])
    groups = sorted(group2xdata)

    if options.groupDrawInfo:
        colors = [options.g2c[g] for g in groups]
        markers = [options.g2m[g] for g in groups]
    else:
        colors = immunoseqLib.getColors6()
        markers = ['^', 'o', 'd', 'p', 'v', '*', 's']

    lines = []
    minx = float('inf')
    maxx = float('-inf')
    miny = float('inf')
    maxy = float('-inf')
    for i, group in enumerate(groups):
        if i >= len(colors):
            raise ValueError("drawPcaData: Need more color!")
        color = colors[i]
        xdata = group2xdata[group]
        ydata = group2ydata[group]
        minx = min(minx, min(xdata))
        miny = min(miny, min(ydata))
        maxx = max(maxx, max(xdata))
        maxy = max(maxy, max(ydata))
        # plot() returns a list of lines; keep the Line2D objects themselves
        # so they can serve directly as legend handles.
        lines.extend(axes.plot(xdata, ydata, color=color, marker=markers[i], markersize=15.0, markeredgecolor=color, linestyle='none'))

    for label in axes.get_xticklabels():
        label.set_fontsize('large')
        label.set_fontweight('bold')
    for label in axes.get_yticklabels():
        label.set_fontsize('large')
        label.set_fontweight('bold')

    # With no data the extrema are still +/-inf and cannot be used as limits
    if groups:
        rangex = maxx - minx
        axes.set_xlim(minx - rangex*0.1, maxx + rangex*0.1)
        rangey = maxy - miny
        axes.set_ylim(miny - rangey*0.1, maxy + rangey*0.1)

    linenames = [immunoseqLib.properName(g) for g in groups]
    # Attach the legend to the axes that was passed in rather than to
    # whatever pyplot considers the current axes.
    axes.legend(lines, linenames, numpoints=1, loc="best")

    axes.set_xlabel('PC1', size='xx-large', weight='bold')
    axes.set_ylabel('PC2', size='xx-large', weight='bold')
    # Visibility is passed positionally: the keyword was `b` in old
    # matplotlib and is `visible` in current releases.
    axes.yaxis.grid(True, color="#3F3F3F", linestyle='-', linewidth=0.5)
    axes.xaxis.grid(True, color="#3F3F3F", linestyle='-', linewidth=0.5)