def tabHeader(f, ref1, ref2):
    """Write the opening LaTeX markup and the heading row of the table to *f*.

    f    -- writable file-like object (only ``write`` is used).
    ref1 -- reference name used for the third column heading.
    ref2 -- reference name used for the fourth column heading.
    """
    # Environment openers followed by the rule above the heading row.
    preamble = (
        "\\begin{table}\n",
        "\\begin{center}\n",
        "\\scalebox{1}{%\n",
        "\\begin{tabular}{c|r|r|r|r}\n",
        "\\hline\n",
    )
    for line in preamble:
        f.write(line)

    # Column headings: both reference names go through libplot.properName.
    headingNames = (libplot.properName(ref1), libplot.properName(ref2))
    f.write("Sample & Repeat & %s & %s & Total\\\\\n" % headingNames)
    f.write("\\hline\n")
def drawPlot(samplesList, sampleNames, options):
    """Draw a grouped bar chart of 'totalIntraJoin' counts for two references.

    samplesList -- sequence whose first two items are lists of sample
                   elements (objects exposing an ``attrib`` mapping), one
                   list per reference.
    sampleNames -- sample names in the order they are laid out on the x axis.
    options     -- options object; ``outdir`` is read and ``out`` is set to
                   ``<outdir>/nonLinearBp``.

    Returns None. Nothing is drawn when either sample list is empty.
    """
    options.out = os.path.join(options.outdir, "nonLinearBp")

    list1 = samplesList[0]
    list2 = samplesList[1]
    # Check for empty input *before* libplot.initImage so that the early
    # return does not abandon a freshly created figure / output handle.
    # NOTE(review): assumes initImage allocates resources that writeImage
    # later finalises -- confirm against libplot.
    if len(list1) < 1 or len(list2) < 1:
        return

    fig, pdf = libplot.initImage(12.0, 8.0, options)
    axes = fig.add_axes([0.09, 0.2, 0.9, 0.6])

    refname1 = list1[0].attrib['referenceName']
    refname2 = list2[0].attrib['referenceName']

    lines = []
    barwidth = 0.3

    # One value per matching (sample name, element) pair, in sampleNames order.
    # NOTE(review): a name missing from either list leaves the bars shorter
    # than the tick labels -- confirm inputs always cover every sample.
    y1data = []
    y2data = []
    for sample in sampleNames:
        for s in list1:
            if sample == s.attrib['sampleName']:
                y1data.append(int(s.attrib['totalIntraJoin']))
        for s in list2:
            if sample == s.attrib['sampleName']:
                y2data.append(int(s.attrib['totalIntraJoin']))

    x1data = range(len(y1data))
    # The second series sits immediately to the right of the first one.
    x2data = [x + barwidth for x in x1data]

    colors = ["#1F78B4", "#E31A1C"]
    l1 = axes.bar(x1data, y1data, barwidth, color=colors[0], ec='w')
    lines.append(l1[0])
    l2 = axes.bar(x2data, y2data, barwidth, color=colors[1], ec='w')
    lines.append(l2[0])

    libplot.editSpine(axes)
    axes.set_title("Non-linear Breakpoints")

    # Ticks and axis labels
    xlabels = [libplot.properName(name) for name in sampleNames]
    fontP = FontProperties()
    fontP.set_size('small')
    pyplot.xticks(x2data, xlabels, rotation=45, fontproperties=fontP)
    pyplot.yticks(fontproperties=fontP)
    pyplot.xlabel("Samples")
    pyplot.ylabel("Number of breakpoints")
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    axes.yaxis.grid(b=True, color="#A8A8A8", linestyle='-', linewidth=0.25)

    legend = axes.legend(
        lines,
        [libplot.properName(refname1), libplot.properName(refname2)],
        prop=fontP, loc="best")
    legend._drawFrame = False

    libplot.writeImage(fig, pdf, options)
def tab( f, samplesList, sampleNames ):
    """Write two LaTeX rows per sample: a shaded one, then a plain one.

    f           -- writable file-like object.
    samplesList -- sequence of lists of elements exposing ``attrib``.
                   Items 0 and 1 supply 'totalInsertion',
                   'totalInsertionPerAlignedBase' and 'totalIntraJoin'.
                   Items 2 and 3 supply 'totalErrors' and 'totalCalls'.
    sampleNames -- names of the samples to tabulate, in output order.

    The shaded row is built from items 1 and 3 and the plain row from items
    0 and 2. Counts that cannot be found for a sample are written as -1.
    """
    refname1 = samplesList[0][0].attrib['referenceName']
    refname2 = samplesList[1][0].attrib['referenceName']
    getNonLinearOps(samplesList[0])
    getNonLinearOps(samplesList[1])

    shadedRow = "\\multirow{2}{*}{%s} &\\cellcolor[gray]{0.9} %s & \\cellcolor[gray]{0.9} %d (%s) & \\cellcolor[gray]{0.9} %d & \\cellcolor[gray]{0.9} %d (%s) \\\\\n"
    plainRow = "& %s & %d (%s) & %d & %d (%s) \\\\\n"

    for name in sampleNames:
        # Index 1 first (shaded row), then index 0 (plain row).
        for refIndex in (1, 0):
            # Insertion and intra-join statistics for this sample.
            insertions = -1
            insertionRate = -1
            intraJoins = -1
            for elem in samplesList[refIndex]:
                if elem.attrib['sampleName'] != name:
                    continue
                insertions = int( elem.attrib['totalInsertion'] )
                insertionRate = prettyFloat( float( elem.attrib['totalInsertionPerAlignedBase'] ) )
                intraJoins = int( elem.attrib['totalIntraJoin'] )
                # The per-aligned-base intra-join rate is not written to the
                # table, but it is still looked up and formatted here.
                prettyFloat( float( elem.attrib['totalIntraJoinPerAlignedBase'] ) )
                break

            # SNP statistics come from the list two positions further on.
            snps = -1
            snpRate = -1
            for elem in samplesList[refIndex + 2]:
                if elem.attrib['sampleName'] != name:
                    continue
                snps = int( elem.attrib['totalErrors'] )
                calls = float( elem.attrib['totalCalls'] )
                if calls != 0:
                    snpRate = prettyFloat( snps/calls )
                else:
                    snpRate = '0'
                break

            if refIndex == 1:
                f.write( shadedRow % ( libplot.properName(name), libplot.properName(refname2),
                                       insertions, insertionRate, intraJoins, snps, snpRate ) )
            else:
                f.write( plainRow % ( libplot.properName(refname1),
                                      insertions, insertionRate, intraJoins, snps, snpRate ) )
        f.write("\\hline\n\n")
def drawData( axes, stats, options ):
    """Plot one line per sample on *axes* and label the plot.

    axes    -- matplotlib axes to draw on.
    stats   -- sequence of samples; each has ``name`` and ``reference`` and
               iterates over buckets exposing ``mid``, ``correctPerSample``
               and ``correctPerAligned``.
    options -- options object; ``includeCov`` selects which bucket value is
               plotted and ``title`` is the base of the plot title.

    Returns (lines, sampleNames, ymin), where ``lines`` holds the value
    returned by each ``axes.plot`` call and ``ymin`` is the smallest plotted
    y value. Returns None when *stats* is empty.
    """
    palette = libplot.getColors1()
    if len(stats) < 1:
        return

    # Drop one palette entry depending on the reference of the first sample.
    if stats[0].reference == "reference":
        palette.pop(0)
    elif stats[0].reference == 'hg19':
        palette.pop(1)

    lines = []
    sampleNames = []
    ymin = float('inf')
    ref = ''
    for paletteIndex, sample in enumerate(stats):
        sampleNames.append(sample.name)
        if ref == '':
            ref = sample.reference

        # A single pass over the buckets yields both coordinates.
        xdata = []
        ydata = []
        for bucket in sample:
            xdata.append( bucket.mid )
            if options.includeCov:
                value = bucket.correctPerSample
            else:
                value = bucket.correctPerAligned
            ydata.append( value )

        ymin = min( [ymin, min(ydata)] )
        plotted = axes.plot( xdata, ydata, color=palette[paletteIndex], linewidth=1 )
        lines.append(plotted)

    libplot.editSpine( axes )

    title = options.title
    if ref != '':
        title += ', %s' % libplot.properName(ref)
    axes.set_title(title)
    pyplot.xlabel("Distance")
    pyplot.ylabel("Correct proportion")
    return lines, sampleNames, ymin
def tab( f, exps, rexps, samples ):
    """Write one LaTeX multirow group of mapping statistics per sample.

    f       -- writable file-like object.
    exps    -- mapping: sample -> list of experiment records, which must be
               sortable. Each record exposes ``ref``, ``weight``, ``mapped``,
               ``uniquelyMapped``, ``properlyPaired``,
               ``uniquelyMappedAndProperlyPaired`` and ``snps``.
    rexps   -- mapping: sample -> a single record, appended after the sorted
               ones.
    samples -- iterable of sample names, in output order.

    Rows whose ``ref`` is exactly 'cactusRef' show the weight after the
    name; the one with weight 2 is highlighted in cyan.
    """
    prefix = 'cactusRef'
    plainRow = "& %s & %s & %s & %s & %s & %s \\\\\n"
    cyanRow = "& \\cellcolor{cyan!30} %s%d & \\cellcolor{cyan!30} %s & \\cellcolor{cyan!30} %s & \\cellcolor{cyan!30} %s & \\cellcolor{cyan!30} %s & \\cellcolor{cyan!30} %s \\\\\n"
    weightedRow = "& %s%d & %s & %s & %s & %s & %s \\\\\n"

    for sample in samples:
        # Work on a copy so the caller's list is not reordered.
        expList = copy.copy(exps[sample])
        expList.sort()
        expList.append( rexps[sample] )
        f.write( "\\multirow{%d}{*}{%s} " %( len(expList), sample ) )

        for e in expList:
            ref = libplot.properName(e.ref)
            if re.search(prefix, e.ref):
                # Remove the literal prefix. str.lstrip(prefix) would strip
                # any leading run of the *characters* in 'cactusRef' and
                # could eat the beginning of the suffix as well.
                if e.ref.startswith(prefix):
                    suffix = e.ref[len(prefix):]
                else:
                    suffix = e.ref
                ref = "%s %s" % (libplot.properName(prefix), suffix)

            counts = ( libplot.prettyInt(e.mapped),
                       libplot.prettyInt(e.uniquelyMapped),
                       libplot.prettyInt(e.properlyPaired),
                       libplot.prettyInt(e.uniquelyMappedAndProperlyPaired),
                       libplot.prettyInt(e.snps) )

            if e.ref != 'cactusRef':
                f.write( plainRow % ((ref,) + counts) )
            elif e.weight == 2:
                f.write( cyanRow % ((ref, e.weight) + counts) )
            else:
                f.write( weightedRow % ((ref, e.weight) + counts) )
        f.write("\\hline\n")
def drawCnvPlot( sample, options ):
    """Draw the copy-number-variation figure for one sample.

    sample  -- element exposing ``attrib`` with 'sampleName' and
               'referenceName'.
    options -- options object; ``outdir`` is read and ``out`` is set to
               ``<outdir>/cnv_<sampleName>``.
    """
    attrs = sample.attrib
    name = attrs[ 'sampleName' ]
    options.out = os.path.join( options.outdir, 'cnv_%s' % name )
    fig, pdf = libplot.initImage( 11.0, 3.25, options )

    properSample = libplot.properName( name )
    properRef = libplot.properName( attrs['referenceName'] )
    title = "Copy Number Variation between %s and %s" % ( properSample, properRef )

    cnvDict, minCn, maxCn = getSampleData( sample )
    axDict = setAxes( fig, cnvDict.keys(), options )

    # Every axes entry except the one keyed 'bg' gets its own plot.
    for region in axDict:
        if region == 'bg':
            continue
        drawOneCnvPlot( region, axDict[ region ], cnvDict[ region ], options, minCn, maxCn )

    drawAxisLabels( axDict, cnvDict, options, title, maxCn )
    setAxisLimits( axDict, minCn, maxCn )
    libplot.writeImage( fig, pdf, options )
def drawLegend( axes, lines, sampleNames, options ):
    """Narrow *axes* to 80% of its width and add a frameless legend.

    axes        -- matplotlib axes whose position is shrunk horizontally.
    lines       -- legend handles.
    sampleNames -- names used as labels when no explicit labels are given.
    options     -- options object; ``legendElements`` supplies custom labels
                   when set.

    If ``legendElements`` is set but its length differs from ``len(lines)``,
    a message is written to stderr and no legend is drawn.
    """
    fontP = FontProperties()
    fontP.set_size('small')

    # Shrink the axes horizontally; the height and origin are unchanged.
    box = axes.get_position()
    axes.set_position([box.x0, box.y0, box.width * 0.8, box.height])

    labels = None
    if not options.legendElements:
        labels = [ libplot.properName(n) for n in sampleNames ]
    elif len(lines) == len(options.legendElements):
        labels = options.legendElements

    if labels is None:
        sys.stderr.write('Number of items in --legendElements is different '
                         'from the number of lines plotted\n' )
        return

    legend = pyplot.legend( lines, labels, prop=fontP, loc="best", bbox_to_anchor=(1,0.5) )
    legend._drawFrame = False
def tab(f, stats, sample2repeat):
    """Write one LaTeX row per sample, alternating plain and shaded rows.

    f             -- writable file-like object.
    stats         -- iterable of samples exposing ``name``, ``totalBases``,
                     ``referenceBasesMapped`` and
                     ``otherReferenceBasesMapped``.
    sample2repeat -- mapping: sample name -> indexable record; item 1 is
                     shown as the repeat count and item 2 as its percentage.
                     Samples without an entry show 'NA'.

    The first row is plain, the second shaded, and so on. A closing rule is
    written after the last row.
    """
    plainRow = "%s & %s %s & %s (%s \\%%) & %s (%s \\%%) & %s \\\\\n"
    shadedRow = "\\cellcolor[gray]{0.9} %s & \\cellcolor[gray]{0.9} %s %s & \\cellcolor[gray]{0.9} %s (%s \\%%) & \\cellcolor[gray]{0.9} %s (%s \\%%) & \\cellcolor[gray]{0.9} %s \\\\\n"

    for rowIndex, sample in enumerate(stats):
        if sample.name in sample2repeat:
            repeat = libplot.prettyInt( sample2repeat[sample.name][1] )
            repeatPc = "(%.2f \\%%)" % sample2repeat[sample.name][2]
        else:
            repeat = 'NA'
            repeatPc = ''

        otherRef = libplot.prettyInt(sample.otherReferenceBasesMapped)
        otherRefPc = "%.2f" % (100.0*sample.otherReferenceBasesMapped/sample.totalBases)
        ref = libplot.prettyInt(sample.referenceBasesMapped)
        refPc = "%.2f" % (100.0*sample.referenceBasesMapped/sample.totalBases)
        total = libplot.prettyInt(sample.totalBases)
        sampleName = libplot.properName(sample.name)

        cells = (sampleName, repeat, repeatPc, otherRef, otherRefPc, ref, refPc, total)
        # Even-numbered rows (0, 2, ...) are plain; odd-numbered ones shaded.
        if rowIndex % 2 == 0:
            f.write(plainRow % cells)
        else:
            f.write(shadedRow % cells)
    f.write("\\hline\n")
def drawN50Plot( options, samples ):
    """Draw the N50 figure for *samples*, ordered by ``options.sortkey``.

    options -- options object; ``sortkey``, ``outdir``, ``prefix`` and
               ``keys`` are read, and ``out`` is set to
               ``<outdir>/<prefix>_<referenceName>``.
    samples -- iterable of elements exposing ``attrib``; the attribute named
               by ``options.sortkey`` must convert to int.

    Samples are plotted in decreasing order of the sort key. Nothing is
    drawn when there are no samples.
    """
    def sortValue( s ):
        return int( s.attrib[ options.sortkey ] )

    samples = sorted( samples, key=sortValue, reverse=True )
    sampleNames = getSampleNames( samples )
    numSamples = len(samples)
    if numSamples < 1:
        return

    refname = samples[0].attrib[ 'referenceName' ]
    options.out = os.path.join( options.outdir, options.prefix + '_' + refname )
    fig, pdf = libplot.initImage( 8.0, 10.0, options )
    axes = fig.add_axes( [0.12, 0.1, 0.85, 0.85] )

    lines = drawN50data( axes, samples, options )
    axes.set_title( "N50" )

    # Legend: shrink the axes to 80% of its width, then add a frameless legend.
    fontP = FontProperties()
    fontP.set_size( 'small' )
    box = axes.get_position()
    axes.set_position( [box.x0, box.y0, box.width*0.8, box.height] )
    legend = pyplot.legend( lines, options.keys, numpoints=1, prop=fontP, loc="best", bbox_to_anchor=(1, 0.9) )
    legend._drawFrame = False

    # One tick per sample, labelled vertically.
    axes.set_xticks( range( 0, numSamples ) )
    axes.set_xticklabels( [ libplot.properName(n) for n in sampleNames ] )
    for tickLabel in axes.xaxis.get_ticklabels():
        tickLabel.set_rotation( 90 )
    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )
    axes.set_xlim( -0.5, numSamples - 0.5 )

    libplot.writeImage( fig, pdf, options )
def drawPlot(rexps, exps, options, outfile, type):
    """Draw a dot plot of one statistic for every sample and reference.

    rexps   -- mapping: sample -> record exposing ``ref`` and ``total``; may
               contain an 'average' entry, which is always plotted last.
    exps    -- passed through to ``getData`` together with *rexps*.
    options -- options object; ``out`` is set to *outfile*.
    outfile -- output path (without extension) for the image.
    type    -- one of 'mapped', 'uniquelyMapped', 'properlyPaired',
               'uniquelyMappedAndProperlyPaired' or 'snps'; any other value
               raises KeyError.

    Samples are ordered by decreasing ``total``. Nothing is drawn when
    *rexps* is empty.
    """
    options.out = outfile

    titleDict = {'mapped':'Mapped reads',
                 'uniquelyMapped':'Uniquely Mapped Reads',
                 'properlyPaired':'Properly Paired Reads',
                 'uniquelyMappedAndProperlyPaired':'Uniquely Mapped And Properly Paired Reads',
                 'snps':'SNPs'}
    title = titleDict[type]

    # Check for empty input *before* libplot.initImage so that the early
    # return does not abandon a freshly created figure / output handle.
    # NOTE(review): assumes initImage allocates resources that writeImage
    # later finalises -- confirm against libplot.
    if len(rexps) < 1:
        return

    fig, pdf = libplot.initImage( 8.0, 10.0, options )
    axes = fig.add_axes( [0.12, 0.14, 0.85, 0.8] )
    axes.set_title( title )

    sampleNotherRefmapped = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        exp = rexps[sample]
        ref = exp.ref
        sampleNotherRefmapped.append( (sample, exp.total) )
    otherRefName = libplot.properName( ref )

    sampleNotherRefmapped = sorted( sampleNotherRefmapped, key=lambda item: item[1], reverse=True )
    samples = [ item[0] for item in sampleNotherRefmapped ]
    samples.append( 'average' )
    xdata = range( 0, len(samples) )

    colors = libplot.getColors4()
    lines = []
    ydataList, miny, maxy = getData(samples, exps, rexps, type)
    refs = sorted( ydataList.keys() )

    # Horizontal shift between the dots of successive references.
    offset = 0.12

    # Large values are plotted in units of 10**scale.
    scale = -1
    if miny > 1000:
        scale = len( str(int(miny)) ) - 1

    def scaled( values ):
        if scale > 0:
            return [ float(y)/10**scale for y in values ]
        return values

    # Draw a faint line connecting the dots of each sample (each bin):
    binXdataList = [ [] for x in xdata ]
    binYdataList = [ [] for x in xdata ]
    for i, refName in enumerate(refs):
        ydata = scaled( ydataList[refName] )
        for j, x in enumerate(xdata):
            binXdataList[j].append( x + offset*i )
            binYdataList[j].append( ydata[j] )
    for binX, binY in zip( binXdataList, binYdataList ):
        axes.plot( binX, binY, color="#CCCCCC", linestyle='-', linewidth=0.005 )

    # Draw main plots. Palette entry 0 is skipped, as before.
    for i, refName in enumerate(refs):
        xdatai = [ x + offset*i for x in xdata ]
        ydata = scaled( ydataList[refName] )
        l = axes.plot( xdatai, ydata, color=colors[i + 1], marker='.', markersize=16.0, linestyle='none')
        lines.append(l)

    if scale > 0:
        miny = float(miny)/10**scale
        maxy = float(maxy)/10**scale

    # Draw horizontal line at y = 0:
    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(refs) + offset
    axes.plot( [xmin, xmax], [0,0], color="#6B6B6B", linewidth=0.005)

    fontP = FontProperties()
    fontP.set_size('x-small')

    # Pad the y range: 5% below and 20% above.
    yrange = maxy - miny
    miny = miny - yrange*0.05
    maxy = maxy + yrange*0.2

    axes.set_xlim( xmin, xmax )
    axes.set_ylim( miny, maxy )

    libplot.editSpine( axes )

    axes.set_xticks( [ i + offset*(len(refs)/2.0) for i in range(0, len(samples)) ] )
    axes.set_xticklabels( samples )
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )

    prefix = 'cactusRef'
    properRefs = []
    for r in refs:
        if re.search(prefix, r):
            # Remove the literal prefix. str.lstrip(prefix) would strip any
            # leading run of the *characters* in 'cactusRef' and could eat
            # the beginning of the suffix as well.
            if r.startswith(prefix):
                r = r[len(prefix):]
            properRefs.append( "%s %s" %(libplot.properName(prefix), r) )
        else:
            properRefs.append( libplot.properName(r) )

    legend = pyplot.legend( lines, properRefs, numpoints=1, loc='best', prop=fontP )
    legend._drawFrame = False

    axes.set_xlabel( 'Samples' )
    axes.set_ylabel( 'Percentage of mapping difference between C. Ref. and %s' % otherRefName )
    if scale > 0:
        axes.set_ylabel( 'Event counts (x%d)' %(10**scale) )
    axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    libplot.writeImage( fig, pdf, options )
def drawSamplePlot(rexps, exps, options, outfile, type):
    """Draw the per-sample SNP rate for the other reference and cactusRef.

    rexps   -- mapping: sample -> record exposing ``ref`` and ``snprate``;
               must also contain an 'average' entry, which is plotted last.
    exps    -- mapping: sample -> list of records exposing ``ref``,
               ``weight`` and ``snprate``; the record with ref 'cactusRef'
               and weight 2 supplies the second series.
    options -- options object; ``out`` is set to *outfile*.
    outfile -- output path (without extension) for the image.
    type    -- unused; kept so the signature matches the caller.

    Samples are ordered by decreasing ``snprate``. Nothing is drawn when
    *rexps* is empty.
    """
    options.out = outfile

    # Check for empty input *before* libplot.initImage so that the early
    # return does not abandon a freshly created figure / output handle.
    # NOTE(review): assumes initImage allocates resources that writeImage
    # later finalises -- confirm against libplot.
    if len(rexps) < 1:
        return

    fig, pdf = libplot.initImage( 11.2, 10.0, options )
    axes = fig.add_axes( [0.14, 0.12, 0.8, 0.8] )
    axes.set_title( "SNP Rate Using BWA Mapping" )

    sampleNsize = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        exp = rexps[sample]
        ref = exp.ref
        sampleNsize.append( (sample, exp.snprate) )
    otherRefName = ref

    sampleNsize = sorted( sampleNsize, key=lambda item: item[1], reverse=True )
    samples = [ item[0] for item in sampleNsize ]
    samples.append( 'average' )

    # Get ydata:
    ydata1 = [] #otherRef (hg19, apd, ...)
    ydata2 = [] #cactusRef, weight 2
    for sample in samples:
        explist = exps[sample]
        otherRef = rexps[sample]
        ydata1.append( otherRef.snprate )
        for e in explist:
            if e.ref == 'cactusRef' and e.weight == 2:
                ydata2.append( e.snprate )

    miny = min([min(ydata1), min(ydata2)])
    maxy = max([max(ydata1), max(ydata2)])

    xdata = range( 0, len(samples) )
    colors = ["#1F78B4", "#E31A1C"] #blue (other reference), red (cactusRef)

    # Large values are plotted in units of 10**scale.
    scale = -1
    if miny > 1000:
        scale = len( str(int(miny)) ) - 1
    if scale > 0:
        ydata1 = [ float(y)/10**scale for y in ydata1 ]
        ydata2 = [ float(y)/10**scale for y in ydata2 ]

    lines = []
    lines.append( axes.plot(xdata, ydata1, color=colors[0], marker=".", markersize=16.0, linestyle='none') )
    lines.append( axes.plot(xdata, ydata2, color=colors[1], marker=".", markersize=16.0, linestyle='none') )

    if scale > 0:
        miny = float(miny)/10**scale
        maxy = float(maxy)/10**scale

    fontP = FontProperties()
    fontP.set_size('x-small')

    axes.set_xlim( -0.4, len(samples) - 0.6 )
    # Pad the y range: 5% below and 10% above.
    yrange = maxy - miny
    miny = miny - yrange*0.05
    maxy = maxy + yrange*0.1
    axes.set_ylim( miny, maxy )

    libplot.editSpine( axes )

    axes.set_xticks( xdata )
    axes.set_xticklabels( samples )
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.yaxis.set_ticks_position( 'left' )
    axes.xaxis.set_ticks_position( 'bottom' )

    legend = pyplot.legend( lines, [libplot.properName(otherRefName), libplot.properName("cactusRef")], numpoints=1, loc='best', prop=fontP)
    legend._drawFrame = False

    axes.set_xlabel( 'Samples' )
    axes.set_ylabel( 'SNPs Per Site' )
    if scale > 0:
        axes.set_ylabel( 'Snp counts (x%d)' %(10**scale) )
    axes.xaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    libplot.writeImage( fig, pdf, options )
def drawData( axesList, samples, samplesPerPlot, options, proportion, culm ):
    """Plot insertion and deletion size distributions, one line per sample.

    axesList       -- list with an even number of matplotlib axes. The first
                      half receives insertion plots and the second half the
                      matching deletion plots.
    samples        -- sequence of elements exposing ``attrib`` with
                      'sampleName', 'referenceName',
                      'insertionSizeDistribution' and
                      'deletionSizeDistribution' (whitespace-separated ints).
    samplesPerPlot -- number of consecutive samples drawn on each axes pair.
    options        -- options object; ``xlogscale`` and ``ylogscale`` are
                      compared with the string "true".
    proportion     -- passed to getFreq; also selects the y-axis label.
    culm           -- passed to getFreq.

    Returns None. Exits the process with status 1 when the number of axes is
    odd. When both *proportion* and *culm* are true, summary statistics about
    large indels are written to stderr.
    """
    largeIns = [] #List of proportion of total indel bases that indels >= 1000bp take up, each element is for each sample
    largeDels = []
    if len(axesList) %2 != 0:
        sys.stderr.write( 'Number of axes must be even. Got %d\n' %len(axesList) )
        sys.exit( 1 )

    colors = libplot.getColors1()
    if len(samples) < 1:
        return
    # Drop one palette entry depending on the reference of the first sample.
    if samples[0].attrib["referenceName"] == "reference":
        colors.pop(0)
    elif samples[0].attrib["referenceName"] == 'hg19':
        colors.pop(1)

    # c indexes the palette and advances once per sample across all plots.
    # NOTE(review): colors[c] raises IndexError if there are more samples
    # than palette entries -- confirm the palette is always long enough.
    c = -1
    textsize = 'x-small'
    linesDict = {}
    labelsDict = {}
    # Running extremes over every plotted series; used for the axis limits.
    xmax = float('-inf')
    ymax = float('-inf')
    xmin = float('inf')
    ymin = float('inf')
    # NOTE(review): len(axesList)/2 relies on Python 2 integer division.
    for i in range( len(axesList)/2 ):
        inslines = []
        dellines = []
        sampleNames = []
        insAxes = axesList[ i ]
        delAxes = axesList[ i + len(axesList)/2 ]
        # Slice of samples that belongs to this axes pair.
        startIndex = i * samplesPerPlot
        endIndex = min( [startIndex + samplesPerPlot, len(samples)] )
        for j in range( startIndex, endIndex ):
            sample = samples[j]
            sampleNames.append( sample.attrib[ 'sampleName' ] )
            insDist = [int(val) for val in sample.attrib[ 'insertionSizeDistribution' ].split()]
            insXdata, insYdata = getFreq( insDist, proportion, culm )
            delDist = [int(val) for val in sample.attrib[ 'deletionSizeDistribution' ].split()]
            delXdata, delYdata = getFreq( delDist, proportion, culm )

            #LARGE INDELS, FOR paper STATS, not related to the plot:
            if proportion and culm:
                largeIns.append( getLargeIndelProp(insXdata, insYdata) )
                largeDels.append( getLargeIndelProp(delXdata, delYdata) )

            c += 1
            il = insAxes.plot( insXdata, insYdata, color=colors[c] )
            dl = delAxes.plot( delXdata, delYdata, color=colors[c] )

            inslines.append( il )
            dellines.append( dl )

            # Update the running extremes; an empty series contributes the
            # current extreme, i.e. leaves it unchanged.
            insXmax = xmax
            delXmax = xmax
            if len(insXdata) >0:
                insXmax = max(insXdata)
            if len(delXdata) > 0:
                delXmax = max(delXdata)
            xmax = max( [xmax, insXmax, delXmax] )

            insYmax = ymax
            delYmax = ymax
            if len(insYdata) >0:
                insYmax = max(insYdata)
            if len(delYdata) > 0:
                delYmax = max(delYdata)
            ymax = max( [ymax, insYmax, delYmax] )

            insXmin = xmin
            delXmin = xmin
            if len(insXdata) >0:
                insXmin = min(insXdata)
            if len(delXdata) > 0:
                delXmin = min(delXdata)
            xmin = min( [xmin, insXmin, delXmin] )

            insYmin = ymin
            delYmin = ymin
            if len(insYdata) >0:
                insYmin = min(insYdata)
            if len(delYdata) > 0:
                delYmin = min(delYdata)
            ymin = min( [ymin, insYmin, delYmin] )

        # Legend handles and labels, keyed by the index of the axes in axesList.
        linesDict[ i ] = inslines
        labelsDict[ i ] = sampleNames
        linesDict[ i + len(axesList)/2 ] = dellines
        labelsDict[ i + len(axesList)/2 ] = sampleNames
        # Only the first axes of each half carries a title.
        if i == 0:
            insAxes.set_title( 'Insertions' )
            delAxes.set_title( 'Deletions' )

    # Second pass: scales, labels, grid, legend, ticks and limits per axes.
    for i in range( len(axesList) ):
        axes = axesList[ i ]
        if options.xlogscale == "true":
            axes.set_xscale('log')
        if options.ylogscale == "true":
            axes.set_yscale('log')
        libplot.editSpine( axes )

        axes.set_xlabel('Length (bp)', size = textsize)
        if not proportion:
            axes.set_ylabel('Event number', size = textsize)
        else:
            axes.set_ylabel('Number of positions', size = textsize)
        axes.xaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
        axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

        #Legend
        # NOTE(review): 'upper right' is passed positionally as the legend
        # location -- confirm the installed matplotlib still accepts this.
        legend = axes.legend( linesDict[ i ], [ libplot.properName(n) for n in labelsDict[ i ]], 'upper right', ncol=3 )
        for t in legend.get_texts():
            t.set_fontsize('x-small')
        legend._drawFrame = False

        # Ticks at powers of ten; the exponent range is taken from the number
        # of characters in str(max).
        # NOTE(review): that is only the digit count when the maximum is an
        # int; a float such as 1000.0 gives a larger scale -- confirm what
        # getFreq returns.
        if options.xlogscale == "true":
            scale = len(str(xmax)) -1
            xticks = [ 10**x for x in range(scale + 1) ]
            axes.set_xticks( xticks )
        if options.ylogscale == "true":
            scale = len(str(ymax)) -1
            yticks = [ 10**y for y in range(scale + 1) ]
            axes.set_yticks( yticks )

        for label in axes.get_xticklabels():
            label.set_fontsize( textsize )
        for label in axes.get_yticklabels():
            label.set_fontsize( textsize )

        # All axes share the same limits, taken from the running extremes.
        axes.set_ylim( ymin, ymax )
        if proportion and not culm:
            axes.set_xlim( xmin, 100 )
        else:
            axes.set_xlim( xmin, xmax )

    #PRINT THE LARGE INDEL STATS:
    if proportion and culm:
        sys.stderr.write("largeIndelStats\n")
        sys.stderr.write("Large insertions: %f\n" %( sum(largeIns)/len(largeIns) ))
        sys.stderr.write("Large deletions: %f\n" %( sum(largeDels)/len(largeDels) ))
        largeIndels = [ (largeIns[i] + largeDels[i])/2.0 for i in range(len(largeIns)) ]
        sys.stderr.write("IndelsAverage: %f\n" %( sum(largeIndels)/len(largeIndels) ))
    return
def drawSnpPlot(options, samples1, samples2):
    """Draw SNPs-per-site for two references on a broken y axis.

    options  -- options object; ``numOutliners``, ``files`` and ``outdir``
                are read and ``out`` is set from the base name of
                ``files[0]`` with a leading 'snpStats', leading underscores
                and a trailing '.xml' removed.
    samples1 -- samples measured against the first reference; each exposes
                ``name``, ``refname`` and ``errPerSite``.
    samples2 -- the same samples measured against the second reference.

    Columns are: samples in decreasing ``errPerSite`` order (from samples1),
    the first reference itself (0), the average, and finally 'panTro3' when
    that sample is present. The last ``numOutliners`` columns define the
    upper (outlier) axes range and the remaining ones the lower range.

    Returns None. Exits with status 1 when the two inputs do not cover the
    same samples. Nothing is drawn when there is nothing to compare.
    """
    # Decreasing order of SNP rate.
    samples1 = sorted( samples1, key=lambda s:s.errPerSite, reverse=True )
    if len( samples1 ) < 1:
        return

    # Take the chimp sample out; it is re-added as the last column.
    chimpSample = None
    for i, s in enumerate(samples1):
        if s.name == "panTro3":
            chimpSample = samples1.pop(i)
            break

    # Without any remaining sample there is no reference name to read.
    if len( samples1 ) < 1 or len( samples2 ) < 1:
        return

    refname1 = samples1[0].refname
    refname2 = samples2[0].refname

    y1data = [ s.errPerSite for s in samples1 ]
    xticklabels = [ s.name for s in samples1 ]
    y1data.append(0) #snps of refname1 w.r.t itself (which is 0)
    xticklabels.append(refname1)

    y2data = []
    for name in xticklabels:
        if name == refname2:
            y2data.append(0) #snps of refname2 w.r.t itself
        for s2 in samples2:
            if s2.name == name:
                y2data.append(s2.errPerSite)
                break

    if len(y1data) != len(y2data):
        sys.stderr.write("Input files have different number of samples: %d, %d\n" %(len(y1data), len(y2data)))
        sys.exit(1)

    # Add the average column. The divisor excludes the reference's own 0 entry.
    if len(y1data) >= 2:
        y1avr = sum(y1data)/float(len(y1data) -1)
        y1data.append(y1avr)
        y2avr = sum(y2data)/float(len(y2data) -1)
        y2data.append(y2avr)
        # Summary stats to stderr: second-smallest value, maximum, average.
        sys.stderr.write("%s\t%f\t%f\t%f\n" %( refname1, sorted(y1data)[1] , max(y1data), y1avr ))
        sys.stderr.write("%s\t%f\t%f\t%f\n" %( refname2, sorted(y2data)[1] , max(y2data), y2avr ))
        xticklabels.append('average')

    # Add the chimp column, but only when a chimp sample was actually found;
    # previously a missing 'panTro3' raised AttributeError on None.
    if chimpSample is not None:
        samples1.append(chimpSample)
        y1data.append( chimpSample.errPerSite )
        for s in samples2:
            if s.name == 'panTro3':
                y2data.append( s.errPerSite )
        xticklabels.append( 'panTro3' )

    # Min, max values of the outlier columns and of the remaining columns:
    num = options.numOutliners
    numcols = len(y1data)
    minOutlier = min( [ min(y1data[numcols -num:]), min(y2data[numcols - num:]) ] ) - 0.001
    maxOutlier = max( [ max(y1data[numcols -num:]), max(y2data[numcols - num:]) ] ) + 0.001
    minMajority = min( [min(y1data[:numcols - num]), min(y2data[: numcols - num])] ) - 0.001
    maxMajority = max( [max(y1data[:numcols - num]), max(y2data[: numcols - num])] ) + 0.001
    if minMajority < 0:
        minMajority = -0.0001

    # Output name: remove the literal prefix/suffix. str.lstrip / str.rstrip
    # strip *sets of characters* and could eat part of the name itself.
    outname = os.path.basename(options.files[0])
    if outname.startswith('snpStats'):
        outname = outname[len('snpStats'):]
    outname = outname.lstrip('_')
    if outname.endswith('.xml'):
        outname = outname[:-len('.xml')]
    options.out = os.path.join( options.outdir, '%s' % outname )

    fig, pdf = libplot.initImage( 11.2, 10.0, options )
    ax, ax2 = setAxes(fig, maxOutlier - minOutlier, maxMajority - minMajority)

    # Both axes show the same data; their y limits select what is visible.
    l2 = ax.plot( y2data, marker='.', markersize=14.0, linestyle='none', color="#E31A1C" )#Red
    l1 = ax.plot( y1data, marker='.', markersize=14.0, linestyle='none', color="#1F78B4" )#Blue
    ax2.plot( y2data, marker='.', markersize=14.0, linestyle='none', color="#E31A1C" )
    ax2.plot( y1data, marker='.', markersize=14.0, linestyle='none', color="#1F78B4" )

    #Legend
    fontP = FontProperties()
    fontP.set_size("x-small")
    # The location is passed by keyword, which every matplotlib release accepts.
    legend = ax.legend( [l1, l2], [libplot.properName(refname1), libplot.properName(refname2)],
                        loc='upper left', numpoints=1, prop=fontP )
    legend._drawFrame = False

    d = .0001 # how big to make the diagonal lines in axes coordinates
    ax.plot( (-1, 0), (minOutlier +d, minOutlier - d), color = "k", clip_on=False )
    ax2.plot( (-1, 0), (maxMajority +d, maxMajority - d), color = "k", clip_on=False )

    ax.set_ylim( minOutlier, maxOutlier ) # outliers only
    ax.set_xlim( -0.5, len(xticklabels) -0.5 )
    dummyxticklabels = [ "" for l in xticklabels ]
    ax.set_xticklabels( dummyxticklabels )

    # Make the y ticks of the top (outlier) plot use the same step as the other plot:
    step = 0.001
    ytickpositions = []
    ytickpos = 0
    while ytickpos < maxOutlier:
        if ytickpos >= minOutlier:
            ytickpositions.append(ytickpos)
        ytickpos += step
    ax.set_yticks( ytickpositions )

    ax2.set_ylim( minMajority, maxMajority )
    ax2.set_xlim( -0.5, len(xticklabels) -0.5 )

    # hide the spines between ax and ax2
    ax.spines['bottom'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.yaxis.set_ticks_position( 'left' )
    ax.xaxis.set_ticks_position( 'none' )

    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.xaxis.tick_bottom()
    ax2.yaxis.set_ticks_position( 'left' )

    ax2.set_xticks( range( 0, len(xticklabels) ) )
    properxticklabels = [ libplot.properName(l) for l in xticklabels ]
    ax2.set_xticklabels( properxticklabels )
    # Make the x ticks of the top plot the same as those of the other plot:
    ax.set_xticks( range(0, len(xticklabels)) )

    for label in ax2.xaxis.get_ticklabels():
        label.set_rotation( 90 )

    ax.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax.xaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax2.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax2.xaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    ax2.set_xlabel( 'Samples' )
    ax2.set_ylabel( 'SNPs per site' )
    title = 'SNPs'
    ax.set_title( title )

    libplot.writeImage( fig, pdf, options )
def drawPlot( options, samples1, samples2, type ):
    """Draw insertions (or deletions) per site for two references.

    options  -- options object; ``files`` and ``outdir`` are read and
                ``out`` is set to ``<outdir>/<type>_<name>``, where <name>
                is the base name of ``files[0]`` with a leading 'pathStats',
                leading underscores and a trailing '.xml' removed.
    samples1 -- samples measured against the first reference; each exposes
                ``name``, ``refname``, ``ins`` and ``dels``.
    samples2 -- the same samples measured against the second reference.
    type     -- 'insertion' plots ``ins``; any other value plots ``dels``.

    Columns are: samples in decreasing order of the plotted value (from
    samples1), the first reference itself (0), the average, and finally
    'panTro3' when that sample is present. The averages are also written to
    stdout.

    Returns None. Exits with status 1 when the two inputs do not cover the
    same samples. Nothing is drawn when there is nothing to compare.
    """
    # Use one selector everywhere so sorting, both series and the labels
    # always agree on the attribute being plotted.
    isInsertion = (type == 'insertion')
    if isInsertion:
        value = lambda s: s.ins
    else:
        value = lambda s: s.dels

    # Decreasing order of the plotted value.
    samples1 = sorted( samples1, key=value, reverse=True )
    if len( samples1 ) < 1:
        return

    # Take the chimp sample out; it is re-added as the last column.
    chimpSample = None
    for i, s in enumerate(samples1):
        if s.name == 'panTro3':
            chimpSample = samples1.pop(i)
            break

    # Without any remaining sample there is no reference name to read.
    if len( samples1 ) < 1 or len( samples2 ) < 1:
        return

    refname1 = samples1[0].refname
    refname2 = samples2[0].refname

    y1data = [ value(s) for s in samples1 ]
    xticklabels = [ s.name for s in samples1 ]
    #indel of refname1 w.r.t itself (0)
    y1data.append(0)
    xticklabels.append(refname1)

    y2data = []
    for name in xticklabels:
        if name == refname2: #indel of refname2 w.r.t itself (0)
            y2data.append(0)
        for s in samples2:
            if s.name == name:
                y2data.append( value(s) )
                break

    if len(xticklabels) != len(y2data):
        sys.stderr.write("Input file 1 and 2 do not have the same set of samples\n")
        sys.exit( 1 )

    # Add the average column. The divisor excludes the reference's own 0 entry.
    y1avr = sum(y1data)/float(len(y1data) - 1)
    y1data.append(y1avr)
    xticklabels.append('average')
    y2avr = sum(y2data)/float(len(y2data) - 1)
    y2data.append(y2avr)
    sys.stdout.write( "%s Average: %s %f, %s %f\n" %(type, refname1, y1avr, refname2, y2avr) )

    # Add the chimp column, but only when a chimp sample was actually found;
    # previously a missing 'panTro3' raised AttributeError on None.
    if chimpSample is not None:
        samples1.append(chimpSample)
        y1data.append( value(chimpSample) )
        for s in samples2:
            if s.name == 'panTro3':
                y2data.append( value(s) )
        xticklabels.append("panTro3")

    minMajority = min( [min(y2data), min(y1data)] ) - 0.0001
    maxMajority = max( [max(y2data), max(y1data)] ) + 0.0001

    # Output name: remove the literal prefix/suffix. str.lstrip / str.rstrip
    # strip *sets of characters* and could eat part of the name itself.
    outname = os.path.basename(options.files[0])
    if outname.startswith('pathStats'):
        outname = outname[len('pathStats'):]
    outname = outname.lstrip('_')
    if outname.endswith('.xml'):
        outname = outname[:-len('.xml')]
    options.out = os.path.join( options.outdir, '%s_%s' %( type, outname ) )

    fig, pdf = libplot.initImage( 11.2, 10.0, options )
    ax2 = fig.add_axes( [0.15, 0.15, 0.8, 0.8] )

    l2 = ax2.plot( y2data, marker='.', markersize=14.0, linestyle='none', color="#E31A1C" )
    l1 = ax2.plot( y1data, marker='.', markersize=14.0, linestyle='none', color="#1F78B4" )

    #Legend
    fontP = FontProperties()
    fontP.set_size("x-small")
    # The location is passed by keyword, which every matplotlib release accepts.
    legend = ax2.legend( [l1, l2], [libplot.properName(refname1), libplot.properName(refname2)],
                         loc='upper right', numpoints=1, prop=fontP )
    legend._drawFrame = False

    ax2.set_ylim( minMajority, maxMajority )
    ax2.set_xlim( -0.5, len(xticklabels) -0.5 )

    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.xaxis.tick_bottom()
    ax2.yaxis.set_ticks_position( 'left' )

    ax2.set_xticks( range( 0, len(xticklabels) ) )
    properxticklabels = [ libplot.properName(l) for l in xticklabels ]
    ax2.set_xticklabels( properxticklabels )

    for label in ax2.xaxis.get_ticklabels():
        label.set_rotation( 90 )

    ax2.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax2.xaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    ax2.set_xlabel( 'Samples' )
    if isInsertion:
        ax2.set_ylabel( 'Insertions per site' )
        title = 'Insertions'
    else:
        ax2.set_ylabel( 'Deletions per site' )
        title = 'Deletions'
    ax2.set_title( title )

    libplot.writeImage( fig, pdf, options )
def drawCompareData( axesList, xstats, ystats, options ):
    """Scatter the buckets of every shared sample, x set against y set.

    axesList -- three axes: axesList[0] receives the scatter plot,
                axesList[1] and axesList[2] receive the aggregate plots
                produced by drawAggData.
    xstats, ystats -- iterables of samples that also carry a refname
                attribute; only samples found in both are drawn.
    options  -- options.includeCov selects correctPerSample (true) or
                correctPerAligned (false) as the plotted bucket value;
                options.ycutoff, when set, overrides the lower axis limit;
                options.title is the title of the scatter plot.

    Calls sys.exit(1) if the two sets disagree on a bucket's mid value.
    """
    colors = libplot.getColors1()
    #The index is advanced before use, so the first sample gets colors[2]
    colorindex = 1
    lines = []
    sampleNames = []
    p0axes = axesList[0] #plot 0 axes (see def 'setCompareAxes')
    aggData = [] #data points (buckets) of all samples

    minval = float('inf')
    for xsample in xstats:
        ysample = getSample( ystats, xsample.name )
        if ysample is None:
            continue
        xsample, ysample = intersect( xsample, ysample )

        data = [] #list of (x,y) tuples
        colorindex += 1
        for i in range( len( xsample ) ): #each bucket
            if xsample[i].mid != ysample[i].mid:
                sys.stderr.write( "Two xml files have different buckets\n " )
                sys.exit( 1 )
            if options.includeCov:
                data.append( (xsample[i].correctPerSample, ysample[i].correctPerSample) )
            else:
                data.append( (xsample[i].correctPerAligned, ysample[i].correctPerAligned) )

        x2data = [ point[0] for point in data ]
        y2data = [ point[1] for point in data ]
        #plot() returns a list of lines; keep the legend handles flat
        lines.extend( p0axes.plot( x2data, y2data, color=colors[colorindex], marker='.', markersize=4.0, linestyle='none' ) )

        sampleNames.append( xsample.name )
        aggData.extend( data )
        #Keep the smallest value seen over ALL samples (not just the last
        #one), so that the axis cutoff below does not hide earlier samples
        if len( data ) > 0:
            minval = min( [minval, min(x2data), min(y2data)] )

    #Draw the y=x line
    x = [0, 1]
    y = [0, 1]
    p0axes.plot(x, y, color="#919191")

    fontP = FontProperties()
    fontP.set_size('small')
    libplot.editSpine( p0axes )
    p0axes.set_title(options.title)
    p0axes.set_xlabel( libplot.properName(xstats.refname) )
    p0axes.set_ylabel( libplot.properName(ystats.refname) )
    libplot.setTicks( p0axes )
    for l in p0axes.xaxis.get_ticklabels():
        l.set_fontsize('small')
    for l in p0axes.yaxis.get_ticklabels():
        l.set_fontsize('small')

    #legend:
    legend = p0axes.legend( lines, [ libplot.properName(n) for n in sampleNames],
                            loc='lower right', numpoints=1, prop=fontP, ncol=2 )
    legend.draw_frame( False )

    #Lower limit of both axes: the data minimum unless a cutoff is given
    ycutoff = minval
    if options.ycutoff:
        ycutoff = options.ycutoff
    lowlim = ycutoff - (1 - ycutoff)*0.02
    highlim = 1 + (1 - ycutoff)*0.01
    p0axes.set_xlim( lowlim, highlim )
    p0axes.set_ylim( lowlim, highlim )

    #DRAW AGGREGATE DATA (plot 1 and plot 2):
    nbins = 20
    p1axes = axesList[1]
    y1min, y1max = drawAggData( p1axes, aggData, 0, 0, 1, ycutoff, nbins )
    y1lim = max( abs(y1min), abs(y1max) )
    p1axes.set_ylim( -y1lim*1.1, y1lim*1.1 )
    p1axes.set_xlim( lowlim, highlim )
    #NOTE(review): set_color('none') is taken to apply to every spine, not
    #only to the outward-shifted one -- confirm against the intended layout
    for loc, spine in p1axes.spines.items():
        if loc == 'left':
            spine.set_position( ( 'outward', 10 ) )
        spine.set_color( 'none' )
    p1axes.axhline( 0, color = '#000000' )
    p1axes.xaxis.set_major_locator( NullLocator() )
    p1axes.xaxis.set_major_formatter( NullFormatter() )
    p1axes.yaxis.set_ticks([-y1lim, 0, y1lim])
    for l in p1axes.yaxis.get_ticklabels():
        l.set_fontsize('small')

    p2axes = axesList[2]
    x2min, x2max = drawAggData( p2axes, aggData, 1, 0, 1, ycutoff, nbins )
    x2lim = max( abs(x2min), abs(x2max) )
    p2axes.set_xlim( -x2lim*1.1, x2lim*1.1 )
    p2axes.set_ylim( lowlim, highlim )
    #NOTE(review): same assumption as for the spines of plot 1 above
    for loc, spine in p2axes.spines.items():
        if loc == 'bottom':
            spine.set_position( ( 'outward', 10 ) )
        spine.set_color( 'none' )
    p2axes.axvline( 0, color = '#000000' )
    p2axes.yaxis.set_major_locator( NullLocator() )
    p2axes.yaxis.set_major_formatter( NullFormatter() )
    p2axes.xaxis.set_ticks([-x2lim, 0, x2lim])
    for l in p2axes.xaxis.get_ticklabels():
        l.set_fontsize('small')
        l.set_rotation( 45 )
    return
def drawCompareData2( axes, options, stats, isAbs ):
    """Draw one bar per sample showing the bases mapped to the reference.

    axes    -- matplotlib axes to draw on.
    options -- not used by this function.
    stats   -- samples with name, referenceBasesMapped and totalBases
               attributes; the entry named 'all' is never drawn.
    isAbs   -- if true, draw absolute values (referenceBasesMapped). If not,
               draw them as a percentage of totalBases.

    Returns None; nothing is drawn when there is no sample besides 'all'.
    """
    drawn = [ s for s in stats if s.name != 'all' ]
    if len( drawn ) == 0:
        return

    #X data: 'average' and 'panTro3' are set 1.5 apart from the preceding bar
    x1data = []
    currx = -1
    for s in drawn:
        if s.name == 'average' or s.name == 'panTro3':
            currx += 1.5
        else:
            currx += 1
        x1data.append( currx )

    if isAbs:
        y1data = [ s.referenceBasesMapped for s in drawn ]
    else:
        y1data = [ 100.0*s.referenceBasesMapped/s.totalBases for s in drawn ]

    barwidth = 0.6
    axes.bar( x1data, y1data, barwidth, color = "#E31A1C", ec="w" )

    libplot.editSpine( axes )
    axes.set_title("Sample Coverage") #TO BE NAMED

    #set ticks:
    samples = [ libplot.properName(s.name) for s in drawn ]
    fontP = FontProperties()
    fontP.set_size('small')
    pyplot.xticks( [x + barwidth/2.0 for x in x1data], samples, rotation=45, fontproperties=fontP )
    pyplot.yticks( fontproperties=fontP )

    pyplot.xlabel("Samples")
    if isAbs:
        #HACK: fixed ticks from 2.0 to 5.5 million, labelled in millions.
        #Only meaningful for absolute counts, so not applied to percentages.
        yticks = list( range(2000000, 6000000, 500000) )
        yticklabels = [ float(y)/1000000 for y in yticks ]
        axes.set_yticks(yticks)
        axes.set_yticklabels(yticklabels)
        pyplot.ylabel("Number of positions (in millions)")
    else:
        pyplot.ylabel("Percentage of positions")

    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )
    miny = min( y1data )*0.9
    axes.set_ylim( miny, max(y1data) )
    axes.set_xlim( -0.5, max(x1data) + 0.5 )

    axes.yaxis.grid(True, color="#A8A8A8", linestyle='-', linewidth=0.25)

    #Shrink the axes slightly (room that was reserved for a legend)
    box = axes.get_position()
    axes.set_position( [box.x0, box.y0, box.width*0.95, box.height*0.9] )
    return
def drawPlot(exps, options, outfile, type):
    """Plot one point per sample and experiment for the given statistic.

    exps    -- mapping of experiment name to its data; must contain 'All'.
    options -- options.out is set to outfile; the object is passed on to
               libplot.initImage / libplot.writeImage.
    outfile -- output name stored in options.out.
    type    -- one of 'tpfn', 'tp', 'tp2', 'fn', 'total'; selects the title,
               the experiments that are drawn and the legend labels.

    Returns None. Nothing is drawn (and no figure is created) when exps has
    no 'All' entry or getSamplesOrder returns no samples. Raises KeyError
    for an unknown type once there is data to plot.
    """
    options.out = outfile

    #Check for data first so that no figure is created just to be abandoned
    if 'All' not in exps:
        return
    samples = getSamplesOrder( exps['All'], type )
    if len( samples ) < 1:
        return
    samples.append('average')
    if type != 'fn':
        samples.append('reference')
        samples.append('panTro3')

    fig, pdf = libplot.initImage( 11.2, 10.0, options )
    axes = fig.add_axes( [0.12, 0.18, 0.85, 0.75] )

    #Set title:
    titleDict = {'tpfn':'Indel Overlap with dbSNP',
                 'tp':'True Positives According to dbSNP',
                 'tp2':'Indel Overlap with dbSNP',
                 'fn':'False Negatives According to dbSNP',
                 'total':'Total Indels Called'}
    axes.set_title( titleDict[type] )

    xdata = list( range( 0, len(samples) ) )
    colors = libplot.getColors6()
    c = -1
    lines = []
    pointsize = 10.0
    offset = 0.15
    exporder = ['Wobble', 'Wobble, No repeats', 'All', 'No repeats']
    if type == 'fn':
        exporder = ['Wobble', 'All']
    elif type == 'total':
        exporder = ['All', 'No repeats']
    elif type == 'tp2' or type == 'tpfn':
        exporder = ['Wobble', 'Wobble, No repeats']

    #Get ydata
    ydataList, ymin, ymax = getData(samples, exps, type, exporder)

    #Large values are drawn in units of 10**scale
    scale = -1
    if ymin > 1000:
        scale = len( str(int(ymin)) ) - 1
    if scale > 0:
        for exp in ydataList:
            ydataList[exp] = [ float(y)/10**scale for y in ydataList[exp] ]

    #plot() returns a list of lines; extend keeps the legend handles flat
    if type == 'tpfn':
        for j, t in enumerate(['tp', 'fn']):
            for i, exp in enumerate(exporder):
                #only the first experiment is drawn for 'fn'
                if i > 0 and t == 'fn':
                    continue
                if t == 'tp':
                    xdatai = [x + offset*(j*2+i) for x in xdata]
                else:
                    #the 'fn' series leaves out the last three columns
                    xdatai = [x + offset*(j*2+i) for x in xdata[: len(xdata) - 3]]
                ydata = ydataList["%s.%s" %(exp,t)]
                c += 1
                lines.extend( axes.plot(xdatai, ydata, color=colors[c], marker='.', markersize=pointsize, linestyle='none') )
    else:
        for i, exp in enumerate(exporder):
            xdatai = [x + offset*i for x in xdata]
            ydata = ydataList[exp]
            c += 1
            l = axes.plot(xdatai, ydata, color=colors[c], marker='.', markersize=pointsize, linestyle='none')
            if type == 'fn':
                #skip one colour after each 'fn' series
                c += 1
            lines.extend(l)

    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(exps) + offset*3

    fontP = FontProperties()
    fontP.set_size('x-small')
    if scale > 0:
        ymin = float(ymin)/10**scale
        ymax = float(ymax)/10**scale
    datarange = ymax - ymin
    ymin = ymin - datarange*0.01
    ymax = ymax + datarange*0.01

    #Draw vertical lines to separate each sample:
    gap = (1 - offset*len(exporder))/2.0
    for i in range(1, len(samples)):
        axes.plot( [i - gap, i - gap], [ymin, ymax], color="#CCCCCC", linewidth=0.005 )

    axes.set_xlim(xmin, xmax)
    axes.set_ylim(ymin, ymax)
    libplot.editSpine( axes )

    #'//' keeps the integer division the tick positions were written for
    axes.set_xticks( [ i + offset*(len(exps)//2 - 1) for i in range(0, len(samples)) ] )
    axes.set_xticklabels( [ libplot.properName(s) for s in samples ] )
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )

    if type == 'tp':
        legendLabels = ['All', 'No repeats', 'No wobble', 'No wobble, No repeats']
    elif type == 'fn':
        legendLabels = ['All', 'No wobble']
    elif type == 'tpfn':
        legendLabels = ['All, TP', 'No repeats, TP', 'All, FN']
    elif type == 'tp2':
        legendLabels = ['All', 'No repeats']
    else:
        legendLabels = exporder
    legend = pyplot.legend( lines, legendLabels, numpoints=1, loc='best', prop=fontP )
    legend.draw_frame( False )

    axes.set_xlabel( 'Samples' )
    ylabel = "Percentage"
    if type == 'total':
        ylabel = 'Number of indels'
    if scale > 0:
        ylabel += '(x%d)' %10**scale
    axes.set_ylabel(ylabel)
    axes.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage( fig, pdf, options )
def drawCompareN50data( axes, xsamples, ysamples, options ):
    """Scatter, for every key, the x-set value of each sample against its y-set value.

    axes     -- matplotlib axes to draw on.
    xsamples, ysamples -- sequences of elements whose attrib mapping holds
                'referenceName', 'sampleName' and one integer-valued entry
                per key.
    options  -- options.keys lists the attributes to plot; options.logscale
                plots log10 of the values and labels the axes.

    Samples named after the y reference are skipped, as are samples missing
    from ysamples (reported on stderr) and pairs where either value is not
    positive.

    Returns (lines, lineNames, maxval, minval): one axes.plot() result and
    one key per plotted series, and the largest / smallest plotted value.
    minval is 0 when nothing was plotted.
    """
    keys = options.keys
    lineNames = []
    colors = libplot.getColors6()
    c = -1
    lines = []
    xrefname = xsamples[0].attrib[ 'referenceName' ]
    yrefname = ysamples[0].attrib[ 'referenceName' ]

    minval = float('inf')
    maxval = 0
    for key in keys:
        xdata = []
        ydata = []
        for xsample in xsamples:
            name = xsample.attrib[ 'sampleName' ]
            if name == yrefname:
                continue
            ysample = getSample( ysamples, name )
            if ysample is None:
                sys.stderr.write( "%s has %s sample, but %s doesn't\n" % (xrefname, name, yrefname) )
                continue
            xval = int(xsample.attrib[key])
            yval = int(ysample.attrib[key])
            #only strictly positive pairs are kept
            if xval > 0 and yval > 0:
                xdata.append(xval)
                ydata.append(yval)

        if len(xdata) == 0:
            continue
        if options.logscale:
            xdata = log10( array(xdata) )
            ydata = log10( array(ydata) )

        c += 1
        l = axes.plot( xdata, ydata, color=colors[c], marker=".", markersize=10.0, linestyle='none' )
        lines.append(l)
        lineNames.append( key )

        #The builtins work for both the plain lists of the linear case and
        #the arrays of the log case (lists have no .max()/.min() methods)
        currmax = max( max(xdata), max(ydata) )
        if maxval < currmax:
            maxval = currmax
        currmin = min( min(xdata), min(ydata) )
        if minval > currmin:
            minval = currmin

    #Nothing plotted (still +inf) or a -inf value: fall back to 0
    if minval == float('inf') or minval == float('-inf'):
        minval = 0

    #Draw y=x line
    span = maxval - minval
    x = [ minval - span*0.1, maxval + span*0.1 ]
    y = [ minval - span*0.1, maxval + span*0.1 ]
    axes.plot( x, y, color="0.9" )

    libplot.editSpine( axes )
    if options.logscale:
        pyplot.xlabel( "%s (Log 10)" % libplot.properName(xrefname) )
        pyplot.ylabel( "%s (Log 10)" % libplot.properName(yrefname) )
    return lines, lineNames, maxval, minval
def drawScatter( axes, options, stats, type, cumulative ):
    """Plot, on a log y-axis, how many positions are shared by 1, 2, ... samples.

    axes       -- matplotlib axes to draw on.
    options    -- not used by this function.
    stats      -- objects with a name and a baseCoverages list; the first
                  len(stats) - 4 entries of baseCoverages are plotted
                  (presumably four of the stats entries are not individual
                  samples -- TODO confirm).
    type       -- 'noHg19' draws only the 'minusOtherReference' series;
                  anything else draws 'reference', 'hg19', 'panTro3' and
                  'average' with a legend.
    cumulative -- if true, each value is replaced by the sum of itself and
                  all values after it.

    Returns None. Nothing is drawn when there is no coverage level to plot;
    series whose name is not found in stats are skipped.
    """
    numLevels = len(stats) - 4
    if numLevels < 1:
        return
    title = "Distribution of Positions Shared Among Samples"
    if cumulative:
        title = "Cumulative Distribution of Positions Shared Among Samples"
    axes.set_title(title) #TO BE NAMED

    samples = ["reference", "hg19", "panTro3", "average"]
    if type == 'noHg19':
        samples = ["minusOtherReference"]

    xdata = list( range( 0, numLevels ) )
    #(position in samples, ydata) for every series that exists in stats
    series = []
    miny = float('inf')
    maxy = float('-inf')
    for index, name in enumerate( samples ):
        for s in stats:
            if s.name == name:
                ydata = s.baseCoverages[:numLevels]
                if cumulative:
                    ydata = [ sum(ydata[i:]) for i in range( len(ydata) ) ]
                series.append( (index, ydata) )
                miny = min( [miny, min(ydata)] )
                maxy = max( [maxy, max(ydata)] )
                break
    if len( series ) == 0:
        return

    lines = []
    linenames = []
    colors = ["#E31A1C", "#1F78B4", "#3D3D3D", "#4DAF4A"] #ConsensusRef, GRCh37, chimp, average
    offset = 0.12
    axes.set_yscale('log')

    for index, ydata in series:
        xdatai = [x + offset*index for x in xdata]
        #Faint connecting line; for the first series it starts at the
        #second point
        if index == 0:
            axes.plot(xdatai[1:], ydata[1:], color="#CCCCCC", linestyle='-', linewidth=0.002)
        else:
            axes.plot(xdatai, ydata, color="#CCCCCC", linestyle='-', linewidth=0.002)
        #plot() returns a list of lines; extend keeps the legend handles flat
        lines.extend( axes.plot(xdatai, ydata, color=colors[index], marker='.', markersize=12.0, linestyle='none') )
        linenames.append( libplot.properName(samples[index]) )

    fontP = FontProperties()
    fontP.set_size('x-small')

    yrange = maxy - miny
    miny = miny - 10
    maxy = maxy + yrange*0.1
    xmin = -0.4
    xmax = numLevels - 1 + offset*len(samples) + offset

    libplot.editSpine(axes)

    axes.set_xticks( [ i + offset*(len(samples)/2.0) for i in range(0, numLevels) ] )
    #one label (1..numLevels) per tick
    axes.set_xticklabels( list( range(1, numLevels + 1) ) )
    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )

    #scale = number of digits of maxy, minus one
    scale = len(str( int(maxy) )) - 1
    ylabel = "Number of positions"
    if type == "noHg19":
        yticks = [ 10**y for y in range(scale + 1) ]
    else:
        #1..9 times every power of ten up to 10**scale
        yticks = [ y1*(10**y) for y in range(scale + 1) for y1 in range(1, 10) ]
    axes.set_yticks( yticks )
    minorLocator = LogLocator( base=10, subs=list( range(1, 10) ) )
    axes.yaxis.set_minor_locator( minorLocator )

    axes.set_xlim(xmin, xmax)
    if type == "noHg19":
        axes.set_ylim(miny, maxy)
    else:
        axes.set_ylim(10000, 1000000)#HACK
        legend = pyplot.legend( lines, linenames, numpoints=1, loc='lower right', prop=fontP )
        legend.draw_frame( False )

    axes.set_xlabel( 'Number of samples' )
    axes.set_ylabel( ylabel )
    axes.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    return
def drawRef2(rexps, exps, options, outfile, numCats):
    """Plot, per sample, the first numCats mapping categories from getData2.

    rexps   -- mapping of sample name to an object with ref and total
               attributes; the 'average' key is not used for ordering.
    exps    -- passed through to getData2.
    options -- options.out is set to outfile; the object is passed on to
               libplot.initImage / libplot.writeImage.
    outfile -- output name stored in options.out.
    numCats -- how many of the categories mapped, properlyPaired,
               uniquelyMapped, uniquelyMappedAndProperlyPaired to draw.

    Samples are ordered by decreasing total, followed by 'average'.
    Returns None; nothing is drawn (and no figure is created) when rexps is
    empty.
    """
    options.out = outfile
    #Check for data first so that no figure is created just to be abandoned
    if len(rexps) < 1:
        return

    fig, pdf = libplot.initImage( 8.0, 10.0, options )
    axes = fig.add_axes( [0.12, 0.14, 0.85, 0.8] )

    sampleNotherRefmapped = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        e = rexps[sample]
        ref = e.ref
        sampleNotherRefmapped.append( (sample, e.total) )

    otherRefName = libplot.properName( ref )
    #Set title:
    #HACK: the reference name is hard-coded rather than taken from otherRefName
    axes.set_title("Mapability of C. Ref. in Comparison to GRCh37 haplotypes")

    sampleNotherRefmapped = sorted( sampleNotherRefmapped, key=lambda item: item[1], reverse=True )
    samples = [ item[0] for item in sampleNotherRefmapped ]
    samples.append( 'average' )

    xdata = list( range( 0, len(samples) ) )
    colors = libplot.getColors4()
    lines = []
    titleDict = {'mapped':'Mapped',
                 'properlyPaired':'Properly Paired',
                 'uniquelyMapped':'Uniquely Mapped',
                 'uniquelyMappedAndProperlyPaired':'Uniquely Mapped And Properly Paired'}

    ydataList, miny, maxy = getData2(samples, rexps, exps, titleDict.keys())

    offset = 0.12
    #Large values are drawn in units of 10**scale
    scale = -1
    if miny > 1000:
        scale = len( str(int(miny)) ) - 1

    linenames = []
    categories = ["mapped", "properlyPaired", "uniquelyMapped", "uniquelyMappedAndProperlyPaired"]
    cats = categories[:numCats]
    for i, key in enumerate( cats ):
        xdatai = [ x + offset*i for x in xdata ]
        ydata = ydataList[key]
        if scale > 0:
            ydata = [ float(y)/10**scale for y in ydata ]
        #plot() returns a list of lines; extend keeps the legend handles flat
        lines.extend( axes.plot( xdatai, ydata, color=colors[i], marker='.', markersize=16.0, linestyle='none') )
        linenames.append( titleDict[key] )

    if scale > 0:
        miny = float(miny)/10**scale
        maxy = float(maxy)/10**scale

    #Draw horizontal line at y = 0:
    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(linenames) + offset
    axes.plot( [xmin, xmax], [0,0], color="#6B6B6B", linewidth=0.005)

    fontP = FontProperties()
    fontP.set_size('x-small')

    yrange = maxy - miny
    miny = miny - yrange*0.05
    maxy = maxy + yrange*0.2

    #Draw vertical lines to separate each sample:
    gap = (1 - offset*len(linenames))/2.0
    for i in range(1, len(samples)):
        axes.plot( [i - gap, i - gap], [miny, maxy], color="#CCCCCC", linewidth=0.005 )

    axes.set_xlim( xmin, xmax )
    axes.set_ylim( miny, maxy )
    libplot.editSpine( axes )

    axes.set_xticks( [ i + offset*(len(linenames)/2.0) for i in range(0, len(samples)) ] )
    axes.set_xticklabels( samples )
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )

    legend = pyplot.legend( lines, linenames, numpoints=1, loc='upper right', prop=fontP)
    legend.draw_frame( False )

    axes.set_xlabel( 'Samples' )
    axes.set_ylabel( 'Percentage of mapping difference between C. Ref. and %s' % otherRefName) #NEED TO DO
    if scale > 0:
        axes.set_ylabel( 'Event counts (x%d)' %(10**scale) )
    axes.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage( fig, pdf, options )
def drawCompareData( axes, options, stats, isAbs ):
    """Draw three bars per sample: total bases and bases mapped to each reference.

    axes    -- matplotlib axes to draw on.
    options -- not used by this function.
    stats   -- samples with name, otherReferenceBasesMapped,
               referenceBasesMapped and totalBases attributes; stats[0] must
               also have otherReferenceName and referenceName, which become
               the legend labels.
    isAbs   -- if true, draw absolute values. If not, draw each value as a
               percentage of the sample's totalBases.

    Returns None; nothing is drawn for an empty stats.
    """
    if len(stats) == 0:
        return
    lines = []
    linenames = [ stats[0].otherReferenceName, stats[0].referenceName, "total" ]

    barwidth = 0.25
    #X data: left edge of the 'total' bar of every sample; 'average' and
    #'panTro3' get an extra gap of 1.5 bar widths
    x3data = []
    currx = -1
    for s in stats:
        if s.name == 'average' or s.name == 'panTro3':
            currx += 1 + 1.5*barwidth
        else:
            currx += 1
        x3data.append( currx )
    #the two reference bars follow the 'total' bar, one bar width apart
    x2data = [ x + barwidth for x in x3data ]
    x1data = [ x + barwidth for x in x2data ]

    if isAbs:
        y1data = [ sample.otherReferenceBasesMapped for sample in stats ]
        y2data = [ sample.referenceBasesMapped for sample in stats ]
        y3data = [ sample.totalBases for sample in stats ]
    else:
        y1data = [ 100.0*sample.otherReferenceBasesMapped/sample.totalBases for sample in stats ]
        y2data = [ 100.0*sample.referenceBasesMapped/sample.totalBases for sample in stats ]
        y3data = [ 100.0*sample.totalBases/sample.totalBases for sample in stats ]

    colors = ["#1F78B4", "#E31A1C", "#4DAF4A"]
    l1 = axes.bar( x1data, y1data, barwidth, color = colors[0], ec="w" )
    lines.append( l1[0] )
    l2 = axes.bar( x2data, y2data, barwidth, color = colors[1], ec="w" )
    lines.append( l2[0] )
    l3 = axes.bar( x3data, y3data, barwidth, color = colors[2], ec="w" )
    lines.append( l3[0] )

    libplot.editSpine( axes )
    axes.set_title("Sample Coverage") #TO BE NAMED

    #set ticks:
    samples = [ libplot.properName(sample.name) for sample in stats ]
    fontP = FontProperties()
    fontP.set_size('small')
    pyplot.xticks( x2data, samples, rotation=45, fontproperties=fontP )
    pyplot.yticks( fontproperties=fontP )

    pyplot.xlabel("Samples")
    if isAbs:
        #HACK: fixed ticks from 2.0 to 5.5 million, labelled in millions.
        #Only meaningful for absolute counts, so not applied to percentages.
        yticks = list( range(2000000, 6000000, 500000) )
        yticklabels = [ float(y)/1000000 for y in yticks ]
        axes.set_yticks(yticks)
        axes.set_yticklabels(yticklabels)
        pyplot.ylabel("Number of positions (in millions)")
    else:
        pyplot.ylabel("Percentage of positions")

    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )
    miny = min( [min(y1data), min(y2data), min(y3data)] )
    miny = miny*0.9
    maxy = max( [max(y1data), max(y2data), max(y3data)] )

    axes.set_ylim( miny, maxy )
    axes.set_xlim( -0.5, max(x1data) + 0.5 )

    axes.yaxis.grid(True, color="#A8A8A8", linestyle='-', linewidth=0.25)

    #Legend:
    box = axes.get_position()
    axes.set_position( [box.x0, box.y0, box.width*0.95, box.height*0.9] )

    legend = axes.legend( lines, [libplot.properName(n) for n in linenames], prop=fontP, loc="best", bbox_to_anchor=(0.2, 1) )
    legend.draw_frame( False )
    return
def drawPlot2(exps, options, outfile, type):
    """Plot indel counts on a broken y-axis: outliers on top, normal range below.

    exps    -- mapping of experiment name to its data; must contain 'All'.
    options -- options.out is set to outfile; the object is passed on to
               libplot.initImage / libplot.writeImage.
    outfile -- output name stored in options.out.
    type    -- statistic to plot; also the key of the title, for which only
               'total' is defined.

    The experiments 'All' and 'No repeats' are drawn for every sample from
    getSamplesOrder plus 'average', 'reference' and 'panTro3'.

    Returns None. Nothing is drawn (and no figure is created) when exps has
    no 'All' entry or there are no samples. Raises KeyError for a type
    other than 'total' once there is data to plot.
    """
    options.out = outfile

    #Check for data first so that no figure is created just to be abandoned
    if 'All' not in exps:
        return
    samples = getSamplesOrder( exps['All'], type )
    if len( samples ) < 1:
        return
    samples.append('average')
    samples.append('reference')
    samples.append('panTro3')

    fig, pdf = libplot.initImage( 11.2, 10.0, options )

    titleDict = {'total':'Total Indels Called'}

    xdata = list( range( 0, len(samples) ) )
    colors = libplot.getColors6()
    lines = []
    pointsize = 10.0
    offset = 0.15
    exporder = ['All', 'No repeats']

    #Get ydata
    ydataList, ymin, ymax = getData(samples, exps, type, exporder)
    yrange = ymax - ymin

    #Get normal range and outlier range:
    normalvals, outliers = getOutliers(ydataList)
    minNormal = min(normalvals) - 0.05*yrange
    maxNormal = max(normalvals) + 0.05*yrange
    minOutlier = min(outliers) - 0.05*yrange
    maxOutlier = max(outliers) + 0.05*yrange
    if minNormal < 0:
        minNormal = -0.5

    #Set up the axes
    ax, ax2 = setAxes(fig, maxOutlier - minOutlier, maxNormal - minNormal)

    #Large values are drawn in units of 10**scale
    scale = -1
    if minNormal > 1000:
        scale = len( str(int(minNormal)) ) - 1
    if scale > 0:
        for exp in ydataList:
            ydataList[exp] = [ float(y)/10**scale for y in ydataList[exp] ]

    #PLOT: the same points go on both axes; the y limits decide what shows
    for i, exp in enumerate(exporder):
        xdatai = [x + offset*i for x in xdata]
        ydata = ydataList[exp]
        #Outlier plot. plot() returns a list of lines; extend keeps the
        #legend handles flat
        lines.extend( ax.plot(xdatai, ydata, color=colors[i], marker='.', markersize=pointsize, linestyle='none') )
        #Normal range plot
        ax2.plot(xdatai, ydata, color=colors[i], marker='.', markersize=pointsize, linestyle='none')

    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(exps) + offset*3

    fontP = FontProperties()
    fontP.set_size('x-small')
    if scale > 0:
        minNormal = float(minNormal)/10**scale
        maxNormal = float(maxNormal)/10**scale
        minOutlier = float(minOutlier)/10**scale
        maxOutlier = float(maxOutlier)/10**scale

    #Draw the Discontinue sign: a short diagonal where each axes is cut.
    #breakSize is the half-height of the diagonal in y data units
    breakSize = 0.2
    if scale == -1:
        breakSize = 50
    ax.plot( (-1, 0), (minOutlier + breakSize, minOutlier - breakSize), color = "k", clip_on=False )
    ax2.plot( (-1, 0), (maxNormal + breakSize, maxNormal - breakSize), color = "k", clip_on=False )

    #Draw vertical lines to separate each sample:
    gap = (1 - offset*len(exporder))/2.0
    for i in range(1, len(samples)):
        x = [i - gap, i - gap]
        y = [minNormal, maxOutlier]
        ax.plot(x, y, color="#CCCCCC", linewidth=0.005)
        ax2.plot(x, y, color="#CCCCCC", linewidth=0.005)

    xticklabels = [libplot.properName(s) for s in samples]
    #'//' keeps the integer division the tick positions were written for
    xtickpositions = [ i + offset*(len(exps)//2 - 1) for i in range(0, len(samples)) ]

    #Set limit for the top plot (outlier)
    ax.set_ylim(minOutlier, maxOutlier)
    ax.set_xlim(xmin, xmax)
    ax.set_xticks( xtickpositions )
    dummyxticklabels = [ "" for l in xticklabels ]
    ax.set_xticklabels(dummyxticklabels)

    #Make sure the y ticks of the top plot is the same with the bottom plot:
    #multiples of step that fall inside the outlier range
    step = 2
    if scale == -1:
        step = 500
    ytickpositions = []
    ytickpos = 0
    while ytickpos < maxOutlier:
        if ytickpos >= minOutlier:
            ytickpositions.append(ytickpos)
        ytickpos += step
    ax.set_yticks(ytickpositions)

    #Set limit for the bottom plot:
    ax2.set_ylim(minNormal, maxNormal)
    ax2.set_xlim(xmin, xmax)

    #Hide the spines between ax and ax2:
    ax.spines['bottom'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('none')

    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.xaxis.tick_bottom()
    ax2.yaxis.set_ticks_position( 'left' )

    ax2.set_xticks( xtickpositions )
    ax2.set_xticklabels( xticklabels )
    for label in ax2.xaxis.get_ticklabels():
        label.set_rotation(75)

    legend = pyplot.legend( lines, exporder, numpoints=1, loc='upper left', prop=fontP)
    legend.draw_frame( False )

    ax2.set_xlabel( 'Samples' )
    ylabel = 'Number of indels'
    if scale > 0:
        ylabel += '(x%d)' %10**scale
    ax2.set_ylabel(ylabel)
    ax.set_title( titleDict[type] )

    ax.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax2.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage( fig, pdf, options )
def drawData( axes, stats, isAbs, ycutoff ):
    """Draw one stacked bar per sample, one segment per coverage level.

    axes    -- matplotlib axes to draw on.
    stats   -- samples with name, baseCoverages and relativeBaseCoverages;
               the first len(stats) - 3 coverage levels are drawn (the
               subtraction accounts for the average, reference and
               minusOtherReference entries).
    isAbs   -- if true, draw absolute values (baseCoverages). If not, draw
               proportions (relativeBaseCoverages).
    ycutoff -- lower y limit, applied in the relative case only.

    The highest level is at the bottom of each bar and level 1 on top.
    Returns (lines, linenames): one bar handle and one level label per
    level, in ascending level order. Both are empty, and nothing is drawn,
    when stats has fewer than four entries.
    """
    numLevels = len(stats) - 1 - 2
    if numLevels < 1:
        return [], []

    lines = []
    linenames = []
    #ydataList[i][j]: value of coverage level i for sample j
    ydataList = []
    for i in range( numLevels ):
        if isAbs:
            ydataList.append( [ sample.baseCoverages[i] for sample in stats ] )
        else:
            ydataList.append( [ sample.relativeBaseCoverages[i] for sample in stats ] )

    colors = libplot.getColors3()
    colorindex = 0
    x = arange( len(stats) ) #x axis represents the samples
    barwidth = 0.6

    #add bottom-most bar (number of bases that are in all samples)
    l = axes.bar( x, ydataList[-1], barwidth, color = colors[colorindex], ec="w" )
    lines.append( l[0] )
    linenames.append( "%d" % len(ydataList) )
    #running height of each stack; a copy, so ydataList is left untouched
    cumulativeList = list( ydataList[-1] )

    for i in range( len(ydataList) - 2, -1, -1 ):
        colorindex += 1
        l = axes.bar( x, ydataList[i], barwidth, color = colors[colorindex], bottom=cumulativeList, ec="w" )
        lines.append( l[0] )
        linenames.append( "%d" % (i + 1) )
        #Update cumulative list:
        for j in range( len(cumulativeList) ):
            cumulativeList[j] += ydataList[i][j]

    libplot.editSpine( axes )
    axes.set_title("Sample Coverage") #TO BE NAMED!!!
    pyplot.xlabel("Samples")
    if isAbs:
        pyplot.ylabel("Number of positions")
    else:
        pyplot.ylabel("Proportion of total positions")

    #set ticks:
    samples = [ libplot.properName( sample.name ) for sample in stats ]
    fontP = FontProperties()
    fontP.set_size('small')
    pyplot.xticks( x + barwidth/2., samples, rotation=90, fontproperties=fontP )
    pyplot.yticks( fontproperties=fontP )

    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )

    if not isAbs:
        axes.set_ylim(ycutoff, 1)
    axes.set_xlim(-0.5, len(stats) )

    axes.yaxis.grid(True, color="#A8A8A8", linestyle='-', linewidth=0.25)

    #Legend: shrink the axes to make room, list the levels in ascending order
    box = axes.get_position()
    axes.set_position( [box.x0, box.y0, box.width*0.8, box.height] )

    lines.reverse()
    linenames.reverse()
    legend = axes.legend( lines, [libplot.properName(n) for n in linenames], prop=fontP, loc="best", bbox_to_anchor=(1,0.75) )
    legend.draw_frame( False )

    return lines, linenames