def drawN50data(axes, samples, options):
    """Plot one series of N50 values per key, one point per sample.

    axes    -- matplotlib axes the points are drawn on.
    samples -- sequence of elements exposing an ``attrib`` mapping; every key
               in ``options.keys`` is read from it and converted with int().
    options -- provides ``keys`` (attribute names to plot; the original note
               lists 'blockN50', 'sequenceN50', 'contigPathN50' and
               'scaffolfPathN50') and ``logscale`` (plot log10 of the values).

    Returns the list of objects returned by ``axes.plot``, one per key.
    """
    keys = options.keys
    palette = libplot.getColors6()
    plotted = []
    for index, key in enumerate(keys):
        values = [int(sample.attrib[key]) for sample in samples]
        if options.logscale:
            values = log10(array(values))
        # Only markers are drawn; the series are not connected by a line.
        series = axes.plot(values, color=palette[index], marker=".",
                           markersize=10.0, linestyle='none')
        plotted.append(series)

    libplot.editSpine(axes)
    pyplot.xlabel('Samples')
    pyplot.ylabel('Log 10 of N50' if options.logscale else 'N50')
    return plotted
def drawData(axes, stats, options):
    """Draw one line per sample: bucket midpoint against correct proportion.

    axes    -- matplotlib axes to draw on.
    stats   -- sequence of samples; each sample has ``name`` and ``reference``
               and iterates over buckets exposing ``mid``, ``correctPerSample``
               and ``correctPerAligned``.
    options -- provides ``includeCov`` (use ``correctPerSample`` instead of
               ``correctPerAligned``) and ``title``.

    Returns None when ``stats`` is empty; otherwise the tuple
    ``(lines, sampleNames, ymin)`` where ``ymin`` is the smallest y value
    plotted across all samples.
    """
    palette = libplot.getColors1()
    if len(stats) < 1:
        return

    # One palette entry is dropped depending on the first sample's reference.
    firstRef = stats[0].reference
    if firstRef == "reference":
        palette.pop(0)
    elif firstRef == 'hg19':
        palette.pop(1)

    lines = []
    sampleNames = []
    ymin = float('inf')
    ref = ''
    for index, sample in enumerate(stats):
        sampleNames.append(sample.name)
        if ref == '':
            ref = sample.reference

        buckets = list(sample)
        xdata = [bucket.mid for bucket in buckets]
        if options.includeCov:
            ydata = [bucket.correctPerSample for bucket in buckets]
        else:
            ydata = [bucket.correctPerAligned for bucket in buckets]

        ymin = min([ymin, min(ydata)])
        lines.append(axes.plot(xdata, ydata, color=palette[index], linewidth=1))

    libplot.editSpine(axes)

    # The title is suffixed with the display name of the first non-empty reference.
    title = options.title
    if ref != '':
        title += ', %s' % libplot.properName(ref)
    axes.set_title(title)

    pyplot.xlabel("Distance")
    pyplot.ylabel("Correct proportion")
    return lines, sampleNames, ymin
def drawPlot(samplesList, sampleNames, options):
    """Draw a grouped bar chart of 'totalIntraJoin' counts for two sample lists.

    samplesList -- sequence whose first two items are lists of elements with an
                   ``attrib`` mapping holding 'sampleName', 'referenceName' and
                   'totalIntraJoin'.
    sampleNames -- sample names, in the order the bars should appear.
    options     -- provides ``outdir``; ``options.out`` is set to
                   ``<outdir>/nonLinearBp`` before anything else happens.

    Returns None.  Nothing is drawn when either list is empty.

    Only names present in BOTH lists are plotted, so the two bar series and
    the tick labels always have the same length.  When a name occurs more
    than once in a list, its first occurrence is used.
    """
    options.out = os.path.join(options.outdir, "nonLinearBp")
    list1 = samplesList[0]
    list2 = samplesList[1]
    # Bail out before the figure is created; otherwise the image initialised
    # by libplot.initImage would never reach libplot.writeImage.
    if len(list1) < 1 or len(list2) < 1:
        return

    fig, pdf = libplot.initImage(12.0, 8.0, options)
    axes = fig.add_axes([0.09, 0.2, 0.9, 0.6])

    refname1 = list1[0].attrib['referenceName']
    refname2 = list2[0].attrib['referenceName']

    # Index each list by sample name once instead of rescanning it per name.
    byName1 = {}
    for s in list1:
        byName1.setdefault(s.attrib['sampleName'], s)
    byName2 = {}
    for s in list2:
        byName2.setdefault(s.attrib['sampleName'], s)

    plottedNames = []
    y1data = []
    y2data = []
    for name in sampleNames:
        if name not in byName1 or name not in byName2:
            continue
        plottedNames.append(name)
        y1data.append(int(byName1[name].attrib['totalIntraJoin']))
        y2data.append(int(byName2[name].attrib['totalIntraJoin']))

    barwidth = 0.3
    x1data = list(range(len(y1data)))
    x2data = [x + barwidth for x in x1data]  # second series sits next to the first
    colors = ["#1F78B4", "#E31A1C"]

    lines = []
    l1 = axes.bar(x1data, y1data, barwidth, color=colors[0], ec='w')
    lines.append(l1[0])
    l2 = axes.bar(x2data, y2data, barwidth, color=colors[1], ec='w')
    lines.append(l2[0])

    libplot.editSpine(axes)
    axes.set_title("Non-linear Breakpoints")

    # Set ticks
    xlabels = [libplot.properName(name) for name in plottedNames]
    fontP = FontProperties()
    fontP.set_size('small')
    pyplot.xticks(x2data, xlabels, rotation=45, fontproperties=fontP)
    pyplot.yticks(fontproperties=fontP)
    pyplot.xlabel("Samples")
    pyplot.ylabel("Number of breakpoints")
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    axes.yaxis.grid(b=True, color="#A8A8A8", linestyle='-', linewidth=0.25)

    legend = axes.legend(lines,
                         [libplot.properName(refname1), libplot.properName(refname2)],
                         prop=fontP, loc="best")
    legend._drawFrame = False

    libplot.writeImage(fig, pdf, options)
def drawRef2(rexps, exps, options, outfile, numCats):
    """Plot per-sample mapping categories as offset marker series.

    rexps   -- mapping of sample name to an experiment with ``ref`` and
               ``total``; the key 'average' is excluded from the ordering
               and appended as the last x position.
    exps    -- passed through to ``getData2`` together with ``rexps``.
    options -- ``options.out`` is set to ``outfile``; also handed to libplot.
    outfile -- output path stored on ``options.out``.
    numCats -- how many of the categories 'mapped', 'properlyPaired',
               'uniquelyMapped', 'uniquelyMappedAndProperlyPaired' (in that
               order) are drawn.

    Returns None.  Nothing is drawn when ``rexps`` is empty.
    """
    options.out = outfile
    # Bail out before the figure is created; otherwise the image initialised
    # by libplot.initImage would never reach libplot.writeImage.
    if len(rexps) < 1:
        return

    fig, pdf = libplot.initImage(8.0, 10.0, options)
    axes = fig.add_axes([0.12, 0.14, 0.85, 0.8])

    sampleNotherRefmapped = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        e = rexps[sample]
        ref = e.ref
        sampleNotherRefmapped.append((sample, e.total))
    otherRefName = libplot.properName(ref)

    # HACK: the title is hard-coded instead of being derived from otherRefName.
    axes.set_title("Mapability of C. Ref. in Comparison to GRCh37 haplotypes")

    # Samples are ordered by decreasing total, with 'average' last.
    sampleNotherRefmapped = sorted(sampleNotherRefmapped, key=lambda item: item[1], reverse=True)
    samples = [item[0] for item in sampleNotherRefmapped]
    samples.append('average')
    xdata = list(range(0, len(samples)))

    colors = libplot.getColors4()
    titleDict = {'mapped': 'Mapped',
                 'properlyPaired': 'Properly Paired',
                 'uniquelyMapped': 'Uniquely Mapped',
                 'uniquelyMappedAndProperlyPaired': 'Uniquely Mapped And Properly Paired'}
    ydataList, miny, maxy = getData2(samples, rexps, exps, titleDict.keys())

    offset = 0.12
    # When the smallest value exceeds 1000, everything is divided by a power
    # of ten (number of digits of miny minus one).
    scale = -1
    if miny > 1000:
        scale = len(str(int(miny))) - 1

    categories = ["mapped", "properlyPaired", "uniquelyMapped", "uniquelyMappedAndProperlyPaired"]
    cats = categories[:numCats]

    lines = []
    linenames = []
    for i, key in enumerate(cats):
        xdatai = [x + offset * i for x in xdata]  # shift each category sideways
        ydata = ydataList[key]
        if scale > 0:
            ydata = [float(y) / 10 ** scale for y in ydata]
        l = axes.plot(xdatai, ydata, color=colors[i], marker='.', markersize=16.0, linestyle='none')
        lines.append(l)
        linenames.append(titleDict[key])

    if scale > 0:
        miny = float(miny) / 10 ** scale
        maxy = float(maxy) / 10 ** scale

    # Draw horizontal line at y = 0:
    xmin = -0.4
    xmax = len(samples) - 1 + offset * len(linenames) + offset
    axes.plot([xmin, xmax], [0, 0], color="#6B6B6B", linewidth=0.005)

    fontP = FontProperties()
    fontP.set_size('x-small')

    # Pad the y range: 5% below and 20% above.
    yrange = maxy - miny
    miny = miny - yrange * 0.05
    maxy = maxy + yrange * 0.2

    # Draw vertical lines to separate each sample:
    d = (1 - offset * len(linenames)) / 2.0
    for i in range(1, len(samples)):
        axes.plot([i - d, i - d], [miny, maxy], color="#CCCCCC", linewidth=0.005)

    axes.set_xlim(xmin, xmax)
    axes.set_ylim(miny, maxy)
    libplot.editSpine(axes)

    axes.set_xticks([i + offset * (len(linenames) / 2.0) for i in range(0, len(samples))])
    axes.set_xticklabels(samples)
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')

    legend = pyplot.legend(lines, linenames, numpoints=1, loc='upper right', prop=fontP)
    legend._drawFrame = False

    axes.set_xlabel('Samples')
    # NEED TO DO (carried over from the original author's note on this label).
    axes.set_ylabel('Percentage of mapping difference between C. Ref. and %s' % otherRefName)
    if scale > 0:
        axes.set_ylabel('Event counts (x%d)' % (10 ** scale))
    axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    libplot.writeImage(fig, pdf, options)
def drawPlot(rexps, exps, options, outfile, type):
    """Plot, per sample, one marker for every reference returned by getData.

    rexps   -- mapping of sample name to an experiment with ``ref`` and
               ``total``; 'average' is excluded from the ordering and
               appended as the last x position.
    exps    -- passed through to ``getData`` together with ``rexps``.
    options -- ``options.out`` is set to ``outfile``; also handed to libplot.
    outfile -- output path stored on ``options.out``.
    type    -- one of 'mapped', 'uniquelyMapped', 'properlyPaired',
               'uniquelyMappedAndProperlyPaired', 'snps'; selects the title
               and is forwarded to ``getData``.

    Returns None.  Raises KeyError for an unknown ``type``.  Nothing is drawn
    when ``rexps`` is empty.
    """
    options.out = outfile

    titleDict = {'mapped': 'Mapped reads',
                 'uniquelyMapped': 'Uniquely Mapped Reads',
                 'properlyPaired': 'Properly Paired Reads',
                 'uniquelyMappedAndProperlyPaired': 'Uniquely Mapped And Properly Paired Reads',
                 'snps': 'SNPs'}
    title = titleDict[type]  # looked up first so an unknown type still raises KeyError

    # Bail out before the figure is created; otherwise the image initialised
    # by libplot.initImage would never reach libplot.writeImage.
    if len(rexps) < 1:
        return

    fig, pdf = libplot.initImage(8.0, 10.0, options)
    axes = fig.add_axes([0.12, 0.14, 0.85, 0.8])
    axes.set_title(title)

    sampleNotherRefmapped = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        exp = rexps[sample]
        ref = exp.ref
        sampleNotherRefmapped.append((sample, exp.total))
    otherRefName = libplot.properName(ref)

    # Samples are ordered by decreasing total, with 'average' last.
    sampleNotherRefmapped = sorted(sampleNotherRefmapped, key=lambda item: item[1], reverse=True)
    samples = [item[0] for item in sampleNotherRefmapped]
    samples.append('average')
    xdata = list(range(0, len(samples)))

    colors = libplot.getColors4()
    ydataList, miny, maxy = getData(samples, exps, rexps, type)
    refs = sorted(ydataList.keys())

    offset = 0.12
    # When the smallest value exceeds 1000, everything is divided by a power
    # of ten (number of digits of miny minus one).
    scale = -1
    if miny > 1000:
        scale = len(str(int(miny))) - 1

    # Build the (x, y) series of every reference once; both the connecting
    # lines and the markers use them.
    series = []
    for i, r in enumerate(refs):
        xdatai = [x + offset * i for x in xdata]
        ydata = ydataList[r]
        if scale > 0:
            ydata = [float(y) / 10 ** scale for y in ydata]
        series.append((xdatai, ydata))

    # Draw line connecting the data for each sample (each bin):
    for j in range(len(xdata)):
        binX = [xdatai[j] for xdatai, _ in series]
        binY = [ydata[j] for _, ydata in series]
        axes.plot(binX, binY, color="#CCCCCC", linestyle='-', linewidth=0.005)

    # Draw main plots.  Colour 0 of the palette is skipped on purpose (the
    # original counter started at 0 and was incremented before use).
    lines = []
    for i, (xdatai, ydata) in enumerate(series):
        l = axes.plot(xdatai, ydata, color=colors[i + 1], marker='.', markersize=16.0, linestyle='none')
        lines.append(l)

    if scale > 0:
        miny = float(miny) / 10 ** scale
        maxy = float(maxy) / 10 ** scale

    # Draw horizontal line at y = 0:
    xmin = -0.4
    xmax = len(samples) - 1 + offset * len(refs) + offset
    axes.plot([xmin, xmax], [0, 0], color="#6B6B6B", linewidth=0.005)

    fontP = FontProperties()
    fontP.set_size('x-small')

    # Pad the y range: 5% below and 20% above.
    yrange = maxy - miny
    miny = miny - yrange * 0.05
    maxy = maxy + yrange * 0.2

    axes.set_xlim(xmin, xmax)
    axes.set_ylim(miny, maxy)
    libplot.editSpine(axes)

    axes.set_xticks([i + offset * (len(refs) / 2.0) for i in range(0, len(samples))])
    axes.set_xticklabels(samples)
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')

    # Legend labels: 'cactusRef<suffix>' becomes '<proper cactusRef name> <suffix>'.
    # The prefix is removed by slicing; str.lstrip('cactusRef') strips a SET of
    # characters and would also eat leading suffix characters such as 'a' or 'c'.
    prefix = 'cactusRef'
    properRefs = []
    for r in refs:
        if prefix in r:
            suffix = r[len(prefix):] if r.startswith(prefix) else r
            properRefs.append("%s %s" % (libplot.properName('cactusRef'), suffix))
        else:
            properRefs.append(libplot.properName(r))

    legend = pyplot.legend(lines, properRefs, numpoints=1, loc='best', prop=fontP)
    legend._drawFrame = False

    axes.set_xlabel('Samples')
    axes.set_ylabel('Percentage of mapping difference between C. Ref. and %s' % otherRefName)
    if scale > 0:
        axes.set_ylabel('Event counts (x%d)' % (10 ** scale))
    axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    libplot.writeImage(fig, pdf, options)
def drawSamplePlot(rexps, exps, options, outfile, type):
    """Plot the SNP rate of every sample for the other reference and cactusRef.

    rexps   -- mapping of sample name (including 'average') to an experiment
               with ``ref`` and ``snprate``.
    exps    -- mapping of sample name to a list of experiments with ``ref``,
               ``weight`` and ``snprate``; entries whose ref is 'cactusRef'
               and weight is 2 form the second series.
    options -- ``options.out`` is set to ``outfile``; also handed to libplot.
    outfile -- output path stored on ``options.out``.
    type    -- accepted for interface compatibility; not used here.

    Returns None.  Nothing is drawn when ``rexps`` is empty.
    """
    options.out = outfile
    # Bail out before the figure is created; otherwise the image initialised
    # by libplot.initImage would never reach libplot.writeImage.
    if len(rexps) < 1:
        return

    fig, pdf = libplot.initImage(11.2, 10.0, options)
    axes = fig.add_axes([0.14, 0.12, 0.8, 0.8])
    axes.set_title("SNP Rate Using BWA Mapping")

    sampleNsize = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        exp = rexps[sample]
        ref = exp.ref
        sampleNsize.append((sample, exp.snprate))
    otherRefName = ref

    # Samples are ordered by decreasing SNP rate, with 'average' last.
    sampleNsize = sorted(sampleNsize, key=lambda item: item[1], reverse=True)
    samples = [item[0] for item in sampleNsize]
    samples.append('average')

    # Get ydata:
    ydata1 = []  # otherRef (hg19, apd, ...)
    ydata2 = []  # cactusRef, weight 2
    for sample in samples:
        explist = exps[sample]
        otherRef = rexps[sample]
        ydata1.append(otherRef.snprate)
        # One value is appended per matching experiment, so ydata2 lines up
        # with ydata1 only if every sample has exactly one such experiment.
        for e in explist:
            if e.ref == 'cactusRef' and e.weight == 2:
                ydata2.append(e.snprate)

    miny = min([min(ydata1), min(ydata2)])
    maxy = max([max(ydata1), max(ydata2)])

    xdata = list(range(0, len(samples)))
    colors = ["#1F78B4", "#E31A1C"]  # blue (other reference), red (cactusRef)

    # When the smallest value exceeds 1000, everything is divided by a power
    # of ten (number of digits of miny minus one).
    scale = -1
    if miny > 1000:
        scale = len(str(int(miny))) - 1
    if scale > 0:
        ydata1 = [float(y) / 10 ** scale for y in ydata1]
        ydata2 = [float(y) / 10 ** scale for y in ydata2]

    lines = []
    lines.append(axes.plot(xdata, ydata1, color=colors[0], marker=".", markersize=16.0, linestyle='none'))
    lines.append(axes.plot(xdata, ydata2, color=colors[1], marker=".", markersize=16.0, linestyle='none'))

    if scale > 0:
        miny = float(miny) / 10 ** scale
        maxy = float(maxy) / 10 ** scale

    fontP = FontProperties()
    fontP.set_size('x-small')

    axes.set_xlim(-0.4, len(samples) - 0.6)
    # Pad the y range: 5% below and 10% above.
    yrange = maxy - miny
    miny = miny - yrange * 0.05
    maxy = maxy + yrange * 0.1
    axes.set_ylim(miny, maxy)

    libplot.editSpine(axes)

    axes.set_xticks(xdata)
    axes.set_xticklabels(samples)
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.yaxis.set_ticks_position('left')
    axes.xaxis.set_ticks_position('bottom')

    legend = pyplot.legend(lines,
                           [libplot.properName(otherRefName), libplot.properName("cactusRef")],
                           numpoints=1, loc='best', prop=fontP)
    legend._drawFrame = False

    axes.set_xlabel('Samples')
    axes.set_ylabel('SNPs Per Site')
    if scale > 0:
        axes.set_ylabel('Snp counts (x%d)' % (10 ** scale))
    axes.xaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    libplot.writeImage(fig, pdf, options)
def drawData(axesList, samples, samplesPerPlot, options, proportion, culm):
    """Draw insertion and deletion size distributions, several samples per axes.

    axesList       -- even-length list of axes; the first half receives the
                      insertion curves, the second half the deletion curves.
    samples        -- elements with an ``attrib`` mapping holding 'sampleName',
                      'referenceName', 'insertionSizeDistribution' and
                      'deletionSizeDistribution' (whitespace-separated ints).
    samplesPerPlot -- number of consecutive samples drawn on each axes pair.
    options        -- ``xlogscale`` / ``ylogscale`` are compared against the
                      string "true" to switch the axes to log scale.
    proportion, culm -- forwarded to ``getFreq``; they also select the y label,
                      the x limit and whether large-indel statistics are
                      written to stderr.

    Returns None.  Writes an error and exits with status 1 when the number of
    axes is odd.  Does nothing when ``samples`` is empty.
    """
    largeIns = []   # per sample: value returned by getLargeIndelProp for insertions
    largeDels = []  # same for deletions

    if len(axesList) % 2 != 0:
        sys.stderr.write('Number of axes must be even. Got %d\n' % len(axesList))
        sys.exit(1)

    colors = libplot.getColors1()
    if len(samples) < 1:
        return
    # One palette entry is dropped depending on the first sample's reference.
    if samples[0].attrib["referenceName"] == "reference":
        colors.pop(0)
    elif samples[0].attrib["referenceName"] == 'hg19':
        colors.pop(1)

    # Floor division keeps this an int (usable as range bound and list index)
    # regardless of which division semantics are in effect.
    half = len(axesList) // 2

    c = -1  # colour index; keeps increasing across all plots
    textsize = 'x-small'
    linesDict = {}
    labelsDict = {}
    xmax = float('-inf')
    ymax = float('-inf')
    xmin = float('inf')
    ymin = float('inf')

    for i in range(half):
        inslines = []
        dellines = []
        sampleNames = []
        insAxes = axesList[i]
        delAxes = axesList[i + half]
        startIndex = i * samplesPerPlot
        endIndex = min([startIndex + samplesPerPlot, len(samples)])

        for j in range(startIndex, endIndex):
            sample = samples[j]
            sampleNames.append(sample.attrib['sampleName'])

            insDist = [int(val) for val in sample.attrib['insertionSizeDistribution'].split()]
            insXdata, insYdata = getFreq(insDist, proportion, culm)
            delDist = [int(val) for val in sample.attrib['deletionSizeDistribution'].split()]
            delXdata, delYdata = getFreq(delDist, proportion, culm)

            # LARGE INDELS, for paper stats, not related to the plot:
            if proportion and culm:
                largeIns.append(getLargeIndelProp(insXdata, insYdata))
                largeDels.append(getLargeIndelProp(delXdata, delYdata))

            c += 1
            inslines.append(insAxes.plot(insXdata, insYdata, color=colors[c]))
            dellines.append(delAxes.plot(delXdata, delYdata, color=colors[c]))

            # Track the global data range; empty series are ignored.
            for xs in (insXdata, delXdata):
                if len(xs) > 0:
                    xmax = max(xmax, max(xs))
                    xmin = min(xmin, min(xs))
            for ys in (insYdata, delYdata):
                if len(ys) > 0:
                    ymax = max(ymax, max(ys))
                    ymin = min(ymin, min(ys))

        linesDict[i] = inslines
        labelsDict[i] = sampleNames
        linesDict[i + half] = dellines
        labelsDict[i + half] = sampleNames

        # Only the first pair of axes gets column titles.
        if i == 0:
            insAxes.set_title('Insertions')
            delAxes.set_title('Deletions')

    for i, axes in enumerate(axesList):
        if options.xlogscale == "true":
            axes.set_xscale('log')
        if options.ylogscale == "true":
            axes.set_yscale('log')

        libplot.editSpine(axes)
        axes.set_xlabel('Length (bp)', size=textsize)
        if not proportion:
            axes.set_ylabel('Event number', size=textsize)
        else:
            axes.set_ylabel('Number of positions', size=textsize)
        axes.xaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
        axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

        # Legend
        legend = axes.legend(linesDict[i],
                             [libplot.properName(n) for n in labelsDict[i]],
                             loc='upper right', ncol=3)
        for t in legend.get_texts():
            t.set_fontsize('x-small')
        legend._drawFrame = False

        # On log axes, put ticks at powers of ten up to the data maximum.
        # NOTE(review): the exponent is taken from the length of str(max),
        # which only equals the number of digits for integer values --
        # confirm what getFreq returns when proportion is set.
        if options.xlogscale == "true":
            scale = len(str(xmax)) - 1
            axes.set_xticks([10 ** x for x in range(scale + 1)])
        if options.ylogscale == "true":
            scale = len(str(ymax)) - 1
            axes.set_yticks([10 ** y for y in range(scale + 1)])

        for label in axes.get_xticklabels():
            label.set_fontsize(textsize)
        for label in axes.get_yticklabels():
            label.set_fontsize(textsize)

        axes.set_ylim(ymin, ymax)
        if proportion and not culm:
            axes.set_xlim(xmin, 100)
        else:
            axes.set_xlim(xmin, xmax)

    # PRINT THE LARGE INDEL STATS:
    if proportion and culm:
        sys.stderr.write("largeIndelStats\n")
        # Averages are skipped when no sample was processed (e.g. an empty
        # axesList), which previously divided by zero.
        if largeIns:
            count = float(len(largeIns))
            sys.stderr.write("Large insertions: %f\n" % (sum(largeIns) / count))
            sys.stderr.write("Large deletions: %f\n" % (sum(largeDels) / count))
            largeIndels = [(ins + dels) / 2.0 for ins, dels in zip(largeIns, largeDels)]
            sys.stderr.write("IndelsAverage: %f\n" % (sum(largeIndels) / count))
    return
def drawPlot(exps, options, outfile, type):
    """Plot per-sample indel statistics, one marker series per experiment.

    exps    -- mapping of experiment name to its data; must contain 'All',
               which is handed to ``getSamplesOrder`` to order the samples.
    options -- ``options.out`` is set to ``outfile``; also handed to libplot.
    outfile -- output path stored on ``options.out``.
    type    -- one of 'tpfn', 'tp', 'tp2', 'fn', 'total'; selects the title,
               the experiments drawn and the legend labels.

    Returns None.  Raises KeyError for an unknown ``type``.  Nothing is drawn
    when 'All' is missing from ``exps`` or no samples are returned.
    """
    options.out = outfile

    titleDict = {'tpfn': 'Indel Overlap with dbSNP',
                 'tp': 'True Positives According to dbSNP',
                 'tp2': 'Indel Overlap with dbSNP',
                 'fn': 'False Negatives According to dbSNP',
                 'total': 'Total Indels Called'}
    title = titleDict[type]  # looked up first so an unknown type still raises KeyError

    # Validate the input before the figure is created; otherwise the image
    # initialised by libplot.initImage would never reach libplot.writeImage.
    if 'All' not in exps:
        return
    samples = getSamplesOrder(exps['All'], type)
    if len(samples) < 1:
        return

    fig, pdf = libplot.initImage(11.2, 10.0, options)
    axes = fig.add_axes([0.12, 0.18, 0.85, 0.75])
    axes.set_title(title)

    # Extra x positions appended after the ordered samples.
    samples.append('average')
    if type != 'fn':
        samples.append('reference')
        samples.append('panTro3')
    xdata = list(range(0, len(samples)))

    colors = libplot.getColors6()
    c = -1
    lines = []
    pointsize = 10.0
    offset = 0.15

    exporder = ['Wobble', 'Wobble, No repeats', 'All', 'No repeats']
    if type == 'fn':
        exporder = ['Wobble', 'All']
    elif type == 'total':
        exporder = ['All', 'No repeats']
    elif type == 'tp2' or type == 'tpfn':
        exporder = ['Wobble', 'Wobble, No repeats']

    # Get ydata
    ydataList, ymin, ymax = getData(samples, exps, type, exporder)

    # When the smallest value exceeds 1000, everything is divided by a power
    # of ten (number of digits of ymin minus one).
    scale = -1
    if ymin > 1000:
        scale = len(str(int(ymin))) - 1
    if scale > 0:
        for exp in ydataList:
            ydataList[exp] = [float(y) / 10 ** scale for y in ydataList[exp]]

    if type == 'tpfn':
        for j, t in enumerate(['tp', 'fn']):
            for i, exp in enumerate(exporder):
                # Only the first experiment is drawn for 'fn'.
                if i > 0 and t == 'fn':
                    continue
                if t == 'tp':
                    xdatai = [x + offset * (j * 2 + i) for x in xdata]
                else:
                    # The 'fn' series leaves out the last three x positions.
                    xdatai = [x + offset * (j * 2 + i) for x in xdata[:len(xdata) - 3]]
                ydata = ydataList["%s.%s" % (exp, t)]
                c += 1
                lines.append(axes.plot(xdatai, ydata, color=colors[c], marker='.',
                                       markersize=pointsize, linestyle='none'))
    else:
        for i, exp in enumerate(exporder):
            xdatai = [x + offset * i for x in xdata]
            ydata = ydataList[exp]
            c += 1
            l = axes.plot(xdatai, ydata, color=colors[c], marker='.',
                          markersize=pointsize, linestyle='none')
            if type == 'fn':
                c += 1  # skip one palette entry between 'fn' series
            lines.append(l)

    xmin = -0.4
    xmax = len(samples) - 1 + offset * len(exps) + offset * 3

    fontP = FontProperties()
    fontP.set_size('x-small')

    if scale > 0:
        ymin = float(ymin) / 10 ** scale
        ymax = float(ymax) / 10 ** scale
    # Pad the y range by 1% on both sides.
    datarange = ymax - ymin
    ymin = ymin - datarange * 0.01
    ymax = ymax + datarange * 0.01

    # Draw vertical lines to separate each sample:
    d = (1 - offset * len(exporder)) / 2.0
    for i in range(1, len(samples)):
        axes.plot([i - d, i - d], [ymin, ymax], color="#CCCCCC", linewidth=0.005)

    axes.set_xlim(xmin, xmax)
    axes.set_ylim(ymin, ymax)
    libplot.editSpine(axes)

    # Floor division is spelled out so the tick offset does not depend on
    # which division semantics are in effect.
    tickShift = offset * (len(exps) // 2 - 1)
    axes.set_xticks([i + tickShift for i in range(0, len(samples))])
    axes.set_xticklabels([libplot.properName(s) for s in samples])
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')

    if type == 'tp':
        labels = ['All', 'No repeats', 'No wobble', 'No wobble, No repeats']
    elif type == 'fn':
        labels = ['All', 'No wobble']
    elif type == 'tpfn':
        labels = ['All, TP', 'No repeats, TP', 'All, FN']
    elif type == 'tp2':
        labels = ['All', 'No repeats']
    else:
        labels = exporder
    legend = pyplot.legend(lines, labels, numpoints=1, loc='best', prop=fontP)
    legend._drawFrame = False

    axes.set_xlabel('Samples')
    ylabel = "Percentage"
    if type == 'total':
        ylabel = 'Number of indels'
    if scale > 0:
        ylabel += '(x%d)' % 10 ** scale
    axes.set_ylabel(ylabel)
    axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    libplot.writeImage(fig, pdf, options)
def drawCompareN50data(axes, xsamples, ysamples, options):
    """Scatter the values of two sample sets against each other, per key.

    axes     -- matplotlib axes to draw on.
    xsamples -- non-empty sequence of elements with an ``attrib`` mapping
                ('sampleName', 'referenceName' and every key in
                ``options.keys``); supplies the x values.
    ysamples -- same structure; supplies the y values.  The matching sample
                is looked up by name with ``getSample``.
    options  -- provides ``keys`` and ``logscale`` (plot log10 of the values
                and label the axes).

    Samples named like the y reference, samples missing from ``ysamples``
    (reported on stderr) and pairs where either value is not positive are
    skipped.  Keys without any remaining pair are not plotted.

    Returns ``(lines, lineNames, maxval, minval)``: the plotted series, the
    keys they belong to, and the largest / smallest plotted value.  When
    nothing was plotted ``maxval`` is 0 and ``minval`` is ``inf``.
    """
    keys = options.keys
    lineNames = []
    colors = libplot.getColors6()
    c = -1
    lines = []
    xrefname = xsamples[0].attrib['referenceName']
    yrefname = ysamples[0].attrib['referenceName']
    minval = inf
    maxval = 0

    for key in keys:
        xdata = []
        ydata = []
        for xsample in xsamples:
            name = xsample.attrib['sampleName']
            if name == yrefname:
                continue
            ysample = getSample(ysamples, name)
            if ysample is None:
                sys.stderr.write("%s has %s sample, but %s doesn't\n" % (xrefname, name, yrefname))
                continue
            xval = int(xsample.attrib[key])
            yval = int(ysample.attrib[key])
            # Non-positive values are dropped (log10 would be undefined).
            if xval > 0 and yval > 0:
                xdata.append(xval)
                ydata.append(yval)

        if len(xdata) == 0:
            continue

        # Always convert to arrays: .max()/.min() below are array methods and
        # used to fail on plain lists when logscale was off.
        xdata = array(xdata)
        ydata = array(ydata)
        if options.logscale:
            xdata = log10(xdata)
            ydata = log10(ydata)

        c += 1
        l = axes.plot(xdata, ydata, color=colors[c], marker=".", markersize=10.0, linestyle='none')
        lines.append(l)
        lineNames.append(key)

        currmax = max(xdata.max(), ydata.max())
        if maxval < currmax:
            maxval = currmax
        currmin = min(xdata.min(), ydata.min())
        if minval > currmin:
            minval = currmin

    if minval == -inf:
        minval = 0

    # Draw y=x line, extended 10% beyond the data range on both ends.  It is
    # skipped when nothing was plotted, because the range is then infinite.
    if lines:
        span = maxval - minval
        x = [minval - span * 0.1, maxval + span * 0.1]
        y = [minval - span * 0.1, maxval + span * 0.1]
        axes.plot(x, y, color="0.9")

    libplot.editSpine(axes)
    if options.logscale:
        pyplot.xlabel("%s (Log 10)" % libplot.properName(xrefname))
        pyplot.ylabel("%s (Log 10)" % libplot.properName(yrefname))
    return lines, lineNames, maxval, minval
def drawCompareData(axesList, xstats, ystats, options):
    """Compare two statistics sets bucket by bucket on three linked plots.

    axesList -- at least three axes: [0] the scatter plot, [1] and [2] the
                aggregate plots drawn by ``drawAggData`` (see
                'setCompareAxes').
    xstats   -- iterable of samples with a ``refname`` attribute; each sample
                has ``name`` and is indexable into buckets with ``mid``,
                ``correctPerSample`` and ``correctPerAligned``.
    ystats   -- same structure; matched to ``xstats`` by sample name through
                ``getSample``.
    options  -- provides ``includeCov``, ``title`` and ``ycutoff``.

    Only samples present in both sets are drawn.  Returns None.  Writes an
    error and exits with status 1 when matched buckets have different ``mid``.
    """
    colors = libplot.getColors1()
    colorindex = 1  # incremented before use, so the first sample uses colors[2]
    lines = []
    sampleNames = []
    p0axes = axesList[0]  # plot 0 axes (see def 'setCompareAxes')
    aggData = []  # data points (buckets) of all samples
    minval = float('inf')

    for xsample in xstats:
        ysample = getSample(ystats, xsample.name)
        if ysample is None:
            continue
        xsample, ysample = intersect(xsample, ysample)

        data = []  # list of (x, y) tuples
        colorindex += 1
        for i in range(len(xsample)):  # each bucket
            if xsample[i].mid != ysample[i].mid:
                sys.stderr.write("Two xml files have different buckets\n ")
                sys.exit(1)
            if options.includeCov:
                data.append((xsample[i].correctPerSample, ysample[i].correctPerSample))
            else:
                data.append((xsample[i].correctPerAligned, ysample[i].correctPerAligned))

        x2data = [point[0] for point in data]
        y2data = [point[1] for point in data]
        l = p0axes.plot(x2data, y2data, color=colors[colorindex], marker='.', markersize=4.0, linestyle='none')
        lines.append(l)
        sampleNames.append(xsample.name)
        aggData.extend(data)

        # Keep the minimum over ALL samples (it used to be overwritten by the
        # last sample); an empty intersection contributes nothing.
        if data:
            minval = min(minval, min(x2data), min(y2data))

    # Draw the y=x line
    p0axes.plot([0, 1], [0, 1], color="#919191")

    fontP = FontProperties()
    fontP.set_size('small')

    libplot.editSpine(p0axes)
    p0axes.set_title(options.title)
    p0axes.set_xlabel(libplot.properName(xstats.refname))
    p0axes.set_ylabel(libplot.properName(ystats.refname))
    libplot.setTicks(p0axes)
    for l in p0axes.xaxis.get_ticklabels():
        l.set_fontsize('small')
    for l in p0axes.yaxis.get_ticklabels():
        l.set_fontsize('small')

    # Legend
    legend = p0axes.legend(lines, [libplot.properName(n) for n in sampleNames],
                           loc='lower right', numpoints=1, prop=fontP, ncol=2)
    legend._drawFrame = False

    # The lower axis limit is the smallest plotted value unless an explicit
    # (truthy) cutoff is given.
    ycutoff = minval
    if options.ycutoff:
        ycutoff = options.ycutoff
    lower = ycutoff - (1 - ycutoff) * 0.02
    upper = 1 + (1 - ycutoff) * 0.01
    p0axes.set_xlim(lower, upper)
    p0axes.set_ylim(lower, upper)

    # DRAW AGGREGATE DATA (plot 1 and plot 2):
    nbins = 20

    p1axes = axesList[1]
    y1min, y1max = drawAggData(p1axes, aggData, 0, 0, 1, ycutoff, nbins)
    y1lim = max(abs(y1min), abs(y1max))
    p1axes.set_ylim(-y1lim * 1.1, y1lim * 1.1)  # symmetric around zero
    p1axes.set_xlim(lower, upper)
    # NOTE(review): the source indentation was lost; hiding every spine (not
    # only the left one) is assumed here -- confirm against the rendered plot.
    for loc, spine in p1axes.spines.items():
        if loc == 'left':
            spine.set_position(('outward', 10))
        spine.set_color('none')
    p1axes.axhline(0, color='#000000')
    p1axes.xaxis.set_major_locator(NullLocator())
    p1axes.xaxis.set_major_formatter(NullFormatter())
    p1axes.yaxis.set_ticks([-y1lim, 0, y1lim])
    for l in p1axes.yaxis.get_ticklabels():
        l.set_fontsize('small')

    p2axes = axesList[2]
    x2min, x2max = drawAggData(p2axes, aggData, 1, 0, 1, ycutoff, nbins)
    x2lim = max(abs(x2min), abs(x2max))
    p2axes.set_xlim(-x2lim * 1.1, x2lim * 1.1)  # symmetric around zero
    p2axes.set_ylim(lower, upper)
    # NOTE(review): same assumption as for plot 1, with the bottom spine.
    for loc, spine in p2axes.spines.items():
        if loc == 'bottom':
            spine.set_position(('outward', 10))
        spine.set_color('none')
    p2axes.axvline(0, color='#000000')
    p2axes.yaxis.set_major_locator(NullLocator())
    p2axes.yaxis.set_major_formatter(NullFormatter())
    p2axes.xaxis.set_ticks([-x2lim, 0, x2lim])
    for l in p2axes.xaxis.get_ticklabels():
        l.set_fontsize('small')
        l.set_rotation(45)
    return
def drawData(axes, stats, isAbs, ycutoff):
    """Draw a stacked bar chart of base coverage, one bar per sample.

    axes    -- matplotlib axes to draw on.
    stats   -- sequence of samples exposing ``name``, ``baseCoverages`` and
               ``relativeBaseCoverages``.
    isAbs   -- if true, stack absolute values (``baseCoverages``); otherwise
               stack proportions (``relativeBaseCoverages``).
    ycutoff -- lower y limit, applied only for the proportion plot.

    Returns ``(lines, linenames)``: the first bar of every stacked layer and
    the layer labels, both reversed relative to the drawing order.
    """
    # Each coverage level: number of samples minus average, reference and
    # minusOtherReference.
    numLevels = len(stats) - 1 - 2
    attrName = 'baseCoverages' if isAbs else 'relativeBaseCoverages'
    ydataList = [[getattr(sample, attrName)[level] for sample in stats]
                 for level in range(numLevels)]

    colors = libplot.getColors3()
    x = arange(len(stats))  # x axis represents the samples
    barwidth = 0.6

    lines = []
    linenames = []

    # Bottom-most layer: the last coverage level.
    bars = axes.bar(x, ydataList[-1], barwidth, color=colors[0], ec="w")
    lines.append(bars[0])
    linenames.append("%d" % len(ydataList))

    # Running total of the layers drawn so far; it is the last row itself and
    # is updated in place after every layer.
    culmulativeList = ydataList[-1]
    for colorindex, level in enumerate(range(len(ydataList) - 2, -1, -1), 1):
        bars = axes.bar(x, ydataList[level], barwidth, color=colors[colorindex],
                        bottom=culmulativeList, ec="w")
        lines.append(bars[0])
        linenames.append("%d" % (level + 1))
        # Update cumulative list:
        for j, value in enumerate(ydataList[level][:len(culmulativeList)]):
            culmulativeList[j] += value

    libplot.editSpine(axes)
    axes.set_title("Sample Coverage")  # TO BE NAMED!!!
    pyplot.xlabel("Samples")
    pyplot.ylabel("Number of positions" if isAbs else "Proportion of total positions")

    # Set ticks:
    samples = [libplot.properName(sample.name) for sample in stats]
    fontP = FontProperties()
    fontP.set_size('small')
    pyplot.xticks(x + barwidth / 2., samples, rotation=90, fontproperties=fontP)
    pyplot.yticks(fontproperties=fontP)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')

    if not isAbs:
        axes.set_ylim(ycutoff, 1)
    axes.set_xlim(-0.5, len(stats))
    axes.yaxis.grid(b=True, color="#A8A8A8", linestyle='-', linewidth=0.25)

    # Legend: narrow the axes to 80% of its width, then anchor the legend.
    box = axes.get_position()
    axes.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    lines.reverse()
    linenames.reverse()
    legend = axes.legend(lines, [libplot.properName(n) for n in linenames],
                         prop=fontP, loc="best", bbox_to_anchor=(1, 0.75))
    legend._drawFrame = False
    return lines, linenames
def drawScatter( axes, options, stats, type, cumulative ):
    """Scatter-plot, on a log y axis, the base coverages of selected samples.

    axes       -- matplotlib axes to draw on
    options    -- unused here; kept for interface compatibility
    stats      -- sequence of objects providing `name` and `baseCoverages`
    type       -- 'noHg19' plots only the "minusOtherReference" sample; any
                  other value plots "reference", "hg19", "panTro3" and
                  "average" and adds a legend
    cumulative -- if true, every value is replaced by the sum of itself and
                  all values after it

    The first len(stats) - 4 values of each selected sample are plotted.
    Nothing is drawn (and None is returned) when that count is below one or
    when none of the selected samples is present in `stats`.
    """
    numLevels = len(stats) - 4
    if numLevels < 1:
        return

    #samples = ["panTro3", "minusOtherReference", "average", "reference", "hg19"]
    samples = ["reference", "hg19", "panTro3", "average"]
    if type == 'noHg19':
        samples = ["minusOtherReference"]

    #Collect (slot, name, ydata) for every requested sample found in stats.
    #'slot' is the position in 'samples'; it fixes the color and the x offset
    #so they stay tied to the sample name even if another sample is missing.
    series = []
    for slot, name in enumerate( samples ):
        for s in stats:
            if s.name == name:
                ydata = s.baseCoverages[:numLevels]
                if cumulative:
                    ydata = [ sum(ydata[i:]) for i in range( len(ydata) ) ]
                series.append( (slot, name, ydata) )
                break
    if not series:
        return

    miny = min( min(ydata) for _, _, ydata in series )
    maxy = max( max(ydata) for _, _, ydata in series )

    title = "Distribution of Positions Shared Among Samples"
    if cumulative:
        title = "Cumulative Distribution of Positions Shared Among Samples"
    axes.set_title(title) #TO BE NAMED

    colors = ["#E31A1C", "#1F78B4", "#3D3D3D", "#4DAF4A"] #ConsensusRef, GRCh37, chimp, average
    offset = 0.12
    axes.set_yscale('log')

    lines = []
    for slot, name, ydata in series:
        xdatai = [ x + offset*slot for x in range(numLevels) ]
        #Thin connecting line; for the first sample it starts at the second point
        if slot == 0:
            axes.plot(xdatai[1:], ydata[1:], color="#CCCCCC", linestyle='-', linewidth=0.002)
        else:
            axes.plot(xdatai, ydata, color="#CCCCCC", linestyle='-', linewidth=0.002)
        l = axes.plot(xdatai, ydata, color=colors[slot], marker='.', markersize=12.0, linestyle='none')
        #plot() returns a list of line objects; keep the legend handles flat
        lines.extend(l)

    fontP = FontProperties()
    fontP.set_size('x-small')

    yrange = maxy - miny
    #NOTE(review): the axis is log-scaled, so this lower limit is only
    #meaningful while miny > 10 -- confirm against real data.
    miny = miny - 10
    maxy = maxy + yrange*0.1
    xmin = -0.4
    xmax = numLevels - 1 + offset*len(samples) + offset

    libplot.editSpine(axes)

    #One tick per level, labelled 1..numLevels (as many labels as ticks)
    axes.set_xticks( [ i + offset*(len(samples)/2.0) for i in range(numLevels) ] )
    axes.set_xticklabels( list( range(1, numLevels + 1) ) )
    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )

    scale = len(str( int(maxy) )) - 1
    ylabel = "Number of positions"
    if type == "noHg19":
        yticks = [ 10**y for y in range(scale + 1) ]
    else:
        yticks = [ y1*(10**y) for y in range(scale + 1) for y1 in range(1, 10) ]
    axes.set_yticks( yticks )
    minorLocator = LogLocator( base=10, subs=list( range(1, 10) ) )
    axes.yaxis.set_minor_locator( minorLocator )

    axes.set_xlim(xmin, xmax)
    if type == "noHg19":
        axes.set_ylim(miny, maxy)
    else:
        axes.set_ylim(10000, 1000000)#HACK
        legend = pyplot.legend( lines, [libplot.properName(name) for _, name, _ in series], numpoints=1, loc='lower right', prop=fontP )
        legend.draw_frame(False)

    axes.set_xlabel( 'Number of samples' )
    axes.set_ylabel( ylabel )
    #The on/off flag is passed positionally: its keyword was renamed from
    #'b' to 'visible' across matplotlib releases.
    axes.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    return
def drawCompareData2( axes, options, stats, isAbs ):
    """Draw one bar per sample showing its `referenceBasesMapped` value.

    axes    -- matplotlib axes to draw on
    options -- unused here; kept for interface compatibility
    stats   -- sequence of objects providing `name`, `referenceBasesMapped`
               and (when isAbs is false) `totalBases`; entries named 'all'
               are skipped
    isAbs   -- if true, draw absolute values. If not, draw each value as a
               percentage of the sample's `totalBases`

    Returns None. Nothing is drawn when no entry is left after skipping 'all'.
    """
    shown = [ s for s in stats if s.name != 'all' ]
    if not shown:
        return

    #X data: 'average' and 'panTro3' are set apart by an extra half step
    x1data = []
    currx = -1
    for s in shown:
        if s.name == 'average' or s.name == 'panTro3':
            currx += 1.5
        else:
            currx += 1
        x1data.append( currx )

    if isAbs:
        y1data = [ s.referenceBasesMapped for s in shown ]
    else:
        y1data = [ 100.0*s.referenceBasesMapped/s.totalBases for s in shown ]

    barwidth = 0.6
    axes.bar( x1data, y1data, barwidth, color = "#E31A1C", ec="w" )

    libplot.editSpine( axes )
    axes.set_title("Sample Coverage") #TO BE NAMED

    #set ticks:
    samples = [ libplot.properName(s.name) for s in shown ]
    fontP = FontProperties()
    fontP.set_size('small')
    pyplot.xticks( [x + barwidth/2.0 for x in x1data], samples, rotation=45, fontproperties=fontP )
    pyplot.yticks( fontproperties=fontP )

    pyplot.xlabel("Samples")
    if isAbs:
        #HACK: fixed ticks every half million, labelled in millions
        yticks = list( range(2000000, 6000000, 500000) )
        axes.set_yticks(yticks)
        axes.set_yticklabels( [ float(y)/1000000 for y in yticks ] )
        pyplot.ylabel("Number of positions (in millions)")
    else:
        #Percentages lie far below the fixed ticks above, so keep the
        #default ticks and label the axis accordingly.
        pyplot.ylabel("Percentage of total positions")

    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )
    axes.set_ylim( min(y1data)*0.9, max(y1data) )
    axes.set_xlim(-0.5, max(x1data) + 0.5 )

    #The on/off flag is passed positionally: its keyword was renamed from
    #'b' to 'visible' across matplotlib releases.
    axes.yaxis.grid(True, color="#A8A8A8", linestyle='-', linewidth=0.25)

    #Shrink the axes slightly (room formerly used by a legend)
    box = axes.get_position()
    axes.set_position( [box.x0, box.y0, box.width*0.95, box.height*0.9] )
    return
def drawCompareData( axes, options, stats, isAbs ):
    """Draw three side-by-side bars per sample and a legend.

    For every entry of `stats` the bars show, from left to right,
    `totalBases`, `referenceBasesMapped` and `otherReferenceBasesMapped`.

    axes    -- matplotlib axes to draw on
    options -- unused here; kept for interface compatibility
    stats   -- sequence of objects providing `name`, `totalBases`,
               `referenceBasesMapped` and `otherReferenceBasesMapped`; the
               first entry also supplies `otherReferenceName` and
               `referenceName` for the legend
    isAbs   -- if true, draw absolute values. If not, draw each value as a
               percentage of the sample's `totalBases`

    Returns None. Nothing is drawn when `stats` is empty.
    """
    if len(stats) == 0:
        return

    linenames = [ stats[0].otherReferenceName, stats[0].referenceName, "total" ]
    barwidth = 0.25

    #X data: x3data is the left-most bar of each group; 'average' and
    #'panTro3' are set apart by an extra gap
    x3data = []
    currx = -1
    for s in stats:
        if s.name == 'average' or s.name == 'panTro3':
            currx += 1 + 1.5*barwidth
        else:
            currx += 1
        x3data.append( currx )
    x2data = [ x + barwidth for x in x3data ]
    x1data = [ x + barwidth for x in x2data ]

    if isAbs:
        y1data = [ sample.otherReferenceBasesMapped for sample in stats ]
        y2data = [ sample.referenceBasesMapped for sample in stats ]
        y3data = [ sample.totalBases for sample in stats ]
    else:
        y1data = [ 100.0*sample.otherReferenceBasesMapped/sample.totalBases for sample in stats ]
        y2data = [ 100.0*sample.referenceBasesMapped/sample.totalBases for sample in stats ]
        y3data = [ 100.0*sample.totalBases/sample.totalBases for sample in stats ]

    colors = ["#1F78B4", "#E31A1C", "#4DAF4A"]
    #Drawn in the same order as linenames so legend handles and labels agree
    lines = []
    for xdata, ydata, color in ( (x1data, y1data, colors[0]),
                                 (x2data, y2data, colors[1]),
                                 (x3data, y3data, colors[2]) ):
        bars = axes.bar( xdata, ydata, barwidth, color=color, ec="w" )
        lines.append( bars[0] )

    libplot.editSpine( axes )
    axes.set_title("Sample Coverage") #TO BE NAMED

    #set ticks:
    samples = [ libplot.properName(sample.name) for sample in stats ]
    fontP = FontProperties()
    fontP.set_size('small')
    pyplot.xticks( x2data, samples, rotation=45, fontproperties=fontP )
    pyplot.yticks( fontproperties=fontP )

    pyplot.xlabel("Samples")
    if isAbs:
        #HACK: fixed ticks every half million, labelled in millions
        yticks = list( range(2000000, 6000000, 500000) )
        axes.set_yticks(yticks)
        axes.set_yticklabels( [ float(y)/1000000 for y in yticks ] )
        pyplot.ylabel("Number of positions (in millions)")
    else:
        #Percentages lie far below the fixed ticks above, so keep the
        #default ticks and label the axis accordingly.
        pyplot.ylabel("Percentage of total positions")

    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )
    miny = min( min(y1data), min(y2data), min(y3data) )*0.9
    maxy = max( max(y1data), max(y2data), max(y3data) )
    axes.set_ylim( miny, maxy )
    axes.set_xlim(-0.5, max(x1data) + 0.5 )

    #The on/off flag is passed positionally: its keyword was renamed from
    #'b' to 'visible' across matplotlib releases.
    axes.yaxis.grid(True, color="#A8A8A8", linestyle='-', linewidth=0.25)

    #Legend:
    box = axes.get_position()
    axes.set_position( [box.x0, box.y0, box.width*0.95, box.height*0.9] )
    legend = axes.legend( lines, [libplot.properName(n) for n in linenames], prop=fontP, loc="best", bbox_to_anchor=(0.2, 1) )
    legend.draw_frame(False)
    return