def main():
    """Parse the command line, prepare the wig data and draw the figure.

    Options are registered and validated by this module and by the ``las``
    and ``lpt`` helpers.  The figure height is derived from the number of
    mafs plus the number of annotations held in ``data``.
    """
    usage = (
        'usage: %prog [options]\n\n'
        '%prog takes in a reference genome name ( --referenceGenome ),\n'
        'optionally a directory where annotation wig pickles are stored ( --annotPickleDir [optional] ),\n'
        'a directory where maf wig pickles are stored ( --mafPickleDir ), a paired set of chromosome names\n'
        '( --chrNames comma separated ) and chromosome lengths ( --chrLengths comma separated ) and \n'
        'then various other options specifed below to draw a figure.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, las.initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()

    # The local check also receives ``data``; the library checks do not.
    checkOptions(options, parser, data)
    for validate in (las.checkOptions, lpt.checkOptions):
        validate(options, parser)

    # Preparation steps, run in this order, each called with (options, data).
    for prepare in (loadAnnots, loadMafs, normalizeData, transformData):
        prepare(options, data)

    trackCount = data.numberOfMafs + len(data.annotationOrder)
    figHeight = (trackCount + 0.5) / 4.0
    fig, pdf = lpt.initImage(8.0, figHeight, options, data)

    axDict = establishAxes(fig, options, data)
    labelAxes(fig, axDict, options, data)
    drawAnnotations(axDict, options, data)
    drawMafs(axDict, options, data)
    drawLegend(options, data)
    setAxisLimits(axDict, options, data)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read a substitution stats directory and plot the sorted values.

    Per the usage text the plot shows the difference in substitutions in
    hap1 versus hap2.
    """
    usage = (
        'usage: %prog --subStatsDir=path/to/dir/ [options]\n\n'
        '%prog takes in a directory of substitution stats files ( --subStatsDir )\n'
        'with filenames as NAME.subStats.[upper|lower].xml and produces a plot showing\n'
        'the difference in subs in hap1 versus hap2.\n'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, las.initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, las.checkOptions, lpt.checkOptions):
        validate(options, parser)

    fig, pdf = lpt.initImage(11., 8., options, data)
    axDict = establishAxis(fig, options, data)

    # The reader is handed an empty dict and its return value is used.
    assembliesDict = cssp.readSubStatsDir({}, options)

    def ratio(entry):
        # Element 1 divided by element 2 of each entry, as a float.
        return float(entry[1]) / entry[2]

    valuesList = createValsList(assembliesDict.values(), options)
    valuesList = sorted(valuesList, key=ratio)  # ascending

    drawData(valuesList, axDict, options, data)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read a substitution stats directory and either rank or plot it.

    When ``options.outputRanks`` is set, ``rankings`` is called and the
    function returns without creating or writing an image.
    """
    usage = (
        'usage: %prog --subStatsDir=path/to/dir/ [options]\n\n'
        '%prog takes in a directory of substitution stats files ( --subStatsDir )\n'
        'with filenames as NAME.subStats.[upper|lower].xml and produces a plot.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, las.initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, las.checkOptions, lpt.checkOptions):
        validate(options, parser)

    # The figure and axes are only needed when a plot will be produced.
    if not options.outputRanks:
        fig, pdf = lpt.initImage(9., 11., options, data)
        axDict = establishAxes(fig, options, data)

    assembliesDict = readSubStatsDir({}, options)
    sumErrors(assembliesDict, options)
    normalizeData(assembliesDict, options)

    def allLoOf(name):
        return assembliesDict[name].allLo

    # Assembly keys ordered by ascending ``allLo``.
    sortOrder = sorted(assembliesDict, key=allLoOf)

    if options.outputRanks:
        rankings(assembliesDict, sortOrder, options, data)
        return

    drawData(assembliesDict, sortOrder, axDict, options, data)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read contig path stats from a directory and plot them.

    The image is 14 wide; it is 8 high for up to 20 assemblies and 24 high
    otherwise.
    """
    usage = (
        'usage: %prog --statsScaffoldsContigPathDir=path/to/dir/ [options]\n\n'
        '%prog takes a directory of contig path stats xml files\n'
        '( --statsScaffoldsContigPathDir ) named as NAME.contigPathStats.xml and creates a plot.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, las.initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    # Library checks run before this module's own check.
    for validate in (las.checkOptions, lpt.checkOptions, checkOptions):
        validate(options, parser)

    assembliesDict = readDir(options.statsScaffoldsContigPathDir, options)
    assembliesList = assembliesDict.values()

    figHeight = 8.0 if len(assembliesList) <= 20 else 24.0
    fig, pdf = lpt.initImage(14.0, figHeight, options, data)
    axDict = establishAxis(len(assembliesList), fig, options, data)

    assembliesList = createXYData(assembliesList, options)
    assembliesList = normalizeDist(assembliesList, options)

    def peakInsertionError(assembly):
        return max(assembly.yData['insertionErrorSizeDistribution'])

    # Largest peak first.
    assembliesList = sorted(assembliesList, key=peakInsertionError, reverse=True)

    drawData(assembliesList, axDict, options, data)
    drawLegend(assembliesList, axDict, options, data)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read coverage values via ``readStream`` and plot them.

    Values are ordered by descending ``tot`` before drawing.
    """
    usage = (
        'usage: %prog [options] < rankedAssemblies.txt\n\n'
        '%prog takes via STDIN a list of coverage values, each line formatted as:\n'
        '#assembly ave.CovBothGenomes hap1 hap2 delta bac\n'
        'P1 .9885178 .9888077 .9882265 5.782e-04 0\n'
        'B1 .9869388 .9871948 .9866798 5.257e-04 .9978954\n'
        'F5 .9869094 .9872685 .9865338 7.295e-04 .9993371\n'
        '...\n'
        'And produces a plot showing the Total and Bacterial coverages for\n'
        'all assemblies in the input.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, lpt.checkOptions):
        validate(options, parser)

    def totalOf(entry):
        return entry.tot

    valuesList = sorted(readStream(options), key=totalOf, reverse=True)

    fig, pdf = lpt.initImage(8.0, 6.0, options, data)
    ax = establishAxis(fig, options, data)
    drawData(valuesList, ax, options)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read path stats directories and either rank or plot the assemblies.

    Assemblies are ordered by descending ``valuesDict[options.sortOn]``.
    When ``options.outputRanks`` is set, ``rankings`` is called and no image
    is created.
    """
    usage = (
        'usage: %prog --statsScaffoldsContigPathDir=path/to/dir/ '
        '--statsContigssContigPathDir=path/to/dir/ [options]\n\n'
        '%prog takes a directory of scaffold-alignment contig path stats xml files\n'
        '( --statsScaffoldsContigPathDir ) named as NAME.pathStats.xml, contig-alignment '
        'contig path stats xml files ( --statsContigsContigPathDir ) named as NAME.pathStats.xml,'
        ' and creates a plot.\n'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, cscp.initOptions, las.initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    # Library checks run before this module's own check.
    for validate in (cscp.checkOptions, las.checkOptions, lpt.checkOptions, checkOptions):
        validate(options, parser)

    def sortValue(assembly):
        return assembly.valuesDict[options.sortOn]

    assembliesList = sorted(readDirs(options), key=sortValue, reverse=True)
    maxesMax, minsMin = findMaxMin(assembliesList, options)

    if options.outputRanks:
        rankings(assembliesList, options)
        return

    fig, pdf = lpt.initImage(10.0, 8.0, options, data)
    axDict = establishAxis(fig, options, data)
    drawData(assembliesList, maxesMax, minsMin, axDict, options)
    lpt.writeImage(fig, pdf, options)
def drawCompareN50Plot( options, xsamples, ysamples ):
    """Draw an N50 comparison plot of ``xsamples`` against ``ysamples``.

    The output path is built from ``options.outdir``, ``options.prefix`` and
    the ``referenceName`` attribute of the first sample of each list.
    Returns without plotting when either list is empty, and returns after
    writing a message to stderr when ``drawCompareN50data`` yields no lines.
    """
    # NOTE(review): only xsamples is sorted by sample name here; ysamples is
    # passed on in caller order -- confirm drawCompareN50data pairs by name.
    xsamples = sorted( xsamples, key=lambda s: s.attrib[ 'sampleName' ] )
    if len(xsamples) < 1 or len(ysamples) < 1:
        return

    xrefname = xsamples[0].attrib[ 'referenceName' ]
    yrefname = ysamples[0].attrib[ 'referenceName' ]
    options.out = os.path.join( options.outdir,
                                options.prefix + '_' + xrefname + '_' + yrefname )

    fig, pdf = libplot.initImage( 8.0, 8.0, options )
    axes = fig.add_axes( [0.12, 0.1, 0.85, 0.85] )

    lines, lineNames, maxval, minval = drawCompareN50data( axes, xsamples, ysamples, options )
    if len(lines) == 0:
        sys.stderr.write('Comparing N50 stats of %s and %s: All values are 0, no plot created\n' %(xrefname, yrefname) )
        return

    axes.set_title( "N50" )

    # Legend: shrink the axes to 80% in both directions before placing it.
    fontP = FontProperties()
    fontP.set_size( 'small' )
    box = axes.get_position()
    axes.set_position( [box.x0, box.y0, box.width*0.8, box.height*0.8] )
    legend = pyplot.legend( lines, lineNames, numpoints=1, prop=fontP,
                            loc="best", bbox_to_anchor=(1, 0.5) )
    # Public API for hiding the legend frame; assigning the private
    # ``_drawFrame`` attribute has no effect on newer matplotlib releases.
    legend.draw_frame( False )

    libplot.setTicks( axes )

    # Same limits on both axes, padded by 10% of the value span on each side.
    span = maxval - minval
    lower = minval - span*0.1
    upper = maxval + span*0.1
    axes.set_xlim( lower, upper )
    axes.set_ylim( lower, upper )

    libplot.writeImage( fig, pdf, options )
def main():
    """Read contiguous path statistics files and either rank or plot them.

    When ``options.outputRanks`` is set, the ranks are printed and the
    process exits with status 0 without creating an image.
    """
    usage = ( 'usage: %prog [options] file1.xml file2.xml\n\n'
              '%prog takes in contiguous path statistics file(s)\n'
              'and creates an image file.' )
    data = Data()
    parser = OptionParser( usage=usage )
    initOptions( parser )
    las.initOptions( parser )
    lpt.initOptions( parser )
    options, args = parser.parse_args()
    checkOptions( args, options, parser )
    las.checkOptions( options, parser )
    lpt.checkOptions( options, parser )

    # The figure and axes are only needed when a plot will be produced.
    if not options.outputRanks:
        fig, pdf = lpt.initImage( 11., 8.0, options, data )
        axDict = establishAxes( fig, options, data )

    data.statsList, data.xData = readFiles( options )

    # Ensure that the buckets are all in order by their midpoint.
    # enumerate replaces the Python-2-only xrange index loop; entries are
    # still replaced in place within the same list.
    for i, buckets in enumerate( data.statsList ):
        data.statsList[i] = sorted( buckets, key=lambda x: x.mid )

    if options.outputRanks:
        ranks = rankFiles( options, data )
        printRanks( ranks, options, data )
        sys.exit(0)

    mainAxes = axDict['main']
    drawData( mainAxes, data.xData, data.statsList, options, data )
    drawLegend( options, data )
    drawAxisLabels( fig, options, data )
    setAxisLimits( mainAxes, data.xData, options, data )
    establishTicks( mainAxes, data.xData, options, data )
    lpt.writeImage( fig, pdf, options )
def main():
    """Read copy statistics and either rank or plot them.

    Entries are ordered by ascending ``sumLower``.  When
    ``options.outputRanks`` is set, ``rankings`` is called and no image is
    created or written.
    """
    usage = (
        'usage: %prog [options] --dir=path/to/dir/\n\n'
        '%prog takes in a copy statistics file\n'
        'and creates an image file.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, las.initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    checkOptions(args, options, parser)
    for validate in (las.checkOptions, lpt.checkOptions):
        validate(options, parser)

    # The figure is only needed when a plot will be produced.
    if not options.outputRanks:
        fig, pdf = lpt.initImage(8.0, 10.0, options, data)

    stats = readFiles(options)

    def lowerSum(entry):
        return entry.sumLower

    sortedOrder = sorted(stats.values(), key=lowerSum)

    if options.outputRanks:
        rankings(sortedOrder, options, data)
        return

    axDict = establishAxes(fig, options, data)
    drawData(axDict, sortedOrder, options, data)
    drawLegend(options, data)
    drawAxisLabels(axDict, stats, options, data)
    setAxisLimits(axDict, options, data)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read copy statistics and either rank or plot them.

    Entries are ordered by ascending ``sumLower``.  When
    ``options.outputRanks`` is set, ``rankings`` is called and no image is
    created or written.
    """
    usage = (
        'usage: %prog [options] --dir=path/to/dir/\n\n'
        '%prog takes in a copy statistics file\n'
        'and creates an image file.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, las.initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    checkOptions(args, options, parser)
    for validate in (las.checkOptions, lpt.checkOptions):
        validate(options, parser)

    # The figure is only needed when a plot will be produced.
    if not options.outputRanks:
        fig, pdf = lpt.initImage(8.0, 10.0, options, data)

    stats = readFiles(options)

    def lowerSum(entry):
        return entry.sumLower

    sortedOrder = sorted(stats.values(), key=lowerSum)

    if options.outputRanks:
        rankings(sortedOrder, options, data)
        return

    axDict = establishAxes(fig, options, data)
    drawData(axDict, sortedOrder, options, data)
    drawLegend(options, data)
    drawAxisLabels(axDict, stats, options, data)
    setAxisLimits(axDict, options, data)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read coverage values via ``readStream`` and plot them.

    Values are ordered by descending ``tot`` before drawing.
    """
    usage = (
        'usage: %prog [options] < rankedAssemblies.txt\n\n'
        '%prog takes via STDIN a list of coverage values, each line formatted as:\n'
        '#assembly ave.CovBothGenomes hap1 hap2 delta bac\n'
        'P1 .9885178 .9888077 .9882265 5.782e-04 0\n'
        'B1 .9869388 .9871948 .9866798 5.257e-04 .9978954\n'
        'F5 .9869094 .9872685 .9865338 7.295e-04 .9993371\n'
        '...\n'
        'And produces a plot showing the Total and Bacterial coverages for\n'
        'all assemblies in the input.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, lpt.checkOptions):
        validate(options, parser)

    def totalOf(entry):
        return entry.tot

    valuesList = sorted(readStream(options), key=totalOf, reverse=True)

    fig, pdf = lpt.initImage(8.0, 6.0, options, data)
    ax = establishAxis(fig, options, data)
    drawData(valuesList, ax, options)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read contig path stats from a directory and plot them.

    The image is 14 wide; it is 8 high for up to 20 assemblies and 24 high
    otherwise.
    """
    usage = (
        'usage: %prog --statsScaffoldsContigPathDir=path/to/dir/ [options]\n\n'
        '%prog takes a directory of contig path stats xml files\n'
        '( --statsScaffoldsContigPathDir ) named as NAME.contigPathStats.xml and creates a plot.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, las.initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    # Library checks run before this module's own check.
    for validate in (las.checkOptions, lpt.checkOptions, checkOptions):
        validate(options, parser)

    assembliesDict = readDir(options.statsScaffoldsContigPathDir, options)
    assembliesList = assembliesDict.values()

    figHeight = 8.0 if len(assembliesList) <= 20 else 24.0
    fig, pdf = lpt.initImage(14.0, figHeight, options, data)
    axDict = establishAxis(len(assembliesList), fig, options, data)

    assembliesList = createXYData(assembliesList, options)
    assembliesList = normalizeDist(assembliesList, options)

    def peakInsertionError(assembly):
        return max(assembly.yData['insertionErrorSizeDistribution'])

    # Largest peak first.
    assembliesList = sorted(assembliesList, key=peakInsertionError, reverse=True)

    drawData(assembliesList, axDict, options, data)
    drawLegend(assembliesList, axDict, options, data)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read a substitution stats directory and plot the sorted values.

    Per the usage text the plot shows the difference in substitutions in
    hap1 versus hap2.
    """
    usage = (
        'usage: %prog --subStatsDir=path/to/dir/ [options]\n\n'
        '%prog takes in a directory of substitution stats files ( --subStatsDir )\n'
        'with filenames as NAME.subStats.[upper|lower].xml and produces a plot showing\n'
        'the difference in subs in hap1 versus hap2.\n'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, las.initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, las.checkOptions, lpt.checkOptions):
        validate(options, parser)

    fig, pdf = lpt.initImage(11., 8., options, data)
    axDict = establishAxis(fig, options, data)

    # The reader is handed an empty dict and its return value is used.
    assembliesDict = cssp.readSubStatsDir({}, options)

    def ratio(entry):
        # Element 1 divided by element 2 of each entry, as a float.
        return float(entry[1]) / entry[2]

    valuesList = createValsList(assembliesDict.values(), options)
    valuesList = sorted(valuesList, key=ratio)  # ascending

    drawData(valuesList, axDict, options, data)
    lpt.writeImage(fig, pdf, options)
def getSummary( runs, analysis, options, sortId, refnames ):
    """Draw the summary figure for one analysis and write it out.

    The output file is ``summary_<analysis name>_<sortId>`` inside
    ``options.outdir``; the image is created at 16 x 20.
    """
    outName = 'summary_%s_%d' % (analysis.attrib['name'], sortId)
    options.out = os.path.join(options.outdir, outName)

    fig, pdf = libplot.initImage(16.0, 20.0, options)
    drawSum(fig, runs, analysis, options, refnames)
    libplot.writeImage(fig, pdf, options)
def drawScatterPlot( options, stats, type, cumulative ):
    """Draw a coverage scatter plot and write it out.

    The output file is ``coverageScatter_<type>`` inside ``options.outdir``,
    with ``_culm`` appended when ``cumulative`` is true.
    """
    suffix = "_culm" if cumulative else ""
    outName = ("coverageScatter_%s" % type) + suffix
    options.out = os.path.join(options.outdir, "%s" % outName)

    fig, pdf = libplot.initImage(12.0, 8.0, options)
    axes = fig.add_axes([0.1, 0.15, 0.85, 0.75])
    drawScatter(axes, options, stats, type, cumulative)
    libplot.writeImage(fig, pdf, options)
def drawCoveragePlot( options, stats, isAbs, ycutoff ):
    """Draw the coverage plot for ``stats`` and write it out.

    The output file name is built from ``ycutoff`` (two decimals), prefixed
    with ``rel_`` when ``isAbs`` is false, followed by the ``referenceName``
    of the first entry of ``stats``.  Nothing is drawn when ``stats`` is
    empty.
    """
    # Without any entries there is no reference name for the file name;
    # return quietly like drawCompareCoveragePlot2 does for empty input.
    if len(stats) == 0:
        return

    if isAbs:
        prefix = "coverage_%.2f_" % ycutoff
    else:
        prefix = "rel_coverage_%.2f_" % ycutoff
    options.out = os.path.join(options.outdir, prefix + stats[0].referenceName)

    fig, pdf = libplot.initImage( 8.0, 10.0, options )
    axes = fig.add_axes( [0.14, 0.2, 0.8, 0.6] )
    # The values returned by drawData were never used here.
    drawData( axes, stats, isAbs, ycutoff )
    libplot.writeImage( fig, pdf, options )
def drawCompareCoveragePlot2( options, stats, isAbs ):
    """Draw the second coverage comparison plot and write it out.

    Does nothing when ``stats`` is empty.  The output file name is
    ``cmpCoverage2_`` (or ``cmpRelCoverage2_`` when ``isAbs`` is false)
    followed by the two reference names of the first entry of ``stats``.
    """
    if len(stats) < 1:
        return

    prefix = "cmpCoverage2_" if isAbs else "cmpRelCoverage2_"
    first = stats[0]
    outName = "%s%s_%s" % (prefix, first.referenceName, first.otherReferenceName)
    options.out = os.path.join(options.outdir, outName)

    fig, pdf = libplot.initImage(12.0, 8.0, options)
    axes = fig.add_axes([0.09, 0.2, 0.9, 0.6])
    drawCompareData2(axes, options, stats, isAbs)
    libplot.writeImage(fig, pdf, options)
def drawPlot(samplesList, sampleNames, options):
    """Draw a grouped bar chart of ``totalIntraJoin`` for two sample lists.

    ``samplesList[0]`` and ``samplesList[1]`` are read; for every name in
    ``sampleNames`` the matching samples contribute one bar each.  The image
    is written to ``nonLinearBp`` inside ``options.outdir``.  Returns without
    creating an image when either list is empty.
    """
    options.out = os.path.join(options.outdir, "nonLinearBp")

    list1 = samplesList[0]
    list2 = samplesList[1]
    # Check before creating the figure so an empty input does not leave an
    # unwritten image behind.
    if len(list1) < 1 or len(list2) < 1:
        return

    fig, pdf = libplot.initImage(12.0, 8.0, options)
    axes = fig.add_axes([0.09, 0.2, 0.9, 0.6])

    refname1 = list1[0].attrib['referenceName']
    refname2 = list2[0].attrib['referenceName']

    # Counts in sampleNames order, one per sample whose name matches.
    y1data = [int(s.attrib['totalIntraJoin'])
              for sample in sampleNames
              for s in list1
              if sample == s.attrib['sampleName']]
    y2data = [int(s.attrib['totalIntraJoin'])
              for sample in sampleNames
              for s in list2
              if sample == s.attrib['sampleName']]

    # The second series is shifted right by one bar width.
    barwidth = 0.3
    x1data = range( len(y1data) )
    x2data = [ x + barwidth for x in x1data ]

    colors = ["#1F78B4", "#E31A1C"]
    l1 = axes.bar( x1data, y1data, barwidth, color=colors[0], ec='w' )
    l2 = axes.bar( x2data, y2data, barwidth, color=colors[1], ec='w' )
    lines = [ l1[0], l2[0] ]

    libplot.editSpine(axes)
    axes.set_title("Non-linear Breakpoints")

    # Ticks and labels.
    xlabels = [ libplot.properName(name) for name in sampleNames ]
    fontP = FontProperties()
    fontP.set_size('small')
    pyplot.xticks(x2data, xlabels, rotation=45, fontproperties=fontP)
    pyplot.yticks( fontproperties=fontP )
    pyplot.xlabel("Samples")
    pyplot.ylabel("Number of breakpoints")
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    # The on/off flag is passed positionally: its keyword was renamed from
    # ``b`` to ``visible`` in newer matplotlib releases.
    axes.yaxis.grid(True, color="#A8A8A8", linestyle='-', linewidth=0.25)

    legend = axes.legend( lines,
                          [libplot.properName(refname1), libplot.properName(refname2)],
                          prop=fontP, loc="best" )
    # Public API for hiding the legend frame; assigning the private
    # ``_drawFrame`` attribute has no effect on newer matplotlib releases.
    legend.draw_frame(False)

    libplot.writeImage(fig, pdf, options)
def drawCompareContiguityPlot( options, xstats, ystats ):
    """Draw a contiguity comparison of ``xstats`` against ``ystats``.

    The output file is ``<options.exp>_<x refname>_<y refname>`` inside
    ``options.outdir``, with ``_incCov`` appended when
    ``options.includeCov`` is set.
    """
    outName = options.exp + "_" + xstats.refname + "_" + ystats.refname
    options.out = os.path.join(options.outdir, outName)
    if options.includeCov:
        options.out += "_incCov"

    fig, pdf = libplot.initImage(8.0, 8.0, options)
    axesList = setCompareAxes(fig)
    drawCompareData(axesList, xstats, ystats, options)
    libplot.writeImage(fig, pdf, options)
def drawCnvPlot( sample, options ):
    """Draw the copy number variation plot for one sample and write it out.

    The output file is ``cnv_<sampleName>`` inside ``options.outdir``.  One
    panel is drawn for every key of the axes dict except ``'bg'``.
    """
    sampleName = sample.attrib['sampleName']
    options.out = os.path.join(options.outdir, 'cnv_%s' % sampleName)
    fig, pdf = libplot.initImage(11.0, 3.25, options)

    referenceName = sample.attrib['referenceName']
    title = "Copy Number Variation between %s and %s" % (
        libplot.properName(sampleName), libplot.properName(referenceName))

    cnvDict, minCn, maxCn = getSampleData(sample)
    axDict = setAxes(fig, cnvDict.keys(), options)

    for r in axDict:
        # The 'bg' entry is skipped; it has no per-key plot.
        if r == 'bg':
            continue
        drawOneCnvPlot(r, axDict[r], cnvDict[r], options, minCn, maxCn)

    drawAxisLabels(axDict, cnvDict, options, title, maxCn)
    setAxisLimits(axDict, minCn, maxCn)
    libplot.writeImage(fig, pdf, options)
def main():
    """Read an aggregate directory and draw the plots for the chosen mode.

    The return value of ``lpt.initImage`` is not kept here and
    ``lpt.writeImage`` is called with ``(options, data)``.
    """
    # NOTE(review): presumably the figure is carried on ``data`` by this
    # version of lpt -- confirm against the library.
    usage = (
        '%prog --dir=path/to/dir --mode=[scaffPaths|contigs|contigPaths|blocks|contamination] [options]\n\n'
        '%prog takes an aggregate directory ( --dir ) and a mode \n'
        '( --mode ) and then produces a pretty picture.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, lpt.checkOptions):
        validate(options, parser)

    readFiles(options, data)
    lpt.initImage(7.0, 8.0, options, data)
    for step in (establishAxis, drawPlots, lpt.writeImage):
        step(options, data)
def main():
    """Read an aggregate directory and draw the plots for the chosen mode.

    The return value of ``lpt.initImage`` is not kept here and
    ``lpt.writeImage`` is called with ``(options, data)``.
    """
    # NOTE(review): presumably the figure is carried on ``data`` by this
    # version of lpt -- confirm against the library.
    usage = (
        '%prog --dir=path/to/dir --mode=[scaffPaths|contigs|contigPaths|blocks|contamination] [options]\n\n'
        '%prog takes an aggregate directory ( --dir ) and a mode \n'
        '( --mode ) and then produces a pretty picture.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, lpt.checkOptions):
        validate(options, parser)

    readFiles(options, data)
    lpt.initImage(7.0, 8.0, options, data)
    for step in (establishAxis, drawPlots, lpt.writeImage):
        step(options, data)
def drawContiguityPlot( options, stats ):
    """Draw the contiguity plot for ``stats`` and write it out.

    The output file is ``<options.exp>_<stats.refname>`` inside
    ``options.outdir``, with ``_incCov`` appended when
    ``options.includeCov`` is set.  ``options.ycutoff`` is overwritten here:
    0.7 with coverage included, 0.95 otherwise.
    """
    options.out = os.path.join(options.outdir, options.exp + "_" + stats.refname)

    # HACK kept from the original: the cutoff is forced from includeCov.
    if not options.includeCov:
        options.ycutoff = 0.95
    else:
        options.out += "_incCov"
        options.ycutoff = 0.7

    fig, pdf = libplot.initImage(8.0, 10.0, options)
    axes = libplot.setAxes(fig)

    lines, sampleNames, ymin = drawData(axes, stats, options)
    drawLegend(axes, lines, sampleNames, options)

    # Fall back to just below the lowest y value if the cutoff is falsy.
    lowerLimit = options.ycutoff if options.ycutoff else ymin*0.98
    setAxisLimits(axes, lowerLimit)

    libplot.setTicks(axes)
    libplot.writeImage(fig, pdf, options)
def drawN50Plot( options, samples ):
    """Draw the N50 plot for ``samples`` and write it out.

    Samples are ordered by descending integer value of the attribute named
    by ``options.sortkey``.  The output file is
    ``<options.prefix>_<referenceName of the first sample>`` inside
    ``options.outdir``.  Returns without plotting when ``samples`` is empty.
    """
    # Sort samples:
    samples = sorted( samples, key=lambda s: int(s.attrib[ options.sortkey ]), reverse=True )
    sampleNames = getSampleNames( samples )
    if len(samples) < 1:
        return

    refname = samples[0].attrib[ 'referenceName' ]
    options.out = os.path.join( options.outdir, options.prefix + '_' + refname )

    fig, pdf = libplot.initImage( 8.0, 10.0, options )
    axes = fig.add_axes( [0.12, 0.1, 0.85, 0.85] )

    lines = drawN50data( axes, samples, options )
    axes.set_title( "N50" )

    # Legend: narrow the axes to 80% of their width before placing it.
    fontP = FontProperties()
    fontP.set_size( 'small' )
    box = axes.get_position()
    axes.set_position( [box.x0, box.y0, box.width*0.8, box.height] )
    legend = pyplot.legend( lines, options.keys, numpoints=1, prop=fontP,
                            loc="best", bbox_to_anchor=(1, 0.9) )
    # Public API for hiding the legend frame; assigning the private
    # ``_drawFrame`` attribute has no effect on newer matplotlib releases.
    legend.draw_frame( False )

    # One tick per sample, labelled with the sample's proper name, rotated.
    axes.set_xticks( range( 0, len(samples) ) )
    axes.set_xticklabels( [ libplot.properName(n) for n in sampleNames ] )
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation( 90 )
    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )
    axes.set_xlim( -0.5, len(samples) - 0.5 )

    libplot.writeImage( fig, pdf, options )
def main():
    """Read path stats (including phasing) and either rank or plot them.

    Assemblies are ordered by descending ``valuesDict[options.sortOn]``.
    When ``options.outputRanks`` is set, ``rankings`` is called and no image
    is created.
    """
    usage = (
        'usage: %prog --statsScaffoldsContigPathDir=path/to/dir/ '
        '--statsContigssContigPathDir=path/to/dir/ '
        '--statsScaffoldsContigPathPhasingDir=path/to/dir/ [options]\n\n'
        '%prog takes a directory of scaffold-alignment contig path stats xml files\n'
        '( --statsScaffoldsContigPathDir ) named as NAME.pathStats.xml, contig-alignment '
        'contig path stats xml files ( --statsContigsContigPathDir ) named as NAME.pathStats.xml,'
        'scaffold-alignment contig path phasing stats xml files ( --statsScaffoldsContigPathPhasingDir )'
        ' named as NAME.hap%d.pathStats.xml and creates a plot.\n'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, cscp.initOptions, las.initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    # Library checks run before this module's own check.
    for validate in (cscp.checkOptions, las.checkOptions, lpt.checkOptions, checkOptions):
        validate(options, parser)

    def sortValue(assembly):
        return assembly.valuesDict[options.sortOn]

    assembliesList = sorted(readData(options), key=sortValue, reverse=True)
    maxesMax, minsMin = findMaxMin(assembliesList, options)

    if options.outputRanks:
        rankings(assembliesList, options)
        return

    fig, pdf = lpt.initImage(10.0, 8.0, options, data)
    axDict = establishAxis(fig, options, data)
    drawData(assembliesList, maxesMax, minsMin, axDict, options)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read a scaffolds file and a contigs file and draw the figure.

    Both files are read with ``readFile`` and passed through
    ``processData`` before the 8 x 5 image is created.
    """
    usage = (
        'usage: %prog --scaffoldsFile=sFile.txt --contigsFile=cFile.txt --size=N --title=TITLE\n\n'
        '%prog takes in a scaffolds file (--scaffoldsFile), a contigs\n'
        'file (--contigs), the size of the genome (--size) and a title (--title)\n'
        'and then produces an N50 style figure.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, lpt.checkOptions):
        validate(options, parser)

    # Scaffolds are read before contigs.
    scaffolds = readFile(options.scaffoldsFile)
    contigs = readFile(options.contigsFile)
    pScaffs, pContigs = processData(scaffolds, contigs, options)

    fig, pdf = lpt.initImage(8.0, 5.0, options, data)
    ax = establishAxis(fig, options)
    drawData(pScaffs, pContigs, ax, options)
    lpt.writeImage(fig, pdf, options)
def drawPlots( options, samples, outname, proportion, culm ):
    """Draw the indel distribution plots for ``samples`` and write them out.

    Returns without plotting when ``samples`` is empty.  The reference name
    of the first sample is written to stderr.  The output file is
    ``indelDist_<outname>`` (``indelDist2_<outname>`` when ``proportion`` is
    true) inside ``options.outdir``, with ``_culm`` appended when ``culm``
    is true.
    """
    if len(samples) < 1:
        return

    refname = samples[0].attrib['referenceName']
    sys.stderr.write("%s\n" % refname)

    stem = 'indelDist2_' if proportion else 'indelDist_'
    options.out = os.path.join(options.outdir, stem + outname)
    if culm:
        options.out += '_culm'

    fig, pdf = libplot.initImage(8.0, 10.0, options)
    samplesPerPlot = 10
    axesList = setAxes(fig, len(samples), samplesPerPlot)
    drawData(axesList, samples, samplesPerPlot, options, proportion, culm)
    libplot.writeImage(fig, pdf, options)
def main():
    """Read an aggregate text file and draw the figure for the chosen mode.

    The values are normalised with the contamination routine when
    ``options.mode`` is ``'contamination'`` and with the normal-mode routine
    otherwise.  Axis limits are set both before and after drawing.
    """
    usage = (
        '%prog --file=file.txt --mode=[scaffPaths|contigs|contigPaths|blocks|contamination] [options]\n\n'
        '%prog takes an aggregate text file ( --file ) and a mode \n'
        '( --mode ) and then produces a pretty picture.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, lpt.checkOptions):
        validate(options, parser)

    fig, pdf = lpt.initImage(8.0, 10.0, options, data)
    axDict = establishAxes(fig, options, data)
    mainAx, crazyAx, blowUpAx = axDict['main'], axDict['crazy'], axDict['blowUp']

    data.valuesDict = readFile(options.file, options)
    data.xData = data.valuesDict['columnLength']

    # Only the normal-mode routine returns the replacement values dict.
    if options.mode == 'contamination':
        normalizeDataContaminationMode(options, data)
    else:
        data.valuesDict = normalizeDataNormalMode(data.valuesDict, options, data)

    setAxisLimits(mainAx, crazyAx, blowUpAx, data.xData, options, data)
    drawData(mainAx, crazyAx, blowUpAx, data.xData, data.valuesDict, options, data)
    drawLegend(options, data)
    drawAxisLabels(fig, options, data)
    # Limits are applied a second time once everything has been drawn.
    setAxisLimits(mainAx, crazyAx, blowUpAx, data.xData, options, data)
    establishTicks(mainAx, crazyAx, blowUpAx, options, data)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read a substitution stats directory and either rank or plot it.

    When ``options.outputRanks`` is set, ``rankings`` is called and the
    function returns without creating or writing an image.
    """
    usage = (
        'usage: %prog --subStatsDir=path/to/dir/ [options]\n\n'
        '%prog takes in a directory of substitution stats files ( --subStatsDir )\n'
        'with filenames as NAME.subStats.[upper|lower].xml and produces a plot.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, las.initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, las.checkOptions, lpt.checkOptions):
        validate(options, parser)

    # The figure and axes are only needed when a plot will be produced.
    if not options.outputRanks:
        fig, pdf = lpt.initImage(9., 11., options, data)
        axDict = establishAxes(fig, options, data)

    assembliesDict = readSubStatsDir({}, options)
    sumErrors(assembliesDict, options)
    normalizeData(assembliesDict, options)

    def allLoOf(name):
        return assembliesDict[name].allLo

    # Assembly keys ordered by ascending ``allLo``.
    sortOrder = sorted(assembliesDict, key=allLoOf)

    if options.outputRanks:
        rankings(assembliesDict, sortOrder, options, data)
        return

    drawData(assembliesDict, sortOrder, axDict, options, data)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read an aggregate text file and draw the figure for the chosen mode.

    The values are normalised with the contamination routine when
    ``options.mode`` is ``'contamination'`` and with the normal-mode routine
    otherwise.  Axis limits are set both before and after drawing.
    """
    usage = (
        '%prog --file=file.txt --mode=[scaffPaths|contigs|contigPaths|blocks|contamination] [options]\n\n'
        '%prog takes an aggregate text file ( --file ) and a mode \n'
        '( --mode ) and then produces a pretty picture.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, lpt.checkOptions):
        validate(options, parser)

    fig, pdf = lpt.initImage(8.0, 10.0, options, data)
    axDict = establishAxes(fig, options, data)
    mainAx, crazyAx, blowUpAx = axDict['main'], axDict['crazy'], axDict['blowUp']

    data.valuesDict = readFile(options.file, options)
    data.xData = data.valuesDict['columnLength']

    # Only the normal-mode routine returns the replacement values dict.
    if options.mode == 'contamination':
        normalizeDataContaminationMode(options, data)
    else:
        data.valuesDict = normalizeDataNormalMode(data.valuesDict, options, data)

    setAxisLimits(mainAx, crazyAx, blowUpAx, data.xData, options, data)
    drawData(mainAx, crazyAx, blowUpAx, data.xData, data.valuesDict, options, data)
    drawLegend(options, data)
    drawAxisLabels(fig, options, data)
    # Limits are applied a second time once everything has been drawn.
    setAxisLimits(mainAx, crazyAx, blowUpAx, data.xData, options, data)
    establishTicks(mainAx, crazyAx, blowUpAx, options, data)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read copy number statistics and draw them into an 11 x 3.25 image."""
    usage = (
        'usage: %prog [options] file1.xml\n\n'
        '%prog takes in a copy number statistics file\n'
        'and creates an image file.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    # The local check also receives the positional arguments.
    checkOptions(args, options, parser)
    lpt.checkOptions(options, parser)

    fig, pdf = lpt.initImage(11.0, 3.25, options, data)

    storedCategories = readFiles(options)
    establishGlobalMinMax(storedCategories, options, data)
    axDict = establishAxes(fig, storedCategories, options, data)

    drawData(axDict, storedCategories, options, data)
    drawLegend(options, data)
    drawAxisLabels(axDict, storedCategories, options, data)
    setAxisLimits(axDict, options, data)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read a scaffolds file and a contigs file and draw the figure.

    Both files are read with ``readFile`` and passed through
    ``processData`` before the 8 x 5 image is created.
    """
    usage = (
        'usage: %prog --scaffoldsFile=sFile.txt --contigsFile=cFile.txt --size=N --title=TITLE\n\n'
        '%prog takes in a scaffolds file (--scaffoldsFile), a contigs\n'
        'file (--contigs), the size of the genome (--size) and a title (--title)\n'
        'and then produces an N50 style figure.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    for validate in (checkOptions, lpt.checkOptions):
        validate(options, parser)

    # Scaffolds are read before contigs.
    scaffolds = readFile(options.scaffoldsFile)
    contigs = readFile(options.contigsFile)
    pScaffs, pContigs = processData(scaffolds, contigs, options)

    fig, pdf = lpt.initImage(8.0, 5.0, options, data)
    ax = establishAxis(fig, options)
    drawData(pScaffs, pContigs, ax, options)
    lpt.writeImage(fig, pdf, options)
def main():
    """Read copy number statistics and draw them into an 11 x 3.25 image."""
    usage = (
        'usage: %prog [options] file1.xml\n\n'
        '%prog takes in a copy number statistics file\n'
        'and creates an image file.'
    )
    data = Data()
    parser = OptionParser(usage=usage)
    for register in (initOptions, lpt.initOptions):
        register(parser)
    options, args = parser.parse_args()
    # The local check also receives the positional arguments.
    checkOptions(args, options, parser)
    lpt.checkOptions(options, parser)

    fig, pdf = lpt.initImage(11.0, 3.25, options, data)

    storedCategories = readFiles(options)
    establishGlobalMinMax(storedCategories, options, data)
    axDict = establishAxes(fig, storedCategories, options, data)

    drawData(axDict, storedCategories, options, data)
    drawLegend(options, data)
    drawAxisLabels(axDict, storedCategories, options, data)
    setAxisLimits(axDict, options, data)
    lpt.writeImage(fig, pdf, options)
def drawRef2(rexps, exps, options, outfile, numCats):
    """Plot the first ``numCats`` mapping categories for every sample.

    Samples are ordered by decreasing ``total`` of their entry in ``rexps``
    (the 'average' entry is skipped while ordering and appended last).  One
    series of dots is drawn per category, each shifted horizontally by a
    small offset so the categories sit side by side within a sample column.

    Args:
        rexps: mapping of sample name to an object exposing ``ref`` and
            ``total``.
        exps: passed through unchanged to ``getData2``.
        options: plotting options; ``options.out`` is set to ``outfile``.
        outfile: output path/prefix handed to ``libplot`` via ``options``.
        numCats: how many of the categories (in the fixed order mapped,
            properlyPaired, uniquelyMapped,
            uniquelyMappedAndProperlyPaired) to draw.

    Returns:
        None.  Nothing is drawn or written when ``rexps`` is empty.
    """
    options.out = outfile

    # Check for empty input BEFORE creating the figure; the original created
    # the figure (and output handle) first and then abandoned both.
    if len(rexps) < 1:
        return

    fig, pdf = libplot.initImage(8.0, 10.0, options)
    axes = fig.add_axes([0.12, 0.14, 0.85, 0.8])

    sampleNotherRefmapped = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        e = rexps[sample]
        ref = e.ref
        sampleNotherRefmapped.append((sample, e.total))

    otherRefName = libplot.properName(ref)

    # HACK (kept from the original): the title is hard-coded rather than
    # built from otherRefName.
    axes.set_title("Mapability of C. Ref. in Comparison to GRCh37 haplotypes")

    sampleNotherRefmapped = sorted(sampleNotherRefmapped,
                                   key=lambda item: item[1], reverse=True)
    samples = [item[0] for item in sampleNotherRefmapped]
    samples.append('average')

    xdata = range(0, len(samples))
    colors = libplot.getColors4()
    lines = []
    titleDict = {'mapped': 'Mapped',
                 'properlyPaired': 'Properly Paired',
                 'uniquelyMapped': 'Uniquely Mapped',
                 'uniquelyMappedAndProperlyPaired':
                     'Uniquely Mapped And Properly Paired'}
    ydataList, miny, maxy = getData2(samples, rexps, exps, titleDict.keys())

    offset = 0.12
    # scale stays -1 unless the values are large enough to be shown as
    # multiples of a power of ten.
    scale = -1
    if miny > 1000:
        scale = len(str(int(miny))) - 1

    linenames = []
    categories = ["mapped", "properlyPaired", "uniquelyMapped",
                  "uniquelyMappedAndProperlyPaired"]
    cats = categories[:numCats]
    for i, key in enumerate(cats):
        xdatai = [x + offset*i for x in xdata]
        ydata = ydataList[key]
        if scale > 0:
            ydata = [float(y)/10**scale for y in ydata]
        # The i-th category uses the i-th palette colour.
        l = axes.plot(xdatai, ydata, color=colors[i], marker='.',
                      markersize=16.0, linestyle='none')
        lines.append(l)
        linenames.append(titleDict[key])

    if scale > 0:
        miny = float(miny)/10**scale
        maxy = float(maxy)/10**scale

    # Draw horizontal line at y = 0:
    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(linenames) + offset
    axes.plot([xmin, xmax], [0, 0], color="#6B6B6B", linewidth=0.005)

    fontP = FontProperties()
    fontP.set_size('x-small')

    # Pad the y range: a little below, more above to leave room for the legend.
    yrange = maxy - miny
    miny = miny - yrange*0.05
    maxy = maxy + yrange*0.2

    # Draw vertical lines to separate each sample:
    d = (1 - offset*len(linenames))/2.0
    for i in range(1, len(samples)):
        axes.plot([i - d, i - d], [miny, maxy], color="#CCCCCC",
                  linewidth=0.005)

    axes.set_xlim(xmin, xmax)
    axes.set_ylim(miny, maxy)
    libplot.editSpine(axes)

    # Centre each tick under its group of category dots.
    axes.set_xticks([i + offset*(len(linenames)/2.0)
                     for i in range(0, len(samples))])
    axes.set_xticklabels(samples)
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')

    legend = pyplot.legend(lines, linenames, numpoints=1, loc='upper right',
                           prop=fontP)
    legend._drawFrame = False

    axes.set_xlabel('Samples')
    if scale > 0:
        axes.set_ylabel('Event counts (x%d)' % (10**scale))
    else:
        axes.set_ylabel('Percentage of mapping difference between C. Ref. and %s'
                        % otherRefName)
    axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage(fig, pdf, options)
def drawPlot(rexps, exps, options, outfile, type):
    """Plot one mapping statistic per sample, one dot series per reference.

    Samples are ordered by decreasing ``total`` of their entry in ``rexps``
    ('average' is skipped while ordering and appended last).  For every
    reference key returned by ``getData`` a series of dots is drawn, shifted
    horizontally per reference; a thin grey line connects the dots that
    belong to the same sample.

    Args:
        rexps: mapping of sample name to an object exposing ``ref`` and
            ``total``.
        exps: passed through to ``getData``.
        options: plotting options; ``options.out`` is set to ``outfile``.
        outfile: output path/prefix handed to ``libplot`` via ``options``.
        type: statistic to plot; must be one of 'mapped', 'uniquelyMapped',
            'properlyPaired', 'uniquelyMappedAndProperlyPaired', 'snps'.

    Returns:
        None.  Nothing is drawn or written when ``rexps`` is empty.

    Raises:
        KeyError: if ``type`` is not one of the known statistics.
    """
    options.out = outfile

    # Check for empty input BEFORE creating the figure; the original created
    # the figure (and output handle) first and then abandoned both.
    if len(rexps) < 1:
        return

    titleDict = {'mapped': 'Mapped reads',
                 'uniquelyMapped': 'Uniquely Mapped Reads',
                 'properlyPaired': 'Properly Paired Reads',
                 'uniquelyMappedAndProperlyPaired':
                     'Uniquely Mapped And Properly Paired Reads',
                 'snps': 'SNPs'}

    fig, pdf = libplot.initImage(8.0, 10.0, options)
    axes = fig.add_axes([0.12, 0.14, 0.85, 0.8])
    axes.set_title(titleDict[type])

    sampleNotherRefmapped = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        exp = rexps[sample]
        ref = exp.ref
        sampleNotherRefmapped.append((sample, exp.total))

    otherRefName = libplot.properName(ref)
    sampleNotherRefmapped = sorted(sampleNotherRefmapped,
                                   key=lambda item: item[1], reverse=True)
    samples = [item[0] for item in sampleNotherRefmapped]
    samples.append('average')

    xdata = range(0, len(samples))
    colors = libplot.getColors4()
    lines = []
    ydataList, miny, maxy = getData(samples, exps, rexps, type)
    refs = sorted(ydataList.keys())

    offset = 0.12
    # scale stays -1 unless the values are large enough to be shown as
    # multiples of a power of ten.
    scale = -1
    if miny > 1000:
        scale = len(str(int(miny))) - 1

    # Compute the (shifted x, scaled y) series once per reference; both the
    # connector lines and the dots use the same coordinates.
    series = []
    for i, refKey in enumerate(refs):
        shiftedX = [x + offset*i for x in xdata]
        scaledY = ydataList[refKey]
        if scale > 0:
            scaledY = [float(y)/10**scale for y in scaledY]
        series.append((shiftedX, scaledY))

    # Draw line connecting the data for each sample (each bin):
    for j in range(len(xdata)):
        binX = [pair[0][j] for pair in series]
        binY = [pair[1][j] for pair in series]
        axes.plot(binX, binY, color="#CCCCCC", linestyle='-', linewidth=0.005)

    # Draw main plots.  As in the original, the first palette colour is
    # skipped: the i-th reference uses colors[i + 1].
    for i, (shiftedX, scaledY) in enumerate(series):
        l = axes.plot(shiftedX, scaledY, color=colors[i + 1], marker='.',
                      markersize=16.0, linestyle='none')
        lines.append(l)

    if scale > 0:
        miny = float(miny)/10**scale
        maxy = float(maxy)/10**scale

    # Draw horizontal line at y = 0:
    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(refs) + offset
    axes.plot([xmin, xmax], [0, 0], color="#6B6B6B", linewidth=0.005)

    fontP = FontProperties()
    fontP.set_size('x-small')

    # Pad the y range: a little below, more above to leave room for the legend.
    yrange = maxy - miny
    miny = miny - yrange*0.05
    maxy = maxy + yrange*0.2

    axes.set_xlim(xmin, xmax)
    axes.set_ylim(miny, maxy)
    libplot.editSpine(axes)

    # Centre each tick under its group of reference dots.
    axes.set_xticks([i + offset*(len(refs)/2.0)
                     for i in range(0, len(samples))])
    axes.set_xticklabels(samples)
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')

    # Legend labels: "cactusRef<suffix>" becomes "<proper cactusRef> <suffix>".
    prefix = 'cactusRef'
    properRefs = []
    for r in refs:
        if prefix in r:
            # str.lstrip(prefix) would strip any leading run of the
            # characters c/a/t/u/s/R/e/f and could eat into the suffix;
            # remove the literal prefix instead.
            if r.startswith(prefix):
                r = r[len(prefix):]
            properRefs.append("%s %s" % (libplot.properName(prefix), r))
        else:
            properRefs.append(libplot.properName(r))

    legend = pyplot.legend(lines, properRefs, numpoints=1, loc='best',
                           prop=fontP)
    legend._drawFrame = False

    axes.set_xlabel('Samples')
    if scale > 0:
        axes.set_ylabel('Event counts (x%d)' % (10**scale))
    else:
        axes.set_ylabel('Percentage of mapping difference between C. Ref. and %s'
                        % otherRefName)
    axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage(fig, pdf, options)
def drawSamplePlot(rexps, exps, options, outfile, type):
    """Plot the per-sample SNP rate for the other reference and cactusRef.

    Samples are ordered by decreasing ``snprate`` of their entry in
    ``rexps`` ('average' is skipped while ordering and appended last).  Two
    dot series are drawn: the ``snprate`` from ``rexps`` and the ``snprate``
    of the experiment in ``exps[sample]`` whose ``ref`` is 'cactusRef' and
    whose ``weight`` is 2.

    Args:
        rexps: mapping of sample name (including 'average') to an object
            exposing ``ref`` and ``snprate``.
        exps: mapping of sample name (including 'average') to a list of
            objects exposing ``ref``, ``weight`` and ``snprate``.
        options: plotting options; ``options.out`` is set to ``outfile``.
        outfile: output path/prefix handed to ``libplot`` via ``options``.
        type: unused; kept for signature compatibility with the other
            drawing functions.

    Returns:
        None.  Nothing is drawn or written when ``rexps`` is empty.

    Raises:
        ValueError: if the samples do not each contribute exactly one
            cactusRef/weight-2 value in total (the plot would otherwise fail
            later with a less helpful error).
    """
    options.out = outfile

    # Check for empty input BEFORE creating the figure; the original created
    # the figure (and output handle) first and then abandoned both.
    if len(rexps) < 1:
        return

    sampleNsize = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        exp = rexps[sample]
        ref = exp.ref
        sampleNsize.append((sample, exp.snprate))

    otherRefName = ref
    sampleNsize = sorted(sampleNsize, key=lambda item: item[1], reverse=True)
    samples = [item[0] for item in sampleNsize]
    samples.append('average')

    # Get ydata:
    ydata1 = []  # otherRef (hg19, apd, ...)
    ydata2 = []  # cactusRef, weight 2
    for sample in samples:
        ydata1.append(rexps[sample].snprate)
        for e in exps[sample]:
            if e.ref == 'cactusRef' and e.weight == 2:
                ydata2.append(e.snprate)

    # Both series are plotted against the same x positions, so they must
    # have the same length; fail early with a clear message.
    if len(ydata2) != len(samples):
        raise ValueError(
            "Expected one cactusRef (weight 2) experiment per sample: "
            "got %d values for %d samples" % (len(ydata2), len(samples)))

    miny = min(ydata1 + ydata2)
    maxy = max(ydata1 + ydata2)

    xdata = range(0, len(samples))
    colors = ["#1F78B4", "#E31A1C"]  # blue (other reference), red (cactusRef)

    # scale stays -1 unless the values are large enough to be shown as
    # multiples of a power of ten.
    scale = -1
    if miny > 1000:
        scale = len(str(int(miny))) - 1
    if scale > 0:
        ydata1 = [float(y)/10**scale for y in ydata1]
        ydata2 = [float(y)/10**scale for y in ydata2]
        miny = float(miny)/10**scale
        maxy = float(maxy)/10**scale

    fig, pdf = libplot.initImage(11.2, 10.0, options)
    axes = fig.add_axes([0.14, 0.12, 0.8, 0.8])
    axes.set_title("SNP Rate Using BWA Mapping")

    lines = []
    lines.append(axes.plot(xdata, ydata1, color=colors[0], marker=".",
                           markersize=16.0, linestyle='none'))
    lines.append(axes.plot(xdata, ydata2, color=colors[1], marker=".",
                           markersize=16.0, linestyle='none'))

    fontP = FontProperties()
    fontP.set_size('x-small')

    axes.set_xlim(-0.4, len(samples) - 0.6)
    # Pad the y range slightly on both sides.
    yrange = maxy - miny
    miny = miny - yrange*0.05
    maxy = maxy + yrange*0.1
    axes.set_ylim(miny, maxy)

    libplot.editSpine(axes)

    axes.set_xticks(xdata)
    axes.set_xticklabels(samples)
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.yaxis.set_ticks_position('left')
    axes.xaxis.set_ticks_position('bottom')

    legend = pyplot.legend(lines,
                           [libplot.properName(otherRefName),
                            libplot.properName("cactusRef")],
                           numpoints=1, loc='best', prop=fontP)
    legend._drawFrame = False

    axes.set_xlabel('Samples')
    if scale > 0:
        axes.set_ylabel('Snp counts (x%d)' % (10**scale))
    else:
        axes.set_ylabel('SNPs Per Site')
    axes.xaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage(fig, pdf, options)
def drawPlot2(exps, options, outfile, type):
    """Draw the total-indel counts on a broken y axis (outliers on top).

    The values returned by ``getData`` for the 'All' and 'No repeats'
    experiments are plotted twice: on the upper axes, limited to the outlier
    range, and on the lower axes, limited to the normal range, with a small
    diagonal mark on each to indicate the discontinuity.

    Args:
        exps: mapping of experiment name to its data; must contain 'All'.
        options: plotting options; ``options.out`` is set to ``outfile``.
        outfile: output path/prefix handed to ``libplot`` via ``options``.
        type: statistic to plot; only 'total' has a title defined.

    Returns:
        None.  Nothing is drawn or written when ``exps`` has no 'All' entry
        or ``getSamplesOrder`` yields no samples.

    Raises:
        KeyError: if ``type`` has no title defined.
    """
    options.out = outfile

    # Run the empty-input checks BEFORE creating the figure; the original
    # created the figure (and output handle) first and then abandoned both.
    if 'All' not in exps:
        return
    samples = getSamplesOrder(exps['All'], type)
    if len(samples) < 1:
        return

    # Resolve the title up front so an unsupported type fails before any
    # drawing work is done (the original only failed at the very end).
    titleDict = {'total': 'Total Indels Called'}
    title = titleDict[type]

    fig, pdf = libplot.initImage(11.2, 10.0, options)

    samples.append('average')
    samples.append('reference')
    samples.append('panTro3')

    xdata = range(0, len(samples))
    colors = libplot.getColors6()
    lines = []
    pointsize = 10.0
    offset = 0.15
    exporder = ['All', 'No repeats']

    # Get ydata
    ydataList, ymin, ymax = getData(samples, exps, type, exporder)
    yrange = ymax - ymin

    # Get normal range and outlier range (each padded by 5% of the range):
    normalvals, outliers = getOutliers(ydataList)
    minNormal = min(normalvals) - 0.05*yrange
    maxNormal = max(normalvals) + 0.05*yrange
    minOutlier = min(outliers) - 0.05*yrange
    maxOutlier = max(outliers) + 0.05*yrange
    if minNormal < 0:
        minNormal = -0.5

    # Set up the axes
    ax, ax2 = setAxes(fig, maxOutlier - minOutlier, maxNormal - minNormal)

    # scale stays -1 unless the values are large enough to be shown as
    # multiples of a power of ten.
    scale = -1
    if minNormal > 1000:
        scale = len(str(int(minNormal))) - 1

    if scale > 0:
        for exp in ydataList:
            ydataList[exp] = [float(y)/10**scale for y in ydataList[exp]]

    # PLOT: the same series goes on both axes; only the y limits differ.
    for i, exp in enumerate(exporder):
        xdatai = [x + offset*i for x in xdata]
        ydata = ydataList[exp]
        # Outlier plot
        l = ax.plot(xdatai, ydata, color=colors[i], marker='.',
                    markersize=pointsize, linestyle='none')
        lines.append(l)
        # Normal range plot
        ax2.plot(xdatai, ydata, color=colors[i], marker='.',
                 markersize=pointsize, linestyle='none')

    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(exps) + offset*3

    fontP = FontProperties()
    fontP.set_size('x-small')

    if scale > 0:
        minNormal = float(minNormal)/10**scale
        maxNormal = float(maxNormal)/10**scale
        minOutlier = float(minOutlier)/10**scale
        maxOutlier = float(maxOutlier)/10**scale

    # Draw the discontinuity sign.  The half-height is in data units, so it
    # is larger when the data have not been scaled down.
    d = 0.2
    if scale == -1:
        d = 50
    ax.plot((-1, 0), (minOutlier + d, minOutlier - d), color="k",
            clip_on=False)
    ax2.plot((-1, 0), (maxNormal + d, maxNormal - d), color="k",
             clip_on=False)

    # Draw vertical lines to separate each sample:
    gap = (1 - offset*len(exporder))/2.0
    for i in range(1, len(samples)):
        x = [i - gap, i - gap]
        y = [minNormal, maxOutlier]
        ax.plot(x, y, color="#CCCCCC", linewidth=0.005)
        ax2.plot(x, y, color="#CCCCCC", linewidth=0.005)

    xticklabels = [libplot.properName(s) for s in samples]

    # Set limit for the top plot (outlier)
    ax.set_ylim(minOutlier, maxOutlier)
    ax.set_xlim(xmin, xmax)
    ax.set_xticks([i + offset*(len(exps)/2-1)
                   for i in range(0, len(samples))])
    # The top plot shares tick positions with the bottom one but shows no
    # labels.
    ax.set_xticklabels(["" for _ in xticklabels])

    # Make sure the y ticks of the top plot are on the same grid as the
    # bottom plot:
    step = 2
    if scale == -1:
        step = 500
    ytickpositions = []
    ytickpos = 0
    while ytickpos < maxOutlier:
        if ytickpos >= minOutlier:
            ytickpositions.append(ytickpos)
        ytickpos += step
    ax.set_yticks(ytickpositions)

    # Set limit for the bottom plot:
    ax2.set_ylim(minNormal, maxNormal)
    ax2.set_xlim(xmin, xmax)

    # Hide the spines between ax and ax2:
    ax.spines['bottom'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('none')

    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.xaxis.tick_bottom()
    ax2.yaxis.set_ticks_position('left')

    ax2.set_xticks([i + offset*(len(exps)/2-1)
                    for i in range(0, len(samples))])
    ax2.set_xticklabels(xticklabels)
    for label in ax2.xaxis.get_ticklabels():
        label.set_rotation(75)

    legend = pyplot.legend(lines, exporder, numpoints=1, loc='upper left',
                           prop=fontP)
    legend._drawFrame = False

    ax2.set_xlabel('Samples')
    ylabel = 'Number of indels'
    if scale > 0:
        ylabel += '(x%d)' % 10**scale
    ax2.set_ylabel(ylabel)
    ax.set_title(title)

    ax.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax2.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage(fig, pdf, options)
def drawPlot(exps, options, outfile, type):
    """Plot an indel statistic per sample, one dot series per experiment.

    The experiments drawn, their colours and the legend labels depend on
    ``type``.  For 'tpfn' the 'tp' values of both experiments and the 'fn'
    values of the first experiment are drawn side by side.

    Args:
        exps: mapping of experiment name to its data; must contain 'All'.
        options: plotting options; ``options.out`` is set to ``outfile``.
        outfile: output path/prefix handed to ``libplot`` via ``options``.
        type: one of 'tpfn', 'tp', 'tp2', 'fn', 'total'.

    Returns:
        None.  Nothing is drawn or written when ``exps`` has no 'All' entry
        or ``getSamplesOrder`` yields no samples.

    Raises:
        KeyError: if ``type`` has no title defined.
    """
    options.out = outfile

    # Run the empty-input checks BEFORE creating the figure; the original
    # created the figure (and output handle) first and then abandoned both.
    if 'All' not in exps:
        return
    samples = getSamplesOrder(exps['All'], type)
    if len(samples) < 1:
        return

    titleDict = {'tpfn': 'Indel Overlap with dbSNP',
                 'tp': 'True Positives According to dbSNP',
                 'tp2': 'Indel Overlap with dbSNP',
                 'fn': 'False Negatives According to dbSNP',
                 'total': 'Total Indels Called'}

    fig, pdf = libplot.initImage(11.2, 10.0, options)
    axes = fig.add_axes([0.12, 0.18, 0.85, 0.75])
    axes.set_title(titleDict[type])

    samples.append('average')
    # NOTE(review): 'reference' and 'panTro3' are both treated as excluded
    # from the false-negative plot; confirm that 'panTro3' belongs inside
    # this condition.
    if type != 'fn':
        samples.append('reference')
        samples.append('panTro3')

    xdata = range(0, len(samples))
    colors = libplot.getColors6()
    lines = []
    pointsize = 10.0
    offset = 0.15

    # Experiments to draw, in plotting order, for each type.
    exporder = ['Wobble', 'Wobble, No repeats', 'All', 'No repeats']
    if type == 'fn':
        exporder = ['Wobble', 'All']
    elif type == 'total':
        exporder = ['All', 'No repeats']
    elif type == 'tp2' or type == 'tpfn':
        exporder = ['Wobble', 'Wobble, No repeats']

    # Get ydata
    ydataList, ymin, ymax = getData(samples, exps, type, exporder)

    # scale stays -1 unless the values are large enough to be shown as
    # multiples of a power of ten.
    scale = -1
    if ymin > 1000:
        scale = len(str(int(ymin))) - 1

    if scale > 0:
        for exp in ydataList:
            ydataList[exp] = [float(y)/10**scale for y in ydataList[exp]]

    if type == 'tpfn':
        colorIndex = -1
        for j, t in enumerate(['tp', 'fn']):
            for i, exp in enumerate(exporder):
                # Only the first experiment's false negatives are drawn.
                if i > 0 and t == 'fn':
                    continue
                if t == 'tp':
                    xdatai = [x + offset*(j*2+i) for x in xdata]
                else:
                    # The 'fn' series does not cover the last three columns.
                    xdatai = [x + offset*(j*2+i)
                              for x in xdata[: len(xdata) - 3]]
                ydata = ydataList["%s.%s" % (exp, t)]
                colorIndex += 1
                lines.append(axes.plot(xdatai, ydata,
                                       color=colors[colorIndex], marker='.',
                                       markersize=pointsize,
                                       linestyle='none'))
    else:
        # For 'fn' every other palette colour is skipped (indices 0, 2), so
        # 'Wobble' and 'All' get the same colours as in the 'tp' plot.
        colorStep = 1
        if type == 'fn':
            colorStep = 2
        for i, exp in enumerate(exporder):
            xdatai = [x + offset*i for x in xdata]
            ydata = ydataList[exp]
            l = axes.plot(xdatai, ydata, color=colors[i*colorStep],
                          marker='.', markersize=pointsize, linestyle='none')
            lines.append(l)

    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(exps) + offset*3

    fontP = FontProperties()
    fontP.set_size('x-small')

    if scale > 0:
        ymin = float(ymin)/10**scale
        ymax = float(ymax)/10**scale
    # Pad the y range by 1% on both sides.
    datarange = ymax - ymin
    ymin = ymin - datarange*0.01
    ymax = ymax + datarange*0.01

    # Draw vertical lines to separate each sample:
    gap = (1 - offset*len(exporder))/2.0
    for i in range(1, len(samples)):
        axes.plot([i - gap, i - gap], [ymin, ymax], color="#CCCCCC",
                  linewidth=0.005)

    axes.set_xlim(xmin, xmax)
    axes.set_ylim(ymin, ymax)
    libplot.editSpine(axes)

    axes.set_xticks([i + offset*(len(exps)/2-1)
                     for i in range(0, len(samples))])
    axes.set_xticklabels([libplot.properName(s) for s in samples])
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')

    # Legend labels per type; any other type falls back to the experiment
    # names themselves.
    legendLabels = {
        'tp': ['All', 'No repeats', 'No wobble', 'No wobble, No repeats'],
        'fn': ['All', 'No wobble'],
        'tpfn': ['All, TP', 'No repeats, TP', 'All, FN'],
        'tp2': ['All', 'No repeats'],
    }.get(type, exporder)
    legend = pyplot.legend(lines, legendLabels, numpoints=1, loc='best',
                           prop=fontP)
    legend._drawFrame = False

    axes.set_xlabel('Samples')
    ylabel = "Percentage"
    if type == 'total':
        ylabel = 'Number of indels'
    if scale > 0:
        ylabel += '(x%d)' % 10**scale
    axes.set_ylabel(ylabel)

    axes.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage(fig, pdf, options)
def drawPlot( options, samples1, samples2, type ):
    """Plot per-sample insertion or deletion rates for two references.

    Columns are: the samples of ``samples1`` in decreasing order of the
    plotted value, then ``refname1`` itself (value 0), then the average,
    then the chimp sample 'panTro3' when ``samples1`` contains one.  The
    values of ``samples2`` are matched to the same columns by sample name.

    Args:
        options: plotting options; reads ``options.files[0]`` and
            ``options.outdir`` and sets ``options.out``.
        samples1: objects exposing ``name``, ``refname``, ``ins``, ``dels``
            for the first reference.
        samples2: the same for the second reference.
        type: 'insertion' plots ``ins``; anything else plots ``dels``.

    Returns:
        None.  Nothing is drawn when ``samples1`` is empty or contains only
        the chimp sample.

    Exits with status 1 when the two inputs do not cover the same samples.
    """
    # One attribute name drives sorting and both data series, so they can
    # never disagree (the original mixed "== 'insertion'" and
    # "== 'deletion'" tests).
    attr = 'dels'
    if type == 'insertion':
        attr = 'ins'

    # Sorted in decreasing order of the plotted value in samples1
    samples1 = sorted( samples1, key=lambda s: getattr(s, attr), reverse=True )

    # Remove the chimp sample; it is added back as the last column.
    chimpSample = None
    for i, s in enumerate(samples1):
        if s.name == 'panTro3':
            chimpSample = samples1.pop(i)
            break

    if len( samples1 ) < 1:
        return
    if len( samples2 ) < 1:
        sys.stderr.write("Input file 1 and 2 do not have the same set of samples\n")
        sys.exit( 1 )

    refname1 = samples1[0].refname
    refname2 = samples2[0].refname

    y1data = [ getattr(s, attr) for s in samples1 ]
    xticklabels = [ s.name for s in samples1 ]
    # indel of refname1 w.r.t itself (0)
    y1data.append(0)
    xticklabels.append(refname1)

    y2data = []
    for name in xticklabels:
        if name == refname2:  # indel of refname2 w.r.t itself (0)
            y2data.append(0)
        for s in samples2:
            if s.name == name:
                y2data.append( getattr(s, attr) )
                break

    if len(xticklabels) != len(y2data):
        sys.stderr.write("Input file 1 and 2 do not have the same set of samples\n")
        sys.exit( 1 )

    # Add the average column.  The divisor excludes the reference's own 0
    # entry.
    y1avr = sum(y1data)/float(len(y1data) - 1)
    y1data.append(y1avr)
    xticklabels.append('average')
    y2avr = sum(y2data)/float(len(y2data) - 1)
    y2data.append(y2avr)
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print("%s Average: %s %f, %s %f" %(type, refname1, y1avr, refname2, y2avr))

    # Add chimp, only when samples1 actually had one (the original crashed
    # with AttributeError on None otherwise).
    if chimpSample is not None:
        y1data.append( getattr(chimpSample, attr) )
        for s in samples2:
            if s.name == 'panTro3':
                y2data.append( getattr(s, attr) )
        xticklabels.append("panTro3")

    minMajority = min( [min(y2data), min(y1data)] ) - 0.0001
    maxMajority = max( [max(y2data), max(y1data)] ) + 0.0001

    # Build the output name from the input file name: drop the literal
    # 'pathStats' prefix, any following underscores and the '.xml' suffix.
    # str.lstrip/rstrip take a SET of characters and could eat into the
    # name itself (e.g. a name ending in 'x', 'm' or 'l').
    stem = os.path.basename(options.files[0])
    if stem.startswith('pathStats'):
        stem = stem[len('pathStats'):]
    stem = stem.lstrip('_')
    if stem.endswith('.xml'):
        stem = stem[:-len('.xml')]
    options.out = os.path.join( options.outdir, '%s_%s' %( type, stem ) )

    fig, pdf = libplot.initImage( 11.2, 10.0, options )
    ax2 = fig.add_axes( [0.15, 0.15, 0.8, 0.8] )

    l2 = ax2.plot( y2data, marker='.', markersize=14.0, linestyle='none', color="#E31A1C" )
    l1 = ax2.plot( y1data, marker='.', markersize=14.0, linestyle='none', color="#1F78B4" )

    # Legend
    fontP = FontProperties()
    fontP.set_size("x-small")
    legend = ax2.legend([l1, l2],
                        [libplot.properName(refname1), libplot.properName(refname2)],
                        loc='upper right', numpoints=1, prop=fontP)
    legend._drawFrame = False

    ax2.set_ylim( minMajority, maxMajority )
    ax2.set_xlim( -0.5, len(xticklabels) - 0.5 )

    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.xaxis.tick_bottom()
    ax2.yaxis.set_ticks_position( 'left' )

    ax2.set_xticks( range( 0, len(xticklabels) ) )
    properxticklabels = [ libplot.properName(l) for l in xticklabels ]
    ax2.set_xticklabels( properxticklabels )

    for label in ax2.xaxis.get_ticklabels():
        label.set_rotation( 90 )

    ax2.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax2.xaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    ax2.set_xlabel( 'Samples' )
    if type == 'insertion':
        ax2.set_ylabel( 'Insertions per site' )
        title = 'Insertions'
    else:
        ax2.set_ylabel( 'Deletions per site' )
        title = 'Deletions'
    ax2.set_title( title )

    libplot.writeImage( fig, pdf, options )
def drawSnpPlot(options, samples1, samples2):
    """Plot per-sample SNP rates for two references on a broken y axis.

    Columns are: the samples of ``samples1`` in decreasing order of
    ``errPerSite``, then ``refname1`` itself (value 0), then the average,
    then the chimp sample 'panTro3' when ``samples1`` contains one.  The
    last ``options.numOutliners`` columns are treated as outliers and set
    the range of the upper axes; the remaining columns set the range of the
    lower axes.

    Args:
        options: plotting options; reads ``options.files[0]``,
            ``options.outdir`` and ``options.numOutliners`` and sets
            ``options.out``.
        samples1: objects exposing ``name``, ``refname`` and ``errPerSite``
            for the first reference.
        samples2: the same for the second reference.

    Returns:
        None.  Nothing is drawn when ``samples1`` is empty or contains only
        the chimp sample.

    Exits with status 1 when the two inputs yield a different number of
    columns.
    """
    # All the samples sorted in decreasing order of SNP rate, then average,
    # then chimp
    samples1 = sorted( samples1, key=lambda s: s.errPerSite, reverse=True )

    # Remove the chimp sample; it is added back as the last column.
    chimpSample = None
    for i, s in enumerate(samples1):
        if s.name == "panTro3":
            chimpSample = samples1.pop(i)
            break

    if len( samples1 ) < 1:
        return
    if len( samples2 ) < 1:
        sys.stderr.write("Input files have different number of samples: %d, %d\n"
                         %(len(samples1), len(samples2)))
        sys.exit(1)

    refname1 = samples1[0].refname
    refname2 = samples2[0].refname

    y1data = [ s.errPerSite for s in samples1 ]
    xticklabels = [ s.name for s in samples1 ]
    y1data.append(0)  # snps of refname1 w.r.t itself (which is 0)
    xticklabels.append(refname1)

    y2data = []
    for name in xticklabels:
        if name == refname2:
            y2data.append(0)  # snps of refname2 w.r.t itself
        for s2 in samples2:
            if s2.name == name:
                y2data.append(s2.errPerSite)
                break

    if len(y1data) != len(y2data):
        sys.stderr.write("Input files have different number of samples: %d, %d\n"
                         %(len(y1data), len(y2data)))
        sys.exit(1)

    # Add the average column.  y1data holds at least one sample plus the
    # reference's own 0 entry here, so the divisor (which excludes that 0
    # entry) is never zero.
    y1avr = sum(y1data)/float(len(y1data) - 1)
    y1data.append(y1avr)
    y2avr = sum(y2data)/float(len(y2data) - 1)
    y2data.append(y2avr)

    # Print summary stats to stderr.  sorted(...)[1] is the second-smallest
    # value of the series (the list also contains the reference's own 0).
    sys.stderr.write("%s\t%f\t%f\t%f\n" %( refname1, sorted(y1data)[1], max(y1data), y1avr ))
    sys.stderr.write("%s\t%f\t%f\t%f\n" %( refname2, sorted(y2data)[1], max(y2data), y2avr ))
    xticklabels.append('average')

    # Add chimp, only when samples1 actually had one (the original crashed
    # with AttributeError on None otherwise).
    if chimpSample is not None:
        y1data.append( chimpSample.errPerSite )
        for s in samples2:
            if s.name == 'panTro3':
                y2data.append( s.errPerSite )
        xticklabels.append( 'panTro3' )

    # Min, max values: the last `num` columns are the outliers.
    num = options.numOutliners
    numcols = len(y1data)
    split = numcols - num
    minOutlier = min( [ min(y1data[split:]), min(y2data[split:]) ] ) - 0.001
    maxOutlier = max( [ max(y1data[split:]), max(y2data[split:]) ] ) + 0.001
    minMajority = min( [ min(y1data[:split]), min(y2data[:split]) ] ) - 0.001
    maxMajority = max( [ max(y1data[:split]), max(y2data[:split]) ] ) + 0.001
    if minMajority < 0:
        minMajority = -0.0001

    # Build the output name from the input file name: drop the literal
    # 'snpStats' prefix, any following underscores and the '.xml' suffix.
    # str.lstrip/rstrip take a SET of characters and could eat into the
    # name itself (e.g. a name ending in 'x', 'm' or 'l').
    stem = os.path.basename(options.files[0])
    if stem.startswith('snpStats'):
        stem = stem[len('snpStats'):]
    stem = stem.lstrip('_')
    if stem.endswith('.xml'):
        stem = stem[:-len('.xml')]
    options.out = os.path.join( options.outdir, '%s' %( stem ) )

    fig, pdf = libplot.initImage( 11.2, 10.0, options )
    ax, ax2 = setAxes(fig, maxOutlier - minOutlier, maxMajority - minMajority)

    # The same two series go on both axes; only the y limits differ.
    l2 = ax.plot( y2data, marker='.', markersize=14.0, linestyle='none', color="#E31A1C" )  # Red
    l1 = ax.plot( y1data, marker='.', markersize=14.0, linestyle='none', color="#1F78B4" )  # Blue
    ax2.plot( y2data, marker='.', markersize=14.0, linestyle='none', color="#E31A1C" )
    ax2.plot( y1data, marker='.', markersize=14.0, linestyle='none', color="#1F78B4" )

    # Legend
    fontP = FontProperties()
    fontP.set_size("x-small")
    legend = ax.legend([l1, l2],
                       [libplot.properName(refname1), libplot.properName(refname2)],
                       loc='upper left', numpoints=1, prop=fontP)
    legend._drawFrame = False

    # Diagonal marks indicating the break between the two axes; d is the
    # half-height of the mark in data units.
    d = .0001
    ax.plot( (-1, 0), (minOutlier + d, minOutlier - d), color="k", clip_on=False )
    ax2.plot( (-1, 0), (maxMajority + d, maxMajority - d), color="k", clip_on=False )

    ax.set_ylim( minOutlier, maxOutlier )  # outliers only
    ax.set_xlim( -0.5, len(xticklabels) - 0.5 )
    ax.set_xticklabels( [ "" for _ in xticklabels ] )

    # Make sure the y ticks of the top plot (the outlier plot) are on the
    # same grid as the other plot:
    step = 0.001
    ytickpositions = []
    ytickpos = 0
    while ytickpos < maxOutlier:
        if ytickpos >= minOutlier:
            ytickpositions.append(ytickpos)
        ytickpos += step
    ax.set_yticks( ytickpositions )

    ax2.set_ylim( minMajority, maxMajority )
    ax2.set_xlim( -0.5, len(xticklabels) - 0.5 )

    # Hide the spines between ax and ax2
    ax.spines['bottom'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.yaxis.set_ticks_position( 'left' )
    ax.xaxis.set_ticks_position( 'none' )

    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.xaxis.tick_bottom()
    ax2.yaxis.set_ticks_position( 'left' )

    ax2.set_xticks( range( 0, len(xticklabels) ) )
    properxticklabels = [ libplot.properName(l) for l in xticklabels ]
    ax2.set_xticklabels( properxticklabels )

    # Make sure the x ticks of the top plot are the same as the other plot:
    ax.set_xticks( range(0, len(xticklabels)) )

    for label in ax2.xaxis.get_ticklabels():
        label.set_rotation( 90 )

    ax.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax.xaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax2.yaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax2.xaxis.grid(b=True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    ax2.set_xlabel( 'Samples' )
    ax2.set_ylabel( 'SNPs per site' )
    title = 'SNPs'
    ax.set_title( title )

    libplot.writeImage( fig, pdf, options )