Exemple #1
0
def main():
    """Parse the command line, load the wig data and draw the figure.

    Options from this module, ``las`` and ``lpt`` are registered and checked,
    the annotation and maf data are loaded into a ``Data`` instance,
    normalized and transformed, and the figure is then laid out, drawn and
    written with ``lpt.writeImage``.
    """
    usage = ('usage: %prog [options]\n\n'
             '%prog takes in a reference genome name ( --referenceGenome ),\n'
             'optionally a directory where annotation wig pickles are stored ( --annotPickleDir [optional] ),\n'
             'a directory where maf wig pickles are stored ( --mafPickleDir ), a paired set of chromosome names\n'
             '( --chrNames comma separated ) and chromosome lengths ( --chrLengths comma separated ) and \n'
             'then various other options specified below to draw a figure.')
    data = Data()
    parser = OptionParser(usage=usage)
    initOptions(parser)
    las.initOptions(parser)
    lpt.initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser, data)
    las.checkOptions(options, parser)
    lpt.checkOptions(options, parser)
    loadAnnots(options, data)
    loadMafs(options, data)

    normalizeData(options, data)
    transformData(options, data)

    # A quarter of a height unit per maf / annotation row, plus a small margin.
    figHeight = (data.numberOfMafs + len(data.annotationOrder) + 0.5) / 4.0
    fig, pdf = lpt.initImage(8.0, figHeight, options, data)
    axDict = establishAxes(fig, options, data)
    labelAxes(fig, axDict, options, data)
    drawAnnotations(axDict, options, data)
    drawMafs(axDict, options, data)
    drawLegend(options, data)

    # Limits are applied last, after everything has been drawn.
    setAxisLimits(axDict, options, data)
    lpt.writeImage(fig, pdf, options)
Exemple #2
0
def main():
    """Plot values derived from a directory of substitution stats files.

    Registers and validates the command-line options, prepares the figure,
    reads the stats through ``cssp.readSubStatsDir``, orders the derived
    values by ascending ratio of their second to third field and draws them
    before writing the image.
    """
    usageText = (
        'usage: %prog --subStatsDir=path/to/dir/ [options]\n\n'
        '%prog takes in a directory of substitution stats files ( --subStatsDir )\n'
        'with filenames as NAME.subStats.[upper|lower].xml and produces a plot showing\n'
        'the difference in subs in hap1 versus hap2.\n')
    plotData = Data()
    optParser = OptionParser(usage=usageText)
    initOptions(optParser)
    las.initOptions(optParser)
    lpt.initOptions(optParser)
    options, _positional = optParser.parse_args()
    checkOptions(options, optParser)
    las.checkOptions(options, optParser)
    lpt.checkOptions(options, optParser)

    fig, pdf = lpt.initImage(11., 8., options, plotData)
    axDict = establishAxis(fig, options, plotData)

    def ratio(entry):
        # Second field divided by the third, forced to float division.
        return float(entry[1]) / entry[2]

    assemblies = cssp.readSubStatsDir({}, options)
    orderedValues = sorted(createValsList(assemblies.values(), options),
                           key=ratio)
    drawData(orderedValues, axDict, options, plotData)

    lpt.writeImage(fig, pdf, options)
def main():
    """Plot, or rank, assemblies from a directory of substitution stats files.

    After option handling the stats are read, errors are summed and the data
    normalized.  Assemblies are ordered by ascending ``allLo``; when
    ``options.outputRanks`` is set the rankings are produced instead of a
    figure.
    """
    usageText = ('usage: %prog --subStatsDir=path/to/dir/ [options]\n\n'
                 '%prog takes in a directory of substitution stats files ( --subStatsDir )\n'
                 'with filenames as NAME.subStats.[upper|lower].xml and produces a plot.')
    plotData = Data()
    optParser = OptionParser(usage=usageText)
    initOptions(optParser)
    las.initOptions(optParser)
    lpt.initOptions(optParser)
    options, _positional = optParser.parse_args()
    checkOptions(options, optParser)
    las.checkOptions(options, optParser)
    lpt.checkOptions(options, optParser)

    # The figure is only created when a plot, rather than rankings, is wanted.
    if not options.outputRanks:
        fig, pdf = lpt.initImage(9., 11., options, plotData)
        axDict = establishAxes(fig, options, plotData)

    assemblies = readSubStatsDir({}, options)

    sumErrors(assemblies, options)
    normalizeData(assemblies, options)

    def byAllLo(name):
        return assemblies[name].allLo

    ordering = sorted(assemblies, key=byAllLo)

    if options.outputRanks:
        rankings(assemblies, ordering, options, plotData)
        return

    drawData(assemblies, ordering, axDict, options, plotData)

    lpt.writeImage(fig, pdf, options)
def main():
    """Plot per-assembly data read from a directory of contig path stats.

    The figure is 14 wide; its height is 8 for up to 20 assemblies and 24
    otherwise.  Assemblies are drawn in descending order of the maximum of
    their ``insertionErrorSizeDistribution`` y-data.
    """
    usageText = (
        "usage: %prog --statsScaffoldsContigPathDir=path/to/dir/ [options]\n\n"
        "%prog takes a directory of contig path stats xml files\n"
        "( --statsScaffoldsContigPathDir ) named as NAME.contigPathStats.xml and creates a plot."
    )
    plotData = Data()
    optParser = OptionParser(usage=usageText)
    initOptions(optParser)
    las.initOptions(optParser)
    lpt.initOptions(optParser)
    options, _positional = optParser.parse_args()
    las.checkOptions(options, optParser)
    lpt.checkOptions(options, optParser)
    checkOptions(options, optParser)

    assemblies = readDir(options.statsScaffoldsContigPathDir, options).values()

    # Many assemblies need a taller canvas.
    figHeight = 8.0 if len(assemblies) <= 20 else 24.0
    fig, pdf = lpt.initImage(14.0, figHeight, options, plotData)
    axDict = establishAxis(len(assemblies), fig, options, plotData)

    assemblies = createXYData(assemblies, options)
    assemblies = normalizeDist(assemblies, options)

    def peakInsertionError(assembly):
        return max(assembly.yData["insertionErrorSizeDistribution"])

    assemblies = sorted(assemblies, key=peakInsertionError, reverse=True)

    drawData(assemblies, axDict, options, plotData)
    drawLegend(assemblies, axDict, options, plotData)

    lpt.writeImage(fig, pdf, options)
def main():
    """Plot coverage values obtained from ``readStream``.

    The entries returned by ``readStream`` are ordered by descending ``tot``
    before being drawn on a single 8 x 6 figure.
    """
    usageText = ('usage: %prog [options] < rankedAssemblies.txt\n\n'
                 '%prog takes via STDIN a list of coverage values, each line formatted as:\n'
                 '#assembly ave.CovBothGenomes hap1 hap2 delta bac\n'
                 'P1 .9885178 .9888077 .9882265 5.782e-04 0\n'
                 'B1 .9869388 .9871948 .9866798 5.257e-04 .9978954\n'
                 'F5 .9869094 .9872685 .9865338 7.295e-04 .9993371\n'
                 '...\n'
                 'And produces a plot showing the Total and Bacterial coverages for\n'
                 'all assemblies in the input.')
    plotData = Data()
    optParser = OptionParser(usage=usageText)
    initOptions(optParser)
    lpt.initOptions(optParser)
    options, _positional = optParser.parse_args()
    checkOptions(options, optParser)
    lpt.checkOptions(options, optParser)

    def totalCoverage(entry):
        return entry.tot

    coverages = sorted(readStream(options), key=totalCoverage, reverse=True)

    fig, pdf = lpt.initImage(8.0, 6.0, options, plotData)
    axis = establishAxis(fig, options, plotData)

    drawData(coverages, axis, options)

    lpt.writeImage(fig, pdf, options)
def main():
    """Plot, or rank, assemblies from scaffold- and contig-alignment path stats.

    Assemblies returned by ``readDirs`` are ordered by descending
    ``valuesDict[options.sortOn]``.  When ``options.outputRanks`` is set the
    rankings are produced and no figure is created.
    """
    # The usage synopsis must name the same option as the description below
    # it: --statsContigsContigPathDir (single "s").
    usage = ('usage: %prog --statsScaffoldsContigPathDir=path/to/dir/ '
             '--statsContigsContigPathDir=path/to/dir/ [options]\n\n'
             '%prog takes a directory of scaffold-alignment contig path stats xml files\n'
             '( --statsScaffoldsContigPathDir ) named as NAME.pathStats.xml, contig-alignment '
             'contig path stats xml files ( --statsContigsContigPathDir ) named as NAME.pathStats.xml,'
             ' and creates a plot.\n')
    data = Data()
    parser = OptionParser(usage=usage)
    initOptions(parser)
    cscp.initOptions(parser)
    las.initOptions(parser)
    lpt.initOptions(parser)
    options, args = parser.parse_args()
    cscp.checkOptions(options, parser)
    las.checkOptions(options, parser)
    lpt.checkOptions(options, parser)
    checkOptions(options, parser)

    assembliesList = readDirs(options)
    assembliesList = sorted(assembliesList,
                            key=lambda x: x.valuesDict[options.sortOn],
                            reverse=True)

    # Computed before the rank check, exactly as in the original flow.
    maxesMax, minsMin = findMaxMin(assembliesList, options)
    if options.outputRanks:
        rankings(assembliesList, options)
        return

    fig, pdf = lpt.initImage(10.0, 8.0, options, data)
    axDict = establishAxis(fig, options, data)

    drawData(assembliesList, maxesMax, minsMin, axDict, options)

    lpt.writeImage(fig, pdf, options)
Exemple #7
0
def drawCompareN50Plot( options, xsamples, ysamples ):
    """Draw the N50 comparison plot for two sets of samples.

    options  -- options object; ``outdir`` and ``prefix`` are read and
                ``out`` is set to the output path prefix.
    xsamples -- sample elements (objects with an ``attrib`` mapping) whose
                reference name labels the first half of the output name.
    ysamples -- sample elements for the second reference.

    Returns None.  Nothing is drawn when either list is empty, and no image
    is written when ``drawCompareN50data`` returns no lines.
    """
    # Sort xsamples by sample name.
    # NOTE(review): ysamples is not re-ordered here -- confirm that
    # drawCompareN50data does not rely on the two lists being in matching order.
    xsamples = sorted( xsamples, key=lambda s: s.attrib[ 'sampleName' ] )

    if len(xsamples) < 1 or len(ysamples) < 1:
        return

    xrefname = xsamples[0].attrib[ 'referenceName' ]
    yrefname = ysamples[0].attrib[ 'referenceName' ]
    options.out = os.path.join( options.outdir, options.prefix + '_' + xrefname + '_' + yrefname )
    fig, pdf = libplot.initImage( 8.0, 8.0, options )
    axes = fig.add_axes( [0.12, 0.1, 0.85, 0.85] )

    lines, lineNames, maxval, minval = drawCompareN50data( axes, xsamples, ysamples, options )
    if len(lines) == 0:
        sys.stderr.write('Comparing N50 stats of %s and %s: All values are 0, no plot created\n' %(xrefname, yrefname) )
        return

    axes.set_title( "N50" )

    # Legend: shrink the axes to 80% in both directions to make room for it.
    fontP = FontProperties()
    fontP.set_size( 'small' )
    box = axes.get_position()
    axes.set_position( [box.x0, box.y0, box.width*0.8, box.height*0.8] )
    legend = pyplot.legend( lines, lineNames, numpoints=1, prop=fontP, loc="best", bbox_to_anchor=(1, 0.5) )
    # Use the public API rather than poking the private _drawFrame attribute.
    legend.draw_frame( False )

    libplot.setTicks( axes )
    # Pad both axes by 10% of the data span on each side.
    span = maxval - minval
    axes.set_xlim( minval - span*0.1, maxval + span*0.1 )
    axes.set_ylim( minval - span*0.1, maxval + span*0.1 )
    libplot.writeImage( fig, pdf, options )
def main():
    """Plot, or rank, the contents of contiguous path statistics files.

    Each entry of the stats list returned by ``readFiles`` is sorted by the
    ``mid`` attribute of its items.  When ``options.outputRanks`` is set the
    ranks are printed and the process exits with status 0; otherwise the
    data is drawn on the ``'main'`` axis and the image is written.
    """
    usageText = ('usage: %prog [options] file1.xml file2.xml\n\n'
                 '%prog takes in contiguous path statistics file(s)\n'
                 'and creates an image file.')
    plotData = Data()
    optParser = OptionParser(usage=usageText)
    initOptions(optParser)
    las.initOptions(optParser)
    lpt.initOptions(optParser)
    options, args = optParser.parse_args()
    checkOptions(args, options, optParser)
    las.checkOptions(options, optParser)
    lpt.checkOptions(options, optParser)
    if not options.outputRanks:
        fig, pdf = lpt.initImage(11., 8.0, options, plotData)
        axDict = establishAxes(fig, options, plotData)

    plotData.statsList, plotData.xData = readFiles(options)
    # Keep the buckets of every stats entry ordered by ascending midpoint.
    for position, buckets in enumerate(plotData.statsList):
        plotData.statsList[position] = sorted(buckets,
                                              key=lambda bucket: bucket.mid)

    if options.outputRanks:
        printRanks(rankFiles(options, plotData), options, plotData)
        sys.exit(0)

    mainAxis = axDict['main']
    drawData(mainAxis, plotData.xData, plotData.statsList, options, plotData)
    drawLegend(options, plotData)
    drawAxisLabels(fig, options, plotData)
    setAxisLimits(mainAxis, plotData.xData, options, plotData)
    establishTicks(mainAxis, plotData.xData, options, plotData)

    lpt.writeImage(fig, pdf, options)
Exemple #9
0
def main():
    """Plot, or rank, the statistics returned by ``readFiles``.

    The stats values are ordered by ascending ``sumLower``.  When
    ``options.outputRanks`` is set only the rankings are produced;
    otherwise the axes are created, the data, legend and labels are drawn
    and the image is written.
    """
    usageText = ('usage: %prog [options] --dir=path/to/dir/\n\n'
                 '%prog takes in a copy statistics file\n'
                 'and creates an image file.')
    plotData = Data()
    optParser = OptionParser(usage=usageText)
    for register in (initOptions, las.initOptions, lpt.initOptions):
        register(optParser)
    options, args = optParser.parse_args()
    checkOptions(args, options, optParser)
    las.checkOptions(options, optParser)
    lpt.checkOptions(options, optParser)
    if not options.outputRanks:
        fig, pdf = lpt.initImage(8.0, 10.0, options, plotData)

    stats = readFiles(options)

    def lowerSum(entry):
        return entry.sumLower

    ordered = sorted(stats.values(), key=lowerSum)

    if options.outputRanks:
        rankings(ordered, options, plotData)
        return

    axDict = establishAxes(fig, options, plotData)
    drawData(axDict, ordered, options, plotData)
    drawLegend(options, plotData)
    drawAxisLabels(axDict, stats, options, plotData)
    setAxisLimits(axDict, options, plotData)

    lpt.writeImage(fig, pdf, options)
def main():
    """Entry point: draw or rank the stats loaded by ``readFiles``.

    Values of the stats mapping are sorted in ascending order of their
    ``sumLower`` attribute.  With ``options.outputRanks`` set, ``rankings``
    is called and the function returns without touching any figure.
    """
    usageText = ('usage: %prog [options] --dir=path/to/dir/\n\n'
                 '%prog takes in a copy statistics file\n'
                 'and creates an image file.')
    canvasData = Data()
    cli = OptionParser(usage=usageText)
    initOptions(cli)
    las.initOptions(cli)
    lpt.initOptions(cli)
    options, args = cli.parse_args()
    checkOptions(args, options, cli)
    las.checkOptions(options, cli)
    lpt.checkOptions(options, cli)

    # No figure is needed when only rankings are requested.
    if not options.outputRanks:
        fig, pdf = lpt.initImage(8.0, 10.0, options, canvasData)

    stats = readFiles(options)
    byLowerSum = sorted(stats.values(), key=lambda item: item.sumLower)

    if options.outputRanks:
        rankings(byLowerSum, options, canvasData)
        return

    axDict = establishAxes(fig, options, canvasData)
    drawData(axDict, byLowerSum, options, canvasData)
    drawLegend(options, canvasData)
    drawAxisLabels(axDict, stats, options, canvasData)
    setAxisLimits(axDict, options, canvasData)

    lpt.writeImage(fig, pdf, options)
def main():
    """Entry point: draw the coverage entries produced by ``readStream``.

    Entries are sorted from highest to lowest ``tot`` and drawn on one axis
    of an 8 x 6 figure, which is then written with ``lpt.writeImage``.
    """
    helpText = (
        'usage: %prog [options] < rankedAssemblies.txt\n\n'
        '%prog takes via STDIN a list of coverage values, each line formatted as:\n'
        '#assembly ave.CovBothGenomes hap1 hap2 delta bac\n'
        'P1 .9885178 .9888077 .9882265 5.782e-04 0\n'
        'B1 .9869388 .9871948 .9866798 5.257e-04 .9978954\n'
        'F5 .9869094 .9872685 .9865338 7.295e-04 .9993371\n'
        '...\n'
        'And produces a plot showing the Total and Bacterial coverages for\n'
        'all assemblies in the input.')
    canvasData = Data()
    cli = OptionParser(usage=helpText)
    initOptions(cli)
    lpt.initOptions(cli)
    options, _rest = cli.parse_args()
    checkOptions(options, cli)
    lpt.checkOptions(options, cli)

    entries = readStream(options)
    entries = sorted(entries, key=lambda entry: entry.tot, reverse=True)

    fig, pdf = lpt.initImage(8.0, 6.0, options, canvasData)
    axis = establishAxis(fig, options, canvasData)
    drawData(entries, axis, options)
    lpt.writeImage(fig, pdf, options)
def main():
    """Entry point: plot assemblies read from a contig path stats directory.

    A 14 x 8 figure is used for at most 20 assemblies and a 14 x 24 figure
    for more.  Assemblies are sorted by the largest value of their
    ``insertionErrorSizeDistribution`` y-data, highest first.
    """
    helpText = ('usage: %prog --statsScaffoldsContigPathDir=path/to/dir/ [options]\n\n'
                '%prog takes a directory of contig path stats xml files\n'
                '( --statsScaffoldsContigPathDir ) named as NAME.contigPathStats.xml and creates a plot.')
    canvasData = Data()
    cli = OptionParser(usage=helpText)
    initOptions(cli)
    las.initOptions(cli)
    lpt.initOptions(cli)
    options, _rest = cli.parse_args()
    las.checkOptions(options, cli)
    lpt.checkOptions(options, cli)
    checkOptions(options, cli)

    byName = readDir(options.statsScaffoldsContigPathDir, options)
    items = byName.values()

    tall = len(items) > 20
    fig, pdf = lpt.initImage(14.0, 24.0 if tall else 8.0, options, canvasData)
    axDict = establishAxis(len(items), fig, options, canvasData)

    items = normalizeDist(createXYData(items, options), options)
    items = sorted(items,
                   key=lambda item: max(item.yData['insertionErrorSizeDistribution']),
                   reverse=True)

    drawData(items, axDict, options, canvasData)
    drawLegend(items, axDict, options, canvasData)

    lpt.writeImage(fig, pdf, options)
def main():
    """Entry point: plot values built from a substitution stats directory.

    The figure and axes are created first; the stats are then read with
    ``cssp.readSubStatsDir``, turned into a values list, sorted in ascending
    order of ``float(v[1]) / v[2]`` and drawn.
    """
    helpText = ('usage: %prog --subStatsDir=path/to/dir/ [options]\n\n'
                '%prog takes in a directory of substitution stats files ( --subStatsDir )\n'
                'with filenames as NAME.subStats.[upper|lower].xml and produces a plot showing\n'
                'the difference in subs in hap1 versus hap2.\n')
    canvasData = Data()
    cli = OptionParser(usage=helpText)
    for addOptions in (initOptions, las.initOptions, lpt.initOptions):
        addOptions(cli)
    options, _rest = cli.parse_args()
    for verify in (checkOptions, las.checkOptions, lpt.checkOptions):
        verify(options, cli)

    fig, pdf = lpt.initImage(11., 8., options, canvasData)
    axDict = establishAxis(fig, options, canvasData)

    byName = cssp.readSubStatsDir({}, options)
    values = createValsList(byName.values(), options)
    values = sorted(values, key=lambda v: float(v[1]) / v[2])
    drawData(values, axDict, options, canvasData)

    lpt.writeImage(fig, pdf, options)
def getSummary( runs, analysis, options, sortId, refnames ):
    """Draw and write the summary figure for one analysis.

    The output path prefix ``summary_<analysis name>_<sortId>`` inside
    ``options.outdir`` is stored on ``options.out`` before a 16 x 20 image
    is created, filled by ``drawSum`` and written.
    """
    outName = 'summary_%s_%d' % ( analysis.attrib[ 'name' ], sortId )
    options.out = os.path.join( options.outdir, outName )

    fig, pdf = libplot.initImage( 16.0, 20.0, options )
    drawSum( fig, runs, analysis, options, refnames )
    libplot.writeImage( fig, pdf, options )
def drawScatterPlot( options, stats, type, cumulative ):
    """Draw and write a coverage scatter plot.

    The output name is ``coverageScatter_<type>``, with ``_culm`` appended
    when ``cumulative`` is true; it is joined to ``options.outdir`` and
    stored on ``options.out``.
    """
    suffix = "_culm" if cumulative else ""
    outName = ( "coverageScatter_%s" % type ) + suffix
    options.out = os.path.join( options.outdir, outName )

    fig, pdf = libplot.initImage( 12.0, 8.0, options )
    scatterAxes = fig.add_axes( [0.1, 0.15, 0.85, 0.75] )
    drawScatter( scatterAxes, options, stats, type, cumulative )
    libplot.writeImage( fig, pdf, options )
def drawCoveragePlot( options, stats, isAbs, ycutoff ):
    """Draw and write a coverage plot for the reference of ``stats[0]``.

    The output name starts with ``coverage_`` (or ``rel_coverage_`` when
    ``isAbs`` is false), followed by ``ycutoff`` with two decimals and the
    reference name of the first stats entry.
    """
    template = "coverage_%.2f_" if isAbs else "rel_coverage_%.2f_"
    outName = ( template % ycutoff ) + stats[0].referenceName
    options.out = os.path.join( options.outdir, outName )

    fig, pdf = libplot.initImage( 8.0, 10.0, options )
    plotAxes = fig.add_axes( [0.14, 0.2, 0.8, 0.6] )

    # drawData returns a (lines, names) pair; neither is needed afterwards.
    _lines, _names = drawData( plotAxes, stats, isAbs, ycutoff )
    libplot.writeImage( fig, pdf, options )
def drawCompareCoveragePlot2( options, stats, isAbs ):
    """Draw and write the second coverage comparison plot.

    Does nothing for empty ``stats``.  The output name is built from the
    prefix ``cmpCoverage2_`` (``cmpRelCoverage2_`` when ``isAbs`` is false)
    and the two reference names of the first stats entry.
    """
    if len(stats) < 1:
        return

    first = stats[0]
    tag = "cmpCoverage2_" if isAbs else "cmpRelCoverage2_"
    outName = "%s%s_%s" % ( tag, first.referenceName, first.otherReferenceName )
    options.out = os.path.join( options.outdir, outName )

    fig, pdf = libplot.initImage( 12.0, 8.0, options )
    plotAxes = fig.add_axes( [0.09, 0.2, 0.9, 0.6] )

    drawCompareData2( plotAxes, options, stats, isAbs )
    libplot.writeImage( fig, pdf, options )
def drawPlot(samplesList, sampleNames, options):
    """Draw a grouped bar chart of non-linear breakpoints for two references.

    samplesList -- sequence whose first two items are lists of sample
                   elements (objects with an ``attrib`` mapping), one list
                   per reference.
    sampleNames -- sample names, in the order the bars should appear.
    options     -- options object; ``options.outdir`` is read and
                   ``options.out`` is set to the output path prefix.

    Returns None.  Nothing is drawn when either sample list is empty.
    """
    options.out = os.path.join(options.outdir, "nonLinearBp")

    list1 = samplesList[0]
    list2 = samplesList[1]
    # Check for empty input before creating the image, as the other plotting
    # functions do, so no unused figure is left behind by the early return.
    if len(list1) < 1 or len(list2) < 1:
        return

    fig, pdf = libplot.initImage(12.0, 8.0, options)
    axes = fig.add_axes([0.09, 0.2, 0.9, 0.6])

    refname1 = list1[0].attrib['referenceName']
    refname2 = list2[0].attrib['referenceName']

    lines = []

    barwidth = 0.3
    # Collect the totalIntraJoin value of every sample, per reference, in
    # the order given by sampleNames.
    y1data = []
    y2data = []
    for sample in sampleNames:
        for s in list1:
            if sample == s.attrib['sampleName']:
                y1data.append(int(s.attrib['totalIntraJoin']))
        for s in list2:
            if sample == s.attrib['sampleName']:
                y2data.append(int(s.attrib['totalIntraJoin']))
    x1data = range(len(y1data))
    # Second series is shifted right by one bar width.
    x2data = [x + barwidth for x in x1data]

    colors = ["#1F78B4", "#E31A1C"]
    l1 = axes.bar(x1data, y1data, barwidth, color=colors[0], ec='w')
    lines.append(l1[0])
    l2 = axes.bar(x2data, y2data, barwidth, color=colors[1], ec='w')
    lines.append(l2[0])

    libplot.editSpine(axes)
    axes.set_title("Non-linear Breakpoints")

    # set ticks
    xlabels = [libplot.properName(name) for name in sampleNames]
    fontP = FontProperties()
    fontP.set_size('small')
    pyplot.xticks(x2data, xlabels, rotation=45, fontproperties=fontP)
    pyplot.yticks(fontproperties=fontP)
    pyplot.xlabel("Samples")
    pyplot.ylabel("Number of breakpoints")
    axes.xaxis.set_ticks_position('bottom')
    axes.yaxis.set_ticks_position('left')
    # Pass the on/off flag positionally: its keyword name differs between
    # matplotlib releases ("b" in older ones, "visible" in newer ones).
    axes.yaxis.grid(True, color="#A8A8A8", linestyle='-', linewidth=0.25)
    legend = axes.legend(lines, [libplot.properName(refname1), libplot.properName(refname2)], prop=fontP, loc="best")
    # Use the public API rather than poking the private _drawFrame attribute.
    legend.draw_frame(False)

    libplot.writeImage(fig, pdf, options)
def drawCompareContiguityPlot( options, xstats, ystats ):
    """Draw and write the contiguity comparison plot for two stats objects.

    The output path is ``<exp>_<x refname>_<y refname>`` inside
    ``options.outdir``, with ``_incCov`` appended when
    ``options.includeCov`` is set; it is stored on ``options.out``.
    """
    outName = "_".join( [ options.exp, xstats.refname, ystats.refname ] )
    outPath = os.path.join( options.outdir, outName )
    if options.includeCov:
        outPath += "_incCov"
    options.out = outPath

    fig, pdf = libplot.initImage( 8.0, 8.0, options )

    # Set axes:
    compareAxes = setCompareAxes( fig )
    drawCompareData( compareAxes, xstats, ystats, options )

    libplot.writeImage( fig, pdf, options )
Exemple #20
0
def drawCnvPlot( sample, options ):
    """Draw and write the copy number variation plot for one sample.

    The image is written to ``cnv_<sampleName>`` inside ``options.outdir``.
    One sub-plot is drawn for every key of the axes dictionary except
    ``'bg'``.
    """
    name = sample.attrib[ 'sampleName' ]
    options.out = os.path.join( options.outdir, 'cnv_%s' % name )
    fig, pdf = libplot.initImage( 11.0, 3.25, options )

    heading = "Copy Number Variation between %s and %s" % (
        libplot.properName( name ),
        libplot.properName( sample.attrib[ 'referenceName' ] ) )

    cnvDict, minCn, maxCn = getSampleData( sample )
    axDict = setAxes( fig, cnvDict.keys(), options )
    for region in axDict:
        if region == 'bg':
            # 'bg' has no per-region data to draw.
            continue
        drawOneCnvPlot( region, axDict[ region ], cnvDict[ region ], options, minCn, maxCn )
    drawAxisLabels( axDict, cnvDict, options, heading, maxCn )
    setAxisLimits( axDict, minCn, maxCn )
    libplot.writeImage( fig, pdf, options )
def main():
    """Read the aggregate files and produce the plot for the chosen mode.

    All state is carried on the ``Data`` instance: the files are read into
    it, then the image, the axes and the plots are created from it and the
    image is written.
    """
    usageText = ('%prog --dir=path/to/dir --mode=[scaffPaths|contigs|contigPaths|blocks|contamination] [options]\n\n'
                 '%prog takes an aggregate directory ( --dir ) and a mode \n'
                 '( --mode ) and then produces a pretty picture.')
    plotData = Data()
    optParser = OptionParser(usage=usageText)
    initOptions(optParser)
    lpt.initOptions(optParser)
    options, _positional = optParser.parse_args()
    checkOptions(options, optParser)
    lpt.checkOptions(options, optParser)

    readFiles(options, plotData)
    # The return value of initImage is not used in this script.
    lpt.initImage(7.0, 8.0, options, plotData)
    establishAxis(options, plotData)

    drawPlots(options, plotData)

    lpt.writeImage(options, plotData)
Exemple #22
0
def main():
    """Entry point: plot an aggregate directory in the requested mode.

    Files are read into a ``Data`` instance, which is then handed to the
    image, axis and drawing helpers before the image is written.
    """
    helpText = (
        '%prog --dir=path/to/dir --mode=[scaffPaths|contigs|contigPaths|blocks|contamination] [options]\n\n'
        '%prog takes an aggregate directory ( --dir ) and a mode \n'
        '( --mode ) and then produces a pretty picture.')
    canvasData = Data()
    cli = OptionParser(usage=helpText)
    for addOptions in (initOptions, lpt.initOptions):
        addOptions(cli)
    options, _rest = cli.parse_args()
    for verify in (checkOptions, lpt.checkOptions):
        verify(options, cli)

    readFiles(options, canvasData)
    # initImage is called for its effect only; its result is discarded.
    lpt.initImage(7.0, 8.0, options, canvasData)
    establishAxis(options, canvasData)
    drawPlots(options, canvasData)
    lpt.writeImage(options, canvasData)
def drawContiguityPlot( options, stats ):
    """Draw and write the contiguity plot for one stats object.

    The output path is ``<options.exp>_<stats.refname>`` inside
    ``options.outdir`` (with ``_incCov`` appended when ``options.includeCov``
    is set) and is stored on ``options.out``.

    Side effect: ``options.ycutoff`` is overwritten (0.7 with coverage
    included, 0.95 otherwise), so any value it held on entry is lost.
    """
    #options.out = os.path.join(options.outdir, "contiguity_" + stats.refname) #name of output file
    options.out = os.path.join(options.outdir, options.exp + "_" + stats.refname) #name of output file
    
    if options.includeCov:
        options.out = options.out + "_incCov"
        options.ycutoff = 0.7 #HACK
    else:#HACK
        options.ycutoff = 0.95 #HACK
    fig, pdf = libplot.initImage( 8.0, 10.0, options )
    axes = libplot.setAxes( fig )
    
    lines, sampleNames, ymin = drawData( axes, stats, options )
    drawLegend( axes, lines, sampleNames, options )
    # NOTE(review): because of the HACK above, options.ycutoff is always a
    # non-zero value here, so the else branch below can never run.
    if options.ycutoff:
        setAxisLimits( axes, options.ycutoff )
    else:
        setAxisLimits( axes, ymin*0.98 )
    libplot.setTicks( axes )

    libplot.writeImage( fig, pdf, options )
Exemple #24
0
def drawN50Plot( options, samples ):
    """Draw and write the N50 plot for the given samples.

    options -- options object; ``sortkey``, ``outdir``, ``prefix`` and
               ``keys`` are read and ``out`` is set to the output prefix.
    samples -- sample elements (objects with an ``attrib`` mapping).

    Returns None.  Nothing is drawn when ``samples`` is empty.
    """
    # Sort samples by the integer value of the chosen attribute, largest first.
    samples = sorted( samples, key=lambda s: int(s.attrib[ options.sortkey ]), reverse=True )
    sampleNames = getSampleNames( samples )

    if len(samples) < 1:
        return

    refname = samples[0].attrib[ 'referenceName' ]
    options.out = os.path.join( options.outdir, options.prefix + '_' + refname )
    fig, pdf = libplot.initImage( 8.0, 10.0, options )
    axes = fig.add_axes( [0.12, 0.1, 0.85, 0.85] )

    title = "N50"
    lines = drawN50data( axes, samples, options )
    axes.set_title(title)

    # Legend: narrow the axes to 80% of their width to make room for it.
    fontP = FontProperties()
    fontP.set_size( 'small' )
    box = axes.get_position()
    axes.set_position( [box.x0, box.y0, box.width*0.8, box.height] )
    legend = pyplot.legend( lines, options.keys, numpoints=1, prop=fontP, loc="best", bbox_to_anchor=(1, 0.9) )
    # Use the public API rather than poking the private _drawFrame attribute.
    legend.draw_frame( False )

    # One tick per sample, labelled vertically with the sample's proper name.
    axes.set_xticks( range( 0, len(samples) ) )
    axes.set_xticklabels( [ libplot.properName(n) for n in sampleNames ] )
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation( 90 )

    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )

    # Half a unit of padding on either side of the first and last sample.
    axes.set_xlim( -0.5, len(samples) - 0.5 )

    libplot.writeImage( fig, pdf, options )
def main():
    """Plot, or rank, assemblies from contig path and phasing stats.

    Assemblies returned by ``readData`` are ordered by descending
    ``valuesDict[options.sortOn]``.  When ``options.outputRanks`` is set the
    rankings are produced and no figure is created.
    """
    # Usage text fixes: the synopsis now names --statsContigsContigPathDir
    # (single "s") as the description does, and the description fragments
    # are separated by a space after the comma.
    usage = (
        'usage: %prog --statsScaffoldsContigPathDir=path/to/dir/ '
        '--statsContigsContigPathDir=path/to/dir/ '
        '--statsScaffoldsContigPathPhasingDir=path/to/dir/ [options]\n\n'
        '%prog takes a directory of scaffold-alignment contig path stats xml files\n'
        '( --statsScaffoldsContigPathDir ) named as NAME.pathStats.xml, contig-alignment '
        'contig path stats xml files ( --statsContigsContigPathDir ) named as NAME.pathStats.xml, '
        'scaffold-alignment contig path phasing stats xml files ( --statsScaffoldsContigPathPhasingDir )'
        ' named as NAME.hap%d.pathStats.xml and creates a plot.\n')
    data = Data()
    parser = OptionParser(usage=usage)
    initOptions(parser)
    cscp.initOptions(parser)
    las.initOptions(parser)
    lpt.initOptions(parser)
    options, args = parser.parse_args()
    cscp.checkOptions(options, parser)
    las.checkOptions(options, parser)
    lpt.checkOptions(options, parser)
    checkOptions(options, parser)

    assembliesList = readData(options)
    assembliesList = sorted(assembliesList,
                            key=lambda x: x.valuesDict[options.sortOn],
                            reverse=True)

    # Computed before the rank check, exactly as in the original flow.
    maxesMax, minsMin = findMaxMin(assembliesList, options)
    if options.outputRanks:
        rankings(assembliesList, options)
        return

    fig, pdf = lpt.initImage(10.0, 8.0, options, data)
    axDict = establishAxis(fig, options, data)

    drawData(assembliesList, maxesMax, minsMin, axDict, options)

    lpt.writeImage(fig, pdf, options)
def main():
    """Produce an N50 style figure from a scaffolds file and a contigs file.

    Both files are read with ``readFile``, processed together by
    ``processData`` and drawn on a single 8 x 5 figure.
    """
    # The description names the contigs option as --contigsFile, matching
    # the synopsis line and the options.contigsFile attribute read below.
    usage = ('usage: %prog --scaffoldsFile=sFile.txt --contigsFile=cFile.txt --size=N --title=TITLE\n\n'
             '%prog takes in a scaffolds file (--scaffoldsFile), a contigs\n'
             'file (--contigsFile), the size of the genome (--size) and a title (--title)\n'
             'and then produces an N50 style figure.')
    data = Data()
    parser = OptionParser(usage=usage)
    initOptions(parser)
    lpt.initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)
    lpt.checkOptions(options, parser)

    scaffolds = readFile(options.scaffoldsFile)
    contigs = readFile(options.contigsFile)

    pScaffs, pContigs = processData(scaffolds, contigs, options)
    fig, pdf = lpt.initImage(8.0, 5.0, options, data)
    ax = establishAxis(fig, options)

    drawData(pScaffs, pContigs, ax, options)

    lpt.writeImage(fig, pdf, options)
def drawPlots( options, samples, outname, proportion, culm ):
    """Draw and write the indel distribution plot for the given samples.

    Does nothing for empty ``samples``.  The reference name of the first
    sample is echoed to stderr.  The output name is ``indelDist_<outname>``
    (``indelDist2_<outname>`` when ``proportion`` is true), with ``_culm``
    appended when ``culm`` is true.
    """
    if len(samples) < 1:
        return

    sys.stderr.write( "%s\n" % samples[0].attrib[ 'referenceName' ] )

    stem = 'indelDist2_' if proportion else 'indelDist_'
    outPath = os.path.join( options.outdir, stem + outname )
    if culm:
        outPath += '_culm'
    options.out = outPath

    fig, pdf = libplot.initImage( 8.0, 10.0, options )

    perPlot = 10
    axesList = setAxes( fig, len(samples), perPlot )
    drawData( axesList, samples, perPlot, options, proportion, culm )

    libplot.writeImage( fig, pdf, options )
Exemple #28
0
def main():
    """Plot an aggregate text file in the requested mode.

    The values read from ``options.file`` are normalized with the routine
    matching ``options.mode`` and drawn on the ``'main'``, ``'crazy'`` and
    ``'blowUp'`` axes.  Axis limits are applied both before and after
    drawing.
    """
    usageText = (
        '%prog --file=file.txt --mode=[scaffPaths|contigs|contigPaths|blocks|contamination] [options]\n\n'
        '%prog takes an aggregate text file ( --file ) and a mode \n'
        '( --mode ) and then produces a pretty picture.')
    plotData = Data()
    optParser = OptionParser(usage=usageText)
    initOptions(optParser)
    lpt.initOptions(optParser)
    options, _positional = optParser.parse_args()
    checkOptions(options, optParser)
    lpt.checkOptions(options, optParser)
    fig, pdf = lpt.initImage(8.0, 10.0, options, plotData)
    axDict = establishAxes(fig, options, plotData)

    plotData.valuesDict = readFile(options.file, options)
    plotData.xData = plotData.valuesDict['columnLength']

    if options.mode == 'contamination':
        normalizeDataContaminationMode(options, plotData)
    else:
        plotData.valuesDict = normalizeDataNormalMode(plotData.valuesDict,
                                                      options, plotData)

    mainAx = axDict['main']
    crazyAx = axDict['crazy']
    blowUpAx = axDict['blowUp']

    setAxisLimits(mainAx, crazyAx, blowUpAx, plotData.xData, options, plotData)
    drawData(mainAx, crazyAx, blowUpAx, plotData.xData, plotData.valuesDict,
             options, plotData)
    drawLegend(options, plotData)
    drawAxisLabels(fig, options, plotData)

    # Limits are re-applied once everything has been drawn.
    setAxisLimits(mainAx, crazyAx, blowUpAx, plotData.xData, options, plotData)

    establishTicks(mainAx, crazyAx, blowUpAx, options, plotData)
    lpt.writeImage(fig, pdf, options)
def main():
    """Entry point: plot or rank assemblies from substitution stats files.

    Stats are read, errors summed and data normalized; assembly names are
    then sorted by ascending ``allLo``.  If ``options.outputRanks`` is set,
    ``rankings`` is called and no image is written.
    """
    helpText = (
        'usage: %prog --subStatsDir=path/to/dir/ [options]\n\n'
        '%prog takes in a directory of substitution stats files ( --subStatsDir )\n'
        'with filenames as NAME.subStats.[upper|lower].xml and produces a plot.'
    )
    canvasData = Data()
    cli = OptionParser(usage=helpText)
    for addOptions in (initOptions, las.initOptions, lpt.initOptions):
        addOptions(cli)
    options, _rest = cli.parse_args()
    for verify in (checkOptions, las.checkOptions, lpt.checkOptions):
        verify(options, cli)

    # A figure is only set up when a plot is going to be produced.
    if not options.outputRanks:
        fig, pdf = lpt.initImage(9., 11., options, canvasData)
        axDict = establishAxes(fig, options, canvasData)

    byName = readSubStatsDir({}, options)
    sumErrors(byName, options)
    normalizeData(byName, options)

    names = sorted(byName, key=lambda name: byName[name].allLo)

    if options.outputRanks:
        rankings(byName, names, options, canvasData)
        return

    drawData(byName, names, axDict, options, canvasData)
    lpt.writeImage(fig, pdf, options)
def main():
    """Entry point: draw an aggregate text file for the selected mode.

    Data is read from ``options.file``, normalized according to
    ``options.mode`` and drawn on three axes (``'main'``, ``'crazy'``,
    ``'blowUp'``); axis limits are set before and again after drawing.
    """
    helpText = ('%prog --file=file.txt --mode=[scaffPaths|contigs|contigPaths|blocks|contamination] [options]\n\n'
                '%prog takes an aggregate text file ( --file ) and a mode \n'
                '( --mode ) and then produces a pretty picture.')
    canvasData = Data()
    cli = OptionParser(usage=helpText)
    initOptions(cli)
    lpt.initOptions(cli)
    options, _rest = cli.parse_args()
    checkOptions(options, cli)
    lpt.checkOptions(options, cli)
    fig, pdf = lpt.initImage(8.0, 10.0, options, canvasData)
    axDict = establishAxes(fig, options, canvasData)

    canvasData.valuesDict = readFile(options.file, options)
    canvasData.xData = canvasData.valuesDict['columnLength']

    if options.mode != 'contamination':
        canvasData.valuesDict = normalizeDataNormalMode(
            canvasData.valuesDict, options, canvasData)
    else:
        normalizeDataContaminationMode(options, canvasData)

    # The same three axes are handed to every drawing helper below.
    panels = (axDict['main'], axDict['crazy'], axDict['blowUp'])

    setAxisLimits(panels[0], panels[1], panels[2],
                  canvasData.xData, options, canvasData)
    drawData(panels[0], panels[1], panels[2],
             canvasData.xData, canvasData.valuesDict, options, canvasData)
    drawLegend(options, canvasData)
    drawAxisLabels(fig, options, canvasData)

    setAxisLimits(panels[0], panels[1], panels[2],
                  canvasData.xData, options, canvasData)

    establishTicks(panels[0], panels[1], panels[2], options, canvasData)
    lpt.writeImage(fig, pdf, options)
Exemple #31
0
def main():
    """Entry point: turn a copy number statistics file into an image.

    Parses and checks the command line, reads the input files named by the
    options, fixes the global min/max, builds the axes, draws data, legend
    and axis labels, sets the axis limits and writes the image via lpt.
    """
    usage = ('usage: %prog [options] file1.xml\n\n'
             '%prog takes in a copy number statistics file\n'
             'and creates an image file.')
    data = Data()

    # Option handling: local options first, then the shared plotting ones.
    optParser = OptionParser(usage=usage)
    initOptions(optParser)
    lpt.initOptions(optParser)
    options, positional = optParser.parse_args()
    checkOptions(positional, options, optParser)
    lpt.checkOptions(options, optParser)

    figure, pdfHandle = lpt.initImage(11.0, 3.25, options, data)

    categories = readFiles(options)
    establishGlobalMinMax(categories, options, data)
    axesByName = establishAxes(figure, categories, options, data)

    # Drawing order: data, legend, labels, and the axis limits last.
    drawData(axesByName, categories, options, data)
    drawLegend(options, data)
    drawAxisLabels(axesByName, categories, options, data)
    setAxisLimits(axesByName, options, data)

    lpt.writeImage(figure, pdfHandle, options)
def main():
    """Entry point: draw an N50 style figure.

    Reads the scaffolds file and the contigs file named on the command
    line, processes both together, plots the result on a single axis and
    writes the image out through lpt.
    """
    usage = (
        'usage: %prog --scaffoldsFile=sFile.txt --contigsFile=cFile.txt --size=N --title=TITLE\n\n'
        '%prog takes in a scaffolds file (--scaffoldsFile), a contigs\n'
        'file (--contigs), the size of the genome (--size) and a title (--title)\n'
        'and then produces an N50 style figure.')
    data = Data()

    # Option handling: local options first, then the shared plotting ones.
    optParser = OptionParser(usage=usage)
    initOptions(optParser)
    lpt.initOptions(optParser)
    options, _unusedArgs = optParser.parse_args()
    checkOptions(options, optParser)
    lpt.checkOptions(options, optParser)

    # Both inputs are read with the same reader.
    rawScaffolds = readFile(options.scaffoldsFile)
    rawContigs = readFile(options.contigsFile)
    scaffoldSeries, contigSeries = processData(rawScaffolds, rawContigs, options)

    # The figure is only created once the data has been processed.
    figure, pdfHandle = lpt.initImage(8.0, 5.0, options, data)
    axis = establishAxis(figure, options)

    drawData(scaffoldSeries, contigSeries, axis, options)

    lpt.writeImage(figure, pdfHandle, options)
def main():
    """Entry point: read a copy number statistics file and write an image.

    Steps: parse/check options, create the figure, read the input files,
    establish the global min/max and the axes, draw the data, legend and
    axis labels, set the axis limits, and write the image through lpt.
    """
    usage = ( 'usage: %prog [options] file1.xml\n\n'
              '%prog takes in a copy number statistics file\n'
              'and creates an image file.' )
    data = Data()

    cmdParser = OptionParser( usage=usage )
    initOptions( cmdParser )
    lpt.initOptions( cmdParser )
    options, cmdArgs = cmdParser.parse_args()
    # The local check also receives the positional arguments.
    checkOptions( cmdArgs, options, cmdParser )
    lpt.checkOptions( options, cmdParser )

    canvas, pdfOut = lpt.initImage( 11.0, 3.25, options, data )

    stored = readFiles( options )
    establishGlobalMinMax( stored, options, data )
    axisMap = establishAxes( canvas, stored, options, data )

    drawData( axisMap, stored, options, data )
    drawLegend( options, data )
    drawAxisLabels( axisMap, stored, options, data )
    # Limits are applied after everything has been drawn.
    setAxisLimits( axisMap, options, data )

    lpt.writeImage( canvas, pdfOut, options )
def drawRef2(rexps, exps, options, outfile, numCats):
    """Plot per-sample mapping statistics of C. Ref. against another reference.

    One dot series is drawn for each of the first numCats categories, in the
    fixed order mapped, properlyPaired, uniquelyMapped,
    uniquelyMappedAndProperlyPaired. Samples are ordered by decreasing
    `total` of their entry in rexps, followed by an 'average' column.

    rexps   -- dict: sample name -> object with `.ref` and `.total`; the
               'average' key is skipped when ordering the samples
    exps    -- handed to getData2 together with rexps
    options -- plotting options; options.out is set to outfile
    outfile -- output name, passed to libplot through options.out
    numCats -- how many categories (from the order above) to draw

    Returns None. When rexps is empty nothing is created or written.
    """
    options.out = outfile

    # Check for empty input BEFORE creating the figure: the original created
    # the figure/pdf first and then returned without ever calling writeImage.
    if len(rexps) < 1:
        return

    fig, pdf = libplot.initImage( 8.0, 10.0, options )
    axes = fig.add_axes( [0.12, 0.14, 0.85, 0.8] )

    sampleNotherRefmapped = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        e = rexps[sample]
        ref = e.ref
        sampleNotherRefmapped.append( (sample, e.total) )

    otherRefName = libplot.properName( ref )
    #HACK: the title is hard-coded rather than built from otherRefName
    axes.set_title("Mapability of C. Ref. in Comparison to GRCh37 haplotypes")
    sampleNotherRefmapped = sorted( sampleNotherRefmapped, key=lambda item: item[1], reverse=True )
    samples = [ item[0] for item in sampleNotherRefmapped ]
    samples.append( 'average' )

    xdata = list( range( len(samples) ) )
    colors = libplot.getColors4()
    lines = []
    titleDict = {'mapped':'Mapped', 'properlyPaired':'Properly Paired', 'uniquelyMapped':'Uniquely Mapped', 'uniquelyMappedAndProperlyPaired':'Uniquely Mapped And Properly Paired'}
    ydataList, miny, maxy = getData2(samples, rexps, exps, list(titleDict.keys()))

    # Horizontal shift between the series of one sample
    offset = 0.12
    # When even the smallest value is > 1000, plot in units of 10**scale
    scale = -1
    if miny > 1000:
        scale = len( str(int(miny)) ) - 1

    linenames = []
    categories = ["mapped", "properlyPaired", "uniquelyMapped", "uniquelyMappedAndProperlyPaired"]
    for i, key in enumerate( categories[:numCats] ):
        xdatai = [ x + offset*i for x in xdata ]
        ydata = ydataList[key]
        if scale > 0:
            ydata = [ float(y)/10**scale for y in ydata ]
        # axes.plot returns a list of lines; the legend needs the lines themselves
        lines.extend( axes.plot( xdatai, ydata, color=colors[i], marker='.', markersize=16.0, linestyle='none') )
        linenames.append( titleDict[key] )

    if scale > 0:
        miny = float(miny)/10**scale
        maxy = float(maxy)/10**scale

    #Draw horizontal line at y = 0:
    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(linenames) + offset
    axes.plot( [xmin, xmax], [0,0], color="#6B6B6B", linewidth=0.005)

    fontP = FontProperties()
    fontP.set_size('x-small')

    # Pad the y range: 5% below, 20% above (room for the legend)
    yrange = maxy - miny
    miny = miny - yrange*0.05
    maxy = maxy + yrange*0.2

    #Draw vertical lines to separate each sample:
    d = (1 - offset*len(linenames))/2.0
    for i in range(1, len(samples)):
        axes.plot([i - d, i - d], [miny, maxy], color="#CCCCCC", linewidth=0.005)

    axes.set_xlim( xmin, xmax )
    axes.set_ylim( miny, maxy )
    libplot.editSpine( axes )

    # Centre each tick under the group of dots of its sample
    axes.set_xticks( [ i + offset*(len(linenames)/2.0) for i in range(len(samples)) ] )
    axes.set_xticklabels( samples )
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )

    legend = pyplot.legend( lines, linenames, numpoints=1, loc='upper right', prop=fontP)
    # Public API instead of poking the private _drawFrame attribute
    legend.draw_frame( False )

    axes.set_xlabel( 'Samples' )
    if scale > 0:
        axes.set_ylabel( 'Event counts (x%d)' %(10**scale) )
    else:
        axes.set_ylabel( 'Percentage of mapping difference between C. Ref. and %s' % otherRefName)
    # First argument passed positionally: its keyword name differs between matplotlib versions
    axes.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage( fig, pdf, options )
def drawPlot(rexps, exps, options, outfile, type):
    """Plot one mapping statistic per sample, one dot series per reference.

    Samples are ordered by decreasing `total` of their entry in rexps,
    followed by an 'average' column. The series come from getData and are
    drawn in sorted order of their reference names; the dots belonging to
    one sample are joined by a thin grey line.

    rexps   -- dict: sample name -> object with `.ref` and `.total`; the
               'average' key is skipped when ordering the samples
    exps    -- handed to getData together with rexps
    options -- plotting options; options.out is set to outfile
    outfile -- output name, passed to libplot through options.out
    type    -- one of 'mapped', 'uniquelyMapped', 'properlyPaired',
               'uniquelyMappedAndProperlyPaired', 'snps' (selects the title)

    Returns None. Raises KeyError for an unknown type. When rexps is empty
    nothing is created or written.
    """
    options.out = outfile

    titleDict = {'mapped':'Mapped reads', 'uniquelyMapped':'Uniquely Mapped Reads', 'properlyPaired':'Properly Paired Reads', 'uniquelyMappedAndProperlyPaired':'Uniquely Mapped And Properly Paired Reads', 'snps':'SNPs'}
    title = titleDict[type]

    # Check for empty input BEFORE creating the figure: the original created
    # the figure/pdf first and then returned without ever calling writeImage.
    if len(rexps) < 1:
        return

    fig, pdf = libplot.initImage( 8.0, 10.0, options )
    axes = fig.add_axes( [0.12, 0.14, 0.85, 0.8] )
    axes.set_title( title )

    sampleNotherRefmapped = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        exp = rexps[sample]
        ref = exp.ref
        sampleNotherRefmapped.append( (sample, exp.total) )
    otherRefName = libplot.properName( ref )

    sampleNotherRefmapped = sorted( sampleNotherRefmapped, key=lambda item: item[1], reverse=True )
    samples = [ item[0] for item in sampleNotherRefmapped ]
    samples.append( 'average' )

    xdata = list( range( len(samples) ) )
    colors = libplot.getColors4()
    lines = []
    ydataList, miny, maxy = getData(samples, exps, rexps, type)

    refs = sorted( ydataList.keys() )
    # Horizontal shift between the series of one sample
    offset = 0.12
    # When even the smallest value is > 1000, plot in units of 10**scale
    scale = -1
    if miny > 1000:
        scale = len( str(int(miny)) ) - 1

    # (x, y) coordinates of every series, shifted and scaled exactly once
    series = []
    for i, r in enumerate(refs):
        ydata = ydataList[r]
        if scale > 0:
            ydata = [ float(y)/10**scale for y in ydata ]
        series.append( ([ x + offset*i for x in xdata ], ydata) )

    #Draw line connecting the data for each sample (each bin):
    for j in range( len(xdata) ):
        binX = [ xs[j] for xs, ys in series ]
        binY = [ ys[j] for xs, ys in series ]
        axes.plot( binX, binY, color="#CCCCCC", linestyle='-', linewidth=0.005 )

    #Draw main plots:
    for i, (xs, ys) in enumerate(series):
        # The series start at the second palette colour, as in the original
        # code (its counter started at 0 and was incremented before use).
        # axes.plot returns a list of lines; the legend needs the lines themselves.
        lines.extend( axes.plot( xs, ys, color=colors[i + 1], marker='.', markersize=16.0, linestyle='none') )

    if scale > 0:
        miny = float(miny)/10**scale
        maxy = float(maxy)/10**scale

    #Draw horizontal line at y = 0:
    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(refs) + offset
    axes.plot( [xmin, xmax], [0,0], color="#6B6B6B", linewidth=0.005)

    fontP = FontProperties()
    fontP.set_size('x-small')

    # Pad the y range: 5% below, 20% above (room for the legend)
    yrange = maxy - miny
    miny = miny - yrange*0.05
    maxy = maxy + yrange*0.2

    axes.set_xlim( xmin, xmax )
    axes.set_ylim( miny, maxy )
    libplot.editSpine( axes )

    # Centre each tick under the group of dots of its sample
    axes.set_xticks( [ i + offset*(len(refs)/2.0) for i in range(len(samples)) ] )
    axes.set_xticklabels( samples )
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )

    # Legend names: "<proper cactusRef name> <suffix>" for cactusRef series
    prefix = 'cactusRef'
    properRefs = []
    for r in refs:
        if prefix in r:
            # Remove the exact prefix. str.lstrip('cactusRef') strips any leading
            # run of the characters c,a,t,u,s,R,e,f and can eat into the suffix.
            if r.startswith(prefix):
                r = r[len(prefix):]
            properRefs.append( "%s %s" %(libplot.properName('cactusRef'), r))
        else:
            properRefs.append( libplot.properName(r) )

    legend = pyplot.legend( lines, properRefs, numpoints=1, loc='best', prop=fontP)
    # Public API instead of poking the private _drawFrame attribute
    legend.draw_frame( False )

    axes.set_xlabel( 'Samples' )
    if scale > 0:
        axes.set_ylabel( 'Event counts (x%d)' %(10**scale) )
    else:
        axes.set_ylabel( 'Percentage of mapping difference between C. Ref. and %s' % otherRefName)
    # First argument passed positionally: its keyword name differs between matplotlib versions
    axes.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage( fig, pdf, options )
def drawSamplePlot(rexps, exps, options, outfile, type):
    """Plot the SNP rate of every sample for the other reference and for cactusRef.

    Samples are ordered by decreasing `snprate` of their entry in rexps,
    followed by an 'average' column. Two dot series are drawn: the rate
    from rexps, and the rate of the cactusRef experiment with weight 2
    found in exps for the same sample.

    rexps   -- dict: sample name -> object with `.ref` and `.snprate`
               (must also contain 'average')
    exps    -- dict: sample name -> list of objects with `.ref`, `.weight`
               and `.snprate`
    options -- plotting options; options.out is set to outfile
    outfile -- output name, passed to libplot through options.out
    type    -- not used by this function

    Returns None. When rexps is empty nothing is created or written.
    """
    options.out = outfile

    # Check for empty input BEFORE creating the figure: the original created
    # the figure/pdf first and then returned without ever calling writeImage.
    if len(rexps) < 1:
        return

    fig, pdf = libplot.initImage( 11.2, 10.0, options )
    axes = fig.add_axes( [0.14, 0.12, 0.8, 0.8] )

    #Set title:
    axes.set_title( "SNP Rate Using BWA Mapping" )

    sampleNsize = []
    ref = ''
    for sample in rexps:
        if sample == 'average':
            continue
        exp = rexps[sample]
        ref = exp.ref
        sampleNsize.append( (sample, exp.snprate) )
    otherRefName = ref

    sampleNsize = sorted( sampleNsize, key=lambda item: item[1], reverse=True )
    samples = [ item[0] for item in sampleNsize ]
    samples.append( 'average' )

    #Get ydata:
    ydata1 = [] #otherRef (hg19, apd, ...)
    ydata2 = [] #cactusRef2
    for sample in samples:
        explist = exps[sample]
        otherRef = rexps[sample]
        ydata1.append( otherRef.snprate )
        # NOTE(review): assumes exactly one cactusRef experiment with weight 2
        # per sample; otherwise ydata2 will not line up with xdata - confirm.
        for e in explist:
            if e.ref == 'cactusRef' and e.weight == 2:
                ydata2.append( e.snprate )

    miny = min([min(ydata1), min(ydata2)])
    maxy = max([max(ydata1), max(ydata2)])

    xdata = list( range( len(samples) ) )
    colors = ["#1F78B4", "#E31A1C"] #blue (other reference), red (cactusRef)
    # When even the smallest value is > 1000, plot in units of 10**scale
    scale = -1
    if miny > 1000:
        scale = len( str(int(miny)) ) - 1
    if scale > 0:
        ydata1 = [ float(y)/10**scale for y in ydata1 ]
        ydata2 = [ float(y)/10**scale for y in ydata2 ]
        miny = float(miny)/10**scale
        maxy = float(maxy)/10**scale

    # axes.plot returns a list of lines; the legend needs the lines themselves
    lines = []
    lines.extend( axes.plot(xdata, ydata1, color=colors[0], marker=".", markersize=16.0, linestyle='none') )
    lines.extend( axes.plot(xdata, ydata2, color=colors[1], marker=".", markersize=16.0, linestyle='none') )

    fontP = FontProperties()
    fontP.set_size('x-small')
    axes.set_xlim(-0.4, len(samples) - 0.6 )

    # Pad the y range: 5% below, 10% above
    yrange = maxy - miny
    miny = miny - yrange*0.05
    maxy = maxy + yrange*0.1
    axes.set_ylim( miny, maxy )

    libplot.editSpine( axes )

    axes.set_xticks( xdata )
    axes.set_xticklabels( samples )
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.yaxis.set_ticks_position( 'left' )
    axes.xaxis.set_ticks_position( 'bottom' )

    legend = pyplot.legend( lines, [libplot.properName(otherRefName), libplot.properName("cactusRef")], numpoints=1, loc='best', prop=fontP)
    # Public API instead of poking the private _drawFrame attribute
    legend.draw_frame( False )

    axes.set_xlabel( 'Samples' )
    if scale > 0:
        axes.set_ylabel( 'Snp counts (x%d)' %(10**scale) )
    else:
        axes.set_ylabel( 'SNPs Per Site' )
    # First argument passed positionally: its keyword name differs between matplotlib versions
    axes.xaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    axes.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage( fig, pdf, options )
def drawPlot2(exps, options, outfile, type):
    """Plot the number of indels per sample on a broken y axis.

    Two stacked axes (from setAxes) show the same dots: the upper one is
    limited to the outlier range, the lower one to the normal range, as
    split by getOutliers. The series are the 'All' and 'No repeats'
    experiments; the columns are the samples from getSamplesOrder followed
    by 'average', 'reference' and 'panTro3'.

    exps    -- dict of experiments; must contain 'All', otherwise the
               function returns without drawing
    options -- plotting options; options.out is set to outfile
    outfile -- output name, passed to libplot through options.out
    type    -- statistic to plot; also the key of the title ('total')

    Returns None. Nothing is created or written when 'All' is missing or
    no samples are found.
    """
    options.out = outfile

    # Check the input BEFORE creating the figure: the original created the
    # figure/pdf first and then returned without ever calling writeImage.
    if 'All' not in exps:
        return
    samples = getSamplesOrder( exps['All'], type )
    if len( samples ) < 1:
        return

    fig, pdf = libplot.initImage( 11.2, 10.0, options )

    titleDict = {'total':'Total Indels Called'}

    samples.append('average')
    samples.append('reference')
    samples.append('panTro3')

    xdata = list( range( len(samples) ) )
    colors = libplot.getColors6()
    lines = []

    pointsize = 10.0
    # Horizontal shift between the series of one sample
    offset = 0.15
    exporder = ['All', 'No repeats']

    #Get ydata
    ydataList, ymin, ymax = getData(samples, exps, type, exporder)
    yrange = ymax - ymin

    #Get normal range and outlier range (each padded by 5% of the full range):
    # NOTE(review): min()/max() raise ValueError if getOutliers returns an
    # empty list for either group - confirm that cannot happen.
    normalvals, outliers = getOutliers(ydataList)
    minNormal = min(normalvals) - 0.05*yrange
    maxNormal = max(normalvals) + 0.05*yrange
    minOutlier = min(outliers) - 0.05*yrange
    maxOutlier = max(outliers) + 0.05*yrange
    if minNormal < 0:
        minNormal = -0.5

    #Set up the axes (ax: outliers on top, ax2: normal range below)
    ax, ax2 = setAxes(fig, maxOutlier - minOutlier, maxNormal - minNormal)

    # When even the lower bound is > 1000, plot in units of 10**scale
    scale = -1
    if minNormal > 1000:
        scale = len( str(int(minNormal)) ) - 1
    if scale > 0:
        for exp in ydataList:
            ydataList[exp] = [ float(y)/10**scale for y in ydataList[exp] ]

    #PLOT: the same dots go on both axes; only the y limits differ
    for i, exp in enumerate(exporder):
        xdatai = [ x + offset*i for x in xdata ]
        ydata = ydataList[exp]
        # axes.plot returns a list of lines; the legend needs the lines themselves
        lines.extend( ax.plot(xdatai, ydata, color=colors[i], marker='.', markersize=pointsize, linestyle='none') )
        ax2.plot(xdatai, ydata, color=colors[i], marker='.', markersize=pointsize, linestyle='none')

    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(exps) + offset*3

    fontP = FontProperties()
    fontP.set_size('x-small')

    if scale > 0:
        minNormal = float(minNormal)/10**scale
        maxNormal = float(maxNormal)/10**scale
        minOutlier = float(minOutlier)/10**scale
        maxOutlier = float(maxOutlier)/10**scale

    #Draw the Discontinue sign: a short diagonal at the cut of each axis
    breakSize = 0.2 #half height of the diagonal, in data units
    if scale == -1:
        breakSize = 50
    ax.plot( (-1, 0), (minOutlier + breakSize, minOutlier - breakSize), color="k", clip_on=False )
    ax2.plot( (-1, 0), (maxNormal + breakSize, maxNormal - breakSize), color="k", clip_on=False )

    #Draw vertical lines to separate each sample:
    gap = (1 - offset*len(exporder))/2.0
    for i in range(1, len(samples)):
        x = [i - gap, i - gap]
        y = [minNormal, maxOutlier]
        ax.plot(x, y, color="#CCCCCC", linewidth=0.005)
        ax2.plot(x, y, color="#CCCCCC", linewidth=0.005)

    xticklabels = [ libplot.properName(s) for s in samples ]
    # '//' keeps the floor division the original relied on (Python 2 int '/')
    tickShift = offset*(len(exps)//2 - 1)
    xtickpositions = [ i + tickShift for i in range(len(samples)) ]

    #Set limit for the top plot (outlier); its tick labels are left blank
    ax.set_ylim(minOutlier, maxOutlier)
    ax.set_xlim(xmin, xmax)
    ax.set_xticks( xtickpositions )
    ax.set_xticklabels( [ "" for l in xticklabels ] )

    #Make sure the y ticks of the top plot is the same with the bottom plot:
    step = 2
    if scale == -1:
        step = 500
    ytickpositions = []
    ytickpos = 0
    while ytickpos < maxOutlier:
        if ytickpos >= minOutlier:
            ytickpositions.append(ytickpos)
        ytickpos += step
    ax.set_yticks(ytickpositions)

    #Set limit for the bottom plot:
    ax2.set_ylim(minNormal, maxNormal)
    ax2.set_xlim(xmin, xmax)

    #Hide the spines between ax and ax2:
    ax.spines['bottom'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.yaxis.set_ticks_position('left')
    ax.xaxis.set_ticks_position('none')

    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.xaxis.tick_bottom()
    ax2.yaxis.set_ticks_position( 'left' )

    ax2.set_xticks( xtickpositions )
    ax2.set_xticklabels( xticklabels )

    for label in ax2.xaxis.get_ticklabels():
        label.set_rotation(75)

    legend = pyplot.legend( lines, exporder, numpoints=1, loc='upper left', prop=fontP)
    # Public API instead of poking the private _drawFrame attribute
    legend.draw_frame( False )

    ax2.set_xlabel( 'Samples' )
    ylabel = 'Number of indels'
    if scale > 0:
        ylabel += '(x%d)' %10**scale
    ax2.set_ylabel(ylabel)
    ax.set_title( titleDict[type] )

    # First argument passed positionally: its keyword name differs between matplotlib versions
    ax.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax2.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage( fig, pdf, options )
def drawPlot(exps, options, outfile, type):
    """Plot an indel statistic per sample, one dot series per experiment.

    The columns are the samples from getSamplesOrder followed by 'average'
    and, except for type 'fn', 'reference' and 'panTro3'. Which experiments
    are drawn depends on type:
      'tp' (default order) -- Wobble; Wobble, No repeats; All; No repeats
      'fn'                 -- Wobble; All
      'total'              -- All; No repeats
      'tp2', 'tpfn'        -- Wobble; Wobble, No repeats
    For 'tpfn' the '<exp>.tp' series of both experiments and the '.fn'
    series of the first experiment are drawn.

    exps    -- dict of experiments; must contain 'All', otherwise the
               function returns without drawing
    options -- plotting options; options.out is set to outfile
    outfile -- output name, passed to libplot through options.out
    type    -- 'tpfn', 'tp', 'tp2', 'fn' or 'total'

    Returns None. Raises KeyError for an unknown type. Nothing is created
    or written when 'All' is missing or no samples are found.
    """
    options.out = outfile

    titleDict = {'tpfn':'Indel Overlap with dbSNP', 'tp':'True Positives According to dbSNP', 'tp2':'Indel Overlap with dbSNP', 'fn':'False Negatives According to dbSNP', 'total':'Total Indels Called'}
    title = titleDict[type]

    # Check the input BEFORE creating the figure: the original created the
    # figure/pdf first and then returned without ever calling writeImage.
    if 'All' not in exps:
        return
    samples = getSamplesOrder( exps['All'], type )
    if len( samples ) < 1:
        return

    fig, pdf = libplot.initImage( 11.2, 10.0, options )
    axes = fig.add_axes( [0.12, 0.18, 0.85, 0.75] )
    axes.set_title( title )

    samples.append('average')
    if type != 'fn':
        samples.append('reference')
        samples.append('panTro3')

    xdata = list( range( len(samples) ) )
    colors = libplot.getColors6()
    c = -1
    lines = []

    pointsize = 10.0
    # Horizontal shift between the series of one sample
    offset = 0.15
    exporder = ['Wobble', 'Wobble, No repeats', 'All', 'No repeats']
    if type == 'fn':
        exporder = ['Wobble', 'All']
    elif type == 'total':
        exporder = ['All', 'No repeats']
    elif type == 'tp2' or type == 'tpfn':
        exporder = ['Wobble', 'Wobble, No repeats']

    #Get ydata
    ydataList, ymin, ymax = getData(samples, exps, type, exporder)

    # When even the smallest value is > 1000, plot in units of 10**scale
    scale = -1
    if ymin > 1000:
        scale = len( str(int(ymin)) ) - 1
    if scale > 0:
        for exp in ydataList:
            ydataList[exp] = [ float(y)/10**scale for y in ydataList[exp] ]

    # axes.plot returns a list of lines; the legend needs the lines themselves,
    # hence lines.extend(...) below.
    if type == 'tpfn':
        for j, t in enumerate(['tp', 'fn']):
            for i, exp in enumerate(exporder):
                # Only the first experiment gets a false-negative series
                if i > 0 and t == 'fn':
                    continue
                if t == 'tp':
                    xdatai = [ x + offset*(j*2+i) for x in xdata ]
                else:
                    # The 'fn' series leaves out the last three columns
                    xdatai = [ x + offset*(j*2+i) for x in xdata[: len(xdata) - 3] ]
                ydata = ydataList["%s.%s" %(exp, t)]
                c += 1
                lines.extend( axes.plot(xdatai, ydata, color=colors[c], marker='.', markersize=pointsize, linestyle='none') )
    else:
        for i, exp in enumerate(exporder):
            xdatai = [ x + offset*i for x in xdata ]
            ydata = ydataList[exp]
            c += 1
            lines.extend( axes.plot(xdatai, ydata, color=colors[c], marker='.', markersize=pointsize, linestyle='none') )
            if type == 'fn':
                # 'fn' uses every other palette colour
                c += 1

    xmin = -0.4
    xmax = len(samples) - 1 + offset*len(exps) + offset*3

    fontP = FontProperties()
    fontP.set_size('x-small')

    if scale > 0:
        ymin = float(ymin)/10**scale
        ymax = float(ymax)/10**scale
    # Pad the y range by 1% on both sides
    datarange = ymax - ymin
    ymin = ymin - datarange*0.01
    ymax = ymax + datarange*0.01

    #Draw vertical lines to separate each sample:
    gap = (1 - offset*len(exporder))/2.0
    for i in range(1, len(samples)):
        axes.plot([i - gap, i - gap], [ymin, ymax], color="#CCCCCC", linewidth=0.005)

    axes.set_xlim(xmin, xmax)
    axes.set_ylim(ymin, ymax)
    libplot.editSpine( axes )

    # '//' keeps the floor division the original relied on (Python 2 int '/')
    tickShift = offset*(len(exps)//2 - 1)
    axes.set_xticks( [ i + tickShift for i in range(len(samples)) ] )
    axes.set_xticklabels( [ libplot.properName(s) for s in samples ] )
    for label in axes.xaxis.get_ticklabels():
        label.set_rotation(90)
    axes.xaxis.set_ticks_position( 'bottom' )
    axes.yaxis.set_ticks_position( 'left' )

    if type == 'tp':
        legendNames = ['All', 'No repeats', 'No wobble', 'No wobble, No repeats']
    elif type == 'fn':
        legendNames = ['All', 'No wobble']
    elif type == 'tpfn':
        legendNames = ['All, TP', 'No repeats, TP', 'All, FN']
    elif type == 'tp2':
        legendNames = ['All', 'No repeats']
    else:
        legendNames = exporder
    legend = pyplot.legend( lines, legendNames, numpoints=1, loc='best', prop=fontP)
    # Public API instead of poking the private _drawFrame attribute
    legend.draw_frame( False )

    axes.set_xlabel( 'Samples' )
    ylabel = "Percentage"
    if type == 'total':
        ylabel = 'Number of indels'
    if scale > 0:
        ylabel += '(x%d)' %10**scale
    axes.set_ylabel(ylabel)

    # First argument passed positionally: its keyword name differs between matplotlib versions
    axes.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    libplot.writeImage( fig, pdf, options )
Exemple #39
0
def drawPlot( options, samples1, samples2, type ):
    """Plot the insertion or deletion rate of every sample for two references.

    Columns: the samples of samples1 (without panTro3) in decreasing order
    of their rate, then the first reference itself (rate 0), then
    'average', then panTro3 when samples1 contains it.

    options  -- plotting options; reads options.files[0] and options.outdir
                and sets options.out
    samples1 -- samples of the first input; objects with `.name`,
                `.refname`, `.ins` and `.dels`
    samples2 -- samples of the second input, same attributes
    type     -- 'insertion' plots `.ins`; anything else plots `.dels`

    Returns None without drawing when there is nothing to plot. Exits with
    status 1 when the two inputs do not contain the same samples.
    """
    # One selector for the plotted value. The original used `.ins` for y1data
    # unless type was exactly 'deletion', but `.dels` everywhere else for any
    # type other than 'insertion'.
    if type == 'insertion':
        rate = lambda s: s.ins
    else:
        rate = lambda s: s.dels

    #Sorted in decreasing order of the plotted rate in samples1
    samples1 = sorted( samples1, key=rate, reverse=True )

    #remove chimpSample (it is drawn last, after the average):
    chimpSample = None
    for i, s in enumerate(samples1):
        if s.name == 'panTro3':
            chimpSample = samples1.pop(i)
            break

    # Nothing to plot; this also protects the [0] lookups below, which raised
    # IndexError when samples1 held only panTro3 or samples2 was empty.
    if len( samples1 ) < 1 or len( samples2 ) < 1:
        return

    refname1 = samples1[0].refname
    refname2 = samples2[0].refname

    y1data = [ rate(s) for s in samples1 ]
    xticklabels = [ s.name for s in samples1 ]

    #indel of refname1 w.r.t itself (0)
    y1data.append(0)
    xticklabels.append(refname1)

    y2data = []
    for name in xticklabels:
        if name == refname2:#indel of refname2 w.r.t itself (0)
            y2data.append(0)
        for s in samples2:
            if s.name == name:
                y2data.append( rate(s) )
                break

    if len(xticklabels) != len(y2data):
        sys.stderr.write("Input file 1 and 2 do not have the same set of samples\n")
        sys.exit( 1 )

    #add the average column (the reference's own 0 entry is not counted):
    y1avr = sum(y1data)/float(len(y1data) - 1)
    y1data.append(y1avr)
    xticklabels.append('average')
    y2avr = sum(y2data)/float(len(y2data) - 1)
    y2data.append(y2avr)
    # Parenthesised single-argument form: same output with the print
    # statement and with the print function
    print( "%s Average: %s %f, %s %f" %(type, refname1, y1avr, refname2, y2avr) )

    #Add chimp (only when the first input has it; the original crashed on
    #None otherwise). It must then be present in the second input as well.
    if chimpSample is not None:
        chimpSample2 = None
        for s in samples2:
            if s.name == 'panTro3':
                chimpSample2 = s
                break
        if chimpSample2 is None:
            sys.stderr.write("Input file 1 and 2 do not have the same set of samples\n")
            sys.exit( 1 )
        y1data.append( rate(chimpSample) )
        y2data.append( rate(chimpSample2) )
        xticklabels.append("panTro3")

    minMajority = min( [min(y2data), min(y1data)] ) - 0.0001
    maxMajority = max( [max(y2data), max(y1data)] ) + 0.0001

    # Output name: <type>_<input basename without 'pathStats_' and '.xml'>.
    # Exact prefix/suffix removal: lstrip('pathStats') / rstrip('.xml') strip
    # character sets, e.g. 'pathStats_cox.xml' used to become 'co'.
    basename = os.path.basename(options.files[0])
    if basename.startswith('pathStats'):
        basename = basename[len('pathStats'):]
    basename = basename.lstrip('_')
    if basename.endswith('.xml'):
        basename = basename[:-len('.xml')]
    options.out = os.path.join( options.outdir, '%s_%s' %( type, basename ) )
    fig, pdf = libplot.initImage( 11.2, 10.0, options )
    ax2 = fig.add_axes( [0.15, 0.15, 0.8, 0.8] )

    # ax2.plot returns a list with one line; keep the line itself for the legend
    l2 = ax2.plot( y2data, marker='.', markersize=14.0, linestyle='none', color="#E31A1C" )[0]
    l1 = ax2.plot( y1data, marker='.', markersize=14.0, linestyle='none', color="#1F78B4" )[0]

    #Legend
    fontP = FontProperties()
    fontP.set_size("x-small")
    legend = ax2.legend([l1, l2], [libplot.properName(refname1), libplot.properName(refname2)], loc='upper right', numpoints=1, prop=fontP)
    # Public API instead of poking the private _drawFrame attribute
    legend.draw_frame( False )

    ax2.set_ylim( minMajority, maxMajority )
    ax2.set_xlim( -0.5, len(xticklabels) -0.5 )

    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.xaxis.tick_bottom()
    ax2.yaxis.set_ticks_position( 'left' )

    ax2.set_xticks( list( range( len(xticklabels) ) ) )
    properxticklabels = [ libplot.properName(l) for l in xticklabels ]
    ax2.set_xticklabels( properxticklabels )

    for label in ax2.xaxis.get_ticklabels():
        label.set_rotation( 90 )

    # First argument passed positionally: its keyword name differs between matplotlib versions
    ax2.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    ax2.xaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    ax2.set_xlabel( 'Samples' )
    if type == 'insertion':
        ax2.set_ylabel( 'Insertions per site' )
        title = 'Insertions'
    else:
        ax2.set_ylabel( 'Deletions per site' )
        title = 'Deletions'
    ax2.set_title( title )

    libplot.writeImage( fig, pdf, options )
Exemple #40
0
def drawSnpPlot(options, samples1, samples2):
    """Plot per-sample SNP rates measured against two references on a broken y axis.

    The x axis lists the samples of samples1 in decreasing order of
    errPerSite, followed by the first reference itself (rate 0), an
    'average' column and finally the chimp sample ('panTro3'). The same
    columns are looked up by name in samples2. The last
    options.numOutliners columns define the y range of the top (outlier)
    axes; the remaining columns define the y range of the bottom axes.

    options: reads options.files[0], options.outdir and
             options.numOutliners; options.out is overwritten with the
             output path before the image is initialised.
    samples1, samples2: sequences of objects with the attributes name,
             errPerSite and refname.

    Returns None. Nothing is drawn when samples1 is empty or holds only the
    chimp sample. Writes a message to stderr and exits with status 1 when
    'panTro3' is missing from either set, when samples2 is empty, or when
    the two sets do not provide the same columns.
    """
    #All the samples sorted in decreasing order of SNP rate, then average, then chimp
    samples1 = sorted( samples1, key=lambda s:s.errPerSite, reverse=True )
    if len( samples1 ) < 1:
        return

    #Take the chimp sample out of the list so that it can be drawn as the last column
    chimpSample = None
    for i, s in enumerate(samples1):
        if s.name == "panTro3":
            chimpSample = samples1.pop(i)
            break
    if chimpSample is None:
        sys.stderr.write("Sample 'panTro3' was not found in the first set of samples\n")
        sys.exit(1)
    if len( samples1 ) < 1:
        #Only the chimp was given: there is nothing to compare it with
        return
    if len( samples2 ) < 1:
        sys.stderr.write("The second set of samples is empty\n")
        sys.exit(1)

    chimpSample2 = None
    for s in samples2:
        if s.name == 'panTro3':
            chimpSample2 = s
            break
    if chimpSample2 is None:
        sys.stderr.write("Sample 'panTro3' was not found in the second set of samples\n")
        sys.exit(1)

    refname1 = samples1[0].refname
    refname2 = samples2[0].refname

    y1data = [ s.errPerSite for s in samples1 ]
    xticklabels = [ s.name for s in samples1 ]

    y1data.append(0) #snps of refname1 w.r.t itself (which is 0)
    xticklabels.append(refname1)

    #Collect the second data set in the same column order as the first one
    y2data = []
    for name in xticklabels:
        if name == refname2:
            y2data.append(0) #snps of refname2 w.r.t itself
        for s2 in samples2:
            if s2.name == name:
                y2data.append(s2.errPerSite)
                break

    if len(y1data) != len(y2data):
        sys.stderr.write("Input files have different number of samples: %d, %d\n" %(len(y1data), len(y2data)))
        sys.exit(1)

    #add the average column (the divisor leaves out the reference's own 0 entry):
    if len(y1data) >= 2:
        y1avr = sum(y1data)/float(len(y1data) -1)
        y1data.append(y1avr)
        y2avr = sum(y2data)/float(len(y2data) -1)
        y2data.append(y2avr)

        #Print summary stats to stderr; index 1 of the sorted data skips the reference's 0
        sys.stderr.write("%s\t%f\t%f\t%f\n" %( refname1, sorted(y1data)[1] , max(y1data), y1avr ))
        sys.stderr.write("%s\t%f\t%f\t%f\n" %( refname2, sorted(y2data)[1] , max(y2data), y2avr ))

        #The label is added together with the data so that columns and labels stay aligned
        xticklabels.append('average')

    #add chimp:
    y1data.append( chimpSample.errPerSite )
    y2data.append( chimpSample2.errPerSite )
    xticklabels.append( 'panTro3' )

    #Min, max values: the last `num` columns are the outliers, the rest the majority
    num = options.numOutliners
    numcols = len(y1data)
    split = numcols - num

    minOutlier = min( [ min(y1data[split:]), min(y2data[split:]) ] ) - 0.001
    maxOutlier = max( [ max(y1data[split:]), max(y2data[split:]) ] ) + 0.001
    minMajority = min( [ min(y1data[:split]), min(y2data[:split]) ] ) - 0.001
    maxMajority = max( [ max(y1data[:split]), max(y2data[:split]) ] ) + 0.001
    if minMajority < 0:
        minMajority = -0.0001

    #Set up. The output name is the input file name without the 'snpStats'
    #prefix and the '.xml' suffix. str.lstrip/str.rstrip take a set of
    #characters, not a prefix/suffix, and would also eat matching letters of
    #the name itself, so the affixes are removed explicitly.
    outname = os.path.basename(options.files[0])
    if outname.startswith('snpStats'):
        outname = outname[len('snpStats'):]
    outname = outname.lstrip('_')
    if outname.endswith('.xml'):
        outname = outname[:-len('.xml')]
    options.out = os.path.join(options.outdir, '%s' %outname )
    fig, pdf = libplot.initImage( 11.2, 10.0, options )
    ax, ax2 = setAxes(fig, maxOutlier - minOutlier, maxMajority - minMajority)

    #Plot the same data on both axes; the y limits decide which columns each one shows
    l2 = ax.plot( y2data, marker='.', markersize=14.0, linestyle='none', color="#E31A1C" )#Red
    l1 = ax.plot( y1data, marker='.', markersize=14.0, linestyle='none', color="#1F78B4" )#Blue

    ax2.plot( y2data, marker='.', markersize=14.0, linestyle='none', color="#E31A1C" )
    ax2.plot( y1data, marker='.', markersize=14.0, linestyle='none', color="#1F78B4" )

    #Legend. plot() returns a list of lines, so the line itself is handed over as handle
    fontP = FontProperties()
    fontP.set_size("x-small")
    legend = ax.legend( [l1[0], l2[0]], [libplot.properName(refname1), libplot.properName(refname2)], loc='upper left', numpoints=1, prop=fontP )
    legend.draw_frame(False)

    #Short slanted marks at the facing ends of the two y axes to show the break
    d = .0001 # half of the height of the marks
    ax.plot( (-1, 0), (minOutlier +d, minOutlier - d), color = "k", clip_on=False )
    ax2.plot( (-1, 0), (maxMajority +d, maxMajority - d), color = "k", clip_on=False )

    ax.set_ylim( minOutlier, maxOutlier ) # outliers only
    ax.set_xlim( -0.5, len(xticklabels) -0.5 )
    dummyxticklabels = [ "" for l in xticklabels ]
    ax.set_xticklabels( dummyxticklabels )

    #Make sure the y ticks of the top plot (the outlier plot) is the same with the other plot:
    step = 0.001
    ytickpositions = []
    ytickpos = 0
    while ytickpos < maxOutlier:
        if ytickpos >= minOutlier:
            ytickpositions.append(ytickpos)
        ytickpos += step
    ax.set_yticks( ytickpositions )

    ax2.set_ylim( minMajority, maxMajority )
    ax2.set_xlim( -0.5, len(xticklabels) -0.5 )

    # hide the spines between ax and ax2
    ax.spines['bottom'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.yaxis.set_ticks_position( 'left' )
    ax.xaxis.set_ticks_position( 'none' )

    ax2.spines['top'].set_visible(False)
    ax2.spines['right'].set_visible(False)
    ax2.xaxis.tick_bottom()
    ax2.yaxis.set_ticks_position( 'left' )

    ax2.set_xticks( range( 0, len(xticklabels) ) )
    properxticklabels = [ libplot.properName(l) for l in xticklabels ]
    ax2.set_xticklabels( properxticklabels )
    #Make sure the x ticks of the top plot is the same with the other plot:
    ax.set_xticks( range(0, len(xticklabels)) )

    for label in ax2.xaxis.get_ticklabels():
        label.set_rotation( 90 )

    #The on/off flag is passed positionally because its keyword name differs between matplotlib versions
    for axes in (ax, ax2):
        axes.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
        axes.xaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)

    ax2.set_xlabel( 'Samples' )
    ax2.set_ylabel( 'SNPs per site' )
    title = 'SNPs'
    ax.set_title( title )

    libplot.writeImage( fig, pdf, options )