Exemple #1
0
def drawPca(rownames, transformedM, outfile, options):
    """Draw the PCA data onto a single image written under the name outfile.

    rownames and transformedM are handed unchanged to drawPcaData, which
    does the actual plotting.

    Side effect: options.out is overwritten with outfile before the image
    is initialised (presumably the immunoseqLib image helpers read the
    output name from it -- confirm against immunoseqLib).
    """
    options.out = outfile
    figure, pdfHandle = immunoseqLib.initImage(12.5, 8.0, options)
    plotAxes = immunoseqLib.setAxes(figure)
    drawPcaData(plotAxes, rownames, transformedM, options)
    immunoseqLib.writeImage(figure, pdfHandle, options)
def drawOverlapReads(exps, options):
    """Draw the overlap-reads plot for the sample named by options.sample.

    The image base name is "overlapPlot-<sample>" inside options.outdir.
    exps, the sample name and options.sampleOrder are forwarded to
    drawOverlapReadsData, which does the plotting.

    Side effect: options.out is overwritten with the output base path.
    """
    sampleName = options.sample
    options.out = os.path.join(options.outdir, "overlapPlot-%s" % sampleName)
    figure, pdfHandle = iseqlib.initImage(10.0, 12.0, options)
    plotAxes = iseqlib.setAxes(figure)
    drawOverlapReadsData(plotAxes, exps, sampleName, options.sampleOrder)
    iseqlib.writeImage(figure, pdfHandle, options)
def drawClonesizeDist(outdir, options, sample2data):
    """Plot the clone-size distribution of every sample on one set of axes.

    Args:
        outdir: directory the image is written into.
        options: option object passed to iseqlib.initImage / writeImage.
            Its ``out`` attribute is overwritten with the output base path.
        sample2data: dict mapping sample name -> (xticklabels, ydata).
            ydata is plotted against positions 0..len(xticklabels)-1.

    Samples are drawn in sorted-name order, each with its own colour and
    marker. The x tick labels and x limits are taken from the LAST sample
    processed, so all samples are expected to share the same bins.

    NOTE(review): the output base name is "cloneVsRead", which looks like a
    leftover from another plot; kept as-is so existing output paths do not
    change.
    """
    options.out = os.path.join(outdir, "cloneVsRead")
    fig, pdf = iseqlib.initImage(10.0, 8.0, options)
    axes = fig.add_axes([0.12, 0.15, 0.85, 0.75])

    sampleNames = sorted(sample2data.keys())
    colors = iseqlib.getColors6()
    lightColors = iseqlib.getColors6light()
    markers = ['o', '*', 's', 'd', '^', 'p', 'v']
    # Per-marker size overrides; every other marker uses 10.0.
    markerSizes = {'*': 12.0, 's': 8.0}

    lines = []
    xdata = []
    xticklabels = []
    for c, sample in enumerate(sampleNames):
        (xticklabels, ydata) = sample2data[sample]
        xdata = list(range(len(xticklabels)))
        # Debug output; xdata is computed first so the values printed belong
        # to this sample rather than to the previous one.
        print(sample)
        print(xdata)
        print(ydata)

        # Wrap around the palettes so that having more samples than colours
        # or markers re-uses styles instead of raising IndexError.
        m = markers[c % len(markers)]
        color = colors[c % len(colors)]
        lightColor = lightColors[c % len(lightColors)]

        # axes.plot returns a list of Line2D; keep the artist itself so the
        # legend receives one handle per sample.
        handle = axes.plot(xdata, ydata, color=color, marker=m,
                           markeredgecolor=color,
                           markersize=markerSizes.get(m, 10.0),
                           linestyle='none')[0]
        # Thin connecting line in the light variant of the sample colour.
        axes.plot(xdata, ydata, color=lightColor, linestyle='-', linewidth=0.2)
        lines.append(handle)

    iseqlib.editSpine(axes)

    axes.set_title('Clone Size Distribution', size='xx-large')
    axes.set_xlabel('Clone size as percentage of total reads', size='large')
    axes.set_ylabel('Percentage of total clones', size='large')

    fontP = FontProperties()
    fontP.set_size('medium')
    legend = axes.legend(lines, sampleNames, numpoints=1, loc='best', ncol=1, prop=fontP)
    # Public API for hiding the legend frame (instead of the private
    # _drawFrame attribute).
    legend.draw_frame(False)

    # Fix the tick positions first, then label them. Ticks sit half a unit
    # to the left of each data point.
    axes.xaxis.set_ticks([x - 0.5 for x in xdata])
    axes.xaxis.set_ticklabels(xticklabels)
    axes.set_xlim(-0.5, len(xdata) - 0.5)

    axes.set_ylim(-0.5, 50)
    for label in axes.get_xticklabels():
        label.set_fontsize('medium')
        label.set_rotation(45)
    for label in axes.get_yticklabels():
        label.set_fontsize('medium')

    # Visibility is passed positionally: the keyword was renamed from ``b``
    # to ``visible`` in newer matplotlib releases.
    axes.yaxis.grid(True, color="#BDBDBD", linestyle='-', linewidth=0.005)
    axes.xaxis.grid(True, color="#BDBDBD", linestyle='-', linewidth=0.005)

    iseqlib.writeImage(fig, pdf, options)
Exemple #4
0
def drawUsageDist( samples, stds, options, genetype, intersectGenes ):
    """Draw the gene-usage distribution of all samples into one image.

    The image base name is "<genetype>Usage" inside options.outdir. The
    axes are created by setUsageDistAxes from the number of samples and
    options.samplesPerPlot; drawUsageData does the plotting.

    Side effect: options.out is overwritten with the output base path.
    """
    options.out = os.path.join( options.outdir, "%sUsage" %genetype)
    figure, pdfHandle = immunoseqLib.initImage( 10.0, 12.0, options )
    sampleCount = len(samples)
    usageAxes = setUsageDistAxes( figure, sampleCount, options.samplesPerPlot )
    drawUsageData( usageAxes, samples, stds, options, genetype, intersectGenes )
    immunoseqLib.writeImage( figure, pdfHandle, options )
Exemple #5
0
def drawVJ( sample, vgenes, jgenes, options, minvj, maxvj ):
    """Draw the VJ usage plot of one sample.

    The image base name is "<sample.name>-vjUsage" inside options.outdir.
    All plotting is delegated to drawVJdata, which receives both the figure
    and the axes along with the remaining arguments unchanged.

    Side effect: options.out is overwritten with the output base path.
    """
    options.out = os.path.join( options.outdir, "%s-vjUsage" %sample.name )
    figure, pdfHandle = immunoseqLib.initImage( 8.0, 10.0, options )
    plotAxes = immunoseqLib.setAxes(figure)

    drawVJdata( figure, plotAxes, sample, vgenes, jgenes, options, minvj, maxvj )
    immunoseqLib.writeImage( figure, pdfHandle, options )
def drawCloneVsRead( samples, options, isAbs ):
    """Draw the clone-vs-read plot of all samples into one image.

    The image base name inside options.outdir is "cloneVsRead" when isAbs
    is true and "cloneVsRead-Rel" otherwise. Axes come from
    setUsageDistAxes; drawCloneVsReadData does the plotting.

    Side effect: options.out is overwritten with the output base path.
    """
    if isAbs:
        baseName = "cloneVsRead"
    else:
        baseName = "cloneVsRead-Rel"
    options.out = os.path.join( options.outdir, baseName )

    figure, pdfHandle = iseqlib.initImage( 10.0, 8.0, options )
    sampleCount = len(samples)
    plotAxes = setUsageDistAxes( figure, sampleCount, options.samplesPerPlot )
    drawCloneVsReadData( plotAxes, samples, options.samplesPerPlot, options, isAbs )
    iseqlib.writeImage( figure, pdfHandle, options )
def drawCloneSizeDist( samples, options, isAbs, yaxisPcReads, yaxisPcClones, cumulative ):
    """Draw the clone-size distribution of all samples into one image.

    The output base name inside options.outdir is built as

        cloneSizeDist[-Rel][-cumulative][-pcReads][-pcClones]

    where "-Rel" is added when isAbs is false and each remaining suffix is
    added when the flag of the same name is true. Every flag combination
    therefore maps to its own file. (Previously the "-cumulative" suffix
    was dropped whenever isAbs was false, so cumulative and non-cumulative
    relative plots overwrote each other.)

    Axes come from setUsageDistAxes; drawCloneSizeData does the plotting
    and receives all four flags unchanged.

    Side effect: options.out is overwritten with the output base path.
    """
    nameParts = ["cloneSizeDist"]
    if not isAbs:
        nameParts.append("-Rel")
    if cumulative:
        nameParts.append("-cumulative")
    if yaxisPcReads:
        nameParts.append("-pcReads")
    if yaxisPcClones:
        nameParts.append("-pcClones")
    options.out = os.path.join( options.outdir, "".join(nameParts) )

    fig, pdf = iseqlib.initImage( 10.0, 8.0, options )
    axesList = setUsageDistAxes( fig, len(samples), options.samplesPerPlot )
    drawCloneSizeData( axesList, samples, options.samplesPerPlot, options, isAbs, yaxisPcReads, yaxisPcClones, cumulative )
    iseqlib.writeImage( fig, pdf, options )
def drawCombine( samples, options ):
    """Draw three vertically stacked panels into one image named "combine".

    The top and middle panels are produced by drawCloneSizeData, the bottom
    one by drawCloneVsReadData. Each helper is given a one-element axes
    list holding the panel it should draw on.

    Side effect: options.out is overwritten with the output base path.
    """
    options.out = os.path.join(options.outdir, "combine")
    figure, pdfHandle = iseqlib.initImage( 8.0, 12.0, options )
    figure.subplots_adjust(hspace=.5)

    # (subplot position, 6th argument, 7th argument) of drawCloneSizeData;
    # the 5th argument is always False and the 8th always True. Going by the
    # call in drawCloneSizeDist these are yaxisPcReads / yaxisPcClones, with
    # isAbs=False and cumulative=True.
    cloneSizePanels = ( (311, False, True), (312, True, False) )
    for position, pcReads, pcClones in cloneSizePanels:
        panel = figure.add_subplot(position)
        drawCloneSizeData( [panel], samples, options.samplesPerPlot, options, False, pcReads, pcClones, True )

    bottomPanel = figure.add_subplot(313)
    drawCloneVsReadData( [bottomPanel], samples, options.samplesPerPlot, options, True )

    iseqlib.writeImage( figure, pdfHandle, options )
Exemple #9
0
def drawDist(sam2nt2aa, options):
    """Draw the distribution plot for sam2nt2aa into an image named "nt2aa".

    The image is placed in options.outdir; drawDistData does the plotting.

    Side effect: options.out is overwritten with the output base path.
    """
    options.out = os.path.join(options.outdir, 'nt2aa')
    figure, pdfHandle = iseqlib.initImage(10.0, 10.0, options)
    plotAxes = iseqlib.setAxes(figure)
    drawDistData(plotAxes, sam2nt2aa)
    iseqlib.writeImage(figure, pdfHandle, options)
Exemple #10
0
def drawDist(samples, options):
    """Draw the distribution plot for samples into "sharedSeqsDist".

    The image is placed in options.outdir; drawDistData does the plotting.

    Side effect: options.out is overwritten with the output base path.
    """
    options.out = os.path.join(options.outdir, "sharedSeqsDist")
    figure, pdfHandle = iseqlib.initImage(10.0, 10.0, options)
    plotAxes = iseqlib.setAxes(figure)
    drawDistData(plotAxes, samples)
    iseqlib.writeImage(figure, pdfHandle, options)
Exemple #11
0
def _parseRow(row, index2colname, xindices):
    """Split one table row into x positions, means and standard deviations.

    Cells alternate between a mean (even index) and a standard deviation
    (odd index). Cells equal to 'NA', '' or '-' are skipped.

    The x position of a mean is its column name converted to int. When the
    column name is not an integer, the ordinal of the mean/std pair
    (i // 2) is used instead and the cell index is recorded in xindices
    (appended in place, without duplicates) so that the caller can label
    those positions with the column names.

    Returns a (xdata, means, stds) tuple of lists.
    """
    missing = ('NA', '', '-')
    xdata = []
    means = []
    stds = []
    for i, cell in enumerate(row):
        if cell in missing:
            continue
        if i % 2 == 1:
            stds.append(float(cell))
            continue
        try:
            xdata.append(int(index2colname[i]))
        except (TypeError, ValueError):
            # Non-numeric column name: fall back to the pair's ordinal.
            xdata.append(i // 2)
            if i not in xindices:
                xindices.append(i)
        means.append(float(cell))
    return xdata, means, stds


def drawAll(options, outdir, rowname2cells, index2colname):
    """Plot mean +/- std curves for a fixed set of rows onto one image.

    Args:
        options: option object passed to iseqlib.initImage / writeImage.
            ``options.infile`` determines the output base name ('all' when
            it is '-', otherwise the file's base name up to the first '.');
            ``options.out`` is overwritten with the output base path.
        outdir: directory the image is written into.
        rowname2cells: mapping row name -> sequence of cells that alternate
            mean, std, mean, std, ...
        index2colname: lookup from cell index to column name.

    Raises:
        KeyError: if one of the hard-coded row names is missing from
            rowname2cells.

    HACK: the rows to draw, the title and axis labels, the tick positions
    and labels, and the axis limits are all hard-coded for one specific
    "Sequencing Saturation" figure rather than derived from the input.
    Rows without any usable mean are skipped and left out of the legend.
    """
    if options.infile == '-':
        outname = 'all'
    else:
        outname = os.path.basename(options.infile).split('.')[0]
    options.out = os.path.join(outdir, outname)
    fig, pdf = iseqlib.initImage(10.0, 8.0, options)
    axes = fig.add_axes([0.12, 0.15, 0.85, 0.75])

    # HACK: fixed row selection instead of sorted(rowname2cells.keys()).
    rownames = ['as11D', 'as16D', 'as1D', 'asBD', 'as20D', 'as15D', 'as8D']
    name2color = sample2color(rownames)

    # HACK: rows that must never be drawn. They are filtered up front
    # instead of being removed from the list while it is iterated.
    exceptions = ['manhattan', 'euclidean', 'binomial', 'kulczynski', 'canberra', 'jaccard']
    rownames = [name for name in rownames if name not in exceptions]

    markersize = 12.0
    lines = []
    plottedNames = []
    xindices = []
    for rowname in rownames:
        xdata, means, stds = _parseRow(rowname2cells[rowname], index2colname, xindices)
        if not means:
            # Nothing to draw for this row; skipping it also keeps the
            # legend handles and labels aligned.
            continue
        color = name2color[rowname]
        axes.errorbar(xdata, means, yerr=stds, color=color, markeredgecolor=color, markersize=markersize, fmt='.')
        # axes.plot returns a list of Line2D; keep the artist itself so the
        # legend receives one handle per row.
        line = axes.plot(xdata, means, color=color, linestyle='-', linewidth=4.0)[0]
        lines.append(line)
        plottedNames.append(rowname)

    iseqlib.editSpine(axes)

    # HACK: figure-specific title and axis labels.
    axes.set_title("Sequencing Saturation", size='xx-large', weight='bold')
    axes.set_xlabel("Sampling size (number of sequences, in millions) ", size='x-large', weight='bold' )
    axes.set_ylabel("Number of clones (in thousands)", size='x-large', weight='bold')

    fontP = FontProperties()
    fontP.set_size('medium')
    legendLabels = [iseqlib.properName(name) for name in plottedNames]
    legend = axes.legend(lines, legendLabels, numpoints=1, loc='best', ncol=1, prop=fontP)
    # Public API for hiding the legend frame (instead of the private
    # _drawFrame attribute).
    legend.draw_frame(False)

    # Label ordinal x positions with their column names. Currently this is
    # overridden by the hard-coded ticks set right below.
    if len(xindices) > 0:
        xticklabels = [index2colname[i] for i in sorted(xindices)]
        axes.xaxis.set_ticks(list(range(len(xticklabels))))
        axes.xaxis.set_ticklabels(xticklabels)

    # HACK: x ticks every million sequences, labelled 0..9 (in millions).
    xticks = [0, 1000000, 2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000, 9000000]
    xticklabels = [str(x) for x in range(0, 10)]
    axes.xaxis.set_ticks(xticks)
    axes.xaxis.set_ticklabels(xticklabels)

    # HACK: y ticks every 50000 clones, labelled in thousands.
    yticks = list(range(0, 250000, 50000))
    yticklabels = [str(y) for y in range(0, 250, 50)]
    axes.yaxis.set_ticks(yticks)
    axes.yaxis.set_ticklabels(yticklabels)

    for label in axes.get_xticklabels():
        label.set_fontsize('large')
        label.set_fontweight('bold')
    for label in axes.get_yticklabels():
        label.set_fontsize('large')
        label.set_fontweight('bold')

    # Visibility is passed positionally: the keyword was renamed from ``b``
    # to ``visible`` in newer matplotlib releases.
    axes.xaxis.grid(True, color='#3F3F3F', linestyle='-', linewidth=0.05)
    axes.yaxis.grid(True, color='#3F3F3F', linestyle='-', linewidth=0.05)

    # HACK: fixed axis limits for this figure.
    axes.set_xlim(-10, 5100000)
    axes.set_ylim(-10, 205000)

    iseqlib.writeImage(fig, pdf, options)