Ejemplo n.º 1
0
def drawClonesizeDist(outdir, options, sample2data):
    """Draw the clone size distribution of all samples on a single figure.

    Each sample is drawn as a series of markers joined by a thin line in a
    lighter colour.  The figure is created with ``iseqlib.initImage`` and
    written with ``iseqlib.writeImage``.

    Args:
        outdir: Directory of the output image; ``options.out`` is overwritten
            with ``<outdir>/cloneVsRead``.
        options: Options object passed through to the ``iseqlib`` image
            helpers.
        sample2data: Mapping from sample name to ``(xticklabels, ydata)``.

    Note:
        The x axis is labelled with the tick labels of the last sample (in
        sorted-name order), so every sample is presumably binned identically
        -- TODO confirm against the caller.
    """
    options.out = os.path.join(outdir, "cloneVsRead")
    fig, pdf = iseqlib.initImage(10.0, 8.0, options)
    axes = fig.add_axes([0.12, 0.15, 0.85, 0.75])

    sampleNames = sorted(sample2data.keys())
    colors = iseqlib.getColors6()
    lightColors = iseqlib.getColors6light()
    markers = ['o', '*', 's', 'd', '^', 'p', 'v']
    # Per-marker size overrides; every other marker uses the default size.
    markerSizes = {'*': 12.0, 's': 8.0}

    lines = []
    xdata = []
    xticklabels = []
    for index, sample in enumerate(sampleNames):
        (xticklabels, ydata) = sample2data[sample]
        xdata = list(range(len(xticklabels)))
        # Debug output; xdata is computed first so the values printed belong
        # to the current sample.
        print(sample)
        print(xdata)
        print(ydata)

        # Cycle through the palettes so that more samples than colours or
        # markers no longer raises IndexError.
        color = colors[index % len(colors)]
        lightColor = lightColors[index % len(lightColors)]
        marker = markers[index % len(markers)]
        markersize = markerSizes.get(marker, 10.0)

        # plot() returns a list of lines; keep the single Line2D so that the
        # legend receives real artists as handles.
        markerLine = axes.plot(xdata, ydata, color=color, marker=marker,
                               markeredgecolor=color, markersize=markersize,
                               linestyle='none')[0]
        axes.plot(xdata, ydata, color=lightColor, linestyle='-', linewidth=0.2)
        lines.append(markerLine)

    iseqlib.editSpine(axes)

    axes.set_title('Clone Size Distribution', size='xx-large')
    axes.set_xlabel('Clone size as percentage of total reads', size='large')
    axes.set_ylabel('Percentage of total clones', size='large')

    fontP = FontProperties()
    fontP.set_size('medium')
    legend = axes.legend(lines, sampleNames, numpoints=1, loc='best', ncol=1, prop=fontP)
    # Public API for hiding the legend frame (instead of poking _drawFrame).
    legend.draw_frame(False)

    # Fix the tick positions first, then attach the labels to them.  Ticks sit
    # half a unit left of the data points.
    axes.xaxis.set_ticks([x - 0.5 for x in xdata])
    axes.xaxis.set_ticklabels(xticklabels)
    axes.set_xlim(-0.5, len(xdata) - 0.5)

    axes.set_ylim(-0.5, 50)
    for label in axes.get_xticklabels():
        label.set_fontsize('medium')
        label.set_rotation(45)
    for label in axes.get_yticklabels():
        label.set_fontsize('medium')

    # The on/off flag is passed positionally because its keyword name differs
    # between matplotlib releases.
    axes.yaxis.grid(True, color="#BDBDBD", linestyle='-', linewidth=0.005)
    axes.xaxis.grid(True, color="#BDBDBD", linestyle='-', linewidth=0.005)

    iseqlib.writeImage(fig, pdf, options)
Ejemplo n.º 2
0
def _stripTrbPrefix(label):
    """Return label with a leading 'TRB' removed from each '|'-separated name."""
    names = []
    for name in label.split('|'):
        # str.lstrip('TRB') would strip any run of the characters T, R and B;
        # only the literal prefix must go.
        if name.startswith('TRB'):
            name = name[len('TRB'):]
        names.append(name)
    return '|'.join(names)


def drawVJdata(fig, axes, sample, vgenes, jgenes, options, minvj, maxvj):
    """Draw the V/J usage heatmap of one sample, with a colorbar.

    Args:
        fig: Figure that receives the colorbar.
        axes: Axes on which the heatmap is drawn.
        sample: Sample object; ``intersectVJusage`` is drawn when
            ``options.abs`` is set, ``normIntersectVJusage`` otherwise, and
            ``name`` is used in the title.
        vgenes: V gene names, used as y tick labels (one per heatmap row).
        jgenes: J gene names, used as x tick labels (one per heatmap column).
        options: Options object; reads ``abs`` and ``heatmapNoScale``.
        minvj: Lower bound of the colour normalisation.
        maxvj: Upper bound of the colour normalisation.

    The gene lists are no longer modified in place; shortened copies are used
    for the tick labels.
    """
    if options.abs:
        data = sample.intersectVJusage
    else:
        data = sample.normIntersectVJusage

    if options.heatmapNoScale:
        hmaxes = axes.imshow(data, interpolation='nearest')
    else:
        # Map the data onto minvj..maxvj with a 20-step rainbow colormap.
        norm = mpl.colors.Normalize(vmin=minvj, vmax=maxvj)
        cmap = mpl.cm.get_cmap('rainbow', 20)
        hmaxes = axes.imshow(data, interpolation='nearest', cmap=cmap, norm=norm)

    fig.colorbar(hmaxes, shrink=0.3)

    immunoseqLib.editSpine(axes)
    axes.set_title("VJ usage of sample %s" % immunoseqLib.properName(sample.name))

    jlabels = [_stripTrbPrefix(gene) for gene in jgenes]
    axes.xaxis.set_ticks(list(range(len(jlabels))))
    axes.xaxis.set_ticklabels(jlabels)

    vlabels = [_stripTrbPrefix(gene) for gene in vgenes]
    axes.yaxis.set_ticks(list(range(len(vlabels))))
    axes.yaxis.set_ticklabels(vlabels)

    textsize = 'x-small'
    for label in axes.get_xticklabels():
        label.set_fontsize(textsize)
        label.set_rotation(80)
    for label in axes.get_yticklabels():
        label.set_fontsize(textsize)
Ejemplo n.º 3
0
def drawDistData(axes, sam2nt2aa):
    """Plot one line per sample on a log-scaled y axis.

    Args:
        axes: Axes to draw on.
        sam2nt2aa: Mapping from sample name to a dict of x value -> y value.
            Going by the axis labels, x is a number of nucleotide sequences
            and y a percentage of amino acid sequences -- TODO confirm
            against the caller.
    """
    sampleNames = sorted(sam2nt2aa.keys())

    # Work on a copy so that extending the palette cannot modify a list owned
    # by iseqlib.
    colors = list(iseqlib.getColors6())
    if len(colors) < len(sampleNames):
        colors.extend(iseqlib.getColors6light())
    if len(colors) < len(sampleNames):
        colors.extend(iseqlib.getColors6dark())

    markersize = 10.0
    lines = []
    labels = []
    axes.set_yscale('log')
    for index, name in enumerate(sampleNames):
        nt2aa = sam2nt2aa[name]
        xdata = sorted(nt2aa.keys())
        ydata = [nt2aa[x] for x in xdata]

        # Wrap around rather than raising IndexError when there are still
        # more samples than colours.
        color = colors[index % len(colors)]
        # plot() returns a list; keep the Line2D itself as the legend handle.
        line = axes.plot(xdata, ydata, color=color, marker='o',
                         markeredgecolor=color, markersize=markersize,
                         linestyle='-', linewidth=2)[0]
        lines.append(line)
        labels.append(name)
        # Debug output.
        print(name)
        print(xdata)
        print(ydata)

    xticks = list(range(8))
    axes.xaxis.set_ticks(xticks)
    axes.xaxis.set_ticklabels([str(x) for x in xticks])

    axes.set_title('Nucleotide sequences to amino acid sequence', size="xx-large")
    iseqlib.editSpine(axes)
    axes.set_xlabel("Number of nucleotide sequences", size='x-large')
    axes.set_ylabel("Percentage of amino acid sequences", size='x-large')
    legend = axes.legend(lines, labels, numpoints=1, loc='best', ncol=1)
    # The former `legend.__drawFrame = False` only created an unused
    # attribute; this actually hides the legend frame.
    legend.draw_frame(False)
    # The on/off flag is passed positionally because its keyword name differs
    # between matplotlib releases.
    axes.yaxis.grid(True, color="#BDBDBD", linestyle='-', linewidth=0.005)
    axes.xaxis.grid(True, color="#BDBDBD", linestyle='-', linewidth=0.005)
Ejemplo n.º 4
0
def drawDistData(axes, sample2dist):
    """Plot, per sample, the percentage of its total count at each x value.

    Args:
        axes: Axes to draw on; the y axis is log-scaled.
        sample2dist: Mapping from sample name to a dict of x value -> count.
            Going by the axis labels, x is the number of samples sharing a
            clone -- TODO confirm against the caller.

    Samples without any data are skipped instead of raising.
    """
    colors = iseqlib.getColors6()
    markersize = 10.0
    lines = []
    labels = []
    xmax = 0
    for index, name in enumerate(sorted(sample2dist.keys())):
        numsam2count = sample2dist[name]
        xdata = sorted(numsam2count.keys())
        if not xdata:
            # Nothing to draw; max() below would raise on an empty list.
            continue
        xmax = max([xmax, max(xdata)])
        ydata = [numsam2count[x] for x in xdata]
        totaly = sum(ydata)
        if totaly:
            pcydata = [(100.0 * y) / totaly for y in ydata]
        else:
            # All counts are zero: avoid dividing by zero.
            pcydata = [0.0 for y in ydata]

        # Wrap around rather than raising IndexError when there are more
        # samples than colours.
        color = colors[index % len(colors)]
        # plot() returns a list; keep the Line2D itself as the legend handle.
        line = axes.plot(xdata, pcydata, color=color, marker='o',
                         markeredgecolor=color, markersize=markersize,
                         linestyle='-', linewidth=2)[0]
        lines.append(line)
        labels.append(name)
        # Debug output.
        print(name)
        print(xdata)
        print(ydata)
        print(pcydata)

    axes.set_yscale('log')
    axes.set_xlim(0.8, xmax + 0.2)
    xticks = list(range(1, xmax + 1))
    axes.xaxis.set_ticks(xticks)
    axes.xaxis.set_ticklabels([str(x) for x in xticks])

    axes.set_title('Shared sequences', size="xx-large")
    iseqlib.editSpine(axes)
    axes.set_xlabel("Number of samples", size='x-large')
    # The plotted values are percentages of each sample's total, not raw
    # counts, so the label says so.
    axes.set_ylabel("Percentage of clones", size='x-large')
    legend = axes.legend(lines, labels, numpoints=1, loc='best', ncol=1)
    # The former `legend.__drawFrame = False` only created an unused
    # attribute; this actually hides the legend frame.
    legend.draw_frame(False)
    # The on/off flag is passed positionally because its keyword name differs
    # between matplotlib releases.
    axes.yaxis.grid(True, color="#BDBDBD", linestyle='-', linewidth=0.005)
    axes.xaxis.grid(True, color="#BDBDBD", linestyle='-', linewidth=0.005)
Ejemplo n.º 5
0
def drawAll(options, outdir, rowname2cells, index2colname):
    """Draw the "Sequencing Saturation" figure: one mean +/- std curve per row.

    Args:
        options: Options object.  ``options.infile`` gives the output base
            name ('all' when it is '-'); ``options.out`` is overwritten with
            the output path before the ``iseqlib`` image helpers are called.
        outdir: Directory of the output image.
        rowname2cells: Mapping from row name to its list of cells.  Cells at
            even indices are read as means and cells at odd indices as
            standard deviations; cells equal to 'NA', '' or '-' are skipped.
        index2colname: Lookup from cell index to column name.  A column name
            that parses as an integer is used as the x position; otherwise
            the x position is ``index // 2`` and the column name becomes a
            tick label.

    Note:
        HACK: the row list, titles, tick positions and axis limits are
        hard-coded for one particular data set.
    """
    if options.infile == '-':
        outname = 'all'
    else:
        outname = os.path.basename(options.infile).split('.')[0]
    options.out = os.path.join(outdir, outname)
    fig, pdf = iseqlib.initImage(10.0, 8.0, options)
    axes = fig.add_axes([0.12, 0.15, 0.85, 0.75])

    # HACK: fixed rows in a fixed order instead of sorted(rowname2cells.keys()).
    rownames = ['as11D', 'as16D', 'as1D', 'asBD', 'as20D', 'as15D', 'as8D']
    name2color = sample2color(rownames)
    # HACK: rows with these names are never drawn.
    excluded = ('manhattan', 'euclidean', 'binomial', 'kulczynski', 'canberra', 'jaccard')
    missing = ('NA', '', '-')
    markersize = 12.0

    lines = []
    plottedNames = []
    xindices = []
    for rowname in rownames:
        if rowname in excluded:
            continue
        row = rowname2cells[rowname]
        means = []
        stds = []
        xdata = []
        for i, cell in enumerate(row):
            if cell in missing:
                continue
            if i % 2 == 1:
                stds.append(float(cell))
                continue
            colname = index2colname[i]
            try:
                x = int(colname)
            except (ValueError, TypeError):
                # Non-numeric column: fall back to the ordinal position and
                # remember the column so its name can label the tick.
                x = i // 2
                if i not in xindices:
                    xindices.append(i)
            xdata.append(x)
            means.append(float(cell))

        color = name2color[rowname]
        axes.errorbar(xdata, means, yerr=stds, color=color, markeredgecolor=color,
                      markersize=markersize, fmt='.')
        # plot() returns a list; keep the Line2D itself as the legend handle.
        line = axes.plot(xdata, means, color=color, linestyle='-', linewidth=4.0)[0]
        lines.append(line)
        plottedNames.append(rowname)

    iseqlib.editSpine(axes)

    # HACK: fixed titles for the sequencing-saturation figure.
    axes.set_title("Sequencing Saturation", size='xx-large', weight='bold')
    axes.set_xlabel("Sampling size (number of sequences, in millions) ", size='x-large', weight='bold')
    axes.set_ylabel("Number of clones (in thousands)", size='x-large', weight='bold')

    fontP = FontProperties()
    fontP.set_size('medium')
    legendNames = [iseqlib.properName(n) for n in plottedNames]
    legend = axes.legend(lines, legendNames, numpoints=1, loc='best', ncol=1, prop=fontP)
    # Public API for hiding the legend frame (instead of poking _drawFrame).
    legend.draw_frame(False)

    if len(xindices) > 0:
        xticklabels = [index2colname[i] for i in sorted(xindices)]
        axes.xaxis.set_ticks(list(range(len(xticklabels))))
        axes.xaxis.set_ticklabels(xticklabels)

    # HACK: fixed ticks, one per million sequences / per 50k clones.
    # NOTE(review): these unconditionally replace the column-name ticks set
    # just above -- confirm that is intended.
    xticks = [0, 1000000, 2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000, 9000000]
    xticklabels = [str(x) for x in range(0, 10)]
    axes.xaxis.set_ticks(xticks)
    axes.xaxis.set_ticklabels(xticklabels)

    yticks = list(range(0, 250000, 50000))
    yticklabels = [str(y) for y in range(0, 250, 50)]
    axes.yaxis.set_ticks(yticks)
    axes.yaxis.set_ticklabels(yticklabels)

    for label in axes.get_xticklabels():
        label.set_fontsize('large')
        label.set_fontweight('bold')
    for label in axes.get_yticklabels():
        label.set_fontsize('large')
        label.set_fontweight('bold')

    # The on/off flag is passed positionally because its keyword name differs
    # between matplotlib releases.
    axes.xaxis.grid(True, color='#3F3F3F', linestyle='-', linewidth=0.05)
    axes.yaxis.grid(True, color='#3F3F3F', linestyle='-', linewidth=0.05)

    # HACK: fixed axis limits.
    axes.set_xlim(-10, 5100000)
    axes.set_ylim(-10, 205000)

    iseqlib.writeImage(fig, pdf, options)
Ejemplo n.º 6
0
def drawCloneVsReadData(axesList, samples, samplesPerPlot, options, isAbs):
    """Plot the clones-versus-reads curve of each sample, a batch per axes.

    Axes ``i`` receives samples ``i*samplesPerPlot`` up to (not including)
    ``(i+1)*samplesPerPlot``.  No legend is drawn.

    Args:
        axesList: Axes to draw on.
        samples: Sample objects; nothing is drawn when the list is empty.
        samplesPerPlot: Number of samples per axes.
        options: Options object; ``options.cumulative`` selects between the
            cumulative curve and successive differences of it.
        isAbs: When true the data comes from ``getClonesVsReads``, otherwise
            from ``getClonesVsReadsRel``.

    Note:
        HACK: x ticks and axis limits are hard-coded for the 50 largest
        clones.  Tick labels and vertical guide lines use the x data of the
        last sample that was plotted.
    """
    if len(samples) <= 0:
        return
    colors = getColors6()
    s2c, s2m, s2cLight = sample2color(colors)
    textsize = 'medium'
    markersize = 8.0
    maxy = 0

    for i, axes in enumerate(axesList):
        startIndex = i * samplesPerPlot
        endIndex = min([startIndex + samplesPerPlot, len(samples)])
        for j in range(startIndex, endIndex):
            sample = samples[j]
            if isAbs:
                xtickLabels, xdata, ydata = getClonesVsReads(sample)
            else:
                xtickLabels, xdata, ydata = getClonesVsReadsRel(sample)

            if not options.cumulative:
                # HACK discrete: replace each value after the first by its
                # difference from the previous value.
                ydata = [ydata[0]] + [ydata[k] - ydata[k - 1] for k in range(1, len(ydata))]

            maxy = max([maxy, max(ydata)])

            name = sample.name
            axes.plot(xdata, ydata, color=s2c[name], marker=s2m[name],
                      markeredgecolor=s2c[name], markersize=markersize, linestyle='none')
            axes.plot(xdata, ydata, color=s2cLight[name], linestyle='-', linewidth=0.2)

        if options.cumulative:
            axes.set_title('C. Cumulative Distribution of 50 Largest Clones', size='large', weight='bold')
        else:
            axes.set_title('Distribution of 50 Largest Clones', size='xx-large')
            # Log-scale every axes; previously this ran once after the loop
            # and therefore only affected the last axes.
            axes.set_yscale('log')

    # x positions that get a vertical guide line.
    guideXs = set(range(0, 51, 5))
    for axes in axesList:
        iseqlib.editSpine(axes)

        if not isAbs:
            xlabel = 'Percentage of total clones'
        elif not options.cumulative:
            xlabel = 'Clone rank'
        else:
            xlabel = 'Number of top clones'
        axes.set_xlabel(xlabel, size=textsize, weight='bold')
        axes.set_ylabel('Frequency (% of total sequences)', size=textsize, weight='bold')

        # NOTE(review): these per-sample ticks are replaced by the fixed x
        # ticks further down; kept so the sequence of axis calls is unchanged.
        axes.xaxis.set_ticklabels(xtickLabels)
        axes.xaxis.set_ticks(xdata)

        for label in axes.get_xticklabels():
            label.set_fontsize('small')
        for label in axes.get_yticklabels():
            label.set_fontsize('small')

        # HACK: fixed x ticks at 1, 5, 10, ..., 50.
        xticks = [1]
        xticks.extend(range(5, 51, 5))
        axes.xaxis.set_ticks(xticks)
        axes.xaxis.set_ticklabels([str(x) for x in xticks], size='large')
        # Vertical guide lines at every plotted x that is a multiple of five.
        for x in xdata:
            if x in guideXs:
                axes.plot([x, x], [0.02, maxy + 5], color='#3F3F3F', linestyle='-', linewidth=0.05)

        if isAbs:
            # HACK: fixed limit for the 50 largest clones.
            axes.set_xlim(0, 50.5)
        else:
            axes.set_xlim(0, 101)

        axes.set_ylim(0, maxy + 2)

        # The on/off flag is passed positionally because its keyword name
        # differs between matplotlib releases.
        axes.yaxis.grid(True, color="#3F3F3F", linestyle='-', linewidth=0.05)

    return
Ejemplo n.º 7
0
def drawCloneSizeData(axesList, samples, samplesPerPlot, options, isAbs, yaxisPcReads, yaxisPcClones, cumulative):
    """Plot the clone size distribution of each sample, a batch per axes.

    Axes ``i`` receives samples ``i*samplesPerPlot`` up to (not including)
    ``(i+1)*samplesPerPlot``.

    Args:
        axesList: Axes to draw on.
        samples: Sample objects; nothing is drawn when the list is empty.
            The x tick labels come from the first sample (``countArr`` when
            ``isAbs``, ``percentArr`` otherwise).
        samplesPerPlot: Number of samples per axes.
        options: Unused here; kept for interface compatibility.
        isAbs: Selects the per-count (true) or per-percent (false) arrays.
        yaxisPcReads: Plot the ``readsPer*`` arrays instead of the
            ``clonesPer*`` arrays; the y axis is then linear instead of log.
        yaxisPcClones: Convert the y values to a percentage of
            ``sample.totalClones``; also enables the legend (unless
            ``yaxisPcReads`` is set).
        cumulative: Selects the cumulative titles.
    """
    if len(samples) <= 0:
        return
    colors = getColors6()
    s2c, s2m, s2cLight = sample2color(colors)

    textsize = 'medium'
    fontP = FontProperties()
    fontP.set_size(textsize)
    linesDict = {}
    labelsDict = {}

    if isAbs:
        xtickLabels = samples[0].countArr
    else:
        xtickLabels = samples[0].percentArr

    xdata = list(range(len(xtickLabels)))
    # Horizontal shift applied per sample within one axes (currently none).
    offset = 0.0
    maxy = 0
    for i, axes in enumerate(axesList):
        lines = []
        sampleNames = []
        # HACK: log scale unless the y axis shows reads.
        if not yaxisPcReads:
            axes.set_yscale('log')
        startIndex = i * samplesPerPlot
        endIndex = min([startIndex + samplesPerPlot, len(samples)])
        for j in range(startIndex, endIndex):
            sample = samples[j]
            sampleNames.append("%s" % (iseqlib.properName(sample.name)))
            if yaxisPcReads:
                ydata = sample.readsPerCount if isAbs else sample.readsPerPercent
            else:
                ydata = sample.clonesPerCount if isAbs else sample.clonesPerPercent

            if yaxisPcClones:
                # 100.0 forces float division; with integer counts Python 2
                # would otherwise truncate the percentages.
                ydata = [100.0 * y / sample.totalClones for y in ydata]
            maxy = max([maxy, max(ydata)])

            currxdata = [x + offset * (j - startIndex) for x in xdata]
            # Adjust the size for the star and square markers.
            markersize = 8.0
            if s2m[sample.name] == '*':
                markersize += 2.0
            elif s2m[sample.name] == 's':
                markersize -= 2.0

            # Debug output.
            print(sample.name)
            print(ydata)

            # plot() returns a list; keep the Line2D itself as the legend handle.
            line = axes.plot(currxdata, ydata, color=s2c[sample.name], marker=s2m[sample.name],
                             markeredgecolor=s2c[sample.name], markersize=markersize,
                             linestyle='none')[0]
            axes.plot(currxdata, ydata, color=s2cLight[sample.name], linestyle='-', linewidth=0.5)
            lines.append(line)

        linesDict[i] = lines
        labelsDict[i] = sampleNames

        if cumulative:
            if yaxisPcReads:
                axes.set_title('B. Cumulative Distribution of Sequences', size="large", weight='bold')
            else:
                axes.set_title('A. Cumulative Distribution of Clones', size="large", weight='bold')
        else:
            axes.set_title('Clone Size Distribution', size="xx-large")

    # Ticks sit half a unit left of the (centre of the shifted) data points.
    xticks = [x + 0.5 * offset * (samplesPerPlot - 1) - 0.5 for x in xdata]
    minx = -0.5
    maxx = len(xtickLabels) - 0.5

    for i, axes in enumerate(axesList):
        iseqlib.editSpine(axes)
        axes.set_xlabel('Clone size (number of sequences)', size=textsize)
        if not isAbs:
            axes.set_xlabel('Clone size (% of total sequences)', size=textsize, weight='bold')
        axes.set_ylabel('Frequency (number of clones)', size=textsize)
        if yaxisPcReads:
            axes.set_ylabel('Frequency (% of total sequences)', size=textsize, weight='bold')
        if yaxisPcClones:
            axes.set_ylabel('Frequency (% of total clones)', size=textsize, weight='bold')

        if yaxisPcClones and not yaxisPcReads:
            legend = axes.legend(linesDict[i], labelsDict[i], numpoints=1, loc="best", ncol=1, prop=fontP)
            # Public API for hiding the legend frame (instead of _drawFrame).
            legend.draw_frame(False)

        # Fix the tick positions first, then attach the labels to them.
        axes.xaxis.set_ticks(xticks)
        axes.xaxis.set_ticklabels(xtickLabels, size='medium')
        # Hand-drawn vertical grid: one line per tick except the first.
        for x in xticks[1:]:
            axes.plot([x, x], [0.0001, maxy], color='#3F3F3F', linestyle='-', linewidth=0.1)
        axes.set_xlim(minx, maxx)

        if not yaxisPcReads and not yaxisPcClones:
            # Raw clone counts.
            yticks = [1, 10, 100, 1000, 10000, 50000, 100000, 150000]
            ytickLabels = ["1", "10", "100", "1k", "10k", "50k", "100k", "150k"]
            axes.yaxis.set_ticks(yticks)
            axes.yaxis.set_ticklabels(ytickLabels)
            for y in yticks:
                # Solid line at the powers of ten, dash-dot at the others.
                if y in [1, 10, 100, 1000, 10000, 100000]:
                    axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-', linewidth=0.1)
                else:
                    axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-.', linewidth=0.1)
            axes.set_ylim(0.8, maxy + 15000)
        elif yaxisPcClones:
            # Percentage of total clones.
            yticks = [0.001, 0.01, 0.1, 1, 10, 100]
            ytickLabels = [str(y) for y in yticks]
            axes.yaxis.set_ticks(yticks)
            axes.yaxis.set_ticklabels(ytickLabels)
            for y in yticks:
                axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-', linewidth=0.1)
            axes.set_ylim(0.0005, maxy + 25)
        else:
            # Reads on a linear 0-100 axis.
            yticks = list(range(0, 101, 20))
            ytickLabels = [str(y) for y in yticks]
            axes.yaxis.set_ticks(yticks)
            axes.yaxis.set_ticklabels(ytickLabels)
            for y in yticks:
                axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-', linewidth=0.1)
            axes.set_ylim(-1, maxy + 1)

        for label in axes.get_xticklabels():
            label.set_fontsize('small')
        for label in axes.get_yticklabels():
            label.set_fontsize('small')
    return
Ejemplo n.º 8
0
def _dropTrbPrefix(name):
    """Return name without a leading 'TRB' (the literal prefix only)."""
    if name.startswith('TRB'):
        return name[len('TRB'):]
    return name


def drawUsageData(axesList, samples, stds, options, genetype, intersectGenes):
    """Plot the gene usage of each sample, coloured by sample group.

    Axes ``i`` receives samples ``i*options.samplesPerPlot`` up to (not
    including) ``(i+1)*options.samplesPerPlot``.  The legend has one entry
    per group, in the order the groups are first drawn.

    Args:
        axesList: Axes to draw on.
        samples: Sample objects; nothing is drawn when the list is empty.
        stds: Mapping from std name to std data, looked up through
            ``options.avr2std[sample.name]``; matching samples get error bars.
        options: Options object; reads ``samplesPerPlot``, ``abs``,
            ``avr2std``, ``std`` and ``groupDrawInfo`` (plus ``s2g``,
            ``g2c``, ``g2lc`` and ``g2m`` when ``groupDrawInfo`` is set).
        genetype: Gene type passed to ``getGeneUsage`` / ``getStdData``.
        intersectGenes: Gene names along the x axis.  The list is no longer
            modified in place; shortened copies are used as tick labels.

    Raises:
        KeyError: If a sample name has no group, or its group has no colour
            or marker, in the active lookup tables.
    """
    if len(samples) <= 0:
        return
    samplesPerPlot = options.samplesPerPlot

    textsize = 'large'
    fontP = FontProperties()
    fontP.set_size(textsize)
    linesDict = {}
    labelsDict = {}

    xdata = list(range(len(intersectGenes)))
    maxy = 0

    # HACK COLORS: built-in sample -> group table and grey-scale styles.
    # NOTE(review): several groups named in s2group ('B27-,AS',
    # 'B27-,Healthy', 'B27+', 'B27-') have no entry in g2c/g2lc/g2m and would
    # raise KeyError -- confirm which styles they should get.
    s2group = {'as1D': 'B27+,AS', 'as8D': 'B27+,AS', 'as11D': 'B27+,AS', 'as16D': 'B27+,AS',
               'as15D': 'B27-,AS',
               'asBD': 'B27+,Healthy', 'as20D': 'B27+,Healthy',
               'adaptMA': 'B27-,Healthy', 'adaptM35': 'B27-,Healthy', 'adaptF57': 'B27-,Healthy', 'adaptAS': 'B27-,Healthy', 'adaptCD': 'B27-,Healthy', 'adaptF28':'B27-,Healthy',
               'irep1D': 'B27+,AS', 'as10R': 'B27+,AS', 'as11R': 'B27+,AS', 'as12R': 'B27+,AS', 'as13R': 'B27+,AS', 'as1R': 'B27+,AS', 'irepBD': 'B27+,Healthy', 'asBR': 'B27+,Healthy',
               'sameGroupShared': 'AS', 'diffGroupShared': 'Healthy', 'uniq': 'Unique',
               'b27pos': 'B27+', 'b27neg':'B27-'}

    # AS vs Healthy, grey scale.
    g2c = {'B27+,AS': '#353535', 'B27+,Healthy': '#848484', 'AS':'#353535', 'Healthy':'#848484', 'Unique': '#4DAF4A'}
    g2lc = {'B27+,AS': '#BDBDBD', 'B27+,Healthy': '#BDBDBD', 'AS':'#BDBDBD', 'Healthy':'#BDBDBD', 'Unique': '#B8FEB5'}
    g2m = {'B27+,AS': 'o', 'B27+,Healthy': '^', 'AS':'o', 'Healthy':'^', 'Unique': 's'}
    # END HACK COLORS

    if options.groupDrawInfo:
        s2group = options.s2g
        g2c = options.g2c
        g2lc = options.g2lc
        g2m = options.g2m

    for i, axes in enumerate(axesList):
        lines = []
        groups = []
        startIndex = i * samplesPerPlot
        endIndex = min([startIndex + samplesPerPlot, len(samples)])
        for j in range(startIndex, endIndex):
            sample = samples[j]
            ydata = getGeneUsage(sample, genetype, intersectGenes, options.abs)
            maxy = max([maxy, max(ydata)])

            group = s2group[sample.name]
            # plot() returns a list; keep the Line2D itself as the legend handle.
            line = axes.plot(xdata, ydata, color=g2c[group], marker=g2m[group], markersize=8.0,
                             markeredgecolor=g2c[group], linestyle='none')[0]
            axes.plot(xdata, ydata, color=g2lc[group], linestyle='-', linewidth=0.01)
            if group not in groups:
                lines.append(line)
                groups.append(group)

            # Draw the standard deviation if the data is available.
            if sample.name in options.avr2std and options.avr2std[sample.name] in stds:
                std = stds[options.avr2std[sample.name]]
                higherYdata, lowerYdata = getStdData(std, sample, genetype, intersectGenes, options.abs)
                if len(higherYdata) == 0 or len(lowerYdata) == 0:
                    continue
                # Must not reuse `i` here: it is the axes index used as the
                # key of linesDict/labelsDict below.
                for x, high, low in zip(xdata, higherYdata, lowerYdata):
                    axes.plot([x, x], [high, low], linestyle='-', marker='_', markersize=8.0,
                              linewidth=0.02, color=g2lc[group])
                maxy = max([maxy, max(higherYdata)])

        linesDict[i] = lines
        labelsDict[i] = groups

    # HACK: shorten the gene names used as tick labels.
    xtickLabels = []
    for gene in intersectGenes:
        if gene == 'TRBV6-5/TRBV6-6':
            gene = 'TRBV6-5/6-6'
        xtickLabels.append('|'.join([_dropTrbPrefix(item) for item in gene.split('|')]))

    numTicks = 20
    yticks = [float(t) / numTicks for t in range(numTicks + 1)]
    # Percent labels (0, 5, ..., 100), computed with integer arithmetic.
    ytickLabels = ["%d" % (t * 100 // numTicks) for t in range(numTicks + 1)]

    for i, axes in enumerate(axesList):
        immunoseqLib.editSpine(axes)
        axes.set_xlabel('Gene', size='xx-large', weight='bold')
        axes.set_ylabel('Frequency (% of total sequences)', size='xx-large', weight='bold')

        if not options.std:
            # Attach the legend to this axes (pyplot.legend would use
            # whichever axes happens to be current).
            legend = axes.legend(linesDict[i], labelsDict[i], numpoints=1, prop=fontP, loc="best")
            # Public API for hiding the legend frame (instead of _drawFrame).
            legend.draw_frame(False)

        # Fix the tick positions first, then attach the labels to them.
        axes.xaxis.set_ticks(xdata)
        axes.xaxis.set_ticklabels(xtickLabels)
        axes.set_xlim(-0.5, len(xtickLabels) + 0.5)

        axes.yaxis.set_ticks(yticks)
        axes.yaxis.set_ticklabels(ytickLabels)
        axes.set_ylim(-0.01, maxy + 0.01)
        for label in axes.get_xticklabels():
            label.set_fontsize('small')
            label.set_fontweight('bold')
            label.set_rotation(75)
        for label in axes.get_yticklabels():
            label.set_fontsize(textsize)
            label.set_fontweight('bold')
        # The on/off flag is passed positionally because its keyword name
        # differs between matplotlib releases.
        axes.yaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
        axes.xaxis.grid(True, color="#CCCCCC", linestyle='-', linewidth=0.005)
    return