Ejemplo n.º 1
0
def tab(f, sample2row):
    """Write one LaTeX table row per sample to the file-like object f.

    Samples are visited in sorted key order. A row starts with the sample's
    display name (iseqlib.properName) and is followed by one " & cell "
    column per entry of sample2row[sample]. Cells equal to '+' or '-' are
    wrapped in '$...$'. Each row is closed with a LaTeX line break and an
    hline rule.
    """
    mathCells = ('+', '-')
    for name in sorted(sample2row):
        f.write("%s" % iseqlib.properName(name))
        for value in sample2row[name]:
            if value in mathCells:
                value = "$%s$" % value
            f.write(" & %s " % (value))
        f.write("\\\\\n")
        f.write("\\hline\n")
def tab0(f, clones, clone2hits, group2keywords, options):
    """Write LaTeX table rows for every clone that has keyword-matching hits.

    For each clone present in clone2hits, a hit is kept when
    checkKeywords(hit[0], keywords) succeeds for at least one keyword group
    other than "b27" and "pathogen". Clones without any kept hit are skipped.
    The V, CDR3 (hits[0][2]) and J columns are written as multirow cells
    spanning max(number of samples, number of kept hits) rows; the remaining
    columns list the samples with their sizes next to the kept hits.

    options is accepted but not used here.
    """
    ignoredGroups = ("b27", "pathogen")
    for clone in clones:
        vs, js, _ds, sample2size = getCloneInfo(clone)
        if clone not in clone2hits:
            continue
        hits = clone2hits[clone]

        # A hit qualifies as soon as one non-ignored keyword group matches it
        matched = [
            hit
            for hit in hits
            if any(
                checkKeywords(hit[0], kw)
                for g, kw in group2keywords.iteritems()
                if g not in ignoredGroups
            )
        ]
        if not matched:
            continue

        seq = hits[0][2]
        samples = sorted(iseqlib.properName(s) for s in sample2size.keys())
        numrow = max(len(samples), len(matched))

        # V, CDR3 and J each span all rows belonging to this clone
        spanning = ["\\multirow{%d}{*}{%s}" % (numrow, value) for value in (vs, seq, js)]
        f.write(" & ".join(spanning) + " & ")

        # First row: first sample next to the first kept hit
        firstSample = samples[0]
        firstHit = matched[0]
        f.write(
            "%s & %d & %s & %s & %s \\\\\n "
            % (
                firstSample,
                sample2size[iseqlib.properName2name(firstSample)],
                firstHit[4],
                firstHit[3],
                parsePaperInfo(firstHit[0]),
            )
        )

        # Remaining rows: pad with empty cells once samples or hits run out
        for i in xrange(1, numrow):
            f.write("\\cline{4-8}\n")
            f.write(" & & & ")
            if i < len(samples):
                name = samples[i]
                f.write(" %s & %d &" % (name, sample2size[iseqlib.properName2name(name)]))
            else:
                f.write(" & & ")
            if i < len(matched):
                hit = matched[i]
                f.write("%s & %s & %s \\\\\n" % (hit[4], hit[3], parsePaperInfo(hit[0])))
            else:
                f.write(" & & \\\\\n")
        f.write("\\hline\n")
Ejemplo n.º 3
0
def tab(f, colnames, sample2row):
    """Write a LaTeX table of per-sample counts with percentages of the total.

    The 'Total' row is written first, each of its cells paired with a
    literal 100.00. Every other sample (in sorted order, skipping the
    'Total', 'controls' and 'patients' rows) is written as its display name
    followed by "count & percentage" pairs, where the percentage comes from
    iseqlib.getPc(count, total of the same column).

    colnames is accepted but not used here.
    """
    totals = sample2row['Total']
    totalCells = ["%s & 100.00" % cell for cell in totals]
    f.write("%s & %s \\\\\n" % ("Total", " & ".join(totalCells)))
    f.write("\\hline\n")

    aggregateRows = ('Total', 'controls', 'patients')
    for name in sorted(sample2row):
        if name in aggregateRows:
            continue
        f.write("%s" % iseqlib.properName(name))
        for col, count in enumerate(sample2row[name]):
            columnTotal = totals[col]
            share = iseqlib.getPc(int(count), int(columnTotal))
            f.write(" & %s & %.2f " % (count, share))
        f.write("\\\\\n")
        f.write("\\hline\n")
Ejemplo n.º 4
0
def _stripTrbPrefix(label):
    """Return label with a leading 'TRB' removed from each '|'-separated gene name.

    Only the exact prefix is removed. str.lstrip('TRB') would instead strip
    any leading run of the characters T, R and B, mangling names that do not
    start with 'TRB' (e.g. 'TRAV12' -> 'AV12').
    """
    prefix = 'TRB'
    items = []
    for item in label.split('|'):
        if item.startswith(prefix):
            item = item[len(prefix):]
        items.append(item)
    return '|'.join(items)


def drawVJdata( fig, axes, sample, vgenes, jgenes, options, minvj, maxvj ):
    """Draw the V-J usage matrix of one sample as a heatmap on axes.

    fig     -- figure that receives the colorbar
    axes    -- axes the heatmap is drawn on
    sample  -- object providing name, intersectVJusage and normIntersectVJusage
    vgenes  -- y tick labels (one per row); shortened IN PLACE
    jgenes  -- x tick labels (one per column); shortened IN PLACE
    options -- options.abs selects the absolute matrix instead of the
               normalized one; options.heatmapNoScale disables the fixed
               minvj..maxvj color scale
    minvj, maxvj -- color scale limits used unless options.heatmapNoScale

    The 'TRB' prefix of every '|'-separated gene name in vgenes/jgenes is
    removed, and the caller's lists are updated as before.
    """
    if options.abs:
        data = sample.intersectVJusage
    else:
        data = sample.normIntersectVJusage

    if options.heatmapNoScale:
        hmaxes = axes.imshow( data, interpolation='nearest' )
    else:
        # Shared color scale so heatmaps of different samples are comparable
        norm = mpl.colors.Normalize(vmin=minvj, vmax=maxvj)
        cmap = mpl.cm.get_cmap('rainbow', 20)
        hmaxes = axes.imshow( data, interpolation='nearest', cmap=cmap, norm=norm )

    fig.colorbar(hmaxes, shrink=0.3)

    immunoseqLib.editSpine( axes )
    axes.set_title( "VJ usage of sample %s" % immunoseqLib.properName(sample.name) )

    # One tick per gene; slice assignment keeps the in-place update of the caller's lists
    axes.xaxis.set_ticks( list(range( len(jgenes) )) )
    jgenes[:] = [_stripTrbPrefix(label) for label in jgenes]
    axes.xaxis.set_ticklabels( jgenes )

    axes.yaxis.set_ticks( list(range( len(vgenes) )) )
    vgenes[:] = [_stripTrbPrefix(label) for label in vgenes]
    axes.yaxis.set_ticklabels( vgenes )

    textsize = 'x-small'
    for label in axes.get_xticklabels():
        label.set_fontsize( textsize )
        label.set_rotation( 80 )
    for label in axes.get_yticklabels():
        label.set_fontsize( textsize )
Ejemplo n.º 5
0
def drawAll(options, outdir, rowname2cells, index2colname):
    """Draw the "Sequencing Saturation" plot (mean with error bars per sampling size).

    options       -- options.infile names the input ('-' gives the output
                     name 'all'); options.out is SET here to
                     <outdir>/<input basename up to its first '.'>
    outdir        -- directory of the output image
    rowname2cells -- maps a row (sample) name to its cells: even-indexed
                     cells are read as means, odd-indexed cells as standard
                     deviations; cells equal to 'NA', '' or '-' are skipped
    index2colname -- maps a cell index to its column name; a column name
                     that converts to int is used as the x position,
                     otherwise the ordinal position (index // 2) is used and
                     the column is remembered for tick labelling

    Raises KeyError when one of the hard-coded sample names is missing from
    rowname2cells.
    """
    if options.infile == '-':
        outname = 'all'
    else:
        outname = os.path.basename(options.infile).split('.')[0]
    options.out = os.path.join(outdir, outname)
    fig, pdf = iseqlib.initImage(10.0, 8.0, options)
    axes = fig.add_axes( [0.12, 0.15, 0.85, 0.75] )

    # HACK: the plotted samples are hard-coded instead of sorted(rowname2cells.keys())
    rownames = ['as11D', 'as16D', 'as1D', 'asBD', 'as20D', 'as15D', 'as8D']
    name2color = sample2color(rownames)

    # HACK: rows with these names are never drawn (and never appear in the legend)
    exceptions = ['manhattan', 'euclidean', 'binomial', 'kulczynski', 'canberra', 'jaccard']
    rownames = [name for name in rownames if name not in exceptions]

    missing = ('NA', '', '-')
    markersize = 12.0
    lines = []
    xindices = []  # cell indices whose column name is not numeric
    for rowname in rownames:
        row = rowname2cells[rowname]
        means = []
        stds = []
        xdata = []
        for i, m in enumerate( row ):
            if m in missing:
                continue
            if i % 2 == 1:
                stds.append( float(m) )
                continue
            colname = index2colname[i]
            try:
                xdata.append( int(colname) )
            except (TypeError, ValueError):
                # Non-numeric column name: fall back to the pair's ordinal position
                xdata.append( i // 2 )
                if i not in xindices:
                    xindices.append(i)
            means.append( float(m) )

        color = name2color[rowname]
        axes.errorbar(xdata, means, yerr=stds, color=color, markeredgecolor=color, markersize=markersize, fmt='.')
        line = axes.plot(xdata, means, color=color, linestyle='-', linewidth=4.0)
        lines.append(line)

    iseqlib.editSpine(axes)

    # HACK: title and axis labels are fixed for the saturation figure
    axes.set_title("Sequencing Saturation", size='xx-large', weight='bold')
    axes.set_xlabel("Sampling size (number of sequences, in millions) ", size='x-large', weight='bold' )
    axes.set_ylabel("Number of clones (in thousands)", size='x-large', weight='bold')

    fontP = FontProperties()
    fontP.set_size('medium')
    rownames = [iseqlib.properName(n) for n in rownames]
    legend = axes.legend( lines, rownames, numpoints = 1, loc='best', ncol = 1, prop=fontP)
    legend._drawFrame = False

    if xindices:
        xticklabels = [index2colname[i] for i in sorted(xindices)]
        axes.xaxis.set_ticks( list(range(len(xticklabels))) )
        axes.xaxis.set_ticklabels( xticklabels )

    # HACK: fixed ticks (x in millions, y in thousands) override the ticks set above
    xticks = [0, 1000000,2000000,3000000,4000000,5000000,6000000,7000000,8000000,9000000]
    xticklabels = [ str(x) for x in range(0, 10) ]
    axes.xaxis.set_ticks(xticks)
    axes.xaxis.set_ticklabels( xticklabels )

    yticks = list(range(0, 250000, 50000))
    yticklabels = [ str(y) for y in range(0, 250, 50) ]
    axes.yaxis.set_ticks(yticks)
    axes.yaxis.set_ticklabels(yticklabels)

    for label in axes.get_xticklabels():
        label.set_fontsize('large')
        label.set_fontweight ('bold')
    for label in axes.get_yticklabels():
        label.set_fontsize('large')
        label.set_fontweight ('bold')

    axes.xaxis.grid(b=True, color='#3F3F3F', linestyle='-', linewidth=0.05)
    axes.yaxis.grid(b=True, color='#3F3F3F', linestyle='-', linewidth=0.05)

    # HACK: fixed axis limits
    axes.set_xlim(-10, 5100000)
    axes.set_ylim(-10, 205000)

    iseqlib.writeImage(fig, pdf, options)
Ejemplo n.º 6
0
def drawCloneSizeData( axesList, samples, samplesPerPlot, options, isAbs, yaxisPcReads, yaxisPcClones, cumulative ):
    """Plot clone-size distributions of the samples on the given axes.

    Axes number i of axesList receives samples[i*samplesPerPlot : (i+1)*samplesPerPlot].
    Returns None; returns immediately when samples is empty.

    axesList       -- axes objects to draw on
    samples        -- sample objects; the code reads name, totalClones, countArr,
                      percentArr, clonesPerCount, clonesPerPercent, readsPerCount
                      and readsPerPercent
    samplesPerPlot -- number of consecutive samples drawn per axes
    options        -- not used in this function
    isAbs          -- True: x categories come from samples[0].countArr and the
                      per-count arrays are plotted; False: samples[0].percentArr
                      and the per-percent arrays are used
    yaxisPcReads   -- True: plot the reads* arrays on a linear y axis;
                      False: plot the clones* arrays on a log y axis
    yaxisPcClones  -- True: y values are rescaled to percent of sample.totalClones
    cumulative     -- only selects the title text

    Side effect: prints each sample's name and y data to stdout.
    """
    if len( samples ) <= 0:
        return
    colors = getColors6()
    # Three lookups keyed by sample name: color, marker and a lighter color for the connecting line
    s2c, s2m, s2cLight = sample2color( colors )

    textsize = 'medium'
    fontP = FontProperties()
    fontP.set_size(textsize)
    linesDict = {}   # axes index -> plotted marker lines (legend handles)
    labelsDict = {}  # axes index -> display names of the samples on that axes

    if isAbs:
        xtickLabels = samples[0].countArr
    else:
        xtickLabels = samples[0].percentArr
    
    # x positions: one integer slot per category
    xdata = range( 0, len(xtickLabels), 1 )
    # Horizontal shift between samples of the same axes; 0.0 means points share the same x
    offset = 0.0
    maxy = 0
    for i in range( len(axesList) ):
        lines = []
        sampleNames = []
        axes = axesList[i]
        #HACK: log scale for everything except the percent-of-reads plots
        if not yaxisPcReads:
            axes.set_yscale('log')
        startIndex = i*samplesPerPlot
        endIndex = min( [startIndex + samplesPerPlot, len(samples)] )
        for j in range( startIndex, endIndex ):
            sample = samples[j]
            sampleNames.append( "%s" % (iseqlib.properName(sample.name)))
            # Pick the y series from (yaxisPcReads, isAbs); clonesPerCount is the default
            ydata = sample.clonesPerCount
            if not yaxisPcReads and not isAbs:
                ydata = sample.clonesPerPercent
            elif yaxisPcReads and isAbs:
                ydata = sample.readsPerCount
            elif yaxisPcReads and not isAbs:
                ydata = sample.readsPerPercent
            
            if yaxisPcClones:
                # NOTE(review): under Python 2 this is integer division when both
                # y and totalClones are ints -- confirm the operand types
                ydata = [ 100*y/sample.totalClones for y in ydata ]
            maxy = max( [maxy, max(ydata)] )

            currxdata = [x + offset*(j - startIndex) for x in xdata]
            # Both branches currently use the same base marker size
            if isAbs:
                markersize = 8.0
            else:
                markersize = 8.0
            # '*' markers are drawn 2 units larger and 's' markers 2 units smaller
            if s2m[sample.name] == '*':
                markersize += 2.0
            elif s2m[sample.name] == 's':
                markersize -= 2.0

            # Debug output of the plotted series
            print sample.name
            print ydata

            # Markers first (kept as legend handle), then a thin connecting line in the lighter color
            l = axes.plot( currxdata, ydata, color=s2c[sample.name], marker=s2m[sample.name], markeredgecolor=s2c[sample.name], markersize=markersize, linestyle='none' )
            axes.plot( currxdata, ydata, color=s2cLight[sample.name], linestyle='-', linewidth=0.5 )
            lines.append( l )
        
        linesDict[i] = lines
        labelsDict[i] = sampleNames
    
        # Title: the "B." variant replaces the "A." variant when yaxisPcReads is set
        if cumulative:
            axes.set_title( 'A. Cumulative Distribution of Clones', size="large", weight='bold' )
            if yaxisPcReads:
                axes.set_title( 'B. Cumulative Distribution of Sequences', size="large", weight='bold')
        else:
            axes.set_title( 'Clone Size Distribution', size="xx-large" )

    # Second pass: labels, legend, ticks, grid lines and limits (needs the final maxy)
    for i in range( len(axesList) ):
        axes = axesList[ i ]
        iseqlib.editSpine( axes )
        # Default labels first; the more specific ones below override them
        axes.set_xlabel('Clone size (number of sequences)', size = textsize)
        if not isAbs:
            axes.set_xlabel('Clone size (% of total sequences)', size = textsize, weight='bold')
        axes.set_ylabel('Frequency (number of clones)', size=textsize)
        if yaxisPcReads:
            axes.set_ylabel('Frequency (% of total sequences)', size=textsize, weight='bold')
        if yaxisPcClones:
            axes.set_ylabel('Frequency (% of total clones)', size = textsize, weight='bold')
        # Legend is only drawn on the percent-of-clones plot
        if yaxisPcClones and not yaxisPcReads:
            legend = axes.legend( linesDict[i], labelsDict[i], numpoints = 1, loc="best", ncol=1, prop=fontP)
            legend._drawFrame = False

        #HACK: the numeric-axis branch is disabled (condition used to be isAbs); the else branch always runs
        if False:
            xtickdata = xrange(10, 101, 10)
            #grid:
            for x in xtickdata:
                axes.plot([x,x], [0.0001, maxy], color='#3F3F3F', linestyle='-', linewidth=0.1)
            axes.xaxis.set_ticks(xtickdata)
            minx = 0
            maxx = 101
            axes.set_xlim(minx, maxx)
            axes.xaxis.set_ticklabels([str(x) for x in xtickdata] , size='medium')
        else:
            # Ticks sit half a slot to the left of each data position (the offset term is 0 while offset is 0.0)
            axes.xaxis.set_ticklabels( xtickLabels, size='medium' )
            axes.xaxis.set_ticks([x + 0.5*offset*(samplesPerPlot-1) - 0.5 for x in xdata] )
            minx = -0.5
            maxx = len(xtickLabels) - 0.5
            
            # Hand-drawn vertical grid lines at every tick except the first
            xticks = [x + 0.5*offset*(samplesPerPlot-1) - 0.5 for x in xdata]
            for xi, x in enumerate(xticks):
                if xi == 0:
                    continue
                axes.plot([x,x], [0.0001, maxy], color='#3F3F3F', linestyle='-', linewidth=0.1)
            axes.set_xlim(minx, maxx)

        #END HACK

        # y ticks, hand-drawn horizontal grid lines and y limits, per plot type
        if not yaxisPcReads and not yaxisPcClones:
            # Absolute number of clones
            yticks = [1, 10, 100, 1000, 10000, 50000, 100000, 150000]
            ytickLabels = ["1", "10", "100", "1k", "10k", "50k", "100k", "150k"]
            axes.yaxis.set_ticklabels( ytickLabels )
            axes.yaxis.set_ticks( yticks )
            for y in yticks:
                # Solid line at powers of ten, dash-dot line at the other ticks
                if y in [1, 10, 100, 1000, 10000, 100000]:
                    axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-', linewidth=0.1)
                else:
                    axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-.', linewidth=0.1)
            axes.set_ylim(0.8, maxy + 15000)
        elif yaxisPcClones:
            # Percent of total clones
            yticks = [0.001, 0.01, 0.1, 1, 10, 100]
            ytickLabels = [str(y) for y in yticks]
            axes.yaxis.set_ticks(yticks)
            axes.yaxis.set_ticklabels( ytickLabels )
            for y in yticks:
                # Every current tick is in this list, so all lines are solid
                if y in [0.001, 0.01, 0.1, 1, 10, 100]:
                    axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-', linewidth=0.1)
                else:
                    axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-.', linewidth=0.1)
            axes.set_ylim(0.0005, maxy + 25)
        else:
            # Percent of total sequences (yaxisPcReads without yaxisPcClones): linear 0..100 axis
            yticks = xrange(0, 101, 20)
            ytickLabels = [str(y) for y in yticks]
            axes.yaxis.set_ticks(yticks)
            axes.yaxis.set_ticklabels( ytickLabels )
            for y in yticks:
                axes.plot([minx, maxx], [y, y], color='#3F3F3F', linestyle='-', linewidth=0.1)
            axes.set_ylim(-1, maxy + 1)

        for label in axes.get_xticklabels():
            label.set_fontsize( 'small' )

        for label in axes.get_yticklabels():
            label.set_fontsize( 'small' )
        
    return 
Ejemplo n.º 7
0
def drawPcaData(axes, rownames, transformedM, options):
    """Scatter-plot the first two components of transformedM, one series per group.

    axes         -- axes to draw on
    rownames     -- per-row sequence whose item [1] is the row's group
    transformedM -- rows whose items [0] and [1] are plotted as x and y
    options      -- when options.groupDrawInfo is set, colors and markers are
                    taken from options.g2c / options.g2m; otherwise the
                    colors of immunoseqLib.getColors6() and a fixed marker
                    list are used

    Raises ValueError when there are more groups than available colors.
    """
    # Collect the coordinates of every group
    group2xdata = {}
    group2ydata = {}
    for idx, coords in enumerate(transformedM):
        group = rownames[idx][1]
        x, y = coords[0], coords[1]
        group2xdata.setdefault(group, []).append(x)
        group2ydata.setdefault(group, []).append(y)

    groups = sorted(group2xdata)
    if options.groupDrawInfo:
        colors = [options.g2c[g] for g in groups]
        markers = [options.g2m[g] for g in groups]
    else:
        colors = immunoseqLib.getColors6()
        markers = ['^', 'o', 'd', 'p', 'v', '*', 's']

    # Draw one marker-only series per group while tracking the data extent
    lines = []
    minx = miny = float('inf')
    maxx = maxy = float('-inf')
    for idx, group in enumerate(groups):
        if idx >= len(colors):
            raise ValueError("drawPcaData: Need more color!")
        color, marker = colors[idx], markers[idx]
        xs, ys = group2xdata[group], group2ydata[group]

        minx, maxx = min(minx, min(xs)), max(maxx, max(xs))
        miny, maxy = min(miny, min(ys)), max(maxy, max(ys))

        series = axes.plot(xs, ys, color=color, marker=marker, markersize=15.0, markeredgecolor=color, linestyle='none')
        lines.append(series)

    for getLabels in (axes.get_xticklabels, axes.get_yticklabels):
        for label in getLabels():
            label.set_fontsize('large')
            label.set_fontweight('bold')

    # Pad each axis by 10% of the data range on both sides
    rangex = maxx - minx
    axes.set_xlim(minx - rangex*0.1, maxx + rangex*0.1)
    rangey = maxy - miny
    axes.set_ylim(miny - rangey*0.1, maxy + rangey*0.1)

    linenames = [immunoseqLib.properName(g) for g in groups]
    legend = pyplot.legend(lines, linenames, numpoints=1, loc="best")
    axes.set_xlabel('PC1', size='xx-large', weight='bold')
    axes.set_ylabel('PC2', size='xx-large', weight='bold')
    axes.yaxis.grid(b=True, color="#3F3F3F", linestyle='-', linewidth=0.5)
    axes.xaxis.grid(b=True, color="#3F3F3F", linestyle='-', linewidth=0.5)