Exemple #1
0
def plotCDFs(siteScore, boundMatrix):
    """
    Overlay the CDFs of motif site scores for four classes of sites.

    Sites are classified from boundMatrix, reduced along its first axis:
    bound in every row, bound in row 0 but not in every row, bound in row 1
    but not in every row, and all remaining sites. The scores of each class
    are handed to seqfun.getCDF and the returned (x, y) pair is drawn as one
    line on the current axes.

    Parameters
    ----------
    siteScore : array indexable by a boolean mask
        One score per site.
    boundMatrix : array-like
        Truthy where a site is bound. Row 0 is drawn with the label
        'bound by hyperaccesible only' and row 1 with 'bound by normal only'.
        Assumed to hold exactly two rows, one column per site -- confirm
        against the caller.

    Returns
    -------
    None
        All output goes to the axes returned by plt.gca().
    """
    # Class masks; the "only" masks exclude sites that are bound in every row.
    inEvery = np.all(boundMatrix, 0)
    notInEvery = np.logical_not(inEvery)
    onlyFirst, onlySecond = (
        np.all((notInEvery, boundMatrix[row]), 0) for row in (0, 1)
    )
    inNone = np.logical_not(np.any((inEvery, onlyFirst, onlySecond), 0))

    # One (x, y) curve per class, in the order: every, first, second, none.
    curves = []
    for mask in (inEvery, onlyFirst, onlySecond, inNone):
        xs, ys = seqfun.getCDF(siteScore[mask])
        curves.append((xs, ys))

    # (curve index, line format, legend label) in drawing order; the
    # row-1 curve is drawn before the row-0 curve.
    drawSpecs = (
        (0, 'k', 'bound by both groups'),
        (2, 'b', 'bound by normal only'),
        (1, 'r', 'bound by hyperaccesible only'),
        (3, 'k--', 'not bound'),
    )
    ax = plt.gca()
    for idx, fmt, name in drawSpecs:
        xs, ys = curves[idx]
        ax.plot(xs, ys, fmt, label=name, linewidth=2)

    # Legend inside the axes first; it is replaced further down.
    ax.legend(*ax.get_legend_handles_labels(), loc='lower right')

    ax.set_xlabel('motif score')
    ax.set_ylabel('cumulative frequency')
    plt.tight_layout()

    # Narrow the axes to 60% of their width to leave room on the right.
    frame = ax.get_position()
    ax.set_position([frame.x0, frame.y0, frame.width * 0.6, frame.height])

    # Final legend, anchored just outside the right edge of the axes.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
     
     # Aggregate-signal figure for the rows selected by
     # parameters.indices_low_noreplicates, saved as a PDF.
     # NOTE(review): this fragment is indented as if it lived inside an
     # enclosing block that is not visible here; `outfile`, `labeli`,
     # `labels`, `parameters` and `footprintVecs_norm_nobsub` all come from
     # that outer scope.
     plt.figure(figsize=(4,4))
     # The same index array selects both the profiles and their labels.
     plotfun.plotAggregateSignalOneline(footprintVecs_norm_nobsub[parameters.indices_low_noreplicates], labels=labels[parameters.indices_low_noreplicates], cmap='Blues')
     ax = plt.gca()
     # Fixed y-range of 0 to 0.5.
     ax.set_ylim((0.0, 0.50))
     ax.grid()
     # Drop the axes' legend reference; presumably this hides a legend added
     # by the plotting helper -- confirm.
     ax.legend_ = None
     # Output name is built from `outfile` (as %s) and `labeli` (as %d).
     plt.savefig('%s.all.normalizedbyinsertions.background_sub.low.%d.pdf'%(outfile, labeli))
 
 # Plot the CDF of motif site scores for all sites and for two subsets,
 # then save the figure as '<outfile>.sitescore.peakvalue.pdf'.
 # Column 4 of bedFile, converted to float, is used as the site score.
 siteScore = bedFile[:, 4].astype(float)
 # Three-slot placeholders, overwritten below with the x and y of each CDF.
 xvalues = ['']*3
 yvalues = ['']*3
 # NOTE(review): these two paths are assigned but not used in the lines
 # visible here; presumably `subsetIndices` is loaded from them elsewhere --
 # confirm before removing.
 filename_up = '/home/sarah/GreenDragonRaid1/lab/sarah/SCLC_ATAC/140908_bams/mm9/motifs/NF1_CTF/all_samples_14_9_30/changingPeaks/upPeaks.peakIndx'
 filename_nochange = '/home/sarah/GreenDragonRaid1/lab/sarah/SCLC_ATAC/140908_bams/mm9/motifs/NF1_CTF/all_samples_14_9_30/backgroundPeaks/backgroundPeaks.peakIndx'
 # Slot 0: every site. Slot 1: subsetIndices[1]. Slot 2: subsetIndices[0].
 # The slots line up by position with the labels
 # ['all sites', 'up peaks', 'no change'] passed below.
 xvalues[0], yvalues[0] = seqfun.getCDF(siteScore)
 xvalues[1], yvalues[1] = seqfun.getCDF(siteScore[subsetIndices[1]])
 xvalues[2], yvalues[2] = seqfun.getCDF(siteScore[subsetIndices[0]])
 plt.figure(figsize=(4,4))
 plotfun.plot_manylines(yvalues, x=xvalues, labels=['all sites', 'up peaks', 'no change'])
 ax = plt.gca()
 plt.legend(loc='lower right')
 ax.set_xlabel('motif score')
 ax.set_ylabel('cumulative frequency')
 plt.tight_layout()
 plt.savefig('%s.sitescore.peakvalue.pdf'%outfile)
 
 # Plot a dendrogram of the profiles in footprintVecs_norm on a new figure.
 # `linkage` is called with metric='euclidean' and no explicit method, so the
 # library default method applies (presumably scipy.cluster.hierarchy --
 # confirm against the file's imports).
 plt.figure()
 # The value returned by dendrogram() is kept in signal_clusters.
 signal_clusters = dendrogram(linkage(footprintVecs_norm, metric='euclidean'), labels=labels)
 plt.title('all profiles')