예제 #1
0
     # Draw the score curves on a new 4x4 figure and save them as a PDF.
     # plot_manylines is a project helper (not visible here); presumably it
     # draws one line per label from yvalues against xvalues -- confirm.
     plt.figure(figsize=(4,4))
     plotfun.plot_manylines(yvalues, x=xvalues, labels=['all sites', 'up peaks', 'no change'])
     # Grab the axes the helper just drew on so the axis labels can be set.
     ax = plt.gca()
     plt.legend(loc='lower right')
     ax.set_xlabel('motif score')
     ax.set_ylabel('cumulative frequency')
     # Tighten the layout before saving so the labels are not clipped.
     plt.tight_layout()
     plt.savefig('%s.sitescore.peakvalue.pdf'%outfile)
     
     # plot dendrogram
     # Cluster the normalized footprint vectors with a Euclidean metric and
     # draw the resulting tree on a fresh figure.
     # NOTE(review): linkage/dendrogram are presumably the
     # scipy.cluster.hierarchy functions -- confirm against the file's imports.
     plt.figure()
     signal_clusters = dendrogram(linkage(footprintVecs_norm, metric='euclidean'), labels=labels)
     plt.title('all profiles')
     plt.savefig('%s.all.dendrogram.pdf'%(outfile)) 
     # Re-plot the aggregate profiles using the 'leaves' ordering returned by
     # the dendrogram call. labels[reorder] indexes with an integer array, so
     # this assumes labels is a NumPy array at this point (a plain list would
     # not support that) -- verify against where labels is built.
     reorder = np.array(signal_clusters['leaves'])
     plotfun.plotAggregateSignal(footprintVecs_norm[reorder], labels=labels[reorder])
     plt.savefig('%s.all.normalizedbyinsertions.background_sub.pdf'%outfile)
     
 else:
     # The matching `if` is outside this excerpt; per the message below, this
     # branch runs when no background was provided, so it only reports the skip.
     print "skipped plotting background-subtracted profile because no background was provided"
 
 # Collapse replicates: load the stacked footprint matrices and sum them
 # along the first axis within each group, leaving one matrix per group
 # (slot 0 = "high" indices, slot 1 = "low" indices).
 footprintMats = np.load(options.m)
 footprintMats_red = np.zeros(
     shape=(2, footprintMats.shape[1], footprintMats.shape[2]),
     dtype=float,
 )
 footprintMats_red[0] = footprintMats[parameters.indices_high_noreplicates].sum(axis=0)
 footprintMats_red[1] = footprintMats[parameters.indices_low_noreplicates].sum(axis=0)
 # Continue with an independent copy so later edits to footprintMats do not
 # touch footprintMats_red.
 footprintMats = footprintMats_red.copy()

 # One label per reduced group, in the same order as the slots above.
 labels = ['hyperaccessible', 'tumorigenic']
 numSamples = len(labels)
 # find correlations
예제 #2
0
    ## from bw files, extract signal about 5kb away from bed data
    # print "finding baseline..."
    ## Do 5000 random points
    # numBackgroundSites = 5000
    # indx = np.unique(np.linspace(0, numSites-1, numBackgroundSites).astype(int))
    # baselineBed = np.copy(bedFile)[indx]
    # baselineBed[:,1] = (baselineBed[:,1].astype(int)-5000).astype(str)
    # baselineBed[:,2] = (baselineBed[:,2].astype(int)-5000).astype(str)
    # for i, bw in enumerate(bws):
    #    print "Extracting background signal for %s:\t%s"%(labels[i], os.path.basename(bw))
    #    pool = Pool(processes=options.p)
    #    baselineArray = pool.map(functools.partial(findInsertions, bw, baselineBed), range(0, numBackgroundSites))
    #    pool.close()
    #    baseline = processSignal(baselineArray)
    #    signals[i].baseline = baseline.final

    # plot and save

    # Stack each sample's `.final` array into one array, in sample order.
    finalSignal = np.array([signals[i].final for i in range(len(signals))])
    if options.tn5 is None:
        # Shift every row one column to the left, in place; the last column
        # keeps its previous value.
        # NOTE(review): presumably compensates for a one-position offset when
        # no tn5 option is supplied -- confirm against the option's definition.
        for sampleSignal in finalSignal:
            sampleSignal[:, :-1] = sampleSignal[:, 1:]
    np.save(outfile, finalSignal)

    numSamples = len(bws)
    # The grand mean is identical for every sample, so compute it once instead
    # of re-reducing the whole array on each iteration of the comprehension.
    overallMean = np.mean(finalSignal)
    # Per-sample mean over axis 0, scaled by (grand mean / that sample's mean).
    # The multiply-then-divide order is kept so results match bit for bit.
    footprintVecs = np.array(
        [np.mean(finalSignal[i], 0) * overallMean / np.mean(finalSignal[i]) for i in range(numSamples)]
    )
    plotfun.plotAggregateSignal(footprintVecs)
    plt.savefig(outfile + ".normalize.aggregate.pdf")