# NOTE(review): this section was recovered from a whitespace-collapsed line, so the
# indentation below is a reconstruction. The statements down to `else:` are the tail
# of an `if` branch whose header lies above this chunk; the nesting depth shown here
# is an assumption -- confirm against the original file.
    # Cumulative-frequency figure (4x4 inches): plotfun.plot_manylines draws yvalues
    # against xvalues with the three labels below; axes are labelled
    # 'motif score' / 'cumulative frequency' and the figure is written to
    # <outfile>.sitescore.peakvalue.pdf.
    plt.figure(figsize=(4,4))
    plotfun.plot_manylines(yvalues, x=xvalues, labels=['all sites', 'up peaks', 'no change'])
    ax = plt.gca()
    plt.legend(loc='lower right')
    ax.set_xlabel('motif score')
    ax.set_ylabel('cumulative frequency')
    plt.tight_layout()
    plt.savefig('%s.sitescore.peakvalue.pdf'%outfile)

    # plot dendrogram
    # Builds a linkage from footprintVecs_norm with a Euclidean metric and draws it.
    # (presumably scipy.cluster.hierarchy linkage/dendrogram -- the import is not
    # visible in this chunk; verify)
    plt.figure()
    signal_clusters = dendrogram(linkage(footprintVecs_norm, metric='euclidean'), labels=labels)
    plt.title('all profiles')
    plt.savefig('%s.all.dendrogram.pdf'%(outfile))

    # Re-plot the aggregate profiles reordered by the 'leaves' entry of the dendrogram
    # result (presumably the left-to-right leaf order -- verify).
    # NOTE(review): labels is indexed with an integer array here, so it presumably
    # is a numpy array at this point rather than a plain list -- confirm.
    reorder = np.array(signal_clusters['leaves'])
    plotfun.plotAggregateSignal(footprintVecs_norm[reorder], labels=labels[reorder])
    plt.savefig('%s.all.normalizedbyinsertions.background_sub.pdf'%outfile)
else:
    print "skipped plotting background-subtracted profile because no background was provided"

# reduce by replicates
# NOTE(review): assumed to run after (not inside) the else branch -- confirm.
# Loads the array stored at options.m and collapses it to two entries: each entry is
# the sum along axis 0 of the rows selected by parameters.indices_high_noreplicates
# and parameters.indices_low_noreplicates respectively. The loaded array must have
# at least three dimensions, since shape[1] and shape[2] are read.
footprintMats = np.load(options.m)
footprintMats_red = np.zeros((2, footprintMats.shape[1], footprintMats.shape[2]))
footprintMats_red[0] = np.sum(footprintMats[parameters.indices_high_noreplicates], 0)
footprintMats_red[1] = np.sum(footprintMats[parameters.indices_low_noreplicates], 0)
# From here on footprintMats refers to a copy of the reduced array; labels and
# numSamples are rebound to match its two entries.
footprintMats = np.copy(footprintMats_red)
labels = ['hyperaccessible', 'tumorigenic']
numSamples = 2

# find correlations
## from bw files, extract signal about 5kb away from bed data
## NOTE(review): the baseline extraction below is disabled (kept only as comments).
# print "finding baseline..."
## Do 5000 random points
# numBackgroundSites = 5000
# indx = np.unique(np.linspace(0, numSites-1, numBackgroundSites).astype(int))
# baselineBed = np.copy(bedFile)[indx]
# baselineBed[:,1] = (baselineBed[:,1].astype(int)-5000).astype(str)
# baselineBed[:,2] = (baselineBed[:,2].astype(int)-5000).astype(str)
# for i, bw in enumerate(bws):
#     print "Extracting background signal for %s:\t%s"%(labels[i], os.path.basename(bw))
#     pool = Pool(processes=options.p)
#     baselineArray = pool.map(functools.partial(findInsertions, bw, baselineBed), range(0, numBackgroundSites))
#     pool.close()
#     baseline = processSignal(baselineArray)
#     signals[i].baseline = baseline.final

# plot and save
# Collect the `.final` attribute of every entry of `signals` into one array.
finalSignal = []
for i in range(len(signals)):
    finalSignal.append(signals[i].final)
finalSignal = np.array(finalSignal)

# When no tn5 option was given, move every column of each sample one position
# to the left; the last column keeps the values it already had.
if options.tn5 is None:
    for i in range(len(signals)):
        sampleSignal = finalSignal[i]
        sampleSignal[:, :-1] = sampleSignal[:, 1:]

np.save(outfile, finalSignal)

# One aggregate profile per sample: the mean over axis 0 of that sample's signal,
# rescaled by (mean of all samples) / (mean of this sample).
numSamples = len(bws)
footprintVecs = []
for i in range(numSamples):
    sampleSignal = finalSignal[i]
    footprintVecs.append(
        np.mean(sampleSignal, 0) * np.mean(finalSignal) / np.mean(sampleSignal)
    )
footprintVecs = np.array(footprintVecs)

plotfun.plotAggregateSignal(footprintVecs)
plt.savefig(outfile + ".normalize.aggregate.pdf")