コード例 #1
0
# Boolean array read from options.b; it is used below to select the rows
# (peaks) labelled "no expression change".
noExprChange = np.loadtxt(options.b, dtype=bool)


def _scaleToReferenceMean(counts):
    """Scale every column so its mean over the noExprChange rows equals 10.

    Each column of `counts` is divided by that column's mean over the rows
    selected by `noExprChange` and then multiplied by 10, such that the
    average count in those (TSS) peaks is 10.
    """
    # Disabled alternative that normalized by column sums instead:
    # counts/np.sum(counts[noExprChange], 0)*np.mean(np.sum(counts[noExprChange], 0))
    referenceMean = np.mean(counts[noExprChange], 0)
    return counts/referenceMean*10


def _pairwiseDistanceMatrix(counts, rows, sampleCount):
    """Return the pairwise column-distance matrix as a numpy array.

    Entry [j][i] is getDistanceSpearmanr(counts[rows, i], counts[rows, j]),
    where `rows` is either slice(None) (all rows) or a row mask.
    """
    table = []
    for j in range(sampleCount):
        table.append([getDistanceSpearmanr(counts[rows, i], counts[rows, j])
                      for i in range(sampleCount)])
    return np.array(table)


# Normalized counts for every sample column.
readCountNorm = _scaleToReferenceMean(readCount)

# Output prefix: options.o when given, otherwise options.c minus its extension.
outputfile = options.o if options.o else os.path.splitext(options.c)[0]
np.savetxt(outputfile+'.normalize.peakCount', readCountNorm)

# Same normalization applied to the replicate-reduced counts. The filename
# suffix spelling is kept unchanged because it is part of the output path.
readCountRed = filefun.reduceByReplicates(
    readCount, parameters.indices_for_reduce_by_replicates)
readCountNorm = _scaleToReferenceMean(readCountRed)
np.savetxt(outputfile+'.normalize.norepicates.peakCount', readCountNorm)

# readCountNorm was just reused for the reduced matrix, so rebuild the
# per-sample normalization before computing correlations.
numSamples = readCount.shape[1]
readCountNorm = _scaleToReferenceMean(readCount)

# Two heat maps: one over all peaks, one restricted to the noExprChange rows.
for _rows, _suffix in ((slice(None), '.correlation_heatmap.all.pdf'),
                       (noExprChange, '.correlation_heatmap.no_change.pdf')):
    distanceCorr = _pairwiseDistanceMatrix(readCountNorm, _rows, numSamples)
    plotHeatMap(
        distanceCorr,
        rowlabels=parameters.headers,
        columnlabels=parameters.headers,
        fontSize=11,
        cmap='RdGy_r',
        vmin=0,
        vmax=1,
    )
    plt.savefig(outputfile+_suffix)

# plot only old samples with replicates
コード例 #2
0
# Boolean array read from options.b; it is used below to select the rows
# (peaks) labelled "no expression change".
noExprChange = np.loadtxt(options.b, dtype=bool)


def _scaleToReferenceMean(counts):
    """Scale every column so its mean over the noExprChange rows equals 10.

    Each column of `counts` is divided by that column's mean over the rows
    selected by `noExprChange` and then multiplied by 10, such that the
    average count in those (TSS) peaks is 10.
    """
    # Disabled alternative that normalized by column sums instead:
    # counts/np.sum(counts[noExprChange], 0)*np.mean(np.sum(counts[noExprChange], 0))
    referenceMean = np.mean(counts[noExprChange], 0)
    return counts/referenceMean*10


# Normalized counts for every sample column.
readCountNorm = _scaleToReferenceMean(readCount)

# Output prefix: options.o when given, otherwise options.c minus its extension.
outputfile = options.o or os.path.splitext(options.c)[0]
np.savetxt(outputfile+'.normalize.peakCount', readCountNorm)

# Replicate-reduced counts, normalized the same way and written straight to
# disk. The index array is hard-coded here (presumably columns 0/1 and 2/3
# are replicate pairs -- confirm against filefun.reduceByReplicates). The
# filename suffix spelling is kept unchanged because it is part of the path.
_reduceIndices = np.array([[0, 1], [2, 3]])
readCountRed = filefun.reduceByReplicates(readCount, _reduceIndices)
np.savetxt(outputfile+'.normalize.norepicates.peakCount',
           _scaleToReferenceMean(readCountRed))

# Pairwise distances between sample columns over all peaks; entry [j][i]
# compares column i with column j.
numSamples = readCount.shape[1]
_distanceRows = []
for j in range(numSamples):
    _distanceRows.append(
        [getDistanceSpearmanr(readCountNorm[:, i], readCountNorm[:, j])
         for i in range(numSamples)])
distanceCorr = np.array(_distanceRows)

# Draw the heat map and save it next to the other outputs.
plotHeatMap(
    distanceCorr,
    rowlabels=parameters.headers,
    columnlabels=parameters.headers_human,
    fontSize=11,
    cmap='RdGy_r',
    vmin=0,
    vmax=1,
)
plt.savefig(outputfile+'.correlation_heatmap.all.pdf')

"""
Remainder of the script makes a lot of plots. It requires calls of significant versus non-significant peaks.
optional: run python script 'find_significant_peaks.py'

os.system('python %s -b %s -p %s --indx %s'%('scoring/140815_peaks.coverageCorr.all.bed', outputfile+'.normalize.replicate_red.peakCount', options.b))
"""