forked from davek44/utility
/
tss_bam_replot.py
executable file
·81 lines (67 loc) · 2.91 KB
/
tss_bam_replot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python
from optparse import OptionParser
from rpy2.robjects.packages import importr
import rpy2.robjects as ro
import rpy2.robjects.lib.ggplot2 as ggplot2
grdevices = importr('grDevices')
################################################################################
# tss_bam_replot.py
#
# Make a new plot from the raw data generated by tss_bam_plot.py
################################################################################
################################################################################
# main
################################################################################
def main():
    """Re-plot TSS coverage from the raw data generated by tss_bam_plot.py.

    Command line: ``[options] <raw file>``.

    Each non-blank line of the raw file is split on whitespace; the second
    column is read as the main coverage and the third as the control
    coverage.  The first column is not used: the x axis is rebuilt from the
    ``-u``/``-d`` options as the integer positions ``-upstream..downstream``,
    so the file must hold exactly ``upstream + downstream + 1`` data rows.

    The loess-smoothed plot is written to ``<out_prefix>_and.pdf``.

    Exits through ``parser.error`` when the raw file argument is missing, a
    line has fewer than three columns, or the row count does not match the
    requested window.  Non-numeric coverage values raise ``ValueError``.
    """
    usage = 'usage: %prog [options] <raw file>'
    parser = OptionParser(usage)
    parser.add_option('-d', dest='downstream', default=2000, type='int', help='TSS downstream [Default: %default]')
    parser.add_option('-o', dest='out_prefix', default='tss', help='Output prefix [Default: %default]')
    parser.add_option('-u', dest='upstream', default=5000, type='int', help='TSS upstream [Default: %default]')
    parser.add_option('--ymax', dest='ymax', default=None, type='float', help='Y-coordinate limit [Default: %default]')
    (options, args) = parser.parse_args()

    # parser.error() exits the process, so no else branch is needed.
    if len(args) != 1:
        parser.error('Must provide raw file')
    raw_file = args[0]

    # collect data
    main_cov = []
    control_cov = []
    with open(raw_file) as raw_in:
        for line_num, line in enumerate(raw_in, 1):
            a = line.split()
            if not a:
                # tolerate blank lines (e.g. a trailing newline)
                continue
            if len(a) < 3:
                parser.error('%s line %d: expected 3 columns, found %d'
                             % (raw_file, line_num, len(a)))
            main_cov.append(float(a[1]))
            control_cov.append(float(a[2]))

    # The x axis and labels below are sized from -u/-d, not from the file.
    # Without this check a mismatch either fails inside R's data.frame
    # construction or is silently recycled into a mislabelled plot.
    window = options.upstream + options.downstream + 1
    if len(main_cov) != window:
        parser.error('%s has %d data rows, but -u %d / -d %d expect %d'
                     % (raw_file, len(main_cov), options.upstream,
                        options.downstream, window))

    # data structures
    # tss_i has `window` entries while cov/label have 2*window; this relies
    # on the data frame construction recycling the shorter column so that it
    # lines up with both the Main and the Control halves.
    tss_i = ro.IntVector(range(-options.upstream, options.downstream + 1))
    labels = ro.StrVector(['Main'] * window + ['Control'] * window)
    cov = ro.FloatVector(main_cov + control_cov)

    df = ro.DataFrame({'tss_i': tss_i, 'cov': cov, 'label': labels})

    # plot
    gp = ggplot2.ggplot(df) + \
        ggplot2.aes_string(x='tss_i', y='cov', colour='label') + \
        ggplot2.geom_smooth(method='loess', size=1, span=0.2, se=False) + \
        ggplot2.scale_x_continuous('TSS Position') + \
        ggplot2.scale_colour_discrete('') + \
        ggplot2.theme_bw()

    if options.ymax is None:
        gp += ggplot2.scale_y_continuous('Coverage')
    else:
        gp += ggplot2.scale_y_continuous('Coverage', limits=ro.FloatVector([0, options.ymax]))

    # save to file
    grdevices.pdf(file='%s_and.pdf' % options.out_prefix)
    try:
        gp.plot()
    finally:
        # always close the PDF device, even if plotting raises
        grdevices.dev_off()
################################################################################
# __main__
################################################################################
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()