/
eisa_rna_pipe.py
73 lines (57 loc) · 2.14 KB
/
eisa_rna_pipe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import luigi
import os
import subprocess
import basic_rna_pipe as brp
class extract_exon_annotation(luigi.Task):
'''extract only exons from annotation file,
will be used to filter reads that align to an intron
'''
eisa_dir = '%s/eisa' % brp.configs().exp_dir
def run(self):
try:
os.makedirs(self.eisa_dir)
except OSError:
pass
with open('%s/exons.gtf' % self.eisa_dir, 'a') as f:
for line in open(brp.configs().genome_gtf):
if "##" in line:
print(line, end='', file=f)
else:
cols = line.split()
if cols[2] == 'exon':
print(line, end='', file=f)
def output(self):
return luigi.LocalTarget('%s/exons.gtf' % self.eisa_dir)
class filter_nonexon(luigi.Task):
'''Make bam file with reads that do not overlap any exons
Should end up with a bam file with reads that align to an intron region
or do not align at all
'''
sample = luigi.Parameter()
def output_dir(self):
return '%s/eisa/%s' % (brp.configs().exp_dir, self.sample)
def requires(self):
return brp.star_align(self.sample), extract_exon_annotation()
def run(self):
try:
os.makedirs(self.output_dir())
except OSError:
pass
bam_file = self.input()[0]
exon_gtf = self.input()[1]
intersect_command = ['bedtools', 'intersect',
'-a', bam_file.path,
'-b', exon_gtf.path,
'-v']
with open('%s/%s_intron.bam' % (self.output_dir(), self.sample), 'w') as f:
subprocess.call(intersect_command, stdout=f)
def output(self):
return luigi.LocalTarget('%s/%s_intron.bam' %
(self.output_dir(), self.sample))
class intron_counter(brp.gene_counter):
bam_generator = filter_nonexon
feature_to_count = 'gene'
output_name = 'intron'
class nascent_count_matrix(brp.postgres_count_matrix):
table = "intron_counts"
feature_counter = intron_counter