-
Notifications
You must be signed in to change notification settings - Fork 0
/
clear-pipeline.py
143 lines (131 loc) · 6.69 KB
/
clear-pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import re,os,sys,logging,time,datetime,subprocess;
import commands;
from optparse import OptionParser
from argparse import Action
# Module-level pipeline settings. main() rebinds every one of them to the
# value parsed from the command line; the literals below are the defaults
# that stay in effect when main() has not run.
ALIGNER = "BWA"
ADAPTER = "yes"
TRIM = "yes"
GENOME = "./genome/RefGenome.fa"
# main() declares PARA_MAP global and the mapping step reads it, so it needs
# a module-level default like the other settings (empty = no extra options).
PARA_MAP = ""
def main(argv=None):
    """Parse the command-line options into the module-level settings.

    Args:
        argv: Optional list of argument strings to parse.  When ``None``
            (the default) optparse falls back to ``sys.argv[1:]``, which is
            what a plain ``main()`` call has always done.

    Side effects:
        Rebinds the globals ALIGNER, ADAPTER, TRIM, GENOME and PARA_MAP to
        the parsed option values.  With ``--man`` the synopsis and the
        formatted option help are printed and the process exits through
        ``parser.exit()``.
    """
    global ALIGNER, ADAPTER, TRIM, GENOME, PARA_MAP

    # The help texts are built from adjacent string literals rather than
    # backslash continuations *inside* one literal: a continuation glues the
    # next source line (and any indentation it carries) straight onto the
    # previous word, which produced "completedocumentation" and similar.
    options_summary = (
        "Optional arguments: --help print help message --man print complete "
        "documentation --aligner Aligner (default : BWA). User can specify BWA, "
        "BOWTIE, TOPHAT or ALL to do alignment using all alingers. --adapter "
        "Remove adapters from raw reads. --trim Trim low quality reads. --genome "
        "Input reference genome with full path."
    )
    usage = (
        "usage: autopipeline.pl [arguments]\n\n"
        + options_summary
        + "\nsampleSheet.txt has to be in working directory."
    )

    parser = OptionParser(usage)
    parser.add_option(
        "--aligner", dest="aligner", default="BWA",
        help="Aligner to align sequencing reads to refrence sequences. \n"
             "The default aligner is BWA but user can specify BOWTIE and "
             "internally it will use BOWTIE 2, TOPHAT or choose ALL. \n"
             "If no parameter are specified after prompt, the pipeline uses "
             "specified aligner with all default settings.")
    parser.add_option(
        "--man", dest="man", action="store_true", default=False,
        help="Print complete manual of the program")
    parser.add_option(
        "--adapter", dest="adapter", default="yes",
        help="Remove adapters from raw reads and perform quality control "
             "check on the resulting reads. \n"
             "The default option is 'yes' but user can specify 'no' to skip "
             "this step. \n"
             "If this parameter is set as yes, the pipeline will remove "
             "adaptors from raw reads and generate report after performing "
             "quality check.")
    parser.add_option(
        "--trim", dest="trim", default="yes",
        help="Trim the low quality reads. \n"
             "The default option is 'yes' but user can specify 'no' to skip "
             "this step. \n"
             "If this parameter is set as yes, the pipeline will remove the "
             "low quality reads and generate report after performing quality "
             "check.")
    parser.add_option(
        "--genome", dest="genome", default="./genome/RefGenome.fa",
        help="Reference genomepathway and file name. \n"
             "The default option is './genome/RefGenome.fa'. This is a "
             "mandatory option. \n"
             "You have to give pipepline the correct reference genome path "
             "and file name.")
    parser.add_option(
        "--para_map", dest="para_map", default="",
        help="parameters for mapping. \n"
             "The default options is alinger defualt. This is an optional "
             "option.")

    (options, args) = parser.parse_args(argv)
    ALIGNER = options.aligner
    ADAPTER = options.adapter
    TRIM = options.trim
    GENOME = options.genome
    PARA_MAP = options.para_map

    if options.man:
        print("SYNOPSIS:autopipeline.pl [arguments]\n\n"
              + options_summary
              + " sampleSheet.txt has to be in working directory.\n")
        print(parser.format_option_help())
        parser.exit()
if __name__ == '__main__':
    main()

# Working directory handed to every qsub job as ``mycwd``.
mydir = os.getcwd()
print("testing:"+mydir)
print("testing:"+ALIGNER)
print("testing:"+GENOME)
##################################################
####index genome#####
# The previous test was the always-true string literal "$GENOME", so the
# "genome does not exist" branch could never run; check the file instead.
if os.path.isfile(GENOME):
    ### submit a job to build indices
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    # NOTE(review): GENOME is used verbatim in the job name, so the default
    # "./genome/RefGenome.fa" puts '/' characters into -N -- confirm the
    # scheduler accepts that.
    index_cmd = [
        'qsub',
        '-v', 'reffname='+GENOME+',align='+ALIGNER+',mycwd='+mydir,
        '-N', 'job_buildIndex_'+GENOME,
        '/ingens/home/ypfeng/clear/Script/optionalindex.sh',
    ]
    try:
        # universal_newlines=True yields text on both Python 2 and 3, so the
        # split('.') below works on either interpreter.
        proc = subprocess.Popen(index_cmd, stdout=subprocess.PIPE,
                                universal_newlines=True)
        data = proc.communicate()[0]
    except OSError as err:
        print("Build Index: failed to run qsub (%s), stopping processing pipeline!" % err)
        sys.exit(1)
    # Only the first output line is used; the job id is the part before the
    # first '.'.
    first_line = data.splitlines()[0] if data else ''
    jobid_index = first_line.split('.', 1)[0].strip()
    if proc.returncode != 0 or not jobid_index:
        # Later submissions depend on this id (depend=afterok:<id>); an empty
        # id would make every one of them malformed.
        print("Build Index: qsub did not return a job id, stopping processing pipeline!")
        sys.exit(1)
    print(jobid_index)
    print('jobid_indexr Submitted - Build Index')
else:
    print("Referece genome: reference genome does not exists, stopping processing pipeline!")
    # Non-zero status so calling scripts can tell the pipeline did not start.
    sys.exit(1)
################################################
####read samplesheet#####
# Function LoadSampleSheet
# Input: "file_name" is the name of the sample sheet file
# Output: a two dimensional list will be returned
# Call example: data = LoadSampleSheet("sampleSheet.txt");
# Specification: 1: sample sheet may contain comment lines, labeled with a leading '#'
# 2: each line must have 6 fields
# 3: the file should be tab-delimited
def LoadSampleSheet(file_name):
    """Read a sample sheet and return its rows as a list of field lists.

    Each data line is split on whitespace.  Blank lines and lines whose
    first non-blank character is '#' are skipped.  A row that does not have
    exactly 6 fields triggers a warning but is still returned, as before.

    Args:
        file_name: Path of the sample sheet file.

    Returns:
        A list with one list of string fields per data line.

    Raises:
        IOError: If the file cannot be opened.  The failure message is
            printed first and the error is then re-raised (previously
            execution fell through to a NameError on the unopened handle).
    """
    try:
        fh = open(file_name, 'r')
    except IOError:
        print("LoadSampleSheet: failed to open file %s." % file_name)
        raise

    sample_info = []
    # 'with' closes the handle even if reading fails part-way through.
    with fh:
        for line in fh:
            stripped = line.strip()
            # A blank line used to be appended as an empty row, which breaks
            # callers that index row[0].
            if not stripped or stripped.startswith('#'):
                continue
            info = stripped.split()
            if len(info) != 6:
                print("LoadSampleSheet: error in table size, file may be corrupted.")
            sample_info.append(info)
    return sample_info
#################################################
####call mapping#####
# under the same directory
_PREPRO_SCRIPT = '/ingens/solid/analysis/yaping/pipeline2/Script/job_prepro.sh'
_PE_SCRIPT = '/ingens/solid/analysis/yaping/pipeline2/Script/job_pe.sh'


def _submit_job(qsub_args):
    """Run ``qsub`` with *qsub_args* and return the id of the submitted job.

    The id is the part of qsub's first output line before the first '.'.
    The pipeline is stopped (exit status 1) when qsub cannot be started,
    exits non-zero, or prints no job id, because that id is needed for the
    ``depend=afterok:`` clause of the following submissions.
    """
    try:
        # Argument list instead of a shell string: values that contain
        # spaces (for example mapping parameters) stay in one argument.
        # universal_newlines=True yields text on both Python 2 and 3.
        proc = subprocess.Popen(['qsub'] + qsub_args, stdout=subprocess.PIPE,
                                universal_newlines=True)
        output = proc.communicate()[0]
    except OSError as err:
        print("qsub could not be started (%s), stopping processing pipeline!" % err)
        sys.exit(1)
    first_line = output.splitlines()[0] if output else ''
    job_id = first_line.split('.', 1)[0].strip()
    if proc.returncode != 0 or not job_id:
        print("qsub did not return a job id, stopping processing pipeline!")
        sys.exit(1)
    return job_id


####fastq preprocessing
slist = LoadSampleSheet("sampleSheet.txt")
for row in slist:
    # NOTE(review): the original indentation was lost; the trailing 'else'
    # is read here as belonging to the "row type is 'p'" test, i.e. the
    # pipeline stops at the first row that is not of type 'p'.  It used to
    # print jobid_pe there, which is undefined for the first row and stale
    # afterwards, so the message now names the offending row instead.
    if len(row) < 3 or row[0] != 'p':
        print("sampleSheet.txt row "+repr(row)+' preprocessing stopped!')
        sys.exit(1)

    # One preprocessing job per read file, both waiting on the index build.
    prepro_ids = []
    for fastq in (row[1], row[2]):
        prepro_id = _submit_job([
            '-W', 'depend=afterok:'+jobid_index,
            '-v', 'fname='+fastq+',mycwd='+mydir+',adapter='+ADAPTER+',tr='+TRIM,
            '-N', 'prepro_'+fastq,
            _PREPRO_SCRIPT,
        ])
        print(prepro_id+' submitted!')
        prepro_ids.append(prepro_id)
    jobid_pp1, jobid_pp2 = prepro_ids

    ####mapping
    # The paired-end mapping job starts only after both preprocessing jobs
    # finished successfully.
    jobid_pe = _submit_job([
        '-W', 'depend=afterok:'+jobid_pp1+':'+jobid_pp2,
        '-v', 'fname1='+row[1]+',fname2='+row[2]+',align='+ALIGNER
              +',reffname='+GENOME+',mycwd='+mydir+',para='+PARA_MAP
              +',adapter='+ADAPTER+',tr='+TRIM,
        '-N', 'mappe_'+row[1],
        _PE_SCRIPT,
    ])
    print(jobid_pe+' submitted!')