/
fastq_operations.py
97 lines (74 loc) · 3.6 KB
/
fastq_operations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
__author__ = 'ritesh'
import os, glob, logging
import subprocess
import read_xml as rx
def validateFiles(dataLocation):
    """Locate FASTQ input at ``dataLocation``.

    ``dataLocation`` may name a directory or a single file.

    * Directory: ``*.fq`` files are looked up first; ``*.fastq`` files are
      only looked up when no ``*.fq`` file is present. The matching paths
      are returned as a sorted list.
    * Anything else that exists: the path itself (a string, not a list) is
      returned when its extension is exactly ``.fq`` or ``.fastq``.

    Returns ``None`` when the path does not exist, when the directory holds
    no matching file, or when the file has a different extension. Every
    outcome is logged; some are additionally printed to stdout.
    """
    if not os.path.exists(dataLocation):
        logging.error("Input FASTQ data location %s doesn't exist", dataLocation)
        return None

    if os.path.isdir(dataLocation):
        logging.info('Input FASTQ data location %s directory exists', dataLocation)

        # glob order depends on the filesystem; sort so runs are reproducible.
        fq_files = sorted(glob.glob(os.path.join(dataLocation, '*.fq')))
        if fq_files:
            return fq_files

        logging.info("Input FASTQ data location %s doesn't contain any .fq file",
                     dataLocation)

        # Fall back to the long extension only when no .fq file was found.
        fq_files = sorted(glob.glob(os.path.join(dataLocation, '*.fastq')))
        if not fq_files:
            # Parenthesised single-argument print is valid on Python 2 and 3.
            print('No .fq or .fastq files found')
            logging.error("Input FASTQ data location %s doesn't contain any "
                          ".fq or .fastq file", dataLocation)
            return None

        logging.info('Input FASTQ data location %s contains %d .fastq files',
                     dataLocation, len(fq_files))
        return fq_files

    # Not a directory: accept it only if it carries a FASTQ extension.
    extension = os.path.splitext(dataLocation)[1]
    if extension in ('.fq', '.fastq'):
        print('Input FASTQ data location ' + dataLocation + ' regular file exists')
        logging.info('Input FASTQ data location %s regular file exists', dataLocation)
        return dataLocation

    print('Input FASTQ data location ' + dataLocation
          + ' doesn\'t have .fq or .fastq extension')
    # Logged as an error, like the other rejections in this function.
    logging.error("Input FASTQ data location %s doesn't have .fq or .fastq extension",
                  dataLocation)
    return None
def executeFastqcCommand(rawReadsLocation, outputDir):
    """Run FastQC once per read file, passing ``outputDir`` via ``-o``.

    Args:
        rawReadsLocation: an iterable of FASTQ file paths, or a single path
            string. The single-string form is accepted because
            ``validateFiles`` returns a bare string for single-file input;
            iterating that string directly would launch FastQC once per
            character.
        outputDir: directory handed to FastQC with ``-o``; created when it
            does not exist yet.

    The executable location is looked up through
    ``rx.extractLocationFromExecutables`` using the ``FASTQC`` node of
    ``configs/executables.xml``. That path is relative, so it is resolved
    against the current working directory.

    Returns ``None``. A non-zero FastQC exit status is logged as an error
    and the remaining files are still processed.
    """
    # Determine the location of the executable.
    xmlFile = 'configs/executables.xml'
    nodeName = 'FASTQC'
    fastqc_path = rx.extractLocationFromExecutables(xmlFile, nodeName)

    # Wrap a lone path so the loop below sees one file, not its characters.
    # type(u'') is `unicode` on Python 2 and `str` on Python 3.
    if isinstance(rawReadsLocation, (str, bytes, type(u''))):
        rawReadsLocation = [rawReadsLocation]

    # Create the output directory if it doesn't exist.
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
        logging.info('Directory created at ' + outputDir)

    # Build and execute one FastQC command per read file.
    for readFile in rawReadsLocation:
        fastqc_command = [fastqc_path, "-o", outputDir, readFile]
        logging.info('Fastqc command - ' + ' '.join(fastqc_command))
        exit_status = subprocess.call(fastqc_command)
        if exit_status != 0:
            logging.error('Fastqc exited with status %d for %s',
                          exit_status, readFile)
def performFastqc(rawReadsDir, outputDir):
    """Validate the FASTQ input location and run FastQC on what it yields.

    Args:
        rawReadsDir: directory of FASTQ files, or a single FASTQ file path;
            passed to ``validateFiles``.
        outputDir: directory passed on to ``executeFastqcCommand``.

    Raises:
        SystemExit: with status 1 when ``validateFiles`` returns ``None``,
            so the calling shell or pipeline sees the failure.
    """
    fqFiles = validateFiles(rawReadsDir)
    if fqFiles is None:
        logging.error('Exiting...No .fq files found')
        # Non-zero status: this is an error exit. Raising SystemExit directly
        # also avoids the `exit` helper, which only exists when the `site`
        # module has been loaded.
        raise SystemExit(1)

    # validateFiles returns a bare string for single-file input; wrap it so
    # downstream iteration yields the path rather than its characters.
    # type(u'') is `unicode` on Python 2 and `str` on Python 3.
    if isinstance(fqFiles, (str, bytes, type(u''))):
        fqFiles = [fqFiles]

    executeFastqcCommand(fqFiles, outputDir)
if __name__ == "__main__":
    # Script entry point.
    #
    # Usage: python fastq_operations.py [FASTQ_DIR_OR_FILE [OUTPUT_DIR]]
    #
    # Both arguments are optional; when omitted, the developer's original
    # test locations below are used, so a bare invocation behaves as before.
    import sys

    # The root logger defaults to WARNING with no handler configured, so the
    # logging.info() records emitted by this module would otherwise be lost.
    # basicConfig is a no-op if handlers were already installed elsewhere.
    logging.basicConfig(level=logging.INFO)

    cliArgs = sys.argv[1:]
    if len(cliArgs) > 0:
        dataLocationDir = cliArgs[0]
    else:
        dataLocationDir = '/home/ritesh/Ritesh_Work/Data/Test'
    if len(cliArgs) > 1:
        outputDir = cliArgs[1]
    else:
        outputDir = '/home/ritesh/Ritesh_Work/Data/Temp'

    performFastqc(dataLocationDir, outputDir)