def executeFastqcCommand(rawReadsLocation, outputDir):
    '''
    Run FastQC once per read file, directing its output to outputDir.

    :param rawReadsLocation: Iterable of paths to the raw read files. A single
        path given as a string is also accepted.
    :param outputDir: Directory passed to FastQC via "-o"; created if missing.
    :return: None. A non-zero FastQC exit status is logged as an error.
    '''
    # Determine the location of executable
    xmlFile = 'configs/executables.xml'
    nodeName = 'FASTQC'
    fastqc_path = rx.extractLocationFromExecutables(xmlFile, nodeName)

    # A bare string would otherwise be iterated character by character,
    # launching FastQC on every single letter of the path.
    if isinstance(rawReadsLocation, str):
        rawReadsLocation = [rawReadsLocation]

    # create output dir if doesn't exist
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
        logging.info('Directory created at ' + outputDir)

    # create fastqc command and execute
    for readFile in rawReadsLocation:
        fastqc_command = [fastqc_path, "-o", outputDir, readFile]
        logging.info('Fastqc command - ' + ' '.join(fastqc_command))
        return_code = subprocess.call(fastqc_command)
        if return_code != 0:
            # Keep processing the remaining files, but do not hide the failure
            logging.error('Fastqc exited with status %s for %s', return_code, readFile)
def call_kmergenie(dataLocation, outputDir):
    '''
    The function calls kmer-genie and captures screen output in report.txt file

    :param dataLocation: Location containing .fq/.fastq files
    :param outputDir: Location where the output will be written
    :return: 0 once kmer-genie has finished. A non-zero exit status of the
        tool is logged as an error.
    :raises SystemExit: With status 1 when the parameter file could not be created.

    Side effect: the working directory of the current process is changed to
    outputDir and is not restored.
    '''
    import sys

    # Determine the location of executable
    xmlFile = 'configs/executables.xml'
    nodeName = 'KMERGENIE'
    kmergenie_path = rx.extractLocationFromExecutables(xmlFile, nodeName)

    input_parameterFile = prepare_inputFile(dataLocation, outputDir)
    if input_parameterFile is None:
        logging.error('Error creating kmer-genie parameter file')
        # Exit with a failure status; this is an error path
        sys.exit(1)

    # Resolve the output directory BEFORE changing into it: a relative
    # outputDir joined after the chdir would point at outputDir/outputDir.
    outputDir = os.path.abspath(outputDir)

    # Need to change dir as the tool doesn't provide option to specify output path
    # NOTE(review): kmergenie_path and input_parameterFile are used after this
    # chdir; if either can be a relative path it will be resolved against
    # outputDir - confirm against rx / prepare_inputFile.
    os.chdir(outputDir)

    reportFile = os.path.join(outputDir, 'report.txt')
    kmergenie_command = [kmergenie_path, input_parameterFile]
    print('kmergenie command - ' + ' '.join(kmergenie_command))
    logging.info('kmergenie: kmergenie command - ' + ' '.join(kmergenie_command))

    # Let the tool write stdout and stderr straight into the report file. This
    # keeps the output verbatim, waits for the tool to finish, and the "with"
    # block closes the file even if launching the tool fails.
    with open(reportFile, mode='w') as report:
        return_code = subprocess.call(kmergenie_command, stdout=report, stderr=subprocess.STDOUT)

    if return_code != 0:
        logging.error('kmergenie: kmergenie exited with status %s', return_code)
    return 0
def executeFastqTrim_sickle(dataLocation, se_or_pe, outputDir, quality_type = 'illumina', quality_threhold = 20, *pairedFiles):
    '''
    Quality-trim FASTQ files with sickle and record the tool's screen output
    in sickleReport.txt inside outputDir.

    :param dataLocation: Folder containing the input FASTQ files
    :param se_or_pe: 'S' (single-end) or 'P' (paired-end)
    :param outputDir: Folder receiving the trimmed files and the report; created if missing
    :param quality_type: 'illumina', 'sanger' or 'solexa'
    :param quality_threhold: integer value passed to sickle via "-q"
    :param pairedFiles: For 'P' only. The first extra positional argument must be a
        dictionary mapping each forward-read file name to its reverse-read file
        name, e.g. {'ForwardReads.fq': 'ReverseReads.fq'}. Names are relative to dataLocation.
    :return: 0 after all sickle commands have been run; None when an argument is
        invalid, no FASTQ files are found, or a paired input file is missing.
        A non-zero exit status of sickle is logged as an error, not returned.
    '''
    # Validate every argument first so that an invalid call has no side
    # effects (no directory creation, no report file, no config lookup).
    if se_or_pe not in ('S', 'P'):
        logging.error('executeFastqTrim_sickle: Incorrect specification for read data type. Specify S or P, opposed to the provided option - ' + str(se_or_pe))
        return None

    if quality_type not in ('illumina', 'sanger', 'solexa'):
        logging.error('Quality type must be one of these : illumina, sanger, solexa')
        return None

    pairedFiles_dict = None
    if se_or_pe == 'P':
        # A missing pairing must be reported, not raise IndexError
        if not pairedFiles or not isinstance(pairedFiles[0], dict):
            logging.error('executeFastqTrim_sickle: requires dictionary format for using the Paired-end trimming')
            return None
        pairedFiles_dict = pairedFiles[0]

    # Determine the location of executable
    xmlFile = 'configs/executables.xml'
    nodeName = 'SICKLE'
    tool_path = rx.extractLocationFromExecutables(xmlFile, nodeName)

    # create output dir if doesn't exist
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
        logging.info('executeFastqTrim_sickle: Directory created at ' + outputDir)

    # File for recording sickle output
    reportFile = os.path.join(outputDir, 'sickleReport.txt')

    if se_or_pe == 'S':
        # expect a folder with .fq
        fq_files = utilities.get_fastq_files(dataLocation)
        if fq_files is None:
            logging.error('No FASTQ files found.')
            return None

        with open(reportFile, mode='w') as report:
            for fq_file in fq_files:
                outPath = os.path.join(outputDir, os.path.basename(fq_file))
                sickle_command = [tool_path, 'se', '-f', fq_file,
                                  '-t', quality_type, '-q', str(quality_threhold),
                                  '-o', outPath]
                print('sickle command - ' + ' '.join(sickle_command))
                logging.info('executeFastqTrim_sickle: sickle command - ' + ' '.join(sickle_command))
                # The tool writes stdout/stderr directly into the report;
                # call() waits for it to finish before the next file starts.
                return_code = subprocess.call(sickle_command, stdout=report, stderr=subprocess.STDOUT)
                if return_code != 0:
                    logging.error('executeFastqTrim_sickle: sickle exited with status %s for %s', return_code, fq_file)
        return 0

    # Paired-end: expect a folder and pairing of files.
    # The "with" block closes the report on the early return below as well.
    with open(reportFile, mode='w') as report:
        for st1_file, st2_file in pairedFiles_dict.items():
            # Create full path for input files
            st1_file_path = os.path.join(dataLocation, st1_file)
            st2_file_path = os.path.join(dataLocation, st2_file)

            if not (os.path.exists(st1_file_path) and os.path.exists(st2_file_path)):
                # Return with error message if file(s) don't exist
                msg = 'Check if files exist: ' + st1_file_path + ',' + st2_file_path
                logging.error(msg)
                return None

            st1_outPath = os.path.join(outputDir, st1_file)
            st2_outPath = os.path.join(outputDir, st2_file)
            # NOTE(review): the same singles file name is reused for every
            # pair - presumably later pairs overwrite earlier ones; confirm.
            singles_outPath = os.path.join(outputDir, 'trimmedSingles.fastq')
            sickle_command = [tool_path, 'pe', '-f', st1_file_path, '-r', st2_file_path,
                              '-t', quality_type, '-q', str(quality_threhold),
                              '-o', st1_outPath, '-p', st2_outPath, '-s', singles_outPath]
            print('sickle command - ' + ' '.join(sickle_command))
            logging.info('executeFastqTrim_sickle: sickle command - ' + ' '.join(sickle_command))
            return_code = subprocess.call(sickle_command, stdout=report, stderr=subprocess.STDOUT)
            if return_code != 0:
                logging.error('executeFastqTrim_sickle: sickle exited with status %s for %s', return_code, st1_file_path)
    return 0
def removeAdaptors_cutadapt(dataLocation, se_or_pe, adapterDict, fixBaseNumber, outputDir, *pairedFiles):
    '''
    Remove adapters with cutadapt and record the tool's screen output in
    cutadaptReport.txt inside outputDir.

    :param dataLocation: Folder containing the input FASTQ files
    :param se_or_pe: Single or paired end data. Options - 'S' or 'P'
    :param adapterDict: Adapter dictionary in a form compatible with "cutadapt", mapping
        the adapter sequence to its option. Like {ADAPTER: '-a'} for a 3' adapter type,
        {ADAPTER: '-g'} for a 5' adapter, {ADAPTER$: '-a'} for anchored 3' adapter,
        {^ADAPTER: '-g'} for anchored 5' adapter, and {ADAPTER: '-b'} for both 5' and 3'
        adapter type. Similarly, it can take -A, -B and -G for paired versions.
        Details - "http://cutadapt.readthedocs.org/en/stable/guide.html#removing-adapters"
    :param fixBaseNumber: Remove fixed number of bases. -u option like -u 5 (remove first 5 bases)
    :param outputDir: Folder receiving the trimmed files and the report; created if missing
    :param pairedFiles: For 'P' only. The first extra positional argument must be a
        dictionary mapping each forward-read file name to its reverse-read file
        name, e.g. {'ForwardReads.fq': 'ReverseReads.fq'}. Names are relative to dataLocation.
    :return: 0 after all cutadapt commands have been run; None when an argument is
        invalid, no FASTQ files are found, or a paired input file is missing.
        A non-zero exit status of cutadapt is logged as an error, not returned.
    '''
    # Validate every argument first so that an invalid call has no side
    # effects (no directory creation, no report file, no config lookup).
    if se_or_pe not in ('S', 'P'):
        logging.error('removeAdaptors_cutadapt: Incorrect specification for read data type. Specify S or P, opposed to the provided option - ' + str(se_or_pe))
        return None

    # check if correct adapter types specified
    allowedAdapterTypes = ('-a', '-g', '-b', '-A', '-G', '-B')  # As specified by cutadapt
    for adapter_type in adapterDict.values():
        if adapter_type not in allowedAdapterTypes:
            logging.error('removeAdaptors_cutadapt: Wrong keys provided for specifying adapters. Wrong entry found is : ' + str(adapter_type))
            return None

    pairedFiles_dict = None
    if se_or_pe == 'P':
        # A missing pairing must be reported, not raise IndexError
        if not pairedFiles or not isinstance(pairedFiles[0], dict):
            logging.error('removeAdaptors_cutadapt: requires dictionary format for using the Paired-end trimming')
            return None
        pairedFiles_dict = pairedFiles[0]

    # Determine the location of executable
    xmlFile = 'configs/executables.xml'
    nodeName = 'CUTADAPT'
    tool_path = rx.extractLocationFromExecutables(xmlFile, nodeName)

    # create output dir if doesn't exist
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
        logging.info('removeAdaptors_cutadapt: Directory created at ' + outputDir)

    # Each option and each adapter must be its own argv element; a single
    # space-joined string would reach cutadapt as one unparsable argument.
    adapter_args = []
    for adapter, adapter_type in adapterDict.items():
        adapter_args.extend([adapter_type, adapter])
    adapter_parameter_format = ' '.join(adapter_args)
    print('Adapters and parameters :' + adapter_parameter_format)
    logging.info('removeAdaptors_cutadapt: Adapters and parameters :' + adapter_parameter_format)

    # Options shared by every invocation; str() so an integer base count works
    common_args = [tool_path] + adapter_args + ['-u', str(fixBaseNumber)]

    # File for recording cutadapt output
    reportFile = os.path.join(outputDir, 'cutadaptReport.txt')

    if se_or_pe == 'S':
        # expect a folder with .fq
        fq_files = utilities.get_fastq_files(dataLocation)
        if fq_files is None:
            logging.error('No FASTQ files found.')
            return None

        with open(reportFile, mode='w') as report:
            for fq_file in fq_files:
                outPath = os.path.join(outputDir, os.path.basename(fq_file))
                cutadapt_command = common_args + ['-o', outPath, fq_file]
                print('cutadapt command - ' + ' '.join(cutadapt_command))
                logging.info('removeAdaptors_cutadapt: cutadapt command - ' + ' '.join(cutadapt_command))
                # The tool writes stdout/stderr directly into the report;
                # call() waits for it to finish before the next file starts.
                return_code = subprocess.call(cutadapt_command, stdout=report, stderr=subprocess.STDOUT)
                if return_code != 0:
                    logging.error('removeAdaptors_cutadapt: cutadapt exited with status %s for %s', return_code, fq_file)
        return 0

    # Paired-end: expect a folder and pairing of files. Files should be paired
    # in same way as the adapters are specified.
    # The "with" block closes the report on the early return below as well.
    with open(reportFile, mode='w') as report:
        for st1_file, st2_file in pairedFiles_dict.items():
            # Create full path for input files
            st1_file_path = os.path.join(dataLocation, st1_file)
            st2_file_path = os.path.join(dataLocation, st2_file)

            if not (os.path.exists(st1_file_path) and os.path.exists(st2_file_path)):
                # Return with error message if file(s) don't exist
                msg = 'Check if files exist: ' + st1_file_path + ',' + st2_file_path
                logging.error(msg)
                return None

            st1_outPath = os.path.join(outputDir, st1_file)
            st2_outPath = os.path.join(outputDir, st2_file)
            cutadapt_command = common_args + ['-o', st1_outPath, '-p', st2_outPath,
                                              st1_file_path, st2_file_path]
            print('cutadapt command - ' + ' '.join(cutadapt_command))
            logging.info('removeAdaptors_cutadapt: cutadapt command - ' + ' '.join(cutadapt_command))
            return_code = subprocess.call(cutadapt_command, stdout=report, stderr=subprocess.STDOUT)
            if return_code != 0:
                logging.error('removeAdaptors_cutadapt: cutadapt exited with status %s for %s', return_code, st1_file_path)
    return 0