# module-level imports required by the methods below; the pipeline's helper
# functions (create_outputdir, print_step, open_logfile, ...) are imported
# elsewhere in this package
import os
import shlex
import subprocess
import sys

def blastn(self, outputdir):
    # create a dir for the output
    create_outputdir(outputdir)

    # blastn can only run with fasta files, so the input has to be converted
    if is_fastq(self.input):
        # print information about the current step on stdout
        print_step(self.step_number, 'Annotation', 'convert fastq files',
                   cut_path(self.input))
        newline()
        self.input = convert_fastq(self.input, self.blast_dir, self.converter_exe)

    # blastn can only annotate one file, so paired input has to be merged
    if is_paired(self.input):
        # print information about the current step on stdout
        print_step(self.step_number, 'Annotation', 'merging reads into one file',
                   cut_path(self.input))
        newline()
        self.input = merge_files(self.input, self.blast_dir, 'merged', 'fasta')

    # define the output file for the blastn results
    outfile = outputdir + os.sep + blast_output(self.outfmt)

    # print information about the current step on stdout
    print_step(self.step_number, 'Annotation',
               'blast sequences against nt database', self.blast_parameter)
    newline()

    # start blastn and wait until completion; no logfile is required,
    # because blastn has no log function and writes nothing to stdout
    p = subprocess.Popen(shlex.split('%s -db %s -query %s -out %s -num_threads %s %s' %
                                     (self.blastn_exe,
                                      self.blastn_db,
                                      to_string(self.input),
                                      outfile,
                                      self.threads,
                                      self.blast_parameter)),
                         stderr=open_logfile(self.logdir + 'blastn.err.log'))
    # wait until the process is complete
    p.wait()

    if p.returncode:
        raise BlastnException(self.logdir + 'blastn.err.log')
    else:
        # remove the temporary files: converted fastq files and merged fasta files
        remove_file(outputdir + os.sep, 'converted', 'fasta')
        remove_file(outputdir + os.sep, 'merged', 'fasta')
        # remove unused error logs
        remove_empty_logfile(self.logdir + 'blastn.err.log')
        # print a summary of the process after completion
        print_verbose('Annotation with blastn complete \n')
        print_running_time(self.time)
        newline()
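# The spawn-wait-check pattern above (redirect stderr to a logfile, raise on
# a nonzero exit code) recurs throughout this module. Below is a minimal,
# self-contained sketch of that pattern; run_logged and its RuntimeError
# handling are illustrative assumptions, not part of the pipeline API.
import shlex
import subprocess

def run_logged(cmd, errlog_path):
    """Run `cmd`, sending stderr to a logfile; raise if it exits nonzero."""
    with open(errlog_path, 'w') as errlog:
        p = subprocess.Popen(shlex.split(cmd), stderr=errlog)
        p.wait()
    if p.returncode:
        raise RuntimeError('command failed, see %s' % errlog_path)

# hypothetical usage: run_logged('blastn -version', '/tmp/blastn.err.log')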
def concatinate(self, outputdir):
    # create a dir for the output
    create_outputdir(outputdir)

    # print information about the current step on stdout
    print_step(self.step_number, 'Assembly', 'Concatenate Reads',
               self.concat_parameter)
    newline()

    # open the logfile
    logfile = open_logfile(self.logdir + 'concatination.log')

    # start the program FLASH with parameters from the conf file;
    # errors will be piped to an extra error logfile
    p = subprocess.Popen(shlex.split('%s -t %d -d %s %s %s' %
                                     (self.flash_exe,
                                      self.threads,
                                      outputdir,
                                      self.concat_parameter,
                                      self.input)),
                         stdout=subprocess.PIPE,
                         stderr=open_logfile(self.logdir + 'flash.err.log'))

    # while FLASH is running, print its output (verbose or compact) and
    # update the logfile; read each line exactly once so that nothing
    # is printed to the screen but lost from the log, or vice versa
    while p.poll() is None:
        line = p.stdout.readline()
        if self.verbose:
            print_verbose(line)
        else:
            print_compact(line.rstrip('\n'))
        logfile.write(line)

    # wait until the process is finished
    p.wait()

    if p.returncode:
        raise FlashException(self.logdir + 'flash.err.log')
    else:
        # remove empty error logs
        remove_empty_logfile(self.logdir + 'flash.err.log')
        # print a summary of the process after completion
        newline()
        print_verbose('Concatenation complete \n')
        print_running_time(self.time)
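# The read-echo-log loop above reappears in several methods of this module.
# A hedged sketch of how it could be factored into a single helper; the
# name stream_output is an illustrative assumption, not existing pipeline
# code, though it reuses the module's own print helpers.
def stream_output(proc, logfile, verbose):
    """Echo a child's stdout line by line and mirror it into the logfile."""
    while proc.poll() is None:
        line = proc.stdout.readline()
        if not line:
            continue
        if verbose:
            print_verbose(line)
        else:
            print_compact(line.rstrip('\n'))
        logfile.write(line)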
def parse_to_db(self, input, output):
    # create a dir for the output
    create_outputdir(output)

    # generate the filename for the db
    outfile = output + os.sep + self.parser_name + '.db'

    # remove old databases with the same name
    if os.path.exists(outfile):
        os.remove(outfile)

    # print information about the current step on stdout
    print_step(self.step_number, 'Analysis',
               'Parse database from blast results', self.parser_parameter)
    newline()

    # start the parser and wait until completion
    p = subprocess.Popen(shlex.split('%s -o %s %s %s' %
                                     (self.parser_exe,
                                      outfile,
                                      self.parser_parameter,
                                      input)),
                         stdout=subprocess.PIPE,
                         stderr=open_logfile(self.logdir + 'parser.err.log'))

    # print status information while the parser is running;
    # read each line exactly once
    while p.poll() is None:
        line = p.stdout.readline()
        if self.verbose:
            print_verbose(line)
        else:
            print_compact(line.rstrip('\n'))

    # wait until the process is complete
    p.wait()

    # save the exit code for later function calls
    self.exitcode = p.returncode

    # raise an exception when an error occurs during processing
    if p.returncode:
        raise ParserException(self.logdir + 'parser.err.log')
    else:
        # remove empty error logs
        remove_empty_logfile(self.logdir + 'parser.err.log')
        # print a summary of the process after completion
        print_verbose('Parsing of blast XML file complete \n')
        print_running_time(self.time)
        newline()
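# The .db file written above is later checked with is_db() in krona_report().
# A hedged sketch of one way such a check could work: the first 16 bytes of
# every SQLite database file are the fixed magic header below. This is an
# assumption about is_db(), not its actual implementation.
def looks_like_sqlite(path):
    """Return True if `path` starts with the SQLite 3 magic header."""
    with open(path, 'rb') as fh:
        return fh.read(16) == b'SQLite format 3\x00'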
def assemble_reads(self, outputdir):
    # create a dir for the output
    create_outputdir(outputdir)

    # print information about the current step on stdout
    print_step(self.step_number, 'Assembly', 'Creating Hashmaps',
               self.velveth_parameter)
    newline()

    # open the first logfile
    velveth_log = open_logfile(self.logdir + 'velveth.log')

    # start the program velveth with parameters from the conf file and
    # automatic detection of the input file format;
    # errors will be piped to an extra error logfile
    p = subprocess.Popen(shlex.split('%s %s %s %s -fmtAuto %s' %
                                     (self.velveth_exe,
                                      outputdir,
                                      self.kmer,
                                      self.velveth_parameter,
                                      self.input)),
                         stdout=subprocess.PIPE,
                         stderr=open_logfile(self.logdir + 'velveth.err.log'))

    # while velveth is running, print its output in verbose mode and
    # update the logfile; read each line exactly once
    while p.poll() is None:
        line = p.stdout.readline()
        if self.verbose:
            print_verbose(line)
        velveth_log.write(line)

    # wait until the process is finished
    p.wait()

    if p.returncode:
        raise VelvetHException(self.logdir + 'velveth.err.log')
    else:
        # remove empty error logs
        remove_empty_logfile(self.logdir + 'velveth.err.log')

    # print information about the current step on stdout
    print_step(self.step_number, 'Assembly', 'Creating Graph',
               self.velvetg_parameter)
    newline()

    # open the second logfile
    velvetg_log = open_logfile(self.logdir + 'velvetg.log')

    # start the program velvetg in the dir of velveth, with the parameters
    # from the conf file; errors will be piped to an extra error logfile
    p = subprocess.Popen(shlex.split('%s %s %s' %
                                     (self.velvetg_exe,
                                      outputdir,
                                      self.velvetg_parameter)),
                         stdout=subprocess.PIPE,
                         stderr=open_logfile(self.logdir + 'velvetg.err.log'))

    # while velvetg is running, print its output in verbose mode and
    # update the logfile; read each line exactly once
    while p.poll() is None:
        line = p.stdout.readline()
        if self.verbose:
            print_verbose(line)
        velvetg_log.write(line)

    # wait until the process is finished
    p.wait()

    if p.returncode:
        raise VelvetGException(self.logdir + 'velvetg.err.log')
    else:
        # remove empty error logs
        remove_empty_logfile(self.logdir + 'velvetg.err.log')

    # print information about the current step on stdout
    print_step(self.step_number, 'Assembly', 'Metagenomic Assembly',
               self.metavelvet_parameter)
    newline()

    # open the third logfile
    meta_log = open_logfile(self.logdir + 'metavelvet.log')

    # start the program meta-velvetg in the dir of velveth and velvetg,
    # with the parameters from the conf file;
    # errors will be piped to an extra error logfile
    p = subprocess.Popen(shlex.split('%s %s %s' %
                                     (self.metavelvet_exe,
                                      outputdir,
                                      self.metavelvet_parameter)),
                         stdout=subprocess.PIPE,
                         stderr=open_logfile(self.logdir + 'metavelvet.err.log'))

    # while meta-velvetg is running, print its output in verbose mode and
    # update the logfile; read each line exactly once
    while p.poll() is None:
        line = p.stdout.readline()
        if self.verbose:
            print_verbose(line)
        meta_log.write(line)

    # wait until the process is finished
    p.wait()

    if p.returncode:
        raise MetaVelvetException(self.logdir + 'metavelvet.err.log')
    else:
        # remove empty error logs
        remove_empty_logfile(self.logdir + 'metavelvet.err.log')
        newline()
        # print a summary of the process after completion
        print_verbose('Assembly complete \n')
        print_running_time(self.time)
        newline()
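# The three assembly stages above (velveth, velvetg, meta-velvetg) repeat
# one spawn-stream-check pattern. A hedged refactoring sketch of how that
# could collapse into a single helper method; run_stage is an illustrative
# name, not existing pipeline code, though it reuses the module's helpers.
def run_stage(self, cmd, logname, exc_cls):
    """Run one assembly stage, logging stdout and raising exc_cls on failure."""
    log = open_logfile(self.logdir + logname + '.log')
    err_path = self.logdir + logname + '.err.log'
    p = subprocess.Popen(shlex.split(cmd),
                         stdout=subprocess.PIPE,
                         stderr=open_logfile(err_path))
    while p.poll() is None:
        line = p.stdout.readline()
        if line:
            if self.verbose:
                print_verbose(line)
            log.write(line)
    p.wait()
    if p.returncode:
        raise exc_cls(err_path)
    remove_empty_logfile(err_path)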
def metacv(self, outputdir):
    # create a dir for the output
    create_outputdir(outputdir)

    # select the input for MetaCV and convert it into a usable format
    if self.contigs:
        input = to_string(self.input)
    else:
        input = to_string(self.raw)

    # print information about the current step on stdout
    print_step(self.step_number, 'Annotation',
               'Annotate bacterial reads with MetaCV',
               '%s %s %s' % (self.metacv_seq, self.metacv_mode, self.metacv_orf))
    newline()

    # MetaCV supports a maximum of 64 threads, so the parameter
    # has to be clamped
    threads = min(self.threads, 64)

    classify = open_logfile(self.logdir + 'metacv.classify.log')
    # start MetaCV's classify function and wait until completion
    p = subprocess.Popen(shlex.split('%s classify %s %s %s %s %s %s --threads=%s' %
                                     (self.metacv_exe,
                                      self.metacv_db,
                                      input,
                                      self.metacv_name,
                                      self.metacv_seq,
                                      self.metacv_mode,
                                      self.metacv_orf,
                                      threads)),
                         stderr=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         cwd=outputdir + os.sep)

    # MetaCV reports progress on stderr; pipe it to the screen and the
    # logfile while processing, reading each line exactly once
    while p.poll() is None:
        line = p.stderr.readline()
        if self.verbose:
            print_verbose(line)
        else:
            print_compact(line.rstrip('\n'))
        classify.write(line)

    # wait until the process is finished
    p.wait()

    if p.returncode:
        raise MetaCVException(self.logdir + 'metacv.classify.log')
    else:
        # remove unused error logs
        remove_empty_logfile(self.logdir + 'metacv.classify.log')

    # print information about the current step on stdout
    print_step(self.step_number, 'Annotation',
               'Analyse the results of MetaCV',
               '%s %s %s' % (self.metacv_total_reads,
                             self.metacv_min_qual,
                             self.metacv_taxon))
    newline()

    res2table = open_logfile(self.logdir + 'metacv.res2table.log')
    # start MetaCV's res2table function and wait until completion
    p = subprocess.Popen(shlex.split('%s res2table %s %s %s %s %s %s --threads=%s' %
                                     (self.metacv_exe,
                                      self.metacv_db,
                                      to_string(update_reads(outputdir, 'metpipe', 'res')),
                                      self.metacv_name + '.res2table',
                                      self.metacv_total_reads,
                                      self.metacv_min_qual,
                                      self.metacv_taxon,
                                      threads)),
                         stderr=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         cwd=outputdir + os.sep)

    # pipe the stderr output to the screen and the logfile while processing
    while p.poll() is None:
        line = p.stderr.readline()
        if self.verbose:
            print_verbose(line)
        else:
            print_compact(line.rstrip('\n'))
        res2table.write(line)

    # wait until the process is finished
    p.wait()

    if p.returncode:
        raise MetaCVSumException(self.logdir + 'metacv.res2table.log')
    else:
        # remove unused error logs
        remove_empty_logfile(self.logdir + 'metacv.res2table.log')

    # print information about the current step on stdout
    print_step(self.step_number, 'Annotation',
               'Summarize the results of MetaCV', self.metacv_min_qual)
    newline()

    res2sum = open_logfile(self.logdir + 'metacv.res2sum.log')
    # start MetaCV's res2sum function and wait until completion;
    # the working dir must be specified to maintain the correct
    # order of output files
    p = subprocess.Popen(shlex.split('%s res2sum %s %s %s %s' %
                                     (self.metacv_exe,
                                      self.metacv_db,
                                      to_string(update_reads(outputdir, 'metpipe', 'res')),
                                      self.metacv_name + '.res2sum',
                                      self.metacv_min_qual)),
                         stderr=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         cwd=outputdir + os.sep)

    # pipe the stderr output to the screen and the logfile while processing
    while p.poll() is None:
        line = p.stderr.readline()
        if self.verbose:
            print_verbose(line)
        else:
            print_compact(line.rstrip('\n'))
        res2sum.write(line)

    # wait until the process is finished
    p.wait()

    if p.returncode:
        raise MetaCVSumException(self.logdir + 'metacv.res2sum.log')
    else:
        # remove unused error logs
        remove_empty_logfile(self.logdir + 'metacv.res2sum.log')
        # print a summary of the process after completion
        print_verbose('Annotation with MetaCV complete \n')
        print_running_time(self.time)
        newline()
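# All three MetaCV calls above rely on Popen's cwd argument: the child runs
# inside outputdir, so MetaCV's relative output names (self.metacv_name and
# friends) land in that directory. A tiny self-contained illustration of the
# mechanism; the command and the /tmp path are examples only.
import subprocess
import sys

p = subprocess.Popen([sys.executable, '-c', 'import os; print(os.getcwd())'],
                     cwd='/tmp')
p.wait()   # prints /tmp (or its resolved path): the child's working dir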
def krona_report(self, input, output, parser_output):
    # create a dir for the output
    create_outputdir(output)

    # generate the path and name of the output file
    outfile = output + os.sep + self.krona_name + '.html'

    # test the type of the input file
    if is_tabular(input):
        # print information about the current step on stdout
        print_step(self.step_number, 'Analysis',
                   'Create Overview from tabular output', self.krona_parameter)
        newline()

        # start the Krona import script for Blast tabular output;
        # pipe all stdout output into a logfile
        p = subprocess.Popen(shlex.split('perl -l %s %s -o %s %s %s' %
                                         (self.perl_lib,
                                          self.krona_exe,
                                          outfile,
                                          self.krona_parameter,
                                          to_string(input))),
                             stdout=open_logfile(self.logdir + 'krona.log'),
                             stderr=open_logfile(self.logdir + 'krona.err.log'))
        # wait until the process is complete
        p.wait()

        if p.returncode:
            raise KronaException(self.logdir + 'krona.err.log')
        else:
            # remove unused error logs
            remove_empty_logfile(self.logdir + 'krona.err.log')
            # print a summary of the process after completion
            print_verbose('Creation of Krona Pie Chart complete \n')
            print_running_time(self.time)
            newline()

    elif is_xml(input) and is_db(parser_output):
        print_step(self.step_number, 'Analysis',
                   'Create Overview from XML output', self.krona_parameter)

        # convert the values from the database to tabular format
        extract_tabular(to_string(parser_output), output)
        # set the new input
        input = update_reads(output, 'extracted_from_DB', 'tab')

        # start the Krona import script for Blast tabular output;
        # pipe all stdout output into a logfile
        p = subprocess.Popen(shlex.split('perl -l %s %s -o %s %s %s' %
                                         (self.perl_lib,
                                          self.krona_exe,
                                          outfile,
                                          self.krona_parameter,
                                          to_string(input))),
                             stdout=open_logfile(self.logdir + 'krona.log'),
                             stderr=open_logfile(self.logdir + 'krona.err.log'))
        # wait until the process is complete
        p.wait()

        if p.returncode:
            raise KronaException(self.logdir + 'krona.err.log')
        else:
            # remove unused error logs
            remove_empty_logfile(self.logdir + 'krona.err.log')
            # print a summary of the process after completion
            print_verbose('Creation of Krona Pie Chart complete \n')
            print_running_time(self.time)
            newline()

    # input that is neither tabular nor XML cannot be imported by Krona
    elif not is_tabular(input) and not is_xml(input):
        raise KronaFormatException()

    # remaining case: XML input without a usable parser database
    else:
        print_verbose('ERROR 25: Krona Report could not be generated, '
                      'because of unknown reasons')
        sys.exit(1)
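# The branching above hinges on is_tabular()/is_xml(). A hedged sketch of
# plausible checks; the real helpers may differ. Two grounded facts: BLAST
# tabular output (-outfmt 6) has 12 tab-separated standard columns, and
# BLAST XML output begins with an XML prolog.
def guess_is_xml(path):
    """Heuristic: does the file start with an XML prolog?"""
    with open(path) as fh:
        return fh.readline().lstrip().startswith('<?xml')

def guess_is_tabular(path):
    """Heuristic: does the first line have the 12 BLAST outfmt-6 columns?"""
    with open(path) as fh:
        first = fh.readline().rstrip('\n')
    return len(first.split('\t')) >= 12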