Example #1
 def subset_db(self, input, output, parser_out):
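     """Subset the annotated BLAST database once per configured classifier
     by running the external subsetting script and logging its output."""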
     
     # create a dir for output
     create_outputdir(output)
     
     # because multiple classifiers are possible, the database is subsetted in a loop
     # so that every classifier can be processed
     for i in range(len(self.R_subset_classifier)):
         # print current information about this step on stdout
         print_step(self.step_number, 
                    'Analysis', 
                    'Subset the database for %s' % (self.R_subset_classifier[i]),
                     '--bitscore %s --rank %s' % (self.R_subset_bitscore,
                                                  self.R_subset_rank[i]))
         newline()
         # generate name for database file
         outfile = '%s%s%s%s' % (output, os.sep, self.R_subset_classifier[i], '.db') 
         logfile = open_logfile(self.logdir + self.R_subset_classifier[i] + '.log')
         
         # remove old databases with the same name
         if os.path.exists(outfile):
             os.remove(outfile)
             
         # start the process with classifier i and the complete output of the preceding annotation step
         p = subprocess.Popen(shlex.split('%s -i %s -o %s --classifier %s --bitscore %s --rank %s --taxon %s --blast %s' 
                                          % (self.R_subset_exe, 
                                             to_string(input), 
                                             outfile,
                                             self.R_subset_classifier[i],
                                             self.R_subset_bitscore,
                                             self.R_subset_rank[i],
                                             self.R_subset_taxon_db,
                                             to_string(parser_out))),
                              stdout = subprocess.PIPE)
         
         # during processing print output in verbose mode and update the logfile
         while p.poll() is None:
             # read each line only once, then echo and log it
             line = p.stdout.readline()
             if self.verbose:
                 print_verbose(line)
             logfile.write(line)
         # wait until process is complete
         p.wait()
         if p.returncode:
             raise SubsetDBException(self.logdir + self.R_subset_classifier[i] + '.log')
         
     # print summary of the process after completion
     print_verbose('Subsetting of annotated Blast database complete \n')
     print_running_time(self.time)
     newline()
Example #2
    def annotate_db(self, input, output):
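        """Annotate the BLAST database with taxonomic data by running the
        external annotation script and logging its output."""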

        # create a dir for output
        create_outputdir(output)
        # generate filename for db
        outfile = output + os.sep + self.R_annotate_name + '.db'
        # open a logfile for annotation process
        logfile = open_logfile(self.logdir + 'annotation_of_db.log')
        
        # remove old databases with same name
        if os.path.exists(outfile):
            os.remove(outfile)
        
        # print current information about this step on stdout
        print_step(self.step_number, 'Analysis', 
                   'Annotate taxonomical data to blast database',
                   self.R_annotate_parameter)
        newline()
        # start the parser and wait until completion
        p = subprocess.Popen(shlex.split('%s -i %s -o %s %s --taxon %s' 
                                         % (self.R_annotate_exe, 
                                            to_string(input), 
                                            outfile, 
                                            self.R_annotate_parameter,
                                            self.R_annotate_taxon_db)),
                             stdout = subprocess.PIPE)
        
        # print information about the status
        while p.poll() is None:
            # read each line only once, then echo and log it
            line = p.stdout.readline()
            if self.verbose:
                print_verbose(line)
            logfile.write(line)
        # wait until process is complete
        p.wait()
        # save the exit code for later function calls 
        self.exitcode = p.returncode
        
        # raise Exception when an error occurs during processing
        if p.returncode:
            raise AnnotateDBException(self.logdir + 'annotation_of_db.log')
        else:
            # print summary of the process after completion
            print_verbose('Taxonomical annotation of blast database complete \n')
            print_running_time(self.time)
            newline()
Example #3
 def concatinate(self, outputdir):
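     """Concatenate overlapping reads with Flash and log its output."""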
     
     # create a dir for output
     create_outputdir(outputdir)
     
     # print current information about this step on stdout
     print_step(self.step_number, 
                'Assembly', 
                'Concatinate Reads',
                self.concat_parameter)
     newline()
     
     # open the logfile
     logfile = open_logfile(self.logdir + 'concatination.log')
     
     # start the program Flash with parameters from the conf file
     # errors will be piped to an extra error logfile
     p = subprocess.Popen(shlex.split('%s -t %d -d %s %s %s' % (self.flash_exe,
                                                                self.threads,
                                                                outputdir,
                                                                self.concat_parameter,
                                                                self.input)),
                         stdout = subprocess.PIPE, 
                         stderr = open_logfile(self.logdir + 'flash.err.log'))
     
     # during processing print Flash output in verbose mode and update the logfile
     while p.poll() is None:
         # read each line only once, then echo and log it
         line = p.stdout.readline()
         if self.verbose:
             print_verbose(line)
         else:
             print_compact(line.rstrip('\n'))
         logfile.write(line)
     # wait until process is finished        
     p.wait()
     
     if p.returncode:
         raise FlashException(self.logdir + 'flash.err.log')
     else:
         # remove empty error logs
         remove_empty_logfile(self.logdir + 'flash.err.log')
         # print summary of the process after completion
         newline()
         print_verbose('Concatination complete \n')
         print_running_time(self.time)
Example #4
    def parse_to_db(self, input, output):
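        """Parse BLAST results into a database file with the external parser
        and report its progress."""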
        # create a dir for output
        create_outputdir(output)
        # generate filename for db
        outfile = output + os.sep + self.parser_name + '.db'
        # remove old databases with same name
        if os.path.exists(outfile):
            os.remove(outfile)
            
        # print current information about this step on stdout
        print_step(self.step_number, 'Analysis', 
                   'Parse database from blast results',
                   self.parser_parameter)
        newline()

        # start the parser and wait until completion
        p = subprocess.Popen(shlex.split('%s -o %s %s %s' % (self.parser_exe,
                                                            outfile,
                                                            self.parser_parameter,
                                                            input)),
                              stdout = subprocess.PIPE,
                              stderr = open_logfile(self.logdir + 'parser.err.log'))
        
        # print information about the status
        while p.poll() is None:
            if self.verbose:
                print_verbose(p.stdout.readline())
            else:
                print_compact(p.stdout.readline().rstrip('\n'))
        # wait until process is complete        
        p.wait()
        # save the exit code for later function calls 
        self.exitcode = p.returncode
        # raise Exception when an error occurs during processing
        if p.returncode:
            raise ParserException(self.logdir + 'parser.err.log')
        else:
            # remove empty error logs
            remove_empty_logfile(self.logdir + 'parser.err.log')
            # print summary of the process after completion
            print_verbose('Parsing of blast XML File complete \n')
            print_running_time(self.time)
            newline()
Example #5
 def blastn(self, outputdir):
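     """Run blastn against the configured database, converting fastq input
     to fasta and merging paired files beforehand if necessary."""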
         
     # create a dir for output
     create_outputdir(outputdir)
     
     # blastn can only run with fasta files, so input has to be converted
     if is_fastq(self.input):
         # print current information about this step on stdout
         print_step(self.step_number,
                    'Annotation', 
                    'convert fastq files',
                    cut_path(self.input))
         newline()
         self.input = convert_fastq(self.input, self.blast_dir, self.converter_exe)
     
     # blastn can only annotate one file, so the input has to be merged into one file
     if is_paired(self.input):
         # print current information about this step on stdout
         print_step(self.step_number,
                    'Annotation',
                    'merging reads to one file',
                    cut_path(self.input))
         newline()
         self.input = merge_files(self.input, self.blast_dir, 'merged', 'fasta')
     
     # define the output file for the blastn results
     outfile = outputdir + os.sep + blast_output(self.outfmt)
     
     # print current information about this step on stdout
     print_step(self.step_number,
                'Annotation',
                'blast sequences against nt database',
                self.blast_parameter)
     newline()
     # start blastn and wait until completion
     # logfile is not required, because blastn has no log function and no output on stdout
     p = subprocess.Popen(shlex.split('%s -db %s -query %s -out %s -num_threads %s %s ' % 
                                      (self.blastn_exe,
                                       self.blastn_db,
                                       to_string(self.input),
                                       outfile,
                                       self.threads, 
                                       self.blast_parameter)),
                          stderr = open_logfile(self.logdir + 'blastn.err.log'))
     # wait until process is complete
     p.wait()
     
     if p.returncode:
         raise BlastnException(self.logdir + 'blastn.err.log')
     else:
         # remove the temporary files: converted fastq files and the merged fasta files
         remove_file(outputdir + os.sep, 'converted', 'fasta')
         remove_file(outputdir + os.sep, 'merged', 'fasta')
         # remove unused error logs
         remove_empty_logfile(self.logdir + 'blastn.err.log')
     
         # print summary of the process after completion
         print_verbose('Annotation with blastn complete \n')
         print_running_time(self.time)
         newline()
Example #6
 def trim_and_filter(self):
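     """Run quality-based trimming and filtering of the input reads with
     trim_galore and write its report to a logfile."""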
     
     # create a dir for output
     create_outputdir(self.trim_dir)
     
     # print current information about this step on stdout
     print_step(self.step_number, 
                'Preprocess', 
                'quality based trimming and filtering',
                self.trim_parameter)
     newline()
     
     # open the log file
     self.logfile = open_logfile(self.logdir + 'trimming.log')
     
     # start trim_galore with the given parameter and specified output dir
     p = subprocess.Popen(shlex.split('%s %s -o %s %s' % 
                                      (self.trim_exe,
                                       self.trim_parameter,
                                       self.trim_dir,
                                       to_string(self.input))),
                         stdout = subprocess.PIPE,
                         stderr = subprocess.PIPE)
     # read stderr before waiting, otherwise a full pipe buffer could block the process
     for line in p.stderr:
         if self.verbose:
             # in verbose mode additionally print output to stdout
             print_verbose(line)
         self.logfile.write(line)
     # wait until process is finished
     p.wait()
     if p.returncode:
         raise TrimGaloreException(self.logfile.name)
     else:
         # print summary of the process after completion
         print_verbose('Trimming and filtering complete \n')
         print_running_time(self.time)
         newline()
Example #7
    def qualityCheck(self):
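        """Run FastQC on the input reads and report its progress on stdout."""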
        
        # create a dir for output
        create_outputdir(self.quality_dir)
        
        # print current information about this step on stdout
        print_step(self.step_number, 
                   'Preprocess', 
                   'quality analysis',
                   self.fastqc_parameter)
        newline()
        
        # run FastQC with the given parameters, in separate threads, and extract the output

        p = subprocess.Popen(shlex.split('%s -t %s -o %s --extract %s %s' 
                                         % (self.fastqc_exe,
                                            self.threads,
                                            self.quality_dir, 
                                            self.fastqc_parameter,
                                            to_string(self.input))),
                             stdout = subprocess.PIPE,
                             stderr = subprocess.PIPE)
        
        # during processing pipe the output and print it on screen
        while p.poll() is None:
            if self.verbose:
                print_verbose(p.stderr.readline())
            else:
                print_compact(p.stderr.readline().rstrip('\n'))
        # wait until process is finished
        p.wait()
        
        if p.returncode:
            raise FastQCException()
        else:
            # print summary of the process after completion
            print_verbose('Quality check complete for %s\n' % (self.input))
            print_running_time(self.time)
            newline()
Example #8
 def assemble_reads(self, outputdir):
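     """Assemble the reads by running velveth, velvetg and meta-velvetg in
     sequence, logging each stage separately."""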
     
     # create a dir for output
     create_outputdir(outputdir)
     
     # print current information about this step on stdout
     print_step(self.step_number, 
                'Assembly', 
                'Creating Hashmaps',
                self.velveth_parameter)
     newline()
     
     # open the first logfile
     velveth_log = open_logfile(self.logdir + 'velveth.log')
     
     # start the program velveth with parameter from the conf file and automatic detection
     # of the input file format
     # errors will be piped to extra error logfile
     p = subprocess.Popen(shlex.split('%s %s %s %s -fmtAuto %s' % (self.velveth_exe, 
                                                                   outputdir,
                                                                   self.kmer, 
                                                                   self.velveth_parameter,
                                                                   self.input)),
                         stdout = subprocess.PIPE, 
                         stderr = open_logfile(self.logdir + 'velveth.err.log')) 
     # during processing print velveth output in verbose mode and update the logfile
     while p.poll() is None:
         # read each line only once, then echo and log it
         line = p.stdout.readline()
         if self.verbose:
             print_verbose(line)
         velveth_log.write(line)
     # wait until process is finished
     p.wait()
     
     if p.returncode:
         raise VelvetHException(self.logdir + 'velveth.err.log')
     else:
         # remove empty error logs
         remove_empty_logfile(self.logdir + 'velveth.err.log')
         
         # print current information about this step on stdout
         print_step(self.step_number, 
                    'Assembly', 
                    'Creating Graph',
                    self.velvetg_parameter)
         newline()
     
         # open the second logfile
         velvetg_log = open_logfile(self.logdir + 'velvetg.log')
     
         # start the program velvetg in the dir of velveth, with the parameter of the conf file
         # errors will be piped to extra error logfile
         p = subprocess.Popen(shlex.split('%s %s %s' % (self.velvetg_exe, 
                                                        outputdir,
                                                        self.velvetg_parameter)),
                              stdout = subprocess.PIPE,
                              stderr = open_logfile(self.logdir + 'velvetg.err.log'))
         # during processing print velvetg output in verbose mode and update the logfile
         while p.poll() is None:
             # read each line only once, then echo and log it
             line = p.stdout.readline()
             if self.verbose:
                 print_verbose(line)
             velvetg_log.write(line)
         # wait until process is finished
         p.wait()
     
         if p.returncode:
             raise VelvetGException(self.logdir + 'velvetg.err.log')
         else:
             # remove empty error logs
             remove_empty_logfile(self.logdir + 'velvetg.err.log')
     
             # print current information about this step on stdout
             print_step(self.step_number, 
                        'Assembly', 
                        'Metagenomic Assembly',
                        self.metavelvet_parameter)
             newline()
            
             # open the third logfile
             meta_log = open_logfile(self.logdir + 'metavelvet.log')
     
             # start the program meta-velvetg in the dir of velveth and velvetg, 
             # with the parameter of the conf file
             # errors will be piped to extra error logfile
             p = subprocess.Popen(shlex.split('%s %s %s' % (self.metavelvet_exe, 
                                                            outputdir,
                                                            self.metavelvet_parameter)),
                                  stdout = subprocess.PIPE, 
                                  stderr = open_logfile(self.logdir + 'metavelvet.err.log'))
             # during processing print meta-velvetg output in verbose mode and update the logfile
             while p.poll() is None:
                 # read each line only once, then echo and log it
                 line = p.stdout.readline()
                 if self.verbose:
                     print_verbose(line)
                 meta_log.write(line)
             # wait until process is finished
             p.wait()
             
             if p.returncode:
                 raise MetaVelvetException(self.logdir + 'metavelvet.err.log')
             else:
                 # remove empty error logs
                 remove_empty_logfile(self.logdir + 'metavelvet.err.log')
                 newline()
                 # print summary of the process after completion
                 print_verbose('Assembly complete \n')
                 print_running_time(self.time)
                 newline()
Example #9
 def metacv(self, outputdir):
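     """Classify reads or contigs with MetaCV and summarize the results with
     its res2table and res2sum commands, logging each stage."""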
     
     # create a dir for output
     create_outputdir(outputdir)
     
     # select the input for metacv and convert it into a usable format
     if self.contigs is True:
         input = to_string(self.input)
     else:
         input = to_string(self.raw)
         
     # print current information about this step on stdout
     print_step(self.step_number, 
                'Annotation', 
                'Annotate bacterial reads with MetaCV',
                 '%s %s %s' % (self.metacv_seq,
                               self.metacv_mode,
                               self.metacv_orf))
     newline()
     
     # metacv has a maximum thread number of 64
     # parameter has to be adjusted
     if self.threads > 64:
         threads = 64
     else:
         threads = self.threads
     classify = open_logfile(self.logdir + 'metacv.classify.log')
     # start MetaCV function and wait until completion
     p = subprocess.Popen(shlex.split('%s classify %s %s %s %s %s %s --threads=%s' % 
                                     (self.metacv_exe,
                                      self.metacv_db,
                                      input,
                                      self.metacv_name,
                                      self.metacv_seq, 
                                      self.metacv_mode, 
                                      self.metacv_orf,
                                      threads)),
                         stderr = subprocess.PIPE, 
                         stdout = subprocess.PIPE,
                         cwd = outputdir + os.sep)
     # during processing pipe the output and print it on screen
     while p.poll() is None:
         # read each line only once, then echo and log it
         line = p.stderr.readline()
         if self.verbose:
             print_verbose(line)
         else:
             print_compact(line.rstrip('\n'))
         classify.write(line)
     # wait until process is finished        
     p.wait()
     
     if p.returncode:
         raise MetaCVException(self.logdir + 'metacv.classify.log')
     else:
         # remove unused error logs
         remove_empty_logfile(self.logdir + 'metacv.classify.log')
         
         # print current information about this step on stdout
         print_step(self.step_number, 
                    'Annotation', 
                    'Analyse the results of MetaCV',
                    '%s %s %s' % (self.metacv_total_reads, 
                                  self.metacv_min_qual, 
                                  self.metacv_taxon))
         newline() 
         res2table = open_logfile(self.logdir + 'metacv.res2table.log')
         # start MetaCV's res2table function and wait until completion
         p = subprocess.Popen(shlex.split('%s res2table %s %s %s %s %s %s --threads=%s' % 
                                          (self.metacv_exe,
                                           self.metacv_db,
                                           to_string(update_reads(outputdir,'metpipe','res')),
                                           self.metacv_name + '.res2table',
                                           self.metacv_total_reads, 
                                           self.metacv_min_qual, 
                                           self.metacv_taxon,
                                           threads)),
                              stderr = subprocess.PIPE, 
                              stdout = subprocess.PIPE,
                              cwd = outputdir + os.sep)
         # during processing pipe the output and print it on screen
         while p.poll() is None:
             # read each line only once, then echo and log it
             line = p.stderr.readline()
             if self.verbose:
                 print_verbose(line)
             else:
                 print_compact(line.rstrip('\n'))
             res2table.write(line)
         # wait until process is finished
         p.wait()
     
         if p.returncode:
             raise MetaCVSumException(self.logdir + 'metacv.res2table.log')
         else:
             # remove unused error logs
             remove_empty_logfile(self.logdir + 'metacv.res2table.log')
         # print current information about this step on stdout
         print_step(self.step_number, 
                    'Annotation', 
                    'Summarize the results of MetaCV',
                    self.metacv_min_qual)
         newline()
         
         res2sum = open_logfile(self.logdir + 'metacv.res2sum.log')
         # start MetaCV's res2sum function and wait until completion
         # the workingdir must be specified to maintain the correct 
         # order of output files
         p = subprocess.Popen(shlex.split('%s res2sum %s %s %s %s' %
                                          (self.metacv_exe,
                                           self.metacv_db,
                                           to_string(update_reads(outputdir,'metpipe','res')),
                                           self.metacv_name + '.res2sum',
                                           self.metacv_min_qual)),
                              stderr = subprocess.PIPE, 
                              stdout = subprocess.PIPE,
                              cwd = outputdir + os.sep)
         # during processing pipe the output and print it on screen
         while p.poll() is None:
             # read each line only once, then echo and log it
             line = p.stderr.readline()
             if self.verbose:
                 print_verbose(line)
             else:
                 print_compact(line.rstrip('\n'))
             res2sum.write(line)
         # wait until process is finished
         p.wait()
     
         if p.returncode:
             raise MetaCVSumException(self.logdir + 'metacv.res2sum.log')
         else:
             # remove unused error logs
             remove_empty_logfile(self.logdir + 'metacv.res2sum.log')
     
         # print summary of the process after completion
         print_verbose('Annotation with MetaCV complete \n')
         print_running_time(self.time)
         newline()
Example #10
 def krona_report(self, input, output, parser_output):
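     """Create a Krona HTML chart from tabular BLAST output, or from XML
     output by extracting tabular data from the parser database."""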
     
     # create a dir for output
     create_outputdir(output)
     # generate path and name for output file
     outfile = output + os.sep + self.krona_name + '.html'
     
     # test type of input file
     if is_tabular(input):
         # print current information about this step on stdout
         print_step(self.step_number, 
                    'Analysis', 
                    'Create Overview from tabular output',
                    self.krona_parameter)
         newline()
         
         # start the Krona import script for Blast tabular output
         # pipe all stdout output into a logfile
         p = subprocess.Popen(shlex.split('perl -l %s %s -o %s %s %s' 
                                          % (self.perl_lib,
                                             self.krona_exe,
                                             outfile,
                                             self.krona_parameter,
                                             to_string(input))),
                              stdout = open_logfile(self.logdir + 'krona.log'),
                              stderr = open_logfile(self.logdir + 'krona.err.log'))
         # wait until process is complete
         p.wait()
         if p.returncode:
             raise KronaException(self.logdir + 'krona.err.log')
         else:
             # remove unused error logs
             remove_empty_logfile(self.logdir + 'krona.err.log')
             # print summary of the process after completion
             print_verbose('Creation of Krona Pie Chart complete \n')
             print_running_time(self.time)
             newline()
         
     elif is_xml(input) and is_db(parser_output):
         print_step(self.step_number, 
                    'Analysis', 
                    'Create Overview from XML output',
                    self.krona_parameter)
         # convert the values from database to tabular format
         extract_tabular(to_string(parser_output), output)
         # set the new input
         input = update_reads(output, 'extracted_from_DB','tab')
         
         # start the Krona import script for Blast tabular output
         # pipe all stdout output into a logfile
         p = subprocess.Popen(shlex.split('perl -l %s %s -o %s %s %s' 
                                          % (self.perl_lib,
                                             self.krona_exe,
                                             outfile,
                                             self.krona_parameter,
                                             to_string(input))),
                              stdout = open_logfile(self.logdir + 'krona.log'),
                              stderr = open_logfile(self.logdir + 'krona.err.log'))
         # wait until process is complete
         p.wait()
         if p.returncode:
             raise KronaException(self.logdir + 'krona.err.log')
         else:
             # remove unused error logs
             remove_empty_logfile(self.logdir + 'krona.err.log')
             # print summary of the process after completion
             print_verbose('Creation of Krona Pie Chart complete \n')
             print_running_time(self.time)
             newline()
         
     # the input is neither in tabular nor in XML format
     elif not is_tabular(input) and not is_xml(input):
         raise KronaFormatException()
     else:
         print_verbose('ERROR 25: Krona Report could not be generated, because of unknown reasons')
         sys.exit(1)