Ejemplo n.º 1
0
 def manage_analysis(self):
     """Run the analysis steps that fit the format of the BLAST output.

     For XML output the file is parsed into a database, annotated,
     subsetted and (if ``self.krona`` is set) summarised in a Krona report.
     For tabular output only the Krona report is created.

     Returns:
         ``[parser_out, annotated_output]`` after a successfully parsed XML
         file; ``annotated_output`` is ``''`` when the annotation step
         failed.  ``[]`` for tabular or unrecognised input.  ``None`` when
         the analysis is skipped for MetaCV or the XML file could not be
         parsed.
     """
     # NOTE(review): this is a substring test, so '' or any fragment of
     # 'metacv' also matches; kept as is because the set of valid modes is
     # not visible here -- confirm whether '==' was intended.
     if self.annotation_mode in 'metacv':
         print_verbose("For a detailed analysis blastn with XML output is needed")
         return None

     # test for blastrun with outfmt 5 mode
     if is_xml(self.blast_output):
         # create a SQLite DB from the xml file
         self.parse_to_db(to_string(self.blast_output), self.parsed_db_out)
         # test the exit code, because next script need the output as input
         if self.exitcode != 0:
             print_verbose("ERROR: XML file could not be parsed")
             return None

         # update input
         parser_out = absolute_path(update_reads(self.parsed_db_out,
                                                 self.parser_name,
                                                 'db'))
         # raise step_number
         self.step_number += 1

         # stays empty when the annotation fails, so the return statement
         # below never touches an unbound name
         annotated_output = ''
         # create a new database with taxonomical annotations
         self.annotate_db(parser_out, self.annotated_db_out)
         # test the exit code, because next script need the output as input
         if self.exitcode == 0:
             # update input
             annotated_output = absolute_path(update_reads(self.annotated_db_out,
                                                           self.R_annotate_name,
                                                           'db'))
             # raise step_number
             self.step_number += 1
             # subset the taxonomical database for a better and
             # faster analysis after the pipeline has finished
             self.subset_db(annotated_output, self.subseted_db_out, parser_out)
             # raise step_number
             self.step_number += 1
         else:
             print_verbose("ERROR: Annotated Database could not be subseted correctly")

         # create a pie chart of the blast data with Krona Webtools
         if self.krona:
             self.krona_report(self.blast_output, self.krona_report_out, parser_out)

         return [parser_out, annotated_output]

     # test for blast tabular output
     if is_tabular(self.blast_output) and self.krona is True:
         self.krona_report(self.blast_output, self.krona_report_out, '')
         return []

     print_verbose("ERROR: Blast output file is not in xml or tabular format.\n" +
                   "Please use outfmt 5 or 6 for Blast run")
     return []
Ejemplo n.º 2
0
 def manage_annotation(self):
     """Run the annotation tool(s) selected by ``self.mode``.

     ``'blastn'`` and ``'metacv'`` (case-insensitive) run only that tool;
     any other mode runs both, provided both executables pass
     ``is_executable``.  The step number is raised once per call in which
     at least one tool was started.

     Returns:
         ``[step_number, blastn_out, metacv_out]``; an output entry is
         ``''`` when the corresponding tool was not run.
     """
     blast_result = ''
     metacv_result = ''
     tool = self.mode.lower()

     # decide which tools to run; a tool is only used if its executable
     # exists and is runnable
     if tool == 'blastn':
         use_blastn, use_metacv = is_executable(self.blastn_exe), False
     elif tool == 'metacv':
         use_blastn, use_metacv = False, is_executable(self.metacv_exe)
     else:
         both_ready = (is_executable(self.blastn_exe)
                       and is_executable(self.metacv_exe))
         use_blastn = use_metacv = both_ready

     if use_blastn:
         # start annotation with blastn
         self.blastn(self.blast_dir)
         # the file ending depends on the chosen blast output format
         ending = blast_output(self.outfmt).split('.')[1]
         blast_result = update_reads(self.blast_dir, 'blastn', ending)

     if use_metacv:
         # start annotation with metacv
         self.metacv(self.metacv_dir)
         metacv_result = update_reads(self.metacv_dir, self.metacv_name, 'res')

     if use_blastn or use_metacv:
         # raise step_number
         self.step_number += 1

     return [self.step_number, blast_result, metacv_result]
Ejemplo n.º 3
0
    def manage_assembly(self):
        """Run the assembly step(s) selected by ``self.mode``.

        ``'flash'`` concatenates the reads, ``'metavelvet'`` assembles them
        and ``'both'`` does one after the other (mode names are compared
        case-insensitively).  A step is skipped when one of its executables
        fails ``is_executable``; any other mode does nothing.
        ``self.input`` is updated to the output of the last step that ran.

        Returns:
            ``[step_number, input, concatinated, assembled]`` where the last
            two entries are ``''`` if the respective step did not produce
            them.
        """
        concatinated = ''
        assembled = ''
        mode = self.mode.lower()

        # NOTE(review): the original checked velveth_exe twice in a row; a
        # velvetg executable was presumably meant, but no such attribute is
        # visible here -- confirm and add the check if it exists.
        if mode == 'flash':
            # is executable existing and runnable?
            if is_executable(self.flash_exe):
                # start concatenation and update the input for next step
                self.concatinate(self.concat_out)
                self.step_number += 1
                if self.merge_uncombined:
                    # merge the concatenated reads with the non concatenated rest
                    self.input = merge_files(
                        [to_string(update_reads(self.concat_out, 'extendedFrags', 'fastq')),
                         to_string(update_reads(self.concat_out, 'out.notCombined', 'fastq'))],
                        self.concat_out,
                        'merged_concat', 'fastq')
                else:
                    concatinated = update_reads(self.concat_out, 'extendedFrags', 'fastq')
                    self.input = concatinated

        elif mode == 'metavelvet':
            # is executable existing and runnable?
            if is_executable(self.velveth_exe) and is_executable(self.metavelvet_exe):
                # start assembly and update the input for next step
                self.assemble_reads(self.assembly_out)
                assembled = update_reads(self.assembly_out, 'meta-velvetg', 'fa')
                self.input = assembled
                self.step_number += 1

        elif mode == 'both':
            #TODO: not working because of auto mode --> see logs

            # is executable existing and runnable?
            if (is_executable(self.flash_exe)
                    and is_executable(self.velveth_exe)
                    and is_executable(self.metavelvet_exe)):
                # start processing and update the input for next step
                self.concatinate(self.concat_out)
                # read the result from the directory FLASH was told to
                # write to (same as in the 'flash' branch)
                concatinated = update_reads(self.concat_out, 'extendedFrags', 'fastq')
                self.input = concatinated
                self.assemble_reads(self.assembly_out)
                assembled = update_reads(self.assembly_out, 'meta-velvetg', 'fa')
                self.step_number += 1
                self.input = assembled

        return [self.step_number, self.input, concatinated, assembled]
Ejemplo n.º 4
0
 def manage_preprocessing(self):
     """Run quality control and trimming on the input reads.

     If ``is_fastq(self.input)`` raises ``FastQException``, both steps are
     switched off.  Each step only runs when it is enabled and its
     executable passes ``is_executable``; the step number is raised after
     every step that ran.

     Returns:
         ``[step_number, trimmed_reads]`` when trimming ran, otherwise
         ``[step_number]``.
     """
     try:
         is_fastq(self.input)
     except FastQException:
         self.quality = False
         self.trim = False

     if self.quality and is_executable(self.fastqc_exe):
         self.qualityCheck()
         # raise the step number for cmd output
         self.step_number += 1

     if self.trim and is_executable(self.trim_exe):
         self.trim_and_filter()
         # raise the step number for cmd output
         self.step_number += 1
         return [self.step_number, update_reads(self.trim_dir, 'val', 'fq')]

     # also reached when trimming was requested but the trimmer is not
     # executable, so the caller always gets a list back instead of None
     return [self.step_number]
Ejemplo n.º 5
0
 def metacv(self, outputdir):
     """Annotate reads with MetaCV and post-process the result.

     Runs ``classify``, ``res2table`` and ``res2sum`` of the MetaCV
     executable one after the other inside ``outputdir``.  The stderr of
     every command is echoed on screen and copied to a log file in
     ``self.logdir``.

     Args:
         outputdir: directory that is created for the output and used as
             working directory of all three commands.

     Raises:
         MetaCVException: ``classify`` exited with a non-zero code.
         MetaCVSumException: ``res2table`` or ``res2sum`` exited with a
             non-zero code.
     """
     # create a dir for output
     create_outputdir(outputdir)

     # select the input for metacv and convert it in an usable format
     if self.contigs is True:
         reads = to_string(self.input)
     else:
         reads = to_string(self.raw)

     # print actual informations about the step on stdout
     print_step(self.step_number,
                'Annotation',
                'Annotate bacterial reads with MetaCV',
                '%s %s %s' % (self.metacv_seq,
                              self.metacv_mode,
                              self.metacv_orf))
     newline()

     # metacv has a maximum thread number of 64
     threads = min(self.threads, 64)
     # the workingdir must be specified to maintain the correct
     # order of output files
     workdir = outputdir + os.sep

     classify_log = self.logdir + 'metacv.classify.log'
     command = '%s classify %s %s %s %s %s %s --threads=%s' % (self.metacv_exe,
                                                               self.metacv_db,
                                                               reads,
                                                               self.metacv_name,
                                                               self.metacv_seq,
                                                               self.metacv_mode,
                                                               self.metacv_orf,
                                                               threads)
     if self._run_metacv_command(command, classify_log, workdir):
         raise MetaCVException(classify_log)
     # remove unused error logs
     remove_empty_logfile(classify_log)

     # print actual informations about the step on stdout
     print_step(self.step_number,
                'Annotation',
                'Analyse the results of MetaCV',
                '%s %s %s' % (self.metacv_total_reads,
                              self.metacv_min_qual,
                              self.metacv_taxon))
     newline()

     # NOTE(review): the result file is looked up with the hard-coded name
     # 'metpipe' although classify writes with self.metacv_name -- confirm
     # that both are always the same.
     res2table_log = self.logdir + 'metacv.res2table.log'
     command = '%s res2table %s %s %s %s %s %s --threads=%s' % (
         self.metacv_exe,
         self.metacv_db,
         to_string(update_reads(outputdir, 'metpipe', 'res')),
         self.metacv_name + '.res2table',
         self.metacv_total_reads,
         self.metacv_min_qual,
         self.metacv_taxon,
         threads)
     if self._run_metacv_command(command, res2table_log, workdir):
         raise MetaCVSumException(res2table_log)
     # remove unused error logs
     remove_empty_logfile(res2table_log)

     # print actual informations about the step on stdout
     print_step(self.step_number,
                'Annotation',
                'Summarize the results of MetaCV',
                self.metacv_min_qual)
     newline()

     res2sum_log = self.logdir + 'metacv.res2sum.log'
     command = '%s res2sum %s %s %s %s' % (
         self.metacv_exe,
         self.metacv_db,
         to_string(update_reads(outputdir, 'metpipe', 'res')),
         self.metacv_name + '.res2sum',
         self.metacv_min_qual)
     if self._run_metacv_command(command, res2sum_log, workdir):
         raise MetaCVSumException(res2sum_log)
     # remove unused error logs
     remove_empty_logfile(res2sum_log)

     # print summary of the process after completion
     print_verbose('Annotation with MetaCV complete \n')
     print_running_time(self.time)
     newline()

 def _run_metacv_command(self, command, log_path, workdir):
     """Run one MetaCV command line and return its exit code.

     Every stderr line of the process is printed (verbose or compact,
     depending on ``self.verbose``) and written to the log file at
     ``log_path``.
     """
     logfile = open_logfile(log_path)
     try:
         # stdout was never consumed by the pipeline; sending it to the
         # null device keeps the child from blocking on a full pipe
         with open(os.devnull, 'w') as devnull:
             process = subprocess.Popen(shlex.split(command),
                                        stderr = subprocess.PIPE,
                                        stdout = devnull,
                                        cwd = workdir)
             # read each line exactly once, so that the screen and the log
             # both receive the complete output, and keep reading until
             # the stream is closed so nothing is lost at the end
             while True:
                 line = process.stderr.readline()
                 if not line:
                     break
                 if self.verbose:
                     print_verbose(line)
                 else:
                     print_compact(line.rstrip('\n'))
                 logfile.write(line)
             # wait until process is finished
             process.wait()
     finally:
         # flush the log before the caller inspects or removes it
         logfile.close()
     return process.returncode
Ejemplo n.º 6
0
 def krona_report(self, input, output, parser_output):
     """Create a Krona HTML report from a BLAST result.

     Tabular input is passed to the Krona import script directly.  XML
     input is first converted to a tabular file with ``extract_tabular``,
     which needs the parser database.

     Args:
         input: BLAST output file (tabular or XML).
         output: directory for the report; it is created first.
         parser_output: database written by the XML parser; only used for
             XML input.

     Raises:
         KronaFormatException: ``input`` is neither tabular nor XML.
         KronaException: the Krona import script exited with a non-zero
             code.
         SystemExit: ``input`` is XML but ``parser_output`` is not a
             database.
     """
     # create a dir for output
     create_outputdir(output)
     # generate path and name for output file
     outfile = output + os.sep + self.krona_name + '.html'

     # test type of input file
     if is_tabular(input):
         # print actual informations about the step on stdout
         print_step(self.step_number,
                    'Analysis',
                    'Create Overview from tabular output',
                    self.krona_parameter)
         newline()
         self._run_krona_import(outfile, input)

     elif is_xml(input) and is_db(parser_output):
         print_step(self.step_number,
                    'Analysis',
                    'Create Overview from XML output',
                    self.krona_parameter)
         # convert the values from database to tabular format
         extract_tabular(to_string(parser_output), output)
         # use the extracted table as the new input
         self._run_krona_import(outfile,
                                update_reads(output, 'extracted_from_DB','tab'))

     elif not is_xml(input):
         # input is known not to be tabular here, so it has neither of the
         # two supported formats
         raise KronaFormatException()

     else:
         # XML input without a usable parser database
         print_verbose('ERROR 25: Krona Report could not be generated, because of unknown reasons')
         sys.exit(1)

 def _run_krona_import(self, outfile, tabular_input):
     """Run the Krona import script on a tabular file and report the result.

     stdout and stderr of the script are written to ``krona.log`` and
     ``krona.err.log`` in ``self.logdir``.  Raises ``KronaException`` when
     the script exits with a non-zero code.
     """
     error_log_path = self.logdir + 'krona.err.log'
     # NOTE(review): 'perl -l' looks like it may have been meant as
     # 'perl -I' (include path for self.perl_lib) -- confirm before changing.
     command = 'perl -l %s %s -o %s %s %s' % (self.perl_lib,
                                              self.krona_exe,
                                              outfile,
                                              self.krona_parameter,
                                              to_string(tabular_input))
     stdout_log = open_logfile(self.logdir + 'krona.log')
     try:
         stderr_log = open_logfile(error_log_path)
         try:
             # pipe all output of the script into the logfiles
             p = subprocess.Popen(shlex.split(command),
                                  stdout = stdout_log,
                                  stderr = stderr_log)
             # wait until process is complete
             p.wait()
         finally:
             stderr_log.close()
     finally:
         stdout_log.close()

     if p.returncode:
         raise KronaException(error_log_path)

     # remove unused error logs
     remove_empty_logfile(error_log_path)
     # print summary of the process after completion
     print_verbose('Creation of Krona Pie Chart complete \n')
     print_running_time(self.time)
     newline()