Example #1
0
 def manage_analysis(self):
     
    
     if self.annotation_mode in 'metacv':
         print_verbose("For a detailed analysis blastn with XML output is needed")
     else:
         # test for blastrun with outfmt 5 mode
         if is_xml(self.blast_output):
             # create a SQLite DB from the xml file
             self.parse_to_db(to_string(self.blast_output), self.parsed_db_out)
             # test the exit code, because next script need the output as input
             if self.exitcode is 0:
                 # update input 
                 parser_out = absolute_path(update_reads(self.parsed_db_out, 
                                                         self.parser_name, 
                                                         'db'))
                 # raise step_number
                 self.step_number += 1
                 # create a new database with taxonomical annotations
                 self.annotate_db(parser_out, self.annotated_db_out)
                 # test the exit code, because next script need the output as input
                 if self.exitcode is 0:
                     # update input
                     annotated_output = absolute_path(update_reads(self.annotated_db_out, 
                                                                   self.R_annotate_name, 
                                                                   'db'))
                     # raise step_number
                     self.step_number += 1
                     # subset the taxonomical database for a better and 
                     # faster analysis after the pipline has finished
                     self.subset_db(annotated_output, self.subseted_db_out, parser_out)
                     # raise step_number
                     self.step_number += 1
                 else:
                     print_verbose("ERROR: Annotated Database could not be subseted correctly") 
                 # create a pie chart of the blast data with Krona Webtools 
                 if self.krona:
                     self.krona_report(self.blast_output, self.krona_report_out, parser_out)
                     
                 return [parser_out, annotated_output]   
             else: 
                 print_verbose("ERROR: XML file could not be parsed")
         # test for blast tabular output
         elif is_tabular(self.blast_output) and self.krona is True:
             self.krona_report(self.blast_output, self.krona_report_out, '')
             return []
         else:
             print_verbose("ERROR: Blast output file is not in xml or tabular format.\n" +
                           "Please use outfmt 5 or 6 for Blast run")
             return []
Example #2
0
 def __init__(self, threads, step_number, verbose, time, input , logdir,
              fastqc_exe,quality, quality_dir, fastqc_parameter,
              trim_exe, trim, trim_dir, trim_parameter):
        
     # define general  variables 
     self.threads = threads
     self.step_number = step_number
     self.verbose = verbose
     self.time = time
     self.logdir = logdir
     self.input = absolute_path(input)
     
     # define quality report variables
     self.fastqc_exe = fastqc_exe
     self.quality = quality
     self.quality_dir = quality_dir
     self.fastqc_parameter = fastqc_parameter
     
     # define trim variables
     self.trim_exe = trim_exe
     self.trim = trim
     self.trim_dir = trim_dir
     self.trim_parameter = trim_parameter
Example #3
0
def main(argv = None):

  # hardcode defaults
  RESULT_DIR = '%s%sresult' % (sys.path[0], os.sep)
  PARAM_FILE = '%s%sparameter.conf' % (sys.path[0], os.sep)
  STEPS = ['preprocessing', 'annotate', 'assembly', 'analysis']

  # Get the starting time
  starting_time = time.time()

  # setup Argument Parser for stdin arguments
  parser = argparse.ArgumentParser(add_help = True)

  # define arguments
  parser.add_argument('input', nargs = '+', action = 'store', 
                      help = 'single or paired input files in <fastq> format')
  parser.add_argument('--version', action = 'version', version = '%(prog)s 0.5')
  parser.add_argument('-v', dest = 'verbose', action = 'store_true', default = False,
                      help = 'more detailed output (default = False)')
  parser.add_argument('-t', dest = 'threads', type = int, action = 'store', 
                      default = multiprocessing.cpu_count() - 1,
                      help = 'number of threads to use (default = %d)' 
                      % (multiprocessing.cpu_count() - 1))
  parser.add_argument('-p', dest = 'param', action = 'store', default = PARAM_FILE,
                      help = 'use alternative config file (default = parameter.conf)')
  parser.add_argument('-s', dest = 'skip', action = 'store', default = '', 
                      choices = ['preprocessing', 'assembly', 'annotation','analysis'],
                      help = 'skip steps in the pipeline (default = None)')
  parser.add_argument('-o', dest = 'output', action = 'store', default = RESULT_DIR,
                      help = 'use alternative output folder')
  parser.add_argument('-a', dest = 'assembler', default = 'MetaVelvet', 
                      choices = ['metavelvet', 'flash','both'],
                      help = 'assembling program to use (default = MetaVelvet)')
  parser.add_argument('-c', dest = 'annotation', default = 'both',
                      choices = ['metacv', 'blastn', 'both'],
                      help = 'classifier to use for annotation (default = both)')     
  parser.add_argument('--use_contigs', dest = 'use_contigs', action = 'store_true', 
                      default = 'False',
                      help = 'should MetaCV use assembled Reads or RAW Reads (default = RAW')                                    
  parser.add_argument('--notrimming', dest = 'trim', action = 'store_false', default = True,
                      help = 'trim and filter input reads? (default = True)')
  parser.add_argument('--noquality', dest = 'quality', action = 'store_false', default = True,
                    help = 'create no quality report (default = True)')
  parser.add_argument('--noreport', dest = 'krona', action = 'store_false', default = True,
                      help = 'create no pie chart with the annotated taxonomical data (default = True)')
  parser.add_argument('--merge', dest = 'merge_uncombined', action = 'store_true', default = False,
                      help = 'merge concatinated reads with not concatinated (default = False)')

  args = parser.parse_args()
  # init the Pipeline
  RESULT_DIR = args.output if args.output else RESULT_DIR
  # check if param File exists
  if os.path.isfile(args.param):
      PARAM_FILE = args.param
  else:
      if os.path.isfile(PARAM_FILE):
        sys.stderr.write('ERROR 3: Parameter File could not be found!\n')
        sys.stderr.write('Use standard Parameter File:\n%s\n\n' % (PARAM_FILE))
      else:
          raise ParamFileNotFound(args.param)
    
  # check if input exists
  if not all(os.path.isfile(file) for file in args.input):
      raise InputNotFound(to_string(args.input))

  if __name__ == '__main__':   

    # create outputdir and log folder
    create_outputdir(RESULT_DIR)
    create_outputdir(RESULT_DIR + os.sep +'log')

    # create the global settings object
    settings = General(args.threads, args.verbose, args.skip, starting_time, args.trim, 
                       args.quality, args.krona, args.use_contigs, args.merge_uncombined, args.assembler, 
                       args.annotation, 1)

    # setup the input, outputs and important files
    files = FileSettings(absolute_path(args.input), os.path.normpath(RESULT_DIR), PARAM_FILE)

    exe = Executables(PARAM_FILE)
    # get the all skipped steps
    skip = to_string(settings.get_skip())

    try:
      print "hello"
      # START the modules of Pipeline and wait until completion
      if skip in 'preprocessing' and skip:
          skip_msg(skip)
      else:
          # init the preprocessing module
          pre = Preprocess(settings.get_threads(), 
                           settings.get_step_number(),
                           settings.get_verbose(),
                           settings.get_actual_time(),
                           files.get_input(),
                           files.get_logdir(),
                           exe.get_FastQC(),
                           settings.get_quality(),
                           files.get_quality_dir(),
                           parse_parameter(FastQC_Parameter(PARAM_FILE)),
                           exe.get_TrimGalore(),
                           settings.get_trim(),
                           files.get_trim_dir(), 
                           parse_parameter(TrimGalore_Parameter(PARAM_FILE)))
          # run preprocessing functions
          results = pre.manage_preprocessing()
          # update pipeline variables with results
          settings.set_step_number(results[0])
          if len(results) > 1:
              files.set_input(absolute_path(results[1]))
              files.set_preprocessed_output(absolute_path(results[1]))

      if skip in 'assembly' and skip:
        skip_msg(skip)
      else:
        # init the assembly module 
        assembly = Assembly(settings.get_threads(), 
                            settings.get_step_number(),
                            settings.get_verbose(),
                            settings.get_actual_time(),
                            files.get_logdir(),
                            files.get_input(),
                            settings.get_assembler(),
                            exe.get_Flash(),
                            files.get_concat_dir(),
                            parse_parameter(FLASH_Parameter(PARAM_FILE)),
                            settings.get_merge_uncombined(),
                            exe.get_Velveth(),
                            exe.get_Velvetg(),
                            exe.get_MetaVelvet(),
                            files.get_assembly_dir(),
                            Velveth_Parameter(PARAM_FILE).get_kmer(PARAM_FILE),
                            parse_parameter(Velveth_Parameter(PARAM_FILE)),
                            parse_parameter(Velvetg_Parameter(PARAM_FILE)),
                            parse_parameter(MetaVelvet_Parameter(PARAM_FILE)))
        # run assembly functions
        results = assembly.manage_assembly()
        # update pipeline variables with results
        settings.set_step_number(results[0])
        files.set_input(absolute_path(results[1]))
        files.set_concatinated_output(absolute_path(results[2]))
        files.set_assembled_output(absolute_path(results[3]))
  
      if skip in 'annotation'and skip:
          skip_msg(skip)
      else:
          # init the annotation module
          anno = Annotation(settings.get_threads(), 
                            settings.get_step_number(),
                            settings.get_verbose(),
                            settings.get_actual_time(),
                            files.get_logdir(),
                            files.get_input(),
                            files.get_raw(),
                            settings.get_annotation(),
                            settings.get_use_contigs(),
                            exe.get_Blastn(),
                            exe.get_Blastn_DB(),
                            exe.get_Converter(),
                            files.get_blastn_dir(),
                            Blastn_Parameter(PARAM_FILE).outfmt,
                            parse_parameter(Blastn_Parameter(PARAM_FILE)),
                            exe.get_MetaCV(),
                            exe.get_MetaCV_DB(),
                            files.get_metacv_dir(),
                            MetaCV_Parameter(PARAM_FILE).get_seq(),
                            MetaCV_Parameter(PARAM_FILE).get_mode(),
                            MetaCV_Parameter(PARAM_FILE).get_orf(),
                            MetaCV_Parameter(PARAM_FILE).get_total_reads(),
                            MetaCV_Parameter(PARAM_FILE).get_min_qual(),
                            MetaCV_Parameter(PARAM_FILE).get_taxon(),
                            MetaCV_Parameter(PARAM_FILE).get_name())

          # run the annotation functions
          results = anno.manage_annotation()
          settings.set_step_number(results[0])
          files.set_blastn_output(absolute_path(results[1]))
          files.set_metacv_output(absolute_path(results[2]))
      
      if skip in 'analysis' and skip:
          skip_msg(skip)
      else:
          # init the analysis module
          analysis = Analysis(settings.get_threads(),
                              settings.get_step_number(),
                              settings.get_verbose(),
                              settings.get_actual_time(),
                              files.get_logdir(),
                              settings.get_annotation(),
                              files.get_output(),
                              files.get_parsed_db_dir(),
                              files.get_annotated_db_dir(),
                              files.get_subseted_db_dir(),
                              files.get_krona_report_dir(),
                              files.get_blastn_output(),
                              files.get_metacv_output(),
                              exe.get_Parser(), 
                              parse_parameter(blastParser_Parameter(PARAM_FILE)),
                              blastParser_Parameter(PARAM_FILE).get_name(),
                              exe.get_Annotate(),
                              parse_parameter(Rannotate_Parameter(PARAM_FILE)),
                              Rannotate_Parameter(PARAM_FILE).get_name(),
                              Rannotate_Parameter(PARAM_FILE).get_taxon_db(),
                              exe.get_Subset(),
                              subsetDB_Parameter(PARAM_FILE).get_bitscore(),
                              subsetDB_Parameter(PARAM_FILE).get_classifier(),
                              subsetDB_Parameter(PARAM_FILE).get_rank(),
                              subsetDB_Parameter(PARAM_FILE).get_taxon_db(),
                              exe.get_Krona_Blast(),
                              parse_parameter(Krona_Parameter(PARAM_FILE)),
                              Krona_Parameter(PARAM_FILE).get_name(),
                              settings.get_krona(),
                              exe.get_Perl_lib())
          # run the analysis function
          results = analysis.manage_analysis()
          files.set_parser_output(absolute_path(results[0]))
          files.set_annotated_output(absolute_path(results[1]))    
        
    except KeyboardInterrupt:

     sys.stdout.write('\nERROR 1 : Operation cancelled by User!\n')
     sys.exit(1)

    # print ending message
    print_verbose('\nPIPELINE COMPLETE!\n\n')
    print_running_time(settings.get_actual_time())