Beispiel #1
0
def solid(args):
   '''Create the SOLiD assembly call and write it to solid_assembly.sh.

   Builds an assemble.pl command line from the single-end (args.se),
   paired-end (args.pe) or mate-pair (args.mp) input, checked in that
   order, appends any extra assembler options (args.add_solid) and an
   optional semaphore command (args.sfile), and writes the result to
   solid_assembly.sh in the current working directory.

   Returns a one-element list with the shell call that runs the script.
   Raises ValueError if none of args.se, args.pe or args.mp is set.
   '''
   
   import mlst_modules
   paths = mlst_modules.setSystem()
   
   assembler = '%sassemble.pl' % paths['solid_home']
   
   def paired_args(reads, tag):
      # pe and mp differ only in the tag of the second read set (f5 / r3)
      return ' %s %s %i -%s %s -%sqv %s -ins_length %i -ins_length_sd %i -numcores %i ' % (
         reads[0], reads[1], args.rf, tag, reads[2], tag, reads[3],
         args.ins_length, args.ins_length_sd, args.n)
   
   if args.se:
      arg = ' %s %i -numcores %i' % (' '.join(args.se), args.rf, args.n)
   elif args.pe:
      arg = paired_args(args.pe, 'f5')
   elif args.mp:
      arg = paired_args(args.mp, 'r3')
   else:
      raise ValueError('Input must be given by --se, --pe or --mp')
   
   # add extra commands
   if args.add_solid:
      arg = arg + ' ' + args.add_solid
   script_lines = [assembler + arg]
   
   # write semaphore (the string 'None' is treated as "no semaphore")
   if args.sfile and args.sfile != 'None':
      script_lines.append('echo "done" > %s' % args.sfile)
   
   # write in bash script; the context manager closes the file even on error
   with open('solid_assembly.sh', 'w') as fh:
      fh.write('#!/bin/sh\n\n')
      for line in script_lines:
         fh.write(line + '\n')
   
   # return command (NB. add env. variable to run)
   return ['sh solid_assembly.sh']
Beispiel #2
0
def newbler_stats(args):
   '''Create the newbler statistics calls and write them to newbler_stats.sh.

   For each newbler output fasta under <args.outpath>/assembly/ a perl
   one-liner extracts the contig lengths from the fasta headers into a
   .lengths file; an R script is then run on those files. The scaffold
   file is only processed when args.pe is set, otherwise NA is passed to
   the R script in its place. An optional semaphore command (args.sfile)
   is appended last.

   Returns a one-element list with the shell call that runs the script.
   '''
   
   import mlst_modules
   paths = mlst_modules.setSystem()
   
   assembly_path = '%s/assembly/' % args.outpath
   
   # backslashes are escaped explicitly so the script receives a literal
   # backslash-d / backslash-n without relying on Python keeping unknown escapes
   length_cmd = '''perl -ne 'if ($_ =~ m/^>.+length=(\\d+)/) { print $1, "\\n"}' %s > %s.lengths '''
   
   prefixes = ['454AllContigs', '454LargeContigs']
   if args.pe:
      prefixes.append('454Scaffolds')
   cmds = [length_cmd % (assembly_path + prefix + '.fna', prefix) for prefix in prefixes]
   
   # third positional argument to the R script is the scaffold lengths file or NA
   if args.pe:
      scaffold_lengths = '454Scaffolds.lengths'
   else:
      scaffold_lengths = 'NA'
   cmds.append('%sR --vanilla 454AllContigs.lengths 454LargeContigs.lengths %s assembly.stats.txt < %smlst_denovo_newbler_stats.R ' % (paths['R_home'], scaffold_lengths, paths['mlst_home']))
   
   ## write semaphore (the string 'None' is treated as "no semaphore")
   if args.sfile and args.sfile != 'None':
      cmds.append('echo "done" > %s' % args.sfile)
   
   # write in bash script; the context manager closes the file even on error
   with open('newbler_stats.sh', 'w') as fh:
      fh.write('#!/bin/sh\n\n')
      for cmd in cmds:
         fh.write(cmd + '\n')
   
   # return command (NB. add env. variable to run)
   return ['sh newbler_stats.sh']
Beispiel #3
0
 def submit_xmsub(self, depends, logger):
    '''Submit every call in self.calls through xmsub and return the job ids.

    depends: per-call dependency strings, indexed like self.calls; only
             read when self.depend is true.
    logger:  logger used to record each xmsub command line; may be falsy.

    A call of the form "program infile > outfile" has its redirection
    stripped and the target passed to xmsub as -O instead. Each submission
    is retried once after 60 seconds if the first attempt raises.

    Returns the list of job ids: the value returned by self.ssh_submit when
    self.host is set, otherwise the second line of the local xmsub output.
    '''
    
    import re
    import subprocess
    import time
    import os
    import mlst_modules
    
    home = os.getcwd()
    paths = mlst_modules.setSystem()
    
    # compiled once: matches "<command> > <outfile>" at the end of a call
    redirect = re.compile(r'(^.+)>\s(.+)$')
    
    ids = []
    for i, call in enumerate(self.calls):
       stdout = '%s/log/%s%i.o' % (home, self.runname, i)
       stderr = '%s/log/%s%i.e' % (home, self.runname, i)
       
       # catch stdouts if call includes 'program infile > outfile', needs to be directed as -O instead of >
       match = redirect.search(call)
       if match:
          call = match.group(1)
          stdout = '%s/%s' % (home, match.group(2))
       
       # create xmsub commands
       cmd = paths['mlst_home'] + 'xmsub'
       
       # toggle if job should be on hold or env variable should be added
       if self.hold: cmd = '%s -h ' % cmd
       if self.env: cmd = cmd + ' -v %s' % self.env
       
       if not self.depend:
          xmsub = cmd+' -d %s -l %s,partition=%s -O %s -E %s -r y -q %s -N %s -t %s' % (home, self.cpu, self.partition, stdout, stderr, self.queue, self.runname, call)
       else:
          xmsub = cmd+' -d %s -l %s,depend=%s,partition=%s -O %s -E %s -r y -q %s -N %s -t %s' % (home, self.cpu, depends[i], self.partition, stdout, stderr, self.queue, self.runname, call)
       
       time.sleep(1)
       if logger: logger.info(xmsub)
       
       # submit on different host if that is given
       if self.host:
          try:
             job_id, _ = self.ssh_submit(self.host, xmsub)
          except Exception as err:
             # Exception (not a bare except) so Ctrl-C / SystemExit are not
             # swallowed by the retry; report the actual failure
             print(err)
             print('Job error, waiting 1m')
             time.sleep(60)
             job_id, _ = self.ssh_submit(self.host, xmsub)
          ids.append(job_id)
       else:
          try:
             output = subprocess.check_output(xmsub, shell=True)
          except Exception:
             print('Job error, waiting 1m')
             time.sleep(60)
             output = subprocess.check_output(xmsub, shell=True)
          # the job id is taken from the second line of the xmsub output
          ids.append(output.split('\n')[1])
    return ids
Beispiel #4
0
def create_jobs(prog, args, sfile, logger):
   '''Build an xmsub command that re-runs prog with the parsed args and submit it.

   prog:   program to run inside the submitted job.
   args:   argparse-style namespace; every attribute is turned back into a
           command-line option (see the special cases below).
   sfile:  semaphore file name, passed as --sfile in place of the wait option.
   logger: logger used to record the final command line.

   The command is executed through the shell and the stripped output of
   xmsub is printed. Raises ValueError for an attribute whose value is not
   a list, string, int or float.
   '''
   
   import subprocess
   import os
   import mlst_modules
   
   paths = mlst_modules.setSystem()
   
   # create commands
   msub = '%sxmsub -d %s -l nodes=1:ppn=1,mem=256mb,walltime=172800,partition=%s -q %s -r y -N run_%s -O run_%s.out -E run_%s.err -t' % (paths['mlst_home'], os.getcwd(), args.partition, args.q, args.assembler, args.assembler, args.assembler)
   cmd = [msub, prog]
   
   # free-text options that must be quoted so they survive the shell as one argument
   quoted_keys = ('add_solid', 'add_velveth', 'add_velvetg')
   
   # create parameters for assembler
   for key, value in vars(args).items():
      # special cases
      if key == 'assembler' or value is None:
         continue
      if key == 'wait':
         # the wait option is replaced by the semaphore file for the job
         cmd.append('--sfile %s' % sfile)
         continue
      if isinstance(value, bool):
         # flags are passed only when set
         if value: cmd.append('--%s' % key)
         continue
      if key == 'sample':
         cmd.append('--sample None')
         continue
      if key in quoted_keys:
         cmd.append('--%s "%s"' % (key, value))
         continue
      
      # key-value parameters
      cmd.append('--%s' % key)
      if isinstance(value, list):
         cmd.append(' '.join(value))
      elif isinstance(value, (str, int, float)):
         cmd.append('%s' % value)
      else:
         raise ValueError('%s, %s is a %s, should be either list, string, int or float' % (key, value, type(value)))
   
   # submit job
   cmd = ' '.join(cmd)
   logger.info(cmd)
   job = subprocess.check_output(cmd, shell=True)
   job = job.strip('\n')
   print('Jobs are spawned by: %s' % job)
Beispiel #5
0
def postprocess(args):
   '''Create the velvet post-processing calls and write them to postprocess.sh.

   When more than one value is given in args.ksizes (lower limit, upper
   limit and optionally a step, default 2) an R script is called with the
   stats file of every k-mer assembly, followed by the accept script for
   args.outpath. The clean script is always added, and an optional
   semaphore command (args.sfile) comes last.

   Returns a one-element list with the shell call that runs the script.
   Raises ValueError if args.ksizes holds more than three values.
   '''
   
   import mlst_modules
   paths = mlst_modules.setSystem()
   
   calls = []
   n_ksizes = len(args.ksizes)
   if n_ksizes > 1:
      if n_ksizes == 2:
         step = 2
      elif n_ksizes == 3:
         step = args.ksizes[2]
      else:
         # previously fell through with an unbound "step"
         raise ValueError('ksizes must hold at most three values (lower limit, upper limit, step)')
      
      ## parse_assemblies: one "<stats file> <k>" pair per assembled k-mer size
      stats = ['%s_%s/stats.txt %s' % (args.outpath, k, k)
               for k in range(int(args.ksizes[0]), int(args.ksizes[1]), int(step))]
      calls.append('%sR --vanilla ' % paths['R_home'] + ' '.join(stats) + ' < %smlst_denovo_velvet_parse.R' % (paths['mlst_home']))
      
      ## accept assembly
      calls.append('%smlst_denovo_velvet_accept.py %s' % (paths['mlst_home'], args.outpath))
   
   ## clean
   calls.append('%smlst_denovo_velvet_clean.py' % (paths['mlst_home']))
   
   ## write semaphore (the string 'None' is treated as "no semaphore")
   if args.sfile and args.sfile != 'None':
      calls.append('echo "done" > %s' % args.sfile)
   
   ## write in bash script; the context manager closes the file even on error
   with open('postprocess.sh', 'w') as fh:
      fh.write('#!/bin/sh\n\n')
      for call in calls:
         fh.write(call + '\n')
   
   return ['sh postprocess.sh']
Beispiel #6
0
 def wait(self):
    '''Block until a dependent semaphore job has written its file.

    Submits a small xmsub job that depends on all ids in self.semaphore_ids
    and writes its output to a randomly named file under semaphores/. The
    method then polls for that file every self.check_interval seconds.

    Raises SystemExit if the file has not appeared after self.max_time
    seconds of polling.
    '''
    
    from time import sleep
    import string
    import random
    import os
    import mlst_modules
    import subprocess
    
    paths = mlst_modules.setSystem()
    
    # add directory and set semaphore filename
    if not os.path.exists('semaphores/'):
       os.makedirs('semaphores/')
    
    # random suffix gives each wait its own semaphore file
    rand = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
    semaphore_file = 'semaphores/' + self.file_prefix + '.' + rand
    semaphore_file_err = 'log/' + self.file_prefix + '.' + rand + '.err'
    
    # create job
    depends = ':'.join(self.semaphore_ids)
    # NOTE(review): "partition" is not defined in this method; unless a
    # module-level name exists this raises NameError. It probably should be
    # self.partition - confirm against the class before changing.
    xmsub = '%sxmsub -d %s -l ncpus=1,mem=10mb,walltime=180,depend=%s,partition=%s -O %s -q %s -N semaphores -E %s -r y -t echo done' % (paths['mlst_home'], self.home, depends, partition, semaphore_file, self.queue, semaphore_file_err)
    
    # submit job (the returned job id is not needed)
    if self.host:
       _, stderr = self.ssh_submit(self.host, xmsub)
       if stderr:
          print(stderr)
    else:
       subprocess.check_output(xmsub, shell=True)
    
    # check for file to appear
    cnt = self.max_time
    while cnt > 0:
       if os.path.isfile(semaphore_file):
          break
       cnt -= self.check_interval
       sleep(self.check_interval)
    if cnt <= 0:
       # the format arguments were missing, which raised TypeError instead
       raise SystemExit('%s did not finish in %is' % (semaphore_file, self.max_time))
Beispiel #7
0
def start_assembly(args, logger):
   '''Start a newbler assembly: create the job scripts, submit and release them.

   args:   parsed command-line namespace; reads partition, n, m and q here
           and is passed on to newbler() and newbler_stats().
   logger: logger handed to the Moab job objects.

   The statistics job is submitted with a dependency on the assembly job.
   '''
   
   import mlst_modules
   from mlst_classes import Moab
   
   # host the jobs are submitted from
   host = 'cge-s2.cbs.dtu.dk'
   
   # return value is not needed here; the call is kept in case setSystem
   # has side effects - NOTE(review): drop if it is a pure lookup
   mlst_modules.setSystem()
   
   # set queueing: the resource syntax depends on the partition
   if args.partition == 'uv':
      cpu_fmt = 'ncpus=%i,mem=%s,walltime=172800'
   else:
      cpu_fmt = 'nodes=1:ppn=%i,mem=%s,walltime=172800'
   cpuV = cpu_fmt % (args.n, args.m)   # assembly job
   cpuA = cpu_fmt % (1, '512mb')       # statistics job
   
   # newbler() must run first: it rewrites args.se / args.pe
   newbler_calls = newbler(args)
   newblerstats_calls = newbler_stats(args)
   
   # set environment variable (add newbler binaries to bin):
   env_var = 'PATH=/panvol1/simon/bin/454/bin/'
   
   # submit and release jobs
   print("Submitting jobs")
   newbler_moab = Moab(newbler_calls, logfile=logger, runname='run_mlst_newbler', queue=args.q, cpu=cpuV, env=env_var, partition=args.partition, host=host)
   newblerstats_moab = Moab(newblerstats_calls, logfile=logger, runname='run_mlst_newblerstats', queue=args.q, cpu=cpuA, depend=True, depend_type='one2one', depend_val=[1], depend_ids=newbler_moab.ids, partition=args.partition, host=host)
   
   # release jobs
   newbler_moab.release(host)
   newblerstats_moab.release(host)
Beispiel #8
0
def start_assembly(args, logger):
   '''Start assembly of solid reads: create the job script, submit and release it.

   args:   parsed command-line namespace; reads partition, n, m and q here
           and is passed on to solid().
   logger: logger handed to the Moab job object.
   '''
   
   import mlst_modules
   from mlst_classes import Moab
   
   # host the job is submitted from
   host = 'cge-s2.cbs.dtu.dk'
   
   paths = mlst_modules.setSystem()
   
   # set queueing: the resource syntax depends on the partition
   if args.partition == 'uv':
      cpuV = 'ncpus=%i,mem=%s,walltime=172800' % (args.n, args.m)
   else:
      cpuV = 'nodes=1:ppn=%i,mem=%s,walltime=172800' % (args.n, args.m)
   
   solid_calls = solid(args)
   
   # set environment variable (add solid binaries to bin):
   env_var = 'denovo2=%s' % paths['solid_home']
   
   # submit and release jobs
   print("Submitting jobs")
   solid_moab = Moab(solid_calls, logfile=logger, runname='run_mlst_solid', queue=args.q, cpu=cpuV, env=env_var, partition=args.partition, host=host)
   
   # release jobs
   solid_moab.release(host=host)
Beispiel #9
0
def newbler(args):
   '''Create the newbler calls and write them to newbler.sh.

   Input files in args.se and args.pe are first prepared for newbler:
   fastq files are converted by mlst_fastq2fastaqual.py, gzipped fasta
   files (and a matching .qual.gz, if present on disk) are decompressed
   into the working directory with pigz, and all other files are used as
   given. args.se and args.pe are replaced by the prepared file lists.
   The script then creates the project, adds every run and starts it.

   Returns a one-element list with the shell call that runs the script.
   '''
   
   import os
   import mlst_modules
   
   paths = mlst_modules.setSystem()
   cmds = []
   
   def prepare(files):
      '''Queue conversion commands for files and return the names newbler should read'''
      ready = []
      for f in files:
         ftype = mlst_modules.set_filetype(f)
         basename = os.path.split(f)[1]
         if ftype == 'fastq':
            cmds.append('%smlst_fastq2fastaqual.py --i %s --p %s' % (paths['mlst_home'], f, basename))
            ready.append(basename + '.fasta')
         elif ftype == 'fasta' and f.endswith('.gz'):
            # decompress into the working directory and use that local copy
            fnew = os.path.splitext(basename)[0]
            cmds.append('''%spigz -dc -p %s %s > %s''' % (paths['pigz_home'], args.n, f, fnew))
            ready.append(fnew)
            
            # look for qual file (dont add to path because newbler will pick it up)
            stem = os.path.splitext(os.path.splitext(f)[0])[0]
            possible_qual = stem + '.qual.gz'
            if os.path.exists(possible_qual):
               qnew = os.path.split(stem + '.qual')[1]
               cmds.append('''%spigz -dc -p %s %s > %s''' % (paths['pigz_home'], args.n, possible_qual, qnew))
         else:
            ready.append(f)
      return ready
   
   # The same routine now serves both read types. The paired-end copy used
   # to add plain fasta files to the single-end list and pointed gzipped
   # fasta at a path that was never written.
   args.se = prepare(args.se) if args.se else []
   args.pe = prepare(args.pe) if args.pe else []
   
   cmds.append('newAssembly %s' % args.outpath)
   for f in args.se:
      cmds.append('addRun -lib shotgun %s %s' % (args.outpath, f))
   for i, f in enumerate(args.pe):
      cmds.append('addRun -p -lib PE%i %s %s' % (i, args.outpath, f))
   cmds.append('runProject -cpu %s %s' % (args.n, args.outpath))
   
   # write in bash script; the context manager closes the file even on error
   with open('newbler.sh', 'w') as fh:
      fh.write('#!/bin/sh\n\n')
      for cmd in cmds:
         fh.write(cmd + '\n')
   
   # return command (NB. add env. variable to run)
   return ['sh newbler.sh']
Beispiel #10
0
def start_assembly(args, logger):
   '''Start a velvet assembly: create the job scripts, submit and release them.

   args:   parsed command-line namespace; reads partition, n, m, q, ksizes
           and trim here and is passed on to the call-building helpers.
   logger: logger handed to the Moab job objects.

   Jobs are submitted in the order trimming (only with args.trim),
   velvet, postprocessing; the velvet jobs depend on the trimming jobs
   and the postprocessing job on the velvet jobs.
   '''
   
   import mlst_modules
   from mlst_classes import Moab
   import os
   
   # NB: mlst_denovo_velvet is run from a compute node, it will then ssh to "host" and submit the jobs from there (cge-s2)
   host = 'cge-s2.cbs.dtu.dk'
   
   # return value is not needed here; the call is kept in case setSystem
   # has side effects - NOTE(review): drop if it is a pure lookup
   mlst_modules.setSystem()
   
   # set queueing: the resource syntax depends on the partition
   if args.partition == 'uv':
      cpu_fmt = 'procs=%i,mem=%s,walltime=172800,flags=sharedmem'
   else:
      cpu_fmt = 'nodes=1:ppn=%i,mem=%s,walltime=172800'
   cpuV = cpu_fmt % (args.n, args.m)   # velvet jobs
   cpuA = cpu_fmt % (1, '512mb')       # postprocessing job
   cpuF = cpu_fmt % (2, args.m)        # trimming jobs
   
   # set kmersizes (if auto)
   if args.ksizes == ['auto']:
      args.ksizes = set_kmersizes(args)
   
   # trimming calls (must come before the velvet calls: illumina_trim
   # rewrites the read lists in args to point at the trimmed files)
   if args.trim:
      illuminatrim_calls = illumina_trim(args, int(args.ksizes[0]), 15, 20, 15, False)
      if not os.path.exists('trimmed'):
         os.makedirs('trimmed')
   
   # velvet calls
   velvet_calls = create_velvet_calls(args)
   
   # velvet parse calls
   postprocess_calls = postprocess(args)
   
   # set environment variable:
   env_var = 'OMP_NUM_THREADS=%i' % int(args.n - 1)
   
   # submit and release jobs
   print("Submitting jobs")
   
   if args.trim:
      # if trimming is needed
      illuminatrim_moab = Moab(illuminatrim_calls, logfile=logger, runname='run_mlst_trim', queue=args.q, cpu=cpuF, partition=args.partition, host=host)
      velvet_moab = Moab(velvet_calls, logfile=logger, runname='run_mlst_velvet', queue=args.q, cpu=cpuV, depend=True, depend_type='all', depend_val=[1], depend_ids=illuminatrim_moab.ids, env=env_var, partition=args.partition, host=host)
   else:
      # if no trimming
      velvet_moab = Moab(velvet_calls, logfile=logger, runname='run_mlst_velvet', queue=args.q, cpu=cpuV, env=env_var, partition=args.partition, host=host)
   
   # submit job for postprocessing
   postprocess_moab = Moab(postprocess_calls, logfile=logger, runname='run_mlst_postprocess', queue=args.q, cpu=cpuA, depend=True, depend_type='conc', depend_val=[len(velvet_calls)], depend_ids=velvet_moab.ids, partition=args.partition, host=host)
   
   # release jobs
   print("Releasing jobs")
   if args.trim and len(illuminatrim_calls) > 0:
      illuminatrim_moab.release(host=host)
   velvet_moab.release(host)
   postprocess_moab.release(host=host)
Beispiel #11
0
def create_velvet_calls(args):
   '''Create velveth/velvetg calls and write one velvet<i>.sh script per k-mer size.

   args.ksizes holds either one k-mer size (output goes to args.outpath),
   or a lower and upper limit with an optional step (default 2), in which
   case one assembly per k is written to <args.outpath>_<k>. Every script
   runs velveth followed by velvetg for its k-mer size.

   Returns the list of shell calls, one per generated script.
   Raises ValueError if args.ksizes does not hold one, two or three values.
   '''
   
   import mlst_modules
   paths = mlst_modules.setSystem()
   
   # (output directory, k) for every assembly to run
   n_ksizes = len(args.ksizes)
   if n_ksizes == 1:
      targets = [(args.outpath, args.ksizes[0])]
   elif n_ksizes in (2, 3):
      if n_ksizes == 2:
         step = 2
      else:
         step = args.ksizes[2]
      targets = [('%s_%s' % (args.outpath, k), k)
                 for k in range(int(args.ksizes[0]), int(args.ksizes[1]), int(step))]
   else:
      raise ValueError('ksizes must be one value giving ksize, two values giving lower and upper limit (step will be 2) or three values giving lower limit, upper limit and step')  
   
   def unpaired(flag, reads):
      '''Read option for a category given as [format, file, file, ...]'''
      return ' -%s -%s %s' % (flag, reads[0], ' '.join(reads[1:]))
   
   def paired(flag, reads):
      '''Read option for a paired category: [format, interleaved] or [format, file1, file2]'''
      if len(reads) == 2:
         return ' -%s -%s %s' % (flag, reads[0], reads[1])
      if len(reads) == 3:
         return ' -%s -separate -%s %s %s' % (flag, reads[0], reads[1], reads[2])
      # any other length adds nothing
      return ''
   
   # VELVETH: the read options are the same for every k
   read_args = ''
   if args.short: read_args += unpaired('short', args.short)
   if args.short2: read_args += unpaired('short2', args.short2)
   if args.shortPaired: read_args += paired('shortPaired', args.shortPaired)
   # the length test here used to look at args.shortPaired by mistake
   if args.shortPaired2: read_args += paired('shortPaired2', args.shortPaired2)
   if args.long: read_args += unpaired('long', args.long)
   if args.longPaired: read_args += paired('longPaired', args.longPaired)
   if args.add_velveth: read_args += ' %s' % args.add_velveth
   
   # VELVETG: cov_cutoff, exp_cov, ins_length, add_velvetg (same for every k)
   graph_args = ' -min_contig_lgth %i' % args.min_contig_lgth
   if args.cov_cutoff: graph_args += ' -cov_cutoff %f' % args.cov_cutoff
   if args.exp_cov != "None": graph_args += ' -exp_cov %s' % args.exp_cov
   if args.ins_length: graph_args += ' -ins_length %i' % args.ins_length
   if args.add_velvetg: graph_args += ' %s' % args.add_velvetg
   
   # COMBINE IN SH-FILES #
   sh_calls = []
   for i, (outdir, k) in enumerate(targets):
      velveth_call = '%svelveth' % paths['velvet_home'] + ' %s %s -create_binary ' % (outdir, k) + read_args
      velvetg_call = '%svelvetg %s' % (paths['velvet_home'], outdir) + graph_args
      with open('velvet%i.sh' % i, 'w') as fh:
         fh.write('#!/bin/sh\n\n')
         fh.write(velveth_call + '\n')
         fh.write(velvetg_call + '\n')
      sh_calls.append('sh velvet%i.sh' % i)
   return sh_calls
Beispiel #12
0
def illumina_trim(args, min_length, min_baseq, min_avgq, min_adaptor_match, keep_n):
   '''Create the read-trimming calls and point args at the trimmed files.

   For every fastq read set in args.short, args.short2, args.shortPaired
   and args.shortPaired2 a call to mlst_illumina_trim_h.py is created
   that writes its output under trimmed/. The file entries of each
   processed read list (everything after the leading format entry) are
   replaced in place by the trimmed file names. The trimmed/ directory is
   created if at least one call was made.

   min_length, min_baseq, min_avgq, min_adaptor_match: integer thresholds
      passed through to the trimming script.
   keep_n: if true, --keep_n is added to every call.

   Returns the list of trimming calls (possibly empty).
   Raises ValueError if a paired read list does not hold two or three entries.
   '''
   
   import os
   import mlst_modules
   paths = mlst_modules.setSystem()
   
   cmd = '%smlst_illumina_trim_h.py' % (paths['mlst_home'])
   thresholds = '--min_length %i --min_baseq %i --min_avgq %i --min_adaptor_match %i' % (
      min_length, min_baseq, min_avgq, min_adaptor_match)
   calls = []
   
   def trimmed_name(path, suffix):
      '''Name of the trimmed output file for path'''
      return 'trimmed/' + os.path.split(path)[1] + suffix
   
   def add_call(infiles, outfiles):
      '''Queue one trimming call reading infiles and writing outfiles'''
      arg = ' --i %s %s --o %s' % (' '.join(infiles), thresholds, ' '.join(outfiles))
      if keep_n: arg = arg + ' --keep_n'
      calls.append(cmd + arg)
   
   def trim_single(reads):
      '''One call per file of an unpaired [format, file, ...] list'''
      if not reads or reads[0] not in ('fastq', 'fastq.gz'):
         return
      outfiles = []
      for f in reads[1:]:
         outfile = trimmed_name(f, '.trim.fq')
         add_call([f], [outfile])
         outfiles.append(outfile)
      reads[1:] = outfiles
   
   def trim_paired(reads, label):
      '''One call for a paired [format, interleaved] or [format, file1, file2] list'''
      if not reads or 'fastq' not in reads[0]:
         return
      if len(reads) == 3:
         infiles = [reads[1], reads[2]]
         outfiles = [trimmed_name(reads[1], '.trim.fq'), trimmed_name(reads[2], '.trim.fq')]
      elif len(reads) == 2:
         # a single input file is split into two trimmed output files
         infiles = [reads[1]]
         outfiles = [trimmed_name(reads[1], '_1.trim.fq'), trimmed_name(reads[1], '_2.trim.fq')]
      else:
         raise ValueError('Length of input to %s is not correct' % label)
      add_call(infiles, outfiles)
      reads[1:] = outfiles
   
   trim_single(args.short)
   trim_single(args.short2)
   trim_paired(args.shortPaired, 'shortPaired')
   # the format check for shortPaired2 used to look at args.shortPaired
   trim_paired(args.shortPaired2, 'shortPaired2')
   
   if len(calls) > 0:
      if not os.path.exists('trimmed'):
         os.makedirs('trimmed')
   return calls
Beispiel #13
0
parser_solid.add_argument('--ins_length', help='estimate of mate/paired end insert length eg. (1200/170)', type=int, required=True)
# help text corrected: this option is the standard deviation, not the length itself
parser_solid.add_argument('--ins_length_sd', help='estimate of mate/paired end insert length standard deviation eg. (300/30)', type=int, required=True)
parser_solid.add_argument('--add_solid', help='additional parameters to solid assembler', default=None)

args = parser.parse_args()
# example invocations kept for manual testing:
#args = parser.parse_args('velvet --shortPaired Kleb-10-213361_2_1_sequence.txt Kleb-10-213361_2_2_sequence.txt --ksizes 41 55 4 --trim'.split())
#args = parser.parse_args('velvet --shortPaired Kleb-10-213361_2.interleaved.fastq --trim --sample Kleb_auto'.split())
#args = parser.parse_args('velvet --short 110601_I238_FCB067HABXX_L3_ESCqslRAADIAAPEI-2_1.fq --ksizes 45 75 --sample E_coli_TY2482_illumina --trim'.split())
#args = parser.parse_args('velvet --shortPaired test_kleb_1.fq test_kleb_2.fq --ksizes 41 55 4 --sample test_kleb --cov_cutoff 8'.split())
#args = parser.parse_args('newbler --se life_unimuenster_sff/*.sff --sample test_newbler --wait'.split())
#args = parser.parse_args('solid --mp ecoli_600x_F3.csfasta ecoli_600x_F3.qual ecoli_600x_R3.csfasta ecoli_600x_R3.qual --rf 5000000 --ins_length 1300 --ins_length_sd 300 --m 7gb --sample solid_test --wait'.split())
#args = parser.parse_args('velvet --shortPaired test_kleb_1.fq test_kleb_2.fq --ksizes 41 55 4'.split())

# set pythonpath (inherited by the jobs started from this process)
os.environ['PYTHONPATH'] = '/panvol1/simon/lib/python/:/home/panfs/cbs/projects/cge/servers/MLST/assembly/'
paths = mlst_modules.setSystem()

# If working dir is given, create and move to working directory else run where program is invoked
if args.sample:
   if not os.path.exists(args.sample):
      os.makedirs(args.sample)
   #os.chmod(args.sample, 0777)
   os.chdir(args.sample)

# create log dir (relative to the working directory chosen above)
if not os.path.exists('log'):
   os.makedirs('log')

# set logging