コード例 #1
0
def start_ab(args, logger):
    '''Run alignment followed by bam processing.

    args   -- parsed command line options (attributes are read directly)
    logger -- log handle passed through to the pipeline steps

    Returns None; progress is printed to stdout.
    '''

    import os
    import subprocess

    separator = "--------------------------------------"
    final_bam = args.outbam

    # build the library from the command line arguments
    # (start_alignment returns the library that is actually used below)
    library = genobox_modules.initialize_library(
        args.libfile, args.se, args.pe1, args.pe2,
        args.sample, args.mapq, args.libs, args.pl)

    # run banner
    if args.sample:
        print(separator)
        print("Processing sample: %s" % args.sample)
    print(separator)

    print("Starting alignment")
    (bamfiles, library) = start_alignment(args, logger)

    print("Starting bam processing")
    unique_bams = genobox_modules.unique(bamfiles.values())
    final_bam = start_bamprocess(
        library, unique_bams, args.mapq, args.libs, args.tmpdir, args.queue,
        final_bam, args.realignment, args.known, args.fa, args.sample,
        args.partition, logger)

    # clean up the files left behind by the queuing system
    genobox_modules.rm_files(['run_genobox_*', 'semaphores.*'])

    print("Done")
    print(separator)
コード例 #2
0
ファイル: genobox_ab.py プロジェクト: srcbs/GenoBox
def start_ab(args, logger):
    '''Run alignment followed by bam processing.

    args   -- parsed command line options (attributes are read directly)
    logger -- log handle passed through to the pipeline steps

    Returns None; progress is printed to stdout.
    '''

    import os
    import subprocess

    rule = "--------------------------------------"
    final_bam = args.outbam

    # build the library from the command line arguments
    # (start_alignment returns the library that is actually used below)
    library = genobox_modules.initialize_library(
        args.libfile, args.se, args.pe1, args.pe2,
        args.sample, args.mapq, args.libs, args.pl)

    # run banner
    if args.sample:
        print(rule)
        print("Processing sample: %s" % args.sample)
    print(rule)

    print("Starting alignment")
    (bamfiles, library) = start_alignment(args, logger)

    print("Starting bam processing")
    bams_to_process = genobox_modules.unique(bamfiles.values())
    final_bam = start_bamprocess(
        library, bams_to_process, args.mapq, args.libs, args.tmpdir,
        args.queue, final_bam, args.realignment, args.known, args.fa,
        args.sample, args.partition, logger)

    # clean up the files left behind by the queuing system
    genobox_modules.rm_files(['run_genobox_*', 'semaphores.*'])

    print("Done")
    print(rule)
コード例 #3
0
ファイル: genobox_alignment.py プロジェクト: xshang/GenoBox
def start_alignment(args, logger):
    '''Submit BWA alignment jobs for the input fastq files and wait for them.

    Single end files (args.se) are grouped by the platform reported by the
    library and dispatched to bwa_se_align (ILLUMINA, HELICOS), bwasw_pacbio
    (PACBIO) or bwasw_iontorrent (IONTORRENT); files on any other platform
    are not aligned. Paired end files (args.pe1/args.pe2) go to bwa_pe_align.

    args   -- parsed command line options (attributes are read directly)
    logger -- log handle passed on to the alignment submitters

    Returns the tuple (bamfiles, library): the dict collected from the
    submitters and the library after update_with_tag('Data', 'BAM', ...).
    Raises ValueError if --pe1 and --pe2 differ in number of files.
    '''

    import os
    import random
    import string
    import subprocess

    import genobox_modules
    from genobox_classes import Semaphore, Library

    paths = genobox_modules.setSystem()
    home = os.getcwd()
    semaphore_ids = []
    bamfiles = dict()
    outdir = 'alignment/'

    if not os.path.exists('alignment'):
        os.makedirs('alignment')

    # args.mapq only exists when called from abgv; plain alignment uses [30]
    mapq = args.mapq if hasattr(args, 'mapq') else [30]
    library = genobox_modules.initialize_library(
        args.libfile, args.se, args.pe1, args.pe2,
        args.sample, mapq, args.libs, args.pl)

    # check for fa
    check_fa(args.fa, args.bwa6)

    # check for if trimming was performed (abgv only) and set correct files
    #(se_files, pe1_files, pe2_files) = check_trim(args)

    # single end alignments, one submission per platform
    if args.se:
        (platforms, platform2files) = library.getPL('Data')

        print("Submitting single end alignments")
        for platform, files in platform2files.items():
            # platforms without an aligner are skipped before any work is done
            if platform not in ('ILLUMINA', 'HELICOS', 'PACBIO', 'IONTORRENT'):
                continue

            # keep only the single end files and determine their formats
            toalign = [fq for fq in files if fq in args.se]
            fqtypes_se = [check_formats_fq(fq, args.gz, args.bwa6)
                          for fq in toalign]

            if platform == 'PACBIO':
                (se_align_ids, bamfiles_se) = bwasw_pacbio(
                    toalign, args.fa, fqtypes_se, outdir, args.bwa6, library,
                    args.n, args.queue, args.partition, logger)
            elif platform == 'IONTORRENT':
                (se_align_ids, bamfiles_se) = bwasw_iontorrent(
                    toalign, args.fa, fqtypes_se, outdir, args.bwa6, library,
                    args.n, args.queue, args.partition, logger)
            else:
                (se_align_ids, bamfiles_se) = bwa_se_align(
                    toalign, args.fa, fqtypes_se, args.qtrim, args.N, outdir,
                    args.bwa6, library, args.n, args.queue, args.add_aln,
                    args.partition, logger)

            semaphore_ids.extend(se_align_ids)
            bamfiles.update(bamfiles_se)

    # paired end alignments
    if args.pe1:
        if len(args.pe1) != len(args.pe2):
            raise ValueError('Same number of files must be given to --pe1 and --pe2')

        fqtypes_pe1 = [check_formats_fq(fq, args.gz, args.bwa6)
                       for fq in args.pe1]
        fqtypes_pe2 = [check_formats_fq(fq, args.gz, args.bwa6)
                       for fq in args.pe2]

        print("Submitting paired end alignments")
        (pe_align_ids, bamfiles_pe) = bwa_pe_align(
            args.pe1, args.pe2, args.fa, fqtypes_pe1, fqtypes_pe2, args.qtrim,
            args.N, outdir, args.bwa6, args.a, library, args.n, args.queue,
            args.add_aln, args.partition, logger)
        semaphore_ids.extend(pe_align_ids)
        bamfiles.update(bamfiles_pe)

    # record the bam files in the library
    library.update_with_tag('Data', 'BAM', bamfiles, True)

    # block until all submitted jobs are done
    print("Waiting for jobs to finish ...")

    # NOTE(review): 60 and 172800 look like a poll interval and a timeout in
    # seconds -- confirm against the Semaphore class
    s = Semaphore(semaphore_ids, home, 'bwa_alignment', args.queue, 60, 172800)
    s.wait()

    print("--------------------------------------")

    return (bamfiles, library)
コード例 #4
0
def start_abgv(args, logger):
    '''Run the full pipeline: alignment, bam processing, genotyping, filtering.

    Steps, in order: genome file check, library initialization, alignment,
    bam processing, bam stats, then caller-specific genotyping:
      * args.caller == 'samtools': genotyping, vcf filtering, dbsnp
        annotation and bcf2ref
      * args.caller == 'gatk': gatk genotyping and gatk vcf filtering
    Any other caller value skips the genotyping steps.

    args   -- parsed command line options (attributes are read directly)
    logger -- log handle passed through to every pipeline step

    Returns None; progress is printed to stdout.
    '''

    import os
    import subprocess

    separator = "--------------------------------------"

    # check genome file
    genobox_modules.check_genome(args.genome)

    final_bam = 'alignment/%s.flt.sort.rmdup.bam' % args.sample
    final_bcf = 'genotyping/%s.all.bcf' % args.sample

    # initialize library file from given arguments
    # (start_alignment returns the library that is actually used below)
    library = genobox_modules.initialize_library(args.libfile, args.se,
                                                 args.pe1, args.pe2,
                                                 args.sample, args.mapq,
                                                 args.libs, args.pl)

    # start run
    if args.sample:
        print(separator)
        print("Processing sample: %s" % args.sample)
    print(separator)

    # toggle start trimming
    #if args.no_trim == False:
    #   print "Starting trimming"
    #   (se_files, pe1_files, pe2_files) = start_trim(args, logger)
    #   library.update(Trim=se_files+pe1_files+pe2_files)

    print("Starting alignment")
    (bamfiles, library) = start_alignment(args, logger)

    print("Starting bam processing")
    final_bam = start_bamprocess(library,
                                 genobox_modules.unique(bamfiles.values()),
                                 args.mapq, args.libs, args.tmpdir, args.queue,
                                 final_bam, args.realignment, args.known,
                                 args.fa, args.sample, args.partition, logger)

    print("Starting bam stats")
    start_bamstats(args, final_bam, args.partition, logger, wait=False)

    print("Starting genotyping")
    if args.caller == 'samtools':
        final_bcf = start_genotyping(final_bam, args.genome, args.fa,
                                     args.prior, args.pp, args.queue,
                                     final_bcf, args.sample, args.partition,
                                     logger)
        print("Starting vcffiltering")
        final_vcf = start_vcffilter(final_bcf, args.genome, args.caller,
                                    args.Q, args.ex, args.rmsk, args.ab,
                                    args.prune, args.ovar, args.queue,
                                    args.sample, args.partition, logger)
        print("Start dbsnp")
        final_dbsnp_vcf = start_dbsnp(final_vcf, args.ex, args.dbsnp,
                                      args.ovar, args.queue, args.partition,
                                      logger)
        print("Start bcf2ref")
        start_bcf2ref(final_bcf, args.genome, args.Q, args.ex, args.dbsnp,
                      args.rmsk, 'genotyping/indels_for_filtering.vcf',
                      args.oref, args.queue, args.sample, args.partition,
                      logger)
    elif args.caller == 'gatk':
        print("Start genotyping (gatk)")
        # call_emit is an option on args itself; there is no nested args.args
        vcffiles = start_genotyping_gatk(final_bam, args.genome, args.fa,
                                         args.dbsnp, args.call_conf,
                                         args.call_emit, args.output_mode,
                                         args.queue, args.sample,
                                         args.partition, logger)
        print("Start vcffiltering (gatk)")
        # pass the logger argument, like every other step does
        final_vcfs = start_vcffilter_gatk(vcffiles, args.genome, args.fa,
                                          args.Q, args.rmsk, args.ab,
                                          args.prune, args.queue, args.sample,
                                          args.partition, logger)

    # remove queuing system outfiles
    genobox_modules.rm_files(['run_genobox_*', 'semaphores.*'])

    print("Done")
    print(separator)
    # NOTE(review): the bcf path above is 'genotyping/<sample>.all.bcf', which
    # does not match the path printed here -- confirm which one is intended
    print("Raw genotyping is written in genotyping/all.bcf")
    print("High confidence variants: %s" % args.ovar)
    print("High confidence reference: %s" % args.oref)
    print(separator)
コード例 #5
0
ファイル: genobox_abgv.py プロジェクト: srcbs/GenoBox
def start_abgv(args, logger):
    """Run the full pipeline: alignment, bam processing, genotyping, filtering.

    Steps, in order: genome file check, library initialization, alignment,
    bam processing, bam stats, then caller-specific genotyping:
      * args.caller == "samtools": genotyping, vcf filtering, dbsnp
        annotation and bcf2ref
      * args.caller == "gatk": gatk genotyping and gatk vcf filtering
    Any other caller value skips the genotyping steps.

    args   -- parsed command line options (attributes are read directly)
    logger -- log handle passed through to every pipeline step

    Returns None; progress is printed to stdout.
    """

    import os
    import subprocess

    separator = "--------------------------------------"

    # check genome file
    genobox_modules.check_genome(args.genome)

    final_bam = "alignment/%s.flt.sort.rmdup.bam" % args.sample
    final_bcf = "genotyping/%s.all.bcf" % args.sample

    # initialize library file from given arguments
    # (start_alignment returns the library that is actually used below)
    library = genobox_modules.initialize_library(
        args.libfile, args.se, args.pe1, args.pe2, args.sample, args.mapq, args.libs, args.pl
    )

    # start run
    if args.sample:
        print(separator)
        print("Processing sample: %s" % args.sample)
    print(separator)

    # toggle start trimming
    # if args.no_trim == False:
    #   print "Starting trimming"
    #   (se_files, pe1_files, pe2_files) = start_trim(args, logger)
    #   library.update(Trim=se_files+pe1_files+pe2_files)

    print("Starting alignment")
    (bamfiles, library) = start_alignment(args, logger)

    print("Starting bam processing")
    final_bam = start_bamprocess(
        library,
        genobox_modules.unique(bamfiles.values()),
        args.mapq,
        args.libs,
        args.tmpdir,
        args.queue,
        final_bam,
        args.realignment,
        args.known,
        args.fa,
        args.sample,
        args.partition,
        logger,
    )

    print("Starting bam stats")
    start_bamstats(args, final_bam, args.partition, logger, wait=False)

    print("Starting genotyping")
    if args.caller == "samtools":
        final_bcf = start_genotyping(
            final_bam,
            args.genome,
            args.fa,
            args.prior,
            args.pp,
            args.queue,
            final_bcf,
            args.sample,
            args.partition,
            logger,
        )
        print("Starting vcffiltering")
        final_vcf = start_vcffilter(
            final_bcf,
            args.genome,
            args.caller,
            args.Q,
            args.ex,
            args.rmsk,
            args.ab,
            args.prune,
            args.ovar,
            args.queue,
            args.sample,
            args.partition,
            logger,
        )
        print("Start dbsnp")
        final_dbsnp_vcf = start_dbsnp(final_vcf, args.ex, args.dbsnp, args.ovar, args.queue, args.partition, logger)
        print("Start bcf2ref")
        start_bcf2ref(
            final_bcf,
            args.genome,
            args.Q,
            args.ex,
            args.dbsnp,
            args.rmsk,
            "genotyping/indels_for_filtering.vcf",
            args.oref,
            args.queue,
            args.sample,
            args.partition,
            logger,
        )
    elif args.caller == "gatk":
        print("Start genotyping (gatk)")
        vcffiles = start_genotyping_gatk(
            final_bam,
            args.genome,
            args.fa,
            args.dbsnp,
            args.call_conf,
            # call_emit is an option on args itself; there is no nested args.args
            args.call_emit,
            args.output_mode,
            args.queue,
            args.sample,
            args.partition,
            logger,
        )
        print("Start vcffiltering (gatk)")
        final_vcfs = start_vcffilter_gatk(
            vcffiles,
            args.genome,
            args.fa,
            args.Q,
            args.rmsk,
            args.ab,
            args.prune,
            args.queue,
            args.sample,
            args.partition,
            # pass the logger argument, like every other step does
            logger,
        )

    # remove queuing system outfiles
    genobox_modules.rm_files(["run_genobox_*", "semaphores.*"])

    print("Done")
    print(separator)
    # NOTE(review): the bcf path above is "genotyping/<sample>.all.bcf", which
    # does not match the path printed here -- confirm which one is intended
    print("Raw genotyping is written in genotyping/all.bcf")
    print("High confidence variants: %s" % args.ovar)
    print("High confidence reference: %s" % args.oref)
    print(separator)
コード例 #6
0
def start_bamprocess(library_file, bams, mapq, libs, tmpdir, queue, final_bam,
                     realignment, known, fa, sample, partition, logger):
    '''Build and submit the bam processing job chain, then wait for it.

    The chain is: filter+sort each bam, merge bams per library, rmdup per
    library, merge into final_bam and, if realignment is true, realign.

    library_file -- a Library instance, a library file name, or an empty
                    value / the string 'None', in which case a library is
                    initialized from sample, mapq, libs and bams
    tmpdir       -- passed to the per-library merge and to rmdup
    queue, partition, logger -- passed to every Moab submission
    known, fa    -- passed to realign_bam when realignment is requested

    Returns final_bam unchanged.
    '''

    import os
    import subprocess

    import genobox_modules
    from genobox_classes import Moab, Semaphore, Library

    # set queueing
    paths = genobox_modules.setSystem()
    home = os.getcwd()
    cpuA = 'nodes=1:ppn=1,mem=512mb,walltime=345600'
    cpuC = 'nodes=1:ppn=1,mem=2gb,walltime=345600'
    cpuE = 'nodes=1:ppn=1,mem=5gb,walltime=345600'
    cpuF = 'nodes=1:ppn=2,mem=2gb,walltime=345600'
    cpuB = 'nodes=1:ppn=16,mem=10gb,walltime=345600'
    cpuG = 'nodes=1:ppn=1,mem=6gb,walltime=345600'
    cpuH = 'nodes=1:ppn=2,mem=7gb,walltime=345600'

    # resolve the library: build one, reuse the instance, or read the file
    has_library = library_file and library_file != 'None'
    if not has_library:
        library = genobox_modules.initialize_library(
            libfile=library_file, sample=sample, mapq=mapq, libs=libs,
            bams=bams)
    elif isinstance(library_file, Library):
        library = library_file
    else:
        library = Library(library_file)
        library.read()

    (bam2lib, lib2bam) = library.getBamLibs()

    ## CREATE CALLS ##

    # filter bam and sort
    (filter_sort_calls, filter_sort_files) = bam_filter_sort(
        lib2bam, bam2lib, 1500000000)

    # merge to libs
    (merge_lib_calls, librarys) = merge_bam(
        lib2bam.keys(), lib2bam.values(), add_suffix=True,
        final_suffix='.flt.sort.bam', tmpdir=tmpdir)

    # rmdup on libs
    (rmdup_calls, rmdup_files) = rmdup(librarys, tmpdir)

    # merge to final file (needed with and without realignment)
    (merge_final_call, merged_file) = merge_bam(
        [final_bam], [rmdup_files], add_suffix=False)

    # optional: realignment
    if realignment:
        (realign_calls, realigned_file) = realign_bam(
            final_bam, final_bam, fa, known)

    ## SUBMIT JOBS ##

    print("Submitting jobs")
    filtersort_moab = Moab(
        filter_sort_calls, logfile=logger, runname='run_genobox_filtersort',
        queue=queue, cpu=cpuH, partition=partition)

    # each library merge depends on as many filter/sort jobs as it has bams
    bams_per_lib = [len(lib_bams) for lib_bams in lib2bam.values()]
    mergelib_moab = Moab(
        merge_lib_calls, logfile=logger, runname='run_genobox_lib_merge',
        queue=queue, cpu=cpuE, depend=True, depend_type='complex',
        depend_val=bams_per_lib, depend_ids=filtersort_moab.ids,
        partition=partition)

    # NB: If memory should be changed, also change java memory spec in rmdup function
    rmdup_moab = Moab(
        rmdup_calls, logfile=logger, runname='run_genobox_rmdup',
        queue=queue, cpu=cpuG, depend=True, depend_type='one2one',
        depend_val=[1], depend_ids=mergelib_moab.ids, partition=partition)

    mergefinal_moab = Moab(
        merge_final_call, logfile=logger, runname='run_genobox_final_merge',
        queue=queue, cpu=cpuC, depend=True, depend_type='conc',
        depend_val=[len(rmdup_moab.ids)], depend_ids=rmdup_moab.ids,
        partition=partition)

    if realignment:
        realign_moab = Moab(
            realign_calls, logfile=logger, runname='run_genobox_realignment',
            queue=queue, cpu=cpuE, depend=True, depend_type='one2one',
            depend_val=[1], depend_ids=mergefinal_moab.ids,
            partition=partition)
    # realignment calls needs to be written together in a shell-file or dependent on each other #

    # release jobs #
    print("Releasing jobs")
    #filtersort_moab.release()
    #mergelib_moab.release()
    #rmdup_moab.release()
    #mergefinal_moab.release()
    #if realignment: realign_moab.release()

    # semaphore on the last job set of the chain
    print("Waiting for jobs to finish ...")
    last_moab = realign_moab if realignment else mergefinal_moab
    s = Semaphore(last_moab.ids, home, 'bam_processing', queue, 20, 345600)
    s.wait()
    print("--------------------------------------")

    # return final bamfile
    return final_bam
コード例 #7
0
ファイル: genobox_bamprocess.py プロジェクト: xshang/GenoBox
def start_bamprocess(library_file, bams, mapq, libs, tmpdir, queue, final_bam, realignment, known, fa, sample, partition, logger):
    '''Build and submit the bam processing job chain, then wait for it.

    The chain is: filter+sort each bam, merge bams per library, rmdup per
    library, merge into final_bam and, if realignment is true, realign.

    library_file -- a Library instance, a library file name, or an empty
                    value / the string 'None', in which case a library is
                    initialized from sample, mapq, libs and bams
    tmpdir       -- passed to the per-library merge and to rmdup
    queue, partition, logger -- passed to every Moab submission
    known, fa    -- passed to realign_bam when realignment is requested

    Returns final_bam unchanged.
    '''

    import os
    import subprocess

    import genobox_modules
    from genobox_classes import Moab, Semaphore, Library

    # set queueing
    paths = genobox_modules.setSystem()
    home = os.getcwd()
    cpuA = 'nodes=1:ppn=1,mem=512mb,walltime=345600'
    cpuC = 'nodes=1:ppn=1,mem=2gb,walltime=345600'
    cpuE = 'nodes=1:ppn=1,mem=5gb,walltime=345600'
    cpuF = 'nodes=1:ppn=2,mem=2gb,walltime=345600'
    cpuB = 'nodes=1:ppn=16,mem=10gb,walltime=345600'
    cpuH = 'nodes=1:ppn=2,mem=7gb,walltime=345600'

    # resolve the library: build one, reuse the instance, or read the file
    has_library = library_file and library_file != 'None'
    if not has_library:
        library = genobox_modules.initialize_library(
            libfile=library_file, sample=sample, mapq=mapq, libs=libs,
            bams=bams)
    elif isinstance(library_file, Library):
        library = library_file
    else:
        library = Library(library_file)
        library.read()

    (bam2lib, lib2bam) = library.getBamLibs()

    ## CREATE CALLS ##

    # filter bam and sort
    (filter_sort_calls, filter_sort_files) = bam_filter_sort(
        lib2bam, bam2lib, 1500000000)

    # merge to libs
    (merge_lib_calls, librarys) = merge_bam(
        lib2bam.keys(), lib2bam.values(), add_suffix=True,
        final_suffix='.flt.sort.bam', tmpdir=tmpdir)

    # rmdup on libs
    (rmdup_calls, rmdup_files) = rmdup(librarys, tmpdir)

    # merge to final file (needed with and without realignment)
    (merge_final_call, merged_file) = merge_bam(
        [final_bam], [rmdup_files], add_suffix=False)

    # optional: realignment
    if realignment:
        (realign_calls, realigned_file) = realign_bam(
            final_bam, final_bam, fa, known)

    ## SUBMIT JOBS ##

    print("Submitting jobs")
    filtersort_moab = Moab(
        filter_sort_calls, logfile=logger, runname='run_genobox_filtersort',
        queue=queue, cpu=cpuH, partition=partition)

    # each library merge depends on as many filter/sort jobs as it has bams
    bams_per_lib = [len(lib_bams) for lib_bams in lib2bam.values()]
    mergelib_moab = Moab(
        merge_lib_calls, logfile=logger, runname='run_genobox_lib_merge',
        queue=queue, cpu=cpuE, depend=True, depend_type='complex',
        depend_val=bams_per_lib, depend_ids=filtersort_moab.ids,
        partition=partition)

    # NB: If memory should be changed, also change java memory spec in rmdup function
    rmdup_moab = Moab(
        rmdup_calls, logfile=logger, runname='run_genobox_rmdup',
        queue=queue, cpu=cpuE, depend=True, depend_type='one2one',
        depend_val=[1], depend_ids=mergelib_moab.ids, partition=partition)

    mergefinal_moab = Moab(
        merge_final_call, logfile=logger, runname='run_genobox_final_merge',
        queue=queue, cpu=cpuC, depend=True, depend_type='conc',
        depend_val=[len(rmdup_moab.ids)], depend_ids=rmdup_moab.ids,
        partition=partition)

    if realignment:
        realign_moab = Moab(
            realign_calls, logfile=logger, runname='run_genobox_realignment',
            queue=queue, cpu=cpuE, depend=True, depend_type='one2one',
            depend_val=[1], depend_ids=mergefinal_moab.ids,
            partition=partition)
    # realignment calls needs to be written together in a shell-file or dependent on each other #

    # release jobs #
    print("Releasing jobs")
    #filtersort_moab.release()
    #mergelib_moab.release()
    #rmdup_moab.release()
    #mergefinal_moab.release()
    #if realignment: realign_moab.release()

    # semaphore on the last job set of the chain
    print("Waiting for jobs to finish ...")
    last_moab = realign_moab if realignment else mergefinal_moab
    s = Semaphore(last_moab.ids, home, 'bam_processing', queue, 20, 2*86400)
    s.wait()
    print("--------------------------------------")

    # return final bamfile
    return final_bam
コード例 #8
0
ファイル: genobox_alignment.py プロジェクト: srcbs/GenoBox
def start_alignment(args, logger):
    '''Submit BWA alignment jobs for the input fastq files and wait for them.

    Single end files (args.se) are grouped by the platform reported by the
    library and dispatched to bwa_se_align (ILLUMINA, HELICOS), bwasw_pacbio
    (PACBIO) or bwasw_iontorrent (IONTORRENT, 454); files on any other
    platform are not aligned. Paired end files (args.pe1/args.pe2) go to
    bwa_pe_align. When args.quals is set it is used as the format of every
    file instead of calling check_formats_fq.

    args   -- parsed command line options (attributes are read directly)
    logger -- log handle passed on to the alignment submitters

    Returns the tuple (bamfiles, library): the dict collected from the
    submitters and the library after update_with_tag('Data', 'BAM', ...).
    Raises ValueError if --pe1 and --pe2 differ in number of files.
    '''

    import os
    import random
    import string
    import subprocess

    import genobox_modules
    from genobox_classes import Semaphore, Library

    paths = genobox_modules.setSystem()
    home = os.getcwd()
    semaphore_ids = []
    bamfiles = dict()
    outdir = 'alignment/'

    def fq_format(fq):
        # a user supplied quality format wins over detection
        return args.quals or check_formats_fq(fq, args.gz, args.bwa6)

    if not os.path.exists('alignment'):
        os.makedirs('alignment')

    # args.mapq only exists when called from abgv; plain alignment uses [30]
    mapq = args.mapq if hasattr(args, 'mapq') else [30]
    library = genobox_modules.initialize_library(
        args.libfile, args.se, args.pe1, args.pe2,
        args.sample, mapq, args.libs, args.pl)

    # check for fa
    check_fa(args.fa, args.bwa6)

    # check for if trimming was performed (abgv only) and set correct files
    #(se_files, pe1_files, pe2_files) = check_trim(args)

    # single end alignments, one submission per platform
    if args.se:
        (platforms, platform2files) = library.getPL('Data')

        print("Submitting single end alignments")
        for platform, files in platform2files.items():
            # platforms without an aligner are skipped before any work is done
            if platform not in ('ILLUMINA', 'HELICOS', 'PACBIO',
                                'IONTORRENT', '454'):
                continue

            # keep only the single end files and determine their formats
            toalign = [fq for fq in files if fq in args.se]
            fqtypes_se = [fq_format(fq) for fq in toalign]

            if platform == 'PACBIO':
                (se_align_ids, bamfiles_se) = bwasw_pacbio(
                    toalign, args.fa, fqtypes_se, outdir, args.bwa6, library,
                    args.n, args.queue, args.partition, logger)
            elif platform in ('IONTORRENT', '454'):
                (se_align_ids, bamfiles_se) = bwasw_iontorrent(
                    toalign, args.fa, fqtypes_se, outdir, args.bwa6, library,
                    args.n, args.queue, args.partition, logger)
            else:
                (se_align_ids, bamfiles_se) = bwa_se_align(
                    toalign, args.fa, fqtypes_se, args.qtrim, args.N, outdir,
                    args.bwa6, library, args.n, args.queue, args.add_aln,
                    args.partition, logger)

            semaphore_ids.extend(se_align_ids)
            bamfiles.update(bamfiles_se)

    # paired end alignments
    if args.pe1:
        if len(args.pe1) != len(args.pe2):
            raise ValueError(
                'Same number of files must be given to --pe1 and --pe2')

        fqtypes_pe1 = [fq_format(fq) for fq in args.pe1]
        fqtypes_pe2 = [fq_format(fq) for fq in args.pe2]

        print("Submitting paired end alignments")
        (pe_align_ids, bamfiles_pe) = bwa_pe_align(
            args.pe1, args.pe2, args.fa, fqtypes_pe1, fqtypes_pe2, args.qtrim,
            args.N, outdir, args.bwa6, args.a, library, args.n, args.queue,
            args.add_aln, args.partition, logger)
        semaphore_ids.extend(pe_align_ids)
        bamfiles.update(bamfiles_pe)

    # record the bam files in the library
    library.update_with_tag('Data', 'BAM', bamfiles, True)

    # block until all submitted jobs are done
    print("Waiting for jobs to finish ...")

    # NOTE(review): 60 and 345600 look like a poll interval and a timeout in
    # seconds -- confirm against the Semaphore class
    s = Semaphore(semaphore_ids, home, 'bwa_alignment', args.queue, 60, 345600)
    s.wait()

    print("--------------------------------------")

    return (bamfiles, library)