Example #1
0
def start_unmapped_assembly(libfile="libs.NA12891.txt.RISO17V9VS"):
    '''Start denovo assembly of unmapped reads for the BAM files of a library.

    Reads the library file, passes the library's BAM values to
    extract_unmapped_reads() and records the returned ``unmapped`` value in
    the library under the 'UNM' tag (keyed by 'ID', forced).

    libfile: path of the library file to read. Defaults to the file name
             that was previously hard-coded in this function.
    '''

    # project modules are imported locally
    from genobox_classes import Library
    import genobox_modules

    library = Library(libfile)
    library.read()

    # Restored from the commented-out original line: without it the call
    # below fails with NameError. Keyed by 'ID', the same key that
    # update_with_tag() uses further down.
    # NOTE(review): confirm extract_unmapped_reads() expects this shape.
    bamfiles = library.getValues('ID', 'BAM')
    # not used in the visible part of this function; kept as in the original
    libs = library.getValues('LB', 'BAM')

    (unmapped_calls, unmapped) = extract_unmapped_reads(bamfiles)
    # update library
    library.update_with_tag('ID', 'UNM', unmapped, force=True)
Example #2
0
def start_unmapped_assembly(libfile="libs.NA12891.txt.RISO17V9VS"):
   '''Start denovo assembly of unmapped reads for the BAM files of a library.

   Reads the library file, passes the library's BAM values to
   extract_unmapped_reads() and records the returned ``unmapped`` value in
   the library under the 'UNM' tag (keyed by 'ID', forced).

   libfile: path of the library file to read. Defaults to the file name
            that was previously hard-coded in this function.
   '''

   # project modules are imported locally
   from genobox_classes import Library
   import genobox_modules

   library = Library(libfile)
   library.read()

   # Restored from the commented-out original line: without it the call
   # below fails with NameError. Keyed by 'ID', the same key that
   # update_with_tag() uses further down.
   # NOTE(review): confirm extract_unmapped_reads() expects this shape.
   bamfiles = library.getValues('ID', 'BAM')
   # not used in the visible part of this function; kept as in the original
   libs = library.getValues('LB', 'BAM')

   (unmapped_calls, unmapped) = extract_unmapped_reads(bamfiles)
   # update library
   library.update_with_tag('ID', 'UNM', unmapped, force=True)
Example #3
0
def initialize_library(
    libfile, se=None, pe1=None, pe2=None, sample="sample", mapq=None, libs=None, pl=None, bams=None
):
    """Initiate a library from an existing library file or from the arguments.

    If ``libfile`` is given, it is copied into the current working directory
    under a name with a random 10-character suffix (so the copy can be
    edited), read, and reduced with ``library.keep("Data", se + pe1 + pe2)``.
    Otherwise a new library named ``libs.<sample>.txt`` is created with one
    entry per file in ``se``, ``pe1``, ``pe2`` and ``bams`` (in that order).

    Args:
        libfile: path of an existing library file, or a false value to build
            a new library from the remaining arguments.
        se, pe1, pe2: lists of input files (presumably single-end and
            paired-end read files -- confirm against callers). Default: empty.
        sample: sample name used for the SM column, the ID prefix and the new
            library file name; a false value falls back to "sample".
        mapq, libs, pl: values for the MAPQ, LB and PL columns, looked up by
            the running file index; when a list has no value at that index
            its first value is reused. Defaults: [30], ["A"], ["ILLUMINA"].
        bams: optional list of BAM files; each is added as Data and BAM.
            For ``se``/``pe1``/``pe2``/``bams`` the string "None" is treated
            like no input when a new library is built.

    Returns:
        The Library instance.

    Raises:
        subprocess.CalledProcessError: if copying ``libfile`` fails.
    """

    import os
    import random
    import string
    import subprocess

    from genobox_classes import Library

    # Defaults are created per call so no list object is shared between calls.
    se = [] if se is None else se
    pe1 = [] if pe1 is None else pe1
    pe2 = [] if pe2 is None else pe2
    mapq = [30] if mapq is None else mapq
    libs = ["A"] if libs is None else libs
    pl = ["ILLUMINA"] if pl is None else pl

    def try_append(index, from_list, target_list):
        """Append str(from_list[index]) to target_list.

        If from_list has no value at index, its first value is reused.
        """
        try:
            value = from_list[index]
        except IndexError:
            value = from_list[0]
        target_list.append(str(value))

    if libfile:
        # copy library file so that it can be edited
        rand = "".join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
        newlibfile = os.path.join(os.getcwd(), os.path.basename(libfile) + "." + rand)
        # argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed to cp unchanged
        subprocess.check_call(["cp", libfile, newlibfile])
        libfile = newlibfile

        # create instance and read in library file
        library = Library(libfile)
        library.read()

        # remove all non-input lines from library file
        library.keep("Data", se + pe1 + pe2)
    else:
        # Apply the fallback *before* the name is used, otherwise a missing
        # sample produces a file called "libs.None.txt".
        if not sample:
            sample = "sample"

        # else create new from input
        library = Library("libs.%s.txt" % sample)

        (ID, Data, SM, MAPQ, LB, PL, BAM) = ([], [], [], [], [], [], [])

        def add_files(files, is_bam=False):
            """Add one library row per file; BAM inputs also fill the BAM column."""
            if not files or files == "None":
                return
            for f in files:
                # running index over all files added so far
                index = len(ID)
                ID.append(sample + "_%i" % index)
                Data.append(f)
                if is_bam:
                    BAM.append(f)
                SM.append(sample)
                try_append(index, mapq, MAPQ)
                try_append(index, libs, LB)
                try_append(index, pl, PL)

        add_files(se)
        add_files(pe1)
        add_files(pe2)
        add_files(bams, is_bam=True)

        if bams and bams != "None":
            # NOTE(review): BAM only holds entries for `bams`, so it is
            # shorter than the other columns when read files are given as
            # well -- confirm Library.create() accepts that.
            library.create(ID=ID, Data=Data, SM=SM, MAPQ=MAPQ, LB=LB, PL=PL, BAM=BAM)
        else:
            library.create(ID=ID, Data=Data, SM=SM, MAPQ=MAPQ, LB=LB, PL=PL)

    return library
Example #4
0
def start_bamprocess(library_file, bams, mapq, libs, tmpdir, queue, final_bam,
                     realignment, known, fa, sample, partition, logger):
    '''Starts bam processing of input files.

    Builds the calls for filter/sort, per-library merge, rmdup and the final
    merge (plus realignment when requested), wraps every stage in a Moab
    object that depends on the ids of the previous stage, and waits on a
    Semaphore for the ids of the last stage.

    library_file: a Library instance, the path of a library file, or a false
                  value / the string 'None' to build a new library from
                  ``bams``, ``mapq``, ``libs`` and ``sample``.
    bams, mapq, libs, sample: only used when a new library is built.
    tmpdir:       passed on to merge_bam() and rmdup().
    queue, partition, logger: passed on to every Moab object (``logger`` as
                  ``logfile``); ``queue`` is also passed to the Semaphore.
    final_bam:    name of the final bam file; this value is returned.
    realignment:  if true, a realignment stage is added after the final merge.
    known, fa:    passed on to realign_bam().

    Returns ``final_bam``.
    '''

    import genobox_modules
    from genobox_classes import Moab, Semaphore, Library
    import os

    # set queueing
    # return value is not needed here; the call itself is kept as-is
    genobox_modules.setSystem()
    home = os.getcwd()
    cpuC = 'nodes=1:ppn=1,mem=2gb,walltime=345600'
    cpuE = 'nodes=1:ppn=1,mem=5gb,walltime=345600'
    cpuG = 'nodes=1:ppn=1,mem=6gb,walltime=345600'
    cpuH = 'nodes=1:ppn=2,mem=7gb,walltime=345600'

    # create library instance
    if library_file and library_file != 'None':
        if isinstance(library_file, Library):
            library = library_file
        else:
            library = Library(library_file)
            library.read()
    else:
        # library_file is empty or the string 'None' here. Pass a real None:
        # the string 'None' is truthy and initialize_library() would treat
        # it as the path of a library file to copy.
        library = genobox_modules.initialize_library(libfile=None,
                                                     sample=sample,
                                                     mapq=mapq,
                                                     libs=libs,
                                                     bams=bams)

    (bam2lib, lib2bam) = library.getBamLibs()

    ## CREATE CALLS ##

    # filter bam and sort
    (filter_sort_calls,
     filter_sort_files) = bam_filter_sort(lib2bam, bam2lib, 1500000000)

    # merge to libs
    # list(...) keeps these real lists on Python 3 as well
    (merge_lib_calls, librarys) = merge_bam(list(lib2bam.keys()),
                                            list(lib2bam.values()),
                                            add_suffix=True,
                                            final_suffix='.flt.sort.bam',
                                            tmpdir=tmpdir)

    # rmdup on libs
    (rmdup_calls, rmdup_files) = rmdup(librarys, tmpdir)

    # merge to final file (needed with and without realignment)
    (merge_final_call, merged_file) = merge_bam([final_bam], [rmdup_files],
                                                add_suffix=False)

    # optional: realignment
    if realignment:
        (realign_calls, final_file) = realign_bam(final_bam, final_bam, fa,
                                                  known)

    ## SUBMIT JOBS ##

    # print(...) with a single string behaves the same on Python 2 and 3
    print("Submitting jobs")
    filtersort_moab = Moab(filter_sort_calls,
                           logfile=logger,
                           runname='run_genobox_filtersort',
                           queue=queue,
                           cpu=cpuH,
                           partition=partition)
    mergelib_moab = Moab(merge_lib_calls,
                         logfile=logger,
                         runname='run_genobox_lib_merge',
                         queue=queue,
                         cpu=cpuE,
                         depend=True,
                         depend_type='complex',
                         # one value per library: its number of bam files
                         depend_val=[len(b) for b in lib2bam.values()],
                         depend_ids=filtersort_moab.ids,
                         partition=partition)
    rmdup_moab = Moab(
        rmdup_calls,
        logfile=logger,
        runname='run_genobox_rmdup',
        queue=queue,
        cpu=cpuG,
        depend=True,
        depend_type='one2one',
        depend_val=[1],
        depend_ids=mergelib_moab.ids,
        partition=partition
    )  # NB: If memory should be changed, also change java memory spec in rmdup function
    mergefinal_moab = Moab(merge_final_call,
                           logfile=logger,
                           runname='run_genobox_final_merge',
                           queue=queue,
                           cpu=cpuC,
                           depend=True,
                           depend_type='conc',
                           depend_val=[len(rmdup_moab.ids)],
                           depend_ids=rmdup_moab.ids,
                           partition=partition)
    last_stage = mergefinal_moab
    if realignment:
        realign_moab = Moab(realign_calls,
                            logfile=logger,
                            runname='run_genobox_realignment',
                            queue=queue,
                            cpu=cpuE,
                            depend=True,
                            depend_type='one2one',
                            depend_val=[1],
                            depend_ids=mergefinal_moab.ids,
                            partition=partition)
        last_stage = realign_moab
    # realignment calls needs to be written together in a shell-file or dependent on each other #

    # release jobs #
    # NOTE(review): the release() calls below are disabled in the original;
    # confirm that Moab jobs do not need an explicit release.
    print("Releasing jobs")
    #filtersort_moab.release()
    #mergelib_moab.release()
    #rmdup_moab.release()
    #mergefinal_moab.release()
    #if realignment: realign_moab.release()

    # semaphore on the ids of the last stage
    # NOTE(review): 20 and 345600 are presumably a polling interval and a
    # maximum wait in seconds -- confirm against Semaphore.
    print("Waiting for jobs to finish ...")
    s = Semaphore(last_stage.ids, home, 'bam_processing', queue, 20, 345600)
    s.wait()
    print("--------------------------------------")

    # return final bamfile
    return final_bam
Example #5
0
def start_bamprocess(library_file, bams, mapq, libs, tmpdir, queue, final_bam, realignment, known, fa, sample, partition, logger):
   '''Starts bam processing of input files.

   Builds the calls for filter/sort, per-library merge, rmdup and the final
   merge (plus realignment when requested), wraps every stage in a Moab
   object that depends on the ids of the previous stage, and waits on a
   Semaphore for the ids of the last stage.

   library_file: a Library instance, the path of a library file, or a false
                 value / the string 'None' to build a new library from
                 ``bams``, ``mapq``, ``libs`` and ``sample``.
   bams, mapq, libs, sample: only used when a new library is built.
   tmpdir:       passed on to merge_bam() and rmdup().
   queue, partition, logger: passed on to every Moab object (``logger`` as
                 ``logfile``); ``queue`` is also passed to the Semaphore.
   final_bam:    name of the final bam file; this value is returned.
   realignment:  if true, a realignment stage is added after the final merge.
   known, fa:    passed on to realign_bam().

   Returns ``final_bam``.
   '''

   import genobox_modules
   from genobox_classes import Moab, Semaphore, Library
   import os

   # set queueing
   # return value is not needed here; the call itself is kept as-is
   genobox_modules.setSystem()
   home = os.getcwd()
   cpuC = 'nodes=1:ppn=1,mem=2gb,walltime=345600'
   cpuE = 'nodes=1:ppn=1,mem=5gb,walltime=345600'
   cpuH = 'nodes=1:ppn=2,mem=7gb,walltime=345600'

   # create library instance
   if library_file and library_file != 'None':
      if isinstance(library_file, Library):
         library = library_file
      else:
         library = Library(library_file)
         library.read()
   else:
      # library_file is empty or the string 'None' here. Pass a real None:
      # the string 'None' is truthy and initialize_library() would treat
      # it as the path of a library file to copy.
      library = genobox_modules.initialize_library(libfile=None, sample=sample, mapq=mapq, libs=libs, bams=bams)

   (bam2lib, lib2bam) = library.getBamLibs()

   ## CREATE CALLS ##

   # filter bam and sort
   (filter_sort_calls, filter_sort_files) = bam_filter_sort(lib2bam, bam2lib, 1500000000)

   # merge to libs
   # list(...) keeps these real lists on Python 3 as well
   (merge_lib_calls, librarys) = merge_bam(list(lib2bam.keys()), list(lib2bam.values()),
                                           add_suffix=True, final_suffix='.flt.sort.bam', tmpdir=tmpdir)

   # rmdup on libs
   (rmdup_calls, rmdup_files) = rmdup(librarys, tmpdir)

   # merge to final file (needed with and without realignment)
   (merge_final_call, merged_file) = merge_bam([final_bam], [rmdup_files], add_suffix=False)

   # optional: realignment
   if realignment:
      (realign_calls, final_file) = realign_bam(final_bam, final_bam, fa, known)

   ## SUBMIT JOBS ##

   # print(...) with a single string behaves the same on Python 2 and 3
   print("Submitting jobs")
   filtersort_moab = Moab(filter_sort_calls, logfile=logger, runname='run_genobox_filtersort',
                          queue=queue, cpu=cpuH, partition=partition)
   mergelib_moab = Moab(merge_lib_calls, logfile=logger, runname='run_genobox_lib_merge',
                        queue=queue, cpu=cpuE, depend=True, depend_type='complex',
                        # one value per library: its number of bam files
                        depend_val=[len(b) for b in lib2bam.values()],
                        depend_ids=filtersort_moab.ids, partition=partition)
   # NB: If memory should be changed, also change java memory spec in rmdup function
   rmdup_moab = Moab(rmdup_calls, logfile=logger, runname='run_genobox_rmdup',
                     queue=queue, cpu=cpuE, depend=True, depend_type='one2one',
                     depend_val=[1], depend_ids=mergelib_moab.ids, partition=partition)
   mergefinal_moab = Moab(merge_final_call, logfile=logger, runname='run_genobox_final_merge',
                          queue=queue, cpu=cpuC, depend=True, depend_type='conc',
                          depend_val=[len(rmdup_moab.ids)], depend_ids=rmdup_moab.ids,
                          partition=partition)
   last_stage = mergefinal_moab
   if realignment:
      realign_moab = Moab(realign_calls, logfile=logger, runname='run_genobox_realignment',
                          queue=queue, cpu=cpuE, depend=True, depend_type='one2one',
                          depend_val=[1], depend_ids=mergefinal_moab.ids, partition=partition)
      last_stage = realign_moab
   # realignment calls needs to be written together in a shell-file or dependent on each other #

   # release jobs #
   # NOTE(review): the release() calls below are disabled in the original;
   # confirm that Moab jobs do not need an explicit release.
   print("Releasing jobs")
   #filtersort_moab.release()
   #mergelib_moab.release()
   #rmdup_moab.release()
   #mergefinal_moab.release()
   #if realignment: realign_moab.release()

   # semaphore on the ids of the last stage
   # NOTE(review): 20 and 2*86400 are presumably a polling interval and a
   # maximum wait in seconds -- confirm against Semaphore.
   print("Waiting for jobs to finish ...")
   s = Semaphore(last_stage.ids, home, 'bam_processing', queue, 20, 2*86400)
   s.wait()
   print("--------------------------------------")

   # return final bamfile
   return final_bam
Example #6
0
def initialize_library(libfile,
                       se=None,
                       pe1=None,
                       pe2=None,
                       sample='sample',
                       mapq=None,
                       libs=None,
                       pl=None,
                       bams=None):
    '''Initiates library from an existing library file or from the arguments.

    If ``libfile`` is given, it is copied into the current working directory
    under a name with a random 10-character suffix (so the copy can be
    edited), read, and reduced with ``library.keep('Data', se + pe1 + pe2)``.
    Otherwise a new library named ``libs.<sample>.txt`` is created with one
    entry per file in ``se``, ``pe1``, ``pe2`` and ``bams`` (in that order).

    libfile:       path of an existing library file, or a false value to
                   build a new library from the remaining arguments.
    se, pe1, pe2:  lists of input files (presumably single-end and paired-end
                   read files -- confirm against callers). Default: empty.
    sample:        sample name used for the SM column, the ID prefix and the
                   new library file name; a false value falls back to
                   'sample'.
    mapq, libs, pl: values for the MAPQ, LB and PL columns, looked up by the
                   running file index; when a list has no value at that index
                   its first value is reused.
                   Defaults: [30], ['A'], ['ILLUMINA'].
    bams:          optional list of BAM files; each is added as Data and BAM.

    For se/pe1/pe2/bams the string 'None' is treated like no input when a
    new library is built.

    Returns the Library instance. Raises subprocess.CalledProcessError if
    copying ``libfile`` fails.
    '''

    import os
    import random
    import string
    import subprocess

    from genobox_classes import Library

    # Defaults are created per call so no list object is shared between calls.
    se = [] if se is None else se
    pe1 = [] if pe1 is None else pe1
    pe2 = [] if pe2 is None else pe2
    mapq = [30] if mapq is None else mapq
    libs = ['A'] if libs is None else libs
    pl = ['ILLUMINA'] if pl is None else pl

    def try_append(index, from_list, target_list):
        '''Append str(from_list[index]) to target_list.

        If from_list has no value at index, its first value is reused.
        '''
        try:
            value = from_list[index]
        except IndexError:
            value = from_list[0]
        target_list.append(str(value))

    if libfile:
        # copy library file so that it can be edited
        rand = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(10))
        newlibfile = os.path.join(os.getcwd(),
                                  os.path.basename(libfile) + '.' + rand)
        # argument list instead of a shell string: paths containing spaces or
        # shell metacharacters are passed to cp unchanged
        subprocess.check_call(['cp', libfile, newlibfile])
        libfile = newlibfile

        # create instance and read in library file
        library = Library(libfile)
        library.read()

        # remove all non-input lines from library file
        library.keep('Data', se + pe1 + pe2)
    else:
        # Apply the fallback *before* the name is used, otherwise a missing
        # sample produces a file called 'libs.None.txt'.
        if not sample:
            sample = 'sample'

        # else create new from input
        library = Library('libs.%s.txt' % sample)

        (ID, Data, SM, MAPQ, LB, PL, BAM) = ([], [], [], [], [], [], [])

        def add_files(files, is_bam=False):
            '''Add one library row per file; BAM inputs also fill the BAM column.'''
            if not files or files == 'None':
                return
            for f in files:
                # running index over all files added so far
                index = len(ID)
                ID.append(sample + '_%i' % index)
                Data.append(f)
                if is_bam:
                    BAM.append(f)
                SM.append(sample)
                try_append(index, mapq, MAPQ)
                try_append(index, libs, LB)
                try_append(index, pl, PL)

        add_files(se)
        add_files(pe1)
        add_files(pe2)
        add_files(bams, is_bam=True)

        if bams and bams != 'None':
            # NOTE(review): BAM only holds entries for `bams`, so it is
            # shorter than the other columns when read files are given as
            # well -- confirm Library.create() accepts that.
            library.create(ID=ID,
                           Data=Data,
                           SM=SM,
                           MAPQ=MAPQ,
                           LB=LB,
                           PL=PL,
                           BAM=BAM)
        else:
            library.create(ID=ID, Data=Data, SM=SM, MAPQ=MAPQ, LB=LB, PL=PL)

    return library