Example #1
0
def post_basecalling(
      libsff_path,
      reverse_primer_dict,
      skipsfftrim,
      sfftrim_args,
      libKey,
      floworder,
      barcodeId,
      barcodesplit_filter,
      DIR_BC_FILES,
      barcodeList_path,
      bfmask_path,
      barcodeMask_path,
      generate_beadsummary,
      BASECALLER_RESULTS):
    """Post-process a library SFF file after basecalling.

    Runs, in order: optional SFFTrim, optional barcodeSplit (followed by
    barcode filtering), SFFSummary, the read length histogram and SFFRead.
    The trim and barcode steps replace the file at ``libsff_path`` in place.
    Exit codes of the external tools are recorded through
    ``blockprocessing.add_status``; a failing step is logged and the
    remaining steps still run.

    If ``libsff_path`` does not exist, an empty ``badblock.txt`` is created
    in the current directory and the function returns immediately.

    NOTE: all external commands are assembled as strings and run through
    the shell, so paths are not quoted.

    Args:
        libsff_path: Path of the library SFF file. The last four characters
            are stripped to derive the names of intermediate files, so it is
            expected to end in ".sff".
        reverse_primer_dict: Mapping with the keys 'qual_cutoff',
            'qual_window', 'adapter_cutoff' and 'sequence'. Only read when
            ``sfftrim_args`` is empty.
        skipsfftrim: When true, the SFFTrim step is skipped.
        sfftrim_args: If non-empty, appended verbatim to the SFFTrim command
            line in place of the default arguments.
        libKey: Passed to SFFTrim as ``--key`` (default arguments only).
        floworder: Passed to SFFTrim as ``--flow-order`` and to barcodeSplit
            as ``-f``.
        barcodeId: The barcode step runs unless this equals ''.
        barcodesplit_filter: Passed to barcodeSplit as ``-l``; when
            ``float(barcodesplit_filter) > 0`` the barcode files are filtered
            with ``filter_barcodes``.
        DIR_BC_FILES: Barcode output directory (created if missing), passed
            to barcodeSplit as ``-d``.
        barcodeList_path: Passed to barcodeSplit as ``-b``.
        bfmask_path: Passed to barcodeSplit as ``-k``.
        barcodeMask_path: Passed to barcodeSplit as ``-c``.
        generate_beadsummary: When true (and default SFFTrim arguments are
            used), SFFTrim gets ``--bead-summary`` pointing at
            ``BaseCaller.json`` inside ``BASECALLER_RESULTS``.
        BASECALLER_RESULTS: Directory receiving ``quality.summary``,
            ``readLen.txt``, ``readLenHisto.png`` and ``keypass.summary``.

    Returns:
        None.
    """

    if not os.path.exists(libsff_path):
        printtime("ERROR: %s does not exist" % libsff_path)
        open('badblock.txt', 'w').close()
        return


    ##################################################
    # Trim the SFF file if it has been requested     #
    ##################################################

    if not skipsfftrim:
        printtime("Attempting to trim the SFF file")

        libsff_untrimmed_path = libsff_path
        (head,tail) = os.path.split(libsff_untrimmed_path)
        libsff_trimmed_path = os.path.join(head,tail[:-4] + ".trimmed.sff")

        try:
            com = "SFFTrim"
            com += " --in-sff %s" % (libsff_untrimmed_path)
            com += " --out-sff %s" % (libsff_trimmed_path)
            if sfftrim_args:
                printtime("using non default args '%s'" % sfftrim_args)
                com += " " + sfftrim_args
            else:
                printtime("no special args found, using default args")

                # 3' adapter details
                qual_cutoff = reverse_primer_dict['qual_cutoff']
                qual_window = reverse_primer_dict['qual_window']
                adapter_cutoff = reverse_primer_dict['adapter_cutoff']
                adapter = reverse_primer_dict['sequence']

                com += " --flow-order %s" % (floworder)
                com += " --key %s" % (libKey)
                com += " --qual-cutoff %s" % (qual_cutoff)
                com += " --qual-window-size %s" % (qual_window)
                com += " --adapter-cutoff %s" % (adapter_cutoff)
                com += " --adapter %s" % (adapter)
                com += " --min-read-len 5"
                if generate_beadsummary:
                    com += " --bead-summary %s" % (os.path.join(BASECALLER_RESULTS, 'BaseCaller.json'))

            printtime("DEBUG: Calling '%s':" % com)
            ret = subprocess.call(com,shell=True)
            blockprocessing.add_status("SFFTrim", ret)
        except Exception:
            printtime('Failed SFFTrim')
            traceback.print_exc()

        # Only discard the untrimmed input once a trimmed replacement exists;
        # otherwise a failed SFFTrim would leave no SFF file at all.
        if os.path.exists(libsff_trimmed_path):
            if os.path.exists(libsff_untrimmed_path):
                printtime ("DEBUG: remove untrimmed file %s" % libsff_untrimmed_path)
                os.remove(libsff_untrimmed_path)
            else:
                printtime ("ERROR: untrimmed file not found: %s" % libsff_untrimmed_path)

            printtime ("DEBUG: Renaming %s to %s" % (libsff_trimmed_path,libsff_path))
            os.rename(libsff_trimmed_path,libsff_path)
        else:
            printtime ("ERROR: trimmed file not found: %s, keeping untrimmed file %s" % (libsff_trimmed_path,libsff_untrimmed_path))
    else:
        printtime("Not attempting to trim the SFF")


    #####################################################
    # Barcode trim SFF if barcodes have been specified  #
    # Creates one fastq per barcode, plus unknown reads #
    #####################################################

    if barcodeId != '':
        try:

            (head,tail) = os.path.split(libsff_path)
            libsff_bctrimmed_path = os.path.join(head,tail[:-4] + ".bctrimmed.sff")

            if not os.path.exists(DIR_BC_FILES):
                os.mkdir(DIR_BC_FILES)

            com = "barcodeSplit"
            com += " -s"
            com += " -i %s" % libsff_path
            com += " -b %s" % barcodeList_path
            com += " -k %s" % bfmask_path
            com += " -f %s" % floworder
            com += " -l %s" % barcodesplit_filter
            com += " -c %s" % barcodeMask_path
            com += " -d %s" % DIR_BC_FILES

            printtime("DEBUG: Calling '%s'" % com)
            ret = subprocess.call(com,shell=True)
            blockprocessing.add_status("barcodeSplit", ret)

            if int(ret) != 0:
                printtime("ERROR Failed barcodeSplit with return code %d" % int(ret))
            else:

                # barcodeSplit is producing "bctrimmed_"+libsff_path , rename
                bcsff = os.path.join(DIR_BC_FILES,head,"bctrimmed_"+tail)
                if os.path.exists(bcsff):
                    printtime ("Renaming %s to %s" % (bcsff, libsff_bctrimmed_path))
                    os.rename(bcsff,libsff_bctrimmed_path)
                else:
                    printtime ("ERROR: Renaming: File not found: %s" % bcsff)

                # Replace libsff_path only when the barcode-trimmed file is
                # really there, so a missing output does not delete the input.
                if os.path.exists(libsff_bctrimmed_path):
                    if os.path.exists(libsff_path):
                        printtime ("DEBUG: remove file %s" % libsff_path)
                        os.remove(libsff_path)
                    else:
                        printtime ("ERROR: Remove: File not found: %s" % libsff_path)

                    #rename: libsff_path contains now the trimmed/bctrimmed data
                    printtime ("Renaming %s to %s" % (libsff_bctrimmed_path,libsff_path))
                    os.rename(libsff_bctrimmed_path,libsff_path)
                else:
                    printtime ("ERROR: barcode trimmed file not found: %s, keeping %s" % (libsff_bctrimmed_path,libsff_path))

        except Exception:
            printtime("ERROR Failed barcodeSplit")
            traceback.print_exc()

        # implement barcode filtering by moving filtered files
        if float(barcodesplit_filter) > 0:
            from ion.utils.filter_barcodes import filter_barcodes
            filter_barcodes(DIR_BC_FILES)

    ##################################################
    # Once we have the new SFF, run SFFSummary
    # to get the predicted quality scores
    ##################################################

    # SFFSummary writes the read lengths here; the histogram step reads them back.
    readlen_path = os.path.join(BASECALLER_RESULTS, 'readLen.txt')

    try:
        com = "SFFSummary"
        com += " -o %s" % os.path.join(BASECALLER_RESULTS, 'quality.summary')
        com += " --sff-file %s" % libsff_path
        com += " --read-length 50,100,150"
        com += " --min-length 0,0,0"
        com += " --qual 0,17,20"
        com += " -d %s" % readlen_path

        printtime("DEBUG: Calling '%s'" % com)
        ret = subprocess.call(com,shell=True)
        blockprocessing.add_status("SFFSummary", ret)
    except Exception:
        printtime('Failed SFFSummary')
        traceback.print_exc()


    printtime("make the read length histogram")
    filepath_readLenHistogram = os.path.join(BASECALLER_RESULTS,'readLenHisto.png')
    try:
        trimmedReadLenHisto.trimmedReadLenHisto(readlen_path,filepath_readLenHistogram)
    except Exception:
        printtime("Failed to create %s" % filepath_readLenHistogram)
        traceback.print_exc()


    #####################################################
    # make keypass.fastq file -c(cut key) -k(key flows) #
    #####################################################

    try:
        # Swap only the file extension; str.replace would also rewrite any
        # ".sff" occurring inside a directory name.
        fastq_path = os.path.splitext(libsff_path)[0] + ".fastq"

        com = "SFFRead"
        com += " -q %s" % fastq_path
        com += " %s" % libsff_path
        com += " > %s" % os.path.join(BASECALLER_RESULTS, 'keypass.summary')

        printtime("DEBUG: Calling '%s'" % com)
        ret = subprocess.call(com,shell=True)
        blockprocessing.add_status("SFFRead", ret)
    except Exception:
        printtime('Failed SFFRead')
        printtime('Failed to convert SFF ' + str(libsff_path) + ' to fastq')
        traceback.print_exc()
Example #2
0
def _write_progress_file(sffread, alignment):
    """Write the analysis progress bar file ``progress.txt`` in the current directory.

    The first three stages are always written as green; ``sffread`` and
    ``alignment`` are the color names written for the last two stages.
    """
    f = open('progress.txt','w')
    try:
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = green\n')
        f.write('sffread = %s\n' % sffread)
        f.write('alignment = %s' % alignment)
    finally:
        f.close()


def runBlock(env):
    """Run the analysis pipeline for a single block in the current directory.

    Which stages run is controlled by the module-level flags ``runFromRaw``,
    ``runFromWells`` and ``runFromSFF``:

    * ``runFromRaw``: runs the command in ``env['analysisArgs']`` through the
      shell, appending its output to ``ReportLog.html``.
    * ``runFromWells``: processes the "unfiltered" directory (if present),
      runs SFFTrim, barcodeSplit, SFFSummary and SFFRead on the library SFF,
      calls ``TFPipeline.processBlock`` and renders the key trace, bead
      density and per-region plots.
    * ``runFromSFF``: aligns the library SFF and creates the read length
      histogram.

    Progress is written to ``progress.txt``. Failures of individual steps are
    logged and the remaining steps still run.

    NOTE(review): the local ``STATUS`` collects the first failure but is
    neither returned nor stored anywhere by this function.

    Args:
        env: Mapping with the run configuration. Keys read here:
            'analysisArgs', 'libraryKey', 'flowOrder', 'reverse_primer_dict'
            (with 'qual_cutoff', 'qual_window', 'adapter_cutoff' and
            'sequence'), 'sfftrim', 'sfftrim_args', 'barcodeId' and
            'align_full'. ``env`` is also passed on to the alignment helpers.

    Returns:
        None.
    """
    STATUS = None
    basefolder = 'plugin_out'
    if not os.path.isdir(basefolder):
        # The umask values are written in decimal (0 == 0000 and 2 == 0002 in
        # octal) so the literals are valid under every Python version.
        os.umask(0)   #grant write permission to plugin user
        os.mkdir(basefolder)
        os.umask(2)
    libsff_filename = "rawlib.sff"
    tfsff_filename = "rawtf.sff"
    fastq_filename = "raw.fastq"
    bctrimmed_libsff_filename = "bctrimmed_rawlib.sff"

    fastq_path = os.path.join(BASECALLER_RESULTS, fastq_filename)
    libsff_path = os.path.join(BASECALLER_RESULTS, libsff_filename)
    bctrimmed_libsff_path =  os.path.join(BASECALLER_RESULTS,bctrimmed_libsff_filename)
    readlen_path = os.path.join(BASECALLER_RESULTS, 'readLen.txt')

    write_version()

    # Both the SFF processing and the alignment stage need the keys and the
    # flow order, so define them even when only runFromSFF is set.
    if runFromWells or runFromSFF:
        tfKey = "ATCG"
        libKey = env['libraryKey']
        floworder = env['flowOrder']

    #-------------------------------------------------------------
    # Single Block data processing
    #-------------------------------------------------------------
    if runFromRaw:
        printtime("RUNNING SINGLE BLOCK ANALYSIS")
        command = "%s >> ReportLog.html 2>&1" % (env['analysisArgs'])
        printtime("Analysis command: " + command)
        sys.stdout.flush()
        sys.stderr.flush()
        status = subprocess.call(command,shell=True)
        STATUS = None
        if int(status) == 2:
            STATUS = 'Checksum Error'
        elif int(status) == 3:
            STATUS = 'No Live Beads'
        elif int(status) != 0:
            STATUS = 'ERROR'

        if STATUS is not None:
            printtime("Analysis finished with status '%s'" % STATUS)
            #TODO - maybe create file
            # uploadMetrics.updateStatus(STATUS)

        #TODO: write str(status) to checksum_status.txt below env['pathToRaw']
        # when that file does not exist yet, env['skipchecksum'] is false and
        # STATUS is None.
        printtime("Finished single block analysis")
    else:
        printtime('Skipping single block analysis')

    if runFromWells:
        printtime("Using flow order: %s" % floworder)
        printtime("Using library key: %s" % libKey)

        if "block_" in mycwd:

            # Fix SFFTrim
            basecallerjson = os.path.join(BASECALLER_RESULTS, 'BaseCaller.json')
            r = subprocess.call(["ln", "-s", basecallerjson])
            if r:
                printtime("couldn't create symbolic link")

            # Fix SFFMerge
            r = subprocess.call(["ln", "-s", os.path.join('..', SIGPROC_RESULTS, 'processParameters.txt'), os.path.join(BASECALLER_RESULTS, 'processParameters.txt')])
            if r:
                printtime("couldn't create symbolic link")


        sys.stdout.flush()
        sys.stderr.flush()

        if not os.path.exists(libsff_path):
            printtime("ERROR: %s does not exist" % libsff_path)
            open('badblock.txt', 'w').close()

        ##################################################
        # Unfiltered SFF
        ##################################################

        unfiltered_dir = "unfiltered"
        if os.path.exists(unfiltered_dir):

            top_dir = os.getcwd()

            #change to the unfiltered dir
            os.chdir(os.path.join(top_dir,unfiltered_dir))
            try:
                #grab the first file named untrimmed.sff
                untrimmed_candidates = glob.glob("*.untrimmed.sff")
                if untrimmed_candidates:
                    untrimmed_sff = untrimmed_candidates[0]
                else:
                    untrimmed_sff = None
                    printtime("Error, unable to find the untrimmed sff file")

                # If flow order is missing, assume classic flow order:
                if floworder == "0":
                    floworder = "TACG"
                    printtime("warning: floworder redefine required.  set to TACG")

                # Without an untrimmed file there is nothing to trim; the
                # conversion and alignment steps below still run.
                if untrimmed_sff is not None:
                    #rename untrimmed to trimmed
                    trimmed_sff = untrimmed_sff.replace("untrimmed.sff","trimmed.sff")

                    # 3' adapter details
                    qual_cutoff = env['reverse_primer_dict']['qual_cutoff']
                    qual_window = env['reverse_primer_dict']['qual_window']
                    adapter_cutoff = env['reverse_primer_dict']['adapter_cutoff']
                    adapter = env['reverse_primer_dict']['sequence']

                    printtime("Unfiltered SFFTrim")
                    try:
                        com = "SFFTrim"
                        com += " --in-sff %s --out-sff %s" % (untrimmed_sff,trimmed_sff)
                        com += " --flow-order %s" % (floworder)
                        com += " --key %s" % (libKey)
                        com += " --qual-cutoff %s" % (qual_cutoff)
                        com += " --qual-window-size %s" % (qual_window)
                        com += " --adapter-cutoff %s" % (adapter_cutoff)
                        com += " --adapter %s" % (adapter)
                        com += " --min-read-len 5"
                        printtime("DEBUG: Calling '%s'" % com)
                        ret = subprocess.call(com,shell=True)
                        if int(ret)!=0 and STATUS is None:
                            STATUS='ERROR'
                    except Exception:
                        printtime('Failed Unfiltered SFFTrim')
                        traceback.print_exc()

                for sff in glob.glob("*.sff"):
                    try:
                        com = "SFFRead"
                        com += " -q %s" % sff.replace(".sff",".fastq")
                        com += " %s" % sff
                        printtime("DEBUG: Calling '%s'" % com)
                        ret = subprocess.call(com,shell=True)
                        if int(ret)!=0 and STATUS is None:
                            STATUS='ERROR'
                    except Exception:
                        printtime('Failed to convert SFF' + str(sff) + ' to fastq')
                        traceback.print_exc()

                #trim status
                for trim_state in ["untrimmed","trimmed"]:
                    os.chdir(os.path.join(top_dir,unfiltered_dir))
                    if not os.path.exists(trim_state):
                        os.makedirs(trim_state)
                    os.chdir(os.path.join(top_dir,unfiltered_dir,trim_state))

                    try:
                        printtime("Trim Status")
                        align_full_chip_core("../*." + trim_state + ".sff", libKey, tfKey, floworder, fastq_path, env['align_full'], -1, False, False, True, DIR_BC_FILES, env, ALIGNMENT_RESULTS)
                    except OSError:
                        printtime('Trim Status Alignment Failed to start')
                        alignError = open("alignment.error", "w")
                        try:
                            alignError.write(str(traceback.format_exc()))
                        finally:
                            alignError.close()
                        traceback.print_exc()
            finally:
                # Always return to the block directory, even if a step above
                # raised, so later stages do not run inside "unfiltered".
                os.chdir(top_dir)
        else:
            printtime("Directory unfiltered does not exist")

        sys.stdout.flush()
        sys.stderr.flush()

        ##################################################
        # Trim the SFF file if it has been requested     #
        ##################################################

        #only trim if SFF is false
        if not env['sfftrim']:
            printtime("Attempting to trim the SFF file")

            if not os.path.exists(libsff_path):
                printtime("ERROR: %s does not exist" % libsff_path)

            # Strip the ".sff" extension (last four characters) and append
            # ".trimmed.sff", e.g. rawlib.sff -> rawlib.trimmed.sff.
            (head,tail) = os.path.split(libsff_path)
            libsff_trimmed_path = os.path.join(head,tail[:-4] + ".trimmed.sff")

            #we will always need the input and output files
            trimArgs = "--in-sff %s --out-sff %s" % (libsff_path,libsff_trimmed_path)

            if not env['sfftrim_args']:
                printtime("no args found, using default args")
                qual_cutoff = env['reverse_primer_dict']['qual_cutoff']
                qual_window = env['reverse_primer_dict']['qual_window']
                adapter_cutoff = env['reverse_primer_dict']['adapter_cutoff']
                adapter = env['reverse_primer_dict']['sequence']
                trimArgs += " --flow-order %s --key %s" % (floworder, libKey)
                trimArgs += " --qual-cutoff %d --qual-window-size %d --adapter-cutoff %d --adapter %s" % (qual_cutoff,qual_window,adapter_cutoff,adapter)
                trimArgs += " --min-read-len 5 "
            else:
                printtime("using non default args '%s'" % env['sfftrim_args'])
                trimArgs += " " + env['sfftrim_args']

            try:
                com = "SFFTrim %s " % (trimArgs)
                printtime("DEBUG: call '%s':" % com)
                ret = subprocess.call(com,shell=True)
                if int(ret)!=0 and STATUS is None:
                    STATUS='ERROR'
            except Exception:
                printtime('Failed SFFTrim')
                traceback.print_exc()

            # The trimmed file is deliberately not renamed over libsff_path:
            # don't rename, result will be useless for --fromsff runs
        else:
            printtime("Not attempting to trim the SFF")


        #####################################################
        # Barcode trim SFF if barcodes have been specified  #
        # Creates one fastq per barcode, plus unknown reads #
        #####################################################

        if env['barcodeId'] != '':
            try:
                com = "barcodeSplit"
                com += " -s"
                com += " -i %s" % libsff_path
                com += " -b barcodeList.txt"
                com += " -c barcodeMask.bin"
                com += " -f %s" % floworder

                printtime("DEBUG: Calling '%s'" % com)
                ret = subprocess.call(com,shell=True)
                if int(ret) != 0:
                    if STATUS is None:
                        STATUS='ERROR'
                elif os.path.exists(bctrimmed_libsff_path):
                    # Rename bc trimmed sff (only after a successful split)
                    os.rename(bctrimmed_libsff_path, libsff_path)
            except Exception:
                printtime("Failed barcodeSplit")
                traceback.print_exc()


        ##################################################
        # Once we have the new SFF, run SFFSummary
        # to get the predicted quality scores
        ##################################################

        try:
            com = "SFFSummary"
            com += " -o %s" % os.path.join(BASECALLER_RESULTS, 'quality.summary')
            com += " --sff-file %s" % libsff_path
            com += " --read-length 50,100,150"
            com += " --min-length 0,0,0"
            com += " --qual 0,17,20"
            com += " -d %s" % readlen_path

            printtime("DEBUG: Calling '%s'" % com)
            ret = subprocess.call(com,shell=True)
            if int(ret)!=0 and STATUS is None:
                STATUS='ERROR'
        except Exception:
            printtime('Failed SFFSummary')
            traceback.print_exc()

        ##################################################
        #make keypass.fastq file -c(cut key) -k(key flows)#
        ##################################################
        # create analysis progress bar file
        _write_progress_file('yellow', 'grey')

        try:
            com = "SFFRead"
            com += " -q %s" % fastq_path
            com += " %s" % libsff_path
            com += " > %s" % os.path.join(BASECALLER_RESULTS, 'keypass.summary')

            printtime("DEBUG: Calling '%s'" % com)
            ret = subprocess.call(com,shell=True)
            if int(ret)!=0 and STATUS is None:
                STATUS='ERROR'
        except Exception:
            printtime('Failed SFFRead')
            traceback.print_exc()


        ##################################################
        #generate TF Metrics                             #
        ##################################################
        # (The former direct TFMapper call and the TF metrics plots built
        # from TFMapper.stats are disabled.)

        printtime("Calling TFPipeline.processBlock")
        TFPipeline.processBlock(tfsff_filename, BASECALLER_RESULTS, SIGPROC_RESULTS, tfKey, floworder)
        printtime("Completed TFPipeline.processBlock")

        ########################################################
        #Generate Raw Data Traces for lib and TF keys          #
        ########################################################
        printtime("Generate Raw Data Traces for lib and TF keys(iontrace_Test_Fragment.png, iontrace_Library.png)")

        tfRawPath = 'avgNukeTrace_%s.txt' % tfKey
        libRawPath = 'avgNukeTrace_%s.txt' % libKey
        peakOut = 'raw_peak_signal'

        if os.path.exists(tfRawPath):
            try:
                kp = plotKey.KeyPlot(tfKey, floworder, 'Test Fragment')
                kp.parse(tfRawPath)
                kp.dump_max(peakOut)
                kp.plot()
            except Exception:
                printtime("TF key graph didn't render")
                traceback.print_exc()

        if os.path.exists(libRawPath):
            try:
                kp = plotKey.KeyPlot(libKey, floworder, 'Library')
                kp.parse(libRawPath)
                kp.dump_max(peakOut)
                kp.plot()
            except Exception:
                printtime("Lib key graph didn't render")
                traceback.print_exc()

        ########################################################
        #Make Bead Density Plots                               #
        ########################################################
        printtime("Make Bead Density Plots")
        bfmaskPath = os.path.join(SIGPROC_RESULTS,"bfmask.bin")
        maskpath = os.path.join(SIGPROC_RESULTS,"MaskBead.mask")

        if os.path.isfile(bfmaskPath):
            com = "BeadmaskParse"
            com += " -m MaskBead"
            com += " %s" % bfmaskPath
            ret = subprocess.call(com,shell=True)
            if int(ret)!=0 and STATUS is None:
                STATUS='ERROR'
            #TODO
            try:
                shutil.move('MaskBead.mask', maskpath)
            except Exception:
                printtime("ERROR: MaskBead.mask already moved")
        else:
            printtime("Warning: no bfmask.bin file exists.")

        if os.path.exists(maskpath):
            try:
                # Makes Bead_density_contour.png
                beadDensityPlot.genHeatmap(maskpath, BASECALLER_RESULTS)
            except Exception:
                traceback.print_exc()
        else:
            printtime("Warning: no MaskBead.mask file exists.")

        sys.stdout.flush()
        sys.stderr.flush()

        ########################################################
        # Make per region key incorporation traces             #
        ########################################################
        printtime("Make per region key incorporation traces")
        perRegionTF = "averagedKeyTraces_TF.txt"
        perRegionLib = "averagedKeyTraces_Lib.txt"
        if os.path.exists(perRegionTF):
            pr = plotRawRegions.PerRegionKey(tfKey, floworder,'TFTracePerRegion.png')
            pr.parse(perRegionTF)
            pr.plot()

        if os.path.exists(perRegionLib):
            pr = plotRawRegions.PerRegionKey(libKey, floworder,'LibTracePerRegion.png')
            pr.parse(perRegionLib)
            pr.plot()


        sys.stdout.flush()
        sys.stderr.flush()
    else:
        printtime('Skipping SFF Processing')

    if runFromSFF:
        ########################################################
        #Attempt to align                                      #
        ########################################################
        printtime("Attempt to align")

        # create analysis progress bar file
        _write_progress_file('green', 'yellow')

        try:
            align_full_chip(libsff_path, libKey, tfKey, floworder, fastq_path, env['align_full'], DIR_BC_FILES, env, ALIGNMENT_RESULTS)
        except Exception:
            printtime("ERROR: Alignment Failed")
            traceback.print_exc()

        printtime("make the read length histogram")
        filepath_readLenHistogram = os.path.join(ALIGNMENT_RESULTS,'readLenHisto.png')
        try:
            # Read the lengths from where SFFSummary wrote them.
            trimmedReadLenHisto.trimmedReadLenHisto(readlen_path,filepath_readLenHistogram)
        except Exception:
            printtime("Failed to create %s" % filepath_readLenHistogram)
            traceback.print_exc()

        ########################################################
        #ParseFiles                                            #
        ########################################################
        printtime('ParseFiles')

        # create analysis progress bar file
        _write_progress_file('green', 'green')

    else:
        printtime('Skipping TMAP Processing')