Example #1
def tf_processing(
      tf_basecaller_bam_path,
      tfKey,
      floworder,
      BASECALLER_RESULTS,
      analysis_dir):


    ##################################################
    #generate TF Metrics                             #
    ##################################################

    printtime("Calling TFPipeline.processBlock")
    TFPipeline.processBlock(tf_basecaller_bam_path, BASECALLER_RESULTS, tfKey, floworder, analysis_dir)
    printtime("Completed TFPipeline.processBlock")

    printtime("Finished tf processing")
Example #2
def mergeBasecallerResults(dirs, QualityPath, merged_bead_mask_path, floworder, libsff, tfsff, BASECALLER_RESULTS):
    ############################################
    # Merge individual quality.summary files #
    ############################################
    printtime("Merging individual quality.summary files")

    config_out = ConfigParser.RawConfigParser()
    config_out.optionxform = str # don't convert to lowercase
    config_out.add_section('global')

    # counts: summed across blocks
    numberkeys = ['Number of 50BP Reads',
                  'Number of 100BP Reads',
                  'Number of 150BP Reads',
                  'Number of Reads at Q0',
                  'Number of Bases at Q0',
                  'Number of 50BP Reads at Q0',
                  'Number of 100BP Reads at Q0',
                  'Number of 150BP Reads at Q0',
                  'Number of Reads at Q17',
                  'Number of Bases at Q17',
                  'Number of 50BP Reads at Q17',
                  'Number of 100BP Reads at Q17',
                  'Number of 150BP Reads at Q17',
                  'Number of Reads at Q20',
                  'Number of Bases at Q20',
                  'Number of 50BP Reads at Q20',
                  'Number of 100BP Reads at Q20',
                  'Number of 150BP Reads at Q20']

    # maxima: element-wise max across blocks
    maxkeys = ['Max Read Length at Q0',
               'Max Read Length at Q17',
               'Max Read Length at Q20']

    # means: accumulated as value/len(dirs) per block
    meankeys = ['System SNR',
                'Mean Read Length at Q0',
                'Mean Read Length at Q17',
                'Mean Read Length at Q20']

    config_in = MyConfigParser()
    config_in.optionxform = str # don't convert to lowercase
    doinit = True
    for i,subdir in enumerate(dirs):
        if isbadblock(subdir, "Merging quality.summary"):
            continue
        summaryfile=os.path.join(BASECALLER_RESULTS, subdir, 'quality.summary')
        if os.path.exists(summaryfile):
            printtime("INFO: process %s" % summaryfile)
            config_in.read(summaryfile)
            for key in numberkeys:
                value_in = config_in.get('global',key)
                if doinit:
                    value_out = 0
                else:
                    value_out = config_out.get('global', key)
                config_out.set('global', key, int(value_in) + int(value_out))
            for key in maxkeys:
                value_in = config_in.get('global',key)
                if doinit:
                    value_out = 0
                else:
                    value_out = config_out.get('global', key)
                config_out.set('global', key, max(int(value_in),int(value_out)))
            for key in meankeys:
                value_in = config_in.get('global',key)
                if doinit:
                    value_out = 0
                else:
                    value_out = config_out.get('global', key)
                # running mean: each block contributes value/len(dirs);
                # note that skipped blocks still count in the denominator
                config_out.set('global', key, float(value_out) + float(value_in)/len(dirs))
            doinit = False
        else:
            printtime("ERROR: skipped %s" % summaryfile)

    with open(QualityPath, 'wb') as configfile:
        config_out.write(configfile)

    ##################################################
    #generate TF Metrics                             #
    #look for both keys and append same file         #
    ##################################################

    printtime("Merging TFMapper metrics and generating TF plots")

    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS,dirs,floworder)

    except:
        printtime("ERROR: Merging TFMapper metrics failed")


    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir,'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles,BASECALLER_RESULTS)
    except:
        printtime("Merging BaseCaller.json files failed")


    ########################################
    # Merge individual block SFF files     #
    ########################################
    printtime("Merging Library SFF files")
    try:
        cmd = 'SFFProtonMerge'
        cmd = cmd + ' -i rawlib.sff'
        cmd = cmd + ' -o %s ' % libsff
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            if isbadblock(subdir, "Merging Library SFF files"):
                continue
            rawlibsff = os.path.join(subdir,'rawlib.sff')
            if os.path.exists(rawlibsff):
                cmd = cmd + ' %s' % subdir
            else:
                printtime("ERROR: skipped %s" % rawlibsff)
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd,shell=True)
    except:
        printtime("SFFProtonMerge failed (library)")

    printtime("Merging Test Fragment SFF files")
    try:
        cmd = 'SFFProtonMerge'
        cmd = cmd + ' -i rawtf.sff'
        cmd = cmd + ' -o %s ' % tfsff
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            if isbadblock(subdir, "Merging Test Fragment SFF files"):
                continue
            rawtfsff = os.path.join(subdir,'rawtf.sff')
            if os.path.exists(rawtfsff):
                cmd = cmd + ' %s' % subdir
            else:
                printtime("ERROR: skipped %s" % rawtfsff)
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd,shell=True)
    except:
        printtime("SFFProtonMerge failed (test fragments)")
Example #3
def tf_processing(
      SIGPROC_RESULTS,
      tfsff_path,
      libKey,
      tfKey,
      floworder,
      BASECALLER_RESULTS):


    ##################################################
    #generate TF Metrics                             #
    ##################################################

    printtime("Calling TFPipeline.processBlock")
    TFPipeline.processBlock(tfsff_path, BASECALLER_RESULTS, SIGPROC_RESULTS, tfKey, floworder)
    printtime("Completed TFPipeline.processBlock")



    ########################################################
    #Generate Raw Data Traces for lib and TF keys          #
    ########################################################
    printtime("Generate Raw Data Traces for lib and TF keys(iontrace_Test_Fragment.png, iontrace_Library.png)")

    tfRawPath = 'avgNukeTrace_%s.txt' % tfKey
    libRawPath = 'avgNukeTrace_%s.txt' % libKey
    peakOut = 'raw_peak_signal'

    if os.path.exists(tfRawPath):
        try:
            kp = plotKey.KeyPlot(tfKey, floworder, 'Test Fragment')
            kp.parse(tfRawPath)
            kp.dump_max(peakOut)
            kp.plot()
        except:
            printtime("TF key graph didn't render")
            traceback.print_exc()

    if os.path.exists(libRawPath):
        try:
            kp = plotKey.KeyPlot(libKey, floworder, 'Library')
            kp.parse(libRawPath)
            kp.dump_max(peakOut)
            kp.plot()
        except:
            printtime("Lib key graph didn't render")
            traceback.print_exc()


    ########################################################
    # Make per region key incorporation traces             #
    ########################################################
    printtime("Make per region key incorporation traces")
    perRegionTF = "averagedKeyTraces_TF.txt"
    perRegionLib = "averagedKeyTraces_Lib.txt"
    if os.path.exists(perRegionTF):
        pr = plotRawRegions.PerRegionKey(tfKey, floworder,'TFTracePerRegion.png')
        pr.parse(perRegionTF)
        pr.plot()

    if os.path.exists(perRegionLib):
        pr = plotRawRegions.PerRegionKey(libKey, floworder,'LibTracePerRegion.png')
        pr.parse(perRegionLib)
        pr.plot()
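
Both plotting stanzas above follow the same defensive idiom: check that the input file exists, then parse and plot inside a try/except that logs and moves on, so one bad plot never aborts the rest of the report. A generic sketch of that idiom (plot_if_present and make_plot are hypothetical names, not part of the pipeline):

import os
import traceback

def plot_if_present(path, make_plot, label):
    # Parse and plot `path` only if it exists; on failure, log and continue.
    if not os.path.exists(path):
        return
    try:
        make_plot(path)
    except Exception:
        print("%s graph didn't render" % label)
        traceback.print_exc()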
Example #4
def runBlock(env):
    STATUS = None
    basefolder = 'plugin_out'
    if not os.path.isdir(basefolder):
        os.umask(0000)   #grant write permission to plugin user
        os.mkdir(basefolder)
        os.umask(0002)
    pathprefix = env["prefix"]
    libsff_filename = "rawlib.sff"
    tfsff_filename = "rawtf.sff"
    fastq_filename = "raw.fastq"
    bctrimmed_libsff_filename = "bctrimmed_rawlib.sff"

    fastq_path = os.path.join(BASECALLER_RESULTS, fastq_filename)
    libsff_path = os.path.join(BASECALLER_RESULTS, libsff_filename)
    tfsff_path = os.path.join(BASECALLER_RESULTS, tfsff_filename)
    bctrimmed_libsff_path =  os.path.join(BASECALLER_RESULTS,bctrimmed_libsff_filename)
    tfmapperstats_path = os.path.join(BASECALLER_RESULTS,"TFMapper.stats")

    libKeyArg = "--libraryKey=%s" % env["libraryKey"]

    write_version()

    #-------------------------------------------------------------
    # Single Block data processing
    #-------------------------------------------------------------
    if runFromRaw:
        printtime("RUNNING SINGLE BLOCK ANALYSIS")
        command = "%s >> ReportLog.html 2>&1" % (env['analysisArgs'])
        printtime("Analysis command: " + command)
        sys.stdout.flush()
        sys.stderr.flush()
        status = subprocess.call(command,shell=True)
        #status = 2
        STATUS = None
        if int(status) == 2:
            STATUS = 'Checksum Error'
        elif int(status) == 3:
            STATUS = 'No Live Beads'
        elif int(status) != 0:
            STATUS = 'ERROR'

        if STATUS is not None:
            printtime("Analysis finished with status '%s'" % STATUS)
            #TODO - maybe create file
            # uploadMetrics.updateStatus(STATUS)

        #TODO
        '''
        csp = os.path.join(env['pathToRaw'],'checksum_status.txt')
        if not os.path.exists(csp) and not env['skipchecksum'] and STATUS==None:
            try:
                os.umask(0002)
                f = open(csp, 'w')
                f.write(str(status))
                f.close()
            except:
                traceback.print_exc()
        '''
        printtime("Finished single block analysis")
    else:
        printtime('Skipping single block analysis')

    if runFromWells:
        tfKey = "ATCG"
        libKey = env['libraryKey']
        floworder = env['flowOrder']
        printtime("Using flow order: %s" % floworder)
        printtime("Using library key: %s" % libKey)

        if "block_" in mycwd:

            # Fix SFFTrim
            basecallerjson = os.path.join(BASECALLER_RESULTS, 'BaseCaller.json')
            r = subprocess.call(["ln", "-s", basecallerjson])
            if r:
                printtime("couldn't create symbolic link")

            # Fix SFFMerge
            r = subprocess.call(["ln", "-s", os.path.join('..', SIGPROC_RESULTS, 'processParameters.txt'), os.path.join(BASECALLER_RESULTS, 'processParameters.txt')])
            if r:
                printtime("couldn't create symbolic link")


        sys.stdout.flush()
        sys.stderr.flush()

        if not os.path.exists(libsff_path):
            printtime("ERROR: %s does not exist" % libsff_path)
            open('badblock.txt', 'w').close() 

        ##################################################
        # Unfiltered SFF
        ##################################################

        unfiltered_dir = "unfiltered"
        if os.path.exists(unfiltered_dir):

            top_dir = os.getcwd()

            #change to the unfiltered dir
            os.chdir(os.path.join(top_dir,unfiltered_dir))

            #grab the first file named *.untrimmed.sff
            try:
                untrimmed_sff = glob.glob("*.untrimmed.sff")[0]
            except IndexError:
                printtime("Error, unable to find the untrimmed sff file")
                untrimmed_sff = None   # the SFFTrim call below will fail and be logged

            #derive the trimmed name from the untrimmed one
            trimmed_sff = untrimmed_sff.replace("untrimmed.sff","trimmed.sff") if untrimmed_sff else None

            # 3' adapter details
            qual_cutoff = env['reverse_primer_dict']['qual_cutoff']
            qual_window = env['reverse_primer_dict']['qual_window']
            adapter_cutoff = env['reverse_primer_dict']['adapter_cutoff']
            adapter = env['reverse_primer_dict']['sequence']

            # If flow order is missing, assume classic flow order:
            if floworder == "0":
                floworder = "TACG"
                printtime("warning: floworder redefine required.  set to TACG")

            printtime("Unfiltered SFFTrim")
            try:
                com = "SFFTrim"
                com += " --in-sff %s --out-sff %s" % (untrimmed_sff,trimmed_sff)
                com += " --flow-order %s" % (floworder)
                com += " --key %s" % (libKey)
                com += " --qual-cutoff %s" % (qual_cutoff)
                com += " --qual-window-size %s" % (qual_window)
                com += " --adapter-cutoff %s" % (adapter_cutoff)
                com += " --adapter %s" % (adapter)
                com += " --min-read-len 5"
                printtime("DEBUG: Calling '%s'" % com)
                ret = subprocess.call(com,shell=True)
                if int(ret) != 0 and STATUS is None:
                    STATUS = 'ERROR'
            except:
                printtime('Failed Unfiltered SFFTrim')

            sffs = glob.glob("*.sff")
            for sff in sffs:
                try:
                    com = "SFFRead"
                    com += " -q %s" % sff.replace(".sff",".fastq")
                    com += " %s" % sff
                    printtime("DEBUG: Calling '%s'" % com)
                    ret = subprocess.call(com,shell=True)
                    if int(ret) != 0 and STATUS is None:
                        STATUS = 'ERROR'
                except:
                    printtime('Failed to convert SFF ' + str(sff) + ' to fastq')

            #trim status
            for status in ["untrimmed","trimmed"]:
                os.chdir(os.path.join(top_dir,unfiltered_dir))
                if not os.path.exists(status):
                    os.makedirs(status)
                os.chdir(os.path.join(top_dir,unfiltered_dir,status))

                try:
                    printtime("Trim Status",)
                    align_full_chip_core("../*." + status + ".sff", libKey, tfKey, floworder, fastq_path, env['align_full'], -1, False, False, True, DIR_BC_FILES, env, ALIGNMENT_RESULTS)
                except OSError:
                    printtime('Trim Status Alignment Failed to start')
                    alignError = open("alignment.error", "w")
                    alignError.write(str(traceback.format_exc()))
                    alignError.close()
                    traceback.print_exc()

            os.chdir(top_dir)
        else:
            printtime("Directory unfiltered does not exist")

        sys.stdout.flush()
        sys.stderr.flush()

        ##################################################
        # Trim the SFF file if it has been requested     #
        ##################################################

        #only trim when the sfftrim flag is not set
        if not env['sfftrim']:
            printtime("Attempting to trim the SFF file")

            if not os.path.exists(libsff_path):
                printtime("ERROR: %s does not exist" % libsff_path)

            (head,tail) = os.path.split(libsff_path)
            # note: keeps only the first four characters of the original name,
            # e.g. rawlib.sff -> rawltrimmed.sff
            libsff_trimmed_path = os.path.join(head,tail[:4] + "trimmed.sff")

            #we will always need the input and output files
            trimArgs = "--in-sff %s --out-sff %s" % (libsff_path,libsff_trimmed_path)

            qual_cutoff = env['reverse_primer_dict']['qual_cutoff']
            qual_window = env['reverse_primer_dict']['qual_window']
            adapter_cutoff = env['reverse_primer_dict']['adapter_cutoff']
            adapter = env['reverse_primer_dict']['sequence']

            if not env['sfftrim_args']:
                printtime("no args found, using default args")
                trimArgs += " --flow-order %s --key %s" % (floworder, libKey)
                trimArgs += " --qual-cutoff %d --qual-window-size %d --adapter-cutoff %d --adapter %s" % (qual_cutoff,qual_window,adapter_cutoff,adapter)
                trimArgs += " --min-read-len 5 "
            else:
                printtime("using non default args" , env['sfftrim_args'])
                trimArgs += " " + env['sfftrim_args']

            try:
                com = "SFFTrim %s " % (trimArgs)
                printtime("DEBUG: call '%s':" % com)
                ret = subprocess.call(com,shell=True)
                if int(ret) != 0 and STATUS is None:
                    STATUS = 'ERROR'
            except:
                printtime('Failed SFFTrim')

            #if the trim did not fail then move the untrimmed file to untrimmed.expname.sff
            #and move trimmed to expname.sff to ensure backwards compatibility

# don't rename, result will be useless for --fromsff runs

#            if os.path.exists(libsff_path):
#                try:
#                    os.rename(libsff_path, "untrimmed." + libsff_path) #todo
#                except:
#                    printtime("ERROR: renaming %s" % libsff_path)

#            if os.path.exists(libsff_trimmed_path):
#                try:
#                    os.rename(libsff_trimmed_path, libsff_path)
#                except:
#                    printtime("ERROR: renaming %s" % libsff_trimmed_path)
        else:
            printtime("Not attempting to trim the SFF")


        #####################################################
        # Barcode trim SFF if barcodes have been specified  #
        # Creates one fastq per barcode, plus unknown reads #
        #####################################################

        if env['barcodeId'] != '':
            try:
                com = "barcodeSplit"
                com += " -s"
                com += " -i %s" % libsff_path
                com += " -b barcodeList.txt"
                com += " -c barcodeMask.bin"
                com += " -f %s" % floworder

                printtime("DEBUG: Calling '%s'" % com)
                ret = subprocess.call(com,shell=True)
                if int(ret) != 0 and STATUS is None:
                    STATUS = 'ERROR'
                else:
                    # Rename bc trimmed sff
                    if os.path.exists(bctrimmed_libsff_path):
                        os.rename(bctrimmed_libsff_path, libsff_path)
            except:
                printtime("Failed barcodeSplit")


        ##################################################
        # Once we have the new SFF, run SFFSummary
        # to get the predicted quality scores
        ##################################################

        try:
            com = "SFFSummary"
            com += " -o %s" % os.path.join(BASECALLER_RESULTS, 'quality.summary')
            com += " --sff-file %s" % libsff_path
            com += " --read-length 50,100,150"
            com += " --min-length 0,0,0"
            com += " --qual 0,17,20"
            com += " -d %s" % os.path.join(BASECALLER_RESULTS, 'readLen.txt')

            printtime("DEBUG: Calling '%s'" % com)
            ret = subprocess.call(com,shell=True)
            if int(ret) != 0 and STATUS is None:
                STATUS = 'ERROR'
        except:
            printtime('Failed SFFSummary')

        ##################################################
        #make keypass.fastq file -c(cut key) -k(key flows)#
        ##################################################
        # create analysis progress bar file
        f = open('progress.txt','w')
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = green\n')
        f.write('sffread = yellow\n')
        f.write('alignment = grey')
        f.close()

        try:
            com = "SFFRead"
            com += " -q %s" % fastq_path
            com += " %s" % libsff_path
            com += " > %s" % os.path.join(BASECALLER_RESULTS, 'keypass.summary')

            printtime("DEBUG: Calling '%s'" % com)
            ret = subprocess.call(com,shell=True)
            if int(ret) != 0 and STATUS is None:
                STATUS = 'ERROR'
        except:
            printtime('Failed SFFRead')


        ##################################################
        #generate TF Metrics                             #
        ##################################################
        
        printtime("Calling TFPipeline.processBlock")
        TFPipeline.processBlock(tfsff_filename, BASECALLER_RESULTS, SIGPROC_RESULTS, tfKey, floworder)
        printtime("Completed TFPipeline.processBlock")

        #printtime("Calling TFMapper")
        #try:
        #    com = "TFMapper"
        #    com += " --logfile TFMapper.log"
        #    com += " --output-dir=%s" % (BASECALLER_RESULTS)
        #    com += " --wells-dir=%s" % (SIGPROC_RESULTS)
        #    com += " --sff-dir=%s" % (BASECALLER_RESULTS)
        #    com += " --tfkey=%s" % (tfKey)
        #    com += " %s" % (tfsff_filename)
        #    com += " ./"
        #    com += " > %s" % (tfmapperstats_path)
        #    printtime("DEBUG: Calling '%s'" % com)
        #    ret = subprocess.call(com,shell=True)
        #    if int(ret)!=0 and STATUS==None:
        #        STATUS='ERROR'
        #except:
        #    printtime("ERROR: TFMapper failed")

        ########################################################
        #generate the TF Metrics including plots               #
        ########################################################
        #printtime("generate the TF Metrics including plots")

        #if os.path.exists(tfmapperstats_path):
        #    try:
        #        # Q17 TF Read Length Plot
        #        tfMetrics = parseTFstats.generateMetricsData(tfmapperstats_path)
        #        tfGraphs.Q17(tfMetrics)
        #        tfGraphs.genCafieIonograms(tfMetrics,floworder)
        #    except Exception:
        #        printtime("ERROR: Metrics Gen Failed")
        #        traceback.print_exc()
        #else:
        #    printtime("ERROR: %s doesn't exist" % tfmapperstats_path)

        ########################################################
        #Generate Raw Data Traces for lib and TF keys          #
        ########################################################
        printtime("Generate Raw Data Traces for lib and TF keys(iontrace_Test_Fragment.png, iontrace_Library.png)")

        tfRawPath = 'avgNukeTrace_%s.txt' % tfKey
        libRawPath = 'avgNukeTrace_%s.txt' % libKey
        peakOut = 'raw_peak_signal'

        if os.path.exists(tfRawPath):
            try:
                kp = plotKey.KeyPlot(tfKey, floworder, 'Test Fragment')
                kp.parse(tfRawPath)
                kp.dump_max(peakOut)
                kp.plot()
            except:
                printtime("TF key graph didn't render")
                traceback.print_exc()

        if os.path.exists(libRawPath):
            try:
                kp = plotKey.KeyPlot(libKey, floworder, 'Library')
                kp.parse(libRawPath)
                kp.dump_max(peakOut)
                kp.plot()
            except:
                printtime("Lib key graph didn't render")
                traceback.print_exc()

        ########################################################
        #Make Bead Density Plots                               #
        ########################################################
        printtime("Make Bead Density Plots")
        bfmaskPath = os.path.join(SIGPROC_RESULTS,"bfmask.bin")
        maskpath = os.path.join(SIGPROC_RESULTS,"MaskBead.mask")

        if os.path.isfile(bfmaskPath):
            com = "BeadmaskParse"
            com += " -m MaskBead"
            com += " %s" % bfmaskPath
            ret = subprocess.call(com,shell=True)
            if int(ret) != 0 and STATUS is None:
                STATUS = 'ERROR'
            #TODO
            try:
                shutil.move('MaskBead.mask', maskpath)
            except:
                printtime("ERROR: MaskBead.mask already moved")
        else:
            printtime("Warning: no bfmask.bin file exists.")

        if os.path.exists(maskpath):
            try:
                # Makes Bead_density_contour.png
                beadDensityPlot.genHeatmap(maskpath, BASECALLER_RESULTS)
                #os.remove(maskpath)
            except:
                traceback.print_exc()
        else:
            printtime("Warning: no MaskBead.mask file exists.")

        sys.stdout.flush()
        sys.stderr.flush()

        ########################################################
        # Make per region key incorporation traces             #
        ########################################################
        printtime("Make per region key incorporation traces")
        perRegionTF = "averagedKeyTraces_TF.txt"
        perRegionLib = "averagedKeyTraces_Lib.txt"
        if os.path.exists(perRegionTF):
            pr = plotRawRegions.PerRegionKey(tfKey, floworder,'TFTracePerRegion.png')
            pr.parse(perRegionTF)
            pr.plot()

        if os.path.exists(perRegionLib):
            pr = plotRawRegions.PerRegionKey(libKey, floworder,'LibTracePerRegion.png')
            pr.parse(perRegionLib)
            pr.plot()


        sys.stdout.flush()
        sys.stderr.flush()
    else:
        printtime('Skipping SFF Processing')

    if runFromSFF:
        ########################################################
        #Attempt to align                                      #
        ########################################################
        printtime("Attempt to align")

        # create analysis progress bar file
        f = open('progress.txt','w')
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = green\n')
        f.write('sffread = green\n')
        f.write('alignment = yellow')
        f.close()

        try:
            align_full_chip(libsff_path, libKey, tfKey, floworder, fastq_path, env['align_full'], DIR_BC_FILES, env, ALIGNMENT_RESULTS)
        except Exception:
            printtime("ERROR: Alignment Failed")
            traceback.print_exc()

        printtime("make the read length histogram")
        try:
            filepath_readLenHistogram = os.path.join(ALIGNMENT_RESULTS,'readLenHisto.png')
            trimmedReadLenHisto.trimmedReadLenHisto('readLen.txt',filepath_readLenHistogram)
        except:
            printtime("Failed to create %s" % filepath_readLenHistogram)

        ########################################################
        #ParseFiles                                            #
        ########################################################
        printtime('ParseFiles')

        # create analysis progress bar file
        f = open('progress.txt','w')
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = green\n')
        f.write('sffread = green\n')
        f.write('alignment = green')
        f.close()

    else:
        printtime('Skipping TMAP Processing')
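
runBlock rewrites progress.txt at each stage boundary with one color per stage (green = done, yellow = running, grey = pending). A hedged helper capturing that convention; write_progress is a hypothetical name, the inline version omits the newline on the last line, and the final all-green write would need an extra "done" state:

STAGES = ['wellfinding', 'signalprocessing', 'basecalling', 'sffread', 'alignment']

def write_progress(current_stage, path='progress.txt'):
    # Stages before current_stage are green, current_stage is yellow,
    # later stages are grey.
    idx = STAGES.index(current_stage)
    with open(path, 'w') as f:
        for i, stage in enumerate(STAGES):
            color = 'green' if i < idx else ('yellow' if i == idx else 'grey')
            f.write('%s = %s\n' % (stage, color))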
Example #5
def merge_basecaller_stats(dirs, BASECALLER_RESULTS, SIGPROC_RESULTS, flows, floworder):

    ########################################################
    # Merge datasets_basecaller.json                       #
    ########################################################
    
    block_datasets_json = []
    combined_datasets_json = {}
    
    for subdir in dirs:
        current_datasets_path = os.path.join(subdir,BASECALLER_RESULTS,'datasets_basecaller.json')
        try:
            f = open(current_datasets_path,'r')
            block_datasets_json.append(json.load(f))
            f.close()
        except:
            printtime("ERROR: skipped %s" % current_datasets_path)
    
    if (not block_datasets_json) or ('datasets' not in block_datasets_json[0]) or ('read_groups' not in block_datasets_json[0]):
        printtime("merge_basecaller_results: no block contained a valid datasets_basecaller.json, aborting")
        return

    combined_datasets_json = copy.deepcopy(block_datasets_json[0])
    
    for dataset_idx in range(len(combined_datasets_json['datasets'])):
        combined_datasets_json['datasets'][dataset_idx]['read_count'] = 0
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['datasets'][dataset_idx]['read_count'] += current_datasets_json['datasets'][dataset_idx].get("read_count",0)
    
    for read_group in combined_datasets_json['read_groups'].iterkeys():
        combined_datasets_json['read_groups'][read_group]['Q20_bases'] = 0
        combined_datasets_json['read_groups'][read_group]['total_bases'] = 0
        combined_datasets_json['read_groups'][read_group]['read_count'] = 0
        # filtered starts True except for the 'nomatch' group, then is ANDed per block
        combined_datasets_json['read_groups'][read_group]['filtered'] = ('nomatch' not in read_group)
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['read_groups'][read_group]['Q20_bases'] += current_datasets_json['read_groups'].get(read_group,{}).get("Q20_bases",0)
            combined_datasets_json['read_groups'][read_group]['total_bases'] += current_datasets_json['read_groups'].get(read_group,{}).get("total_bases",0)
            combined_datasets_json['read_groups'][read_group]['read_count'] += current_datasets_json['read_groups'].get(read_group,{}).get("read_count",0)
            combined_datasets_json['read_groups'][read_group]['filtered'] &= current_datasets_json['read_groups'].get(read_group,{}).get("filtered",True)
    
    try:
        f = open(os.path.join(BASECALLER_RESULTS,'datasets_basecaller.json'),"w")
        json.dump(combined_datasets_json, f, indent=4)
        f.close()
    except:
        printtime("ERROR; Failed to write merged datasets_basecaller.json")
        traceback.print_exc()



    ########################################################
    # Merge ionstats_basecaller.json:                      #
    # First across blocks, then across barcodes            #
    ########################################################

    try:
        composite_filename_list = []
        for dataset in combined_datasets_json["datasets"]:
            composite_filename = os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json')
            barcode_filename_list = [os.path.join(subdir,BASECALLER_RESULTS,dataset['file_prefix']+'.ionstats_basecaller.json') for subdir in dirs]
            barcode_filename_list = [filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list,composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)

        ionstats.reduce_stats(composite_filename_list,os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'))
        ionstats.generate_legacy_basecaller_files(
                os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS,''))
    except:
        printtime("ERROR: Failed to merge ionstats_basecaller.json")
        traceback.print_exc()



    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        # a full run is expected to contribute 96 analysis blocks
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:

                blockstatus_return_code_file = os.path.join(subdir,"blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):

                    with open(blockstatus_return_code_file, 'r') as f:
                        text = f.read()
                        # count down: reaches 0 only when every block reports Basecaller=0
                        if 'Basecaller=0' in text:
                            composite_return_code -= 1

            composite_return_code_file = os.path.join(BASECALLER_RESULTS,"composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                os.umask(0002)
                f = open(composite_return_code_file, 'a')
                f.write(str(composite_return_code))
                f.close()
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except:
        traceback.print_exc()


    ##################################################
    #generate TF Metrics                             #
    #look for both keys and append same file         #
    ##################################################

    printtime("Merging TFMapper metrics and generating TF plots")
    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS,dirs,floworder)
    except:
        printtime("ERROR: Merging TFMapper metrics failed")

    
    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir,'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles,BASECALLER_RESULTS)
    except:
        printtime("Merging BaseCaller.json files failed")


    ###############################################
    # Generate composite plots
    ###############################################

    printtime("Build composite basecaller graphs")
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 400

    # Plot read length sparkline
    for dataset in combined_datasets_json["datasets"]:
        ionstats_plots.read_length_sparkline(
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.sparkline.png'),
                graph_max_x)

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto.png'),
            graph_max_x)
    
    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto2.png'),
            graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS,'quality_histogram.png'))
    

    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except:
        printtime ("ERROR: Wells beadogram generation failed")
        traceback.print_exc()

    printtime("Finished merging basecaller stats")
Example #6
                                                     env['SIGPROC_RESULTS'])
        except:
            printtime("ERROR: Wells beadogram generation failed")
            traceback.print_exc()

        set_result_status('TF Processing')

        try:
            # TODO basecaller_results/datasets_tf.json might contain read_count : 0
            if os.path.exists(
                    os.path.join(env['BASECALLER_RESULTS'],
                                 'rawtf.basecaller.bam')):

                TFPipeline.processBlock(
                    os.path.join(env['BASECALLER_RESULTS'],
                                 'rawtf.basecaller.bam'),
                    env['BASECALLER_RESULTS'], env['tfKey'], env['flowOrder'],
                    '.')
        except:
            traceback.print_exc()

        # Process unfiltered reads

        if do_unfiltered_processing:
            set_result_status('Process Unfiltered BAM')

            bidirectional = False
            activate_barcode_filter = False
            create_index = False

            for unfiltered_directory in [
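
The fragment cuts off mid-statement here; the part that is complete guards the TF step behind an existence check on the TF BAM. A minimal restatement of that guard (run_tf_step is a hypothetical wrapper; env and TFPipeline come from the surrounding module):

import os

def run_tf_step(env, TFPipeline):
    # Only run TF processing when the basecaller actually produced a TF BAM.
    rawtf_bam = os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam')
    if os.path.exists(rawtf_bam):
        TFPipeline.processBlock(rawtf_bam, env['BASECALLER_RESULTS'],
                                env['tfKey'], env['flowOrder'], '.')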
Example #7
                alignment.create_plots('ionstats_alignment.json', graph_max_x)
            except:
                traceback.print_exc()

        try:
            wells_beadogram.generate_wells_beadogram(env['BASECALLER_RESULTS'], env['SIGPROC_RESULTS'])
        except:
            printtime ("ERROR: Wells beadogram generation failed")
            traceback.print_exc()

        set_result_status('TF Processing')

        try:
            TFPipeline.processBlock(
                os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam'),
                env['BASECALLER_RESULTS'],
                env['tfKey'],
                env['flowOrder'],
                '.')
            #add_status("TF Processing", 0)
        except:
            traceback.print_exc()
            #add_status("TF Processing", 1)


        # Process unfiltered reads

        if do_unfiltered_processing:
            set_result_status('Process Unfiltered BAM')

            bidirectional = False
            activate_barcode_filter = False
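
Examples #6 and #7 end mid-fragment, but both repeat the error-handling shape used throughout this page: run a step, log a labeled error, print the traceback, and keep going. A hypothetical wrapper that names the pattern (run_step is not part of the source pipeline):

import traceback

def run_step(label, fn, *args, **kwargs):
    # Run one pipeline step; on any failure, log it with the step's label,
    # dump the traceback, and let the caller continue with later steps.
    try:
        return fn(*args, **kwargs)
    except Exception:
        print("ERROR: %s failed" % label)
        traceback.print_exc()
        return None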