def post_basecalling(libsff_path, reverse_primer_dict, skipsfftrim, sfftrim_args,
                     libKey, floworder, barcodeId, barcodesplit_filter,
                     DIR_BC_FILES, barcodeList_path, bfmask_path,
                     barcodeMask_path, generate_beadsummary, BASECALLER_RESULTS):
    """Post-process the basecalled library SFF for one block.

    Stages (each best-effort: a failure is logged and the pipeline moves on):
      1. SFFTrim      - 3' adapter / quality trimming (unless skipsfftrim);
                        the trimmed file replaces libsff_path in place.
      2. barcodeSplit - split reads per barcode (only if barcodeId is set),
                        then optional filtering via filter_barcodes.
      3. SFFSummary   - predicted quality metrics and readLen.txt table.
      4. readLenHisto - read length histogram PNG from readLen.txt.
      5. SFFRead      - convert the final SFF to fastq (keypass summary).

    Writes 'badblock.txt' and returns early when libsff_path is missing.

    NOTE(review): external tools are invoked through the shell; arguments are
    assumed to be trusted pipeline-generated paths, not user input.
    """
    if not os.path.exists(libsff_path):
        printtime("ERROR: %s does not exist" % libsff_path)
        # Marker file signals downstream steps that this block is unusable.
        open('badblock.txt', 'w').close()
        return

    ##################################################
    # Trim the SFF file if it has been requested     #
    ##################################################
    if not skipsfftrim:
        printtime("Attempting to trim the SFF file")
        libsff_untrimmed_path = libsff_path
        (head, tail) = os.path.split(libsff_untrimmed_path)
        # e.g. rawlib.sff -> rawlib.trimmed.sff
        libsff_trimmed_path = os.path.join(head, tail[:-4] + ".trimmed.sff")

        try:
            com = "SFFTrim"
            com += " --in-sff %s" % (libsff_untrimmed_path)
            com += " --out-sff %s" % (libsff_trimmed_path)
            if sfftrim_args:
                printtime("using non default args '%s'" % sfftrim_args)
                com += " " + sfftrim_args
            else:
                printtime("no special args found, using default args")
                # 3' adapter details
                qual_cutoff = reverse_primer_dict['qual_cutoff']
                qual_window = reverse_primer_dict['qual_window']
                adapter_cutoff = reverse_primer_dict['adapter_cutoff']
                adapter = reverse_primer_dict['sequence']
                com += " --flow-order %s" % (floworder)
                com += " --key %s" % (libKey)
                com += " --qual-cutoff %s" % (qual_cutoff)
                com += " --qual-window-size %s" % (qual_window)
                com += " --adapter-cutoff %s" % (adapter_cutoff)
                com += " --adapter %s" % (adapter)
                com += " --min-read-len 5"
            if generate_beadsummary:
                com += " --bead-summary %s" % (os.path.join(BASECALLER_RESULTS, 'BaseCaller.json'))

            printtime("DEBUG: Calling '%s':" % com)
            ret = subprocess.call(com, shell=True)
            blockprocessing.add_status("SFFTrim", ret)
        except Exception:
            # Best-effort: log and continue with whatever files exist.
            printtime('Failed SFFTrim')
            traceback.print_exc()

        # Replace the untrimmed SFF with the trimmed result under the original
        # name, so every later stage can keep reading libsff_path.
        if os.path.exists(libsff_untrimmed_path):
            printtime("DEBUG: remove untrimmed file %s" % libsff_untrimmed_path)
            os.remove(libsff_untrimmed_path)
        else:
            printtime("ERROR: untrimmed file not found: %s" % libsff_untrimmed_path)

        if os.path.exists(libsff_trimmed_path):
            printtime("DEBUG: Renaming %s to %s" % (libsff_trimmed_path, libsff_path))
            os.rename(libsff_trimmed_path, libsff_path)
    else:
        printtime("Not attempting to trim the SFF")

    #####################################################
    # Barcode trim SFF if barcodes have been specified  #
    # Creates one fastq per barcode, plus unknown reads #
    #####################################################
    if barcodeId != '':
        try:
            (head, tail) = os.path.split(libsff_path)
            # e.g. rawlib.sff -> rawlib.bctrimmed.sff
            libsff_bctrimmed_path = os.path.join(head, tail[:-4] + ".bctrimmed.sff")

            if not os.path.exists(DIR_BC_FILES):
                os.mkdir(DIR_BC_FILES)

            com = "barcodeSplit"
            com += " -s"
            com += " -i %s" % libsff_path
            com += " -b %s" % barcodeList_path
            com += " -k %s" % bfmask_path
            com += " -f %s" % floworder
            com += " -l %s" % barcodesplit_filter
            com += " -c %s" % barcodeMask_path
            com += " -d %s" % DIR_BC_FILES

            printtime("DEBUG: Calling '%s'" % com)
            ret = subprocess.call(com, shell=True)
            blockprocessing.add_status("barcodeSplit", ret)

            if int(ret) != 0:
                printtime("ERROR Failed barcodeSplit with return code %d" % int(ret))
            else:
                # barcodeSplit is producing "bctrimmed_"+libsff_path , rename
                (head, tail) = os.path.split(libsff_path)
                bcsff = os.path.join(DIR_BC_FILES, head, "bctrimmed_" + tail)
                if os.path.exists(bcsff):
                    printtime("Renaming %s to %s" % (bcsff, libsff_bctrimmed_path))
                    os.rename(bcsff, libsff_bctrimmed_path)
                else:
                    printtime("ERROR: Renaming: File not found: %s" % bcsff)

                if os.path.exists(libsff_path):
                    printtime("DEBUG: remove file %s" % libsff_path)
                    os.remove(libsff_path)
                else:
                    printtime("ERROR: Remove: File not found: %s" % libsff_path)

                # rename: libsff_path contains now the trimmed/bctrimmed data
                if os.path.exists(libsff_bctrimmed_path):
                    printtime("Renaming %s to %s" % (libsff_bctrimmed_path, libsff_path))
                    os.rename(libsff_bctrimmed_path, libsff_path)
        except Exception:
            printtime("ERROR Failed barcodeSplit")
            traceback.print_exc()

        # implement barcode filtering by moving filtered files
        if float(barcodesplit_filter) > 0:
            from ion.utils.filter_barcodes import filter_barcodes
            filter_barcodes(DIR_BC_FILES)

    ##################################################
    # Once we have the new SFF, run SFFSummary
    # to get the predicted quality scores
    ##################################################
    try:
        com = "SFFSummary"
        com += " -o %s" % os.path.join(BASECALLER_RESULTS, 'quality.summary')
        com += " --sff-file %s" % libsff_path
        com += " --read-length 50,100,150"
        com += " --min-length 0,0,0"
        com += " --qual 0,17,20"
        com += " -d %s" % os.path.join(BASECALLER_RESULTS, 'readLen.txt')

        printtime("DEBUG: Calling '%s'" % com)
        ret = subprocess.call(com, shell=True)
        blockprocessing.add_status("SFFSummary", ret)
    except Exception:
        printtime('Failed SFFSummary')

    printtime("make the read length histogram")
    # Assigned before the try so the except message cannot raise NameError.
    filepath_readLenHistogram = os.path.join(BASECALLER_RESULTS, 'readLenHisto.png')
    try:
        # Read readLen.txt from where SFFSummary wrote it (-d flag above).
        # The previous bare 'readLen.txt' only worked when BASECALLER_RESULTS
        # happened to be the current directory.
        trimmedReadLenHisto.trimmedReadLenHisto(
            os.path.join(BASECALLER_RESULTS, 'readLen.txt'),
            filepath_readLenHistogram)
    except Exception:
        printtime("Failed to create %s" % filepath_readLenHistogram)

    #####################################################
    # make keypass.fastq file -c(cut key) -k(key flows) #
    #####################################################
    try:
        com = "SFFRead"
        com += " -q %s" % libsff_path.replace(".sff", ".fastq")
        com += " %s" % libsff_path
        com += " > %s" % os.path.join(BASECALLER_RESULTS, 'keypass.summary')

        printtime("DEBUG: Calling '%s'" % com)
        ret = subprocess.call(com, shell=True)
        blockprocessing.add_status("SFFRead", ret)
    except Exception:
        printtime('Failed SFFRead')
        printtime('Failed to convert SFF ' + str(libsff_path) + ' to fastq')
def runBlock(env):
    """Per-block pipeline driver: signal processing, basecall post-processing
    and alignment, gated by the module-level flags runFromRaw / runFromWells /
    runFromSFF.

    NOTE(review): depends on many module-level globals (BASECALLER_RESULTS,
    SIGPROC_RESULTS, ALIGNMENT_RESULTS, DIR_BC_FILES, mycwd, runFromRaw,
    runFromWells, runFromSFF, write_version, align_full_chip, plotKey, ...);
    confirm they are bound before this is called.
    """
    STATUS = None
    basefolder = 'plugin_out'
    if not os.path.isdir(basefolder):
        os.umask(0000)   # grant write permission to plugin user
        os.mkdir(basefolder)
        os.umask(0002)

    pathprefix = env["prefix"]

    # Canonical per-block file names; all concrete paths live under
    # BASECALLER_RESULTS.
    libsff_filename = "rawlib.sff"
    tfsff_filename = "rawtf.sff"
    fastq_filename = "raw.fastq"
    bctrimmed_libsff_filename = "bctrimmed_rawlib.sff"

    fastq_path = os.path.join(BASECALLER_RESULTS, fastq_filename)
    libsff_path = os.path.join(BASECALLER_RESULTS, libsff_filename)
    tfsff_path = os.path.join(BASECALLER_RESULTS, tfsff_filename)
    bctrimmed_libsff_path = os.path.join(BASECALLER_RESULTS, bctrimmed_libsff_filename)
    tfmapperstats_path = os.path.join(BASECALLER_RESULTS, "TFMapper.stats")

    libKeyArg = "--libraryKey=%s" % env["libraryKey"]

    write_version()

    #-------------------------------------------------------------
    # Single Block data processing
    #-------------------------------------------------------------
    if runFromRaw:
        printtime("RUNNING SINGLE BLOCK ANALYSIS")
        # Full Analysis command line is supplied by the caller via env.
        command = "%s >> ReportLog.html 2>&1" % (env['analysisArgs'])
        printtime("Analysis command: " + command)
        sys.stdout.flush()
        sys.stderr.flush()
        status = subprocess.call(command, shell=True)
        #status = 2
        STATUS = None
        # Map the Analysis binary's exit codes to a human-readable status.
        if int(status) == 2:
            STATUS = 'Checksum Error'
        elif int(status) == 3:
            STATUS = 'No Live Beads'
        elif int(status) != 0:
            STATUS = 'ERROR'
        if STATUS != None:
            printtime("Analysis finished with status '%s'" % STATUS)
            #TODO - maybe create file
            # uploadMetrics.updateStatus(STATUS)

        #TODO
        '''
        csp = os.path.join(env['pathToRaw'],'checksum_status.txt')
        if not os.path.exists(csp) and not env['skipchecksum'] and STATUS==None:
            try:
                os.umask(0002)
                f = open(csp, 'w')
                f.write(str(status))
                f.close()
            except:
                traceback.print_exc()
        '''
        printtime("Finished single block analysis")
    else:
        printtime('Skipping single block analysis')

    if runFromWells:
        tfKey = "ATCG"
        libKey = env['libraryKey']
        floworder = env['flowOrder']
        printtime("Using flow order: %s" % floworder)
        printtime("Using library key: %s" % libKey)

        if "block_" in mycwd:
            # Fix SFFTrim
            basecallerjson = os.path.join(BASECALLER_RESULTS, 'BaseCaller.json')
            r = subprocess.call(["ln", "-s", basecallerjson])
            if r:
                printtime("couldn't create symbolic link")
            # Fix SFFMerge
            r = subprocess.call(["ln", "-s", os.path.join('..', SIGPROC_RESULTS, 'processParameters.txt'), os.path.join(BASECALLER_RESULTS, 'processParameters.txt')])
            if r:
                printtime("couldn't create symbolic link")

        sys.stdout.flush()
        sys.stderr.flush()

        if not os.path.exists(libsff_path):
            printtime("ERROR: %s does not exist" % libsff_path)
            # Marker file signals downstream steps that this block is bad.
            open('badblock.txt', 'w').close()

        ##################################################
        # Unfiltered SFF
        ##################################################
        unfiltered_dir = "unfiltered"
        if os.path.exists(unfiltered_dir):
            top_dir = os.getcwd()

            # change to the unfiltered dir
            os.chdir(os.path.join(top_dir, unfiltered_dir))

            # grab the first file named untrimmed.sff
            # NOTE(review): if the glob is empty, untrimmed_sff stays unbound
            # and the line after the except raises NameError - confirm intent.
            try:
                untrimmed_sff = glob.glob("*.untrimmed.sff")[0]
            except IndexError:
                printtime("Error, unable to find the untrimmed sff file")

            # rename untrimmed to trimmed
            trimmed_sff = untrimmed_sff.replace("untrimmed.sff", "trimmed.sff")

            # 3' adapter details
            qual_cutoff = env['reverse_primer_dict']['qual_cutoff']
            qual_window = env['reverse_primer_dict']['qual_window']
            adapter_cutoff = env['reverse_primer_dict']['adapter_cutoff']
            adapter = env['reverse_primer_dict']['sequence']

            # If flow order is missing, assume classic flow order:
            if floworder == "0":
                floworder = "TACG"
                printtime("warning: floworder redefine required. set to TACG")

            printtime("Unfiltered SFFTrim")
            try:
                com = "SFFTrim"
                com += " --in-sff %s --out-sff %s" % (untrimmed_sff, trimmed_sff)
                com += " --flow-order %s" % (floworder)
                com += " --key %s" % (libKey)
                com += " --qual-cutoff %s" % (qual_cutoff)
                com += " --qual-window-size %s" % (qual_window)
                com += " --adapter-cutoff %s" % (adapter_cutoff)
                com += " --adapter %s" % (adapter)
                com += " --min-read-len 5"

                printtime("DEBUG: Calling '%s'" % com)
                ret = subprocess.call(com, shell=True)
                if int(ret) != 0 and STATUS == None:
                    STATUS = 'ERROR'
            except:
                printtime('Failed Unfiltered SFFTrim')

            # Convert every SFF in the unfiltered dir to fastq.
            sffs = glob.glob("*.sff")
            for sff in sffs:
                try:
                    com = "SFFRead"
                    com += " -q %s" % sff.replace(".sff", ".fastq")
                    com += " %s" % sff

                    printtime("DEBUG: Calling '%s'" % com)
                    ret = subprocess.call(com, shell=True)
                    if int(ret) != 0 and STATUS == None:
                        STATUS = 'ERROR'
                except:
                    printtime('Failed to convert SFF' + str(sff) + ' to fastq')

            # trim status: align both the untrimmed and trimmed read sets,
            # each in its own subdirectory.
            for status in ["untrimmed", "trimmed"]:
                os.chdir(os.path.join(top_dir, unfiltered_dir))
                if not os.path.exists(status):
                    os.makedirs(status)
                os.chdir(os.path.join(top_dir, unfiltered_dir, status))

                try:
                    printtime("Trim Status",)
                    align_full_chip_core("../*." + status + ".sff", libKey, tfKey, floworder, fastq_path, env['align_full'], -1, False, False, True, DIR_BC_FILES, env, ALIGNMENT_RESULTS)
                except OSError:
                    printtime('Trim Status Alignment Failed to start')
                    alignError = open("alignment.error", "w")
                    alignError.write(str(traceback.format_exc()))
                    alignError.close()
                    traceback.print_exc()

            os.chdir(top_dir)
        else:
            printtime("Directory unfiltered does not exist")

        sys.stdout.flush()
        sys.stderr.flush()

        ##################################################
        # Trim the SFF file if it has been requested     #
        ##################################################
        # only trim if SFF is false
        if not env['sfftrim']:
            printtime("Attempting to trim the SFF file")

            if not os.path.exists(libsff_path):
                printtime("ERROR: %s does not exist" % libsff_path)

            (head, tail) = os.path.split(libsff_path)
            # NOTE(review): tail[:4] + "trimmed.sff" yields e.g.
            # "rawltrimmed.sff"; post_basecalling uses
            # tail[:-4] + ".trimmed.sff" - confirm which is intended.
            libsff_trimmed_path = os.path.join(head, tail[:4] + "trimmed.sff")

            # we will always need the input and output files
            trimArgs = "--in-sff %s --out-sff %s" % (libsff_path, libsff_trimmed_path)

            qual_cutoff = env['reverse_primer_dict']['qual_cutoff']
            qual_window = env['reverse_primer_dict']['qual_window']
            adapter_cutoff = env['reverse_primer_dict']['adapter_cutoff']
            adapter = env['reverse_primer_dict']['sequence']

            if not env['sfftrim_args']:
                printtime("no args found, using default args")
                trimArgs += " --flow-order %s --key %s" % (floworder, libKey)
                trimArgs += " --qual-cutoff %d --qual-window-size %d --adapter-cutoff %d --adapter %s" % (qual_cutoff, qual_window, adapter_cutoff, adapter)
                trimArgs += " --min-read-len 5 "
            else:
                # NOTE(review): printtime is called with two positional args
                # here but one everywhere else - confirm its signature.
                printtime("using non default args", env['sfftrim_args'])
                trimArgs += " " + env['sfftrim_args']

            try:
                com = "SFFTrim %s " % (trimArgs)
                printtime("DEBUG: call '%s':" % com)
                ret = subprocess.call(com, shell=True)
                if int(ret) != 0 and STATUS == None:
                    STATUS = 'ERROR'
            except:
                printtime('Failed SFFTrim')

            # if the trim did not fail then move the untrimmed file to
            # untrimmed.expname.sff and move trimmed to expname.sff to ensure
            # backwards compatability
            # don't rename, result will be useless for --fromsff runs
            # if os.path.exists(libsff_path):
            #     try:
            #         os.rename(libsff_path, "untrimmed." + libsff_path) #todo
            #     except:
            #         printtime("ERROR: renaming %s" % libsff_path)
            # if os.path.exists(libsff_trimmed_path):
            #     try:
            #         os.rename(libsff_trimmed_path, libsff_path)
            #     except:
            #         printtime("ERROR: renaming %s" % libsff_trimmed_path)
        else:
            printtime("Not attempting to trim the SFF")

        #####################################################
        # Barcode trim SFF if barcodes have been specified  #
        # Creates one fastq per barcode, plus unknown reads #
        #####################################################
        # NOTE(review): 'is not' is an identity comparison; "!= ''" is almost
        # certainly what was meant (CPython interning makes this usually work).
        if env['barcodeId'] is not '':
            try:
                com = "barcodeSplit"
                com += " -s"
                com += " -i %s" % libsff_path
                com += " -b barcodeList.txt"
                com += " -c barcodeMask.bin"
                com += " -f %s" % floworder

                printtime("DEBUG: Calling '%s'" % com)
                ret = subprocess.call(com, shell=True)
                if int(ret) != 0 and STATUS == None:
                    STATUS = 'ERROR'
                else:
                    # Rename bc trimmed sff
                    if os.path.exists(bctrimmed_libsff_path):
                        os.rename(bctrimmed_libsff_path, libsff_path)
            except:
                printtime("Failed barcodeSplit")

        ##################################################
        # Once we have the new SFF, run SFFSummary
        # to get the predicted quality scores
        ##################################################
        try:
            com = "SFFSummary"
            com += " -o %s" % os.path.join(BASECALLER_RESULTS, 'quality.summary')
            com += " --sff-file %s" % libsff_path
            com += " --read-length 50,100,150"
            com += " --min-length 0,0,0"
            com += " --qual 0,17,20"
            com += " -d %s" % os.path.join(BASECALLER_RESULTS, 'readLen.txt')

            printtime("DEBUG: Calling '%s'" % com)
            ret = subprocess.call(com, shell=True)
            if int(ret) != 0 and STATUS == None:
                STATUS = 'ERROR'
        except:
            printtime('Failed SFFSummary')

        ##################################################
        # make keypass.fastq file -c(cut key) -k(key flows)#
        ##################################################
        # create analysis progress bar file
        f = open('progress.txt', 'w')
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = green\n')
        f.write('sffread = yellow\n')
        f.write('alignment = grey')
        f.close()

        try:
            com = "SFFRead"
            com += " -q %s" % fastq_path
            com += " %s" % libsff_path
            com += " > %s" % os.path.join(BASECALLER_RESULTS, 'keypass.summary')

            printtime("DEBUG: Calling '%s'" % com)
            ret = subprocess.call(com, shell=True)
            if int(ret) != 0 and STATUS == None:
                STATUS = 'ERROR'
        except:
            printtime('Failed SFFRead')

        ##################################################
        # generate TF Metrics                            #
        ##################################################
        printtime("Calling TFPipeline.processBlock")
        TFPipeline.processBlock(tfsff_filename, BASECALLER_RESULTS, SIGPROC_RESULTS, tfKey, floworder)
        printtime("Completed TFPipeline.processBlock")

        # Superseded by TFPipeline.processBlock above; kept for reference.
        #printtime("Calling TFMapper")
        #try:
        #    com = "TFMapper"
        #    com += " --logfile TFMapper.log"
        #    com += " --output-dir=%s" % (BASECALLER_RESULTS)
        #    com += " --wells-dir=%s" % (SIGPROC_RESULTS)
        #    com += " --sff-dir=%s" % (BASECALLER_RESULTS)
        #    com += " --tfkey=%s" % (tfKey)
        #    com += " %s" % (tfsff_filename)
        #    com += " ./"
        #    com += " > %s" % (tfmapperstats_path)
        #    printtime("DEBUG: Calling '%s'" % com)
        #    ret = subprocess.call(com,shell=True)
        #    if int(ret)!=0 and STATUS==None:
        #        STATUS='ERROR'
        #except:
        #    printtime("ERROR: TFMapper failed")

        ########################################################
        # generate the TF Metrics including plots              #
        ########################################################
        #printtime("generate the TF Metrics including plots")
        #if os.path.exists(tfmapperstats_path):
        #    try:
        #        # Q17 TF Read Length Plot
        #        tfMetrics = parseTFstats.generateMetricsData(tfmapperstats_path)
        #        tfGraphs.Q17(tfMetrics)
        #        tfGraphs.genCafieIonograms(tfMetrics,floworder)
        #    except Exception:
        #        printtime("ERROR: Metrics Gen Failed")
        #        traceback.print_exc()
        #else:
        #    printtime("ERROR: %s doesn't exist" % tfmapperstats_path)

        ########################################################
        # Generate Raw Data Traces for lib and TF keys         #
        ########################################################
        printtime("Generate Raw Data Traces for lib and TF keys(iontrace_Test_Fragment.png, iontrace_Library.png)")

        tfRawPath = 'avgNukeTrace_%s.txt' % tfKey
        libRawPath = 'avgNukeTrace_%s.txt' % libKey
        peakOut = 'raw_peak_signal'

        if os.path.exists(tfRawPath):
            try:
                kp = plotKey.KeyPlot(tfKey, floworder, 'Test Fragment')
                kp.parse(tfRawPath)
                kp.dump_max(peakOut)
                kp.plot()
            except:
                printtime("TF key graph didn't render")
                traceback.print_exc()

        if os.path.exists(libRawPath):
            try:
                kp = plotKey.KeyPlot(libKey, floworder, 'Library')
                kp.parse(libRawPath)
                kp.dump_max(peakOut)
                kp.plot()
            except:
                printtime("Lib key graph didn't render")
                traceback.print_exc()

        ########################################################
        # Make Bead Density Plots                              #
        ########################################################
        printtime("Make Bead Density Plots")
        bfmaskPath = os.path.join(SIGPROC_RESULTS, "bfmask.bin")
        maskpath = os.path.join(SIGPROC_RESULTS, "MaskBead.mask")

        if os.path.isfile(bfmaskPath):
            # BeadmaskParse writes MaskBead.mask into the current directory.
            com = "BeadmaskParse"
            com += " -m MaskBead"
            com += " %s" % bfmaskPath
            ret = subprocess.call(com, shell=True)
            if int(ret) != 0 and STATUS == None:
                STATUS = 'ERROR'
            #TODO
            try:
                shutil.move('MaskBead.mask', maskpath)
            except:
                printtime("ERROR: MaskBead.mask already moved")
        else:
            printtime("Warning: no bfmask.bin file exists.")

        if os.path.exists(maskpath):
            try:
                # Makes Bead_density_contour.png
                beadDensityPlot.genHeatmap(maskpath, BASECALLER_RESULTS)
                # os.remove(maskpath)
            except:
                traceback.print_exc()
        else:
            printtime("Warning: no MaskBead.mask file exists.")

        sys.stdout.flush()
        sys.stderr.flush()

        ########################################################
        # Make per region key incorporation traces             #
        ########################################################
        printtime("Make per region key incorporation traces")
        perRegionTF = "averagedKeyTraces_TF.txt"
        perRegionLib = "averagedKeyTraces_Lib.txt"
        if os.path.exists(perRegionTF):
            pr = plotRawRegions.PerRegionKey(tfKey, floworder, 'TFTracePerRegion.png')
            pr.parse(perRegionTF)
            pr.plot()

        if os.path.exists(perRegionLib):
            pr = plotRawRegions.PerRegionKey(libKey, floworder, 'LibTracePerRegion.png')
            pr.parse(perRegionLib)
            pr.plot()

        sys.stdout.flush()
        sys.stderr.flush()
    else:
        printtime('Skipping SFF Processing')

    if runFromSFF:
        ########################################################
        # Attempt to align                                     #
        ########################################################
        # NOTE(review): libKey, tfKey and floworder are only assigned in the
        # runFromWells branch above; running with runFromSFF but not
        # runFromWells would raise NameError here - confirm the flags are
        # never set that way.
        printtime("Attempt to align")

        # create analysis progress bar file
        f = open('progress.txt', 'w')
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = green\n')
        f.write('sffread = green\n')
        f.write('alignment = yellow')
        f.close()

        try:
            align_full_chip(libsff_path, libKey, tfKey, floworder, fastq_path, env['align_full'], DIR_BC_FILES, env, ALIGNMENT_RESULTS)
        except Exception:
            printtime("ERROR: Alignment Failed")
            traceback.print_exc()

        printtime("make the read length histogram")
        try:
            # NOTE(review): reads 'readLen.txt' from the current directory,
            # while SFFSummary writes it under BASECALLER_RESULTS - confirm
            # cwd matches when this runs.
            filepath_readLenHistogram = os.path.join(ALIGNMENT_RESULTS, 'readLenHisto.png')
            trimmedReadLenHisto.trimmedReadLenHisto('readLen.txt', filepath_readLenHistogram)
        except:
            printtime("Failed to create %s" % filepath_readLenHistogram)

        ########################################################
        # ParseFiles                                           #
        ########################################################
        printtime('ParseFiles')

        # create analysis progress bar file
        f = open('progress.txt', 'w')
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = green\n')
        f.write('sffread = green\n')
        f.write('alignment = green')
        f.close()
    else:
        printtime('Skipping TMAP Processing')