def tf_processing( tf_basecaller_bam_path, tfKey, floworder, BASECALLER_RESULTS, analysis_dir):
    """Run Test Fragment metric generation for one block.

    Thin wrapper: all real work is delegated to TFPipeline.processBlock,
    bracketed by progress log lines.

    NOTE(review): a second `tf_processing` with a different signature is
    defined later in this file and will shadow this one at import time —
    confirm which definition callers expect.
    """
    printtime("Calling TFPipeline.processBlock")
    # Hand the block's basecalled TF BAM straight to the TF pipeline.
    tf_args = (tf_basecaller_bam_path, BASECALLER_RESULTS, tfKey, floworder, analysis_dir)
    TFPipeline.processBlock(*tf_args)
    printtime("Completed TFPipeline.processBlock")

    printtime("Finished tf processing")
def mergeBasecallerResults(dirs, QualityPath, merged_bead_mask_path, floworder, libsff, tfsff, BASECALLER_RESULTS):
    """Merge per-block basecaller outputs into combined whole-chip results.

    Merges, in order: quality.summary files (written to QualityPath),
    TFMapper metrics (via TFPipeline.mergeBlocks), BaseCaller.json files
    (via mergeBaseCallerJson.merge), and the library/TF SFF files
    (via the external SFFProtonMerge tool, outputs libsff / tfsff).

    NOTE(review): merged_bead_mask_path is never used in this body —
    confirm whether it is dead or expected by callers.

    :param dirs: per-block subdirectory names (relative to BASECALLER_RESULTS)
    :param QualityPath: output path for the merged quality.summary
    :param floworder: flow order string, forwarded to TF merging
    :param libsff: output path for the merged library SFF
    :param tfsff: output path for the merged test-fragment SFF
    :param BASECALLER_RESULTS: root directory holding the block subdirs
    """
    ############################################
    # Merge individual quality.summary files  #
    ############################################
    printtime("Merging individual quality.summary files")

    config_out = ConfigParser.RawConfigParser()
    config_out.optionxform = str # don't convert to lowercase
    config_out.add_section('global')

    # Keys summed across blocks.
    numberkeys = ['Number of 50BP Reads',
                  'Number of 100BP Reads',
                  'Number of 150BP Reads',
                  'Number of Reads at Q0',
                  'Number of Bases at Q0',
                  'Number of 50BP Reads at Q0',
                  'Number of 100BP Reads at Q0',
                  'Number of 150BP Reads at Q0',
                  'Number of Reads at Q17',
                  'Number of Bases at Q17',
                  'Number of 50BP Reads at Q17',
                  'Number of 150BP Reads at Q17',
                  'Number of 100BP Reads at Q17',
                  'Number of Reads at Q20',
                  'Number of Bases at Q20',
                  'Number of 50BP Reads at Q20',
                  'Number of 100BP Reads at Q20',
                  'Number of 150BP Reads at Q20']

    # Keys combined with max() across blocks.
    maxkeys = ['Max Read Length at Q0',
               'Max Read Length at Q17',
               'Max Read Length at Q20']

    # Keys averaged across blocks.
    meankeys = ['System SNR',
                'Mean Read Length at Q0',
                'Mean Read Length at Q17',
                'Mean Read Length at Q20']

    config_in = MyConfigParser()
    config_in.optionxform = str # don't convert to lowercase
    doinit = True  # True until the first summary file has seeded config_out
    for i,subdir in enumerate(dirs):
        if isbadblock(subdir, "Merging quality.summary"):
            continue
        summaryfile=os.path.join(BASECALLER_RESULTS, subdir, 'quality.summary')
        if os.path.exists(summaryfile):
            printtime("INFO: process %s" % summaryfile)

            config_in.read(summaryfile)
            for key in numberkeys:
                value_in = config_in.get('global',key)
                if doinit:
                    value_out = 0
                else:
                    value_out = config_out.get('global', key)
                config_out.set('global', key, int(value_in) + int(value_out))
            for key in maxkeys:
                value_in = config_in.get('global',key)
                if doinit:
                    value_out = 0
                else:
                    value_out = config_out.get('global', key)
                config_out.set('global', key, max(int(value_in),int(value_out)))
            for key in meankeys:
                value_in = config_in.get('global',key)
                if doinit:
                    value_out = 0
                else:
                    value_out = config_out.get('global', key)
                # NOTE(review): divisor is len(dirs), which includes blocks
                # skipped by isbadblock / missing summary files above — when
                # any block is skipped the mean is biased low. Confirm intent.
                config_out.set('global', key, float(value_out)+float(value_in)/len(dirs))
            doinit = False
        else:
            printtime("ERROR: skipped %s" % summaryfile)

    # Python 2 RawConfigParser.write; binary mode matches the Py2 idiom.
    with open(QualityPath, 'wb') as configfile:
        config_out.write(configfile)

    ##################################################
    #generate TF Metrics                             #
    #look for both keys and append same file         #
    ##################################################
    printtime("Merging TFMapper metrics and generating TF plots")
    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS,dirs,floworder)
    except:
        # best-effort: TF merging failure must not abort the remaining merges
        printtime("ERROR: Merging TFMapper metrics failed")

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")
    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir,'BaseCaller.json')
            if os.path.exists(basecallerjson):
                # merge() is given the block directory, not the json path
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)
        mergeBaseCallerJson.merge(basecallerfiles,BASECALLER_RESULTS)
    except:
        printtime("Merging BaseCaller.json files failed")

    ########################################
    # Merge individual block SFF files     #
    ########################################
    printtime("Merging Library SFF files")
    try:
        # SFFProtonMerge takes the input filename once (-i) plus a list of
        # block directories; it reads <dir>/rawlib.sff from each.
        cmd = 'SFFProtonMerge'
        cmd = cmd + ' -i rawlib.sff'
        cmd = cmd + ' -o %s ' % libsff
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            if isbadblock(subdir, "Merging Library SFF files"):
                continue
            rawlibsff = os.path.join(subdir,'rawlib.sff')
            if os.path.exists(rawlibsff):
                cmd = cmd + ' %s' % subdir
            else:
                printtime("ERROR: skipped %s" % rawlibsff)
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd,shell=True)
    except:
        printtime("SFFProtonMerge failed (library)")

    printtime("Merging Test Fragment SFF files")
    try:
        # Same scheme as the library merge, for rawtf.sff.
        cmd = 'SFFProtonMerge'
        cmd = cmd + ' -i rawtf.sff'
        cmd = cmd + ' -o %s ' % tfsff
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            if isbadblock(subdir, "Merging Test Fragment SFF files"):
                continue
            rawtfsff = os.path.join(subdir,'rawtf.sff')
            if os.path.exists(rawtfsff):
                cmd = cmd + ' %s' % subdir
            else:
                printtime("ERROR: skipped %s" % rawtfsff)
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd,shell=True)
    except:
        printtime("SFFProtonMerge failed (test fragments)")
def tf_processing( SIGPROC_RESULTS, tfsff_path, libKey, tfKey, floworder, BASECALLER_RESULTS):
    """Run TF metrics, key-trace plots, and per-region key plots for a block.

    Steps, in order:
      1. TFPipeline.processBlock on the TF SFF.
      2. Key incorporation trace plots (avgNukeTrace_<key>.txt) for the TF
         and library keys, each rendered best-effort.
      3. Per-region key incorporation trace plots, when the averaged trace
         files exist.

    NOTE(review): this redefines the earlier `tf_processing` in this file
    with a different signature — confirm which one callers expect.
    """
    ##################################################
    # Generate TF Metrics                            #
    ##################################################
    printtime("Calling TFPipeline.processBlock")
    TFPipeline.processBlock(tfsff_path, BASECALLER_RESULTS, SIGPROC_RESULTS, tfKey, floworder)
    printtime("Completed TFPipeline.processBlock")

    ########################################################
    # Generate Raw Data Traces for lib and TF keys         #
    ########################################################
    printtime("Generate Raw Data Traces for lib and TF keys(iontrace_Test_Fragment.png, iontrace_Library.png)")

    peakOut = 'raw_peak_signal'

    def _render_key_plot(key, label, fail_msg):
        # Best-effort: parse avgNukeTrace_<key>.txt, dump the peak signal,
        # and render the plot; a failure is logged but never raised.
        trace_file = 'avgNukeTrace_%s.txt' % key
        if not os.path.exists(trace_file):
            return
        try:
            kp = plotKey.KeyPlot(key, floworder, label)
            kp.parse(trace_file)
            kp.dump_max(peakOut)
            kp.plot()
        except:
            printtime(fail_msg)
            traceback.print_exc()

    _render_key_plot(tfKey, 'Test Fragment', "TF key graph didn't render")
    _render_key_plot(libKey, 'Library', "Lib key graph didn't render")

    ########################################################
    # Make per region key incorporation traces             #
    ########################################################
    printtime("Make per region key incorporation traces")

    region_jobs = (
        (tfKey, "averagedKeyTraces_TF.txt", 'TFTracePerRegion.png'),
        (libKey, "averagedKeyTraces_Lib.txt", 'LibTracePerRegion.png'),
    )
    for key, trace_file, png_name in region_jobs:
        if os.path.exists(trace_file):
            region_plot = plotRawRegions.PerRegionKey(key, floworder, png_name)
            region_plot.parse(trace_file)
            region_plot.plot()
def runBlock(env):
    """Run the full single-block pipeline: raw analysis, SFF processing, alignment.

    Three phases, each gated by a module-level flag:
      - runFromRaw:   launch the external analysis command from env['analysisArgs']
      - runFromWells: SFF trimming/summary, barcode split, TF metrics, plots
      - runFromSFF:   alignment and read-length histogram

    Relies on module-level globals visible in this body: BASECALLER_RESULTS,
    SIGPROC_RESULTS, ALIGNMENT_RESULTS, DIR_BC_FILES, runFromRaw, runFromWells,
    runFromSFF, and mycwd.

    :param env: dict of run parameters ('prefix', 'libraryKey', 'flowOrder',
                'analysisArgs', 'sfftrim', 'sfftrim_args', 'reverse_primer_dict',
                'barcodeId', 'align_full', ...)
    """
    STATUS = None  # first fatal-ish error label; left None on clean runs
    basefolder = 'plugin_out'
    if not os.path.isdir(basefolder):
        os.umask(0000)   #grant write permission to plugin user
        os.mkdir(basefolder)
        os.umask(0002)
    pathprefix = env["prefix"]
    libsff_filename = "rawlib.sff"
    tfsff_filename = "rawtf.sff"
    fastq_filename = "raw.fastq"
    bctrimmed_libsff_filename = "bctrimmed_rawlib.sff"

    fastq_path = os.path.join(BASECALLER_RESULTS, fastq_filename)
    libsff_path = os.path.join(BASECALLER_RESULTS, libsff_filename)
    tfsff_path = os.path.join(BASECALLER_RESULTS, tfsff_filename)
    bctrimmed_libsff_path = os.path.join(BASECALLER_RESULTS,bctrimmed_libsff_filename)
    tfmapperstats_path = os.path.join(BASECALLER_RESULTS,"TFMapper.stats")

    libKeyArg = "--libraryKey=%s" % env["libraryKey"]

    write_version()

    #-------------------------------------------------------------
    # Single Block data processing
    #-------------------------------------------------------------
    if runFromRaw:
        printtime("RUNNING SINGLE BLOCK ANALYSIS")
        command = "%s >> ReportLog.html 2>&1" % (env['analysisArgs'])
        printtime("Analysis command: " + command)
        sys.stdout.flush()
        sys.stderr.flush()
        status = subprocess.call(command,shell=True)
        #status = 2
        STATUS = None
        # Map the analysis binary's exit codes to human-readable labels.
        if int(status) == 2:
            STATUS = 'Checksum Error'
        elif int(status) == 3:
            STATUS = 'No Live Beads'
        elif int(status) != 0:
            STATUS = 'ERROR'

        if STATUS != None:
            printtime("Analysis finished with status '%s'" % STATUS)
            #TODO - maybe create file
            # uploadMetrics.updateStatus(STATUS)

        #TODO
        '''
        csp = os.path.join(env['pathToRaw'],'checksum_status.txt')
        if not os.path.exists(csp) and not env['skipchecksum'] and STATUS==None:
            try:
                os.umask(0002)
                f = open(csp, 'w')
                f.write(str(status))
                f.close()
            except:
                traceback.print_exc()
        '''
        printtime("Finished single block analysis")
    else:
        printtime('Skipping single block analysis')

    if runFromWells:
        tfKey = "ATCG"
        libKey = env['libraryKey']
        floworder = env['flowOrder']
        printtime("Using flow order: %s" % floworder)
        printtime("Using library key: %s" % libKey)

        if "block_" in mycwd:
            # Fix SFFTrim
            # NOTE(review): symlink is created in the current working
            # directory (no explicit link name/dir given) — confirm intended.
            basecallerjson = os.path.join(BASECALLER_RESULTS, 'BaseCaller.json')
            r = subprocess.call(["ln", "-s", basecallerjson])
            if r:
                printtime("couldn't create symbolic link")

            # Fix SFFMerge
            r = subprocess.call(["ln", "-s", os.path.join('..', SIGPROC_RESULTS, 'processParameters.txt'), os.path.join(BASECALLER_RESULTS, 'processParameters.txt')])
            if r:
                printtime("couldn't create symbolic link")

        sys.stdout.flush()
        sys.stderr.flush()

        if not os.path.exists(libsff_path):
            printtime("ERROR: %s does not exist" % libsff_path)
            # marker file consumed elsewhere to flag a bad block
            open('badblock.txt', 'w').close()

        ##################################################
        # Unfiltered SFF
        ##################################################
        unfiltered_dir = "unfiltered"
        if os.path.exists(unfiltered_dir):
            top_dir = os.getcwd()

            #change to the unfiltered dir
            os.chdir(os.path.join(top_dir,unfiltered_dir))

            #grab the first file named untrimmed.sff
            try:
                untrimmed_sff = glob.glob("*.untrimmed.sff")[0]
            except IndexError:
                # NOTE(review): untrimmed_sff stays unbound here; the code
                # below would raise NameError — confirm this path can occur.
                printtime("Error, unable to find the untrimmed sff file")

            #rename untrimmed to trimmed
            trimmed_sff = untrimmed_sff.replace("untrimmed.sff","trimmed.sff")

            # 3' adapter details
            qual_cutoff = env['reverse_primer_dict']['qual_cutoff']
            qual_window = env['reverse_primer_dict']['qual_window']
            adapter_cutoff = env['reverse_primer_dict']['adapter_cutoff']
            adapter = env['reverse_primer_dict']['sequence']

            # If flow order is missing, assume classic flow order:
            if floworder == "0":
                floworder = "TACG"
                printtime("warning: floworder redefine required.  set to TACG")

            printtime("Unfiltered SFFTrim")
            try:
                com = "SFFTrim"
                com += " --in-sff %s --out-sff %s" % (untrimmed_sff,trimmed_sff)
                com += " --flow-order %s" % (floworder)
                com += " --key %s" % (libKey)
                com += " --qual-cutoff %s" % (qual_cutoff)
                com += " --qual-window-size %s" % (qual_window)
                com += " --adapter-cutoff %s" % (adapter_cutoff)
                com += " --adapter %s" % (adapter)
                com += " --min-read-len 5"

                printtime("DEBUG: Calling '%s'" % com)
                ret = subprocess.call(com,shell=True)
                if int(ret)!=0 and STATUS==None:
                    STATUS='ERROR'
            except:
                printtime('Failed Unfiltered SFFTrim')

            # Convert every SFF in the unfiltered dir to fastq, best-effort.
            sffs = glob.glob("*.sff")
            for sff in sffs:
                try:
                    com = "SFFRead"
                    com += " -q %s" % sff.replace(".sff",".fastq")
                    com += " %s" % sff

                    printtime("DEBUG: Calling '%s'" % com)
                    ret = subprocess.call(com,shell=True)
                    if int(ret)!=0 and STATUS==None:
                        STATUS='ERROR'
                except:
                    printtime('Failed to convert SFF' + str(sff) + ' to fastq')

            #trim status
            # NOTE(review): loop variable `status` shadows the earlier
            # analysis exit code `status` from the runFromRaw phase.
            for status in ["untrimmed","trimmed"]:
                os.chdir(os.path.join(top_dir,unfiltered_dir))
                if not os.path.exists(status):
                    os.makedirs(status)
                os.chdir(os.path.join(top_dir,unfiltered_dir,status))

                try:
                    printtime("Trim Status",)
                    align_full_chip_core("../*." + status + ".sff", libKey, tfKey, floworder, fastq_path, env['align_full'], -1, False, False, True, DIR_BC_FILES, env, ALIGNMENT_RESULTS)
                except OSError:
                    printtime('Trim Status Alignment Failed to start')
                    alignError = open("alignment.error", "w")
                    alignError.write(str(traceback.format_exc()))
                    alignError.close()
                    traceback.print_exc()

            os.chdir(top_dir)
        else:
            printtime("Directory unfiltered does not exist")

        sys.stdout.flush()
        sys.stderr.flush()

        ##################################################
        # Trim the SFF file if it has been requested     #
        ##################################################

        #only trim if SFF is false
        if not env['sfftrim']:
            printtime("Attempting to trim the SFF file")

            if not os.path.exists(libsff_path):
                printtime("ERROR: %s does not exist" % libsff_path)

            (head,tail) = os.path.split(libsff_path)
            # NOTE(review): tail[:4] of "rawlib.sff" is "rawl", producing
            # "rawltrimmed.sff" — confirm this prefix slice is intended.
            libsff_trimmed_path = os.path.join(head,tail[:4] + "trimmed.sff")

            #we will always need the input and output files
            trimArgs = "--in-sff %s --out-sff %s" % (libsff_path,libsff_trimmed_path)

            qual_cutoff = env['reverse_primer_dict']['qual_cutoff']
            qual_window = env['reverse_primer_dict']['qual_window']
            adapter_cutoff = env['reverse_primer_dict']['adapter_cutoff']
            adapter = env['reverse_primer_dict']['sequence']

            if not env['sfftrim_args']:
                printtime("no args found, using default args")
                trimArgs += " --flow-order %s --key %s" % (floworder, libKey)
                trimArgs += " --qual-cutoff %d --qual-window-size %d --adapter-cutoff %d --adapter %s" % (qual_cutoff,qual_window,adapter_cutoff,adapter)
                trimArgs += " --min-read-len 5 "
            else:
                # NOTE(review): printtime is called with two arguments here
                # (elsewhere it takes one) — verify printtime's signature.
                printtime("using non default args" , env['sfftrim_args'])
                trimArgs += " " + env['sfftrim_args']

            try:
                com = "SFFTrim %s " % (trimArgs)
                printtime("DEBUG: call '%s':" % com)
                ret = subprocess.call(com,shell=True)
                if int(ret)!=0 and STATUS==None:
                    STATUS='ERROR'
            except:
                printtime('Failed SFFTrim')

            #if the trim did not fail then move the untrimmed file to untrimmed.expname.sff
            #and move trimmed to expname.sff to ensure backwards compatability

            # don't rename, result will be useless for --fromsff runs

            # if os.path.exists(libsff_path):
            #     try:
            #         os.rename(libsff_path, "untrimmed." + libsff_path) #todo
            #     except:
            #         printtime("ERROR: renaming %s" % libsff_path)

            # if os.path.exists(libsff_trimmed_path):
            #     try:
            #         os.rename(libsff_trimmed_path, libsff_path)
            #     except:
            #         printtime("ERROR: renaming %s" % libsff_trimmed_path)
        else:
            printtime("Not attempting to trim the SFF")

        #####################################################
        # Barcode trim SFF if barcodes have been specified  #
        # Creates one fastq per barcode, plus unknown reads #
        #####################################################

        # NOTE(review): `is not ''` is an identity check that relies on
        # CPython string interning; `!= ''` is the safe comparison.
        if env['barcodeId'] is not '':
            try:
                com = "barcodeSplit"
                com += " -s"
                com += " -i %s" % libsff_path
                com += " -b barcodeList.txt"
                com += " -c barcodeMask.bin"
                com += " -f %s" % floworder

                printtime("DEBUG: Calling '%s'" % com)
                ret = subprocess.call(com,shell=True)
                if int(ret) != 0 and STATUS==None:
                    STATUS='ERROR'
                else:
                    # Rename bc trimmed sff
                    if os.path.exists(bctrimmed_libsff_path):
                        os.rename(bctrimmed_libsff_path, libsff_path)
            except:
                printtime("Failed barcodeSplit")

        ##################################################
        # Once we have the new SFF, run SFFSummary
        # to get the predicted quality scores
        ##################################################

        try:
            com = "SFFSummary"
            com += " -o %s" % os.path.join(BASECALLER_RESULTS, 'quality.summary')
            com += " --sff-file %s" % libsff_path
            com += " --read-length 50,100,150"
            com += " --min-length 0,0,0"
            com += " --qual 0,17,20"
            com += " -d %s" % os.path.join(BASECALLER_RESULTS, 'readLen.txt')

            printtime("DEBUG: Calling '%s'" % com)
            ret = subprocess.call(com,shell=True)
            if int(ret)!=0 and STATUS==None:
                STATUS='ERROR'
        except:
            printtime('Failed SFFSummary')

        ##################################################
        #make keypass.fastq file -c(cut key) -k(key flows)#
        ##################################################
        # create analysis progress bar file
        f = open('progress.txt','w')
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = green\n')
        f.write('sffread = yellow\n')
        f.write('alignment = grey')
        f.close()

        try:
            com = "SFFRead"
            com += " -q %s" % fastq_path
            com += " %s" % libsff_path
            com += " > %s" % os.path.join(BASECALLER_RESULTS, 'keypass.summary')

            printtime("DEBUG: Calling '%s'" % com)
            ret = subprocess.call(com,shell=True)
            if int(ret)!=0 and STATUS==None:
                STATUS='ERROR'
        except:
            printtime('Failed SFFRead')

        ##################################################
        #generate TF Metrics                             #
        ##################################################
        printtime("Calling TFPipeline.processBlock")
        TFPipeline.processBlock(tfsff_filename, BASECALLER_RESULTS, SIGPROC_RESULTS, tfKey, floworder)
        printtime("Completed TFPipeline.processBlock")

        #printtime("Calling TFMapper")
        #try:
        #    com = "TFMapper"
        #    com += " --logfile TFMapper.log"
        #    com += " --output-dir=%s" % (BASECALLER_RESULTS)
        #    com += " --wells-dir=%s" % (SIGPROC_RESULTS)
        #    com += " --sff-dir=%s" % (BASECALLER_RESULTS)
        #    com += " --tfkey=%s" % (tfKey)
        #    com += " %s" % (tfsff_filename)
        #    com += " ./"
        #    com += " > %s" % (tfmapperstats_path)
        #    printtime("DEBUG: Calling '%s'" % com)
        #    ret = subprocess.call(com,shell=True)
        #    if int(ret)!=0 and STATUS==None:
        #        STATUS='ERROR'
        #except:
        #    printtime("ERROR: TFMapper failed")

        ########################################################
        #generate the TF Metrics including plots               #
        ########################################################
        #printtime("generate the TF Metrics including plots")

        #if os.path.exists(tfmapperstats_path):
        #    try:
        #        # Q17 TF Read Length Plot
        #        tfMetrics = parseTFstats.generateMetricsData(tfmapperstats_path)
        #        tfGraphs.Q17(tfMetrics)
        #        tfGraphs.genCafieIonograms(tfMetrics,floworder)
        #    except Exception:
        #        printtime("ERROR: Metrics Gen Failed")
        #        traceback.print_exc()
        #else:
        #    printtime("ERROR: %s doesn't exist" % tfmapperstats_path)

        ########################################################
        #Generate Raw Data Traces for lib and TF keys          #
        ########################################################
        printtime("Generate Raw Data Traces for lib and TF keys(iontrace_Test_Fragment.png, iontrace_Library.png)")

        tfRawPath = 'avgNukeTrace_%s.txt' % tfKey
        libRawPath = 'avgNukeTrace_%s.txt' % libKey
        peakOut = 'raw_peak_signal'

        if os.path.exists(tfRawPath):
            try:
                kp = plotKey.KeyPlot(tfKey, floworder, 'Test Fragment')
                kp.parse(tfRawPath)
                kp.dump_max(peakOut)
                kp.plot()
            except:
                printtime("TF key graph didn't render")
                traceback.print_exc()

        if os.path.exists(libRawPath):
            try:
                kp = plotKey.KeyPlot(libKey, floworder, 'Library')
                kp.parse(libRawPath)
                kp.dump_max(peakOut)
                kp.plot()
            except:
                printtime("Lib key graph didn't render")
                traceback.print_exc()

        ########################################################
        #Make Bead Density Plots                               #
        ########################################################
        printtime("Make Bead Density Plots")
        bfmaskPath = os.path.join(SIGPROC_RESULTS,"bfmask.bin")
        maskpath = os.path.join(SIGPROC_RESULTS,"MaskBead.mask")

        if os.path.isfile(bfmaskPath):
            com = "BeadmaskParse"
            com += " -m MaskBead"
            com += " %s" % bfmaskPath
            ret = subprocess.call(com,shell=True)
            if int(ret)!=0 and STATUS==None:
                STATUS='ERROR'
            #TODO
            try:
                shutil.move('MaskBead.mask', maskpath)
            except:
                printtime("ERROR: MaskBead.mask already moved")
        else:
            printtime("Warning: no bfmask.bin file exists.")

        if os.path.exists(maskpath):
            try:
                # Makes Bead_density_contour.png
                beadDensityPlot.genHeatmap(maskpath, BASECALLER_RESULTS)
                # os.remove(maskpath)
            except:
                traceback.print_exc()
        else:
            printtime("Warning: no MaskBead.mask file exists.")

        sys.stdout.flush()
        sys.stderr.flush()

        ########################################################
        # Make per region key incorporation traces             #
        ########################################################
        printtime("Make per region key incorporation traces")
        perRegionTF = "averagedKeyTraces_TF.txt"
        perRegionLib = "averagedKeyTraces_Lib.txt"
        if os.path.exists(perRegionTF):
            pr = plotRawRegions.PerRegionKey(tfKey, floworder,'TFTracePerRegion.png')
            pr.parse(perRegionTF)
            pr.plot()

        if os.path.exists(perRegionLib):
            pr = plotRawRegions.PerRegionKey(libKey, floworder,'LibTracePerRegion.png')
            pr.parse(perRegionLib)
            pr.plot()

        sys.stdout.flush()
        sys.stderr.flush()
    else:
        printtime('Skipping SFF Processing')

    if runFromSFF:
        ########################################################
        #Attempt to align                                      #
        ########################################################
        printtime("Attempt to align")

        # create analysis progress bar file
        f = open('progress.txt','w')
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = green\n')
        f.write('sffread = green\n')
        f.write('alignment = yellow')
        f.close()

        try:
            align_full_chip(libsff_path, libKey, tfKey, floworder, fastq_path, env['align_full'], DIR_BC_FILES, env, ALIGNMENT_RESULTS)
        except Exception:
            printtime("ERROR: Alignment Failed")
            traceback.print_exc()

        printtime("make the read length histogram")
        try:
            filepath_readLenHistogram = os.path.join(ALIGNMENT_RESULTS,'readLenHisto.png')
            trimmedReadLenHisto.trimmedReadLenHisto('readLen.txt',filepath_readLenHistogram)
        except:
            # NOTE(review): if os.path.join itself raised,
            # filepath_readLenHistogram would be unbound in this message.
            printtime("Failed to create %s" % filepath_readLenHistogram)

        ########################################################
        #ParseFiles                                            #
        ########################################################
        printtime('ParseFiles')

        # create analysis progress bar file
        f = open('progress.txt','w')
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = green\n')
        f.write('sffread = green\n')
        f.write('alignment = green')
        f.close()
    else:
        printtime('Skipping TMAP Processing')
def merge_basecaller_stats(dirs, BASECALLER_RESULTS, SIGPROC_RESULTS, flows, floworder):
    """Merge per-block basecaller statistics into whole-chip outputs.

    In order: combine datasets_basecaller.json across blocks, reduce
    ionstats_basecaller.json (per barcode, then overall), write a composite
    return code (96-block runs only), merge TF metrics and BaseCaller.json,
    and render the composite basecaller plots.

    Uses Python 2 idioms (dict.iterkeys()); runs best-effort — each stage
    logs and continues on failure.

    :param dirs: per-block subdirectory names
    :param BASECALLER_RESULTS: directory for merged outputs (and the
        per-block path component of the inputs)
    :param SIGPROC_RESULTS: passed through to the wells beadogram
    :param flows: flow count, used to scale the plot x-axis
    :param floworder: flow order, forwarded to TF merging
    """
    ########################################################
    # Merge datasets_basecaller.json                       #
    ########################################################

    block_datasets_json = []
    combined_datasets_json = {}

    # Load every readable per-block datasets_basecaller.json.
    for dir in dirs:
        current_datasets_path = os.path.join(dir,BASECALLER_RESULTS,'datasets_basecaller.json')
        try:
            f = open(current_datasets_path,'r')
            block_datasets_json.append(json.load(f))
            f.close()
        except:
            printtime("ERROR: skipped %s" % current_datasets_path)

    if (not block_datasets_json) or ('datasets' not in block_datasets_json[0]) or ('read_groups' not in block_datasets_json[0]):
        printtime("merge_basecaller_results: no block contained a valid datasets_basecaller.json, aborting")
        return

    # The first block's json is the template; counts are then re-accumulated
    # across all blocks, positionally by dataset index.
    combined_datasets_json = copy.deepcopy(block_datasets_json[0])

    for dataset_idx in range(len(combined_datasets_json['datasets'])):
        combined_datasets_json['datasets'][dataset_idx]['read_count'] = 0
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['datasets'][dataset_idx]['read_count'] += current_datasets_json['datasets'][dataset_idx].get("read_count",0)

    for read_group in combined_datasets_json['read_groups'].iterkeys():
        combined_datasets_json['read_groups'][read_group]['Q20_bases'] = 0;
        combined_datasets_json['read_groups'][read_group]['total_bases'] = 0;
        combined_datasets_json['read_groups'][read_group]['read_count'] = 0;
        # 'nomatch' groups start unfiltered; all others start filtered and
        # stay filtered only if every block filtered them (&= below).
        combined_datasets_json['read_groups'][read_group]['filtered'] = True if 'nomatch' not in read_group else False
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['read_groups'][read_group]['Q20_bases'] += current_datasets_json['read_groups'].get(read_group,{}).get("Q20_bases",0)
            combined_datasets_json['read_groups'][read_group]['total_bases'] += current_datasets_json['read_groups'].get(read_group,{}).get("total_bases",0)
            combined_datasets_json['read_groups'][read_group]['read_count'] += current_datasets_json['read_groups'].get(read_group,{}).get("read_count",0)
            combined_datasets_json['read_groups'][read_group]['filtered'] &= current_datasets_json['read_groups'].get(read_group,{}).get("filtered",True)

    try:
        f = open(os.path.join(BASECALLER_RESULTS,'datasets_basecaller.json'),"w")
        json.dump(combined_datasets_json, f, indent=4)
        f.close()
    except:
        printtime("ERROR; Failed to write merged datasets_basecaller.json")
        traceback.print_exc()

    ########################################################
    # Merge ionstats_basecaller.json:                      #
    # First across blocks, then across barcodes            #
    ########################################################

    try:
        composite_filename_list = []
        for dataset in combined_datasets_json["datasets"]:
            # Reduce this barcode's stats across all blocks that have them.
            composite_filename = os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json')
            barcode_filename_list = [os.path.join(dir,BASECALLER_RESULTS,dataset['file_prefix']+'.ionstats_basecaller.json') for dir in dirs]
            barcode_filename_list = [filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list,composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)

        # Then reduce across barcodes into the overall whole-chip stats.
        ionstats.reduce_stats(composite_filename_list,os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'))
        ionstats.generate_legacy_basecaller_files(
                os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS,''))
    except:
        printtime("ERROR: Failed to merge ionstats_basecaller.json")
        traceback.print_exc()

    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        # Only for full 96-block runs: start at 96 and subtract one per
        # block whose blockstatus.txt reports a clean basecaller run, so the
        # final value is the count of blocks that did NOT succeed.
        if len(dirs)==96:
            composite_return_code=96
            for subdir in dirs:

                blockstatus_return_code_file = os.path.join(subdir,"blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):

                    with open(blockstatus_return_code_file, 'r') as f:

                        text = f.read()
                        if 'Basecaller=0' in text:
                            composite_return_code-=1

            composite_return_code_file = os.path.join(BASECALLER_RESULTS,"composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                os.umask(0002)
                f = open(composite_return_code_file, 'a')
                f.write(str(composite_return_code))
                f.close()
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except:
        traceback.print_exc()

    ##################################################
    #generate TF Metrics                             #
    #look for both keys and append same file         #
    ##################################################

    printtime("Merging TFMapper metrics and generating TF plots")
    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS,dirs,floworder)
    except:
        printtime("ERROR: Merging TFMapper metrics failed")

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")
    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir,'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)
        mergeBaseCallerJson.merge(basecallerfiles,BASECALLER_RESULTS)
    except:
        printtime("Merging BaseCaller.json files failed")

    ###############################################
    # Generate composite plots
    ###############################################

    printtime("Build composite basecaller graphs")

    # x-axis cap scales with flow count (~50bp per 3.6 flows, rounded up
    # to a multiple of 50); fall back to 400 on any conversion failure.
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 400

    # Plot read length sparkline
    for dataset in combined_datasets_json["datasets"]:
        ionstats_plots.read_length_sparkline(
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.sparkline.png'),
                graph_max_x)

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto.png'),
            graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto2.png'),
            graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'quality_histogram.png'))

    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except:
        printtime ("ERROR: Wells beadogram generation failed")
        traceback.print_exc()

    printtime("Finished merging basecaller stats")
env['SIGPROC_RESULTS']) except: printtime("ERROR: Wells beadogram generation failed") traceback.print_exc() set_result_status('TF Processing') try: # TODO basecaller_results/datasets_tf.json might contain read_count : 0 if os.path.exists( os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam')): TFPipeline.processBlock( os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam'), env['BASECALLER_RESULTS'], env['tfKey'], env['flowOrder'], '.') except: traceback.print_exc() # Process unfiltered reads if do_unfiltered_processing: set_result_status('Process Unfiltered BAM') bidirectional = False activate_barcode_filter = False create_index = False for unfiltered_directory in [
alignment.create_plots('ionstats_alignment.json', graph_max_x) except: traceback.print_exc() try: wells_beadogram.generate_wells_beadogram(env['BASECALLER_RESULTS'], env['SIGPROC_RESULTS']) except: printtime ("ERROR: Wells beadogram generation failed") traceback.print_exc() set_result_status('TF Processing') try: TFPipeline.processBlock( os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam'), env['BASECALLER_RESULTS'], env['tfKey'], env['flowOrder'], '.') #add_status("TF Processing", 0) except: traceback.print_exc() #add_status("TF Processing", 1) # Process unfiltered reads if do_unfiltered_processing: set_result_status('Process Unfiltered BAM') bidirectional = False activate_barcode_filter = False