# Default options to produce smaller basecaller results prebasecallerArgs = env['prebasecallerArgs'] if env['doBaseRecal'] == "panel_recal": prebasecallerArgs = prebasecallerArgs + " --calibration-training=0" prebasecallerArgs = prebasecallerArgs + " --calibration-panel /opt/ion/config/datasets_calibration.json" else: if not "--calibration-training=" in prebasecallerArgs: prebasecallerArgs = prebasecallerArgs + " --calibration-training=100000" if not "--flow-signals-type" in prebasecallerArgs: prebasecallerArgs = prebasecallerArgs + " --flow-signals-type scaled-residual" basecaller.basecalling( env['SIGPROC_RESULTS'], prebasecallerArgs, env['libraryKey'], env['tfKey'], env['runID'], env['reverse_primer_dict'], os.path.join(env['BASECALLER_RESULTS'], 'recalibration'), env['barcodeId'], env['barcodeInfo'], env['library'], env['notes'], env['site_name'], env['platform'], env['instrumentName'], env['chipType']) basecaller_recalibration_datasets = blockprocessing.get_datasets_basecaller( os.path.join(env['BASECALLER_RESULTS'], 'recalibration')) if env['doBaseRecal'] == "panel_recal": basecaller_recalibration_datasets = basecaller_recalibration_datasets[ 'IonControl'] # file containing dimension info (offsets, rows, cols) and flow info for stratification try: c = open( os.path.join(env['BASECALLER_RESULTS'], "recalibration", 'BaseCaller.json'), 'r')
set_result_status('Base Calling') try: basecaller.basecalling( env['SIGPROC_RESULTS'], env['basecallerArgs'] + additional_basecallerArgs, env['libraryKey'], env['tfKey'], env['runID'], env['flowOrder'], env['reverse_primer_dict'], env['BASECALLER_RESULTS'], env['barcodeId'], env['barcodeSamples'], env['barcodesplit_filter'], env['DIR_BC_FILES'], os.path.join("barcodeList.txt"), os.path.join(env['BASECALLER_RESULTS'], "barcodeMask.bin"), env['libraryName'], env['sample'], env['site_name'], env['notes'], env['start_time'], env['chipType'], env['expName'], env['resultsName'], env['pgmName'] ) add_status("Basecaller", 0) except: traceback.print_exc()
prebasecallerArgs = prebasecallerArgs + " --calibration-training=0" prebasecallerArgs = prebasecallerArgs + " --calibration-panel /opt/ion/config/datasets_calibration.json" else: if not "--calibration-training=" in prebasecallerArgs: prebasecallerArgs = prebasecallerArgs + " --calibration-training=100000" if not "--flow-signals-type" in prebasecallerArgs: prebasecallerArgs = prebasecallerArgs + " --flow-signals-type scaled-residual" basecaller.basecalling( env['SIGPROC_RESULTS'], prebasecallerArgs, env['libraryKey'], env['tfKey'], env['runID'], env['reverse_primer_dict'], os.path.join(env['BASECALLER_RESULTS'], 'recalibration'), env['barcodeId'], env['barcodeInfo'], env['library'], env['notes'], env['site_name'], env['platform'], env['instrumentName'], env['chipType']) basecaller_recalibration_datasets = blockprocessing.get_datasets_basecaller(os.path.join(env['BASECALLER_RESULTS'],'recalibration')) if env['doBaseRecal'] == "panel_recal": basecaller_recalibration_datasets = basecaller_recalibration_datasets['IonControl'] # file containing dimension info (offsets, rows, cols) and flow info for stratification try: c = open(os.path.join(env['BASECALLER_RESULTS'], "recalibration", 'BaseCaller.json'),'r')
else: printtime( "DEBUG: Flow Space Recalibration is disabled, Reference: '%s'" % env['libraryName']) updated_basecallerArgs = env['basecallerArgs'] set_result_status('Base Calling') try: basecaller.basecalling( env['SIGPROC_RESULTS'], env['basecallerArgs'] + additional_basecallerArgs, env['libraryKey'], env['tfKey'], env['runID'], env['flowOrder'], env['reverse_primer_dict'], env['BASECALLER_RESULTS'], env['barcodeId'], env['barcodeSamples'], env.get('barcodesplit_filter', 0), env.get('barcodesplit_filter_minreads', 0), env['DIR_BC_FILES'], os.path.join("barcodeList.txt"), os.path.join(env['BASECALLER_RESULTS'], "barcodeMask.bin"), env['libraryName'], env['sample'], env['site_name'], env['notes'], env['start_time'], env['chipType'], env['expName'], env['resultsName'], env['pgmName']) add_status("Basecaller", 0) except: traceback.print_exc() add_status("Basecaller", 1) set_result_status('Post Basecalling') try: basecaller.post_basecalling(env['BASECALLER_RESULTS'],
def base_recalib(
        SIGPROC_RESULTS,
        basecallerArgs,
        libKey,
        tfKey,
        runID,
        floworder,
        reverse_primer_dict,
        BASECALLER_RESULTS,
        barcodeId,
        barcodeSamples,
        barcodesplit_filter,
        barcodesplit_filter_minreads,
        DIR_BC_FILES,
        barcodeList_path,
        barcodeMask_path,
        libraryName,
        sample,
        site_name,
        notes,
        start_time,
        chipType,
        expName,
        resultsName,
        pgmName,
        tmap_version,
        dataset_name,
        chipflow_name):
    '''Do flow space recalibration for all basecall files in a report hierarchy.

    Steps, all operating under ``BASECALLER_RESULTS/recalibration``:

    1. Run ``basecaller.basecalling`` into that directory, appending
       ``--calibration-training=2000000`` and
       ``--flow-signals-type scaled-residual`` to ``basecallerArgs`` unless
       the corresponding option is already present in the string.
    2. Load the two JSON files ``dataset_name`` and ``chipflow_name`` from
       that directory.
    3. For every dataset with a non-zero ``read_count`` whose BAM file
       exists: create a per-dataset sub-directory, align the reads with
       ``alignment.align`` and build the table with ``HPtable``.
    4. Call ``HPaggregation`` on the recalibration directory.

    Parameters:
        SIGPROC_RESULTS ... pgmName: forwarded positionally, in order, to
            ``basecaller.basecalling``; the only substitutions are the
            augmented ``basecallerArgs`` and the recalibration directory in
            place of ``BASECALLER_RESULTS``. ``libraryName`` is additionally
            passed to ``alignment.align``.
        tmap_version: not used by the active code path.
        dataset_name: file name of a JSON document with a ``"datasets"``
            list; each entry is read for ``read_count``, ``basecaller_bam``
            and ``file_prefix``.
        chipflow_name: file name of a JSON document with a ``"BaseCaller"``
            object providing ``block_col_offset``, ``block_col_size``,
            ``block_row_offset``, ``block_row_size`` and ``num_flows``.

    Returns:
        The path ``BASECALLER_RESULTS/recalibration/flowQVtable.txt``.
        NOTE(review): HPaggregation is not given this path; confirm that it
        actually writes this file.

    Raises:
        Any exception raised by a step is logged through ``printtime`` /
        ``traceback`` and then re-raised unchanged.
    '''
    try:
        recal_dir = os.path.join(BASECALLER_RESULTS, "recalibration")

        # Produce smaller basecaller results, but respect options the caller
        # has already put on the command line.
        if "--calibration-training=" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --calibration-training=2000000"
        if "--flow-signals-type" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --flow-signals-type scaled-residual"

        basecaller.basecalling(
            SIGPROC_RESULTS,
            basecallerArgs,
            libKey,
            tfKey,
            runID,
            floworder,
            reverse_primer_dict,
            recal_dir,
            barcodeId,
            barcodeSamples,
            barcodesplit_filter,
            barcodesplit_filter_minreads,
            DIR_BC_FILES,
            barcodeList_path,
            barcodeMask_path,
            libraryName,
            sample,
            site_name,
            notes,
            start_time,
            chipType,
            expName,
            resultsName,
            pgmName)

        # Load the dataset description; `with` closes the handle even when
        # json.load() fails on malformed content.
        try:
            with open(os.path.join(recal_dir, dataset_name), 'r') as f:
                datasets_basecaller = json.load(f)
        except Exception:
            printtime("ERROR: load " + dataset_name)
            traceback.print_exc()
            raise

        # Load the file holding block dimensions and flow count.
        try:
            with open(os.path.join(recal_dir, chipflow_name), 'r') as c:
                chipflow = json.load(c)
        except Exception:
            printtime("ERROR: load " + chipflow_name)
            traceback.print_exc()
            raise

        # Collect dimension and flow info used for stratification.
        block_info = chipflow["BaseCaller"]
        xMin = block_info['block_col_offset']
        xMax = block_info['block_col_size'] + xMin - 1
        yMin = block_info['block_row_offset']
        yMax = block_info['block_row_size'] + yMin - 1
        xCuts = 2
        yCuts = 2
        numFlows = block_info['num_flows']
        flowCuts = 2

        try:
            for dataset in datasets_basecaller["datasets"]:
                if dataset['read_count'] == 0:
                    continue

                readsFile = os.path.join(recal_dir, dataset['basecaller_bam'])
                runname_prefix = os.path.split(dataset['file_prefix'])[-1]

                # Protect against a dataset whose BAM was never written.
                if not os.path.isfile(readsFile):
                    printtime("WARNING: missing file: %s" % readsFile)
                    continue

                printtime("DEBUG: Work starting on %s" % readsFile)
                RECALIBRATION_RESULTS = os.path.join(recal_dir, runname_prefix)
                # NOTE: os.makedirs raises OSError if the directory already
                # exists; that error propagates like any other failure.
                os.makedirs(RECALIBRATION_RESULTS)
                sample_map_path = os.path.join(RECALIBRATION_RESULTS,
                                               "samplelib.bam")

                # Failures here are reported once by the enclosing handler.
                alignment.align(
                    libraryName,
                    readsFile,
                    align_full=False,
                    sam_parsed=False,
                    bidirectional=False,
                    mark_duplicates=False,
                    realign=False,
                    skip_sorting=True,
                    aligner_opts_extra="",
                    logfile=os.path.join(RECALIBRATION_RESULTS,
                                         "alignmentQC_out.txt"),
                    output_dir=RECALIBRATION_RESULTS,
                    output_basename="samplelib")

                # Table generation from the aligned sample BAM.
                HPtable(RECALIBRATION_RESULTS, sample_map_path,
                        xMin, xMax, xCuts, yMin, yMax, yCuts,
                        numFlows, flowCuts)

            # Aggregate the per-dataset results.
            try:
                qvtable = os.path.join(recal_dir, "flowQVtable.txt")
                HPaggregation(recal_dir)
            except Exception:
                printtime('ERROR: Flow QV aggregation failed')
                raise
        except Exception:
            traceback.print_exc()
            raise

    except Exception as err:
        printtime("WARNING: Recalibration is not performed: %s" % err)
        raise

    return qvtable
def base_recalib(
        SIGPROC_RESULTS,
        basecallerArgs,
        libKey,
        tfKey,
        runID,
        floworder,
        reverse_primer_dict,
        BASECALLER_RESULTS,
        barcodeId,
        barcodeSamples,
        barcodesplit_filter,
        barcodesplit_filter_minreads,
        DIR_BC_FILES,
        barcodeList_path,
        barcodeMask_path,
        libraryName,
        sample,
        site_name,
        notes,
        start_time,
        chipType,
        expName,
        resultsName,
        pgmName,
        tmap_version,
        dataset_name,
        chipflow_name):
    '''Do flow space recalibration for all basecall files in a report hierarchy.

    Steps, all operating under ``BASECALLER_RESULTS/recalibration``:

    1. Run ``basecaller.basecalling`` into that directory, appending
       ``--calibration-training=2000000`` and
       ``--flow-signals-type scaled-residual`` to ``basecallerArgs`` unless
       the corresponding option is already present in the string.
    2. Load the two JSON files ``dataset_name`` and ``chipflow_name`` from
       that directory.
    3. For every dataset with a non-zero ``read_count`` whose BAM file
       exists: create a per-dataset sub-directory, align the reads with
       ``alignment.align`` and build the table with ``HPtable``.
    4. Call ``HPaggregation`` on the recalibration directory.

    Parameters:
        SIGPROC_RESULTS ... pgmName: forwarded positionally, in order, to
            ``basecaller.basecalling``; the only substitutions are the
            augmented ``basecallerArgs`` and the recalibration directory in
            place of ``BASECALLER_RESULTS``. ``libraryName`` is additionally
            passed to ``alignment.align``.
        tmap_version: not used by the active code path.
        dataset_name: file name of a JSON document with a ``"datasets"``
            list; each entry is read for ``read_count``, ``basecaller_bam``
            and ``file_prefix``.
        chipflow_name: file name of a JSON document with a ``"BaseCaller"``
            object providing ``block_col_offset``, ``block_col_size``,
            ``block_row_offset``, ``block_row_size`` and ``num_flows``.

    Returns:
        The path ``BASECALLER_RESULTS/recalibration/flowQVtable.txt``.
        NOTE(review): HPaggregation is not given this path; confirm that it
        actually writes this file.

    Raises:
        Any exception raised by a step is logged through ``printtime`` /
        ``traceback`` and then re-raised unchanged.
    '''
    try:
        recal_dir = os.path.join(BASECALLER_RESULTS, "recalibration")

        # Produce smaller basecaller results, but respect options the caller
        # has already put on the command line.
        if "--calibration-training=" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --calibration-training=2000000"
        if "--flow-signals-type" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --flow-signals-type scaled-residual"

        basecaller.basecalling(
            SIGPROC_RESULTS,
            basecallerArgs,
            libKey,
            tfKey,
            runID,
            floworder,
            reverse_primer_dict,
            recal_dir,
            barcodeId,
            barcodeSamples,
            barcodesplit_filter,
            barcodesplit_filter_minreads,
            DIR_BC_FILES,
            barcodeList_path,
            barcodeMask_path,
            libraryName,
            sample,
            site_name,
            notes,
            start_time,
            chipType,
            expName,
            resultsName,
            pgmName)

        # Load the dataset description; `with` closes the handle even when
        # json.load() fails on malformed content.
        try:
            with open(os.path.join(recal_dir, dataset_name), 'r') as f:
                datasets_basecaller = json.load(f)
        except Exception:
            printtime("ERROR: load " + dataset_name)
            traceback.print_exc()
            raise

        # Load the file holding block dimensions and flow count.
        try:
            with open(os.path.join(recal_dir, chipflow_name), 'r') as c:
                chipflow = json.load(c)
        except Exception:
            printtime("ERROR: load " + chipflow_name)
            traceback.print_exc()
            raise

        # Collect dimension and flow info used for stratification.
        block_info = chipflow["BaseCaller"]
        xMin = block_info['block_col_offset']
        xMax = block_info['block_col_size'] + xMin - 1
        yMin = block_info['block_row_offset']
        yMax = block_info['block_row_size'] + yMin - 1
        xCuts = 2
        yCuts = 2
        numFlows = block_info['num_flows']
        flowCuts = 2

        try:
            for dataset in datasets_basecaller["datasets"]:
                if dataset['read_count'] == 0:
                    continue

                readsFile = os.path.join(recal_dir, dataset['basecaller_bam'])
                runname_prefix = os.path.split(dataset['file_prefix'])[-1]

                # Protect against a dataset whose BAM was never written.
                if not os.path.isfile(readsFile):
                    printtime("WARNING: missing file: %s" % readsFile)
                    continue

                printtime("DEBUG: Work starting on %s" % readsFile)
                RECALIBRATION_RESULTS = os.path.join(recal_dir, runname_prefix)
                # NOTE: os.makedirs raises OSError if the directory already
                # exists; that error propagates like any other failure.
                os.makedirs(RECALIBRATION_RESULTS)
                sample_map_path = os.path.join(RECALIBRATION_RESULTS,
                                               "samplelib.bam")

                # Failures here are reported once by the enclosing handler.
                alignment.align(
                    libraryName,
                    readsFile,
                    align_full=False,
                    sam_parsed=False,
                    bidirectional=False,
                    mark_duplicates=False,
                    realign=False,
                    skip_sorting=True,
                    aligner_opts_extra="",
                    logfile=os.path.join(RECALIBRATION_RESULTS,
                                         "alignmentQC_out.txt"),
                    output_dir=RECALIBRATION_RESULTS,
                    output_basename="samplelib")

                # Table generation from the aligned sample BAM.
                HPtable(RECALIBRATION_RESULTS, sample_map_path,
                        xMin, xMax, xCuts, yMin, yMax, yCuts,
                        numFlows, flowCuts)

            # Aggregate the per-dataset results.
            try:
                qvtable = os.path.join(recal_dir, "flowQVtable.txt")
                HPaggregation(recal_dir)
            except Exception:
                printtime('ERROR: Flow QV aggregation failed')
                raise
        except Exception:
            traceback.print_exc()
            raise

    except Exception as err:
        printtime("WARNING: Recalibration is not performed: %s" % err)
        raise

    return qvtable
" --calibration-panel /opt/ion/config/datasets_calibration.json" ) else: if not "--calibration-training=" in prebasecallerArgs: prebasecallerArgs = (prebasecallerArgs + " --calibration-training=100000") basecaller.basecalling( my_block_offset, env["SIGPROC_RESULTS"], prebasecallerArgs, env["libraryKey"], env["tfKey"], env["runID"], env["reverse_primer_dict"]["sequence"], os.path.join(env["BASECALLER_RESULTS"], "recalibration"), env["barcodeId"], env["barcodeInfo"], env["library"], env["notes"], env["site_name"], env["platform"], env["instrumentName"], env["chipInfo"], ) # Reuse phase estimates in main base calling task additional_basecallerArgs += " --phase-estimation-file " + os.path.join( env["BASECALLER_RESULTS"], "recalibration", "BaseCaller.json") #
sys.exit(1) # create analysis progress bar file f = open('progress.txt','w') f.write('wellfinding = green\n') f.write('signalprocessing = green\n') f.write('basecalling = yellow\n') f.write('sffread = grey\n') f.write('alignment = grey') f.close() basecaller.basecalling( env['SIGPROC_RESULTS'], env['previousReport'], env['basecallerArgs'], env['libraryKey'], env['tfKey'], env['runID'], env['flowOrder'], env['reverse_primer_dict'], env['BASECALLER_RESULTS']) # create analysis progress bar file f = open('progress.txt','w') f.write('wellfinding = green\n') f.write('signalprocessing = green\n') f.write('basecalling = green\n') f.write('sffread = yellow\n') f.write('alignment = grey') f.close() generate_beadsummary=True