RECALIBRATION_RESULTS, dataset['file_prefix']) blocks = [] basecaller_meta_information = None if len(calibration_input_bams) > 0: calibration_input_bams += "," calibration_input_bams += calibration_bam_base + ".bam" alignment.align(blocks, basecaller_bam, env['alignmentArgs'], env['ionstatsArgs'], referenceName, basecaller_meta_information, env['libraryKey'], graph_max_x, do_realign=False, do_ionstats=False, do_sorting=False, do_mark_duplicates=False, do_indexing=False, output_dir=RECALIBRATION_RESULTS, output_basename=dataset['file_prefix']) # # Part 3) Call Calibration executable to create models and update basecallerArgs # If we didn't generate any BAMs for calibration we don't do anything # # file containing chip dimension info (offsets, rows, cols) and flow info for stratification try:
dataset['basecaller_bam']) printtime("DEBUG: Work starting on %s" % readsFile) RECALIBRATION_RESULTS = os.path.join( env['BASECALLER_RESULTS'], "recalibration", dataset['file_prefix']) os.makedirs(RECALIBRATION_RESULTS) sample_map_path = os.path.join(RECALIBRATION_RESULTS, "samplelib.bam") alignment.align(referenceName, readsFile, bidirectional=False, mark_duplicates=False, realign=False, skip_sorting=True, aligner_opts_extra="", logfile=os.path.join( RECALIBRATION_RESULTS, "alignmentQC_out.txt"), output_dir=RECALIBRATION_RESULTS, output_basename="samplelib") # Generate both hpTable and hpModel. flow_space_recal.calibrate(RECALIBRATION_RESULTS, sample_map_path, env['recalibArgs'], chipflow) # merge step, calibrate collects the training data saved for each barcode, # calculate and generate hpTable and hpModel files for the whole dataset flow_space_recal.HPaggregation( os.path.join(env['BASECALLER_RESULTS'], "recalibration"),
if not referenceName: continue readsFile = os.path.join(env['BASECALLER_RESULTS'],'recalibration',dataset['basecaller_bam']) printtime("DEBUG: Work starting on %s" % readsFile) RECALIBRATION_RESULTS = os.path.join(env['BASECALLER_RESULTS'],"recalibration", dataset['file_prefix']) os.makedirs(RECALIBRATION_RESULTS) sample_map_path = os.path.join(RECALIBRATION_RESULTS, "samplelib.bam") alignment.align( referenceName, readsFile, bidirectional=False, mark_duplicates=False, realign=False, skip_sorting=True, aligner_opts_extra="", logfile=os.path.join(RECALIBRATION_RESULTS,"alignmentQC_out.txt"), output_dir=RECALIBRATION_RESULTS, output_basename="samplelib") # Generate both hpTable and hpModel. flow_space_recal.calibrate( RECALIBRATION_RESULTS, sample_map_path, env['recalibArgs'], chipflow) # merge step, calibrate collects the training data saved for each barcode, # calculate and generate hpTable and hpModel files for the whole dataset
calibration_bam_base = os.path.join(RECALIBRATION_RESULTS,dataset['file_prefix']) blocks=[] basecaller_meta_information=None if len(calibration_input_bams)>0: calibration_input_bams += "," calibration_input_bams += calibration_bam_base + ".bam" alignment.align( blocks, basecaller_bam, env['alignmentArgs'], env['ionstatsArgs'], referenceName, basecaller_meta_information, env['libraryKey'], graph_max_x, do_realign=False, do_ionstats=False, do_sorting=False, do_mark_duplicates=False, do_indexing=False, output_dir=RECALIBRATION_RESULTS, output_basename=dataset['file_prefix']) # Do not call Calibration if we did not generate any aligned input BAMs but gracefully continue additional_basecallerArgs += " --phase-estimation-file " + os.path.join(env['BASECALLER_RESULTS'], "recalibration", "BaseCaller.json") if env['doBaseRecal'] == "panel_recal": additional_basecallerArgs += " --calibration-panel /opt/ion/config/datasets_calibration.json" if len(calibration_input_bams)>0: # Call Calibration module to process aligned training BAM files
def base_recalib(SIGPROC_RESULTS, basecallerArgs, libKey, tfKey, runID,
                 floworder, reverse_primer_dict, BASECALLER_RESULTS,
                 barcodeId, barcodeSamples, barcodesplit_filter,
                 barcodesplit_filter_minreads, DIR_BC_FILES, barcodeList_path,
                 barcodeMask_path, libraryName, sample, site_name, notes,
                 start_time, chipType, expName, resultsName, pgmName,
                 tmap_version, dataset_name, chipflow_name):
    '''Do flow space recalibration for all basecall files in a report hierarchy.

    Steps, all operating inside ``<BASECALLER_RESULTS>/recalibration``:

    1. Run ``basecaller.basecalling`` with ``basecallerArgs``; the options
       ``--calibration-training=2000000`` and
       ``--flow-signals-type scaled-residual`` are appended when the caller
       did not already supply them.
    2. Load the JSON files ``dataset_name`` and ``chipflow_name``.
    3. For every dataset with a non-zero ``read_count`` whose BAM file
       exists, create a per-dataset directory, align the reads against
       ``libraryName`` and call ``HPtable`` on the result.
    4. Call ``HPaggregation`` on the recalibration directory.

    Most parameters are forwarded unchanged to ``basecaller.basecalling``.
    ``tmap_version`` is accepted for interface compatibility but is not
    used by this function.

    Returns the path ``<BASECALLER_RESULTS>/recalibration/flowQVtable.txt``.
    NOTE(review): this function only computes that path; whether the file
    is actually written depends on ``HPaggregation`` -- confirm.

    Any exception is logged and re-raised to the caller.
    '''
    try:
        recal_dir = os.path.join(BASECALLER_RESULTS, "recalibration")

        # Produce smaller basecaller results: add the training defaults
        # unless the caller already chose values for them.
        if "--calibration-training=" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --calibration-training=2000000"
        if "--flow-signals-type" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --flow-signals-type scaled-residual"

        basecaller.basecalling(
            SIGPROC_RESULTS, basecallerArgs, libKey, tfKey, runID, floworder,
            reverse_primer_dict, recal_dir, barcodeId, barcodeSamples,
            barcodesplit_filter, barcodesplit_filter_minreads, DIR_BC_FILES,
            barcodeList_path, barcodeMask_path, libraryName, sample,
            site_name, notes, start_time, chipType, expName, resultsName,
            pgmName)

        # Load datasets_basecaller.json. The context manager closes the
        # handle even when json.load() fails.
        try:
            with open(os.path.join(recal_dir, dataset_name), 'r') as f:
                datasets_basecaller = json.load(f)
        except Exception:
            printtime("ERROR: load " + dataset_name)
            traceback.print_exc()
            raise

        # Load the chip dimension / flow description.
        try:
            with open(os.path.join(recal_dir, chipflow_name), 'r') as c:
                chipflow = json.load(c)
        except Exception:
            printtime("ERROR: load " + chipflow_name)
            traceback.print_exc()
            raise

        # Collect dimension and flow info; max = offset + size - 1.
        block_info = chipflow["BaseCaller"]
        xMin = block_info['block_col_offset']
        xMax = block_info['block_col_size'] + xMin - 1
        yMin = block_info['block_row_offset']
        yMax = block_info['block_row_size'] + yMin - 1
        yCuts = 2
        xCuts = 2
        numFlows = block_info['num_flows']
        flowCuts = 2

        try:
            for dataset in datasets_basecaller["datasets"]:
                if dataset['read_count'] == 0:
                    continue

                readsFile = os.path.join(recal_dir, dataset['basecaller_bam'])
                runname_prefix = os.path.split(dataset['file_prefix'])[-1]

                # Protection: the BAM listed in the JSON might not exist.
                if not os.path.isfile(readsFile):
                    printtime("WARNING: missing file: %s" % readsFile)
                    continue

                printtime("DEBUG: Work starting on %s" % readsFile)
                RECALIBRATION_RESULTS = os.path.join(recal_dir, runname_prefix)
                # os.makedirs raises OSError if the directory already exists.
                os.makedirs(RECALIBRATION_RESULTS)
                sample_map_path = os.path.join(RECALIBRATION_RESULTS,
                                               "samplelib.bam")

                # Failures in align/HPtable are reported once by the
                # enclosing handler below.
                alignment.align(
                    libraryName,
                    readsFile,
                    align_full=False,
                    sam_parsed=False,
                    bidirectional=False,
                    mark_duplicates=False,
                    realign=False,
                    skip_sorting=True,
                    aligner_opts_extra="",
                    logfile=os.path.join(RECALIBRATION_RESULTS,
                                         "alignmentQC_out.txt"),
                    output_dir=RECALIBRATION_RESULTS,
                    output_basename="samplelib")

                # HP table generation from the aligned sample BAM.
                # (The older QVtable step is disabled.)
                HPtable(RECALIBRATION_RESULTS, sample_map_path,
                        xMin, xMax, xCuts, yMin, yMax, yCuts,
                        numFlows, flowCuts)

            # Merge step over the whole recalibration directory.
            # (The older QVaggregation step is disabled.)
            try:
                qvtable = os.path.join(recal_dir, "flowQVtable.txt")
                HPaggregation(recal_dir)
            except Exception:
                printtime('ERROR: Flow QV aggregation failed')
                raise
        except Exception:
            traceback.print_exc()
            raise
    except Exception as err:
        printtime("WARNING: Recalibration is not performed: %s" % err)
        raise

    return qvtable
def base_recalib(
        SIGPROC_RESULTS,
        basecallerArgs,
        libKey,
        tfKey,
        runID,
        floworder,
        reverse_primer_dict,
        BASECALLER_RESULTS,
        barcodeId,
        barcodeSamples,
        barcodesplit_filter,
        barcodesplit_filter_minreads,
        DIR_BC_FILES,
        barcodeList_path,
        barcodeMask_path,
        libraryName,
        sample,
        site_name,
        notes,
        start_time,
        chipType,
        expName,
        resultsName,
        pgmName,
        tmap_version,
        dataset_name,
        chipflow_name
):
    '''Do flow space recalibration for all basecall files in a report hierarchy.

    Work is done under ``<BASECALLER_RESULTS>/recalibration``:

    * ``basecaller.basecalling`` is run with ``basecallerArgs``, extended
      with ``--calibration-training=2000000`` and
      ``--flow-signals-type scaled-residual`` unless already present.
    * The JSON files ``dataset_name`` and ``chipflow_name`` are read from
      that directory.
    * Each dataset with a non-zero ``read_count`` and an existing BAM file
      gets its own sub-directory, is aligned against ``libraryName`` and is
      passed to ``HPtable``.
    * ``HPaggregation`` is called once on the recalibration directory.

    Most arguments are handed straight to ``basecaller.basecalling``;
    ``tmap_version`` is not used by this function.

    Returns the path ``<BASECALLER_RESULTS>/recalibration/flowQVtable.txt``.
    NOTE(review): the path is only computed here; confirm that
    ``HPaggregation`` is what produces the file.

    Every failure is logged and the exception is re-raised.
    '''
    try:
        recal_dir = os.path.join(BASECALLER_RESULTS, "recalibration")

        # Produce smaller basecaller results; caller-supplied values win.
        if "--calibration-training=" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --calibration-training=2000000"
        if "--flow-signals-type" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --flow-signals-type scaled-residual"

        basecaller.basecalling(
            SIGPROC_RESULTS,
            basecallerArgs,
            libKey,
            tfKey,
            runID,
            floworder,
            reverse_primer_dict,
            recal_dir,
            barcodeId,
            barcodeSamples,
            barcodesplit_filter,
            barcodesplit_filter_minreads,
            DIR_BC_FILES,
            barcodeList_path,
            barcodeMask_path,
            libraryName,
            sample,
            site_name,
            notes,
            start_time,
            chipType,
            expName,
            resultsName,
            pgmName)

        # load datasets_basecaller.json -- "with" guarantees the file is
        # closed even if parsing fails.
        try:
            with open(os.path.join(recal_dir, dataset_name), 'r') as f:
                datasets_basecaller = json.load(f)
        except Exception:
            printtime("ERROR: load " + dataset_name)
            traceback.print_exc()
            raise

        try:
            with open(os.path.join(recal_dir, chipflow_name), 'r') as c:
                chipflow = json.load(c)
        except Exception:
            printtime("ERROR: load " + chipflow_name)
            traceback.print_exc()
            raise

        # collect dimension and flow info (max = offset + size - 1)
        bc_info = chipflow["BaseCaller"]
        xMin = bc_info['block_col_offset']
        xMax = bc_info['block_col_size'] + xMin - 1
        yMin = bc_info['block_row_offset']
        yMax = bc_info['block_row_size'] + yMin - 1
        yCuts = 2
        xCuts = 2
        numFlows = bc_info['num_flows']
        flowCuts = 2

        try:
            for dataset in datasets_basecaller["datasets"]:
                if dataset['read_count'] == 0:
                    continue

                readsFile = os.path.join(recal_dir, dataset['basecaller_bam'])
                runname_prefix = os.path.split(dataset['file_prefix'])[-1]

                # add protection that readsFile might not exist
                if not os.path.isfile(readsFile):
                    printtime("WARNING: missing file: %s" % readsFile)
                    continue

                printtime("DEBUG: Work starting on %s" % readsFile)
                RECALIBRATION_RESULTS = os.path.join(recal_dir, runname_prefix)
                # Raises OSError when the directory is already present.
                os.makedirs(RECALIBRATION_RESULTS)
                sample_map_path = os.path.join(RECALIBRATION_RESULTS,
                                               "samplelib.bam")

                # No per-call try/except here: the surrounding handler
                # prints the traceback exactly once.
                alignment.align(
                    libraryName,
                    readsFile,
                    align_full=False,
                    sam_parsed=False,
                    bidirectional=False,
                    mark_duplicates=False,
                    realign=False,
                    skip_sorting=True,
                    aligner_opts_extra="",
                    logfile=os.path.join(RECALIBRATION_RESULTS,
                                         "alignmentQC_out.txt"),
                    output_dir=RECALIBRATION_RESULTS,
                    output_basename="samplelib")

                # HP table generation (the former QVtable call is disabled).
                HPtable(RECALIBRATION_RESULTS, sample_map_path,
                        xMin, xMax, xCuts, yMin, yMax, yCuts,
                        numFlows, flowCuts)

            # Aggregate across all datasets (the former QVaggregation call
            # is disabled).
            try:
                qvtable = os.path.join(recal_dir, "flowQVtable.txt")
                HPaggregation(recal_dir)
            except Exception:
                printtime('ERROR: Flow QV aggregation failed')
                raise
        except Exception:
            traceback.print_exc()
            raise
    except Exception as err:
        printtime("WARNING: Recalibration is not performed: %s" % err)
        raise

    return qvtable
printtime("DEBUG: Work starting on %s" % readsFile) RECALIBRATION_RESULTS = os.path.join(env['BASECALLER_RESULTS'],"recalibration", dataset['file_prefix']) os.makedirs(RECALIBRATION_RESULTS) sample_map_path = os.path.join(RECALIBRATION_RESULTS, "samplelib.bam") blocks=[] basecaller_meta_information=None alignment.align( blocks, env['alignmentArgs'], env['ionstatsArgs'], referenceName, basecaller_meta_information, env['libraryKey'], graph_max_x, readsFile, do_realign=False, do_ionstats=False, do_sorting=False, do_mark_duplicates=False, do_indexing=False, logfile=os.path.join(RECALIBRATION_RESULTS,"alignmentQC_out.txt"), output_dir=RECALIBRATION_RESULTS, output_basename="samplelib") # Generate both hpTable and hpModel. flow_space_recal.calibrate( RECALIBRATION_RESULTS, sample_map_path, env['recalibArgs'], chipflow)