def merge_basecaller_stats(dirs, BASECALLER_RESULTS, SIGPROC_RESULTS, flows, floworder):
    """Merge per-block basecaller results into composite run-level outputs.

    For each block directory in ``dirs`` this merges datasets_basecaller.json
    (summing read/base counters), reduces the per-barcode and per-block
    ionstats_basecaller.json files, writes the composite block return code,
    merges TF metrics and BaseCaller.json, and regenerates the composite
    read-length/quality plots plus the wells beadogram.

    Parameters:
        dirs               -- list of per-block subdirectories (relative paths)
        BASECALLER_RESULTS -- output directory for merged basecaller results
        SIGPROC_RESULTS    -- signal-processing results dir (used by beadogram)
        flows              -- number of flows; scales the plot x-axis
        floworder          -- flow order string, passed to TF metric merging

    Returns None. All steps are best-effort: failures are logged via
    printtime() and do not abort the remaining merge steps, except that a
    missing/invalid datasets_basecaller.json in every block aborts early.
    """
    ########################################################
    # Merge datasets_basecaller.json                       #
    ########################################################
    block_datasets_json = []
    combined_datasets_json = {}

    for block_dir in dirs:
        current_datasets_path = os.path.join(block_dir, BASECALLER_RESULTS, 'datasets_basecaller.json')
        try:
            # 'with' guarantees the handle is closed even if json.load raises
            with open(current_datasets_path, 'r') as f:
                block_datasets_json.append(json.load(f))
        except Exception:
            # Best-effort: a block without a parsable file is skipped, not fatal
            printtime("ERROR: skipped %s" % current_datasets_path)

    if (not block_datasets_json) or ('datasets' not in block_datasets_json[0]) or ('read_groups' not in block_datasets_json[0]):
        printtime("merge_basecaller_results: no block contained a valid datasets_basecaller.json, aborting")
        return

    # Use the first block's file as the template; counters are re-accumulated below
    combined_datasets_json = copy.deepcopy(block_datasets_json[0])

    for dataset_idx in range(len(combined_datasets_json['datasets'])):
        combined_datasets_json['datasets'][dataset_idx]['read_count'] = 0
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['datasets'][dataset_idx]['read_count'] += current_datasets_json['datasets'][dataset_idx].get("read_count", 0)

    # Plain iteration works on both Python 2 and 3 (the original iterkeys() is Py2-only)
    for read_group in combined_datasets_json['read_groups']:
        group = combined_datasets_json['read_groups'][read_group]
        group['Q20_bases'] = 0
        group['total_bases'] = 0
        group['read_count'] = 0
        # 'nomatch' groups start unfiltered; all others start filtered and are
        # cleared below if any block reports them unfiltered
        group['filtered'] = 'nomatch' not in read_group
        for current_datasets_json in block_datasets_json:
            current_group = current_datasets_json['read_groups'].get(read_group, {})
            group['Q20_bases'] += current_group.get("Q20_bases", 0)
            group['total_bases'] += current_group.get("total_bases", 0)
            group['read_count'] += current_group.get("read_count", 0)
            # A read group is filtered only if every block filtered it
            group['filtered'] &= current_group.get("filtered", True)

    try:
        with open(os.path.join(BASECALLER_RESULTS, 'datasets_basecaller.json'), "w") as f:
            json.dump(combined_datasets_json, f, indent=4)
    except Exception:
        printtime("ERROR: Failed to write merged datasets_basecaller.json")
        traceback.print_exc()

    ########################################################
    # Merge ionstats_basecaller.json:                      #
    # First across blocks, then across barcodes            #
    ########################################################
    try:
        composite_filename_list = []
        for dataset in combined_datasets_json["datasets"]:
            composite_filename = os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json')
            barcode_filename_list = [os.path.join(block_dir, BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json')
                                     for block_dir in dirs]
            # Only blocks that actually produced stats for this barcode contribute
            barcode_filename_list = [filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list, composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)

        ionstats.reduce_stats(composite_filename_list, os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))
        # Legacy quality.summary-style files derived from the merged stats
        ionstats.generate_legacy_basecaller_files(
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, ''))
    except Exception:
        printtime("ERROR: Failed to merge ionstats_basecaller.json")
        traceback.print_exc()

    ########################################################
    # write composite return code                          #
    ########################################################
    try:
        # Full-chip runs have exactly 96 blocks; count down for each block
        # whose blockstatus.txt reports a successful basecaller (0 == all good)
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:
                blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):
                    with open(blockstatus_return_code_file, 'r') as f:
                        text = f.read()
                        if 'Basecaller=0' in text:
                            composite_return_code -= 1
            composite_return_code_file = os.path.join(BASECALLER_RESULTS, "composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                os.umask(0o002)  # group-writable output (0o002 is valid on Py2.6+ and Py3)
                with open(composite_return_code_file, 'a') as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except Exception:
        traceback.print_exc()

    ##################################################
    # generate TF Metrics                            #
    # look for both keys and append same file        #
    ##################################################
    printtime("Merging TFMapper metrics and generating TF plots")
    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS, dirs, floworder)
    except Exception:
        printtime("ERROR: Merging TFMapper metrics failed")

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")
    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)
        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        printtime("Merging BaseCaller.json files failed")

    ###############################################
    # Generate composite plots
    ###############################################
    printtime("Build composite basecaller graphs")

    try:
        # Empirical scaling of the read-length axis to the number of flows
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        graph_max_x = 400

    # Plot read length sparkline
    for dataset in combined_datasets_json["datasets"]:
        ionstats_plots.read_length_sparkline(
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.sparkline.png'),
            graph_max_x)

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto.png'),
        graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto2.png'),
        graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'quality_histogram.png'))

    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except Exception:
        printtime("ERROR: Wells beadogram generation failed")
        traceback.print_exc()

    printtime("Finished merging basecaller stats")
def post_basecalling(BASECALLER_RESULTS, expName, resultsName, flows):
    """Generate per-dataset and merged QC stats/plots after basecalling.

    Reads datasets_basecaller.json, runs the ionstats utility on each
    dataset's unmapped BAM, merges the per-dataset stats into a single
    ionstats_basecaller.json, emits the legacy quality.summary file, and
    renders the read-length and quality histograms/sparklines.

    Parameters:
        BASECALLER_RESULTS -- directory with basecaller outputs
        expName            -- experiment name (unused here; kept for interface
                              compatibility with callers)
        resultsName        -- results name (unused here; kept for interface
                              compatibility with callers)
        flows              -- number of flows; scales the plot x-axis

    Raises:
        Exception -- if datasets_basecaller.json is missing or unparsable.
    """
    datasets_basecaller_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")

    if not os.path.exists(datasets_basecaller_path):
        printtime("ERROR: %s does not exist" % datasets_basecaller_path)
        raise Exception("ERROR: %s does not exist" % datasets_basecaller_path)

    datasets_basecaller = {}
    try:
        # 'with' guarantees the handle is closed even if json.load raises
        with open(datasets_basecaller_path, 'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % datasets_basecaller_path)
        raise Exception("ERROR: problem parsing %s" % datasets_basecaller_path)

    try:
        # Empirical scaling of the read-length axis to the number of flows
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        graph_max_x = 400

    quality_file_list = []
    for dataset in datasets_basecaller["datasets"]:
        # Skip datasets whose unmapped BAM was never produced
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        # Call ionstats utility to generate alignment-independent metrics
        # for the current unmapped BAM
        ionstats.generate_ionstats_basecaller(
            os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']),
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'),
            graph_max_x)

        # Plot read length sparkline
        ionstats_plots.read_length_sparkline(
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.sparkline.png'),
            graph_max_x)

        quality_file_list.append(
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'))

    # Merge ionstats_basecaller files from individual barcodes/datasets
    ionstats.reduce_stats(quality_file_list, os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))

    # Generate legacy stats file: quality.summary
    ionstats.generate_legacy_basecaller_files(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, ''))

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto.png'),
        graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto2.png'),
        graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'quality_histogram.png'))

    printtime("Finished basecaller post processing")