def barcode_report_stats(barcode_names):
    """Write CA_barcode_summary.json for the given barcode names and merge
    the per-barcode ionstats alignment files into ionstats_alignment.json.

    barcode_names -- iterable of barcode name strings; each is expected to
    have a matching <name>_rawlib.ionstats_alignment.json in the CWD.
    """
    CA_barcodes_json = []
    ionstats_file_list = []
    printtime("DEBUG: creating CA_barcode_summary.json")
    for bcname in sorted(barcode_names):
        ionstats_file = bcname + '_rawlib.ionstats_alignment.json'
        # Defaults guarantee these keys exist even if the stats file is unreadable.
        barcode_json = {"barcode_name": bcname, "AQ7_num_bases": 0, "full_num_reads": 0, "AQ7_mean_read_length": 0}
        try:
            # was: json.load(open(...)) — leaked the file handle; 'with' closes it
            with open(ionstats_file) as f:
                stats = json.load(f)
            for key in stats.keys():
                if key in ['AQ7', 'AQ10', 'AQ17', 'AQ20', 'AQ30', 'AQ47', 'full', 'aligned']:
                    barcode_json.update({
                        key + "_max_read_length": stats[key].get("max_read_length"),
                        key + "_mean_read_length": stats[key].get("mean_read_length"),
                        key + "_num_bases": stats[key].get("num_bases"),
                        key + "_num_reads": stats[key].get("num_reads")
                    })
            ionstats_file_list.append(ionstats_file)
        except Exception:  # was bare 'except:' which also swallowed SystemExit/KeyboardInterrupt
            printtime("DEBUG: error reading ionstats from %s" % ionstats_file)
            traceback.print_exc()
        # 'nomatch' entry is kept first in the summary
        if bcname == 'nomatch':
            CA_barcodes_json.insert(0, barcode_json)
        else:
            CA_barcodes_json.append(barcode_json)
    with open('CA_barcode_summary.json', 'w') as f:
        f.write(json.dumps(CA_barcodes_json, indent=2))
    # generate merged ionstats_alignment.json (only if not already present)
    if not os.path.exists('ionstats_alignment.json'):
        ionstats.reduce_stats(ionstats_file_list, 'ionstats_alignment.json')
def mergeBlocks(BASECALLER_RESULTS, dirs, floworder):
    """Merge per-block TF ionstats files, plot TF length histograms and
    regenerate the legacy TFStats.json.

    BASECALLER_RESULTS -- results directory containing the per-block subdirs
    dirs               -- block subdirectory names
    floworder          -- unused here; kept for caller compatibility
    """
    merged_tf_json = os.path.join(BASECALLER_RESULTS, "ionstats_tf.json")
    legacy_tf_json = os.path.join(BASECALLER_RESULTS, "TFStats.json")

    # Collect the per-block ionstats_tf.json files that actually exist.
    block_files = []
    for block_dir in dirs:
        candidate = os.path.join(BASECALLER_RESULTS, block_dir, "ionstats_tf.json")
        if os.path.exists(candidate):
            block_files.append(candidate)

    ionstats.reduce_stats(block_files, merged_tf_json)
    ionstats_plots.tf_length_histograms(merged_tf_json, ".")
    ionstats.generate_legacy_tf_files(merged_tf_json, legacy_tf_json)
def barcode_report_stats(barcode_names):
    """Write CA_barcode_summary.json for the given barcode names and merge
    the per-barcode ionstats alignment files into ionstats_alignment.json.

    barcode_names -- iterable of barcode name strings; each is expected to
    have a matching <name>_rawlib.ionstats_alignment.json in the CWD.
    """
    CA_barcodes_json = []
    ionstats_file_list = []
    printtime("DEBUG: creating CA_barcode_summary.json")
    for bcname in sorted(barcode_names):
        ionstats_file = bcname + "_rawlib.ionstats_alignment.json"
        # Defaults guarantee these keys exist even if the stats file is unreadable.
        barcode_json = {
            "barcode_name": bcname,
            "AQ7_num_bases": 0,
            "full_num_reads": 0,
            "AQ7_mean_read_length": 0,
        }
        try:
            # was: json.load(open(...)) — leaked the file handle; 'with' closes it
            with open(ionstats_file) as f:
                stats = json.load(f)
            for key in list(stats.keys()):
                if key in [
                    "AQ7",
                    "AQ10",
                    "AQ17",
                    "AQ20",
                    "AQ30",
                    "AQ47",
                    "full",
                    "aligned",
                ]:
                    barcode_json.update({
                        key + "_max_read_length": stats[key].get("max_read_length"),
                        key + "_mean_read_length": stats[key].get("mean_read_length"),
                        key + "_num_bases": stats[key].get("num_bases"),
                        key + "_num_reads": stats[key].get("num_reads"),
                    })
            ionstats_file_list.append(ionstats_file)
        except Exception:
            printtime("DEBUG: error reading ionstats from %s" % ionstats_file)
            traceback.print_exc()
        # 'nomatch' entry is kept first in the summary
        if bcname == "nomatch":
            CA_barcodes_json.insert(0, barcode_json)
        else:
            CA_barcodes_json.append(barcode_json)
    with open("CA_barcode_summary.json", "w") as f:
        f.write(json.dumps(CA_barcodes_json, indent=2))
    # generate merged ionstats_alignment.json (only if not already present)
    if not os.path.exists("ionstats_alignment.json"):
        ionstats.reduce_stats(ionstats_file_list, "ionstats_alignment.json")
def barcode_report_stats(bcfile_names):
    """Write CA_barcode_summary.json for the given barcode BAM file names and
    merge the per-barcode ionstats alignment files into ionstats_alignment.json.

    bcfile_names -- iterable of BAM file names (e.g. 'IonXpress_001_rawlib.bam');
    the barcode name is derived by stripping the '_rawlib.bam' suffix, and the
    stats file by replacing '.bam' with '.ionstats_alignment.json'.
    """
    CA_barcodes_json = []
    ionstats_file_list = []
    printtime("DEBUG: creating CA_barcode_summary.json")
    for bcname in bcfile_names:
        barcode_name = bcname.split('_rawlib.bam')[0]
        ionstats_file = bcname.split('.bam')[0] + '.ionstats_alignment.json'
        # Defaults guarantee these keys exist even if the stats file is unreadable.
        barcode_json = {
            "barcode_name": barcode_name,
            "AQ7_num_bases": 0,
            "full_num_reads": 0,
            "AQ7_mean_read_length": 0
        }
        try:
            # was: json.load(open(...)) — leaked the file handle; 'with' closes it
            with open(ionstats_file) as f:
                stats = json.load(f)
            for key in stats.keys():
                if key in [
                    'AQ7', 'AQ10', 'AQ17', 'AQ20', 'AQ30', 'AQ47', 'full', 'aligned'
                ]:
                    barcode_json.update({
                        key + "_max_read_length": stats[key].get("max_read_length"),
                        key + "_mean_read_length": stats[key].get("mean_read_length"),
                        key + "_num_bases": stats[key].get("num_bases"),
                        key + "_num_reads": stats[key].get("num_reads")
                    })
            ionstats_file_list.append(ionstats_file)
        except Exception:  # was bare 'except:' which also swallowed SystemExit/KeyboardInterrupt
            printtime("DEBUG: error reading ionstats from %s" % ionstats_file)
            traceback.print_exc()
        # 'nomatch' entry is kept first in the summary
        if barcode_name == 'nomatch':
            CA_barcodes_json.insert(0, barcode_json)
        else:
            CA_barcodes_json.append(barcode_json)
    with open('CA_barcode_summary.json', 'w') as f:
        f.write(json.dumps(CA_barcodes_json, indent=2))
    # generate merged ionstats_alignment.json (only if not already present)
    if not os.path.exists('ionstats_alignment.json'):
        ionstats.reduce_stats(ionstats_file_list, 'ionstats_alignment.json')
def mergeBlocks(BASECALLER_RESULTS, dirs, floworder):
    """Merge the per-block ionstats_tf.json files, draw the TF length
    histograms and emit the legacy TFStats.json.

    floworder is unused here but kept for caller compatibility.
    """
    ionstats_tf_filename = os.path.join(BASECALLER_RESULTS, "ionstats_tf.json")
    tfstatsjson_path = os.path.join(BASECALLER_RESULTS, "TFStats.json")

    # Only blocks that actually produced an ionstats_tf.json contribute.
    composite_filename_list = [
        path
        for path in (
            os.path.join(BASECALLER_RESULTS, block, "ionstats_tf.json")
            for block in dirs
        )
        if os.path.exists(path)
    ]

    ionstats.reduce_stats(composite_filename_list, ionstats_tf_filename)
    ionstats_plots.tf_length_histograms(ionstats_tf_filename, '.')
    ionstats.generate_legacy_tf_files(ionstats_tf_filename, tfstatsjson_path)
try: # Merge ionstats_basecaller files from individual barcodes/dataset BASECALLER_RESULTS = 'basecaller_results' ionstats_file = 'ionstats_basecaller.json' file_list = [] for filepath in args.files: ionstats_path = os.path.join(os.path.dirname(filepath), BASECALLER_RESULTS, ionstats_file) ionstats_path_CA = os.path.join(os.path.dirname(filepath), ionstats_file) if os.path.exists(ionstats_path): file_list.append(ionstats_path) elif os.path.exists(ionstats_path_CA): file_list.append(ionstats_path_CA) else: raise Exception('') ionstats.reduce_stats(file_list, ionstats_file) # Make alignment_rate_plot.png stats = json.load(open(ionstats_file)) l = stats['full']['max_read_length'] graph_max_x = int(round(l + 49, -2)) ionstats_plots.alignment_rate_plot( 'alignStats_err.json', 'ionstats_basecaller.json', 'alignment_rate_plot.png', int(graph_max_x)) print("Ionstats plot created successfully") except: print("ERROR: Failed to generate alignment rate plot") try:
def merge_basecaller_stats(dirs, BASECALLER_RESULTS, SIGPROC_RESULTS, flows, floworder):
    """Aggregate per-block basecaller outputs into composite results.

    Merges datasets_basecaller.json, ionstats_basecaller.json, TF metrics and
    BaseCaller.json across the block directories in `dirs`, writes a composite
    return code, and renders the composite basecaller plots.

    NOTE(review): uses iterkeys() and the octal literal 0002 — this block
    requires Python 2.
    """
    ########################################################
    # Merge datasets_basecaller.json                       #
    ########################################################
    block_datasets_json = []
    combined_datasets_json = {}
    for dir in dirs:
        current_datasets_path = os.path.join(dir, BASECALLER_RESULTS, 'datasets_basecaller.json')
        try:
            f = open(current_datasets_path, 'r')
            block_datasets_json.append(json.load(f))
            f.close()
        except:
            # Best effort: a block without a parseable json is skipped.
            printtime("ERROR: skipped %s" % current_datasets_path)
    # Abort if no block produced a usable datasets_basecaller.json.
    if (not block_datasets_json) or ('datasets' not in block_datasets_json[0]) or ('read_groups' not in block_datasets_json[0]):
        printtime("merge_basecaller_results: no block contained a valid datasets_basecaller.json, aborting")
        return
    # Use the first block's json as the template, then accumulate counts into it.
    combined_datasets_json = copy.deepcopy(block_datasets_json[0])
    for dataset_idx in range(len(combined_datasets_json['datasets'])):
        combined_datasets_json['datasets'][dataset_idx]['read_count'] = 0
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['datasets'][dataset_idx]['read_count'] += current_datasets_json['datasets'][dataset_idx].get("read_count", 0)
    for read_group in combined_datasets_json['read_groups'].iterkeys():
        combined_datasets_json['read_groups'][read_group]['Q20_bases'] = 0;
        combined_datasets_json['read_groups'][read_group]['total_bases'] = 0;
        combined_datasets_json['read_groups'][read_group]['read_count'] = 0;
        # 'nomatch' groups start unfiltered; all others start filtered and stay
        # filtered only if every block filtered them (AND-accumulated below).
        combined_datasets_json['read_groups'][read_group]['filtered'] = True if 'nomatch' not in read_group else False
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['read_groups'][read_group]['Q20_bases'] += current_datasets_json['read_groups'].get(read_group, {}).get("Q20_bases", 0)
            combined_datasets_json['read_groups'][read_group]['total_bases'] += current_datasets_json['read_groups'].get(read_group, {}).get("total_bases", 0)
            combined_datasets_json['read_groups'][read_group]['read_count'] += current_datasets_json['read_groups'].get(read_group, {}).get("read_count", 0)
            combined_datasets_json['read_groups'][read_group]['filtered'] &= current_datasets_json['read_groups'].get(read_group, {}).get("filtered", True)
    try:
        f = open(os.path.join(BASECALLER_RESULTS, 'datasets_basecaller.json'), "w")
        json.dump(combined_datasets_json, f, indent=4)
        f.close()
    except:
        # NOTE(review): message uses ';' where ':' was probably intended.
        printtime("ERROR; Failed to write merged datasets_basecaller.json")
        traceback.print_exc()

    ########################################################
    # Merge ionstats_basecaller.json:                      #
    # First across blocks, then across barcodes            #
    ########################################################
    try:
        composite_filename_list = []
        for dataset in combined_datasets_json["datasets"]:
            # Merge this dataset's stats across all blocks that produced one.
            composite_filename = os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json')
            barcode_filename_list = [os.path.join(dir, BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json') for dir in dirs]
            barcode_filename_list = [filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list, composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)
        # Then merge the per-dataset composites into the run-level file.
        ionstats.reduce_stats(composite_filename_list, os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))
        ionstats.generate_legacy_basecaller_files(
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, ''))
    except:
        printtime("ERROR: Failed to merge ionstats_basecaller.json")
        traceback.print_exc()

    ########################################################
    # write composite return code                          #
    ########################################################
    try:
        # Only full 96-block runs get a composite return code: start at 96 and
        # subtract one per block whose blockstatus reports Basecaller=0.
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:
                blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):
                    with open(blockstatus_return_code_file, 'r') as f:
                        text = f.read()
                        if 'Basecaller=0' in text:
                            composite_return_code -= 1
            composite_return_code_file = os.path.join(BASECALLER_RESULTS, "composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                # Python-2 octal literal: allow group write on the new file.
                os.umask(0002)
                f = open(composite_return_code_file, 'a')
                f.write(str(composite_return_code))
                f.close()
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except:
        traceback.print_exc()

    ##################################################
    # generate TF Metrics                            #
    # look for both keys and append same file        #
    ##################################################
    printtime("Merging TFMapper metrics and generating TF plots")
    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS, dirs, floworder)
    except:
        printtime("ERROR: Merging TFMapper metrics failed")

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")
    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                # merge() takes the block directories, not the json paths.
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)
        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except:
        printtime("Merging BaseCaller.json files failed")

    ###############################################
    # Generate composite plots
    ###############################################
    printtime("Build composite basecaller graphs")
    # X-axis limit derived from flow count; fall back to 400 on bad input.
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 400

    # Plot read length sparkline
    for dataset in combined_datasets_json["datasets"]:
        ionstats_plots.read_length_sparkline(
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.sparkline.png'),
            graph_max_x)

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto.png'),
        graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto2.png'),
        graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'quality_histogram.png'))

    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except:
        printtime ("ERROR: Wells beadogram generation failed")
        traceback.print_exc()

    printtime("Finished merging basecaller stats")
def post_basecalling(BASECALLER_RESULTS, expName, resultsName, flows):
    """Post-process basecaller output for a single run.

    Generates per-dataset ionstats_basecaller.json files and sparklines,
    merges them into a run-level ionstats_basecaller.json, writes the legacy
    quality.summary, and renders the read-length / quality histograms.

    Raises Exception if datasets_basecaller.json is missing or unparseable.
    expName and resultsName are unused here; kept for caller compatibility.
    """
    datasets_basecaller_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")

    if not os.path.exists(datasets_basecaller_path):
        printtime("ERROR: %s does not exist" % datasets_basecaller_path)
        raise Exception("ERROR: %s does not exist" % datasets_basecaller_path)

    datasets_basecaller = {}
    try:
        # was: open()/close() without 'with' — handle leaked on parse error
        with open(datasets_basecaller_path, 'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:  # was bare 'except:'
        printtime("ERROR: problem parsing %s" % datasets_basecaller_path)
        raise Exception("ERROR: problem parsing %s" % datasets_basecaller_path)

    # X-axis limit derived from flow count; fall back to 400 on bad input.
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except (TypeError, ValueError):
        graph_max_x = 400

    quality_file_list = []
    for dataset in datasets_basecaller["datasets"]:
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        # Call ionstats utility to generate alignment-independent metrics for current unmapped BAM
        ionstats.generate_ionstats_basecaller(
            os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']),
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'),
            graph_max_x)

        # Plot read length sparkline
        ionstats_plots.read_length_sparkline(
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.sparkline.png'),
            graph_max_x)

        quality_file_list.append(
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'))

    # Merge ionstats_basecaller files from individual barcodes/dataset
    ionstats.reduce_stats(quality_file_list, os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))

    # Generate legacy stats file: quality.summary
    ionstats.generate_legacy_basecaller_files(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, ''))

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto.png'),
        graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto2.png'),
        graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'quality_histogram.png'))

    printtime("Finished basecaller post processing")
def merge_ionstats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_datasets):
    """Merge per-block ionstats files (json and h5) across blocks, then merge
    the per-dataset composites across barcodes.

    dirs -- block directories; basecaller_datasets -- parsed
    datasets_basecaller.json content ('datasets' + 'read_groups').
    """
    # Merge *ionstats_alignment.json files across blocks
    # DEBUG: check if merging is commutative
    try:  # DEBUG
        composite_filename_list = []
        composite_h5_filename_list = []
        for dataset in basecaller_datasets["datasets"]:

            # filter out based on flag: keep the dataset if ANY of its read
            # groups is not marked 'filtered'
            keep_dataset = False
            for rg_name in dataset["read_groups"]:
                if not basecaller_datasets["read_groups"][rg_name].get(
                        'filtered', False):
                    keep_dataset = True
            if not keep_dataset:
                # NOTE(review): rg_name here is whatever the loop above ended
                # on — with several read groups the logged name may be
                # arbitrary.
                printtime("INFO: filter out %s" % rg_name)
                continue

            # Reference presence decides which stats flavor this dataset has.
            read_group = dataset['read_groups'][0]
            reference = basecaller_datasets['read_groups'][read_group][
                'reference']
            if reference:
                ionstats_folder = ALIGNMENT_RESULTS
                ionstats_file = 'ionstats_alignment.json'
            else:
                ionstats_folder = BASECALLER_RESULTS
                ionstats_file = 'ionstats_basecaller.json'

            # Merge this dataset's stats across all blocks that produced one.
            block_filename_list = [
                os.path.join(dir, ionstats_folder,
                             dataset['file_prefix'] + '.' + ionstats_file)
                for dir in dirs
            ]
            block_filename_list = [
                filename for filename in block_filename_list
                if os.path.exists(filename)
            ]  # TODO, remove this check and provide list with valid blocks
            composite_filename = os.path.join(
                ionstats_folder,
                dataset['file_prefix'] + '.composite_allblocks_' + ionstats_file)
            ionstats.reduce_stats(block_filename_list, composite_filename)
            composite_filename_list.append(composite_filename)

            if reference:
                # Same merge for the per-dataset error-summary h5 files.
                block_h5_filename_list = [
                    os.path.join(
                        dir, ALIGNMENT_RESULTS,
                        dataset['file_prefix'] + '.ionstats_error_summary.h5')
                    for dir in dirs
                ]
                block_h5_filename_list = [
                    filename for filename in block_h5_filename_list
                    if os.path.exists(filename)
                ]  # TODO, remove this check and provide list with valid blocks
                composite_h5_filename = os.path.join(
                    ALIGNMENT_RESULTS,
                    dataset['file_prefix'] + '.ionstats_error_summary.h5')
                ionstats.reduce_stats_h5(block_h5_filename_list,
                                         composite_h5_filename)
                composite_h5_filename_list.append(composite_h5_filename)

        # Run-level merge: per-block ionstats_alignment.json across blocks.
        block_filename_list = [
            os.path.join(dir, ALIGNMENT_RESULTS, 'ionstats_alignment.json')
            for dir in dirs
        ]
        block_filename_list = [
            filename for filename in block_filename_list
            if os.path.exists(filename)
        ]
        composite_filename = os.path.join(
            ALIGNMENT_RESULTS, 'composite_allblocks_ionstats_alignment.json')
        ionstats.reduce_stats(block_filename_list, composite_filename)

        # Run-level merge of the per-block error-summary h5 files.
        block_h5_filename_list = [
            os.path.join(dir, ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')
            for dir in dirs
        ]
        block_h5_filename_list = [
            filename for filename in block_h5_filename_list
            if os.path.exists(filename)
        ]
        composite_filename = os.path.join(
            ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')  # composite_allblocks
        if len(block_h5_filename_list):
            ionstats.reduce_stats_h5(block_h5_filename_list, composite_filename)

        # DEBUG: this is used to check if merging is commutative, the length check is necessary in case all datasets are 'filtered' (e.g.)
        if len(composite_filename_list) > 0:
            ionstats.reduce_stats(
                composite_filename_list,
                os.path.join(ALIGNMENT_RESULTS,
                             'composite_allbarcodes_ionstats_alignment.json'))
        if len(composite_h5_filename_list) > 0:
            ionstats.reduce_stats_h5(
                composite_h5_filename_list,
                os.path.join(
                    ALIGNMENT_RESULTS,
                    'composite_allbarcodes_ionstats_error_summary.h5'))
    except:
        # NOTE(review): bare except — swallows everything, including
        # SystemExit/KeyboardInterrupt.
        printtime("ERROR: Failed to merge ionstats_alignment.json")
        traceback.print_exc()
def create_ionstats(BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_meta_information, basecaller_datasets, graph_max_x, activate_barcode_filter, evaluate_hp): # TEST basecaller_bam_file_list = [] alignment_bam_file_list = [] ionstats_alignment_file_list = [] if evaluate_hp: ionstats_alignment_h5_file_list = [] ionstats_basecaller_file_list = [] for dataset in basecaller_datasets["datasets"]: keep_dataset = False for rg_name in dataset["read_groups"]: if not basecaller_datasets["read_groups"][rg_name].get( 'filtered', False): keep_dataset = True filtered = not keep_dataset # filter out based on flag if activate_barcode_filter: if filtered: continue # skip non-existing bam file if int(dataset["read_count"]) == 0: continue read_group = dataset['read_groups'][0] reference = basecaller_datasets['read_groups'][read_group]['reference'] if reference and not filtered: # TEST alignment_bam_file_list.append( os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam')) ionstats.generate_ionstats_alignment( [ os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam') ], os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json'), os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_error_summary.h5') if evaluate_hp else None, basecaller_meta_information if evaluate_hp else None, graph_max_x) ionstats_alignment_file_list.append( os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json')) if evaluate_hp: ionstats_alignment_h5_file_list.append( os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_error_summary.h5')) else: # TEST basecaller_bam_file_list.append( os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])) ionstats.generate_ionstats_basecaller( [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])], os.path.join( BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'), graph_max_x) ionstats_basecaller_file_list.append( os.path.join( 
BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json')) # Merge ionstats files from individual (barcoded) datasets if len(ionstats_alignment_file_list) > 0: ionstats.reduce_stats( ionstats_alignment_file_list, os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd, shell=True) if ret != 1: printtime( "ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) ionstats.generate_ionstats_alignment( ['empty_dummy.bam'], os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'), os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5') if evaluate_hp else None, basecaller_meta_information if evaluate_hp else None, graph_max_x) except: pass if len(ionstats_basecaller_file_list) > 0: ionstats.reduce_stats( ionstats_basecaller_file_list, os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd, shell=True) if ret != 1: printtime( "ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) 
ionstats.generate_ionstats_basecaller( ['empty_dummy.bam'], os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json'), graph_max_x) except: pass ionstatslist = [] a = os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json') b = os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json') if os.path.exists(a): ionstatslist.append(a) if os.path.exists(b): ionstatslist.append(b) if len(ionstatslist) > 0: ionstats.reduce_stats( ionstatslist, os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller_with_aligninfos.json')) ionstats.reduce_stats( reversed(ionstatslist), os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json')) if evaluate_hp and len(ionstats_alignment_h5_file_list ) > 0 and basecaller_meta_information: ionstats.reduce_stats_h5( ionstats_alignment_h5_file_list, os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')) '''
def merge_alignment_stats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, flows):
    """Merge per-block ionstats_alignment.json files (first across blocks,
    then across barcodes), render the alignment plots, and aggregate the
    barcode alignment summary if a barcodeList.txt can be located.
    """
    datasets_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")
    datasets_basecaller = {}
    try:
        # was: open()/close() without 'with' — handle leaked on parse error
        with open(datasets_path, 'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:  # was bare 'except:'
        printtime("ERROR: problem parsing %s" % datasets_path)
        traceback.print_exc()
        return

    # X-axis limit derived from flow count; fall back to 800 on bad input.
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except (TypeError, ValueError):
        graph_max_x = 800

    ########################################################
    # Merge ionstats_alignment.json
    # First across blocks, then across barcoded
    ########################################################
    try:
        composite_filename_list = []
        for dataset in datasets_basecaller["datasets"]:
            # Merge this dataset's stats across all blocks that produced one.
            composite_filename = os.path.join(
                ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json')
            barcode_filename_list = [
                os.path.join(dir, ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json')
                for dir in dirs]
            barcode_filename_list = [
                filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list, composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)
        # Then merge the per-dataset composites into the run-level file.
        ionstats.reduce_stats(
            composite_filename_list,
            os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'))
    except Exception:  # was bare 'except:'
        printtime("ERROR: Failed to merge ionstats_alignment.json")
        traceback.print_exc()

    # Use ionstats alignment results to generate plots
    merged_ionstats = os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json')
    ionstats_plots.alignment_rate_plot2(merged_ionstats, 'alignment_rate_plot.png', graph_max_x)
    ionstats_plots.base_error_plot(merged_ionstats, 'base_error_plot.png', graph_max_x)
    ionstats_plots.old_aq_length_histogram(merged_ionstats, 'Filtered_Alignments_Q10.png', 'AQ10', 'red')
    ionstats_plots.old_aq_length_histogram(merged_ionstats, 'Filtered_Alignments_Q17.png', 'AQ17', 'yellow')
    ionstats_plots.old_aq_length_histogram(merged_ionstats, 'Filtered_Alignments_Q20.png', 'AQ20', 'green')
    ionstats_plots.old_aq_length_histogram(merged_ionstats, 'Filtered_Alignments_Q47.png', 'AQ47', 'purple')

    # Generate alignment_barcode_summary.csv:
    # search upward (up to four levels) for barcodeList.txt
    for candidate in ('barcodeList.txt',
                      '../barcodeList.txt',
                      '../../barcodeList.txt',
                      '../../../barcodeList.txt',
                      '../../../../barcodeList.txt'):
        barcodelist_path = candidate
        if os.path.exists(barcodelist_path):
            break
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment("./", barcodelist_path)
def alignment_post_processing(
        libraryName,
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        flows,
        mark_duplicates):
    """Post-process alignment results: generate per-dataset ionstats alignment
    metrics and legacy alignment.summary files, merge them, build the merged
    rawlib.bam on barcoded runs, aggregate the barcode summary, and render the
    alignment plots.
    """
    datasets_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")
    datasets_basecaller = {}
    try:
        # was: open()/close() without 'with' — handle leaked on parse error
        with open(datasets_path, 'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:  # was bare 'except:'
        printtime("ERROR: problem parsing %s" % datasets_path)
        traceback.print_exc()
        return

    # X-axis limit derived from flow count; fall back to 800 on bad input.
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except (TypeError, ValueError):
        graph_max_x = 800

    alignment_file_list = []
    for dataset in datasets_basecaller["datasets"]:
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        ionstats.generate_ionstats_alignment(
            os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam'),
            os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json'),
            graph_max_x)
        # Legacy per-dataset alignment.summary
        ionstats2alignstats(
            libraryName,
            os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json'),
            os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.alignment.summary'))
        alignment_file_list.append(
            os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json'))

    # In Progress: merge ionstats alignment results
    ionstats.reduce_stats(
        alignment_file_list,
        os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'))
    ionstats2alignstats(
        libraryName,
        os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'),
        os.path.join(ALIGNMENT_RESULTS, 'alignment.summary'))

    # Special legacy post-processing.
    # Generate merged rawlib.bam on barcoded runs
    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS, 'rawlib.bam')
    if not os.path.exists(composite_bam_filename):
        bam_file_list = []
        for dataset in datasets_basecaller["datasets"]:
            bam_name = os.path.join(
                ALIGNMENT_RESULTS, os.path.basename(dataset['file_prefix']) + '.bam')
            if os.path.exists(bam_name):
                bam_file_list.append(bam_name)
        blockprocessing.merge_bam_files(
            bam_file_list,
            composite_bam_filename,
            composite_bam_filename + '.bai',
            mark_duplicates)

    # Generate alignment_barcode_summary.csv
    # TODO: use datasets_basecaller.json + *.ionstats_alignment.json instead of barcodeList.txt and alignment_*.summary
    # search upward (up to four levels) for barcodeList.txt
    for candidate in ('barcodeList.txt',
                      '../barcodeList.txt',
                      '../../barcodeList.txt',
                      '../../../barcodeList.txt',
                      '../../../../barcodeList.txt'):
        barcodelist_path = candidate
        if os.path.exists(barcodelist_path):
            break
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment("./", barcodelist_path)

    # These graphs are likely obsolete
    #makeAlignGraphs()

    # In Progress: Use ionstats alignment results to generate plots
    merged_ionstats = os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json')
    ionstats_plots.alignment_rate_plot2(merged_ionstats, 'alignment_rate_plot.png', graph_max_x)
    ionstats_plots.base_error_plot(merged_ionstats, 'base_error_plot.png', graph_max_x)
    ionstats_plots.old_aq_length_histogram(merged_ionstats, 'Filtered_Alignments_Q10.png', 'AQ10', 'red')
    ionstats_plots.old_aq_length_histogram(merged_ionstats, 'Filtered_Alignments_Q17.png', 'AQ17', 'yellow')
    ionstats_plots.old_aq_length_histogram(merged_ionstats, 'Filtered_Alignments_Q20.png', 'AQ20', 'green')
    ionstats_plots.old_aq_length_histogram(merged_ionstats, 'Filtered_Alignments_Q47.png', 'AQ47', 'purple')
BASECALLER_RESULTS = 'basecaller_results' ionstats_file = 'ionstats_basecaller.json' file_list = [] for filepath in args.files: ionstats_path = os.path.join(os.path.dirname(filepath), BASECALLER_RESULTS, ionstats_file) ionstats_path_CA = os.path.join(os.path.dirname(filepath), ionstats_file) if os.path.exists(ionstats_path): file_list.append(ionstats_path) elif os.path.exists(ionstats_path_CA): file_list.append(ionstats_path_CA) else: raise Exception('') ionstats.reduce_stats(file_list, ionstats_file) # Make alignment_rate_plot.png stats = json.load(open(ionstats_file)) l = stats['full']['max_read_length'] graph_max_x = int(round(l + 49, -2)) ionstats_plots.alignment_rate_plot('alignStats_err.json', 'ionstats_basecaller.json', 'alignment_rate_plot.png', int(graph_max_x)) print("Ionstats plot created successfully") except: print("ERROR: Failed to generate alignment rate plot") try:
def merge_ionstats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_datasets):
    """Merge per-block ionstats files across all analysis blocks.

    For every non-filtered dataset, the per-block
    ``<file_prefix>.ionstats_alignment.json`` (datasets with a reference) or
    ``<file_prefix>.ionstats_basecaller.json`` (no reference) files found under
    each block directory in *dirs* are reduced into a single
    ``<file_prefix>.composite_allblocks_...`` file; aligned datasets also get
    their ``ionstats_error_summary.h5`` files merged.  Finally the whole-chip
    ``ionstats_alignment.json`` / ``ionstats_error_summary.h5`` files are
    merged across blocks, and all-barcode composites are produced as a
    DEBUG cross-check that merging is commutative.

    Errors are logged via printtime/traceback and never propagated.
    """
    # Merge *ionstats_alignment.json files across blocks
    # DEBUG: check if merging is commutative
    try:  # DEBUG
        composite_filename_list = []
        composite_h5_filename_list = []
        for dataset in basecaller_datasets["datasets"]:

            # Skip datasets in which every read group is flagged 'filtered'.
            keep_dataset = False
            for rg_name in dataset["read_groups"]:
                if not basecaller_datasets["read_groups"][rg_name].get('filtered', False):
                    keep_dataset = True
            if not keep_dataset:
                # NOTE(review): rg_name here is whichever read group was
                # iterated last, so the message names only one group of a
                # possibly multi-group dataset.
                printtime("INFO: filter out %s" % rg_name)
                continue

            read_group = dataset['read_groups'][0]
            reference = basecaller_datasets['read_groups'][read_group]['reference']

            # Aligned datasets carry alignment stats, unaligned ones only
            # basecaller stats; pick the matching folder and file name.
            if reference:
                ionstats_folder = ALIGNMENT_RESULTS
                ionstats_file = 'ionstats_alignment.json'
            else:
                ionstats_folder = BASECALLER_RESULTS
                ionstats_file = 'ionstats_basecaller.json'

            block_filename_list = [
                os.path.join(block_dir, ionstats_folder, dataset['file_prefix'] + '.' + ionstats_file)
                for block_dir in dirs
            ]
            # TODO, remove this check and provide list with valid blocks
            block_filename_list = [
                filename for filename in block_filename_list if os.path.exists(filename)
            ]
            composite_filename = os.path.join(
                ionstats_folder,
                dataset['file_prefix'] + '.composite_allblocks_' + ionstats_file)
            ionstats.reduce_stats(block_filename_list, composite_filename)
            composite_filename_list.append(composite_filename)

            if reference:
                block_h5_filename_list = [
                    os.path.join(block_dir, ALIGNMENT_RESULTS,
                                 dataset['file_prefix'] + '.ionstats_error_summary.h5')
                    for block_dir in dirs
                ]
                # TODO, remove this check and provide list with valid blocks
                block_h5_filename_list = [
                    filename for filename in block_h5_filename_list if os.path.exists(filename)
                ]
                composite_h5_filename = os.path.join(
                    ALIGNMENT_RESULTS,
                    dataset['file_prefix'] + '.ionstats_error_summary.h5')
                ionstats.reduce_stats_h5(block_h5_filename_list, composite_h5_filename)
                composite_h5_filename_list.append(composite_h5_filename)

        # Whole-chip merge of the per-block top-level alignment stats.
        block_filename_list = [
            os.path.join(block_dir, ALIGNMENT_RESULTS, 'ionstats_alignment.json')
            for block_dir in dirs
        ]
        block_filename_list = [
            filename for filename in block_filename_list if os.path.exists(filename)
        ]
        composite_filename = os.path.join(
            ALIGNMENT_RESULTS, 'composite_allblocks_ionstats_alignment.json')
        ionstats.reduce_stats(block_filename_list, composite_filename)

        block_h5_filename_list = [
            os.path.join(block_dir, ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')
            for block_dir in dirs
        ]
        block_h5_filename_list = [
            filename for filename in block_h5_filename_list if os.path.exists(filename)
        ]
        composite_filename = os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')  # composite_allblocks
        if len(block_h5_filename_list):
            ionstats.reduce_stats_h5(block_h5_filename_list, composite_filename)

        # DEBUG: this is used to check if merging is commutative, the length
        # check is necessary in case all datasets are 'filtered' (e.g.)
        if len(composite_filename_list) > 0:
            ionstats.reduce_stats(
                composite_filename_list,
                os.path.join(ALIGNMENT_RESULTS, 'composite_allbarcodes_ionstats_alignment.json'))
        if len(composite_h5_filename_list) > 0:
            ionstats.reduce_stats_h5(
                composite_h5_filename_list,
                os.path.join(ALIGNMENT_RESULTS, 'composite_allbarcodes_ionstats_error_summary.h5'))
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are no longer swallowed; merge failures remain non-fatal.
        printtime("ERROR: Failed to merge ionstats_alignment.json")
        traceback.print_exc()
def create_ionstats( BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_meta_information, basecaller_datasets, graph_max_x, activate_barcode_filter): # TEST basecaller_bam_file_list = [] alignment_bam_file_list = [] ionstats_alignment_file_list = [] ionstats_alignment_h5_file_list = [] ionstats_basecaller_file_list = [] for dataset in basecaller_datasets["datasets"]: keep_dataset = False for rg_name in dataset["read_groups"]: if not basecaller_datasets["read_groups"][rg_name].get('filtered',False): keep_dataset = True filtered = not keep_dataset # filter out based on flag if activate_barcode_filter: if filtered: continue # skip non-existing bam file if int(dataset["read_count"]) == 0: continue read_group = dataset['read_groups'][0] reference = basecaller_datasets['read_groups'][read_group]['reference'] if reference and not filtered: # TEST alignment_bam_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam')) ionstats.generate_ionstats_alignment( [os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam')], os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'), os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'), basecaller_meta_information, graph_max_x) ionstats_alignment_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json')) ionstats_alignment_h5_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5')) else: # TEST basecaller_bam_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])) ionstats.generate_ionstats_basecaller( [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])], os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'), os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'), # TODO, not needed basecaller_meta_information, graph_max_x) 
ionstats_basecaller_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json')) # Merge ionstats files from individual (barcoded) datasets if len(ionstats_alignment_file_list) > 0: ionstats.reduce_stats(ionstats_alignment_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd,shell=True) if ret != 1: printtime("ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) ionstats.generate_ionstats_alignment( ['empty_dummy.bam'], os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'), os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5'), basecaller_meta_information, graph_max_x) except: pass if len(ionstats_basecaller_file_list) > 0: ionstats.reduce_stats(ionstats_basecaller_file_list,os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd,shell=True) if ret != 1: printtime("ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) 
ionstats.generate_ionstats_basecaller( ['empty_dummy.bam'], os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json'), os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_error_summary.h5'), # TODO, not needed basecaller_meta_information, graph_max_x) except: pass ionstatslist = [] a = os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json') b = os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json') if os.path.exists(a): ionstatslist.append(a) if os.path.exists(b): ionstatslist.append(b) if len(ionstatslist) > 0: ionstats.reduce_stats( ionstatslist, os.path.join(BASECALLER_RESULTS,'ionstats_basecaller_with_aligninfos.json')) ionstats.reduce_stats( reversed(ionstatslist), os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json')) if len(ionstats_alignment_h5_file_list) > 0 and basecaller_meta_information: ionstats.reduce_stats_h5(ionstats_alignment_h5_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_error_summary.h5')) '''