args = parser.parse_args()

# Optional step 1: merge the input BAM files into a single BAM and compute
# alignment statistics for the merged file.
if args.merge_out and len(args.files) > 1:
    outputBAM = args.merge_out
    # Parenthesised single-argument print behaves identically as a Python 2
    # print statement and as a Python 3 function call.
    print("Merging bam files to %s, mark duplicates is %s" % (outputBAM, args.duplicates))
    merge_bam_files(args.files, outputBAM, outputBAM.replace('.bam', '.bam.bai'), args.duplicates)

    # generate ionstats files from merged BAM
    graph_max_x = 400
    if outputBAM == 'rawlib.bam':
        ionstats_file = 'ionstats_alignment.json'
    else:
        ionstats_file = outputBAM.split('.bam')[0] + '.ionstats_alignment.json'
    ionstats.generate_ionstats_alignment(outputBAM, ionstats_file, graph_max_x)

# Optional step 2: draw the summary plots from ionstats_alignment.json in the
# current directory.  Plotting is best-effort: failures are reported on stderr
# and do not abort the script.
if args.merge_plots:
    print("Generating plots for merged report")
    ionstats_file = 'ionstats_alignment.json'
    try:
        # Close the stats file deterministically instead of relying on
        # garbage collection of an anonymous file object.
        with open(ionstats_file) as stats_fh:
            stats = json.load(stats_fh)
        max_read_length = stats['full']['max_read_length']
        # Pad the longest read by 49 and round to the nearest hundred to get
        # the x-axis limit.
        graph_max_x = int(round(max_read_length + 49, -2))

        # Make alignment_rate_plot.png and base_error_plot.png
        ionstats_plots.alignment_rate_plot2(ionstats_file, 'alignment_rate_plot.png', graph_max_x)
        ionstats_plots.base_error_plot(ionstats_file, 'base_error_plot.png', graph_max_x)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        traceback.print_exc()
def create_ionstats(BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_meta_information, basecaller_datasets, graph_max_x, activate_barcode_filter, evaluate_hp): # TEST basecaller_bam_file_list = [] alignment_bam_file_list = [] ionstats_alignment_file_list = [] if evaluate_hp: ionstats_alignment_h5_file_list = [] ionstats_basecaller_file_list = [] for dataset in basecaller_datasets["datasets"]: keep_dataset = False for rg_name in dataset["read_groups"]: if not basecaller_datasets["read_groups"][rg_name].get( 'filtered', False): keep_dataset = True filtered = not keep_dataset # filter out based on flag if activate_barcode_filter: if filtered: continue # skip non-existing bam file if int(dataset["read_count"]) == 0: continue read_group = dataset['read_groups'][0] reference = basecaller_datasets['read_groups'][read_group]['reference'] if reference and not filtered: # TEST alignment_bam_file_list.append( os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam')) ionstats.generate_ionstats_alignment( [ os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam') ], os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json'), os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_error_summary.h5') if evaluate_hp else None, basecaller_meta_information if evaluate_hp else None, graph_max_x) ionstats_alignment_file_list.append( os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json')) if evaluate_hp: ionstats_alignment_h5_file_list.append( os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_error_summary.h5')) else: # TEST basecaller_bam_file_list.append( os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])) ionstats.generate_ionstats_basecaller( [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])], os.path.join( BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'), graph_max_x) ionstats_basecaller_file_list.append( os.path.join( 
BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json')) # Merge ionstats files from individual (barcoded) datasets if len(ionstats_alignment_file_list) > 0: ionstats.reduce_stats( ionstats_alignment_file_list, os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd, shell=True) if ret != 1: printtime( "ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) ionstats.generate_ionstats_alignment( ['empty_dummy.bam'], os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'), os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5') if evaluate_hp else None, basecaller_meta_information if evaluate_hp else None, graph_max_x) except: pass if len(ionstats_basecaller_file_list) > 0: ionstats.reduce_stats( ionstats_basecaller_file_list, os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd, shell=True) if ret != 1: printtime( "ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) 
ionstats.generate_ionstats_basecaller( ['empty_dummy.bam'], os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json'), graph_max_x) except: pass ionstatslist = [] a = os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json') b = os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json') if os.path.exists(a): ionstatslist.append(a) if os.path.exists(b): ionstatslist.append(b) if len(ionstatslist) > 0: ionstats.reduce_stats( ionstatslist, os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller_with_aligninfos.json')) ionstats.reduce_stats( reversed(ionstatslist), os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json')) if evaluate_hp and len(ionstats_alignment_h5_file_list ) > 0 and basecaller_meta_information: ionstats.reduce_stats_h5( ionstats_alignment_h5_file_list, os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')) '''
def alignment_post_processing(
        libraryName,
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        flows,
        mark_duplicates):
    """Produce alignment statistics, a merged BAM and plots for a run.

    Steps, in order:

    1. Load ``BASECALLER_RESULTS/datasets_basecaller.json``; on failure the
       error is logged and the function returns without doing anything else.
    2. For each listed dataset whose basecaller BAM exists, generate
       ``<file_prefix>.ionstats_alignment.json`` and
       ``<file_prefix>.alignment.summary`` in ``ALIGNMENT_RESULTS``.
    3. Merge the per-dataset ionstats files into ``ionstats_alignment.json``
       and derive ``alignment.summary`` from it.
    4. If ``ALIGNMENT_RESULTS/rawlib.bam`` is missing, merge the existing
       per-dataset BAM files into it.
    5. If a ``barcodeList.txt`` is found in the current directory or up to
       four levels above it, run ``aggregate_alignment``.
    6. Write the alignment-rate, base-error and AQ histogram PNG files into
       the current directory.

    Args:
        libraryName: forwarded to ``ionstats2alignstats``.
        BASECALLER_RESULTS: directory with ``datasets_basecaller.json`` and
            the basecaller BAM files.
        ALIGNMENT_RESULTS: directory with the aligned BAM files; receives the
            generated statistics.
        flows: number of flows; used to derive the plot x-axis limit as
            ``50 * ceil(0.014 * flows)``, falling back to 800 when it cannot
            be converted.
        mark_duplicates: forwarded to ``blockprocessing.merge_bam_files``.

    Returns:
        None.
    """
    datasets_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")
    try:
        # The context manager closes the file even when json.load() raises.
        with open(datasets_path, 'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % datasets_path)
        traceback.print_exc()
        return

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        # flows missing or not numeric: fall back to a fixed axis limit.
        graph_max_x = 800

    merged_ionstats = os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json')

    alignment_file_list = []

    for dataset in datasets_basecaller["datasets"]:
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        dataset_ionstats = os.path.join(
            ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json')

        ionstats.generate_ionstats_alignment(
            os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam'),
            dataset_ionstats,
            graph_max_x)
        ionstats2alignstats(
            libraryName,
            dataset_ionstats,
            os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.alignment.summary'))

        alignment_file_list.append(dataset_ionstats)

    # In Progress: merge ionstats alignment results
    ionstats.reduce_stats(alignment_file_list, merged_ionstats)
    ionstats2alignstats(
        libraryName,
        merged_ionstats,
        os.path.join(ALIGNMENT_RESULTS, 'alignment.summary'))

    # Special legacy post-processing.
    # Generate merged rawlib.bam on barcoded runs
    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS, 'rawlib.bam')
    if not os.path.exists(composite_bam_filename):
        candidate_bams = [
            os.path.join(ALIGNMENT_RESULTS, os.path.basename(dataset['file_prefix']) + '.bam')
            for dataset in datasets_basecaller["datasets"]]
        bam_file_list = [bam_name for bam_name in candidate_bams
                         if os.path.exists(bam_name)]

        blockprocessing.merge_bam_files(
            bam_file_list,
            composite_bam_filename,
            composite_bam_filename + '.bai',
            mark_duplicates)

    # Generate alignment_barcode_summary.csv
    # TODO: use datasets_basecaller.json + *.ionstats_alignment.json instead of
    # barcodeList.txt and alignment_*.summary
    # Look for barcodeList.txt in the current directory, then in each of the
    # four parent levels above it; the first hit wins.
    barcodelist_candidates = ['../' * depth + 'barcodeList.txt' for depth in range(5)]
    barcodelist_path = next(
        (path for path in barcodelist_candidates if os.path.exists(path)), None)
    if barcodelist_path is not None:
        printtime("Barcode processing, aggregate")
        aggregate_alignment("./", barcodelist_path)

    # In Progress: Use ionstats alignment results to generate plots
    ionstats_plots.alignment_rate_plot2(
        merged_ionstats, 'alignment_rate_plot.png', graph_max_x)
    ionstats_plots.base_error_plot(
        merged_ionstats, 'base_error_plot.png', graph_max_x)

    # One histogram per alignment-quality level: (output file, key, colour).
    aq_histograms = (
        ('Filtered_Alignments_Q10.png', 'AQ10', 'red'),
        ('Filtered_Alignments_Q17.png', 'AQ17', 'yellow'),
        ('Filtered_Alignments_Q20.png', 'AQ20', 'green'),
        ('Filtered_Alignments_Q47.png', 'AQ47', 'purple'),
    )
    for png_name, aq_key, colour in aq_histograms:
        ionstats_plots.old_aq_length_histogram(
            merged_ionstats, png_name, aq_key, colour)
printtime("Merging bam files to %s, mark duplicates is %s" % (outputBAM, args.duplicates)) try: merge_bam_files(args.files, outputBAM, outputBAM.replace('.bam','.bam.bai'), args.duplicates) except: traceback.print_exc() if args.align_stats and len(args.files) > 0: # generate ionstats files from merged BAMs printtime("Generating alignment stats for %s" % ', '.join(args.files)) graph_max_x = 400 for bamfile in args.files: if bamfile == 'rawlib.bam': ionstats_file = 'ionstats_alignment.json' else: ionstats_file = bamfile.split('.bam')[0] + '.ionstats_alignment.json' ionstats.generate_ionstats_alignment(bamfile, ionstats_file, graph_max_x) if args.merge_plots: printtime("Generating plots for merged report") ionstats_file = 'ionstats_alignment.json' try: stats = json.load(open(ionstats_file)) l = stats['full']['max_read_length'] graph_max_x = int(round(l + 49, -2)) # Make alignment_rate_plot.png and base_error_plot.png ionstats_plots.alignment_rate_plot2(ionstats_file, 'alignment_rate_plot.png', int(graph_max_x)) ionstats_plots.base_error_plot(ionstats_file, 'base_error_plot.png', int(graph_max_x)) except: traceback.print_exc()
print "Merging bam files to %s, mark duplicates is %s" % ( outputBAM, args.duplicates) try: merge_bam_files(args.files, outputBAM, outputBAM.replace('.bam', '.bam.bai'), args.duplicates) except: traceback.print_exc() # generate ionstats files from merged BAM graph_max_x = 400 if outputBAM == 'rawlib.bam': ionstats_file = 'ionstats_alignment.json' else: ionstats_file = outputBAM.split( '.bam')[0] + '.ionstats_alignment.json' ionstats.generate_ionstats_alignment(outputBAM, ionstats_file, graph_max_x) if args.merge_plots: print "Generating plots for merged report" ionstats_file = 'ionstats_alignment.json' try: stats = json.load(open(ionstats_file)) l = stats['full']['max_read_length'] graph_max_x = int(round(l + 49, -2)) # Make alignment_rate_plot.png and base_error_plot.png ionstats_plots.alignment_rate_plot2(ionstats_file, 'alignment_rate_plot.png', int(graph_max_x)) ionstats_plots.base_error_plot(ionstats_file,
def create_ionstats( BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_meta_information, basecaller_datasets, graph_max_x, activate_barcode_filter): # TEST basecaller_bam_file_list = [] alignment_bam_file_list = [] ionstats_alignment_file_list = [] ionstats_alignment_h5_file_list = [] ionstats_basecaller_file_list = [] for dataset in basecaller_datasets["datasets"]: keep_dataset = False for rg_name in dataset["read_groups"]: if not basecaller_datasets["read_groups"][rg_name].get('filtered',False): keep_dataset = True filtered = not keep_dataset # filter out based on flag if activate_barcode_filter: if filtered: continue # skip non-existing bam file if int(dataset["read_count"]) == 0: continue read_group = dataset['read_groups'][0] reference = basecaller_datasets['read_groups'][read_group]['reference'] if reference and not filtered: # TEST alignment_bam_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam')) ionstats.generate_ionstats_alignment( [os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam')], os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'), os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'), basecaller_meta_information, graph_max_x) ionstats_alignment_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json')) ionstats_alignment_h5_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5')) else: # TEST basecaller_bam_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])) ionstats.generate_ionstats_basecaller( [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])], os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'), os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'), # TODO, not needed basecaller_meta_information, graph_max_x) 
ionstats_basecaller_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json')) # Merge ionstats files from individual (barcoded) datasets if len(ionstats_alignment_file_list) > 0: ionstats.reduce_stats(ionstats_alignment_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd,shell=True) if ret != 1: printtime("ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) ionstats.generate_ionstats_alignment( ['empty_dummy.bam'], os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'), os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5'), basecaller_meta_information, graph_max_x) except: pass if len(ionstats_basecaller_file_list) > 0: ionstats.reduce_stats(ionstats_basecaller_file_list,os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd,shell=True) if ret != 1: printtime("ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) 
ionstats.generate_ionstats_basecaller( ['empty_dummy.bam'], os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json'), os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_error_summary.h5'), # TODO, not needed basecaller_meta_information, graph_max_x) except: pass ionstatslist = [] a = os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json') b = os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json') if os.path.exists(a): ionstatslist.append(a) if os.path.exists(b): ionstatslist.append(b) if len(ionstatslist) > 0: ionstats.reduce_stats( ionstatslist, os.path.join(BASECALLER_RESULTS,'ionstats_basecaller_with_aligninfos.json')) ionstats.reduce_stats( reversed(ionstatslist), os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json')) if len(ionstats_alignment_h5_file_list) > 0 and basecaller_meta_information: ionstats.reduce_stats_h5(ionstats_alignment_h5_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_error_summary.h5')) '''