def _all_read_groups_filtered(dataset, read_groups):
    """Return True when no read group of *dataset* remains unfiltered.

    A read group counts as filtered when its entry in *read_groups* has a
    truthy 'filtered' value. A dataset that lists no read groups at all is
    reported as filtered.
    """
    return all(
        read_groups[rg_name].get('filtered', False)
        for rg_name in dataset["read_groups"])


def _create_empty_dummy_bam():
    """Write a header-only 'empty_dummy.bam' into the current directory.

    Returns the file name. Raises RuntimeError when the shell pipeline
    reports an unexpected exit status.
    """
    # The \t and \n escapes are expanded by Python, so the shell receives a
    # SAM header that already contains real tabs and newlines.
    cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"
    printtime("DEBUG: Calling '%s':" % cmd)
    ret = subprocess.call(cmd, shell=True)
    # Exit status 0 is success. The previous check accepted *only* 1, which
    # made every successful run look like a failure.
    # NOTE(review): 1 is still tolerated for backward compatibility - confirm
    # whether the deployed samtools really exits with 1 here, then drop it.
    if ret not in (0, 1):
        printtime(
            "ERROR: empty bam file generation failed, return code: %d" % ret)
        raise RuntimeError('exit code: %d' % ret)
    return 'empty_dummy.bam'


def create_ionstats(BASECALLER_RESULTS, ALIGNMENT_RESULTS,
                    basecaller_meta_information, basecaller_datasets,
                    graph_max_x, activate_barcode_filter, evaluate_hp):
    """Generate per-dataset ionstats files and merge them into run-level files.

    For every dataset that has reads, either alignment statistics (the
    dataset is unfiltered and its first read group has a reference) or
    basecaller statistics (all other datasets) are generated. The per-dataset
    json files are then reduced into 'ionstats_alignment.json' and
    'ionstats_tmp_basecaller.json'. Whichever of those two exist are reduced
    into 'ionstats_basecaller_with_aligninfos.json' and
    'ionstats_basecaller.json'. When a per-dataset list is empty, a
    header-only dummy BAM is used as a best-effort way to still produce the
    corresponding json; failures of that fallback are logged, not raised.

    Args:
        BASECALLER_RESULTS: directory holding the unmapped BAM files; the
            basecaller json outputs are written here.
        ALIGNMENT_RESULTS: directory holding the aligned '<prefix>.bam'
            files; the alignment json/h5 outputs are written here.
        basecaller_meta_information: forwarded to
            ionstats.generate_ionstats_alignment when evaluate_hp is set;
            must also be truthy for the h5 merge to run.
        basecaller_datasets: dict with a "datasets" list (entries provide
            'read_groups', 'read_count', 'file_prefix', 'basecaller_bam')
            and a "read_groups" mapping (entries may provide 'filtered' and,
            for unfiltered datasets, must provide 'reference').
        graph_max_x: forwarded unchanged to the ionstats generators.
        activate_barcode_filter: when truthy, datasets whose read groups are
            all filtered are skipped entirely.
        evaluate_hp: when truthy, per-dataset error-summary h5 files are
            requested and merged into 'ionstats_error_summary.h5'.
    """
    read_groups = basecaller_datasets["read_groups"]
    # The meta information is only handed to ionstats for HP evaluation.
    hp_meta_information = basecaller_meta_information if evaluate_hp else None

    ionstats_alignment_file_list = []
    ionstats_alignment_h5_file_list = []
    ionstats_basecaller_file_list = []

    for dataset in basecaller_datasets["datasets"]:
        filtered = _all_read_groups_filtered(dataset, read_groups)

        # filter out based on flag
        if activate_barcode_filter and filtered:
            continue

        # skip non-existing bam file
        if int(dataset["read_count"]) == 0:
            continue

        # The reference only matters for unfiltered datasets, which are
        # guaranteed to have at least one read group. Skipping the lookup
        # otherwise avoids an IndexError on an empty read-group list.
        reference = None
        if not filtered:
            reference = read_groups[dataset['read_groups'][0]]['reference']

        file_prefix = dataset['file_prefix']

        if reference:
            alignment_json = os.path.join(
                ALIGNMENT_RESULTS, file_prefix + '.ionstats_alignment.json')
            error_summary_h5 = os.path.join(
                ALIGNMENT_RESULTS, file_prefix + '.ionstats_error_summary.h5')
            ionstats.generate_ionstats_alignment(
                [os.path.join(ALIGNMENT_RESULTS, file_prefix + '.bam')],
                alignment_json,
                error_summary_h5 if evaluate_hp else None,
                hp_meta_information,
                graph_max_x)
            ionstats_alignment_file_list.append(alignment_json)
            if evaluate_hp:
                ionstats_alignment_h5_file_list.append(error_summary_h5)
        else:
            basecaller_json = os.path.join(
                BASECALLER_RESULTS, file_prefix + '.ionstats_basecaller.json')
            ionstats.generate_ionstats_basecaller(
                [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])],
                basecaller_json,
                graph_max_x)
            ionstats_basecaller_file_list.append(basecaller_json)

    merged_alignment_json = os.path.join(
        ALIGNMENT_RESULTS, 'ionstats_alignment.json')
    merged_error_summary_h5 = os.path.join(
        ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')
    tmp_basecaller_json = os.path.join(
        BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json')

    # Merge ionstats files from individual (barcoded) datasets
    if ionstats_alignment_file_list:
        ionstats.reduce_stats(
            ionstats_alignment_file_list, merged_alignment_json)
    else:
        # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            ionstats.generate_ionstats_alignment(
                [_create_empty_dummy_bam()],
                merged_alignment_json,
                merged_error_summary_h5 if evaluate_hp else None,
                hp_meta_information,
                graph_max_x)
        except Exception as err:
            # Best effort: the merge below simply skips a missing file.
            printtime("ERROR: unable to create fallback %s: %s"
                      % (merged_alignment_json, err))

    if ionstats_basecaller_file_list:
        ionstats.reduce_stats(
            ionstats_basecaller_file_list, tmp_basecaller_json)
    else:
        # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            ionstats.generate_ionstats_basecaller(
                [_create_empty_dummy_bam()],
                tmp_basecaller_json,
                graph_max_x)
        except Exception as err:
            # Best effort: the merge below simply skips a missing file.
            printtime("ERROR: unable to create fallback %s: %s"
                      % (tmp_basecaller_json, err))

    # Combine whichever of the two merged files actually exist.
    ionstatslist = [
        path for path in (merged_alignment_json, tmp_basecaller_json)
        if os.path.exists(path)]
    if ionstatslist:
        ionstats.reduce_stats(
            ionstatslist,
            os.path.join(BASECALLER_RESULTS,
                         'ionstats_basecaller_with_aligninfos.json'))
        # Same inputs in the opposite order; passed as a real list so the
        # callee may take len() of it or iterate it more than once.
        ionstats.reduce_stats(
            list(reversed(ionstatslist)),
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))

    if (evaluate_hp and ionstats_alignment_h5_file_list
            and basecaller_meta_information):
        ionstats.reduce_stats_h5(
            ionstats_alignment_h5_file_list, merged_error_summary_h5)


# Disabled legacy implementations, kept inside a bare string literal so they
# are never executed.
'''
def post_basecalling(BASECALLER_RESULTS,expName,resultsName,flows):

    datasets_basecaller_path = os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json")

    if not os.path.exists(datasets_basecaller_path):
        printtime("ERROR: %s does not exist" % datasets_basecaller_path)
        raise Exception("ERROR: %s does not exist" % datasets_basecaller_path)

    datasets_basecaller = {}
    try:
        f = open(datasets_basecaller_path,'r')
        datasets_basecaller = json.load(f);
        f.close()
    except:
        printtime("ERROR: problem parsing %s" % datasets_basecaller_path)
        raise Exception("ERROR: problem parsing %s" % datasets_basecaller_path)

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 400

    quality_file_list = []
    for dataset in datasets_basecaller["datasets"]:
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        # Call ionstats utility to generate alignment-independent metrics for current unmapped BAM
        ionstats.generate_ionstats_basecaller(
                os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                graph_max_x)

        # Plot read length sparkline
        ionstats_plots.read_length_sparkline(
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.sparkline.png'),
                graph_max_x)

        quality_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'))

    # Merge ionstats_basecaller files from individual barcodes/dataset
    ionstats.reduce_stats(quality_file_list,os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'))

    # Generate legacy stats file: quality.summary
    ionstats.generate_legacy_basecaller_files(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,''))

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto.png'),
            graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto2.png'),
            graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'quality_histogram.png'))

    printtime("Finished basecaller post processing")

def create_ionstats(
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        basecaller_meta_information,
        basecaller_datasets,
        graph_max_x,
        activate_barcode_filter):

    # TEST
    basecaller_bam_file_list = []
    alignment_bam_file_list = []

    ionstats_alignment_file_list = []
    ionstats_alignment_h5_file_list = []

    ionstats_basecaller_file_list = []

    for dataset in basecaller_datasets["datasets"]:

        keep_dataset = False
        for rg_name in dataset["read_groups"]:
            if not basecaller_datasets["read_groups"][rg_name].get('filtered',False):
                keep_dataset = True
        filtered = not keep_dataset

        # filter out based on flag
        if activate_barcode_filter:
            if filtered:
                continue

        # skip non-existing bam file
        if int(dataset["read_count"]) == 0:
            continue

        read_group = dataset['read_groups'][0]
        reference = basecaller_datasets['read_groups'][read_group]['reference']
        if reference and not filtered:

            # TEST
            alignment_bam_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam'))

            ionstats.generate_ionstats_alignment(
                [os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam')],
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'),
                basecaller_meta_information,
                graph_max_x)

            ionstats_alignment_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'))
            ionstats_alignment_h5_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'))
        else:

            # TEST
            basecaller_bam_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam']))

            ionstats.generate_ionstats_basecaller(
                [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])],
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'), # TODO, not needed
                basecaller_meta_information,
                graph_max_x)

            ionstats_basecaller_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'))

    # Merge ionstats files from individual (barcoded) datasets
    if len(ionstats_alignment_file_list) > 0:
        ionstats.reduce_stats(ionstats_alignment_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'))
    else:
        # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"
            cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

            printtime("DEBUG: Calling '%s':" % cmd)
            ret = subprocess.call(cmd,shell=True)
            if ret != 1:
                printtime("ERROR: empty bam file generation failed, return code: %d" % ret)
                raise RuntimeError('exit code: %d' % ret)

            ionstats.generate_ionstats_alignment(
                ['empty_dummy.bam'],
                os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'),
                os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5'),
                basecaller_meta_information,
                graph_max_x)

        except:
            pass

    if len(ionstats_basecaller_file_list) > 0:
        ionstats.reduce_stats(ionstats_basecaller_file_list,os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json'))
    else:
        # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"
            cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

            printtime("DEBUG: Calling '%s':" % cmd)
            ret = subprocess.call(cmd,shell=True)
            if ret != 1:
                printtime("ERROR: empty bam file generation failed, return code: %d" % ret)
                raise RuntimeError('exit code: %d' % ret)

            ionstats.generate_ionstats_basecaller(
                ['empty_dummy.bam'],
                os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_error_summary.h5'), # TODO, not needed
                basecaller_meta_information,
                graph_max_x)
        except:
            pass

    ionstatslist = []
    a = os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json')
    b = os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json')
    if os.path.exists(a):
        ionstatslist.append(a)
    if os.path.exists(b):
        ionstatslist.append(b)
    if len(ionstatslist) > 0:
        ionstats.reduce_stats(
            ionstatslist,
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller_with_aligninfos.json'))
        ionstats.reduce_stats(
            reversed(ionstatslist),
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'))
    if len(ionstats_alignment_h5_file_list) > 0 and basecaller_meta_information:
        ionstats.reduce_stats_h5(ionstats_alignment_h5_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_error_summary.h5'))
'''