def merge_ionstats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_datasets):
    """Merge per-block ionstats files into composite result files.

    For every dataset that has at least one unfiltered read group, the
    per-block ``<prefix>.ionstats_alignment.json`` (when the dataset's read
    group has a reference) or ``<prefix>.ionstats_basecaller.json`` files are
    reduced into a ``composite_allblocks_`` file; per-block error-summary H5
    files are merged likewise.  Afterwards whole-run ``allblocks`` composites
    and, as a commutativity cross-check, ``allbarcodes`` composites are built.

    Args:
        dirs: list of per-block directories containing ionstats outputs.
        BASECALLER_RESULTS: basecaller results folder name.
        ALIGNMENT_RESULTS: alignment results folder name.
        basecaller_datasets: dict with 'datasets' and 'read_groups' metadata.

    Any failure is logged with a traceback and swallowed (best-effort merge).
    """
    # DEBUG: check if merging is commutative
    try:
        composite_filename_list = []
        composite_h5_filename_list = []
        for dataset in basecaller_datasets["datasets"]:

            # Skip datasets in which every read group is flagged 'filtered'.
            keep_dataset = any(
                not basecaller_datasets["read_groups"][rg_name].get("filtered", False)
                for rg_name in dataset["read_groups"]
            )
            if not keep_dataset:
                # NOTE(review): the original loop printed only the *last*
                # read-group name here; behavior preserved.
                printtime("INFO: filter out %s" % dataset["read_groups"][-1])
                continue

            # The first read group decides whether alignment stats exist.
            read_group = dataset["read_groups"][0]
            reference = basecaller_datasets["read_groups"][read_group]["reference"]

            if reference:
                ionstats_folder = ALIGNMENT_RESULTS
                ionstats_file = 'ionstats_alignment.json'
            else:
                ionstats_folder = BASECALLER_RESULTS
                ionstats_file = 'ionstats_basecaller.json'

            block_filename_list = [
                os.path.join(block_dir, ionstats_folder,
                             dataset['file_prefix'] + '.' + ionstats_file)
                for block_dir in dirs
            ]
            # TODO, remove this check and provide list with valid blocks
            block_filename_list = [
                filename for filename in block_filename_list
                if os.path.exists(filename)
            ]
            composite_filename = os.path.join(
                ionstats_folder,
                dataset['file_prefix'] + '.composite_allblocks_' + ionstats_file)
            ionstats.reduce_stats(block_filename_list, composite_filename)
            composite_filename_list.append(composite_filename)

            if reference:
                block_h5_filename_list = [
                    os.path.join(block_dir, ALIGNMENT_RESULTS,
                                 dataset['file_prefix'] + '.ionstats_error_summary.h5')
                    for block_dir in dirs
                ]
                # TODO, remove this check and provide list with valid blocks
                block_h5_filename_list = [
                    filename for filename in block_h5_filename_list
                    if os.path.exists(filename)
                ]
                composite_h5_filename = os.path.join(
                    ALIGNMENT_RESULTS,
                    dataset['file_prefix'] + '.ionstats_error_summary.h5')
                ionstats.reduce_stats_h5(block_h5_filename_list, composite_h5_filename)
                composite_h5_filename_list.append(composite_h5_filename)

        # Whole-run composites merged directly from the per-block files.
        block_filename_list = [
            os.path.join(block_dir, ALIGNMENT_RESULTS, 'ionstats_alignment.json')
            for block_dir in dirs
        ]
        block_filename_list = [
            filename for filename in block_filename_list
            if os.path.exists(filename)
        ]
        composite_filename = os.path.join(
            ALIGNMENT_RESULTS, 'composite_allblocks_ionstats_alignment.json')
        ionstats.reduce_stats(block_filename_list, composite_filename)

        block_h5_filename_list = [
            os.path.join(block_dir, ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')
            for block_dir in dirs
        ]
        block_h5_filename_list = [
            filename for filename in block_h5_filename_list
            if os.path.exists(filename)
        ]
        composite_filename = os.path.join(
            ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')  # composite_allblocks
        if block_h5_filename_list:
            ionstats.reduce_stats_h5(block_h5_filename_list, composite_filename)

        # DEBUG: used to check that merging is commutative; the emptiness check
        # is necessary in case all datasets are 'filtered'.
        if composite_filename_list:
            ionstats.reduce_stats(
                composite_filename_list,
                os.path.join(ALIGNMENT_RESULTS,
                             'composite_allbarcodes_ionstats_alignment.json'))
        if composite_h5_filename_list:
            ionstats.reduce_stats_h5(
                composite_h5_filename_list,
                os.path.join(ALIGNMENT_RESULTS,
                             'composite_allbarcodes_ionstats_error_summary.h5'))
    except Exception:
        # Was a bare 'except:'; narrowed so SystemExit/KeyboardInterrupt propagate.
        printtime("ERROR: Failed to merge ionstats_alignment.json")
        traceback.print_exc()
def merge_ionstats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_datasets):
    """Merge per-block ionstats files across blocks into composite files.

    NOTE(review): this re-definition shadows an identical ``merge_ionstats``
    defined earlier in this file — one of the two copies should be removed.

    Per unfiltered dataset, reduces the per-block alignment (or, without a
    reference, basecaller) ionstats JSON files and the error-summary H5 files
    into ``composite_allblocks_`` files, then builds whole-run composites and
    ``allbarcodes`` composites as a merge-commutativity cross-check.

    Errors are logged with a traceback and swallowed (best-effort merge).
    """
    # DEBUG: check if merging is commutative
    try:
        composite_filename_list = []
        composite_h5_filename_list = []
        for dataset in basecaller_datasets["datasets"]:

            # Filter out datasets whose read groups are all flagged 'filtered'.
            keep_dataset = False
            for rg_name in dataset["read_groups"]:
                if not basecaller_datasets["read_groups"][rg_name].get('filtered', False):
                    keep_dataset = True
            if not keep_dataset:
                # rg_name is the last read group from the loop above (original behavior).
                printtime("INFO: filter out %s" % rg_name)
                continue

            read_group = dataset['read_groups'][0]
            reference = basecaller_datasets['read_groups'][read_group]['reference']

            # Alignment stats exist only when the dataset has a reference.
            if reference:
                ionstats_folder, ionstats_file = ALIGNMENT_RESULTS, 'ionstats_alignment.json'
            else:
                ionstats_folder, ionstats_file = BASECALLER_RESULTS, 'ionstats_basecaller.json'

            candidates = (
                os.path.join(block_dir, ionstats_folder,
                             dataset['file_prefix'] + '.' + ionstats_file)
                for block_dir in dirs
            )
            # TODO, remove this check and provide list with valid blocks
            block_filename_list = [p for p in candidates if os.path.exists(p)]
            composite_filename = os.path.join(
                ionstats_folder,
                dataset['file_prefix'] + '.composite_allblocks_' + ionstats_file)
            ionstats.reduce_stats(block_filename_list, composite_filename)
            composite_filename_list.append(composite_filename)

            if reference:
                h5_candidates = (
                    os.path.join(block_dir, ALIGNMENT_RESULTS,
                                 dataset['file_prefix'] + '.ionstats_error_summary.h5')
                    for block_dir in dirs
                )
                # TODO, remove this check and provide list with valid blocks
                block_h5_filename_list = [p for p in h5_candidates if os.path.exists(p)]
                composite_h5_filename = os.path.join(
                    ALIGNMENT_RESULTS,
                    dataset['file_prefix'] + '.ionstats_error_summary.h5')
                ionstats.reduce_stats_h5(block_h5_filename_list, composite_h5_filename)
                composite_h5_filename_list.append(composite_h5_filename)

        # Whole-run composite built straight from the per-block files.
        block_filename_list = [
            p for p in (
                os.path.join(block_dir, ALIGNMENT_RESULTS, 'ionstats_alignment.json')
                for block_dir in dirs)
            if os.path.exists(p)
        ]
        composite_filename = os.path.join(
            ALIGNMENT_RESULTS, 'composite_allblocks_ionstats_alignment.json')
        ionstats.reduce_stats(block_filename_list, composite_filename)

        block_h5_filename_list = [
            p for p in (
                os.path.join(block_dir, ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')
                for block_dir in dirs)
            if os.path.exists(p)
        ]
        composite_filename = os.path.join(
            ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')  # composite_allblocks
        if block_h5_filename_list:
            ionstats.reduce_stats_h5(block_h5_filename_list, composite_filename)

        # DEBUG: commutativity check; the length guard covers the case where
        # every dataset is 'filtered'.
        if len(composite_filename_list) > 0:
            ionstats.reduce_stats(
                composite_filename_list,
                os.path.join(ALIGNMENT_RESULTS,
                             'composite_allbarcodes_ionstats_alignment.json'))
        if len(composite_h5_filename_list) > 0:
            ionstats.reduce_stats_h5(
                composite_h5_filename_list,
                os.path.join(ALIGNMENT_RESULTS,
                             'composite_allbarcodes_ionstats_error_summary.h5'))
    except Exception:
        # Was a bare 'except:'; narrowed so SystemExit/KeyboardInterrupt propagate.
        printtime("ERROR: Failed to merge ionstats_alignment.json")
        traceback.print_exc()
def create_ionstats(BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_meta_information, basecaller_datasets, graph_max_x, activate_barcode_filter, evaluate_hp): # TEST basecaller_bam_file_list = [] alignment_bam_file_list = [] ionstats_alignment_file_list = [] if evaluate_hp: ionstats_alignment_h5_file_list = [] ionstats_basecaller_file_list = [] for dataset in basecaller_datasets["datasets"]: keep_dataset = False for rg_name in dataset["read_groups"]: if not basecaller_datasets["read_groups"][rg_name].get( 'filtered', False): keep_dataset = True filtered = not keep_dataset # filter out based on flag if activate_barcode_filter: if filtered: continue # skip non-existing bam file if int(dataset["read_count"]) == 0: continue read_group = dataset['read_groups'][0] reference = basecaller_datasets['read_groups'][read_group]['reference'] if reference and not filtered: # TEST alignment_bam_file_list.append( os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam')) ionstats.generate_ionstats_alignment( [ os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam') ], os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json'), os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_error_summary.h5') if evaluate_hp else None, basecaller_meta_information if evaluate_hp else None, graph_max_x) ionstats_alignment_file_list.append( os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json')) if evaluate_hp: ionstats_alignment_h5_file_list.append( os.path.join( ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_error_summary.h5')) else: # TEST basecaller_bam_file_list.append( os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])) ionstats.generate_ionstats_basecaller( [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])], os.path.join( BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'), graph_max_x) ionstats_basecaller_file_list.append( os.path.join( 
BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json')) # Merge ionstats files from individual (barcoded) datasets if len(ionstats_alignment_file_list) > 0: ionstats.reduce_stats( ionstats_alignment_file_list, os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd, shell=True) if ret != 1: printtime( "ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) ionstats.generate_ionstats_alignment( ['empty_dummy.bam'], os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'), os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5') if evaluate_hp else None, basecaller_meta_information if evaluate_hp else None, graph_max_x) except: pass if len(ionstats_basecaller_file_list) > 0: ionstats.reduce_stats( ionstats_basecaller_file_list, os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd, shell=True) if ret != 1: printtime( "ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) 
ionstats.generate_ionstats_basecaller( ['empty_dummy.bam'], os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json'), graph_max_x) except: pass ionstatslist = [] a = os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json') b = os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json') if os.path.exists(a): ionstatslist.append(a) if os.path.exists(b): ionstatslist.append(b) if len(ionstatslist) > 0: ionstats.reduce_stats( ionstatslist, os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller_with_aligninfos.json')) ionstats.reduce_stats( reversed(ionstatslist), os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json')) if evaluate_hp and len(ionstats_alignment_h5_file_list ) > 0 and basecaller_meta_information: ionstats.reduce_stats_h5( ionstats_alignment_h5_file_list, os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')) '''
def create_ionstats( BASECALLER_RESULTS, ALIGNMENT_RESULTS, basecaller_meta_information, basecaller_datasets, graph_max_x, activate_barcode_filter): # TEST basecaller_bam_file_list = [] alignment_bam_file_list = [] ionstats_alignment_file_list = [] ionstats_alignment_h5_file_list = [] ionstats_basecaller_file_list = [] for dataset in basecaller_datasets["datasets"]: keep_dataset = False for rg_name in dataset["read_groups"]: if not basecaller_datasets["read_groups"][rg_name].get('filtered',False): keep_dataset = True filtered = not keep_dataset # filter out based on flag if activate_barcode_filter: if filtered: continue # skip non-existing bam file if int(dataset["read_count"]) == 0: continue read_group = dataset['read_groups'][0] reference = basecaller_datasets['read_groups'][read_group]['reference'] if reference and not filtered: # TEST alignment_bam_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam')) ionstats.generate_ionstats_alignment( [os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam')], os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'), os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'), basecaller_meta_information, graph_max_x) ionstats_alignment_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json')) ionstats_alignment_h5_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5')) else: # TEST basecaller_bam_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])) ionstats.generate_ionstats_basecaller( [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])], os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'), os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_error_summary.h5'), # TODO, not needed basecaller_meta_information, graph_max_x) 
ionstats_basecaller_file_list.append(os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json')) # Merge ionstats files from individual (barcoded) datasets if len(ionstats_alignment_file_list) > 0: ionstats.reduce_stats(ionstats_alignment_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd,shell=True) if ret != 1: printtime("ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) ionstats.generate_ionstats_alignment( ['empty_dummy.bam'], os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json'), os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5'), basecaller_meta_information, graph_max_x) except: pass if len(ionstats_basecaller_file_list) > 0: ionstats.reduce_stats(ionstats_basecaller_file_list,os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json')) else: # barcode classification filtered all barcodes or no reads available # TODO: ionstats needs to produce initial json file try: #cmd = "echo $'@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" cmd = "echo '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam" printtime("DEBUG: Calling '%s':" % cmd) ret = subprocess.call(cmd,shell=True) if ret != 1: printtime("ERROR: empty bam file generation failed, return code: %d" % ret) raise RuntimeError('exit code: %d' % ret) 
ionstats.generate_ionstats_basecaller( ['empty_dummy.bam'], os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json'), os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_error_summary.h5'), # TODO, not needed basecaller_meta_information, graph_max_x) except: pass ionstatslist = [] a = os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json') b = os.path.join(BASECALLER_RESULTS,'ionstats_tmp_basecaller.json') if os.path.exists(a): ionstatslist.append(a) if os.path.exists(b): ionstatslist.append(b) if len(ionstatslist) > 0: ionstats.reduce_stats( ionstatslist, os.path.join(BASECALLER_RESULTS,'ionstats_basecaller_with_aligninfos.json')) ionstats.reduce_stats( reversed(ionstatslist), os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json')) if len(ionstats_alignment_h5_file_list) > 0 and basecaller_meta_information: ionstats.reduce_stats_h5(ionstats_alignment_h5_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_error_summary.h5')) '''