def parse_aligner_contig_report(report_fpath):
    """Parse a tab-separated aligner contig report.

    The first line is treated as a header in which the ``S1``, ``E1``,
    ``Reference``, ``Contig``, ``Ambiguous`` and ``Best_group`` columns are
    located by name. Each following line is classified by its tab-separated
    fields:

    * blank (empty or whitespace-only) line -- skipped;
    * first field is ``CONTIG`` -- skipped;
    * fewer than five fields -- the stripped line is appended to the
      structure of the contig named by the most recent alignment row
      (under the key ``None`` if no alignment row has been read yet);
    * more than five fields -- an alignment row, converted to ``Alignment``;
    * exactly five fields -- ignored.

    Only alignments whose ``Best_group`` field equals the string ``'True'``
    are kept.

    :param report_fpath: path to the report file.
    :return: ``(aligned_blocks, misassembled_id_to_structure)``.
        ``aligned_blocks`` is the list of kept ``Alignment`` objects in file
        order; ``misassembled_id_to_structure`` is a ``defaultdict(list)``
        mapping a contig id to its kept alignments interleaved with the short
        text lines that followed them.
    :raises ValueError: if the header lacks one of the columns listed above,
        or an alignment row has a non-integer ``S1``/``E1`` value.
    """
    aligned_blocks = []
    misassembled_id_to_structure = defaultdict(list)

    with open(report_fpath) as report_file:
        header = next(report_file, None)
        if header is None:
            # Empty file: there is no header and nothing to parse.
            return aligned_blocks, misassembled_id_to_structure

        columns = header.replace('\n', '').split('\t')
        start_col = columns.index('S1')
        end_col = columns.index('E1')
        ref_col = columns.index('Reference')
        contig_col = columns.index('Contig')
        ambig_col = columns.index('Ambiguous')
        best_col = columns.index('Best_group')

        # Short lines are filed under the contig of the latest alignment row.
        contig_id = None
        for line in report_file:
            # str.split() never returns an empty list, so blank lines have to
            # be rejected explicitly; otherwise they would be stored as empty
            # structure entries by the "< 5 fields" branch below.
            if not line.strip():
                continue

            fields = line.replace('\n', '').split('\t')
            if fields[0] == 'CONTIG':
                continue

            if len(fields) < 5:
                misassembled_id_to_structure[contig_id].append(line.strip())
            elif len(fields) > 5:
                start = int(fields[start_col])
                end = int(fields[end_col])
                ref_name = fields[ref_col]
                contig_id = fields[contig_col]
                ambiguity = fields[ambig_col]
                is_best = fields[best_col]

                block = Alignment(name=contig_id, start=start, end=end, ref_name=ref_name,
                                  is_best_set=is_best == 'True')
                block.ambiguous = ambiguity
                # NOTE(review): the original layout was flattened; both appends
                # are taken to be guarded by is_best_set -- confirm.
                if block.is_best_set:
                    aligned_blocks.append(block)
                    misassembled_id_to_structure[contig_id].append(block)

    return aligned_blocks, misassembled_id_to_structure
def parse_nucmer_contig_report(report_fpath):
    """Parse a tab-separated nucmer contig report.

    The first line is treated as a header in which the ``S1``, ``E1``,
    ``Reference``, ``Contig``, ``Ambiguous`` and ``Best_group`` columns are
    located by name. Each following line is classified by its tab-separated
    fields:

    * blank (empty or whitespace-only) line -- skipped;
    * first field is ``CONTIG`` -- skipped;
    * fewer than five fields -- the stripped line is appended to the
      structure of the contig named by the most recent alignment row
      (under the key ``None`` if no alignment row has been read yet);
    * more than five fields -- an alignment row, converted to ``Alignment``;
    * exactly five fields -- ignored.

    Only alignments whose ``Best_group`` field equals the string ``'True'``
    are kept.

    NOTE(review): a three-argument function with this same name appears later
    in the visible source; if both live in one module the later definition
    presumably replaces this one -- confirm which is intended.

    :param report_fpath: path to the report file.
    :return: ``(aligned_blocks, misassembled_id_to_structure)``.
        ``aligned_blocks`` is the list of kept ``Alignment`` objects in file
        order; ``misassembled_id_to_structure`` is a ``defaultdict(list)``
        mapping a contig id to its kept alignments interleaved with the short
        text lines that followed them.
    :raises ValueError: if the header lacks one of the columns listed above,
        or an alignment row has a non-integer ``S1``/``E1`` value.
    """
    aligned_blocks = []
    misassembled_id_to_structure = defaultdict(list)

    with open(report_fpath) as report_file:
        header = next(report_file, None)
        if header is None:
            # Empty file: there is no header and nothing to parse.
            return aligned_blocks, misassembled_id_to_structure

        columns = header.replace('\n', '').split('\t')
        start_col = columns.index('S1')
        end_col = columns.index('E1')
        ref_col = columns.index('Reference')
        contig_col = columns.index('Contig')
        ambig_col = columns.index('Ambiguous')
        best_col = columns.index('Best_group')

        # Short lines are filed under the contig of the latest alignment row.
        contig_id = None
        for line in report_file:
            # str.split() never returns an empty list, so blank lines have to
            # be rejected explicitly; otherwise they would be stored as empty
            # structure entries by the "< 5 fields" branch below.
            if not line.strip():
                continue

            fields = line.replace('\n', '').split('\t')
            if fields[0] == 'CONTIG':
                continue

            if len(fields) < 5:
                misassembled_id_to_structure[contig_id].append(line.strip())
            elif len(fields) > 5:
                start = int(fields[start_col])
                end = int(fields[end_col])
                ref_name = fields[ref_col]
                contig_id = fields[contig_col]
                ambiguity = fields[ambig_col]
                is_best = fields[best_col]

                block = Alignment(name=contig_id, start=start, end=end, ref_name=ref_name,
                                  is_best_set=is_best == 'True')
                block.ambiguous = ambiguity
                # NOTE(review): the original layout was flattened; both appends
                # are taken to be guarded by is_best_set -- confirm.
                if block.is_best_set:
                    aligned_blocks.append(block)
                    misassembled_id_to_structure[contig_id].append(block)

    return aligned_blocks, misassembled_id_to_structure
def parse_nucmer_contig_report(report_fpath, ref_names, cumulative_ref_lengths):
    """Parse a tab-separated nucmer contig report into alignments and contigs.

    The first line is treated as a header in which the ``S1``, ``E1``, ``S2``,
    ``E2``, ``Reference``, ``Contig``, ``IDY``, ``Ambiguous`` and
    ``Best_group`` columns are located by name. Each following line is
    classified by its tab-separated fields:

    * blank (empty or whitespace-only) line -- skipped;
    * first field is ``CONTIG`` -- must have exactly four fields
      ``CONTIG, name, size, type`` and yields a ``Contig``;
    * fewer than five fields -- the stripped line is appended to the
      structure of the contig named by the most recent alignment row
      (under the key ``None`` if no alignment row has been read yet);
    * more than five fields -- an alignment row, converted to ``Alignment``;
    * exactly five fields -- ignored.

    Reference coordinates (``S1``/``E1``) are shifted by
    ``cumulative_ref_lengths[ref_names.index(reference)]``; the unshifted
    values are kept on the alignment as well.

    NOTE(review): another three-argument function with this same name appears
    later in the visible source; if both live in one module the later
    definition presumably replaces this one -- confirm which is intended.

    :param report_fpath: path to the report file.
    :param ref_names: sequence of reference names; the position of a row's
        reference in it selects the shift.
    :param cumulative_ref_lengths: per-reference shift, indexed like
        ``ref_names``.
    :return: ``(aligned_blocks, misassembled_id_to_structure, contigs,
        ambiguity_alignments)``. ``aligned_blocks`` holds every alignment in
        file order; ``misassembled_id_to_structure`` maps a contig id to its
        best-set alignments interleaved with the short text lines;
        ``contigs`` is the list of ``Contig`` objects;
        ``ambiguity_alignments`` maps a contig id to its alignments that are
        not in the best set. Both mappings are ``defaultdict(list)``.
    :raises ValueError: if a header column is missing, a ``CONTIG`` line does
        not have four fields, a coordinate or size is not an integer, or a
        row's reference is not in ``ref_names``.
    """
    aligned_blocks = []
    contigs = []
    misassembled_id_to_structure = defaultdict(list)
    ambiguity_alignments = defaultdict(list)

    with open(report_fpath) as report_file:
        header = next(report_file, None)
        if header is None:
            # Empty file: there is no header and nothing to parse.
            return aligned_blocks, misassembled_id_to_structure, contigs, ambiguity_alignments

        columns = header.replace('\n', '').split('\t')
        start_col = columns.index('S1')
        end_col = columns.index('E1')
        start_in_contig_col = columns.index('S2')
        end_in_contig_col = columns.index('E2')
        ref_col = columns.index('Reference')
        contig_col = columns.index('Contig')
        idy_col = columns.index('IDY')
        ambig_col = columns.index('Ambiguous')
        best_col = columns.index('Best_group')

        # Short lines are filed under the contig of the latest alignment row.
        contig_id = None
        for line in report_file:
            # str.split() never returns an empty list, so blank lines have to
            # be rejected explicitly; otherwise they would be stored as empty
            # structure entries by the "< 5 fields" branch below.
            if not line.strip():
                continue

            fields = line.replace('\n', '').split('\t')
            if fields[0] == 'CONTIG':
                _, name, size, contig_type = fields
                contigs.append(Contig(name=name, size=int(size), contig_type=contig_type))
            elif len(fields) < 5:
                misassembled_id_to_structure[contig_id].append(line.strip())
            elif len(fields) > 5:
                unshifted_start = fields[start_col]
                unshifted_end = fields[end_col]
                start_in_contig = fields[start_in_contig_col]
                end_in_contig = fields[end_in_contig_col]
                ref_name = fields[ref_col]
                contig_id = fields[contig_col]
                idy = fields[idy_col]  # passed through as text, not converted
                ambiguity = fields[ambig_col]
                is_best = fields[best_col]

                unshifted_start = int(unshifted_start)
                unshifted_end = int(unshifted_end)
                start_in_contig = int(start_in_contig)
                end_in_contig = int(end_in_contig)

                cur_shift = cumulative_ref_lengths[ref_names.index(ref_name)]
                start = unshifted_start + cur_shift
                end = unshifted_end + cur_shift

                # Reverse-complement when the reference interval and the
                # contig interval run in opposite directions.
                is_rc = ((start - end) * (start_in_contig - end_in_contig)) < 0
                position_in_ref = unshifted_start

                block = Alignment(
                    name=contig_id, start=start, end=end,
                    unshifted_start=unshifted_start, unshifted_end=unshifted_end,
                    is_rc=is_rc, start_in_contig=start_in_contig, end_in_contig=end_in_contig,
                    position_in_ref=position_in_ref, ref_name=ref_name,
                    idy=idy, is_best_set=is_best == 'True')
                block.ambiguous = ambiguity

                if block.is_best_set:
                    misassembled_id_to_structure[contig_id].append(block)
                else:
                    ambiguity_alignments[contig_id].append(block)
                # NOTE(review): the original layout was flattened; this append
                # is taken to apply to every alignment, best-set or not --
                # confirm.
                aligned_blocks.append(block)

    return aligned_blocks, misassembled_id_to_structure, contigs, ambiguity_alignments
def parse_nucmer_contig_report(report_fpath, ref_names, cumulative_ref_lengths):
    """Parse a tab-separated nucmer contig report into alignments and contigs.

    The first line is treated as a header in which the ``S1``, ``E1``, ``S2``,
    ``E2``, ``Reference``, ``Contig``, ``IDY``, ``Ambiguous`` and
    ``Best_group`` columns are located by name. Each following line is
    classified by its tab-separated fields:

    * blank (empty or whitespace-only) line -- skipped;
    * first field is ``CONTIG`` -- must have exactly four fields
      ``CONTIG, name, size, type`` and yields a ``Contig``;
    * fewer than five fields -- the stripped line is appended to the
      structure of the contig named by the most recent alignment row
      (under the key ``None`` if no alignment row has been read yet);
    * more than five fields -- an alignment row, converted to ``Alignment``;
    * exactly five fields -- ignored.

    Reference coordinates (``S1``/``E1``) are moved by ``shift - 1`` where
    ``shift`` is ``cumulative_ref_lengths[ref_names.index(reference)]``, with
    a falsy shift (such as ``0``) replaced by ``1`` so that those coordinates
    stay unchanged. The unshifted values are kept on the alignment as well.

    :param report_fpath: path to the report file.
    :param ref_names: sequence of reference names; the position of a row's
        reference in it selects the shift.
    :param cumulative_ref_lengths: per-reference shift, indexed like
        ``ref_names``.
    :return: ``(aligned_blocks, misassembled_id_to_structure, contigs,
        ambiguity_alignments)``. ``aligned_blocks`` holds every alignment in
        file order; ``misassembled_id_to_structure`` maps a contig id to its
        best-set alignments interleaved with the short text lines;
        ``contigs`` is the list of ``Contig`` objects;
        ``ambiguity_alignments`` maps a contig id to its alignments that are
        not in the best set. Both mappings are ``defaultdict(list)``.
    :raises ValueError: if a header column is missing, a ``CONTIG`` line does
        not have four fields, a coordinate or size is not an integer, or a
        row's reference is not in ``ref_names``.
    """
    aligned_blocks = []
    contigs = []
    misassembled_id_to_structure = defaultdict(list)
    ambiguity_alignments = defaultdict(list)

    with open(report_fpath) as report_file:
        header = next(report_file, None)
        if header is None:
            # Empty file: there is no header and nothing to parse.
            return aligned_blocks, misassembled_id_to_structure, contigs, ambiguity_alignments

        columns = header.replace('\n', '').split('\t')
        start_col = columns.index('S1')
        end_col = columns.index('E1')
        start_in_contig_col = columns.index('S2')
        end_in_contig_col = columns.index('E2')
        ref_col = columns.index('Reference')
        contig_col = columns.index('Contig')
        idy_col = columns.index('IDY')
        ambig_col = columns.index('Ambiguous')
        best_col = columns.index('Best_group')

        # Short lines are filed under the contig of the latest alignment row.
        contig_id = None
        for line in report_file:
            # str.split() never returns an empty list, so blank lines have to
            # be rejected explicitly; otherwise they would be stored as empty
            # structure entries by the "< 5 fields" branch below.
            if not line.strip():
                continue

            fields = line.replace('\n', '').split('\t')
            if fields[0] == 'CONTIG':
                _, name, size, contig_type = fields
                contigs.append(Contig(name=name, size=int(size), contig_type=contig_type))
            elif len(fields) < 5:
                misassembled_id_to_structure[contig_id].append(line.strip())
            elif len(fields) > 5:
                unshifted_start = fields[start_col]
                unshifted_end = fields[end_col]
                start_in_contig = fields[start_in_contig_col]
                end_in_contig = fields[end_in_contig_col]
                ref_name = fields[ref_col]
                contig_id = fields[contig_col]
                idy = fields[idy_col]  # passed through as text, not converted
                ambiguity = fields[ambig_col]
                is_best = fields[best_col]

                unshifted_start = int(unshifted_start)
                unshifted_end = int(unshifted_end)
                start_in_contig = int(start_in_contig)
                end_in_contig = int(end_in_contig)

                # A falsy shift becomes 1 so that the "- 1" below cancels it
                # and the coordinates are left as they are.
                cur_shift = cumulative_ref_lengths[ref_names.index(ref_name)] or 1
                start = unshifted_start + cur_shift - 1
                end = unshifted_end + cur_shift - 1

                # Reverse-complement when the reference interval and the
                # contig interval run in opposite directions.
                is_rc = ((start - end) * (start_in_contig - end_in_contig)) < 0
                position_in_ref = unshifted_start

                block = Alignment(
                    name=contig_id, start=start, end=end,
                    unshifted_start=unshifted_start, unshifted_end=unshifted_end,
                    is_rc=is_rc, start_in_contig=start_in_contig, end_in_contig=end_in_contig,
                    position_in_ref=position_in_ref, ref_name=ref_name,
                    idy=idy, is_best_set=is_best == 'True')
                block.ambiguous = ambiguity

                if block.is_best_set:
                    misassembled_id_to_structure[contig_id].append(block)
                else:
                    ambiguity_alignments[contig_id].append(block)
                # NOTE(review): the original layout was flattened; this append
                # is taken to apply to every alignment, best-set or not --
                # confirm.
                aligned_blocks.append(block)

    return aligned_blocks, misassembled_id_to_structure, contigs, ambiguity_alignments