コード例 #1
0
ファイル: circos.py プロジェクト: e4ong1031/MDHHS_TB_WGS
def parse_aligner_contig_report(report_fpath):
    """Parse a tab-separated contig alignment report.

    The first line of the file is a header that must name the columns
    ``S1``, ``E1``, ``Reference``, ``Contig``, ``Ambiguous`` and
    ``Best_group``.  Every following line is handled according to its shape:

    * blank (empty or whitespace-only) lines are skipped;
    * lines whose first field is ``CONTIG`` are skipped;
    * lines with fewer than five fields are stored, stripped, as text
      entries under the contig id of the most recent alignment record
      (``None`` if no alignment record has been read yet);
    * lines with more than five fields are alignment records; a record is
      kept only when its ``Best_group`` field is the string ``'True'``;
    * lines with exactly five fields are ignored.

    Args:
        report_fpath: Path of the report file to read.

    Returns:
        A tuple ``(aligned_blocks, misassembled_id_to_structure)``:
        ``aligned_blocks`` is the list of kept ``Alignment`` objects in file
        order, and ``misassembled_id_to_structure`` is a
        ``defaultdict(list)`` mapping a contig id to its kept alignments and
        text entries, in file order.

    Raises:
        ValueError: If a required header column is missing or a start/end
            field is not an integer.
        IndexError: If an alignment record is shorter than a required
            column position.
    """
    aligned_blocks = []
    misassembled_id_to_structure = defaultdict(list)

    with open(report_fpath) as report_file:
        header_line = next(report_file, None)
        if header_line is None:
            # Empty file: there is no header and nothing to parse.
            return aligned_blocks, misassembled_id_to_structure

        header = header_line.rstrip('\n').split('\t')
        start_col = header.index('S1')
        end_col = header.index('E1')
        ref_col = header.index('Reference')
        contig_col = header.index('Contig')
        ambig_col = header.index('Ambiguous')
        best_col = header.index('Best_group')

        contig_id = None
        for line in report_file:
            # str.split('\t') never returns an empty list, so blank lines
            # must be filtered explicitly; otherwise they would be stored
            # as empty-string entries by the "< 5 fields" branch below.
            if not line.strip():
                continue

            fields = line.rstrip('\n').split('\t')
            if fields[0] == 'CONTIG':
                continue

            if len(fields) < 5:
                misassembled_id_to_structure[contig_id].append(line.strip())
            elif len(fields) > 5:
                start = int(fields[start_col])
                end = int(fields[end_col])
                ref_name = fields[ref_col]
                # Remember the contig so later text lines attach to it.
                contig_id = fields[contig_col]
                ambiguity = fields[ambig_col]
                is_best = fields[best_col]

                block = Alignment(name=contig_id,
                                  start=start,
                                  end=end,
                                  ref_name=ref_name,
                                  is_best_set=is_best == 'True')
                block.ambiguous = ambiguity
                if block.is_best_set:
                    aligned_blocks.append(block)
                    misassembled_id_to_structure[contig_id].append(block)

    return aligned_blocks, misassembled_id_to_structure
コード例 #2
0
ファイル: circos.py プロジェクト: student-t/quast
def parse_nucmer_contig_report(report_fpath):
    """Parse a tab-separated contig alignment report.

    The header (first line) must contain the columns ``S1``, ``E1``,
    ``Reference``, ``Contig``, ``Ambiguous`` and ``Best_group``.  The
    remaining lines are classified as follows:

    * blank (empty or whitespace-only) lines are skipped;
    * lines starting with the field ``CONTIG`` are skipped;
    * lines with fewer than five fields are kept as stripped text under the
      contig id of the last alignment record read (``None`` before the
      first one);
    * lines with more than five fields are alignment records, retained only
      if their ``Best_group`` field equals the string ``'True'``;
    * lines with exactly five fields are ignored.

    Args:
        report_fpath: Path of the report file to read.

    Returns:
        ``(aligned_blocks, misassembled_id_to_structure)`` where
        ``aligned_blocks`` lists the retained ``Alignment`` objects in file
        order and ``misassembled_id_to_structure`` is a
        ``defaultdict(list)`` from contig id to that contig's retained
        alignments and text lines, in file order.

    Raises:
        ValueError: If a required header column is absent or a start/end
            field cannot be converted to ``int``.
        IndexError: If an alignment record has too few fields for a
            required column.
    """
    aligned_blocks = []
    misassembled_id_to_structure = defaultdict(list)

    with open(report_fpath) as report_file:
        first_line = next(report_file, None)
        if first_line is None:
            # Nothing to read, not even a header.
            return aligned_blocks, misassembled_id_to_structure

        columns = first_line.rstrip('\n').split('\t')
        start_col = columns.index('S1')
        end_col = columns.index('E1')
        ref_col = columns.index('Reference')
        contig_col = columns.index('Contig')
        ambig_col = columns.index('Ambiguous')
        best_col = columns.index('Best_group')

        contig_id = None
        for line in report_file:
            stripped = line.strip()
            if not stripped:
                # A blank line splits to [''] (one field), so without this
                # check it would be recorded as an empty text entry.
                continue

            fields = line.rstrip('\n').split('\t')
            if fields[0] == 'CONTIG':
                continue

            n_fields = len(fields)
            if n_fields < 5:
                misassembled_id_to_structure[contig_id].append(stripped)
            elif n_fields > 5:
                start = int(fields[start_col])
                end = int(fields[end_col])
                ref_name = fields[ref_col]
                # Subsequent text lines are attributed to this contig.
                contig_id = fields[contig_col]
                ambiguity = fields[ambig_col]
                is_best = fields[best_col]

                block = Alignment(name=contig_id, start=start, end=end, ref_name=ref_name,
                                  is_best_set=is_best == 'True')
                block.ambiguous = ambiguity
                if block.is_best_set:
                    aligned_blocks.append(block)
                    misassembled_id_to_structure[contig_id].append(block)

    return aligned_blocks, misassembled_id_to_structure
コード例 #3
0
ファイル: icarus_parser.py プロジェクト: lucian-ilie/LASER
def parse_nucmer_contig_report(report_fpath, ref_names,
                               cumulative_ref_lengths):
    """Parse a tab-separated contig alignment report.

    The header (first line) must contain the columns ``S1``, ``E1``, ``S2``,
    ``E2``, ``Reference``, ``Contig``, ``IDY``, ``Ambiguous`` and
    ``Best_group``.  The remaining lines are classified as follows:

    * blank (empty or whitespace-only) lines are skipped;
    * lines whose first field is ``CONTIG`` must have exactly four fields
      ``CONTIG, name, size, type`` and produce a ``Contig``;
    * lines with fewer than five fields are kept as stripped text under the
      contig id of the last alignment record read (``None`` before the
      first one);
    * lines with more than five fields are alignment records;
    * lines with exactly five fields are ignored.

    Args:
        report_fpath: Path of the report file to read.
        ref_names: Sequence of reference names; the position of a record's
            reference in it selects the shift for that record.
        cumulative_ref_lengths: Sequence indexed like ``ref_names``; the
            selected value is added to ``S1``/``E1`` to give the block's
            ``start``/``end``.

    Returns:
        ``(aligned_blocks, misassembled_id_to_structure, contigs,
        ambiguity_alignments)``: every ``Alignment`` in file order; a
        ``defaultdict(list)`` from contig id to its best-set alignments and
        text lines; the list of ``Contig`` objects; and a
        ``defaultdict(list)`` from contig id to its alignments that are not
        in the best set.

    Raises:
        ValueError: If a required header column is absent, a coordinate or
            contig size is not an integer, a ``CONTIG`` line does not have
            four fields, or a reference name is not in ``ref_names``.
        IndexError: If an alignment record has too few fields for a
            required column, or ``cumulative_ref_lengths`` is shorter than
            the reference's position in ``ref_names``.
    """
    aligned_blocks = []
    contigs = []
    misassembled_id_to_structure = defaultdict(list)
    ambiguity_alignments = defaultdict(list)

    with open(report_fpath) as report_file:
        header_line = next(report_file, None)
        if header_line is None:
            # Empty file: there is no header and nothing to parse.
            return aligned_blocks, misassembled_id_to_structure, contigs, ambiguity_alignments

        header = header_line.rstrip('\n').split('\t')
        start_col = header.index('S1')
        end_col = header.index('E1')
        start_in_contig_col = header.index('S2')
        end_in_contig_col = header.index('E2')
        ref_col = header.index('Reference')
        contig_col = header.index('Contig')
        idy_col = header.index('IDY')
        ambig_col = header.index('Ambiguous')
        best_col = header.index('Best_group')

        contig_id = None
        for line in report_file:
            # str.split('\t') never returns an empty list, so blank lines
            # must be filtered explicitly; otherwise they would be stored
            # as empty-string entries by the "< 5 fields" branch below.
            if not line.strip():
                continue

            fields = line.rstrip('\n').split('\t')
            if fields[0] == 'CONTIG':
                _, name, size, contig_type = fields
                contigs.append(Contig(name=name,
                                      size=int(size),
                                      contig_type=contig_type))
            elif len(fields) < 5:
                misassembled_id_to_structure[contig_id].append(line.strip())
            elif len(fields) > 5:
                raw_start = fields[start_col]
                raw_end = fields[end_col]
                raw_start_in_contig = fields[start_in_contig_col]
                raw_end_in_contig = fields[end_in_contig_col]
                ref_name = fields[ref_col]
                # Subsequent text lines are attributed to this contig.
                contig_id = fields[contig_col]
                idy = fields[idy_col]
                ambiguity = fields[ambig_col]
                is_best = fields[best_col]

                unshifted_start = int(raw_start)
                unshifted_end = int(raw_end)
                start_in_contig = int(raw_start_in_contig)
                end_in_contig = int(raw_end_in_contig)

                cur_shift = cumulative_ref_lengths[ref_names.index(ref_name)]
                start = unshifted_start + cur_shift
                end = unshifted_end + cur_shift

                # The coordinate pairs run in opposite directions exactly
                # when the product of their differences is negative.
                is_rc = ((start - end) * (start_in_contig - end_in_contig)) < 0
                block = Alignment(name=contig_id,
                                  start=start,
                                  end=end,
                                  unshifted_start=unshifted_start,
                                  unshifted_end=unshifted_end,
                                  is_rc=is_rc,
                                  start_in_contig=start_in_contig,
                                  end_in_contig=end_in_contig,
                                  position_in_ref=unshifted_start,
                                  ref_name=ref_name,
                                  idy=idy,
                                  is_best_set=is_best == 'True')
                block.ambiguous = ambiguity
                if block.is_best_set:
                    misassembled_id_to_structure[contig_id].append(block)
                else:
                    ambiguity_alignments[contig_id].append(block)

                aligned_blocks.append(block)

    return aligned_blocks, misassembled_id_to_structure, contigs, ambiguity_alignments
コード例 #4
0
ファイル: icarus_parser.py プロジェクト: student-t/quast
def parse_nucmer_contig_report(report_fpath, ref_names, cumulative_ref_lengths):
    """Parse a tab-separated contig alignment report.

    The first line of the file is a header that must name the columns
    ``S1``, ``E1``, ``S2``, ``E2``, ``Reference``, ``Contig``, ``IDY``,
    ``Ambiguous`` and ``Best_group``.  Every following line is handled
    according to its shape:

    * blank (empty or whitespace-only) lines are skipped;
    * lines whose first field is ``CONTIG`` must consist of exactly the four
      fields ``CONTIG, name, size, type`` and yield a ``Contig``;
    * lines with fewer than five fields are stored, stripped, as text
      entries under the contig id of the most recent alignment record
      (``None`` if no alignment record has been read yet);
    * lines with more than five fields are alignment records;
    * lines with exactly five fields are ignored.

    Args:
        report_fpath: Path of the report file to read.
        ref_names: Sequence of reference names; the position of a record's
            reference in it selects the shift for that record.
        cumulative_ref_lengths: Sequence indexed like ``ref_names``.  The
            selected value (replaced by ``1`` when it is falsy, e.g. ``0``)
            minus one is added to ``S1``/``E1`` to give the block's
            ``start``/``end``.

    Returns:
        A tuple ``(aligned_blocks, misassembled_id_to_structure, contigs,
        ambiguity_alignments)``: all ``Alignment`` objects in file order; a
        ``defaultdict(list)`` mapping contig id to its best-set alignments
        and text entries; the list of ``Contig`` objects; and a
        ``defaultdict(list)`` mapping contig id to its alignments outside
        the best set.

    Raises:
        ValueError: If a required header column is missing, a coordinate or
            contig size is not an integer, a ``CONTIG`` line does not have
            four fields, or a reference name is not in ``ref_names``.
        IndexError: If an alignment record is shorter than a required
            column position, or ``cumulative_ref_lengths`` is shorter than
            the reference's position in ``ref_names``.
    """
    aligned_blocks = []
    contigs = []
    misassembled_id_to_structure = defaultdict(list)
    ambiguity_alignments = defaultdict(list)

    with open(report_fpath) as report_file:
        first_line = next(report_file, None)
        if first_line is None:
            # Nothing to read, not even a header.
            return aligned_blocks, misassembled_id_to_structure, contigs, ambiguity_alignments

        columns = first_line.rstrip('\n').split('\t')
        start_col = columns.index('S1')
        end_col = columns.index('E1')
        start_in_contig_col = columns.index('S2')
        end_in_contig_col = columns.index('E2')
        ref_col = columns.index('Reference')
        contig_col = columns.index('Contig')
        idy_col = columns.index('IDY')
        ambig_col = columns.index('Ambiguous')
        best_col = columns.index('Best_group')

        contig_id = None
        for line in report_file:
            stripped = line.strip()
            if not stripped:
                # A blank line splits to [''] (one field), so without this
                # check it would be recorded as an empty text entry.
                continue

            fields = line.rstrip('\n').split('\t')
            if fields[0] == 'CONTIG':
                _, name, size, contig_type = fields
                contigs.append(Contig(name=name, size=int(size), contig_type=contig_type))
            elif len(fields) < 5:
                misassembled_id_to_structure[contig_id].append(stripped)
            elif len(fields) > 5:
                raw_start = fields[start_col]
                raw_end = fields[end_col]
                raw_start_in_contig = fields[start_in_contig_col]
                raw_end_in_contig = fields[end_in_contig_col]
                ref_name = fields[ref_col]
                # Remember the contig so later text lines attach to it.
                contig_id = fields[contig_col]
                idy = fields[idy_col]
                ambiguity = fields[ambig_col]
                is_best = fields[best_col]

                unshifted_start = int(raw_start)
                unshifted_end = int(raw_end)
                start_in_contig = int(raw_start_in_contig)
                end_in_contig = int(raw_end_in_contig)

                # A falsy shift (e.g. 0) becomes 1 so that subtracting one
                # below leaves the coordinates of that reference unchanged.
                cur_shift = cumulative_ref_lengths[ref_names.index(ref_name)] or 1
                start = unshifted_start + cur_shift - 1
                end = unshifted_end + cur_shift - 1

                # The coordinate pairs run in opposite directions exactly
                # when the product of their differences is negative.
                is_rc = ((start - end) * (start_in_contig - end_in_contig)) < 0
                block = Alignment(
                    name=contig_id, start=start, end=end,
                    unshifted_start=unshifted_start, unshifted_end=unshifted_end,
                    is_rc=is_rc, start_in_contig=start_in_contig, end_in_contig=end_in_contig,
                    position_in_ref=unshifted_start, ref_name=ref_name,
                    idy=idy, is_best_set=is_best == 'True')
                block.ambiguous = ambiguity
                if block.is_best_set:
                    misassembled_id_to_structure[contig_id].append(block)
                else:
                    ambiguity_alignments[contig_id].append(block)

                aligned_blocks.append(block)

    return aligned_blocks, misassembled_id_to_structure, contigs, ambiguity_alignments