예제 #1
0
    def test_e2e_ovl_8_20_7(self, e2e_ovl_8_20_7: FixtureForFindOvl):
        # Checks that `find_overlap_e2e` reports no overlap (returns 0)
        #   for the sequences and k-range supplied by the fixture.
        # NOTE(review): the previous comment here said an overlap of 7 bp
        #   should be found, which contradicts the asserted value of 0.
        #   Presumably the 7-bp overlap is shorter than `mink` and therefore
        #   must not be reported -- confirm against the fixture definition.
        expected_ovl_len: int = 0

        # Fixture layout: (first sequence, second sequence, mink, maxk)
        first_seq: str
        second_seq: str
        first_seq, second_seq = e2e_ovl_8_20_7[0], e2e_ovl_8_20_7[1]
        min_len: int
        max_len: int
        min_len, max_len = e2e_ovl_8_20_7[2], e2e_ovl_8_20_7[3]

        observed_ovl_len = fov.find_overlap_e2e(first_seq, second_seq,
                                                min_len, max_len)
        assert observed_ovl_len == expected_ovl_len
예제 #2
0
    def test_e2e_ovl_7_6(self, e2e_ovl_7_6: FixtureForFindOvl):
        # No overlap is expected for this fixture, so `find_overlap_e2e`
        #   must return 0.
        expected_ovl_len: int = 0

        # Fixture layout: (first sequence, second sequence, mink, maxk)
        first_seq: str
        second_seq: str
        first_seq, second_seq = e2e_ovl_7_6[0], e2e_ovl_7_6[1]
        min_len: int
        max_len: int
        min_len, max_len = e2e_ovl_7_6[2], e2e_ovl_7_6[3]

        observed_ovl_len = fov.find_overlap_e2e(first_seq, second_seq,
                                                min_len, max_len)
        assert observed_ovl_len == expected_ovl_len
예제 #3
0
def detect_adjacent_contigs(contig_collection: ContigCollection, mink: int,
                            maxk: int) -> OverlapCollection:
    # Function detects adjacent contigs by comparing their termini:
    #   every contig is compared to itself and to all contigs that follow it
    #   in the collection.
    #
    # :param contig_collection: instance of ContigCollection returned by
    #   `src.contigs.get_contig_collection` function;
    # :param mink: minimum length of an overlap to be detected;
    # :param maxk: maximum length of an overlap to be detected;
    #
    # Returns an OverlapCollection with all detected overlaps. Every detected
    #   overlap is added twice: one record for each of the two termini
    #   involved. Progress ("<processed>/<total>") is printed to stdout.
    #
    # NOTE(review): a contig with `length <= mink` is skipped only as the
    #   outer ("i") contig; it still takes part in comparisons as the inner
    #   ("j") contig for the contigs that precede it. Confirm this is intended.

    # Count contigs once in order not to re-count them later.
    contig_count: int = len(contig_collection)

    # Collection that accumulates all the overlaps found below
    overlaps: OverlapCollection = OverlapCollection()

    def record(length: int, *records) -> None:
        # Adds one Overlap per `(key, key_terminus, partner, partner_terminus)`
        #   tuple, in the order given. Does nothing if `length` equals 0,
        #   i.e. when no overlap has been found.
        if length == 0:
            return
        # end if
        for key, key_term, partner, partner_term in records:
            overlaps.add_overlap(
                key, Overlap(key, key_term, partner, partner_term, length))
        # end for
    # end def

    ovl_len: int
    i: ContigIndex
    j: ContigIndex
    for i in range(contig_count):

        current = contig_collection[i]
        progress: str = '\r{}/{}'.format(i + 1, contig_count)

        # Omit contigs whose length does not exceed `mink`
        if current.length <= mink:
            print(progress, end='')
            continue
        # end if

        # === Current contig against itself: end vs. start ===
        # An overlap as long as the whole contig is not reported.
        ovl_len = find_overlap_e2s(current.end, current.start, mink, maxk)
        if ovl_len not in (0, current.length):
            record(ovl_len, (i, END, i, START), (i, START, i, END))
        # end if

        # === Current contig against itself: start vs. rc-end ===
        record(find_overlap_s2s(current.start, current.rcend, mink, maxk),
               (i, START, i, RCEND), (i, RCEND, i, START))

        # |=== Compare i-th contig to contigs from i+1 to N ===|
        # Starting from i+1 ensures that no pair of contigs is compared
        #   more than once.
        for j in range(i + 1, contig_count):

            other = contig_collection[j]

            # === i-th start vs. j-th end ===
            record(find_overlap_e2s(other.end, current.start, mink, maxk),
                   (i, START, j, END), (j, END, i, START))

            # === i-th end vs. j-th start ===
            record(find_overlap_e2s(current.end, other.start, mink, maxk),
                   (i, END, j, START), (j, START, i, END))

            # === i-th start vs. reverse-complement j-th start ===
            record(find_overlap_e2s(other.rcstart, current.start, mink, maxk),
                   (i, START, j, RCSTART), (j, START, i, RCSTART))

            # === i-th end vs. reverse-complement j-th end ===
            record(find_overlap_e2s(current.end, other.rcend, mink, maxk),
                   (i, END, j, RCEND), (j, END, i, RCEND))

            # === i-th start vs. j-th start ===
            record(find_overlap_s2s(current.start, other.start, mink, maxk),
                   (i, START, j, START), (j, START, i, START))

            # === i-th end vs. j-th end ===
            record(find_overlap_e2e(current.end, other.end, mink, maxk),
                   (i, END, j, END), (j, END, i, END))

            # === i-th start vs. reverse-complement j-th end ===
            record(find_overlap_s2s(current.start, other.rcend, mink, maxk),
                   (i, START, j, RCEND), (j, RCEND, i, START))

            # === i-th end vs. reverse-complement j-th start ===
            record(find_overlap_e2e(current.end, other.rcstart, mink, maxk),
                   (i, END, j, RCSTART), (j, RCSTART, i, END))
        # end for

        print(progress, end='')
    # end for

    # Terminate the progress line
    print()

    return overlaps