def test_e2e_ovl_8_20_7(self, e2e_ovl_8_20_7: FixtureForFindOvl):
    # Checks the value returned by `fov.find_overlap_e2e` for this fixture.
    #
    # The fixture is read by index: [0] and [1] are the two sequences,
    #   [2] is `mink` and [3] is `maxk`.
    # The call is expected to return 0.
    # NOTE(review): an earlier comment here stated that an overlap of 7 bp
    #   should be found, whereas the assertion expects 0 -- confirm which
    #   of the two is intended.

    left_seq, right_seq, min_ovl, max_ovl = (
        e2e_ovl_8_20_7[pos] for pos in range(4)
    )

    detected_len: int = fov.find_overlap_e2e(left_seq, right_seq, min_ovl, max_ovl)
    assert detected_len == 0
def test_e2e_ovl_7_6(self, e2e_ovl_7_6: FixtureForFindOvl):
    # No overlap should be reported for this fixture:
    #   `fov.find_overlap_e2e` is expected to return 0.
    #
    # The fixture is read by index: [0] and [1] are the two sequences,
    #   [2] is `mink` and [3] is `maxk`.

    left_seq, right_seq, min_ovl, max_ovl = (
        e2e_ovl_7_6[pos] for pos in range(4)
    )

    detected_len: int = fov.find_overlap_e2e(left_seq, right_seq, min_ovl, max_ovl)
    assert detected_len == 0
def _add_overlap_pair(overlap_collection: OverlapCollection,
                      first: tuple,
                      second: tuple,
                      ovl_len: int) -> None:
    # Function registers a single detected overlap as two `Overlap` records.
    #
    # :param overlap_collection: collection to add the records to;
    # :param first: tuple `(contig index, terminus, other contig index, other terminus)`
    #   for the record which is added first; it is stored under `first[0]`;
    # :param second: tuple of the same layout for the record which is added second;
    #   it is stored under `second[0]`;
    # :param ovl_len: length of the overlap;

    overlap_collection.add_overlap(first[0], Overlap(*first, ovl_len))
    overlap_collection.add_overlap(second[0], Overlap(*second, ovl_len))


def _detect_self_overlaps(contig_collection: ContigCollection,
                          overlap_collection: OverlapCollection,
                          i: ContigIndex,
                          mink: int,
                          maxk: int) -> None:
    # Function compares termini of the i-th contig to each other.
    #
    # :param contig_collection: collection of contigs;
    # :param overlap_collection: collection to add detected overlaps to;
    # :param i: index of the contig to examine;
    # :param mink: minimum length of an overlap to be detected;
    # :param maxk: maximum length of an overlap to be detected;

    contig = contig_collection[i]
    ovl_len: int

    # === Compare start of the current contig to end of the current contig ===
    ovl_len = find_overlap_e2s(contig.end, contig.start, mink, maxk)
    # An overlap as long as the whole contig is not recorded
    if ovl_len not in (0, contig.length):
        _add_overlap_pair(overlap_collection,
                          (i, END, i, START), (i, START, i, END), ovl_len)
    # end if

    # === Compare start of the current contig to rc-end of the current contig ===
    ovl_len = find_overlap_s2s(contig.start, contig.rcend, mink, maxk)
    if ovl_len != 0:
        _add_overlap_pair(overlap_collection,
                          (i, START, i, RCEND), (i, RCEND, i, START), ovl_len)
    # end if


def _detect_pair_overlaps(contig_collection: ContigCollection,
                          overlap_collection: OverlapCollection,
                          i: ContigIndex,
                          j: ContigIndex,
                          mink: int,
                          maxk: int) -> None:
    # Function compares termini of the i-th contig to termini of the j-th contig.
    # Every detected overlap is stored twice: once for `i` and once for `j`.
    #
    # :param contig_collection: collection of contigs;
    # :param overlap_collection: collection to add detected overlaps to;
    # :param i: index of the first contig;
    # :param j: index of the second contig;
    # :param mink: minimum length of an overlap to be detected;
    # :param maxk: maximum length of an overlap to be detected;

    contig_i = contig_collection[i]
    contig_j = contig_collection[j]
    ovl_len: int

    # === Compare i-th start to j-th end ===
    ovl_len = find_overlap_e2s(contig_j.end, contig_i.start, mink, maxk)
    if ovl_len != 0:
        _add_overlap_pair(overlap_collection,
                          (i, START, j, END), (j, END, i, START), ovl_len)
    # end if

    # === Compare i-th end to j-th start ===
    ovl_len = find_overlap_e2s(contig_i.end, contig_j.start, mink, maxk)
    if ovl_len != 0:
        _add_overlap_pair(overlap_collection,
                          (i, END, j, START), (j, START, i, END), ovl_len)
    # end if

    # === Compare i-th start to reverse-complement j-th start ===
    ovl_len = find_overlap_e2s(contig_j.rcstart, contig_i.start, mink, maxk)
    if ovl_len != 0:
        _add_overlap_pair(overlap_collection,
                          (i, START, j, RCSTART), (j, START, i, RCSTART), ovl_len)
    # end if

    # === Compare i-th end to reverse-complement j-th end ===
    ovl_len = find_overlap_e2s(contig_i.end, contig_j.rcend, mink, maxk)
    if ovl_len != 0:
        _add_overlap_pair(overlap_collection,
                          (i, END, j, RCEND), (j, END, i, RCEND), ovl_len)
    # end if

    # === Compare i-th start to j-th start ===
    ovl_len = find_overlap_s2s(contig_i.start, contig_j.start, mink, maxk)
    if ovl_len != 0:
        _add_overlap_pair(overlap_collection,
                          (i, START, j, START), (j, START, i, START), ovl_len)
    # end if

    # === Compare i-th end to j-th end ===
    ovl_len = find_overlap_e2e(contig_i.end, contig_j.end, mink, maxk)
    if ovl_len != 0:
        _add_overlap_pair(overlap_collection,
                          (i, END, j, END), (j, END, i, END), ovl_len)
    # end if

    # === Compare i-th start to reverse-complement j-th end ===
    ovl_len = find_overlap_s2s(contig_i.start, contig_j.rcend, mink, maxk)
    if ovl_len != 0:
        _add_overlap_pair(overlap_collection,
                          (i, START, j, RCEND), (j, RCEND, i, START), ovl_len)
    # end if

    # === Compare i-th end to reverse-complement j-th start ===
    ovl_len = find_overlap_e2e(contig_i.end, contig_j.rcstart, mink, maxk)
    if ovl_len != 0:
        _add_overlap_pair(overlap_collection,
                          (i, END, j, RCSTART), (j, RCSTART, i, END), ovl_len)
    # end if


def detect_adjacent_contigs(contig_collection: ContigCollection,
                            mink: int,
                            maxk: int) -> OverlapCollection:
    # Function detects adjacent contigs by comparing their termini.
    # It prints progress (`current/total`) to stdout while working.
    #
    # :param contig_collection: instance of ContigCollection returned by
    #   `src.contigs.get_contig_collection` function;
    # :param mink: minimum length of an overlap to be detected;
    # :param maxk: maximum length of an overlap to be detected;
    #
    # Returns an `OverlapCollection` holding all detected overlaps.

    # Count contigs once and save the number in order not to re-count it later.
    num_contigs: int = len(contig_collection)

    # Initialize `OverlapCollection` instance
    overlap_collection: OverlapCollection = OverlapCollection()

    # Iterate over contigs and compare their termini to other termini
    i: ContigIndex
    for i in range(num_contigs):

        # Contigs of length `mink` or shorter are omitted here.
        # NOTE(review): such contigs are skipped only as the i-th contig;
        #   they still take part in comparisons as the j-th contig
        #   of preceding contigs -- confirm this is intended.
        if contig_collection[i].length > mink:

            _detect_self_overlaps(contig_collection, overlap_collection, i, mink, maxk)

            # |=== Compare i-th contig to contigs from i+1 to N ===|
            #   We do it in order not to compare pairs of contigs more than one time
            j: ContigIndex
            for j in range(i + 1, num_contigs):
                _detect_pair_overlaps(contig_collection, overlap_collection,
                                      i, j, mink, maxk)
            # end for
        # end if

        # Report progress for every contig, including omitted ones
        print('\r{}/{}'.format(i + 1, num_contigs), end='')
    # end for
    print()

    return overlap_collection