def test_overlapping_intervals(self):
    """
    Check alignment_utils.overlapping_intervals on three coordinate pairs.

    (0, 100) vs (50, 101) and (50, 101) vs (101, 200) are expected to be reported as overlapping,
    while (0, 100) vs (101, 200) is expected to be reported as not overlapping.
    """
    from samsum import alignment_utils

    overlaps = alignment_utils.overlapping_intervals
    left, middle, right = (0, 100), (50, 101), (101, 200)

    # Neighbouring pairs share at least one coordinate
    self.assertTrue(overlaps(left, middle))
    self.assertTrue(overlaps(middle, right))
    # The outer pair has no coordinate in common
    self.assertFalse(overlaps(left, right))
def test_overlapping_intervals():
    """
    Check that alignment_utils.overlapping_intervals returns the exact booleans True/False.

    Each case is (first interval, second interval, expected result); the cases are evaluated in order
    and the first mismatch fails the test.
    """
    from samsum import alignment_utils

    left, middle, right = (0, 100), (50, 101), (101, 200)
    cases = (
        (left, middle, True),
        (middle, right, True),
        (left, right, False),
    )
    for first, second, expected in cases:
        # Identity comparison on purpose: a merely truthy/falsy return value must not pass
        assert alignment_utils.overlapping_intervals(first, second) is expected
def merge_tiles(self) -> None:
    """
    Merge Tile instances in self.tiles whose coordinate ranges overlap.

    Tile instances must have a 'start' and 'end' variable. self.tiles is sorted in place by 'start';
    every Tile that is merged into an earlier one is removed from self.tiles, so an overlapping
    region is represented by a single Tile afterwards.

    :return: None
    """
    i = 0
    while i < len(self.tiles):
        # Sort the list itself: sorted() returns a copy, and removing merged tiles from a copy
        # would leave them behind in self.tiles.
        # Re-sorting on every pass mirrors the previous behaviour and keeps the order valid in case
        # Tile.merge (not visible here) changes a tile's 'start'.
        self.tiles.sort(key=lambda x: x.start)
        tile_i = self.tiles[i]  # type: Tile
        j = i + 1
        while j < len(self.tiles):
            tile_j = self.tiles[j]  # type: Tile
            if ss_aln_utils.overlapping_intervals((tile_i.start, tile_i.end), (tile_j.start, tile_j.end)):
                tile_i.merge(tile_j)
                # Drop the absorbed tile; index j now refers to the next candidate, so do not advance
                self.tiles.pop(j)
            else:
                j += 1
        i += 1
    return
def proportion_covered(self) -> float:
    """
    Calculate the proportion of the RefSequence that was covered by mapped reads.

    Procedure:
      1. If no reads were mapped, return 0 straight away.
      2. Empty self.tiles, then walk self.alignments ordered by 'start'. Each alignment is loaded into a
         new Tile, which absorbs the first already-stored Tile overlapping the alignment (that stored
         Tile is removed), and is then appended to self.tiles.
      3. Call self.merge_tiles(), because only one stored Tile is absorbed per alignment and the
         resulting tiles may therefore still overlap.
      4. Add up (end - start) over self.tiles and divide by self.length.

    :return: Float representing the proportion of the Reference Sequence that was covered
    """
    if self.reads_mapped == 0:
        return 0

    self.tiles.clear()
    ordered_alignments = sorted(self.alignments, key=lambda x: x.start)
    for aln_dat in ordered_alignments:  # type: AlignmentDat
        new_tile = Tile()
        new_tile.load_from_alignment_dat(aln_dat)
        for idx, placed in enumerate(self.tiles):  # type: int, Tile
            if ss_aln_utils.overlapping_intervals((placed.start, placed.end), (aln_dat.start, aln_dat.end)):
                # Only the first overlapping tile is absorbed; stop scanning right after removing it
                new_tile.merge(self.tiles.pop(idx))
                break
        self.tiles.append(new_tile)

    # Coordinates are not re-compared while tiles are absorbed above, so a final merge pass is needed
    self.merge_tiles()

    # Combined length of all tiles across the reference sequence
    covered = sum(placed.end - placed.start for placed in self.tiles)
    return covered / self.length