def test_getRelPos(self):
    # Absolute positions inside Interval(5, 15) map to offsets from 5;
    # the stop position itself (15) must be rejected with IndexError.
    interval = Interval(5, 15)
    for absolute, relative in ((5, 0), (10, 5), (14, 9)):
        self.assertEqual(interval.get_rel_pos(absolute), relative)
    with self.assertRaises(IndexError):
        interval.get_rel_pos(15)
def test_getAbsPos(self):
    # Offsets within Interval(5, 15) map back to absolute positions;
    # an offset equal to the interval length (10) must raise IndexError.
    interval = Interval(5, 15)
    for relative, absolute in ((0, 5), (5, 10), (9, 14)):
        self.assertEqual(interval.get_abs_pos(relative), absolute)
    with self.assertRaises(IndexError):
        interval.get_abs_pos(10)
def test_get_rel_pos(self):
    # First and last positions covered by Interval(100, 200) map to 0 and 99.
    span = Interval(100, 200)
    first_offset = span.get_rel_pos(100)
    self.assertEqual(0, first_offset)
    last_offset = span.get_rel_pos(199)
    self.assertEqual(99, last_offset)
    # One position before the start and the stop itself are both out of range.
    for outside in (99, 200):
        with self.assertRaises(IndexError):
            span.get_rel_pos(outside)
def clip_read(read):
    """Clip the alignment of *read* to its best-overlapping interval.

    The interval with the largest intersection with the read is fetched from
    ``intervals``, adjusted by ``offset_5p`` / ``offset_3p``, and the read's
    CIGAR is rewritten so that query-consuming operations outside the
    adjusted interval become soft clips.

    Returns a ``(read, message)`` pair: the original read with a warning
    message when the reference sequence is unknown or nothing overlaps,
    otherwise a new ``SamLine`` and the message ``'clipped'``.
    """
    read_span = Interval(read.pos, read.pos + len(read.seq))
    try:
        candidates = intervals.fetch(read.rname, read_span.start, read_span.stop)
        # Ascending by overlap length, so the best candidate ends up last.
        candidates.sort(key=lambda ivl: len(read_span.intersect(ivl)))
        match = candidates[-1]
    except KeyError as e:
        return read, 'warning, reference sequence "{}" not found'.format(str(e))
    except IndexError:
        return read, 'warning, no overlapping intervals'
    match = Interval(match.start + offset_5p + 1, match.stop + offset_3p)

    # Operation sets exactly as this routine classifies them.
    advances_ref = 'MDNS=X'
    soft_clippable = 'MIS=X'

    ops = expand_cigar(read.cigar)
    n_ops = len(ops)
    clipped = []
    i = 0

    # Phase 1: everything before the first 'M'; the reference position is
    # not advanced here, and non-clippable operations are dropped.
    while i < n_ops and ops[i] != 'M':
        if ops[i] in soft_clippable:
            clipped.append('S')
        i += 1

    # Phase 2: walk along the reference up to the interval start, turning
    # query-consuming operations into soft clips and dropping deletions.
    ref_pos = read.pos
    while i < n_ops and ref_pos < match.start:
        op = ops[i]
        ref_pos += op in advances_ref
        if op in soft_clippable:
            clipped.append('S')
        elif op not in 'D':
            clipped.append(op)
        i += 1

    # Phase 3: copy operations through unchanged until the next 'M'; the
    # reference position reached here becomes the new alignment position.
    while i < n_ops and ops[i] != 'M':
        op = ops[i]
        ref_pos += op in advances_ref
        clipped.append(op)
        i += 1
    pos = ref_pos

    # Phase 4: keep operations unchanged while inside the interval.
    while i < n_ops and ref_pos < match.stop:
        op = ops[i]
        ref_pos += op in advances_ref
        clipped.append(op)
        i += 1

    # Phase 5: whatever is left lies past the interval stop.
    for k in range(i, n_ops):
        if ops[k] in soft_clippable:
            clipped.append('S')

    fields = list(read)
    fields[3] = pos
    fields[5] = contract_cigar(clipped)
    return SamLine(*fields), 'clipped'
def difference(self, other, *, ignore_strand=False):
    """Return the parts of this interval that lie outside *other*.

    Unless ``ignore_strand`` is true, ``_assert_same_chromosome_and_strand``
    is called on *other* first.  The result is an ``Interval.INTERVAL_PAIR``:
    ``(self, None)`` when the intervals do not overlap, otherwise
    ``(left, right)`` where ``left`` spans from this interval's start to
    *other*'s start and ``right`` spans from *other*'s stop to this
    interval's stop; each side is ``None`` when there is nothing left over
    on that side.
    """
    if not ignore_strand:
        self._assert_same_chromosome_and_strand(other)
    if not self.overlaps(other):
        return Interval.INTERVAL_PAIR(self, None)

    def remainder(start_position, stop_position):
        # Leftover pieces inherit chromosome, strand and data from self.
        return GenomicInterval(start_position, stop_position,
                               chromosome=self.chromosome,
                               strand=self.strand,
                               data=self.data)

    left = (remainder(self.start.position, other.start.position)
            if self.start < other.start else None)
    right = (remainder(other.stop.position, self.stop.position)
             if other.stop < self.stop else None)
    return Interval.INTERVAL_PAIR(left, right)
def __init__(self, iterator):
    """Materialise *iterator* and index its entries by name and by location.

    ``key_index`` maps each entry's ``name`` and ``ivl_index`` maps each
    ``(chr, Interval(start, stop))`` key to the entry's position in ``data``.
    """
    self.key_index = MultiDimensionMap([str])
    self.ivl_index = MultiDimensionMap([str, Interval])
    self.data = list(iterator)
    for position, record in enumerate(self.data):
        self.key_index[record.name] = position
        location = (record.chr, Interval(record.start, record.stop))
        self.ivl_index[location] = position
def __init__(self, iterator):
    """Materialise *iterator* and index its entries by name and by location.

    ``key_index`` is a plain dict from ``entry.data['name']`` to the entry's
    position in ``data``; ``ivl_index`` maps ``(chromosome, Interval)`` keys,
    built from the entry's start/stop positions, to the same position.
    """
    self.key_index = {}
    self.ivl_index = MultiDimensionMap([str, Interval])
    self.data = list(iterator)
    for position, record in enumerate(self.data):
        self.key_index[record.data['name']] = position
        location = (
            record.chromosome,
            Interval(record.start.position, record.stop.position),
        )
        self.ivl_index[location] = position
def fetch(self, chr, start, stop):
    """Return the entries that the interval index yields for the query.

    The index is queried with the key ``(chr, Interval(start, stop))`` and
    each value it yields is used as a position into ``self.data``.
    """
    query = Interval(start, stop)
    found = []
    for position in self.ivl_index[chr, query]:
        found.append(self.data[position])
    return found
def __init__(self, iterator):
    """Materialise *iterator* and index each variant by the span of its ref.

    Every variant is keyed by ``(chromosome, Interval)``, where the interval
    starts at ``variant.position`` and extends by the length of
    ``variant.data['ref']``; the indexed value is the variant's position in
    ``self.data``.
    """
    self.data = list(iterator)
    self.ivl_index = MultiDimensionMap([str, Interval])
    for position, record in enumerate(self.data):
        span = Interval(
            record.position,
            record.position + len(record.data['ref']),
        )
        self.ivl_index[(record.chromosome, span)] = position
def fetch(self, chr, start, stop=None):
    """Return the entries that the interval index yields for the query.

    When *stop* is omitted the query covers the single position *start*
    (``stop = start + 1``).  Each value the index yields for
    ``(chr, Interval(start, stop))`` is used as a position into ``self.data``.
    """
    end = start + 1 if stop is None else stop
    query = (chr, Interval(start, end))
    found = []
    for position in self.ivl_index[query]:
        found.append(self.data[position])
    return found
def get_item(starts, stops):
    """Pair up *starts* and *stops* into a list of items.

    A pair whose start equals its stop is represented by the bare start
    value; any other pair becomes ``Interval(start, stop)``.  Pairing stops
    at the shorter of the two inputs.
    """
    items = []
    for lower, upper in zip(starts, stops):
        if lower == upper:
            items.append(lower)
        else:
            items.append(Interval(lower, upper))
    return items
def __init__(self, iterator):
    """Materialise *iterator* and index its entries by location.

    Each entry is keyed by ``(chr, Interval(start, stop))`` and the indexed
    value is the entry's position in ``self.data``.
    """
    self.data = list(iterator)
    self.ivl_index = MultiDimensionMap([str, Interval])
    for position, record in enumerate(self.data):
        span = Interval(record.start, record.stop)
        self.ivl_index[(record.chr, span)] = position
def test_overlaps(self):
    # Each case is checked in both directions: overlaps() must be symmetric.
    a = Interval(0, 10)
    b = Interval(6, 16)
    c = Interval(12, 22)
    d = Interval(2, 8)
    e = Interval(10, 20)
    cases = (
        (b, True),   # partially overlapping
        (c, False),  # gap between the two
        (d, True),   # fully inside a
        (e, False),  # starts exactly where a stops
    )
    for other, expected in cases:
        check = self.assertTrue if expected else self.assertFalse
        check(a.overlaps(other))
        check(other.overlaps(a))
def test_touches(self):
    # Each case is checked in both directions: touches() must be symmetric.
    a = Interval(0, 10)
    b = Interval(6, 16)
    c = Interval(12, 22)
    d = Interval(2, 8)
    e = Interval(10, 20)
    cases = (
        (b, False),  # partially overlapping
        (c, False),  # gap between the two
        (d, False),  # fully inside a
        (e, True),   # starts exactly where a stops
    )
    for other, expected in cases:
        check = self.assertTrue if expected else self.assertFalse
        check(a.touches(other))
        check(other.touches(a))
def test_contains(self):
    # contains() is directional, so every (outer, inner) pair is listed
    # explicitly, in the order the checks are made.
    a = Interval(0, 10)
    b = Interval(6, 16)
    c = Interval(12, 22)
    d = Interval(2, 8)
    e = Interval(10, 20)
    cases = (
        (a, b, False),
        (b, a, False),
        (a, c, False),
        (c, a, False),
        (a, d, True),
        (d, a, False),
        (a, e, False),
        (d, e, False),
    )
    for outer, inner, expected in cases:
        check = self.assertTrue if expected else self.assertFalse
        check(outer.contains(inner))