def setUp(self):
    """Build the fixture: ten-wide intervals starting at 0, 10, ..., 100.

    The node for (50, 59) is created first; the remaining intervals are
    inserted one by one and the resulting node is stored in
    ``self.intervals``.
    """
    root = IntervalNode(50, 59, Interval(50, 59))
    # (50, 59) is already in place, so that start is left out of the loop.
    remaining_starts = (start for start in range(0, 110, 10) if start != 50)
    for start in remaining_starts:
        feature = Interval(start, start + 9)
        # Keep whatever node insert() hands back as the new working node.
        root = root.insert(feature.start, feature.end, feature)
    self.intervals = root
def setUp(self):
    """Build the fixture tree: ten-wide intervals starting at 0, 10, ..., 100.

    (50, 59) is added first, then every other interval in ascending order;
    the tree is stored in ``self.intervals``.
    """
    tree = IntervalTree()
    tree.add_interval(Interval(50, 59))
    for start in range(0, 110, 10):
        # (50, 59) was added up front; do not add it a second time.
        if start != 50:
            feature = Interval(start, start + 9)
            tree.add_interval(feature)
    self.intervals = tree
def test_left(self):
    """Check ``left()`` against the fixture built by ``setUp``."""
    tree = self.intervals

    # The two hits to the left of 60, compared through their string form.
    two_nearest = tree.left(60, n=2)
    expected = [Interval(50, 59), Interval(40, 49)]
    self.assertEqual(str(two_nearest), str(expected))

    # For each ten-boundary, the single hit within max_dist=10 must end
    # exactly one position before it.
    for boundary in range(10, 100, 10):
        hits = tree.left(boundary, max_dist=10, n=1)
        self.assertEqual(hits[0].end, boundary - 1)
def setUp(self):
    """Build a large fixture of zero-width intervals plus many duplicates.

    Starting from a node for (1, 2), a point interval is inserted at every
    tenth position below ``self.max``, followed by 600 copies of (0, 1).
    The resulting node is stored in ``self.intervals``.
    """
    root = IntervalNode(1, 2, Interval(1, 2))
    self.max = 1000000
    for position in range(0, self.max, 10):
        point = Interval(position, position)
        root = root.insert(point.start, point.end, point)
    # Pile up identical intervals on the same coordinates.
    for _ in range(600):
        root = root.insert(0, 1, Interval(0, 1))
    self.intervals = root
def test_downstream(self):
    """Check ``downstream_of_interval()`` for an unstranded and a -1 strand query."""
    tree = self.intervals

    # Query built without a strand: every hit must start past the query end.
    plain_query = Interval(59, 60)
    for hit in tree.downstream_of_interval(plain_query, num_intervals=200):
        self.assertTrue(hit.start > 60)

    # Query with strand=-1: every hit must start before the query start.
    minus_query = Interval(59, 60, strand=-1)
    for hit in tree.downstream_of_interval(minus_query, num_intervals=200):
        self.assertTrue(hit.start < 59)
def test_upstream(self):
    """Check ``upstream_of_interval()`` for an unstranded and two -1 strand queries."""
    tree = self.intervals

    def expect_all(query, predicate):
        # Every hit returned for *query* must satisfy *predicate*.
        for hit in tree.upstream_of_interval(query, num_intervals=200):
            self.assertTrue(predicate(hit))

    # Query built without a strand: hits must end before the query start.
    expect_all(Interval(59, 60), lambda hit: hit.end < 59)
    # Queries with strand=-1: hits must start beyond the stated coordinate.
    expect_all(Interval(60, 70, strand=-1), lambda hit: hit.start > 70)
    expect_all(Interval(58, 58, strand=-1), lambda hit: hit.start > 59)
def test_right(self):
    """Check ``right()`` against the fixture built by ``setUp``."""
    tree = self.intervals

    # NOTE(review): this first check queries left(), not right(); it is kept
    # exactly as it was.
    two_nearest = tree.left(60, n=2)
    expected = [Interval(50, 59), Interval(40, 49)]
    self.assertEqual(str(two_nearest), str(expected))

    # Asking from just inside an interval must yield exactly one hit, and
    # that hit starts at the following ten-boundary.
    for boundary in range(10, 100, 10):
        hits = tree.right(boundary + 1, n=1)
        assert len(hits) == 1
        self.assertEqual(hits[0].start, boundary + 10)

    # Asking from one position before a boundary must yield the interval
    # starting on that boundary.
    for boundary in range(0, 100, 10):
        hits = tree.right(boundary - 1, max_dist=10, n=1)
        self.assertEqual(hits[0].start, boundary)
def setUp(self):
    """Populate a tree through each of the four insertion entry points.

    For every base position in ``range(1, 1000, 80)`` four intervals are
    stored, one per API call. The tree is exposed as both
    ``self.intervals`` and ``self.iv``; ``self.nintervals`` holds the number
    of intervals stored.
    """
    tree = IntervalTree()
    stored = 0
    for base in range(1, 1000, 80):
        square = base * base
        tree.insert(base, base + 10, dict(value=square))
        # add is synonym for insert.
        tree.add(base + 20, base + 30, dict(astr=str(square)))
        # or insert/add an interval object with start, end attrs.
        third = Interval(base + 40, base + 50, value=dict(astr=str(square)))
        tree.insert_interval(third)
        fourth = Interval(base + 60, base + 70, value=dict(astr=str(square)))
        tree.add_interval(fourth)
        stored += 4
    self.intervals = tree
    self.iv = tree
    self.nintervals = stored
def test_n(self):
    """Check that ``after()`` and ``after_interval()`` return the next two intervals."""
    tree = self.intervals
    for position in range(0, 90, 10):
        first_start = position + 10
        second_start = position + 20

        # Query by plain position.
        hits = tree.after(position, max_dist=20, num_intervals=2)
        self.assertEqual(hits[0].start, first_start)
        self.assertEqual(hits[1].start, second_start)

        # Same query expressed as a zero-width interval.
        point = Interval(position, position)
        hits = tree.after_interval(point, max_dist=20, num_intervals=2)
        self.assertEqual(hits[0].start, first_start)
        self.assertEqual(hits[1].start, second_start)
def _check_cdna_vs_utr(transcript):
    """
    Verify that cDNA + UTR in the transcript add up.

    When the cDNA is longer than UTR and CDS combined, the UTR is discarded
    and rebuilt from the exons and the combined CDS: an exon carrying no CDS
    becomes UTR as a whole, an exon carrying CDS contributes the stretches
    the CDS leaves uncovered. A transcript with neither UTR nor CDS is
    treated as non-coding and left untouched. Finally the lengths are
    compared once more.

    :param transcript: object exposing ``id``, ``logger``, ``exons``,
        ``combined_cds``, ``combined_utr`` and the ``cdna_length``,
        ``combined_cds_length``, ``combined_utr_length`` values.
        ``combined_cds`` (re-sorted) and ``combined_utr`` (rebuilt) are
        reassigned when a recalculation takes place.
    :return: None
    :raises InvalidCDS: if a CDS segment maps to no exon or to more than one,
        sticks out of its exon, is not separated from the next CDS segment
        in the same exon, or if the lengths still do not add up at the end.
    """

    transcript.logger.debug("Checking the cDNA for %s", transcript.id)
    if transcript.cdna_length > transcript.combined_utr_length + transcript.combined_cds_length:
        if transcript.combined_utr == transcript.combined_cds == []:
            # non-coding transcript
            transcript.logger.debug("%s is non coding, returning", transcript.id)
            return
        assert transcript.combined_cds != []
        transcript.logger.debug(
            "Recalculating the UTR for %s. Reason: cDNA length %s, UTR %s, CDS %s (total %s)",
            transcript.id,
            transcript.cdna_length,
            transcript.combined_utr_length,
            transcript.combined_cds_length,
            transcript.combined_utr_length + transcript.combined_cds_length)
        transcript.combined_utr = []  # Reset
        transcript.combined_cds = sorted(transcript.combined_cds,
                                         key=operator.itemgetter(0, 1))

        exons = IntervalTree.from_intervals(
            [Interval(*exon) for exon in transcript.exons])

        # Group the CDS segments by the single exon each one falls into.
        mapper = defaultdict(list)
        for cds in transcript.combined_cds:
            fexon = exons.find(cds[0] - 1, cds[1], strict=False)
            if len(fexon) > 1:
                raise InvalidCDS(
                    "{} has a CDS ({}) which straddles {} different exons ({}).".format(
                        transcript.id, cds, len(fexon), fexon))
            elif len(fexon) == 0:
                raise InvalidCDS(
                    "{} has a CDS ({}) which is not mapped to any exon.".format(
                        transcript.id, cds))
            mapper[fexon[0]].append(cds)

        # NOTE(review): the keys of mapper are the objects returned by
        # find(), while the lookups below use the entries of
        # transcript.exons. This presumably relies on Interval hashing and
        # comparing equal to the matching exon - confirm against Interval.
        for exon in transcript.exons:
            if exon not in mapper:
                # No CDS at all on this exon: it is UTR in its entirety.
                transcript.combined_utr.append(exon)
            elif len(mapper[exon]) == 1:
                transcript.combined_utr.extend(
                    _utr_around_single_cds(transcript.id, exon, mapper[exon][0]))
            else:
                transcript.logger.debug("Starting to find the UTRs for %s", exon)
                transcript.combined_utr.extend(
                    _utr_around_multiple_cds(exon, sorted(mapper[exon])))

    # If no CDS and no UTR are present, all good
    equality_one = (transcript.combined_cds_length ==
                    transcript.combined_utr_length == 0)
    # Otherwise, if cDNA length == UTR + CDS, all good
    equality_two = (transcript.cdna_length ==
                    transcript.combined_utr_length + transcript.combined_cds_length)
    if not (equality_one or equality_two):
        # Something fishy going on
        message = (
            "Failed to create the UTR: ID: {} Exons: {} Combined CDS: {} "
            "Combined UTR: {} CDS == UTR == 0: {} CDNA == CDS + UTR: {} "
            "CDNA == {} CDS == {} UTR == {}"
        )
        raise InvalidCDS(message.format(
            transcript.id,
            transcript.exons,
            transcript.combined_cds,
            transcript.combined_utr,
            equality_one,
            equality_two,
            transcript.cdna_length,
            transcript.combined_cds_length,
            transcript.combined_utr_length))


def _utr_around_single_cds(transcript_id, exon, cds):
    """
    Return the UTR stretches of an exon that holds exactly one CDS segment.

    :param transcript_id: identifier used in the error message.
    :param exon: indexable pair (start, end).
    :param cds: indexable pair (start, end).
    :return: list with zero, one or two (start, end) tuples; empty when the
        CDS covers the exon exactly.
    :raises InvalidCDS: if the CDS extends beyond either end of the exon.
    """
    if cds[0] == exon[0] and exon[1] == cds[1]:
        return []
    if cds[0] < exon[0] or cds[1] > exon[1]:
        raise InvalidCDS("{} in {} debords its exon {}".format(
            cds, transcript_id, exon))

    flanks = []
    if cds[0] > exon[0]:
        flanks.append((exon[0], max(cds[0] - 1, exon[0])))
    if cds[1] < exon[1]:
        flanks.append((min(cds[1] + 1, exon[1]), exon[1]))
    # The CDS neither equals the exon nor exceeds it, so a flank must exist.
    assert flanks, (exon, cds)
    return flanks


def _utr_around_multiple_cds(exon, found):
    """
    Return the UTR stretches of an exon that holds several CDS segments.

    :param exon: indexable pair (start, end).
    :param found: sorted list of the (start, end) CDS segments on the exon.
    :return: list of (start, end) tuples: the stretch before the first CDS
        (if any), every gap between consecutive CDS segments, and the
        stretch after the last CDS (if any).
    :raises InvalidCDS: if two consecutive CDS segments leave no gap.
    """
    utrs = []
    last_pos = len(found) - 1
    for pos, interval in enumerate(found):
        if pos == last_pos:
            # Only the trailing stretch can follow the last CDS segment.
            if exon[1] > interval[1]:
                utrs.append((min(exon[1], interval[1] + 1), exon[1]))
            continue
        if pos == 0 and exon[0] < interval[0]:
            utrs.append((exon[0], max(exon[0], interval[0] - 1)))
        next_interval = found[pos + 1]
        if not (interval[1] + 1 <= next_interval[0] - 1):
            raise InvalidCDS(
                "Error while inferring the UTR for a transcript with multiple ORFs: overlapping CDS found."
            )
        utrs.append((interval[1] + 1, next_interval[0] - 1))

    assert utrs, found
    # UTR and CDS together must tile the exon exactly.
    utr_sum = sum(_[1] - _[0] + 1 for _ in utrs)
    cds_sum = sum(_[1] - _[0] + 1 for _ in found)
    assert utr_sum + cds_sum == exon[1] - exon[0] + 1, (
        utr_sum, cds_sum, exon[1] - exon[0] + 1, utrs, found)
    return utrs