def merge(self, result, hit, syn):
    '''
    Input one hit from an exonerate output.

    The hit is kept (appended to ``result.hits``) when at least
    ``self.min_neighbors`` of the blocks near the query gene map to a
    region near the hit's anchor on the target.

    result -- accumulator for one gene; ``total_hits``, ``hits`` and
              ``query_flanks`` are updated in place
    hit    -- the hit to consider; ``hit.name`` must equal ``result.name``
    syn    -- object whose ``anchor_target(hit.target)`` returns the anchor
              for this hit (a falsy value when there is none)

    Returns False when no anchor is found, True when an equal hit is
    already present in ``result.hits``, and None otherwise (whether or not
    the hit was appended).
    '''
    assert hit.name == result.name

    # Every hit is counted, even ones that are rejected below
    result.total_hits += 1

    anchor = syn.anchor_target(hit.target)

    # If no blocks map to the specified target contig, stop
    if not anchor:
        if not self.quiet:
            msg = "%s is on a contig with no syntenic blocks: %s"
            print(msg % (result.gene.name, str(hit.target)), file=sys.stderr)
        return False

    # Stop if this hit already exists
    if any(h == hit for h in result.hits):
        return True

    # Define the interval in which to search for neighbors in the target
    target_margin = self.target_flank_ratio * self.flank_width
    target_flanks = intervals.Interval(
        contig=anchor.contig,
        start=max(0, anchor.start - target_margin),
        stop=anchor.stop + target_margin)

    # If the query flanks have not yet been measured, do so
    if not result.query_flanks:
        result.query_flanks = intervals.Interval(
            contig=result.gene.contig,
            start=max(0, result.gene.start - self.flank_width),
            stop=result.gene.stop + self.flank_width)

    # === first order solution ===
    # Count the nearby (by an arbitrary cutoff) blocks that map to a region
    # near the target interval. If there are at least self.min_neighbors of
    # them, keep the exonerate hit.
    matching = 0
    lower, upper = result.lower, result.upper

    # Walk away from the gene through the `last` links. The explicit None
    # check stops the walk at the end of the chain instead of handing a
    # missing block to intervals.overlaps.
    while lower is not None and intervals.overlaps(result.query_flanks, lower):
        matching += intervals.overlaps(target_flanks, lower.over)
        lower = lower.last

    # Same walk in the other direction through the `next` links
    while upper is not None and intervals.overlaps(result.query_flanks, upper):
        matching += intervals.overlaps(target_flanks, upper.over)
        upper = upper.next

    if matching >= self.min_neighbors:
        result.hits.append(hit)
def _get_links(self, result, anchor):
    '''
    Collect the run of linked blocks around anchor that overlap the gene.

    Starting at ``anchor``, blocks are followed through their ``last``
    links; then, starting at ``anchor.next``, through their ``next`` links.
    Each walk ends at the first falsy block or the first block that does
    not overlap ``result.gene``. The collected blocks are returned as a
    list sorted by ``(start, stop)``.
    '''
    found = []

    # Backward walk, anchor itself included
    node = anchor
    while node:
        if not intervals.overlaps(node, result.gene):
            break
        found.append(node)
        node = node.last

    # Forward walk, beginning just past the anchor
    node = anchor.next
    while node:
        if not intervals.overlaps(node, result.gene):
            break
        found.append(node)
        node = node.next

    found.sort(key=lambda block: (block.start, block.stop))
    return found
def test_overlaps(self):
    # Check intervals.overlaps for a fixed probe interval (contig 'a',
    # 10..20) against the fixtures self.a .. self.h, which are defined
    # outside this method.
    probe = intervals.Interval(contig='a', start=10, stop=20)

    # (fixture attribute, whether it is expected to overlap the probe)
    expectations = (
        ('a', False),
        ('b', True),
        ('c', True),
        ('d', True),
        ('e', True),
        ('f', True),
        ('g', False),
        ('h', False),
    )

    for fixture_name, should_overlap in expectations:
        # Look each fixture up only when its turn comes
        outcome = intervals.overlaps(probe, getattr(self, fixture_name))
        if should_overlap:
            self.assertTrue(outcome)
        else:
            self.assertFalse(outcome)
def _get_is_simple(self, anchor, context, syn):
    '''
    Decide whether the context blocks form a "simple" region.

    The region is simple when every block in ``context`` maps to the same
    target contig and no block returned by
    ``syn.target.get_overlapping`` for the target extent of the context
    maps back outside the query extent of the context.

    ``context`` is iterated several times, so it must be re-iterable.
    '''
    # Are all the intervals on the same contig?
    same_contig = intervals.allequal((blk.over.contig for blk in context))

    # Extent of the context on the query side, then on the target side
    query_lo = min(blk.start for blk in context)
    query_hi = max(blk.stop for blk in context)
    target_lo = min(blk.over.start for blk in context)
    target_hi = max(blk.over.stop for blk in context)

    query_span = intervals.Interval(
        contig=anchor.contig, start=query_lo, stop=query_hi)
    target_span = intervals.Interval(
        contig=anchor.over.contig, start=target_lo, stop=target_hi)

    # Does any block overlapping the target extent map back to somewhere
    # outside the query extent?
    has_outsider = any(
        not intervals.overlaps(query_span, blk.over)
        for blk in syn.target.get_overlapping(target_span))

    return same_contig and not has_outsider