def overlap_repeats(self): """Overlaps event coordinates with repeatmasker, simple repeats""" event_groups_by_chr = self.group_by_chr() for chrom, events in event_groups_by_chr.iteritems(): proper_chrom = tools.proper_chrom(chrom, chrom_proper=self.chrom_proper) for snv_type, snv_groups in events.iteritems(): print 'processing repeat', snv_type for snvs in snv_groups: overlaps = repeat.find_overlaps({'chrom':proper_chrom, 'start':int(snvs[0].ref_start), 'end':int(snvs[0].ref_end)}, self.repeat_overlaps) if overlaps: attrs = {} for repeat_type, types in overlaps.iteritems(): if repeat_type == 'simple_repeats': attr = 'within_simple_repeats' elif repeat_type == 'segdup': attr = 'within_segdup' elif repeat_type == 'rmsk': attr = 'repeatmasker' if types: # only report one with shortest name types_sorted = types.keys() types_sorted.sort(lambda x,y: len(x)-len(y)) attrs[attr] = types_sorted[0] if attrs: for snv in snvs: tools.set_attrs(snv, attrs) # clears cache for repeat_olap in self.repeat_overlaps.values(): repeat_olap.finish()
def is_within_repeats(self, proper_chrom, span):
    """Reports whether a coordinate span overlaps a segdup or simple repeat

    Args:
        proper_chrom: chromosome name passed as 'chrom' to
                      repeat.find_overlaps()
        span: indexable pair; span[0] is used as the start and span[1]
              as the end of the query
    Returns:
        True if the overlap search yields a non-empty result for
        'segdup' or 'simple_repeats', False otherwise (other repeat
        types never count)
    """
    query = {'chrom': proper_chrom, 'start': span[0], 'end': span[1]}
    hits = repeat.find_overlaps(query, self.repeat_overlaps)

    # nothing overlapped at all
    if not hits:
        return False

    # only non-empty hits of these two repeat types qualify
    qualifying = ('segdup', 'simple_repeats')
    return any(kind in qualifying for kind in hits.keys() if hits[kind])