def _get_intron_transcript_records(self):
    """Fill the 'Introns' cache entry from the gaps between the exons.

    With fewer than two exons the 'Introns' entry is nulled through
    ``_set_null_values`` and nothing else happens.  Otherwise the gaps
    between the sorted exon coordinates are turned into ``Intron`` objects,
    ranked from 1, and stored as a tuple in ``self._cached['Introns']``.
    """
    exons = self.Exons
    if len(exons) < 2:
        self._set_null_values(["Introns"])
        return

    # ascending exon coordinates; the End of the last one bounds the map
    exon_coords = sorted((exon.Location.Start, exon.Location.End)
                         for exon in exons)
    exon_map = Map(locations=exon_coords, parent_length=exon_coords[-1][-1])

    # the shadow holds the regions not covered by exons; a span starting
    # at 0 is excluded
    gaps = [(span.Start, span.End)
            for span in exon_map.shadow().spans
            if span.Start != 0]

    location = self.Location
    chrom = location.CoordName
    strand = location.Strand
    if strand == -1:
        # rank follows the reversed coordinate order on the minus strand
        gaps = gaps[::-1]

    introns = []
    for rank, (start, end) in enumerate(gaps, 1):
        coord = self.genome.makeLocation(CoordName=chrom, Start=start,
                                         End=end, Strand=strand,
                                         ensembl_coord=False)
        introns.append(
            Intron(self.genome, self.db, rank, self.StableId, coord))
    self._cached['Introns'] = tuple(introns)
def getAnnotatedSeq(self, feature_types=None, where_feature=None):
    """Return ``self.Seq`` with the features of this region added as
    annotations.

    Arguments:
        - feature_types: passed unchanged to ``self.getFeatures``
        - where_feature: passed unchanged to ``self.getFeatures``

    Each feature returned by ``getFeatures`` whose ``featureData`` is not
    None is added to ``self.Seq``.  For features whose ``Type`` is 'gene'
    every entry of ``subFeatureData`` is added as well.  When the region is
    on the -1 strand each feature map is reversed with ``nucleicReversed``
    before being added.
    """
    regions = list(self.getFeatures(feature_types=feature_types,
                                    where_feature=where_feature))
    # seq_map is on the + strand, regardless the actual strand of sequence
    seq_map = Map(locations=[(self.Location.Start, self.Location.End)],
                  parent_length=DEFAULT_PARENT_LENGTH)
    seq_map = seq_map.inverse()

    # the strand does not change while annotating, so test it once
    on_minus_strand = self.Location.Strand == -1

    for region in regions:
        data = region.featureData(seq_map)
        if data is None:
            continue

        # only build the reversed map when it is actually needed, so that
        # the feature map reflects the strand of the actual sequence
        feature_map = data[-1]
        if on_minus_strand:
            feature_map = feature_map.nucleicReversed()
        self.Seq.addAnnotation(Feature, data[0], data[1], feature_map)

        if region.Type != 'gene':
            continue

        # TODO: SHOULD be much simplified
        for feature_type, feature_name, sub_map in \
                region.subFeatureData(seq_map):
            if on_minus_strand:
                # again, change feature map to -1 strand sequence
                sub_map = sub_map.nucleicReversed()
            self.Seq.addAnnotation(Feature, feature_type, feature_name,
                                   sub_map)

    return self.Seq
def test_map(self):
    """reversing a map with multiple spans should preserve span relative order"""
    forward = [Span(20, 30), Span(40, 50)]
    fmap = Map(spans=forward, parent_length=100)
    fmap_reversed = fmap.nucleicReversed()

    reverse = [Span(70, 80, Reverse=True), Span(50, 60, Reverse=True)]
    rmap = Map(spans=reverse, parent_length=100)

    # compare every span rather than a hard-coded count; the length check
    # stops zip from silently ignoring a missing or extra span
    self.assertEqual(len(fmap_reversed.spans), len(rmap.spans))
    for got, expected in zip(fmap_reversed.spans, rmap.spans):
        # assertEqual: the assertEquals alias is deprecated
        self.assertEqual(got, expected)
def test_map(self):
    """reversing a map with multiple spans should preserve span relative order"""
    forward = [Span(20, 30), Span(40, 50)]
    fmap = Map(spans=forward, parent_length=100)
    fmap_reversed = fmap.nucleicReversed()

    reverse = [Span(70, 80, Reverse=True), Span(50, 60, Reverse=True)]
    rmap = Map(spans=reverse, parent_length=100)

    # compare every span rather than a hard-coded count; the length check
    # stops zip from silently ignoring a missing or extra span
    self.assertEqual(len(fmap_reversed.spans), len(rmap.spans))
    for got, expected in zip(fmap_reversed.spans, rmap.spans):
        # assertEqual: the assertEquals alias is deprecated
        self.assertEqual(got, expected)
def test_get_coords(self):
    """getCoordinates should return raw coordinates matching input"""
    cases = (
        [(0, 9), (20, 32)],
        # should work for reversed Maps too
        [(32, 20), (9, 0)],
    )
    for expected in cases:
        coords = Map(expected, parent_length=100).getCoordinates()
        self.assertEqual(coords, expected)
def __init__(self, base, policy=DisplayPolicy, _policy=None, pad=1,
             yrange=None, **kw):
    """Store the display settings, resolve the policy and lay out tracks.

    base must have a positive length.  When _policy is None the policy is
    built as ``policy(**kw)`` and copied with a map over the whole of base,
    depth 0 and a rowlen equal to the length of base; otherwise _policy is
    used as given and kw is not consulted.
    """
    self.pad = pad
    self.base = base
    self.yrange = yrange

    length = len(base)
    assert length > 0, length

    if _policy is not None:
        self.policy = _policy
    else:
        whole = Map([(0, length)], parent_length=length)
        self.policy = policy(**kw).copy(map=whole, depth=0, rowlen=length)

    # a separate Map instance, also covering the whole of base
    self.smap = Map([(0, length)], parent_length=length)
    self._calc_tracks()
def __init__(self, base, policy=DisplayPolicy, _policy=None, pad=1,
             yrange=None, **kw):
    """Store the display settings, resolve the policy and lay out tracks.

    base must have a positive length.  When _policy is None the policy is
    built as ``policy(**kw)`` and copied with a map over the whole of base,
    depth 0 and a rowlen equal to the length of base; otherwise _policy is
    used as given and kw is not consulted.
    """
    self.pad = pad
    self.base = base
    self.yrange = yrange

    length = len(base)
    assert length > 0, length

    if _policy is not None:
        self.policy = _policy
    else:
        whole = Map([(0, length)], parent_length=length)
        self.policy = policy(**kw).copy(map=whole, depth=0, rowlen=length)

    # a separate Map instance, also covering the whole of base
    self.smap = Map([(0, length)], parent_length=length)
    self._calc_tracks()
def _remap(map):
    """Return a map whose coordinates begin at 0.

    A map that already has Start == 0 is returned as the same object, with
    its parent_length set to its End.  Otherwise every span that is not
    lost has Start and End reduced by the map's Start (the span objects
    are modified in place) and a new Map is built from the spans, with
    parent_length taken from the End of the last shifted span.
    """
    offset = map.Start
    if offset == 0:
        map.parent_length = map.End
        return map

    shifted = []
    for span in map.spans:
        if not span.lost:
            span.Start -= offset
            span.End -= offset
            # remember the End of the most recent shifted span
            length = span.End
        # lost spans are passed through unchanged
        shifted.append(span)
    return Map(spans=shifted, parent_length=length)
def cigar_to_map(cigar_text):
    """convert cigar string into Map

    Each (count, code) pair found by ``pattern`` becomes one span; a
    missing count means 1.  Code 'M' gives a Span starting at the running
    position, which then advances by the count.  Any other code gives a
    LostSpan of that length and leaves the position unchanged.  Text
    containing 'I' is rejected by an assertion.
    """
    assert 'I' not in cigar_text
    spans = []
    posn = 0
    for count, code in pattern.findall(cigar_text):
        length = int(count) if count else 1
        if code != 'M':
            spans.append(LostSpan(length))
            continue
        spans.append(Span(posn, posn + length))
        posn += length
    return Map(spans=spans, parent_length=posn)
def test_get_coords(self):
    """getCoordinates should return raw coordinates matching input"""
    cases = (
        [(0, 9), (20, 32)],
        # should work for reversed Maps too
        [(32, 20), (9, 0)],
    )
    for expected in cases:
        coords = Map(expected, parent_length=100).getCoordinates()
        self.assertEqual(coords, expected)
def test_spans(self):
    # a simple two part map of length 10
    two_part = Map([(0, 5), (5, 10)], parent_length=10)

    # (start, end) of the span to remap, and the repr expected back
    cases = [
        ((0, 4), "[0:4]"),
        ((0, 5), "[0:5]"),
        ((0, 6), "[0:5, 5:6]"),
        ((5, 10), "[5:10]"),
        ((-1, 10), "[-1-, 0:5, 5:10]"),
        ((5, 11), "[5:10, -1-]"),
        ((0, 10), "[0:5, 5:10]"),
        ((10, 0), "[10:5, 5:0]"),
    ]
    for (start, end), expected in cases:
        # a span given with start > end is built as a reversed span
        span = Span(start, end, Reverse=start > end)
        result = repr(span.remapWith(two_part))
        if result == expected:
            continue
        self.fail(repr((result, expected)))
class Display(rlg2mpl.Drawable):
    """Holds a list of tracks and displays them all aligned

    base: A sequence, alignment, or anything else offering .getTracks(policy)
    policy: A DisplayPolicy subclass.
    pad: Gap between tracks in points.

    Other keyword arguments are used to modify the DisplayPolicy:

    Sequence display:
    show_text: Represent bases as characters.  Slow.
    draw_bases: Represent bases as rectangles if MolType allows.
    show_gaps: Represent bases as line segments.
    colour_sequences: Colour code sequences if MolType allows.
    seq_color_callback: f(seq)->[colours] for flexible seq coloring.

    Layout:
    rowlen: wrap at this many characters per line.
    min_feature_height: minimum feature symbol height in points.
    min_graph_height: minimum height of any graphed features in points.

    Inclusion:
    recursive: include the sequences of the alignment.
    ignored_features: list of feature type tags to leave out.
    keep_unexpected_tracks: show features not assigned to a track by the policy.
    """

    def __init__(self, base, policy=DisplayPolicy, _policy=None, pad=1,
            yrange=None, **kw):
        """Store the settings, resolve the policy and compute the tracks.

        base must have a positive length (checked with an assert).  The
        extra keyword arguments are only used when _policy is None.
        """
        self.pad = pad
        self.base = base
        self.yrange = yrange
        assert len(base) > 0, len(base)

        if _policy is None:
            # build a policy from the class and the keyword arguments, then
            # specialise it to cover the whole of base
            policy = policy(**kw).copy(
                map=Map([(0, len(base))], parent_length=len(base)),
                depth=0,
                rowlen=len(base))
        else:
            # a ready made policy takes precedence; kw is not consulted
            policy = _policy
        self.policy = policy
        # smap starts out covering the whole of base; __getitem__ narrows it
        self.smap=Map([(0, len(base))], parent_length=len(base))

        self._calc_tracks()

    def __len__(self):
        """Return the length of the inverse of self.smap."""
        return len(self.smap.inverse())

    def _calc_tracks(self):
        """Stack the tracks of base and record their vertical positions.

        Sets self._tracks to a list of (y offset, y midpoint, track) and
        self.height to the total height.  When self.yrange is None it is
        filled with the largest range seen for each track tag.
        """
        # NOTE(review): the divisions below only give fractional results
        # under true division (Python 3, or `from __future__ import
        # division` in Python 2) -- confirm against the module header.
        y = 0
        self._tracks = []
        # tracks are processed in the reverse of the order getTracks gives
        for p in self.base.getTracks(self.policy)[::-1]:
            if not isinstance(p, Track):
                # wrap anything that is not a Track in one, as a list
                if not isinstance(p, list):
                    p = [p]
                p = Track('', p)
            y2 = y + p.height + self.pad
            self._tracks.append((y+self.pad/2, (y+y2)/2, p))
            y = y2
        self.height = y

        if self.yrange is None:
            self.yrange = {}
            for (y, ym, p) in self._tracks:
                self.yrange[p.tag] = max(self.yrange.get(p.tag, 0), p.range)

    def copy(self, **kw):
        """Return a shallow copy whose policy is self.policy.copy(**kw).

        The tracks of the copy are recalculated with the new policy.
        """
        new = copy.copy(self)
        new.policy = self.policy.copy(**kw)
        new._calc_tracks()
        return new

    def __getitem__(self, slice):
        """Return a shallow copy restricted to slice.

        Only smap is replaced; the tracks are not recalculated here.
        """
        c = copy.copy(self)
        # slice in the inverse map's coordinates, then invert back
        c.smap = self.smap.inverse()[slice].inverse()
        return c

    def makeArtist(self, vertical=False):
        """Return an rlg2mpl.Group holding the shapes of every track.

        Each shape is translated by the y offset of its track.  When
        vertical is true the whole group is rotated by 90 and scaled by
        (-1.0, 1.0).
        """
        g = rlg2mpl.Group()
        for (y, ym, p) in self._tracks:
            smap = self.smap.inverse()
            for s in p.getShapes(
                    span=(smap.Start, smap.End),
                    rotated=vertical,
                    height=float(p.height),
                    yrange=self.yrange[p.tag]):
                # shift the shape to the vertical position of its track
                trans = matplotlib.transforms.Affine2D()
                trans.translate(0, y)
                s.set_transform(s.get_transform() + trans)
                g.add(s)
        if vertical:
            g.rotate(90)
            g.scale(-1.0, 1.0)
        return g

    def asAxes(self, fig, posn, labeled=True, vertical=False):
        """Add axes at posn to fig, scale them, add the artist, return them."""
        ax = fig.add_axes(posn)
        self.applyScaleToAxes(ax, labeled=labeled, vertical=vertical)
        g = self.makeArtist(vertical=vertical)
        ax.add_artist(g)
        return ax

    def applyScaleToAxes(self, ax, labeled=True, vertical=False):
        """Set the ticks, tick labels and limits of ax for this display.

        The sequence axis is x and the track axis is y, swapped when
        vertical is true.
        """
        (seqaxis, trackaxis) = [ax.xaxis, ax.yaxis]
        if vertical:
            (seqaxis, trackaxis) = (trackaxis, seqaxis)

        if not labeled:
            trackaxis.set_ticks([])
        else:
            track_positions = []
            track_labels = []
            for (y, ym, p) in self._tracks:
                # only tracks taller than 8 get a label, at their midpoint
                if p.height > 8:
                    track_labels.append(p.label)
                    track_positions.append(ym)
            trackaxis.set_ticks(track_positions)
            trackaxis.set_ticklabels(track_labels)
            if vertical:
                for tick in trackaxis.get_major_ticks():
                    tick.label1.set_rotation('vertical')
                    tick.label2.set_rotation('vertical')

        # sequence positions are shown as integers
        seqaxis.set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda x,pos:str(int(x))))

        smap = self.smap.inverse()
        seq_lim = (smap.Start, smap.End)
        # a height of 0 is replaced by 0.1 for the track axis limit
        if vertical:
            ax.set_ylim(*seq_lim)
            ax.set_xlim(0, self.height or 0.1)
        else:
            ax.set_xlim(*seq_lim)
            ax.set_ylim(0, self.height or 0.1)

    def figureLayout(self, labeled=True, vertical=False, width=None,
            height=None, left=None, **kw):
        """Return ((w, h), posn, kw) as computed by rlg2mpl.figureLayout.

        left and height are estimated when not given.  For a vertical
        layout w and h are swapped and so are the paired entries of posn.
        """
        # NOTE(review): `12/72`, `/ 72` and `*16/72` only give fractional
        # results under true division (Python 3, or `from __future__ import
        # division` in Python 2) -- confirm against the module header.
        if left is None:
            if labeled and self._tracks:
                left = max(len(p.label) for (y, ym, p) in self._tracks)
                left *= 12/72 * .5 # guess mixed chars, 12pt, inaccurate!
            else:
                left = 0

        height = height or (self.height or 0.1) / 72

        useful_width = len(self)*16/72 # ie bigish font, wide chars

        fkw = dict(leftovers=True, width=width, height=height, left=left,
                useful_width=useful_width, **kw)
        (w,h),posn,kw = rlg2mpl.figureLayout(**fkw)

        #points_per_base = w * posn[3] / len(self)
        if vertical:
            (w, h) = (h, w)
            posn[0:2] = reversed(posn[0:2])
            posn[2:4] = reversed(posn[2:4])
        return (w, h), posn, kw

    def makeFigure(self, width=None, height=None, rowlen=None, **kw):
        """Return a figure with one set of axes per row of the display.

        When rowlen is given the display is cut into slices of at most
        rowlen positions; otherwise there is a single row.
        """
        if rowlen:
            rows = [self[i:i+rowlen] for i in range(0, len(self), rowlen)]
        else:
            rows = [self]
            rowlen = len(self)
        kw.update(width=width, height=height)
        ((width, height), (x, y, w, h), kw) = self.figureLayout(**kw)
        N = len(rows)
        # since scales go below and titles go above, each row
        # gets the bottom margin, but not the top margin.
        vzoom = 1 + (y+h) * (N-1)
        fig = self._makeFigure(width, height * vzoom)
        for (i, row) in enumerate(rows):
            # the first row gets the highest position index
            i = len(rows) - i - 1
            posn = [x, (y+i*(y+h))/vzoom, w*len(row)/rowlen, h/vzoom]
            row.asAxes(fig, posn, **kw)
        return fig
self.assertEqual( str(answer[0]), 'TCGAT') def test_getBySequenceAnnotation(self): aln = LoadSeqs(data={ 'a': 'ATCGAAATCGAT', 'b': 'ATCGA--TCGAT'}) b = aln.getSeq('b') b.addAnnotation(Feature, 'test_type', 'test_label', [(4,6)]) answer = aln.getBySequenceAnnotation('b', 'test_type')[0].todict() self.assertEqual(answer, {'b':'A--T', 'a':'AAAT'}) if 0: # old, needs fixes # Maps a = Map([(10,20)], parent_length=100) for (desc, map, expected) in [ ('a ', a, "Map([10:20] on base)"), ('i ', a.inverse(), "Map([-10-, 0:10, -80-] on Map([10:20] on base))"), ('1 ', a[5:], "Map([5:10] on Map([10:20] on base))"), ('1r', a[5:].relative_to(b), "Map([15:20] on base)"), ('2 ', a[:5], "Map([0:5] on Map([10:20] on base))"), ('2r', a[:5].relative_to(b), "Map([10:15] on base)"), ('r ', a.relative_to(a[5:]), "Map([-5-, 0:5] on Map([5:10] on Map([10:20] on base)))"), ('r ', a[2:4].relative_to(a[2:6]), "Map([0:2] on Map([2:6] on Map([10:20] on base)))"), ('r ', a[2:4].relative_to(a[2:6][0:3]), "Map([0:2] on Map([0:3] on Map([2:6] on Map([10:20] on base))))")]: print desc, repr(map), if repr(map) == expected: print else:
sliced_seq.getByAnnotation('test_type', ignore_partial=True)) self.assertEqual(len(answer), 1) self.assertEqual(str(answer[0]), 'TCGAT') def test_getBySequenceAnnotation(self): aln = LoadSeqs(data={'a': 'ATCGAAATCGAT', 'b': 'ATCGA--TCGAT'}) b = aln.getSeq('b') b.addAnnotation(Feature, 'test_type', 'test_label', [(4, 6)]) answer = aln.getBySequenceAnnotation('b', 'test_type')[0].todict() self.assertEqual(answer, {'b': 'A--T', 'a': 'AAAT'}) if 0: # old, needs fixes # Maps a = Map([(10, 20)], parent_length=100) for (desc, map, expected) in [ ('a ', a, "Map([10:20] on base)"), ('i ', a.inverse(), "Map([-10-, 0:10, -80-] on Map([10:20] on base))"), ('1 ', a[5:], "Map([5:10] on Map([10:20] on base))"), ('1r', a[5:].relative_to(b), "Map([15:20] on base)"), ('2 ', a[:5], "Map([0:5] on Map([10:20] on base))"), ('2r', a[:5].relative_to(b), "Map([10:15] on base)"), ('r ', a.relative_to(a[5:]), "Map([-5-, 0:5] on Map([5:10] on Map([10:20] on base)))"), ('r ', a[2:4].relative_to(a[2:6]), "Map([0:2] on Map([2:6] on Map([10:20] on base)))"), ('r ', a[2:4].relative_to(a[2:6][0:3]), "Map([0:2] on Map([0:3] on Map([2:6] on Map([10:20] on base))))") ]:
class Display(rlg2mpl.Drawable):
    """Holds a list of tracks and displays them all aligned

    base: A sequence, alignment, or anything else offering .getTracks(policy)
    policy: A DisplayPolicy subclass.
    pad: Gap between tracks in points.

    Other keyword arguments are used to modify the DisplayPolicy:

    Sequence display:
    show_text: Represent bases as characters.  Slow.
    draw_bases: Represent bases as rectangles if MolType allows.
    show_gaps: Represent bases as line segments.
    colour_sequences: Colour code sequences if MolType allows.
    seq_color_callback: f(seq)->[colours] for flexible seq coloring.

    Layout:
    rowlen: wrap at this many characters per line.
    min_feature_height: minimum feature symbol height in points.
    min_graph_height: minimum height of any graphed features in points.

    Inclusion:
    recursive: include the sequences of the alignment.
    ignored_features: list of feature type tags to leave out.
    keep_unexpected_tracks: show features not assigned to a track by the policy.
    """

    def __init__(self, base, policy=DisplayPolicy, _policy=None, pad=1,
                 yrange=None, **kw):
        """Store the settings, resolve the policy and compute the tracks.

        base must have a positive length (checked with an assert).  The
        extra keyword arguments are only used when _policy is None.
        """
        self.pad = pad
        self.base = base
        self.yrange = yrange
        assert len(base) > 0, len(base)

        if _policy is None:
            # build a policy from the class and the keyword arguments, then
            # specialise it to cover the whole of base
            policy = policy(**kw).copy(map=Map([(0, len(base))],
                                               parent_length=len(base)),
                                       depth=0,
                                       rowlen=len(base))
        else:
            # a ready made policy takes precedence; kw is not consulted
            policy = _policy
        self.policy = policy
        # smap starts out covering the whole of base; __getitem__ narrows it
        self.smap = Map([(0, len(base))], parent_length=len(base))

        self._calc_tracks()

    def __len__(self):
        """Return the length of the inverse of self.smap."""
        return len(self.smap.inverse())

    def _calc_tracks(self):
        """Stack the tracks of base and record their vertical positions.

        Sets self._tracks to a list of (y offset, y midpoint, track) and
        self.height to the total height.  When self.yrange is None it is
        filled with the largest range seen for each track tag.
        """
        # NOTE(review): the divisions below only give fractional results
        # under true division (Python 3, or `from __future__ import
        # division` in Python 2) -- confirm against the module header.
        y = 0
        self._tracks = []
        # tracks are processed in the reverse of the order getTracks gives
        for p in self.base.getTracks(self.policy)[::-1]:
            if not isinstance(p, Track):
                # wrap anything that is not a Track in one, as a list
                if not isinstance(p, list):
                    p = [p]
                p = Track('', p)
            y2 = y + p.height + self.pad
            self._tracks.append((y + self.pad / 2, (y + y2) / 2, p))
            y = y2
        self.height = y

        if self.yrange is None:
            self.yrange = {}
            for (y, ym, p) in self._tracks:
                self.yrange[p.tag] = max(self.yrange.get(p.tag, 0), p.range)

    def copy(self, **kw):
        """Return a shallow copy whose policy is self.policy.copy(**kw).

        The tracks of the copy are recalculated with the new policy.
        """
        new = copy.copy(self)
        new.policy = self.policy.copy(**kw)
        new._calc_tracks()
        return new

    def __getitem__(self, slice):
        """Return a shallow copy restricted to slice.

        Only smap is replaced; the tracks are not recalculated here.
        """
        c = copy.copy(self)
        # slice in the inverse map's coordinates, then invert back
        c.smap = self.smap.inverse()[slice].inverse()
        return c

    def makeArtist(self, vertical=False):
        """Return an rlg2mpl.Group holding the shapes of every track.

        Each shape is translated by the y offset of its track.  When
        vertical is true the whole group is rotated by 90 and scaled by
        (-1.0, 1.0).
        """
        g = rlg2mpl.Group()
        for (y, ym, p) in self._tracks:
            smap = self.smap.inverse()
            for s in p.getShapes(span=(smap.Start, smap.End),
                                 rotated=vertical,
                                 height=float(p.height),
                                 yrange=self.yrange[p.tag]):
                # shift the shape to the vertical position of its track
                trans = matplotlib.transforms.Affine2D()
                trans.translate(0, y)
                s.set_transform(s.get_transform() + trans)
                g.add(s)
        if vertical:
            g.rotate(90)
            g.scale(-1.0, 1.0)
        return g

    def asAxes(self, fig, posn, labeled=True, vertical=False):
        """Add axes at posn to fig, scale them, add the artist, return them."""
        ax = fig.add_axes(posn)
        self.applyScaleToAxes(ax, labeled=labeled, vertical=vertical)
        g = self.makeArtist(vertical=vertical)
        ax.add_artist(g)
        return ax

    def applyScaleToAxes(self, ax, labeled=True, vertical=False):
        """Set the ticks, tick labels and limits of ax for this display.

        The sequence axis is x and the track axis is y, swapped when
        vertical is true.
        """
        (seqaxis, trackaxis) = [ax.xaxis, ax.yaxis]
        if vertical:
            (seqaxis, trackaxis) = (trackaxis, seqaxis)

        if not labeled:
            trackaxis.set_ticks([])
        else:
            track_positions = []
            track_labels = []
            for (y, ym, p) in self._tracks:
                # only tracks taller than 8 get a label, at their midpoint
                if p.height > 8:
                    track_labels.append(p.label)
                    track_positions.append(ym)
            trackaxis.set_ticks(track_positions)
            trackaxis.set_ticklabels(track_labels)
            if vertical:
                for tick in trackaxis.get_major_ticks():
                    tick.label1.set_rotation('vertical')
                    tick.label2.set_rotation('vertical')

        # sequence positions are shown as integers
        seqaxis.set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda x, pos: str(int(x))))

        smap = self.smap.inverse()
        seq_lim = (smap.Start, smap.End)
        # a height of 0 is replaced by 0.1 for the track axis limit
        if vertical:
            ax.set_ylim(*seq_lim)
            ax.set_xlim(0, self.height or 0.1)
        else:
            ax.set_xlim(*seq_lim)
            ax.set_ylim(0, self.height or 0.1)

    def figureLayout(self, labeled=True, vertical=False, width=None,
                     height=None, left=None, **kw):
        """Return ((w, h), posn, kw) as computed by rlg2mpl.figureLayout.

        left and height are estimated when not given.  For a vertical
        layout w and h are swapped and so are the paired entries of posn.
        """
        # NOTE(review): `12 / 72`, `/ 72` and `* 16 / 72` only give
        # fractional results under true division (Python 3, or `from
        # __future__ import division` in Python 2) -- confirm against the
        # module header.
        if left is None:
            if labeled and self._tracks:
                left = max(len(p.label) for (y, ym, p) in self._tracks)
                left *= 12 / 72 * .5  # guess mixed chars, 12pt, inaccurate!
            else:
                left = 0

        height = height or (self.height or 0.1) / 72

        useful_width = len(self) * 16 / 72  # ie bigish font, wide chars

        fkw = dict(leftovers=True, width=width, height=height, left=left,
                   useful_width=useful_width, **kw)
        (w, h), posn, kw = rlg2mpl.figureLayout(**fkw)

        #points_per_base = w * posn[3] / len(self)
        if vertical:
            (w, h) = (h, w)
            posn[0:2] = reversed(posn[0:2])
            posn[2:4] = reversed(posn[2:4])
        return (w, h), posn, kw

    def makeFigure(self, width=None, height=None, rowlen=None, **kw):
        """Return a figure with one set of axes per row of the display.

        When rowlen is given the display is cut into slices of at most
        rowlen positions; otherwise there is a single row.
        """
        if rowlen:
            rows = [self[i:i + rowlen] for i in range(0, len(self), rowlen)]
        else:
            rows = [self]
            rowlen = len(self)
        kw.update(width=width, height=height)
        ((width, height), (x, y, w, h), kw) = self.figureLayout(**kw)
        N = len(rows)
        # since scales go below and titles go above, each row
        # gets the bottom margin, but not the top margin.
        vzoom = 1 + (y + h) * (N - 1)
        fig = self._makeFigure(width, height * vzoom)
        for (i, row) in enumerate(rows):
            # the first row gets the highest position index
            i = len(rows) - i - 1
            posn = [
                x, (y + i * (y + h)) / vzoom, w * len(row) / rowlen,
                h / vzoom
            ]
            row.asAxes(fig, posn, **kw)
        return fig