Beispiel #1
0
 def _get_intron_transcript_records(self):
     """Build the Intron records for this transcript and cache them.

     With fewer than two exons the 'Introns' entry is nulled through
     _set_null_values. Otherwise intron coordinates are taken from the
     shadow of the exon map (presumably the regions the exons leave
     uncovered -- confirm against Map.shadow), skipping any span that
     starts at 0. Ranks count from 1; on strand -1 the spans are ranked in
     reverse coordinate order. The result is stored as a tuple under
     self._cached['Introns'].
     """
     if len(self.Exons) < 2:
         self._set_null_values(["Introns"])
         return

     exon_spans = sorted((exon.Location.Start, exon.Location.End)
                         for exon in self.Exons)
     # the end of the last exon (in sorted order) bounds the parent
     last_end = exon_spans[-1][-1]
     gap_map = Map(locations=exon_spans, parent_length=last_end).shadow()

     gaps = [(span.Start, span.End)
             for span in gap_map.spans if span.Start != 0]

     chrom = self.Location.CoordName
     strand = self.Location.Strand
     if strand == -1:
         gaps.reverse()

     records = []
     for rank, (start, stop) in enumerate(gaps, 1):
         location = self.genome.makeLocation(CoordName=chrom, Start=start,
                                     End=stop, Strand=strand,
                                     ensembl_coord=False)
         records.append(Intron(self.genome, self.db, rank, self.StableId,
                                 location))

     self._cached['Introns'] = tuple(records)
Beispiel #2
0
 def getAnnotatedSeq(self, feature_types=None, where_feature=None):
     """Annotate self.Seq with the features of this region and return it.

     feature_types and where_feature are passed unchanged to
     self.getFeatures. Each region whose featureData is not None is added
     to self.Seq as a Feature annotation; for regions of Type 'gene' the
     entries from subFeatureData are added as well. When this region lies
     on strand -1 every feature map is passed through nucleicReversed()
     before being attached.
     """
     regions = list(self.getFeatures(feature_types = feature_types,
                     where_feature = where_feature))
     # seq_map is on the + strand, regardless the actual strand of sequence
     seq_map = Map(locations = [(self.Location.Start, self.Location.End)],
                 parent_length = DEFAULT_PARENT_LENGTH)
     seq_map = seq_map.inverse()
     on_minus_strand = self.Location.Strand == -1

     for region in regions:
         data = region.featureData(seq_map)
         if data is None:
             continue
         # this will consider the strand information of actual sequence;
         # the reversed map is only built when it is actually needed
         feature_map = data[-1]
         if on_minus_strand:
             feature_map = feature_map.nucleicReversed()
         self.Seq.addAnnotation(Feature, data[0], data[1], feature_map)

         if region.Type == 'gene':  # TODO: SHOULD be much simplified
             sub_data = region.subFeatureData(seq_map)
             for feature_type, feature_name, feature_map in sub_data:
                 if on_minus_strand:
                     # again, change feature map to -1 strand sequence if
                     # needed.
                     feature_map = feature_map.nucleicReversed()
                 self.Seq.addAnnotation(Feature, feature_type,
                                         feature_name, feature_map)

     return self.Seq
Beispiel #3
0
 def test_map(self):
     """reversing a map with multiple spans should preserve span relative
     order"""
     forward = [Span(20, 30), Span(40, 50)]
     fmap = Map(spans=forward, parent_length=100)
     fmap_reversed = fmap.nucleicReversed()
     # expected spans: each forward span mirrored on the parent of length
     # 100 (20:30 -> 70:80, 40:50 -> 50:60), listed in the same order
     reverse = [Span(70, 80, Reverse=True), Span(50, 60, Reverse=True)]
     rmap = Map(spans=reverse, parent_length=100)
     for i in range(len(reverse)):
         # assertEqual, not the deprecated assertEquals alias
         self.assertEqual(fmap_reversed.spans[i], rmap.spans[i])
Beispiel #4
0
 def test_map(self):
     """reversing a map with multiple spans should preserve span relative
     order"""
     forward = [Span(20, 30), Span(40, 50)]
     fmap = Map(spans=forward, parent_length=100)
     fmap_reversed = fmap.nucleicReversed()
     # the expected map holds the forward spans mirrored on a parent of
     # length 100 (20:30 -> 70:80, 40:50 -> 50:60), in the same order
     reverse = [Span(70, 80, Reverse=True), Span(50, 60, Reverse=True)]
     rmap = Map(spans=reverse, parent_length=100)
     for i in range(len(reverse)):
         # use assertEqual; assertEquals is a deprecated alias
         self.assertEqual(fmap_reversed.spans[i], rmap.spans[i])
Beispiel #5
0
 def test_get_coords(self):
     """getCoordinates should return raw coordinates matching input"""
     forward = [(0, 9), (20, 32)]
     # should work for reversed Maps too
     backward = [(32, 20), (9, 0)]
     for expected in (forward, backward):
         observed = Map(expected, parent_length=100).getCoordinates()
         self.assertEqual(observed, expected)
Beispiel #6
0
    def __init__(self, base, policy=DisplayPolicy, _policy=None, pad=1,
            yrange=None, **kw):
        """Record the display settings and lay out the tracks of base.

        A supplied _policy is used unchanged and policy / kw are not
        consulted. Otherwise policy(**kw) is created and copied with a map
        covering all of base, depth 0 and rowlen len(base).
        """
        self.pad = pad
        self.base = base
        self.yrange = yrange
        size = len(base)
        assert size > 0, size

        if _policy is not None:
            self.policy = _policy
        else:
            fresh = policy(**kw)
            self.policy = fresh.copy(
                map=Map([(0, size)], parent_length=size),
                depth=0,
                rowlen=size)
        # separate Map instance: the policy's map is not shared with smap
        self.smap = Map([(0, size)], parent_length=size)

        self._calc_tracks()
Beispiel #7
0
    def __init__(self, base, policy=DisplayPolicy, _policy=None, pad=1, yrange=None, **kw):
        """Store pad, base and yrange, choose the policy, then build tracks.

        When _policy is None a policy is made from policy(**kw) and copied
        with a full-length map, depth 0 and rowlen len(base); otherwise
        _policy is taken as given.
        """
        self.pad = pad
        self.base = base
        self.yrange = yrange
        n = len(base)
        assert n > 0, n

        if _policy is not None:
            chosen = _policy
        else:
            made = policy(**kw)
            chosen = made.copy(map=Map([(0, n)], parent_length=n), depth=0, rowlen=n)
        self.policy = chosen
        # smap gets its own Map covering the whole of base
        self.smap = Map([(0, n)], parent_length=n)

        self._calc_tracks()
Beispiel #8
0
def _remap(map):
    start = map.Start
    if start == 0:
        new_map = map
        new_map.parent_length = map.End
    else:
        spans = []
        for span in map.spans:
            if span.lost:
                spans.append(span)
            else:
                span.Start = span.Start - start
                span.End = span.End - start
                length = span.End
                spans.append(span)
        new_map = Map(spans=spans, parent_length=length)
    return new_map
Beispiel #9
0
def cigar_to_map(cigar_text):
    """convert cigar string into Map

    Each (count, code) pair found by the module-level pattern is handled in
    turn; a missing count is taken as 1. 'M' entries become Spans that
    advance the running position, every other code becomes a LostSpan and
    leaves the position unchanged. Strings containing 'I' are rejected by
    an assertion.
    """
    assert 'I' not in cigar_text
    posn = 0
    spans = []
    for count, code in pattern.findall(cigar_text):
        size = int(count) if count else 1
        if code != 'M':
            spans.append(LostSpan(size))
            continue
        spans.append(Span(posn, posn + size))
        posn += size
    return Map(spans=spans, parent_length=posn)
Beispiel #10
0
    def test_get_coords(self):
        """getCoordinates should return raw coordinates matching input"""
        cases = (
            [(0, 9), (20, 32)],
            # should work for reversed Maps too
            [(32, 20), (9, 0)],
        )
        for given in cases:
            returned = Map(given, parent_length=100).getCoordinates()
            self.assertEqual(returned, given)
Beispiel #11
0
 def test_spans(self):
     """remapping spans onto a two part map gives the expected repr"""
     # a simple two part map of length 10
     two_part = Map([(0, 5), (5, 10)], parent_length=10)
     # (start, end) of the span to remap, and the repr expected back
     cases = [
         ((0, 4), "[0:4]"),
         ((0, 5), "[0:5]"),
         ((0, 6), "[0:5, 5:6]"),
         ((5, 10), "[5:10]"),
         ((-1, 10), "[-1-, 0:5, 5:10]"),
         ((5, 11), "[5:10, -1-]"),
         ((0, 10), "[0:5, 5:10]"),
         ((10, 0), "[10:5, 5:0]"),
     ]
     for (start, end), expected in cases:
         # a span is built as reversed when start lies after end
         span = Span(start, end, Reverse=start > end)
         shown = repr(span.remapWith(two_part))
         if shown == expected:
             continue
         self.fail(repr((shown, expected)))
Beispiel #12
0
class Display(rlg2mpl.Drawable):
    """Holds a list of tracks and displays them all aligned
    
    base: A sequence, alignment, or anything else offering .getTracks(policy)
    policy: A DisplayPolicy subclass.
    pad: Gap between tracks in points.
    
    Other keyword arguments are used to modify the DisplayPolicy: 
    
    Sequence display:
    show_text: Represent bases as characters.  Slow.
    draw_bases: Represent bases as rectangles if MolType allows.
    show_gaps: Represent bases as line segments.
    colour_sequences: Colour code sequences if MolType allows.
    seq_color_callback: f(seq)->[colours] for flexible seq coloring.
    
    Layout:
    rowlen: wrap at this many characters per line.
    min_feature_height: minimum feature symbol height in points.
    min_graph_height: minimum height of any graphed features in points.
    
    Inclusion:
    recursive: include the sequences of the alignment.
    ignored_features: list of feature type tags to leave out.
    keep_unexpected_tracks: show features not assigned to a track by the policy.
    """
    
    def __init__(self, base, policy=DisplayPolicy, _policy=None, pad=1,
            yrange=None, **kw):
        """Record the display settings and lay out the tracks of base.
        
        A supplied _policy is used unchanged and policy / kw are not
        consulted; otherwise policy(**kw) is created and copied with a map
        covering all of base, depth 0 and rowlen len(base).
        """
        self.pad = pad
        self.base = base
        self.yrange = yrange
        assert len(base) > 0, len(base)
        
        if _policy is None:
            policy = policy(**kw).copy(
                map=Map([(0, len(base))], parent_length=len(base)),
                depth=0, 
                rowlen=len(base))
        else:
            policy = _policy
        self.policy = policy
        self.smap=Map([(0, len(base))], parent_length=len(base))

        self._calc_tracks()
    
    def __len__(self):
        """Length of the inverse of self.smap."""
        return len(self.smap.inverse())
    
    def _calc_tracks(self):
        """Stack the tracks of base and record their vertical positions.
        
        Tracks from base.getTracks(self.policy) are taken in reverse order;
        anything that is not a Track is wrapped with Track('', [...]).
        self._tracks holds (y offset, y midpoint, track) tuples and
        self.height the accumulated height including padding. If no yrange
        was given, self.yrange maps each track tag to the largest range
        among the tracks carrying that tag.
        """
        y = 0
        self._tracks = []
        for p in self.base.getTracks(self.policy)[::-1]:
            if not isinstance(p, Track):
                if not isinstance(p, list):
                    p = [p]
                p = Track('', p)
            y2 = y + p.height + self.pad
            # float divisors: an integer pad must not floor to 0 where '/'
            # is integer division
            self._tracks.append((y+self.pad/2.0, (y+y2)/2.0, p))
            y = y2
        self.height = y
        
        if self.yrange is None:
            self.yrange = {}
            for (y, ym, p) in self._tracks:
                self.yrange[p.tag] = max(self.yrange.get(p.tag, 0), p.range)
        
    def copy(self, **kw):
        """Shallow copy with the policy updated by kw and tracks rebuilt."""
        new = copy.copy(self)
        new.policy = self.policy.copy(**kw)
        new._calc_tracks()
        return new
    
    def __getitem__(self, slice):
        """Shallow copy restricted to slice; tracks are not recalculated."""
        c = copy.copy(self)
        c.smap = self.smap.inverse()[slice].inverse()
        return c
        
    def makeArtist(self, vertical=False):
        """Return an rlg2mpl.Group holding the shapes of every track.
        
        Each shape is translated by the y offset of its track. With
        vertical=True the finished group is rotated by 90 and scaled by
        (-1.0, 1.0).
        """
        g = rlg2mpl.Group()
        # the displayed span is the same for every track: compute it once
        smap = self.smap.inverse()
        span = (smap.Start, smap.End)
        for (y, ym, p) in self._tracks:
            for s in p.getShapes(
                    span=span,
                    rotated=vertical,
                    height=float(p.height), 
                    yrange=self.yrange[p.tag]):
                trans = matplotlib.transforms.Affine2D()
                trans.translate(0, y)
                s.set_transform(s.get_transform() + trans)
                g.add(s)
        if vertical:
            g.rotate(90)
            g.scale(-1.0, 1.0)
        return g
    
    def asAxes(self, fig, posn, labeled=True, vertical=False):
        """Add axes at posn to fig, scale them, draw into them, return them."""
        ax = fig.add_axes(posn)
        self.applyScaleToAxes(ax, labeled=labeled, vertical=vertical)
        g = self.makeArtist(vertical=vertical)
        ax.add_artist(g)
        return ax
        
    def applyScaleToAxes(self, ax, labeled=True, vertical=False):
        """Configure ticks, tick labels and limits of ax for this display.
        
        The sequence axis is x and the track axis y, swapped when vertical.
        With labeled=False the track axis gets no ticks; otherwise one tick
        per track taller than 8, at the track midpoint, labelled with the
        track label.
        """
        (seqaxis, trackaxis) = [ax.xaxis, ax.yaxis]
        if vertical:
            (seqaxis, trackaxis) = (trackaxis, seqaxis) 

        if not labeled:
            trackaxis.set_ticks([])
        else:
            track_positions = []
            track_labels = []
            for (y, ym, p) in self._tracks:
                if p.height > 8:
                    track_labels.append(p.label)
                    track_positions.append(ym)
            trackaxis.set_ticks(track_positions)
            trackaxis.set_ticklabels(track_labels)
            if vertical:
                for tick in trackaxis.get_major_ticks():
                    tick.label1.set_rotation('vertical')
                    tick.label2.set_rotation('vertical')
            
        # sequence positions are shown as whole numbers
        seqaxis.set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda x,pos:str(int(x))))
        
        smap = self.smap.inverse()
        seq_lim = (smap.Start, smap.End)
        # 'or 0.1' avoids a zero-height axis when there are no tracks
        if vertical:
            ax.set_ylim(*seq_lim)
            ax.set_xlim(0, self.height or 0.1)
        else:
            ax.set_xlim(*seq_lim)
            ax.set_ylim(0, self.height or 0.1)
    
    def figureLayout(self, labeled=True, vertical=False, width=None, 
            height=None, left=None, **kw):
        """Return ((w, h), posn, kw) as computed by rlg2mpl.figureLayout.
        
        left defaults to an estimate based on the longest track label (0
        when unlabeled or there are no tracks) and height to self.height
        divided by 72. For vertical layouts w/h and the paired entries of
        posn are swapped.
        """

        if left is None:
            if labeled and self._tracks:
                left = max(len(p.label) for (y, ym, p) in self._tracks)
                # 72.0 so the factor is not floored to 0 by integer division
                left *= 12/72.0 * .5 # guess mixed chars, 12pt, inaccurate!
            else:
                left = 0
        
        # presumably points -> inches; float divisor keeps small heights
        height = height or (self.height or 0.1) / 72.0
        
        useful_width = len(self)*16/72.0 # ie bigish font, wide chars
        
        fkw = dict(leftovers=True, width=width, height=height, left=left, 
                useful_width=useful_width, **kw)  
        (w,h),posn,kw = rlg2mpl.figureLayout(**fkw)
        
        #points_per_base = w * posn[3] / len(self)
        if vertical:
            (w, h) = (h, w)
            posn[0:2] = reversed(posn[0:2])
            posn[2:4] = reversed(posn[2:4])
        return (w, h), posn, kw
    
    def makeFigure(self, width=None, height=None, rowlen=None, **kw):
        """Draw the display, wrapped into rows of rowlen, on a new figure.
        
        Without rowlen everything goes on a single row. The figure comes
        from self._makeFigure (not defined in this class; presumably
        provided by rlg2mpl.Drawable). The first row is placed at the top.
        """
        if rowlen:
            rows = [self[i:i+rowlen] for i in range(0, len(self), rowlen)]
        else:
            rows = [self]
            rowlen = len(self)
        kw.update(width=width, height=height)
        ((width, height), (x, y, w, h), kw) = self.figureLayout(**kw)
        N = len(rows)
        # since scales go below and titles go above, each row
        # gets the bottom margin, but not the top margin.
        vzoom = 1 + (y+h) * (N-1)
        fig = self._makeFigure(width, height * vzoom)
        for (i, row) in enumerate(rows):
            # rows are positioned bottom-up, so invert the index
            i = len(rows) - i - 1
            posn = [x, (y+i*(y+h))/vzoom, w*len(row)/rowlen, h/vzoom]
            row.asAxes(fig, posn, **kw)
        return fig
Beispiel #13
0
        self.assertEqual( str(answer[0]), 'TCGAT')
    
    def test_getBySequenceAnnotation(self):
        """an annotation on gapped seq 'b' selects columns of the alignment"""
        seqs = {
                'a': 'ATCGAAATCGAT',
                'b': 'ATCGA--TCGAT'}
        aln = LoadSeqs(data=seqs)
        annotated = aln.getSeq('b')
        annotated.addAnnotation(Feature, 'test_type', 'test_label', [(4,6)])
        
        matches = aln.getBySequenceAnnotation('b', 'test_type')
        # only the first returned alignment is inspected
        observed = matches[0].todict()
        self.assertEqual(observed, {'b':'A--T', 'a':'AAAT'})
    

if 0:  # old, needs fixes
    # Maps
    a = Map([(10,20)], parent_length=100)
    
    for (desc, map, expected) in [
        ('a ', a,                                "Map([10:20] on base)"),
        ('i ', a.inverse(),                     "Map([-10-, 0:10, -80-] on Map([10:20] on base))"),
        ('1 ', a[5:],                            "Map([5:10] on Map([10:20] on base))"),
        ('1r', a[5:].relative_to(b),            "Map([15:20] on base)"),
        ('2 ', a[:5],                            "Map([0:5] on Map([10:20] on base))"),
        ('2r', a[:5].relative_to(b),            "Map([10:15] on base)"),
        ('r ', a.relative_to(a[5:]),            "Map([-5-, 0:5] on Map([5:10] on Map([10:20] on base)))"),
        ('r ', a[2:4].relative_to(a[2:6]),      "Map([0:2] on Map([2:6] on Map([10:20] on base)))"),
        ('r ', a[2:4].relative_to(a[2:6][0:3]), "Map([0:2] on Map([0:3] on Map([2:6] on Map([10:20] on base))))")]:
        print desc, repr(map),
        if repr(map) == expected:
            print
        else:
Beispiel #14
0
            sliced_seq.getByAnnotation('test_type', ignore_partial=True))
        self.assertEqual(len(answer), 1)
        self.assertEqual(str(answer[0]), 'TCGAT')

    def test_getBySequenceAnnotation(self):
        """a feature on sequence 'b' picks out the matching alignment slice"""
        data = {'a': 'ATCGAAATCGAT', 'b': 'ATCGA--TCGAT'}
        aln = LoadSeqs(data=data)
        seq_b = aln.getSeq('b')
        seq_b.addAnnotation(Feature, 'test_type', 'test_label', [(4, 6)])

        # the first alignment returned for the annotation is checked
        first = aln.getBySequenceAnnotation('b', 'test_type')[0]
        expected = {'b': 'A--T', 'a': 'AAAT'}
        self.assertEqual(first.todict(), expected)


if 0:  # old, needs fixes
    # Maps
    a = Map([(10, 20)], parent_length=100)

    for (desc, map, expected) in [
        ('a ', a, "Map([10:20] on base)"),
        ('i ', a.inverse(), "Map([-10-, 0:10, -80-] on Map([10:20] on base))"),
        ('1 ', a[5:], "Map([5:10] on Map([10:20] on base))"),
        ('1r', a[5:].relative_to(b), "Map([15:20] on base)"),
        ('2 ', a[:5], "Map([0:5] on Map([10:20] on base))"),
        ('2r', a[:5].relative_to(b), "Map([10:15] on base)"),
        ('r ', a.relative_to(a[5:]),
         "Map([-5-, 0:5] on Map([5:10] on Map([10:20] on base)))"),
        ('r ', a[2:4].relative_to(a[2:6]),
         "Map([0:2] on Map([2:6] on Map([10:20] on base)))"),
        ('r ', a[2:4].relative_to(a[2:6][0:3]),
         "Map([0:2] on Map([0:3] on Map([2:6] on Map([10:20] on base))))")
    ]:
Beispiel #15
0
class Display(rlg2mpl.Drawable):
    """Holds a list of tracks and displays them all aligned
    
    base: A sequence, alignment, or anything else offering .getTracks(policy)
    policy: A DisplayPolicy subclass.
    pad: Gap between tracks in points.
    
    Other keyword arguments are used to modify the DisplayPolicy: 
    
    Sequence display:
    show_text: Represent bases as characters.  Slow.
    draw_bases: Represent bases as rectangles if MolType allows.
    show_gaps: Represent bases as line segments.
    colour_sequences: Colour code sequences if MolType allows.
    seq_color_callback: f(seq)->[colours] for flexible seq coloring.
    
    Layout:
    rowlen: wrap at this many characters per line.
    min_feature_height: minimum feature symbol height in points.
    min_graph_height: minimum height of any graphed features in points.
    
    Inclusion:
    recursive: include the sequences of the alignment.
    ignored_features: list of feature type tags to leave out.
    keep_unexpected_tracks: show features not assigned to a track by the policy.
    """
    def __init__(self,
                 base,
                 policy=DisplayPolicy,
                 _policy=None,
                 pad=1,
                 yrange=None,
                 **kw):
        """Store the display settings and build the track layout for base.

        If _policy is given it is used as-is and policy / kw are ignored;
        otherwise policy(**kw) is instantiated and copied with a map
        spanning all of base, depth 0 and rowlen len(base).
        """
        self.pad = pad
        self.base = base
        self.yrange = yrange
        assert len(base) > 0, len(base)

        if _policy is None:
            policy = policy(**kw).copy(map=Map([(0, len(base))],
                                               parent_length=len(base)),
                                       depth=0,
                                       rowlen=len(base))
        else:
            policy = _policy
        self.policy = policy
        self.smap = Map([(0, len(base))], parent_length=len(base))

        self._calc_tracks()

    def __len__(self):
        """Return the length of the inverse of self.smap."""
        return len(self.smap.inverse())

    def _calc_tracks(self):
        """Compute the vertical placement of every track.

        The result of base.getTracks(self.policy) is walked in reverse;
        entries that are not Track instances are wrapped with
        Track('', [...]). self._tracks receives (y offset, y midpoint,
        track) tuples and self.height the total height including padding.
        When yrange was not supplied, self.yrange is filled with the
        maximum range seen for each track tag.
        """
        y = 0
        self._tracks = []
        for p in self.base.getTracks(self.policy)[::-1]:
            if not isinstance(p, Track):
                if not isinstance(p, list):
                    p = [p]
                p = Track('', p)
            y2 = y + p.height + self.pad
            # float divisors so an integer pad is never floored to 0 where
            # '/' is integer division
            self._tracks.append((y + self.pad / 2.0, (y + y2) / 2.0, p))
            y = y2
        self.height = y

        if self.yrange is None:
            self.yrange = {}
            for (y, ym, p) in self._tracks:
                self.yrange[p.tag] = max(self.yrange.get(p.tag, 0), p.range)

    def copy(self, **kw):
        """Return a shallow copy with policy.copy(**kw) and fresh tracks."""
        new = copy.copy(self)
        new.policy = self.policy.copy(**kw)
        new._calc_tracks()
        return new

    def __getitem__(self, slice):
        """Return a shallow copy limited to slice (tracks are reused)."""
        c = copy.copy(self)
        c.smap = self.smap.inverse()[slice].inverse()
        return c

    def makeArtist(self, vertical=False):
        """Collect the shapes of all tracks into one rlg2mpl.Group.

        Shapes are shifted by the y offset of their track; when vertical
        is true the group is rotated by 90 and scaled by (-1.0, 1.0).
        """
        g = rlg2mpl.Group()
        # loop-invariant: every track is drawn over the same span
        smap = self.smap.inverse()
        span = (smap.Start, smap.End)
        for (y, ym, p) in self._tracks:
            for s in p.getShapes(span=span,
                                 rotated=vertical,
                                 height=float(p.height),
                                 yrange=self.yrange[p.tag]):
                trans = matplotlib.transforms.Affine2D()
                trans.translate(0, y)
                s.set_transform(s.get_transform() + trans)
                g.add(s)
        if vertical:
            g.rotate(90)
            g.scale(-1.0, 1.0)
        return g

    def asAxes(self, fig, posn, labeled=True, vertical=False):
        """Create axes at posn on fig, scale and fill them; return the axes."""
        ax = fig.add_axes(posn)
        self.applyScaleToAxes(ax, labeled=labeled, vertical=vertical)
        g = self.makeArtist(vertical=vertical)
        ax.add_artist(g)
        return ax

    def applyScaleToAxes(self, ax, labeled=True, vertical=False):
        """Set up ticks, tick labels and limits of ax for this display.

        x is the sequence axis and y the track axis unless vertical, in
        which case they are swapped. Unlabeled displays get no track
        ticks; labeled ones get a tick at the midpoint of each track
        taller than 8, labelled with the track label.
        """
        (seqaxis, trackaxis) = [ax.xaxis, ax.yaxis]
        if vertical:
            (seqaxis, trackaxis) = (trackaxis, seqaxis)

        if not labeled:
            trackaxis.set_ticks([])
        else:
            track_positions = []
            track_labels = []
            for (y, ym, p) in self._tracks:
                if p.height > 8:
                    track_labels.append(p.label)
                    track_positions.append(ym)
            trackaxis.set_ticks(track_positions)
            trackaxis.set_ticklabels(track_labels)
            if vertical:
                for tick in trackaxis.get_major_ticks():
                    tick.label1.set_rotation('vertical')
                    tick.label2.set_rotation('vertical')

        # tick values on the sequence axis are printed as integers
        seqaxis.set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda x, pos: str(int(x))))

        smap = self.smap.inverse()
        seq_lim = (smap.Start, smap.End)
        # 'or 0.1' keeps the track axis non-degenerate without tracks
        if vertical:
            ax.set_ylim(*seq_lim)
            ax.set_xlim(0, self.height or 0.1)
        else:
            ax.set_xlim(*seq_lim)
            ax.set_ylim(0, self.height or 0.1)

    def figureLayout(self,
                     labeled=True,
                     vertical=False,
                     width=None,
                     height=None,
                     left=None,
                     **kw):
        """Return ((w, h), posn, kw) from rlg2mpl.figureLayout.

        left defaults to an estimate derived from the longest track label
        (0 if unlabeled or without tracks); height defaults to self.height
        divided by 72. For vertical layouts w/h and the paired entries of
        posn are exchanged.
        """

        if left is None:
            if labeled and self._tracks:
                left = max(len(p.label) for (y, ym, p) in self._tracks)
                # 72.0: the factor must not floor to 0 under int division
                left *= 12 / 72.0 * .5  # guess mixed chars, 12pt, inaccurate!
            else:
                left = 0

        # presumably points -> inches; float divisor keeps small heights
        height = height or (self.height or 0.1) / 72.0

        useful_width = len(self) * 16 / 72.0  # ie bigish font, wide chars

        fkw = dict(leftovers=True,
                   width=width,
                   height=height,
                   left=left,
                   useful_width=useful_width,
                   **kw)
        (w, h), posn, kw = rlg2mpl.figureLayout(**fkw)

        #points_per_base = w * posn[3] / len(self)
        if vertical:
            (w, h) = (h, w)
            posn[0:2] = reversed(posn[0:2])
            posn[2:4] = reversed(posn[2:4])
        return (w, h), posn, kw

    def makeFigure(self, width=None, height=None, rowlen=None, **kw):
        """Render the display onto a new figure, wrapping at rowlen.

        With no rowlen a single row holds everything. The figure is
        created by self._makeFigure (not defined in this class; presumably
        inherited from rlg2mpl.Drawable). The first row ends up on top.
        """
        if rowlen:
            rows = [self[i:i + rowlen] for i in range(0, len(self), rowlen)]
        else:
            rows = [self]
            rowlen = len(self)
        kw.update(width=width, height=height)
        ((width, height), (x, y, w, h), kw) = self.figureLayout(**kw)
        N = len(rows)
        # since scales go below and titles go above, each row
        # gets the bottom margin, but not the top margin.
        vzoom = 1 + (y + h) * (N - 1)
        fig = self._makeFigure(width, height * vzoom)
        for (i, row) in enumerate(rows):
            # rows are placed from the bottom up, hence the inverted index
            i = len(rows) - i - 1
            posn = [
                x, (y + i * (y + h)) / vzoom, w * len(row) / rowlen, h / vzoom
            ]
            row.asAxes(fig, posn, **kw)
        return fig