Ejemplo n.º 1
0
    def testSubfeaturesAreMovedDown(self):
        """
        If the sequence fetcher used by a L{_FeatureAdder} returns a feature,
        and a subfeature on the same frame, the subfeature must be plotted
        a little (0.2) below the feature.

        The legend must label the subfeature as such, and the C{add} call
        must return a L{_FeatureList} holding both the feature and its
        subfeature.
        """

        def fetcher(title, db="database"):
            # A CDS feature (100-200) with a nested CDS subfeature (130-150).
            subfeature = SeqFeature(type="CDS", qualifiers={"a": ["b"]}, location=FeatureLocation(130, 150))
            feature = SeqFeature(
                type="CDS", qualifiers={"a": ["b"]}, location=FeatureLocation(100, 200), sub_features=[subfeature]
            )
            return SeqRecord(None, features=[feature])

        featureAdder = NucleotideFeatureAdder()
        fig = plt.subplot(111)
        # Replace the drawing methods with mocks so that the calls made by
        # the feature adder can be inspected.
        fig.plot = MagicMock()
        fig.axis = MagicMock()
        fig.legend = MagicMock()
        result = featureAdder.add(fig, "title", 0, 300, identity, sequenceFetcher=fetcher)
        # The parent is drawn at y = 1 and the subfeature 0.2 lower.
        self.assertEqual(
            fig.plot.call_args_list,
            [call([100, 200], [1, 1], color=ANY, linewidth=2), call([130, 150], [0.8, 0.8], color=ANY, linewidth=2)],
        )
        fig.axis.assert_called_with([0, 300, -0.5, 2.5])
        fig.legend.assert_called_with(
            ["100-200 CDS. a: b", "130-150 CDS (subfeature). a: b"],
            loc="lower center",
            shadow=True,
            ncol=2,
            fancybox=True,
            bbox_to_anchor=(0.5, 2.5),
        )
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, _FeatureList)
        self.assertEqual(2, len(result))
Ejemplo n.º 2
0
    def testOneFeature(self):
        """
        If the sequence fetcher used by a L{_FeatureAdder} returns a feature,
        the C{plot}, C{axis} and C{legend} methods on the figure must be
        called correctly and the C{add} call must return a L{FeatureList}
        containing that one feature.
        """
        def fetcher(title, db='database'):
            location = FeatureLocation(100, 200)
            feature = SeqFeature(type='CDS', qualifiers={'a': ['b']},
                                 location=location)
            return SeqRecord(None, features=[feature])

        featureAdder = NucleotideFeatureAdder()
        fig = plt.subplot(111)
        # Replace the drawing methods with mocks so that the calls made by
        # the feature adder can be inspected.
        fig.plot = MagicMock()
        fig.axis = MagicMock()
        fig.legend = MagicMock()
        result = featureAdder.add(fig, 'title', 0, 300,
                                  sequenceFetcher=fetcher)
        fig.plot.assert_called_with(
            [100, 200], [1, 1],
            color=(0.2298057, 0.298717966, 0.75368315299999999, 1.0),
            linewidth=2)
        fig.axis.assert_called_with([0, 300, -0.5, 2.5])
        fig.legend.assert_called_with(
            ['100-200 CDS. a: b'], loc='lower center',
            shadow=True, ncol=2, fancybox=True,
            bbox_to_anchor=(0.5, 2.5))
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, FeatureList)
        self.assertEqual(1, len(result))
Ejemplo n.º 3
0
    def testOneFeatureAdjusted(self):
        """
        If the sequence fetcher used by a L{_FeatureAdder} returns a feature
        and an offset adjuster is passed, the C{plot}, C{axis} and C{legend}
        methods on the figure must be called correctly and the C{add} call
        must return a L{_FeatureList} containing that one feature.

        Note that the plotted x coordinates are adjusted (100-200 becomes
        300-600 here) but the offsets shown in the legend must be the
        original ones, as adjusted offsets make no sense to the reader.
        """

        def fetcher(title, db="database"):
            location = FeatureLocation(100, 200)
            feature = SeqFeature(type="CDS", qualifiers={"a": ["b"]}, location=location)
            return SeqRecord(None, features=[feature])

        def adjuster(offset):
            # Triple every offset so adjusted and unadjusted values are easy
            # to tell apart in the assertions below.
            return 3 * offset

        featureAdder = NucleotideFeatureAdder()
        fig = plt.subplot(111)
        # Replace the drawing methods with mocks so that the calls made by
        # the feature adder can be inspected.
        fig.plot = MagicMock()
        fig.axis = MagicMock()
        fig.legend = MagicMock()
        result = featureAdder.add(fig, "title", 0, 300, adjuster, sequenceFetcher=fetcher)
        fig.plot.assert_called_with(
            [300, 600], [0, 0], color=(0.2298057, 0.298717966, 0.75368315299999999, 1.0), linewidth=2
        )
        fig.axis.assert_called_with([0, 300, -0.5, 2.5])
        # The legend text uses the unadjusted 100-200 range.
        fig.legend.assert_called_with(
            ["100-200 CDS. a: b"], loc="lower center", shadow=True, ncol=2, fancybox=True, bbox_to_anchor=(0.5, 2.5)
        )
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, _FeatureList)
        self.assertEqual(1, len(result))
Ejemplo n.º 4
0
    def testOneFeature(self):
        """
        If the sequence fetcher used by a L{_FeatureAdder} returns a feature,
        the C{plot}, C{axis} and C{legend} methods on the figure must be
        called correctly and the C{add} call must return a L{FeatureList}
        containing that one feature.
        """
        def fetcher(title, db='database'):
            location = FeatureLocation(100, 200)
            feature = SeqFeature(type='CDS', qualifiers={'a': ['b']},
                                 location=location)
            return SeqRecord(None, features=[feature])

        featureAdder = NucleotideFeatureAdder()
        fig = plt.subplot(111)
        # Replace the drawing methods with mocks so that the calls made by
        # the feature adder can be inspected.
        fig.plot = MagicMock()
        fig.axis = MagicMock()
        fig.legend = MagicMock()
        result = featureAdder.add(fig, 'title', 0, 300,
                                  sequenceFetcher=fetcher)
        fig.plot.assert_called_with(
            [100, 200], [1, 1],
            color=(0.2298057, 0.298717966, 0.75368315299999999, 1.0),
            linewidth=2)
        fig.axis.assert_called_with([0, 300, -0.5, 2.5])
        fig.legend.assert_called_with(
            ['100-200 CDS. a: b'], loc='lower center',
            shadow=True, ncol=2, fancybox=True,
            bbox_to_anchor=(0.5, 2.5))
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, FeatureList)
        self.assertEqual(1, len(result))
Ejemplo n.º 5
0
    def testUnwantedFeature(self):
        """
        A feature whose type is not wanted must be ignored: when the sequence
        fetcher used by a L{_FeatureAdder} returns only such a feature, the
        figure's C{plot} method must never be called and C{add} must return
        an empty feature list.
        """

        def fetcher(title, db="database"):
            unwanted = SeqFeature(
                type="unwanted",
                qualifiers={"a": ["b"]},
                location=FeatureLocation(100, 200),
            )
            return SeqRecord(None, features=[unwanted])

        adder = NucleotideFeatureAdder()
        axes = plt.subplot(111)
        # Only plot is mocked; it is the only figure method inspected here.
        axes.plot = MagicMock()
        found = adder.add(
            axes, "title", 0, 300, identity, sequenceFetcher=fetcher
        )
        self.assertEqual([], axes.plot.call_args_list)
        self.assertEqual([], found)
Ejemplo n.º 6
0
    def testUnwantedFeature(self):
        """
        A feature whose type is not wanted must be ignored: when the sequence
        fetcher used by a L{_FeatureAdder} returns only such a feature, the
        figure's C{plot} method must never be called and C{add} must return
        an empty feature list.
        """
        def fetcher(title, db='database'):
            unwanted = SeqFeature(
                type='unwanted',
                qualifiers={'a': ['b']},
                location=FeatureLocation(100, 200))
            return SeqRecord(None, features=[unwanted])

        adder = NucleotideFeatureAdder()
        axes = plt.subplot(111)
        # Only plot is mocked; it is the only figure method inspected here.
        axes.plot = MagicMock()
        found = adder.add(axes, 'title', 0, 300, sequenceFetcher=fetcher)
        self.assertEqual([], axes.plot.call_args_list)
        self.assertEqual([], found)
Ejemplo n.º 7
0
    def testSubfeaturesAreMovedDown(self):
        """
        If the sequence fetcher used by a L{_FeatureAdder} returns a feature,
        and a subfeature on the same frame, the subfeature must be plotted
        a little (0.2) below the feature.

        The legend must label the subfeature as such, and the C{add} call
        must return a L{FeatureList} holding both the feature and its
        subfeature.
        """
        def fetcher(title, db='database'):
            # A CDS feature (100-200) with a nested CDS subfeature (130-150).
            subfeature = SeqFeature(type='CDS',
                                    qualifiers={'a': ['b']},
                                    location=FeatureLocation(130, 150))
            feature = SeqFeature(type='CDS',
                                 qualifiers={'a': ['b']},
                                 location=FeatureLocation(100, 200),
                                 sub_features=[subfeature])
            return SeqRecord(None, features=[feature])

        featureAdder = NucleotideFeatureAdder()
        fig = plt.subplot(111)
        # Replace the drawing methods with mocks so that the calls made by
        # the feature adder can be inspected.
        fig.plot = MagicMock()
        fig.axis = MagicMock()
        fig.legend = MagicMock()
        result = featureAdder.add(fig,
                                  'title',
                                  0,
                                  300,
                                  sequenceFetcher=fetcher)
        # The parent is drawn at y = 1 and the subfeature 0.2 lower.
        self.assertEqual(fig.plot.call_args_list, [
            call([100, 200], [1, 1], color=ANY, linewidth=2),
            call([130, 150], [0.8, 0.8], color=ANY, linewidth=2),
        ])
        fig.axis.assert_called_with([0, 300, -0.5, 2.5])
        fig.legend.assert_called_with(
            ['100-200 CDS. a: b', '130-150 CDS (subfeature). a: b'],
            loc='lower center',
            shadow=True,
            ncol=2,
            fancybox=True,
            bbox_to_anchor=(0.5, 2.5))
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, FeatureList)
        self.assertEqual(2, len(result))
Ejemplo n.º 8
0
    def testPolyproteinsAreMovedUp(self):
        """
        If the sequence fetcher used by a L{_FeatureAdder} returns a feature,
        that's a polyprotein, the feature must be plotted a little (0.2) above
        its normal location.

        A second, non-polyprotein, feature is included to check that it is
        still plotted at its normal height, and the C{add} call must return
        a L{FeatureList} holding both features.
        """
        def fetcher(title, db='database'):
            # feature1 is marked as a polyprotein via its 'product'
            # qualifier; feature2 is an ordinary CDS.
            feature1 = SeqFeature(type='CDS',
                                  qualifiers={'product': ['a polyprotein']},
                                  location=FeatureLocation(100, 200))
            feature2 = SeqFeature(type='CDS',
                                  qualifiers={'a': ['b']},
                                  location=FeatureLocation(130, 150))
            return SeqRecord(None, features=[feature1, feature2])

        featureAdder = NucleotideFeatureAdder()
        fig = plt.subplot(111, label=_randomLabel())
        # Replace the drawing methods with mocks so that the calls made by
        # the feature adder can be inspected.
        fig.plot = MagicMock()
        fig.axis = MagicMock()
        fig.legend = MagicMock()
        result = featureAdder.add(fig,
                                  'title',
                                  0,
                                  300,
                                  sequenceFetcher=fetcher)
        # The polyprotein is drawn at y = 1.2, the ordinary feature at 1.0.
        self.assertEqual(fig.plot.call_args_list, [
            call([100, 200], [1.2, 1.2], color=ANY, linewidth=2),
            call([130, 150], [1.0, 1.0], color=ANY, linewidth=2),
        ])
        fig.axis.assert_called_with([0, 300, -0.5, 2.5])
        fig.legend.assert_called_with(
            ['100-200 CDS. product: a polyprotein', '130-150 CDS. a: b'],
            loc='lower center',
            shadow=True,
            ncol=2,
            fancybox=True,
            bbox_to_anchor=(0.5, 2.5))
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, FeatureList)
        self.assertEqual(2, len(result))
Ejemplo n.º 9
0
    def testPolyproteinsAreMovedUp(self):
        """
        If the sequence fetcher used by a L{_FeatureAdder} returns a feature,
        that's a polyprotein, the feature must be plotted a little (0.2) above
        its normal location.

        A second, non-polyprotein, feature is included to check that it is
        still plotted at its normal height, and the C{add} call must return
        a L{_FeatureList} holding both features.
        """

        def fetcher(title, db="database"):
            # feature1 is marked as a polyprotein via its 'product'
            # qualifier; feature2 is an ordinary CDS.
            feature1 = SeqFeature(
                type="CDS", qualifiers={"product": ["a polyprotein"]}, location=FeatureLocation(100, 200)
            )
            feature2 = SeqFeature(type="CDS", qualifiers={"a": ["b"]}, location=FeatureLocation(130, 150))
            return SeqRecord(None, features=[feature1, feature2])

        featureAdder = NucleotideFeatureAdder()
        fig = plt.subplot(111)
        # Replace the drawing methods with mocks so that the calls made by
        # the feature adder can be inspected.
        fig.plot = MagicMock()
        fig.axis = MagicMock()
        fig.legend = MagicMock()
        result = featureAdder.add(fig, "title", 0, 300, identity, sequenceFetcher=fetcher)
        # The polyprotein is drawn at y = 1.2, the ordinary feature at 1.0.
        self.assertEqual(
            fig.plot.call_args_list,
            [
                call([100, 200], [1.2, 1.2], color=ANY, linewidth=2),
                call([130, 150], [1.0, 1.0], color=ANY, linewidth=2),
            ],
        )
        fig.axis.assert_called_with([0, 300, -0.5, 2.5])
        fig.legend.assert_called_with(
            ["100-200 CDS. product: a polyprotein", "130-150 CDS. a: b"],
            loc="lower center",
            shadow=True,
            ncol=2,
            fancybox=True,
            bbox_to_anchor=(0.5, 2.5),
        )
        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, _FeatureList)
        self.assertEqual(2, len(result))
Ejemplo n.º 10
0
def alignmentGraph(titlesAlignments, title, addQueryLines=True,
                   showFeatures=True, logLinearXAxis=False,
                   logBase=DEFAULT_LOG_LINEAR_X_AXIS_BASE, rankScores=False,
                   colorQueryBases=False, createFigure=True, showFigure=True,
                   readsAx=None, imageFile=None, quiet=False, idList=False,
                   xRange='subject', showOrfs=True):
    """
    Align a set of matching reads against a BLAST or DIAMOND hit.

    @param titlesAlignments: A L{dark.titles.TitlesAlignments} instance.
    @param title: A C{str} sequence title that was matched. We plot the
        reads that hit this title.
    @param addQueryLines: if C{True}, draw query lines in full (these will then
        be partly overdrawn by the HSP match against the subject). These are
        the 'whiskers' that potentially protrude from each side of a query.
    @param showFeatures: if C{True}, look up features of the subject
        sequence (identified by C{title}) and plot them on their own axes.
    @param logLinearXAxis: if C{True}, convert read offsets so that empty
        regions in the plot we're preparing will only be as wide as their
        logged actual values.
    @param logBase: The base of the logarithm to use if logLinearXAxis is
        C{True}.
    @param rankScores: If C{True}, change the e-values and bit scores for the
        reads for each title to be their rank (worst to best).
    @param colorQueryBases: if C{True}, color each base of a query string. If
        C{True}, then addQueryLines is meaningless since the whole query is
        shown colored.
    @param createFigure: If C{True}, create a figure and give it a title.
    @param showFigure: If C{True}, show the created figure. Set this to
        C{False} if you're creating a panel of figures or just want to save an
        image (with C{imageFile}). Only used if C{createFigure} is C{True}.
    @param readsAx: If not None, use this as the subplot for displaying reads.
    @param imageFile: If not None, specifies a filename to write the image to.
        Only used if C{createFigure} is C{True}.
    @param quiet: If C{True}, don't print progress / timing output.
    @param idList: a dictionary (or a false value for no special coloring).
        Each key is a color and each value is a list of read identifiers
        that should be colored in that color.
    @param xRange: set to either 'subject' or 'reads' to indicate the range of
        the X axis.
    @param showOrfs: If C{True}, open reading frames will be displayed. This
        is ignored when the subject is not a nucleotide sequence.
    @raise AssertionError: If C{xRange} is neither 'subject' nor 'reads'.
    @raise ValueError: If a read id appears more than once in C{idList}.
    @return: A C{dict} with keys 'adjustOffset' (the function used to adjust
        X axis offsets), 'features' (the value returned by the feature adder,
        or C{None} if C{showFeatures} is false), 'minX', 'maxX', 'minY' and
        'maxY' (the extreme values found for the plot).
    """

    startTime = time()

    assert xRange in ('subject', 'reads'), (
        'xRange must be either "subject" or "reads".')

    if createFigure:
        width = 20
        figure = plt.figure(figsize=(width, 20))

    # Remember whether we made the reads axes ourselves: only then do we
    # give them a title and a y-axis label (see below).
    createdReadsAx = readsAx is None

    # Lay out the axes. The reads axes are always created last, below any
    # feature and ORF axes.
    #
    # NOTE(review): for a non-nucleotide subject, showOrfs is only switched
    # off further below, after the ORF axes have been created here, so those
    # axes stay in the figure without anything plotted on them. That looks
    # unintended - confirm before changing as it alters the plot layout.
    if showFeatures:
        if showOrfs:
            gs = gridspec.GridSpec(4, 1, height_ratios=[3, 1, 1, 12])
            featureAx = plt.subplot(gs[0, 0])
            orfAx = plt.subplot(gs[1, 0])
            orfReversedAx = plt.subplot(gs[2, 0])
            readsAx = readsAx or plt.subplot(gs[3, 0])
        else:
            gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1])
            featureAx = plt.subplot(gs[0, 0])
            readsAx = readsAx or plt.subplot(gs[1, 0])
    else:
        if showOrfs:
            gs = gridspec.GridSpec(3, 1, height_ratios=[1, 1, 12])
            orfAx = plt.subplot(gs[0, 0])
            orfReversedAx = plt.subplot(gs[1, 0])
            readsAx = readsAx or plt.subplot(gs[2, 0])
        else:
            readsAx = readsAx or plt.subplot(111)

    # Make a deep copy of the title alignments. We're potentially going to
    # change the HSP scores, the X axis offsets, etc., and we don't want to
    # interfere with the data we were passed.
    titleAlignments = deepcopy(titlesAlignments[title])

    readsAlignments = titlesAlignments.readsAlignments
    subjectIsNucleotides = readsAlignments.params.subjectIsNucleotides

    if showOrfs and not subjectIsNucleotides:
        # We cannot show ORFs when displaying protein plots.
        showOrfs = False

    # Allow the class of titlesAlignments to adjust HSPs for plotting,
    # if it has a method for doing so.
    try:
        adjuster = readsAlignments.adjustHspsForPlotting
    except AttributeError:
        pass
    else:
        adjuster(titleAlignments)

    if rankScores:
        # Replace each HSP score by its rank, with the best HSP getting the
        # highest rank.
        reverse = titlesAlignments.scoreClass is not HigherIsBetterScore
        for rank, hsp in enumerate(sorted(titleAlignments.hsps(),
                                   reverse=reverse), start=1):
            hsp.score.score = rank

    if logLinearXAxis:
        readIntervals = ReadIntervals(titleAlignments.subjectLength)
        # Examine all HSPs so we can build an offset adjuster.
        for hsp in titleAlignments.hsps():
            readIntervals.add(hsp.readStartInSubject, hsp.readEndInSubject)
        # Now adjust offsets in all HSPs.
        offsetAdjuster = OffsetAdjuster(readIntervals, base=logBase)
        for hsp in titleAlignments.hsps():
            offsetAdjuster.adjustHSP(hsp)
        # A function for adjusting other offsets, below.
        adjustOffset = offsetAdjuster.adjustOffset
    else:
        def adjustOffset(offset):
            return offset

    # It would be more efficient to only walk through all HSPs once and
    # compute these values all at once, but for now this is simple and clear.
    maxY = int(ceil(titleAlignments.bestHsp().score.score))
    minY = int(titleAlignments.worstHsp().score.score)
    maxX = max(hsp.readEndInSubject for hsp in titleAlignments.hsps())
    minX = min(hsp.readStartInSubject for hsp in titleAlignments.hsps())

    if xRange == 'subject':
        # We'll display a graph for the full subject range. Adjust X axis
        # min/max to make sure we cover at least zero to the sequence length.
        maxX = max(titleAlignments.subjectLength, maxX)
        minX = min(0, minX)

    # Swap min & max Y values, if needed, as it's possible we are dealing
    # with LSPs but that the score adjuster made numerically greater values
    # for those that were small.
    if maxY < minY:
        (maxY, minY) = (minY, maxY)

    if logLinearXAxis:
        # Adjust minX and maxX if we have gaps at the subject start or end.
        gaps = list(readIntervals.walk())
        if gaps:
            # Check start of first gap:
            intervalType, (start, stop) = gaps[0]
            if intervalType == ReadIntervals.EMPTY:
                adjustedStart = adjustOffset(start)
                if adjustedStart < minX:
                    minX = adjustedStart
            # Check stop of last gap:
            intervalType, (start, stop) = gaps[-1]
            if intervalType == ReadIntervals.EMPTY:
                adjustedStop = adjustOffset(stop)
                if adjustedStop > maxX:
                    maxX = adjustedStop

    # We're all set up to start plotting the graph.

    # Add light grey vertical rectangles to show the logarithmic gaps. Add
    # these first so that reads will be plotted on top of them. Only draw
    # gaps that are more than SMALLEST_LOGGED_GAP_TO_DISPLAY pixels wide as
    # we could have millions of tiny gaps for a bacteria and drawing them
    # all will be slow and only serves to make the entire background grey.
    if logLinearXAxis and len(offsetAdjuster.adjustments()) < 100:
        for (intervalType, interval) in readIntervals.walk():
            if intervalType == ReadIntervals.EMPTY:
                adjustedStart = adjustOffset(interval[0])
                adjustedStop = adjustOffset(interval[1])
                width = adjustedStop - adjustedStart
                if width >= SMALLEST_LOGGED_GAP_TO_DISPLAY:
                    readsAx.axvspan(adjustedStart, adjustedStop,
                                    color='#f4f4f4')

    if colorQueryBases:
        # Color each query by its bases.
        xScale = 3
        yScale = 2
        baseImage = BaseImage(
            maxX - minX, maxY - minY + (1 if rankScores else 0),
            xScale, yScale)
        for alignment in titleAlignments:
            for hsp in alignment.hsps:
                y = hsp.score.score - minY
                # If the product of the subject and read frame values is +ve,
                # then they're either both +ve or both -ve, so we just use the
                # read as is. Otherwise, we need to reverse complement it.
                if hsp.subjectFrame * hsp.readFrame > 0:
                    query = alignment.read.sequence
                else:
                    # One of the subject or query has negative sense.
                    query = alignment.read.reverseComplement().sequence
                readStartInSubject = hsp.readStartInSubject
                # There are 3 parts of the query string we need to
                # display. 1) the left part (if any) before the matched
                # part of the subject.  2) the matched part (which can
                # include gaps in the query and/or subject). 3) the right
                # part (if any) after the matched part.  For each part,
                # calculate the ranges in which we have to make the
                # comparison between subject and query.

                # NOTE: never use hsp['origHsp'].gaps to calculate the number
                # of gaps, as this number contains gaps in both subject and
                # query.

                # 1. Left part:
                leftRange = hsp.subjectStart - readStartInSubject

                # 2. Match, middle part:
                middleRange = len(hsp.readMatchedSequence)

                # 3. Right part:
                # Using hsp.readEndInSubject - hsp.subjectEnd to calculate the
                # length of the right part leads to the part being too long.
                # The number of gaps needs to be subtracted to get the right
                # length.
                origQuery = hsp.readMatchedSequence.upper()
                rightRange = (hsp.readEndInSubject - hsp.subjectEnd -
                              origQuery.count('-'))

                # 1. Left part. Bases are upper-cased before the color
                # lookup, as is done for the match and right parts, so that
                # lower-case reads are colored the same way on both sides.
                xOffset = readStartInSubject - minX
                for queryIndex in range(leftRange):
                    color = QUERY_COLORS.get(query[queryIndex].upper(),
                                             DEFAULT_BASE_COLOR)
                    baseImage.set(xOffset + queryIndex, y, color)

                # 2. Match part.
                xOffset = hsp.subjectStart - minX
                # xIndex only advances for positions that are actually
                # drawn, i.e., those where the subject has no gap.
                xIndex = 0
                origSubject = hsp.subjectMatchedSequence
                for matchIndex in range(middleRange):
                    if origSubject[matchIndex] == '-':
                        # A gap in the subject was needed to match the query.
                        # In our graph we keep the subject the same even in the
                        # case where BLAST opened gaps in it, so we compensate
                        # for the gap in the subject by not showing this base
                        # of the query.
                        pass
                    else:
                        if origSubject[matchIndex] == origQuery[matchIndex]:
                            # The query matched the subject at this location.
                            # Matching bases are all colored in the same
                            # 'match' color.
                            color = QUERY_COLORS['match']
                        else:
                            if origQuery[matchIndex] == '-':
                                # A gap in the query. All query gaps get the
                                # same 'gap' color.
                                color = QUERY_COLORS['gap']
                            else:
                                # Query doesn't match subject (and is not a
                                # gap).
                                color = QUERY_COLORS.get(origQuery[matchIndex],
                                                         DEFAULT_BASE_COLOR)
                        baseImage.set(xOffset + xIndex, y, color)
                        xIndex += 1

                # 3. Right part.
                xOffset = hsp.subjectEnd - minX
                backQuery = query[-rightRange:].upper()
                for queryIndex in range(rightRange):
                    color = QUERY_COLORS.get(backQuery[queryIndex],
                                             DEFAULT_BASE_COLOR)
                    baseImage.set(xOffset + queryIndex, y, color)

        readsAx.imshow(baseImage.data, aspect='auto', origin='lower',
                       interpolation='nearest',
                       extent=[minX, maxX, minY, maxY])
    else:
        # Add horizontal lines for all the query sequences. These will be the
        # grey 'whiskers' in the plots once we (below) draw the matched part
        # on top of part of them.
        if addQueryLines:
            for hsp in titleAlignments.hsps():
                y = hsp.score.score
                line = Line2D([hsp.readStartInSubject, hsp.readEndInSubject],
                              [y, y], color='#aaaaaa')
                readsAx.add_line(line)

        # Add the horizontal BLAST alignment lines.

        # If an idList is given set things up to look up read colors.
        readColor = {}
        if idList:
            for color, reads in idList.items():
                for read in reads:
                    if read in readColor:
                        raise ValueError('Read %s is specified multiple '
                                         'times in idList' % read)
                    else:
                        readColor[read] = color

        # Draw the matched region. Reads not mentioned in idList are blue.
        for titleAlignment in titleAlignments:
            readId = titleAlignment.read.id
            for hsp in titleAlignment.hsps:
                y = hsp.score.score
                line = Line2D([hsp.subjectStart, hsp.subjectEnd], [y, y],
                              color=readColor.get(readId, 'blue'))
                readsAx.add_line(line)

    if showOrfs:
        subject = readsAlignments.getSubjectSequence(title)
        orfs.addORFs(orfAx, subject.sequence, minX, maxX, adjustOffset)
        orfs.addReversedORFs(orfReversedAx,
                             subject.reverseComplement().sequence,
                             minX, maxX, adjustOffset)

    if showFeatures:
        if subjectIsNucleotides:
            featureAdder = NucleotideFeatureAdder()
        else:
            featureAdder = ProteinFeatureAdder()

        features = featureAdder.add(featureAx, title, minX, maxX,
                                    adjustOffset)

        # If there are features and there weren't too many of them, add
        # vertical feature lines to the reads and ORF axes.
        if features and not featureAdder.tooManyFeaturesToPlot:
            for feature in features:
                start = feature.start
                end = feature.end
                color = feature.color
                # The start of a feature is drawn in the feature's color,
                # its end in light grey.
                readsAx.axvline(x=start, color=color)
                readsAx.axvline(x=end, color='#cccccc')
                if showOrfs:
                    orfAx.axvline(x=start, color=color)
                    orfAx.axvline(x=end, color='#cccccc')
                    orfReversedAx.axvline(x=start, color=color)
                    orfReversedAx.axvline(x=end, color='#cccccc')
    else:
        features = None

    # We'll return some information we've gathered.
    result = {
        'adjustOffset': adjustOffset,
        'features': features,
        'minX': minX,
        'maxX': maxX,
        'minY': minY,
        'maxY': maxY,
    }

    # Allow the class of titlesAlignments to add to the plot, if it has a
    # method for doing so.
    try:
        adjuster = readsAlignments.adjustPlot
    except AttributeError:
        pass
    else:
        adjuster(readsAx)

    # Titles, axis, etc.
    if createFigure:
        readCount = titleAlignments.readCount()
        hspCount = titleAlignments.hspCount()
        figure.suptitle(
            '%s\nLength %d %s, %d read%s, %d HSP%s.' %
            (
                fill(titleAlignments.subjectTitle, 80),
                titleAlignments.subjectLength,
                'nt' if subjectIsNucleotides else 'aa',
                readCount, '' if readCount == 1 else 's',
                hspCount, '' if hspCount == 1 else 's'
            ),
            fontsize=20)

    # Add a title and y-axis label, but only if we made the reads axes.
    if createdReadsAx:
        readsAx.set_title('Read alignments', fontsize=20)
        ylabel = readsAlignments.params.scoreTitle
        if rankScores:
            ylabel += ' rank'
        # Label the reads axes explicitly rather than via plt.ylabel, which
        # acts on whatever the current axes happen to be.
        readsAx.set_ylabel(ylabel, fontsize=17)

    # Set the x-axis limits.
    readsAx.set_xlim([minX - 1, maxX + 1])

    readsAx.set_ylim([0, int(maxY * Y_AXIS_UPPER_PADDING)])
    readsAx.grid()
    if createFigure:
        if showFigure:
            plt.show()
        if imageFile:
            figure.savefig(imageFile)
    stop = time()
    if not quiet:
        report('Graph generated in %.3f mins.' % ((stop - startTime) / 60.0))

    return result
Ejemplo n.º 11
0
def alignmentGraph(titlesAlignments,
                   title,
                   accession,
                   addQueryLines=True,
                   showFeatures=True,
                   logLinearXAxis=False,
                   logBase=DEFAULT_LOG_LINEAR_X_AXIS_BASE,
                   rankScores=False,
                   createFigure=True,
                   showFigure=True,
                   readsAx=None,
                   imageFile=None,
                   quiet=False,
                   idList=False,
                   xRange='subject'):
    """
    Align a set of matching reads against a BLAST or DIAMOND hit.

    @param titlesAlignments: A L{dark.titles.TitlesAlignments} instance.
    @param title: A C{str} sequence title that was matched. We plot the
        reads that hit this title.
    @param accession: The C{str} accession number of the matched title. It
        is shown in parentheses after the subject title in the figure title.
    @param addQueryLines: if C{True}, draw query lines in full (these will then
        be partly overdrawn by the HSP match against the subject). These are
        the 'whiskers' that potentially protrude from each side of a query.
    @param showFeatures: if C{True}, plot features of the subject sequence
        (looked up using C{title}) on a separate axes above the reads, and
        mark feature starts and ends with vertical lines on the reads axes.
    @param logLinearXAxis: if C{True}, convert read offsets so that empty
        regions in the plot we're preparing will only be as wide as their
        logged actual values.
    @param logBase: The base of the logarithm to use if logLinearXAxis is
        C{True}.
    @param rankScores: If C{True}, change the e-values and bit scores for the
        reads for each title to be their rank (worst to best).
    @param createFigure: If C{True}, create a figure and give it a title.
    @param showFigure: If C{True}, show the created figure. Set this to
        C{False} if you're creating a panel of figures or just want to save an
        image (with C{imageFile}).
    @param readsAx: If not None, use this as the subplot for displaying reads.
    @param imageFile: If not None, specifies a filename to write the image to.
    @param quiet: If C{True}, don't print progress / timing output.
    @param idList: A C{dict} whose keys are colors and whose values are lists
        of read identifiers that should be drawn in the respective color.
        Any false value (the default) means all matched regions are drawn
        in blue.
    @param xRange: set to either 'subject' or 'reads' to indicate the range of
        the X axis.
    @raise AssertionError: If C{xRange} is neither 'subject' nor 'reads'.
    @raise ValueError: If a read id appears more than once in C{idList}.
    @return: A C{dict} with keys 'adjustOffset' (the function used to adjust
        X offsets), 'features' (the value returned by the feature adder, or
        C{None} if C{showFeatures} is false), 'minX', 'maxX', 'minY', and
        'maxY'.
    """

    startTime = time()

    assert xRange in ('subject',
                      'reads'), ('xRange must be either "subject" or "reads".')

    if createFigure:
        width = 20
        figure = plt.figure(figsize=(width, 20))

    # Remember whether the caller gave us an axes to draw on. We only add a
    # title and y-axis label (below) to an axes we made ourselves.
    createdReadsAx = readsAx is None

    if showFeatures:
        gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1])
        featureAx = plt.subplot(gs[0, 0])
        if createdReadsAx:
            readsAx = plt.subplot(gs[1, 0])
    elif createdReadsAx:
        readsAx = plt.subplot(111)

    # Make a deep copy of the title alignments. We're potentially going to
    # change the HSP scores, the X axis offsets, etc., and we don't want to
    # interfere with the data we were passed.
    titleAlignments = deepcopy(titlesAlignments[title])

    readsAlignments = titlesAlignments.readsAlignments
    subjectIsNucleotides = readsAlignments.params.subjectIsNucleotides

    # Allow the class of titlesAlignments to adjust HSPs for plotting,
    # if it has a method for doing so.
    try:
        adjuster = readsAlignments.adjustHspsForPlotting
    except AttributeError:
        pass
    else:
        adjuster(titleAlignments)

    if rankScores:
        # Replace each HSP score by its rank. The sort direction depends on
        # the score class so that ranks always run from worst to best.
        reverse = titlesAlignments.scoreClass is not HigherIsBetterScore
        for rank, hsp in enumerate(sorted(titleAlignments.hsps(),
                                          reverse=reverse),
                                   start=1):
            hsp.score.score = rank

    if logLinearXAxis:
        readIntervals = ReadIntervals(titleAlignments.subjectLength)
        # Examine all HSPs so we can build an offset adjuster.
        for hsp in titleAlignments.hsps():
            readIntervals.add(hsp.readStartInSubject, hsp.readEndInSubject)
        # Now adjust offsets in all HSPs.
        offsetAdjuster = OffsetAdjuster(readIntervals, base=logBase)
        for hsp in titleAlignments.hsps():
            offsetAdjuster.adjustHSP(hsp)
        # A function for adjusting other offsets, below.
        adjustOffset = offsetAdjuster.adjustOffset
    else:

        def adjustOffset(offset):
            return offset

    # It would be more efficient to only walk through all HSPs once and
    # compute these values all at once, but for now this is simple and clear.
    maxY = int(ceil(titleAlignments.bestHsp().score.score))
    minY = int(titleAlignments.worstHsp().score.score)
    maxX = max(hsp.readEndInSubject for hsp in titleAlignments.hsps())
    minX = min(hsp.readStartInSubject for hsp in titleAlignments.hsps())

    if xRange == 'subject':
        # We'll display a graph for the full subject range. Adjust X axis
        # min/max to make sure we cover at least zero to the sequence length.
        maxX = max(titleAlignments.subjectLength, maxX)
        minX = min(0, minX)

    # Swap min & max Y values, if needed, as it's possible we are dealing
    # with LSPs but that the score adjuster made numerically greater values
    # for those that were small.
    if maxY < minY:
        maxY, minY = minY, maxY

    if logLinearXAxis:
        # Adjust minX and maxX if we have gaps at the subject start or end.
        gaps = list(readIntervals.walk())
        if gaps:
            # Check start of first gap:
            intervalType, (start, stop) = gaps[0]
            if intervalType == ReadIntervals.EMPTY:
                adjustedStart = adjustOffset(start)
                if adjustedStart < minX:
                    minX = adjustedStart
            # Check stop of last gap:
            intervalType, (start, stop) = gaps[-1]
            if intervalType == ReadIntervals.EMPTY:
                adjustedStop = adjustOffset(stop)
                if adjustedStop > maxX:
                    maxX = adjustedStop

    # We're all set up to start plotting the graph.

    # Add light grey vertical rectangles to show the logarithmic gaps. Add
    # these first so that reads will be plotted on top of them. Only draw
    # gaps that are more than SMALLEST_LOGGED_GAP_TO_DISPLAY pixels wide as
    # we could have millions of tiny gaps for a bacteria and drawing them
    # all will be slow and only serves to make the entire background grey.
    if logLinearXAxis and len(offsetAdjuster.adjustments()) < 100:
        for (intervalType, interval) in readIntervals.walk():
            if intervalType == ReadIntervals.EMPTY:
                adjustedStart = adjustOffset(interval[0])
                adjustedStop = adjustOffset(interval[1])
                width = adjustedStop - adjustedStart
                if width >= SMALLEST_LOGGED_GAP_TO_DISPLAY:
                    readsAx.axvspan(adjustedStart,
                                    adjustedStop,
                                    color='#f4f4f4')

    # NOTE: the read drawing below must happen whether or not the gap
    # rectangles above were drawn. It used to sit in an 'else' clause of the
    # gap-drawing 'if', which left the reads axes empty whenever the
    # log-linear gaps were displayed.

    # Add horizontal lines for all the query sequences. These will be the
    # grey 'whiskers' in the plots once we (below) draw the matched part
    # on top of part of them.
    if addQueryLines:
        for hsp in titleAlignments.hsps():
            y = hsp.score.score
            line = Line2D([hsp.readStartInSubject, hsp.readEndInSubject],
                          [y, y],
                          color='#aaaaaa')
            readsAx.add_line(line)

    # Add the horizontal BLAST alignment lines.

    # If an idList is given set things up to look up read colors.
    readColor = {}
    if idList:
        for color, reads in idList.items():
            for read in reads:
                if read in readColor:
                    raise ValueError('Read %s is specified multiple '
                                     'times in idList' % read)
                readColor[read] = color

    # Draw the matched region.
    for titleAlignment in titleAlignments:
        readId = titleAlignment.read.id
        for hsp in titleAlignment.hsps:
            y = hsp.score.score
            line = Line2D([hsp.subjectStart, hsp.subjectEnd], [y, y],
                          color=readColor.get(readId, 'blue'))
            readsAx.add_line(line)

    if showFeatures:
        if subjectIsNucleotides:
            featureAdder = NucleotideFeatureAdder()
        else:
            featureAdder = ProteinFeatureAdder()

        features = featureAdder.add(featureAx, title, minX, maxX, adjustOffset)

        # If there are features and there weren't too many of them, add
        # vertical feature lines to the reads axes.
        if features and not featureAdder.tooManyFeaturesToPlot:
            for feature in features:
                readsAx.axvline(x=feature.start, color=feature.color)
                readsAx.axvline(x=feature.end, color='#cccccc')
    else:
        features = None

    # We'll return some information we've gathered.
    result = {
        'adjustOffset': adjustOffset,
        'features': features,
        'minX': minX,
        'maxX': maxX,
        'minY': minY,
        'maxY': maxY,
    }

    # Allow the class of titlesAlignments to add to the plot, if it has a
    # method for doing so.
    try:
        adjuster = readsAlignments.adjustPlot
    except AttributeError:
        pass
    else:
        adjuster(readsAx)

    # Titles, axis, etc.
    if createFigure:
        readCount = titleAlignments.readCount()
        hspCount = titleAlignments.hspCount()
        figure.suptitle(
            '%s (%s)\nLength %d %s, %d read%s, %d HSP%s.' %
            (
                fill(titleAlignments.subjectTitle, 80),
                accession,
                titleAlignments.subjectLength,
                'nt' if subjectIsNucleotides else 'aa',
                readCount, '' if readCount == 1 else 's',
                hspCount, '' if hspCount == 1 else 's',
            ),
            fontsize=20)

    # Add a title and y-axis label, but only if we made the reads axes.
    if createdReadsAx:
        readsAx.set_title('Read alignments', fontsize=20)
        ylabel = readsAlignments.params.scoreTitle
        if rankScores:
            ylabel += ' rank'
        # Label the reads axes explicitly rather than via plt.ylabel, which
        # acts on whatever the current pyplot axes happens to be.
        readsAx.set_ylabel(ylabel, fontsize=17)

    # Set the x-axis limits.
    readsAx.set_xlim([minX - 1, maxX + 1])

    readsAx.set_ylim([0, int(maxY * Y_AXIS_UPPER_PADDING)])
    readsAx.grid()
    if createFigure:
        if showFigure:
            plt.show()
        if imageFile:
            figure.savefig(imageFile)
    stop = time()
    if not quiet:
        report('Graph generated in %.3f mins.' % ((stop - startTime) / 60.0))

    return result