예제 #1
0
 def testOneIntervalExactCovering(self):
     """
     A lone interval that matches the hit exactly must come back from walk
     as a single full interval.
     """
     intervals = ReadIntervals(100)
     intervals.add(0, 100)
     expected = [
         (self.FULL, (0, 100)),
     ]
     self.assertEqual(expected, list(intervals.walk()))
예제 #2
0
 def testOneIntervalExactCoveringCoverage(self):
     """
     A lone interval that matches the hit exactly must give a coverage
     of 1.0.
     """
     intervals = ReadIntervals(100)
     intervals.add(0, 100)
     fraction = intervals.coverage()
     self.assertEqual(1.0, fraction)
예제 #3
0
 def testOneIntervalCoveringAllExtendingLeftCoverage(self):
     """
     A lone interval that starts at a negative offset and runs to the end
     of the hit must give a coverage of 1.0.
     """
     intervals = ReadIntervals(100)
     intervals.add(-10, 100)
     fraction = intervals.coverage()
     self.assertEqual(1.0, fraction)
예제 #4
0
 def testOneIntervalCoveringAllExtendingRightCoverage(self):
     """
     A lone interval that starts at zero and runs past the end of the hit
     must give a coverage of 1.0.
     """
     intervals = ReadIntervals(100)
     intervals.add(0, 110)
     fraction = intervals.coverage()
     self.assertEqual(1.0, fraction)
예제 #5
0
 def testOneIntervalEndingAfterHitEndCoverage(self):
     """
     A lone interval that starts half way along the hit and runs past its
     end must give a coverage of 0.5.
     """
     intervals = ReadIntervals(100)
     intervals.add(50, 150)
     fraction = intervals.coverage()
     self.assertEqual(0.5, fraction)
예제 #6
0
 def testOneIntervalStartingBeforeZeroCoverage(self):
     """
     A lone interval that starts at a negative offset and stops half way
     along the hit must give a coverage of 0.5.
     """
     intervals = ReadIntervals(100)
     intervals.add(-50, 50)
     fraction = intervals.coverage()
     self.assertEqual(0.5, fraction)
예제 #7
0
 def testOneIntervalStartingAtZeroCoverageCounts(self):
     """
     A lone interval that starts at zero and stops part way along the hit
     must make coverageCounts report each covered offset exactly once.
     """
     intervals = ReadIntervals(10)
     intervals.add(0, 5)
     # Offsets 0..4 are each covered by the single read.
     expected = Counter(range(5))
     self.assertEqual(expected, intervals.coverageCounts())
예제 #8
0
 def testTwoOverlappingIntervalsInMiddle(self):
     """
     Two overlapping intervals in the middle of the hit must be merged by
     walk, giving three intervals: empty, full, empty.
     """
     intervals = ReadIntervals(100)
     for start, end in ((50, 60), (55, 70)):
         intervals.add(start, end)
     expected = [
         (self.EMPTY, (0, 50)),
         (self.FULL, (50, 70)),
         (self.EMPTY, (70, 100)),
     ]
     self.assertEqual(expected, list(intervals.walk()))
예제 #9
0
 def testOneIntervalCoveringAllExtendingBothCoverage(self):
     """
     A lone interval that starts at a negative offset and runs past the end
     of the hit must give a coverage of 1.0.
     """
     intervals = ReadIntervals(100)
     intervals.add(-10, 110)
     fraction = intervals.coverage()
     self.assertEqual(1.0, fraction)
예제 #10
0
    def testOneIntervalInMiddleCoverage(self):
        """
        If there is a single interval in the middle of the hit, coverage
        should return the fraction of the hit that the interval spans.
        """
        ri = ReadIntervals(100)
        ri.add(50, 60)
        # 0.1 has no exact binary floating point representation, so compare
        # approximately rather than depending on exactly how coverage()
        # performs its arithmetic.
        self.assertAlmostEqual(0.1, ri.coverage())
예제 #11
0
파일: titles.py 프로젝트: acorg/dark-matter
 def coverageCounts(self):
     """
     Count, for each location in the title sequence, how many reads cover
     that location.

     @return: The result of calling C{coverageCounts} on a L{ReadIntervals}
         instance holding the subject start/end of every HSP for this title.
     """
     readIntervals = ReadIntervals(self.subjectLength)
     spans = ((hsp.subjectStart, hsp.subjectEnd) for hsp in self.hsps())
     for start, end in spans:
         readIntervals.add(start, end)
     return readIntervals.coverageCounts()
예제 #12
0
 def testOneIntervalCoveringAllExtendingLeft(self):
     """
     A lone interval that starts at a negative offset and runs to the end
     of the hit must come back from walk as a single full interval that
     keeps its negative start.
     """
     intervals = ReadIntervals(100)
     intervals.add(-10, 100)
     expected = [
         (self.FULL, (-10, 100)),
     ]
     self.assertEqual(expected, list(intervals.walk()))
예제 #13
0
 def testOneIntervalInMiddleCoverageCounts(self):
     """
     A lone one-offset interval in the middle of the hit must make
     coverageCounts report just that offset, covered once.
     """
     intervals = ReadIntervals(10)
     intervals.add(5, 6)
     expected = Counter({5: 1})
     self.assertEqual(expected, intervals.coverageCounts())
예제 #14
0
 def testOneIntervalExactCoveringCoverageCounts(self):
     """
     A lone interval that matches the hit exactly must make coverageCounts
     report every offset of the hit, each covered once.
     """
     intervals = ReadIntervals(10)
     intervals.add(0, 10)
     # Offsets 0..9 are each covered by the single read.
     expected = Counter(range(10))
     self.assertEqual(expected, intervals.coverageCounts())
예제 #15
0
 def testOneIntervalEndingAfterHitEnd(self):
     """
     A lone interval that starts part way along the hit and runs past its
     end must give two intervals from walk: an empty one, then a full one.
     """
     intervals = ReadIntervals(100)
     intervals.add(50, 150)
     expected = [
         (self.EMPTY, (0, 50)),
         (self.FULL, (50, 150)),
     ]
     self.assertEqual(expected, list(intervals.walk()))
예제 #16
0
 def testOneIntervalStartingBeforeZero(self):
     """
     A lone interval that starts at a negative offset and stops part way
     along the hit must give two intervals from walk: a full one, then an
     empty one.
     """
     intervals = ReadIntervals(100)
     intervals.add(-50, 50)
     expected = [
         (self.FULL, (-50, 50)),
         (self.EMPTY, (50, 100)),
     ]
     self.assertEqual(expected, list(intervals.walk()))
예제 #17
0
 def testOneIntervalCoveringAllExtendingBoth(self):
     """
     A lone interval that starts at a negative offset and runs past the end
     of the hit must come back from walk as a single full interval with
     its original start and end.
     """
     intervals = ReadIntervals(100)
     intervals.add(-10, 110)
     expected = [
         (self.FULL, (-10, 110)),
     ]
     self.assertEqual(expected, list(intervals.walk()))
예제 #18
0
 def testTwoOverlappingIntervalsInMiddleCoverage(self):
     """
     If there are two overlapping intervals in the middle of the hit,
     coverage should count the overlapped region only once.
     """
     ri = ReadIntervals(100)
     ri.add(50, 60)
     ri.add(55, 70)
     # The merged region 50-70 spans 20 of the 100 offsets. 0.2 has no exact
     # binary floating point representation, so compare approximately rather
     # than depending on exactly how coverage() performs its arithmetic.
     self.assertAlmostEqual(0.2, ri.coverage())
예제 #19
0
    def testOneIntervalInMiddle(self):
        """
        A lone interval in the middle of the hit must give three intervals
        from walk: empty, full, empty.
        """
        intervals = ReadIntervals(100)
        intervals.add(50, 60)
        expected = [
            (self.EMPTY, (0, 50)),
            (self.FULL, (50, 60)),
            (self.EMPTY, (60, 100)),
        ]
        self.assertEqual(expected, list(intervals.walk()))
예제 #20
0
 def testTwoOverlappingIntervalsInMiddleCoverageCounts(self):
     """
     Two overlapping intervals in the middle of the hit must make
     coverageCounts report the shared offset twice and the others once.
     """
     intervals = ReadIntervals(10)
     for start, end in ((5, 7), (6, 8)):
         intervals.add(start, end)
     # Offset 6 lies in both reads; 5 and 7 lie in one read each.
     expected = Counter({5: 1, 6: 2, 7: 1})
     self.assertEqual(expected, intervals.coverageCounts())
예제 #21
0
 def testOneIntervalEndingAtHitEndCoverageCounts(self):
     """
     A lone interval that starts part way along the hit and stops exactly
     at its end must make coverageCounts report each covered offset once.
     """
     intervals = ReadIntervals(10)
     intervals.add(5, 10)
     # Offsets 5..9 are each covered by the single read.
     expected = Counter(range(5, 10))
     self.assertEqual(expected, intervals.coverageCounts())
예제 #22
0
 def testOneReadAtEnd(self):
     """
     A single read occupying the end of the hit must produce exactly one
     adjustment, for the empty region that precedes the read.
     """
     intervals = ReadIntervals(228)
     intervals.add(128, 228)
     offsetAdjuster = OffsetAdjuster(intervals)
     expectedAdjustments = [
         (128, 121),
     ]
     self.assertEqual(expectedAdjustments, offsetAdjuster.adjustments())
     self.assertEqual(107, offsetAdjuster.adjustOffset(228))
예제 #23
0
 def testOneIntervalCoveringAllExtendingRightCoverageCounts(self):
     """
     A lone interval that starts at zero and runs past the end of the hit
     must make coverageCounts report only the offsets inside the hit, each
     covered once.
     """
     intervals = ReadIntervals(10)
     intervals.add(0, 12)
     # Only offsets 0..9 are expected; 10 and 11 lie beyond the hit.
     expected = Counter(range(10))
     self.assertEqual(expected, intervals.coverageCounts())
예제 #24
0
 def testOneReadInMiddle(self):
     """
     A single read in the middle of the hit must produce two adjustments,
     one for the empty region on each side of the read.
     """
     intervals = ReadIntervals(106)
     intervals.add(32, 42)
     offsetAdjuster = OffsetAdjuster(intervals)
     expectedAdjustments = [
         (32, 27),
         (106, 58),
     ]
     self.assertEqual(expectedAdjustments, offsetAdjuster.adjustments())
     # The end of the hit is shifted left by the sum of both reductions.
     expectedEnd = 106 - (27 + 58)
     self.assertEqual(expectedEnd, offsetAdjuster.adjustOffset(106))
예제 #25
0
 def testOneReadThatExactlyCoversHit(self):
     """
     A single read that matches the hit exactly leaves no empty region, so
     there must be no adjustments and offsets must be unchanged.
     """
     intervals = ReadIntervals(106)
     intervals.add(0, 106)
     offsetAdjuster = OffsetAdjuster(intervals)
     noAdjustments = []
     self.assertEqual(noAdjustments, offsetAdjuster.adjustments())
     self.assertEqual(106, offsetAdjuster.adjustOffset(106))
예제 #26
0
 def testOneReadThatExceedsHitOnBothEnds(self):
     """
     A single read that overhangs the hit on both sides leaves no empty
     region, so there must be no adjustments and offsets must be unchanged.
     """
     intervals = ReadIntervals(106)
     intervals.add(-100, 200)
     offsetAdjuster = OffsetAdjuster(intervals)
     noAdjustments = []
     self.assertEqual(noAdjustments, offsetAdjuster.adjustments())
     self.assertEqual(106, offsetAdjuster.adjustOffset(106))
예제 #27
0
 def testOneReadBeforeStart(self):
     """
     A single read that begins before zero and stops part way along the hit
     must produce exactly one adjustment, for the empty region that follows
     the read.
     """
     intervals = ReadIntervals(228)
     intervals.add(-10, 100)
     offsetAdjuster = OffsetAdjuster(intervals)
     expectedAdjustments = [
         (228, 121),
     ]
     self.assertEqual(expectedAdjustments, offsetAdjuster.adjustments())
     self.assertEqual(107, offsetAdjuster.adjustOffset(228))
예제 #28
0
파일: titles.py 프로젝트: acorg/dark-matter
    def coverage(self):
        """
        Compute how much of this title sequence is matched by its reads.

        @return: The C{float} fraction of the title sequence matched by its
            reads, as computed by L{ReadIntervals} from the subject start/end
            of every HSP for this title.
        """
        readIntervals = ReadIntervals(self.subjectLength)
        spans = ((hsp.subjectStart, hsp.subjectEnd) for hsp in self.hsps())
        for start, end in spans:
            readIntervals.add(start, end)
        return readIntervals.coverage()
예제 #29
0
 def testOneIntervalCoveringAllExtendingLeft(self):
     """
     A lone interval that starts at a negative offset and runs to the end
     of the hit must come back from walk as a single full interval that
     keeps its negative start.
     """
     intervals = ReadIntervals(100)
     intervals.add(-10, 100)
     expected = [(self.FULL, (-10, 100))]
     walked = list(intervals.walk())
     self.assertEqual(expected, walked)
예제 #30
0
 def testOneReadThatExactlyCoversHit(self):
     """
     A single read that matches the hit exactly leaves no empty region, so
     there must be no adjustments and offsets must be unchanged.
     """
     intervals = ReadIntervals(106)
     intervals.add(0, 106)
     offsetAdjuster = OffsetAdjuster(intervals)
     self.assertEqual([], offsetAdjuster.adjustments())
     self.assertEqual(106, offsetAdjuster.adjustOffset(106))
예제 #31
0
 def testOneReadThatExceedsHitOnBothEnds(self):
     """
     A single read that overhangs the hit on both sides leaves no empty
     region, so there must be no adjustments and offsets must be unchanged.
     """
     intervals = ReadIntervals(106)
     intervals.add(-100, 200)
     offsetAdjuster = OffsetAdjuster(intervals)
     self.assertEqual([], offsetAdjuster.adjustments())
     self.assertEqual(106, offsetAdjuster.adjustOffset(106))
예제 #32
0
 def testOneReadInMiddle(self):
     """
     A single read in the middle of the hit must produce two adjustments,
     one for the empty region on each side of the read.
     """
     intervals = ReadIntervals(106)
     intervals.add(32, 42)
     offsetAdjuster = OffsetAdjuster(intervals)
     expectedAdjustments = [(32, 27), (106, 58)]
     self.assertEqual(expectedAdjustments, offsetAdjuster.adjustments())
     # The end of the hit is shifted left by the sum of both reductions.
     expectedEnd = 106 - (27 + 58)
     self.assertEqual(expectedEnd, offsetAdjuster.adjustOffset(106))
예제 #33
0
 def testOneIntervalCoveringAllExtendingBoth(self):
     """
     A lone interval that starts at a negative offset and runs past the end
     of the hit must come back from walk as a single full interval with
     its original start and end.
     """
     intervals = ReadIntervals(100)
     intervals.add(-10, 110)
     expected = [(self.FULL, (-10, 110))]
     walked = list(intervals.walk())
     self.assertEqual(expected, walked)
예제 #34
0
 def testTwoOverlappingIntervalsInMiddle(self):
     """
     Two overlapping intervals in the middle of the hit must be merged by
     walk, giving three intervals: empty, full, empty.
     """
     intervals = ReadIntervals(100)
     for start, end in ((50, 60), (55, 70)):
         intervals.add(start, end)
     empty, full = self.EMPTY, self.FULL
     expected = [(empty, (0, 50)), (full, (50, 70)), (empty, (70, 100))]
     self.assertEqual(expected, list(intervals.walk()))
예제 #35
0
 def testOneReadAfterEnd(self):
     """
     A single read that occupies the end of the hit and runs beyond it must
     produce exactly one adjustment, for the empty region that precedes the
     read.
     """
     intervals = ReadIntervals(228)
     intervals.add(128, 250)
     offsetAdjuster = OffsetAdjuster(intervals)
     expectedAdjustments = [(128, 121)]
     self.assertEqual(expectedAdjustments, offsetAdjuster.adjustments())
     self.assertEqual(107, offsetAdjuster.adjustOffset(228))
예제 #36
0
 def testPairOfTwoOverlappingIntervalsCoverage(self):
     """
     If there are two sets of two overlapping intervals in the middle of
     the hit, coverage should count each merged region only once.
     """
     ri = ReadIntervals(100)
     # First overlapping pair, 50-70.
     ri.add(50, 60)
     ri.add(55, 70)
     # Second overlapping pair, 80-95.
     ri.add(80, 90)
     ri.add(85, 95)
     # The merged regions span 20 + 15 = 35 of the 100 offsets. 0.35 has no
     # exact binary floating point representation, so compare approximately
     # rather than depending on exactly how coverage() performs its
     # arithmetic.
     self.assertAlmostEqual(0.35, ri.coverage())
예제 #37
0
 def testPairOfTwoOverlappingIntervalsCoverage(self):
     """
     If there are two sets of two overlapping intervals in the middle of
     the hit, coverage should count each merged region only once.
     """
     ri = ReadIntervals(100)
     # First overlapping pair, 50-70.
     ri.add(50, 60)
     ri.add(55, 70)
     # Second overlapping pair, 80-95.
     ri.add(80, 90)
     ri.add(85, 95)
     # The merged regions span 20 + 15 = 35 of the 100 offsets. 0.35 has no
     # exact binary floating point representation, so compare approximately
     # rather than depending on exactly how coverage() performs its
     # arithmetic.
     self.assertAlmostEqual(0.35, ri.coverage())
예제 #38
0
    def testOneIntervalInMiddle(self):
        """
        A lone interval in the middle of the hit must give three intervals
        from walk: empty, full, empty.
        """
        intervals = ReadIntervals(100)
        intervals.add(50, 60)
        empty, full = self.EMPTY, self.FULL
        expected = [(empty, (0, 50)), (full, (50, 60)), (empty, (60, 100))]
        self.assertEqual(expected, list(intervals.walk()))
예제 #39
0
 def testOneIntervalEndingAfterHitEnd(self):
     """
     A lone interval that starts part way along the hit and runs past its
     end must give two intervals from walk: an empty one, then a full one.
     """
     intervals = ReadIntervals(100)
     intervals.add(50, 150)
     expected = [(self.EMPTY, (0, 50)), (self.FULL, (50, 150))]
     walked = list(intervals.walk())
     self.assertEqual(expected, walked)
예제 #40
0
 def testOneReadBeforeStart(self):
     """
     A single read that begins before zero and stops part way along the hit
     must produce exactly one adjustment, for the empty region that follows
     the read.
     """
     intervals = ReadIntervals(228)
     intervals.add(-10, 100)
     offsetAdjuster = OffsetAdjuster(intervals)
     expectedAdjustments = [(228, 121)]
     self.assertEqual(expectedAdjustments, offsetAdjuster.adjustments())
     self.assertEqual(107, offsetAdjuster.adjustOffset(228))
예제 #41
0
 def testOneIntervalStartingBeforeZero(self):
     """
     A lone interval that starts at a negative offset and stops part way
     along the hit must give two intervals from walk: a full one, then an
     empty one.
     """
     intervals = ReadIntervals(100)
     intervals.add(-50, 50)
     expected = [(self.FULL, (-50, 50)), (self.EMPTY, (50, 100))]
     walked = list(intervals.walk())
     self.assertEqual(expected, walked)
예제 #42
0
    def testTwoReadsInMiddle(self):
        """
        Two separate reads in the middle of the hit must produce three
        adjustments (one after each of the three empty regions), and HSPs
        passed to adjustHSP must have their offsets shifted accordingly.
        """
        intervals = ReadIntervals(132)
        for start, end in ((32, 42), (58, 68)):
            intervals.add(start, end)
        offsetAdjuster = OffsetAdjuster(intervals)
        expectedAdjustments = [(32, 27), (58, 12), (132, 58)]
        self.assertEqual(expectedAdjustments, offsetAdjuster.adjustments())
        self.assertEqual(132 - 27 - 12 - 58,
                         offsetAdjuster.adjustOffset(132))

        # The HSP attributes that are set and then checked, in the order in
        # which they are checked.
        fields = ('readEndInSubject', 'readStartInSubject',
                  'subjectEnd', 'subjectStart')

        # Each case pairs the offsets an HSP is created with and the offsets
        # it must have after adjustment, both ordered as in 'fields'.
        cases = (
            # An HSP at the beginning is unchanged.
            ((10, 0, 10, 0), (10, 0, 10, 0)),
            # An HSP in the first read region.
            ((42, 32, 40, 35), (15, 5, 13, 8)),
            # An HSP in the second read region.
            ((68, 58, 66, 60), (29, 19, 27, 21)),
        )

        for original, adjusted in cases:
            hsp = HSP(10, **dict(zip(fields, original)))
            offsetAdjuster.adjustHSP(hsp)
            for field, expected in zip(fields, adjusted):
                self.assertEqual(expected, getattr(hsp, field))
예제 #43
0
    def testTwoReadsInMiddle(self):
        """
        Two separate reads in the middle of the hit must produce three
        adjustments (one after each of the three empty regions), and HSPs
        passed to adjustHSP must have their offsets shifted accordingly.
        """
        intervals = ReadIntervals(132)
        for start, end in ((32, 42), (58, 68)):
            intervals.add(start, end)
        offsetAdjuster = OffsetAdjuster(intervals)
        expectedAdjustments = [(32, 27), (58, 12), (132, 58)]
        self.assertEqual(expectedAdjustments, offsetAdjuster.adjustments())
        self.assertEqual(132 - 27 - 12 - 58,
                         offsetAdjuster.adjustOffset(132))

        # The HSP attributes that are set and then checked, in the order in
        # which they are checked.
        fields = ('readEndInSubject', 'readStartInSubject',
                  'subjectEnd', 'subjectStart')

        # Each case pairs the offsets an HSP is created with and the offsets
        # it must have after adjustment, both ordered as in 'fields'.
        cases = (
            # An HSP at the beginning is unchanged.
            ((10, 0, 10, 0), (10, 0, 10, 0)),
            # An HSP in the first read region.
            ((42, 32, 40, 35), (15, 5, 13, 8)),
            # An HSP in the second read region.
            ((68, 58, 66, 60), (29, 19, 27, 21)),
        )

        for original, adjusted in cases:
            hsp = HSP(10, **dict(zip(fields, original)))
            offsetAdjuster.adjustHSP(hsp)
            for field, expected in zip(fields, adjusted):
                self.assertEqual(expected, getattr(hsp, field))
예제 #44
0
 def testPairOfTwoOverlappingIntervals(self):
     """
     Two separate pairs of overlapping intervals in the middle of the hit
     must each be merged by walk, giving five intervals in total.
     """
     intervals = ReadIntervals(100)
     reads = (
         # First overlapping pair, merging to 50-70.
         (50, 60), (55, 70),
         # Second overlapping pair, merging to 80-95.
         (80, 90), (85, 95),
     )
     for start, end in reads:
         intervals.add(start, end)
     empty, full = self.EMPTY, self.FULL
     expected = [
         (empty, (0, 50)),
         (full, (50, 70)),
         (empty, (70, 80)),
         (full, (80, 95)),
         (empty, (95, 100)),
     ]
     self.assertEqual(expected, list(intervals.walk()))
예제 #45
0
 def testOverlappingIntervalsThatCoverEverythingCoverage(self):
     """
     Several overlapping or abutting intervals that together span the whole
     hit must give a coverage of 1.0.
     """
     intervals = ReadIntervals(100)
     reads = (
         (-10, 20),
         (15, 40),
         (40, 70),
         (66, 89),
         (77, 93),
         (70, 110),
     )
     for start, end in reads:
         intervals.add(start, end)
     fraction = intervals.coverage()
     self.assertEqual(1.0, fraction)
예제 #46
0
 def testOverlappingIntervalsThatCoverEverything(self):
     """
     Several overlapping or abutting intervals that together span the whole
     hit must be merged by walk into a single full interval.
     """
     intervals = ReadIntervals(100)
     reads = (
         (-10, 20),
         (15, 40),
         (40, 70),
         (66, 89),
         (77, 93),
         (70, 110),
     )
     for start, end in reads:
         intervals.add(start, end)
     expected = [
         (self.FULL, (-10, 110)),
     ]
     self.assertEqual(expected, list(intervals.walk()))
예제 #47
0
def alignmentGraph(titlesAlignments, title, addQueryLines=True,
                   showFeatures=True, logLinearXAxis=False,
                   logBase=DEFAULT_LOG_LINEAR_X_AXIS_BASE, rankScores=False,
                   colorQueryBases=False, createFigure=True, showFigure=True,
                   readsAx=None, imageFile=None, quiet=False, idList=False,
                   xRange='subject', showOrfs=True):
    """
    Align a set of matching reads against a BLAST or DIAMOND hit.

    @param titlesAlignments: A L{dark.titles.TitlesAlignments} instance.
    @param title: A C{str} sequence title that was matched. We plot the
        reads that hit this title.
    @param addQueryLines: if C{True}, draw query lines in full (these will then
        be partly overdrawn by the HSP match against the subject). These are
        the 'whiskers' that potentially protrude from each side of a query.
    @param showFeatures: if C{True}, look online for features of the subject
        sequence (given by hitId).
    @param logLinearXAxis: if C{True}, convert read offsets so that empty
        regions in the plot we're preparing will only be as wide as their
        logged actual values.
    @param logBase: The base of the logarithm to use if logLinearXAxis is
        C{True}.
    @param: rankScores: If C{True}, change the e-values and bit scores for the
        reads for each title to be their rank (worst to best).
    @param colorQueryBases: if C{True}, color each base of a query string. If
        C{True}, then addQueryLines is meaningless since the whole query is
        shown colored.
    @param createFigure: If C{True}, create a figure and give it a title.
    @param showFigure: If C{True}, show the created figure. Set this to
        C{False} if you're creating a panel of figures or just want to save an
        image (with C{imageFile}).
    @param readsAx: If not None, use this as the subplot for displaying reads.
    @param imageFile: If not None, specifies a filename to write the image to.
    @param quiet: If C{True}, don't print progress / timing output.
    @param idList: a dictionary. The keys is a color and the values is a list
        of read identifiers that should be colored in the respective color.
    @param xRange: set to either 'subject' or 'reads' to indicate the range of
        the X axis.
    @param showOrfs: If C{True}, open reading frames will be displayed.
    """

    startTime = time()

    assert xRange in ('subject', 'reads'), (
        'xRange must be either "subject" or "reads".')

    if createFigure:
        width = 20
        figure = plt.figure(figsize=(width, 20))

    createdReadsAx = readsAx is None

    if showFeatures:
        if showOrfs:
            gs = gridspec.GridSpec(4, 1, height_ratios=[3, 1, 1, 12])
            featureAx = plt.subplot(gs[0, 0])
            orfAx = plt.subplot(gs[1, 0])
            orfReversedAx = plt.subplot(gs[2, 0])
            readsAx = readsAx or plt.subplot(gs[3, 0])
        else:
            gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1])
            featureAx = plt.subplot(gs[0, 0])
            readsAx = readsAx or plt.subplot(gs[1, 0])
    else:
        if showOrfs:
            gs = gridspec.GridSpec(3, 1, height_ratios=[1, 1, 12])
            orfAx = plt.subplot(gs[0, 0])
            orfReversedAx = plt.subplot(gs[1, 0])
            readsAx = readsAx or plt.subplot(gs[2, 0])
        else:
            readsAx = readsAx or plt.subplot(111)

    # Make a deep copy of the title alignments. We're potentially going to
    # change the HSP scores, the X axis offsets, etc., and we don't want to
    # interfere with the data we were passed.
    titleAlignments = deepcopy(titlesAlignments[title])

    readsAlignments = titlesAlignments.readsAlignments
    subjectIsNucleotides = readsAlignments.params.subjectIsNucleotides

    if showOrfs and not subjectIsNucleotides:
        # We cannot show ORFs when displaying protein plots.
        showOrfs = False

    # Allow the class of titlesAlignments to adjust HSPs for plotting,
    # if it has a method for doing so.
    try:
        adjuster = readsAlignments.adjustHspsForPlotting
    except AttributeError:
        pass
    else:
        adjuster(titleAlignments)

    if rankScores:
        reverse = titlesAlignments.scoreClass is not HigherIsBetterScore
        for rank, hsp in enumerate(sorted(titleAlignments.hsps(),
                                   reverse=reverse), start=1):
            hsp.score.score = rank

    if logLinearXAxis:
        readIntervals = ReadIntervals(titleAlignments.subjectLength)
        # Examine all HSPs so we can build an offset adjuster.
        for hsp in titleAlignments.hsps():
            readIntervals.add(hsp.readStartInSubject, hsp.readEndInSubject)
        # Now adjust offsets in all HSPs.
        offsetAdjuster = OffsetAdjuster(readIntervals, base=logBase)
        for hsp in titleAlignments.hsps():
            offsetAdjuster.adjustHSP(hsp)
        # A function for adjusting other offsets, below.
        adjustOffset = offsetAdjuster.adjustOffset
    else:
        def adjustOffset(offset):
            return offset

    # It would be more efficient to only walk through all HSPs once and
    # compute these values all at once, but for now this is simple and clear.
    maxY = int(ceil(titleAlignments.bestHsp().score.score))
    minY = int(titleAlignments.worstHsp().score.score)
    maxX = max(hsp.readEndInSubject for hsp in titleAlignments.hsps())
    minX = min(hsp.readStartInSubject for hsp in titleAlignments.hsps())

    if xRange == 'subject':
        # We'll display a graph for the full subject range. Adjust X axis
        # min/max to make sure we cover at least zero to the sequence length.
        maxX = max(titleAlignments.subjectLength, maxX)
        minX = min(0, minX)

    # Swap min & max Y values, if needed, as it's possible we are dealing
    # with LSPs but that the score adjuster made numerically greater values
    # for those that were small.
    if maxY < minY:
        (maxY, minY) = (minY, maxY)

    if logLinearXAxis:
        # Adjust minX and maxX if we have gaps at the subject start or end.
        gaps = list(readIntervals.walk())
        if gaps:
            # Check start of first gap:
            intervalType, (start, stop) = gaps[0]
            if intervalType == ReadIntervals.EMPTY:
                adjustedStart = adjustOffset(start)
                if adjustedStart < minX:
                    minX = adjustedStart
            # Check stop of last gap:
            intervalType, (start, stop) = gaps[-1]
            if intervalType == ReadIntervals.EMPTY:
                adjustedStop = adjustOffset(stop)
                if adjustedStop > maxX:
                    maxX = adjustedStop

    # We're all set up to start plotting the graph.

    # Add light grey vertical rectangles to show the logarithmic gaps. Add
    # these first so that reads will be plotted on top of them. Only draw
    # gaps that are more than SMALLEST_LOGGED_GAP_TO_DISPLAY pixels wide as
    # we could have millions of tiny gaps for a bacteria and drawing them
    # all will be slow and only serves to make the entire background grey.
    if logLinearXAxis and len(offsetAdjuster.adjustments()) < 100:
        for (intervalType, interval) in readIntervals.walk():
            if intervalType == ReadIntervals.EMPTY:
                adjustedStart = adjustOffset(interval[0])
                adjustedStop = adjustOffset(interval[1])
                width = adjustedStop - adjustedStart
                if width >= SMALLEST_LOGGED_GAP_TO_DISPLAY:
                    readsAx.axvspan(adjustedStart, adjustedStop,
                                    color='#f4f4f4')

    if colorQueryBases:
        # Color each query by its bases.
        xScale = 3
        yScale = 2
        baseImage = BaseImage(
            maxX - minX, maxY - minY + (1 if rankScores else 0),
            xScale, yScale)
        for alignment in titleAlignments:
            for hsp in alignment.hsps:
                y = hsp.score.score - minY
                # If the product of the subject and read frame values is +ve,
                # then they're either both +ve or both -ve, so we just use the
                # read as is. Otherwise, we need to reverse complement it.
                if hsp.subjectFrame * hsp.readFrame > 0:
                    query = alignment.read.sequence
                else:
                    # One of the subject or query has negative sense.
                    query = alignment.read.reverseComplement().sequence
                readStartInSubject = hsp.readStartInSubject
                # There are 3 parts of the query string we need to
                # display. 1) the left part (if any) before the matched
                # part of the subject.  2) the matched part (which can
                # include gaps in the query and/or subject). 3) the right
                # part (if any) after the matched part.  For each part,
                # calculate the ranges in which we have to make the
                # comparison between subject and query.

                # NOTE: never use hsp['origHsp'].gaps to calculate the number
                # of gaps, as this number contains gaps in both subject and
                # query.

                # 1. Left part:
                leftRange = hsp.subjectStart - readStartInSubject

                # 2. Match, middle part:
                middleRange = len(hsp.readMatchedSequence)

                # 3. Right part:
                # Using hsp.readEndInSubject - hsp.subjectEnd to calculate the
                # length of the right part leads to the part being too long.
                # The number of gaps needs to be subtracted to get the right
                # length.
                origQuery = hsp.readMatchedSequence.upper()
                rightRange = (hsp.readEndInSubject - hsp.subjectEnd -
                              origQuery.count('-'))

                # 1. Left part.
                xOffset = readStartInSubject - minX
                queryOffset = 0
                for queryIndex in range(leftRange):
                    color = QUERY_COLORS.get(query[queryOffset + queryIndex],
                                             DEFAULT_BASE_COLOR)
                    baseImage.set(xOffset + queryIndex, y, color)

                # 2. Match part.
                xOffset = hsp.subjectStart - minX
                xIndex = 0
                queryOffset = hsp.subjectStart - hsp.readStartInSubject
                origSubject = hsp.subjectMatchedSequence
                for matchIndex in range(middleRange):
                    if origSubject[matchIndex] == '-':
                        # A gap in the subject was needed to match the query.
                        # In our graph we keep the subject the same even in the
                        # case where BLAST opened gaps in it, so we compensate
                        # for the gap in the subject by not showing this base
                        # of the query.
                        pass
                    else:
                        if origSubject[matchIndex] == origQuery[matchIndex]:
                            # The query matched the subject at this location.
                            # Matching bases are all colored in the same
                            # 'match' color.
                            color = QUERY_COLORS['match']
                        else:
                            if origQuery[matchIndex] == '-':
                                # A gap in the query. All query gaps get the
                                # same 'gap' color.
                                color = QUERY_COLORS['gap']
                            else:
                                # Query doesn't match subject (and is not a
                                # gap).
                                color = QUERY_COLORS.get(origQuery[matchIndex],
                                                         DEFAULT_BASE_COLOR)
                        baseImage.set(xOffset + xIndex, y, color)
                        xIndex += 1

                # 3. Right part.
                xOffset = hsp.subjectEnd - minX
                backQuery = query[-rightRange:].upper()
                for queryIndex in range(rightRange):
                    color = QUERY_COLORS.get(backQuery[queryIndex],
                                             DEFAULT_BASE_COLOR)
                    baseImage.set(xOffset + queryIndex, y, color)

        readsAx.imshow(baseImage.data, aspect='auto', origin='lower',
                       interpolation='nearest',
                       extent=[minX, maxX, minY, maxY])
    else:
        # Add horizontal lines for all the query sequences. These will be the
        # grey 'whiskers' in the plots once we (below) draw the matched part
        # on top of part of them.
        if addQueryLines:
            for hsp in titleAlignments.hsps():
                y = hsp.score.score
                line = Line2D([hsp.readStartInSubject, hsp.readEndInSubject],
                              [y, y], color='#aaaaaa')
                readsAx.add_line(line)

        # Add the horizontal BLAST alignment lines.

        # If an idList is given set things up to look up read colors.
        readColor = {}
        if idList:
            for color, reads in idList.items():
                for read in reads:
                    if read in readColor:
                        raise ValueError('Read %s is specified multiple '
                                         'times in idList' % read)
                    else:
                        readColor[read] = color

        # Draw the matched region.
        for titleAlignment in titleAlignments:
            readId = titleAlignment.read.id
            for hsp in titleAlignment.hsps:
                y = hsp.score.score
                line = Line2D([hsp.subjectStart, hsp.subjectEnd], [y, y],
                              color=readColor.get(readId, 'blue'))
                readsAx.add_line(line)

    if showOrfs:
        subject = readsAlignments.getSubjectSequence(title)
        orfs.addORFs(orfAx, subject.sequence, minX, maxX, adjustOffset)
        orfs.addReversedORFs(orfReversedAx,
                             subject.reverseComplement().sequence,
                             minX, maxX, adjustOffset)

    if showFeatures:
        if subjectIsNucleotides:
            featureAdder = NucleotideFeatureAdder()
        else:
            featureAdder = ProteinFeatureAdder()

        features = featureAdder.add(featureAx, title, minX, maxX,
                                    adjustOffset)

        # If there are features and there weren't too many of them, add
        # vertical feature lines to the reads and ORF axes.
        if features and not featureAdder.tooManyFeaturesToPlot:
            for feature in features:
                start = feature.start
                end = feature.end
                color = feature.color
                readsAx.axvline(x=start, color=color)
                readsAx.axvline(x=end, color='#cccccc')
                if showOrfs:
                    orfAx.axvline(x=start, color=color)
                    orfAx.axvline(x=end, color='#cccccc')
                    orfReversedAx.axvline(x=start, color=color)
                    orfReversedAx.axvline(x=end, color='#cccccc')
    else:
        features = None

    # We'll return some information we've gathered.
    result = {
        'adjustOffset': adjustOffset,
        'features': features,
        'minX': minX,
        'maxX': maxX,
        'minY': minY,
        'maxY': maxY,
    }

    # Allow the class of titlesAlignments to add to the plot, if it has a
    # method for doing so.
    try:
        adjuster = readsAlignments.adjustPlot
    except AttributeError:
        pass
    else:
        adjuster(readsAx)

    # Titles, axis, etc.
    if createFigure:
        readCount = titleAlignments.readCount()
        hspCount = titleAlignments.hspCount()
        figure.suptitle(
            '%s\nLength %d %s, %d read%s, %d HSP%s.' %
            (
                fill(titleAlignments.subjectTitle, 80),
                titleAlignments.subjectLength,
                'nt' if subjectIsNucleotides else 'aa',
                readCount, '' if readCount == 1 else 's',
                hspCount, '' if hspCount == 1 else 's'
            ),
            fontsize=20)

    # Add a title and y-axis label, but only if we made the reads axes.
    if createdReadsAx:
        readsAx.set_title('Read alignments', fontsize=20)
        ylabel = readsAlignments.params.scoreTitle
        if rankScores:
            ylabel += ' rank'
        plt.ylabel(ylabel, fontsize=17)

    # Set the x-axis limits.
    readsAx.set_xlim([minX - 1, maxX + 1])

    readsAx.set_ylim([0, int(maxY * Y_AXIS_UPPER_PADDING)])
    readsAx.grid()
    if createFigure:
        if showFigure:
            plt.show()
        if imageFile:
            figure.savefig(imageFile)
    stop = time()
    if not quiet:
        report('Graph generated in %.3f mins.' % ((stop - startTime) / 60.0))

    return result
예제 #48
0
def alignmentGraph(titlesAlignments,
                   title,
                   accession,
                   addQueryLines=True,
                   showFeatures=True,
                   logLinearXAxis=False,
                   logBase=DEFAULT_LOG_LINEAR_X_AXIS_BASE,
                   rankScores=False,
                   createFigure=True,
                   showFigure=True,
                   readsAx=None,
                   imageFile=None,
                   quiet=False,
                   idList=False,
                   xRange='subject'):
    """
    Align a set of matching reads against a BLAST or DIAMOND hit.

    @param titlesAlignments: A L{dark.titles.TitlesAlignments} instance.
    @param title: A C{str} sequence title that was matched. We plot the
        reads that hit this title.
    @param accession: The C{str} accession number of the matched title. It
        is shown in the figure title when C{createFigure} is C{True}.
    @param addQueryLines: if C{True}, draw query lines in full (these will then
        be partly overdrawn by the HSP match against the subject). These are
        the 'whiskers' that potentially protrude from each side of a query.
    @param showFeatures: if C{True}, add the features of the subject
        sequence (looked up via C{title}) to a separate feature axis and mark
        them with vertical lines on the reads axis.
    @param logLinearXAxis: if C{True}, convert read offsets so that empty
        regions in the plot we're preparing will only be as wide as their
        logged actual values.
    @param logBase: The base of the logarithm to use if logLinearXAxis is
        C{True}.
    @param rankScores: If C{True}, change the e-values and bit scores for the
        reads for each title to be their rank (worst to best).
    @param createFigure: If C{True}, create a figure and give it a title.
    @param showFigure: If C{True}, show the created figure. Set this to
        C{False} if you're creating a panel of figures or just want to save an
        image (with C{imageFile}).
    @param readsAx: If not None, use this as the subplot for displaying reads.
    @param imageFile: If not None, specifies a filename to write the image to.
    @param quiet: If C{True}, don't print progress / timing output.
    @param idList: a dictionary (or a false value for no special coloring).
        Each key is a color and each value is a list of read identifiers that
        should be drawn in that color. Reads not mentioned are drawn in blue.
    @param xRange: set to either 'subject' or 'reads' to indicate the range of
        the X axis.
    @raise ValueError: If a read identifier is given more than once in
        C{idList}.
    @return: A C{dict} with keys 'adjustOffset' (the function used to adjust
        X axis offsets), 'features' (the features that were added, or C{None}
        if C{showFeatures} is false), 'minX', 'maxX', 'minY', and 'maxY'.
    """

    startTime = time()

    assert xRange in ('subject',
                      'reads'), ('xRange must be either "subject" or "reads".')

    if createFigure:
        figure = plt.figure(figsize=(20, 20))

    # Remember whether we made the reads axes ourselves, as we only add a
    # title and y-axis label to axes that we created.
    createdReadsAx = readsAx is None

    if showFeatures:
        gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1])
        featureAx = plt.subplot(gs[0, 0])
        readsAx = readsAx or plt.subplot(gs[1, 0])
    else:
        readsAx = readsAx or plt.subplot(111)

    # Make a deep copy of the title alignments. We're potentially going to
    # change the HSP scores, the X axis offsets, etc., and we don't want to
    # interfere with the data we were passed.
    titleAlignments = deepcopy(titlesAlignments[title])

    readsAlignments = titlesAlignments.readsAlignments
    subjectIsNucleotides = readsAlignments.params.subjectIsNucleotides

    # Allow the class of titlesAlignments to adjust HSPs for plotting,
    # if it has a method for doing so.
    try:
        adjuster = readsAlignments.adjustHspsForPlotting
    except AttributeError:
        pass
    else:
        adjuster(titleAlignments)

    if rankScores:
        reverse = titlesAlignments.scoreClass is not HigherIsBetterScore
        for rank, hsp in enumerate(sorted(titleAlignments.hsps(),
                                          reverse=reverse),
                                   start=1):
            hsp.score.score = rank

    if logLinearXAxis:
        readIntervals = ReadIntervals(titleAlignments.subjectLength)
        # Examine all HSPs so we can build an offset adjuster.
        for hsp in titleAlignments.hsps():
            readIntervals.add(hsp.readStartInSubject, hsp.readEndInSubject)
        # Now adjust offsets in all HSPs.
        offsetAdjuster = OffsetAdjuster(readIntervals, base=logBase)
        for hsp in titleAlignments.hsps():
            offsetAdjuster.adjustHSP(hsp)
        # A function for adjusting other offsets, below.
        adjustOffset = offsetAdjuster.adjustOffset
    else:

        def adjustOffset(offset):
            return offset

    # It would be more efficient to only walk through all HSPs once and
    # compute these values all at once, but for now this is simple and clear.
    maxY = int(ceil(titleAlignments.bestHsp().score.score))
    minY = int(titleAlignments.worstHsp().score.score)
    maxX = max(hsp.readEndInSubject for hsp in titleAlignments.hsps())
    minX = min(hsp.readStartInSubject for hsp in titleAlignments.hsps())

    if xRange == 'subject':
        # We'll display a graph for the full subject range. Adjust X axis
        # min/max to make sure we cover at least zero to the sequence length.
        maxX = max(titleAlignments.subjectLength, maxX)
        minX = min(0, minX)

    # Swap min & max Y values, if needed, as it's possible we are dealing
    # with LSPs but that the score adjuster made numerically greater values
    # for those that were small.
    if maxY < minY:
        (maxY, minY) = (minY, maxY)

    if logLinearXAxis:
        # Adjust minX and maxX if we have gaps at the subject start or end.
        gaps = list(readIntervals.walk())
        if gaps:
            # Check start of first gap:
            intervalType, (start, stop) = gaps[0]
            if intervalType == ReadIntervals.EMPTY:
                adjustedStart = adjustOffset(start)
                if adjustedStart < minX:
                    minX = adjustedStart
            # Check stop of last gap:
            intervalType, (start, stop) = gaps[-1]
            if intervalType == ReadIntervals.EMPTY:
                adjustedStop = adjustOffset(stop)
                if adjustedStop > maxX:
                    maxX = adjustedStop

    # We're all set up to start plotting the graph.

    # Add light grey vertical rectangles to show the logarithmic gaps. Add
    # these first so that reads will be plotted on top of them. Only draw
    # gaps that are at least SMALLEST_LOGGED_GAP_TO_DISPLAY wide, and only
    # when there are fewer than 100 adjustments, as we could have millions
    # of tiny gaps for a bacteria and drawing them all will be slow and only
    # serves to make the entire background grey.
    if logLinearXAxis and len(offsetAdjuster.adjustments()) < 100:
        for (intervalType, interval) in readIntervals.walk():
            if intervalType == ReadIntervals.EMPTY:
                adjustedStart = adjustOffset(interval[0])
                adjustedStop = adjustOffset(interval[1])
                gapWidth = adjustedStop - adjustedStart
                if gapWidth >= SMALLEST_LOGGED_GAP_TO_DISPLAY:
                    readsAx.axvspan(adjustedStart,
                                    adjustedStop,
                                    color='#f4f4f4')

    # The reads must be drawn regardless of whether gap rectangles were
    # added above. (This used to be in an 'else' clause of the gap-drawing
    # 'if', which meant that no reads at all were plotted when a log-linear
    # X axis with fewer than 100 adjustments was in use.)

    # Add horizontal lines for all the query sequences. These will be the
    # grey 'whiskers' in the plots once we (below) draw the matched part
    # on top of part of them.
    if addQueryLines:
        for hsp in titleAlignments.hsps():
            y = hsp.score.score
            line = Line2D([hsp.readStartInSubject, hsp.readEndInSubject],
                          [y, y],
                          color='#aaaaaa')
            readsAx.add_line(line)

    # Add the horizontal BLAST alignment lines.

    # If an idList is given set things up to look up read colors.
    readColor = {}
    if idList:
        for color, reads in idList.items():
            for read in reads:
                if read in readColor:
                    raise ValueError('Read %s is specified multiple '
                                     'times in idList' % read)
                else:
                    readColor[read] = color

    # Draw the matched region.
    for titleAlignment in titleAlignments:
        readId = titleAlignment.read.id
        for hsp in titleAlignment.hsps:
            y = hsp.score.score
            line = Line2D([hsp.subjectStart, hsp.subjectEnd], [y, y],
                          color=readColor.get(readId, 'blue'))
            readsAx.add_line(line)

    if showFeatures:
        if subjectIsNucleotides:
            featureAdder = NucleotideFeatureAdder()
        else:
            featureAdder = ProteinFeatureAdder()

        features = featureAdder.add(featureAx, title, minX, maxX, adjustOffset)

        # If there are features and there weren't too many of them, add
        # vertical feature lines to the reads axis.
        if features and not featureAdder.tooManyFeaturesToPlot:
            for feature in features:
                start = feature.start
                end = feature.end
                color = feature.color
                readsAx.axvline(x=start, color=color)
                readsAx.axvline(x=end, color='#cccccc')
    else:
        features = None

    # We'll return some information we've gathered.
    result = {
        'adjustOffset': adjustOffset,
        'features': features,
        'minX': minX,
        'maxX': maxX,
        'minY': minY,
        'maxY': maxY,
    }

    # Allow the class of titlesAlignments to add to the plot, if it has a
    # method for doing so.
    try:
        adjuster = readsAlignments.adjustPlot
    except AttributeError:
        pass
    else:
        adjuster(readsAx)

    # Titles, axis, etc.
    if createFigure:
        readCount = titleAlignments.readCount()
        hspCount = titleAlignments.hspCount()
        figure.suptitle(
            '%s (%s)\nLength %d %s, %d read%s, %d HSP%s.' %
            (
                fill(titleAlignments.subjectTitle, 80),
                accession,
                titleAlignments.subjectLength,
                'nt' if subjectIsNucleotides else 'aa',
                readCount, '' if readCount == 1 else 's',
                hspCount, '' if hspCount == 1 else 's',
            ),
            fontsize=20)

    # Add a title and y-axis label, but only if we made the reads axes.
    if createdReadsAx:
        readsAx.set_title('Read alignments', fontsize=20)
        ylabel = readsAlignments.params.scoreTitle
        if rankScores:
            ylabel += ' rank'
        # Label the reads axes explicitly rather than relying on them still
        # being pyplot's current axes at this point.
        readsAx.set_ylabel(ylabel, fontsize=17)

    # Set the x-axis limits.
    readsAx.set_xlim([minX - 1, maxX + 1])

    readsAx.set_ylim([0, int(maxY * Y_AXIS_UPPER_PADDING)])
    readsAx.grid()
    if createFigure:
        if showFigure:
            plt.show()
        if imageFile:
            figure.savefig(imageFile)
    stop = time()
    if not quiet:
        report('Graph generated in %.3f mins.' % ((stop - startTime) / 60.0))

    return result