def testResidueCountsCaseConvertLower(self):
    """
    When residueCounts is called with convertCaseTo='lower', every residue
    in the result must be lower case, whatever the case of the matched
    read sequences.

    HSP1: AcgT
    HSP2:  CGTT
    """
    subjectRead = Read('id', 'ACGT')
    mixedCaseHsp = HSP(
        33,
        readStart=0, readEnd=4,
        readStartInSubject=10, readEndInSubject=14,
        subjectStart=10, subjectEnd=14,
        readMatchedSequence='AcgT', subjectMatchedSequence='ACGT')
    shiftedHsp = HSP(
        33,
        readStart=0, readEnd=4,
        readStartInSubject=11, readEndInSubject=15,
        subjectStart=11, subjectEnd=15,
        readMatchedSequence='CGTT', subjectMatchedSequence='CGTT')
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(
        TitleAlignment(subjectRead, [mixedCaseHsp, shiftedHsp]))
    expected = {
        10: {'a': 1},
        11: {'c': 2},
        12: {'g': 2},
        13: {'t': 2},
        14: {'t': 1},
    }
    self.assertEqual(expected,
                     alignments.residueCounts(convertCaseTo='lower'))
def testReadsEmpty(self):
    """
    Calling reads on a TitleAlignments instance to which no alignments
    have been added must give back a result of length zero.
    """
    emptyAlignments = TitleAlignments('subject title', 55)
    titleReads = emptyAlignments.reads()
    self.assertEqual(0, len(titleReads))
def testResidueCountsOneReadTwoHSPsNotAtStartOfSubject(self):
    """
    With a single read carrying two overlapping HSPs, neither of which
    starts at subject offset zero, residueCounts must report counts at
    the HSPs' actual subject offsets.

    HSP1: ACGT
    HSP2:  CGTT
    """
    subjectRead = Read('id', 'ACGT')
    leftHsp = HSP(
        33,
        readStart=0, readEnd=4,
        readStartInSubject=10, readEndInSubject=14,
        subjectStart=10, subjectEnd=14,
        readMatchedSequence='ACGT', subjectMatchedSequence='ACGT')
    rightHsp = HSP(
        33,
        readStart=0, readEnd=4,
        readStartInSubject=11, readEndInSubject=15,
        subjectStart=11, subjectEnd=15,
        readMatchedSequence='CGTT', subjectMatchedSequence='CGTT')
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(TitleAlignment(subjectRead, [leftHsp, rightHsp]))
    expected = {
        10: {'A': 1},
        11: {'C': 2},
        12: {'G': 2},
        13: {'T': 2},
        14: {'T': 1},
    }
    self.assertEqual(expected, alignments.residueCounts())
def testResidueCountsOneReadTwoHSPsNotOverlapping(self):
    """
    With a single read carrying two HSPs that cover disjoint parts of
    the subject, residueCounts must report each HSP's residues once, at
    that HSP's own subject offsets.

    HSP1: ACGT           (subject offsets 0-3)
    HSP2:           CGTT (subject offsets 10-13)
    """
    subjectRead = Read('id', 'ACGT')
    firstHsp = HSP(
        33,
        readStart=0, readEnd=4,
        readStartInSubject=0, readEndInSubject=4,
        subjectStart=0, subjectEnd=4,
        readMatchedSequence='ACGT', subjectMatchedSequence='ACGT')
    secondHsp = HSP(
        33,
        readStart=0, readEnd=4,
        readStartInSubject=10, readEndInSubject=14,
        subjectStart=10, subjectEnd=14,
        readMatchedSequence='CGTT', subjectMatchedSequence='CGTT')
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(
        TitleAlignment(subjectRead, [firstHsp, secondHsp]))
    expected = {
        0: {'A': 1},
        1: {'C': 1},
        2: {'G': 1},
        3: {'T': 1},
        10: {'C': 1},
        11: {'G': 1},
        12: {'T': 1},
        13: {'T': 1},
    }
    self.assertEqual(expected, alignments.residueCounts())
def testReadsEmpty(self):
    """
    Iterating the result of reads on a TitleAlignments instance with no
    alignments must produce no reads at all.
    """
    emptyAlignments = TitleAlignments('subject title', 55)
    collected = list(emptyAlignments.reads())
    self.assertEqual(0, len(collected))
def testCoverageNoReads(self):
    """
    A TitleAlignments instance with no alignments has nothing covering
    the subject, so its coverage method must return zero.
    """
    emptyAlignments = TitleAlignments('subject title', 100)
    observed = emptyAlignments.coverage()
    self.assertEqual(0.0, observed)
def testReadIdsEmpty(self):
    """
    When no alignments have been added for a title, readIds must return
    an empty collection.
    """
    emptyAlignments = TitleAlignments('subject title', 55)
    ids = emptyAlignments.readIds()
    self.assertEqual(0, len(ids))
def testResidueCountsCaseConvertUpperIsDefault(self):
    """
    Called without a convertCaseTo argument, residueCounts must report
    upper-case residues even though one matched read sequence contains
    lower-case letters.

    HSP1: AcgT
    HSP2:  CGTT
    """
    subjectRead = Read('id', 'ACGT')
    mixedCaseHsp = HSP(
        33,
        readStart=0, readEnd=4,
        readStartInSubject=10, readEndInSubject=14,
        subjectStart=10, subjectEnd=14,
        readMatchedSequence='AcgT', subjectMatchedSequence='ACGT')
    shiftedHsp = HSP(
        33,
        readStart=0, readEnd=4,
        readStartInSubject=11, readEndInSubject=15,
        subjectStart=11, subjectEnd=15,
        readMatchedSequence='CGTT', subjectMatchedSequence='CGTT')
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(
        TitleAlignment(subjectRead, [mixedCaseHsp, shiftedHsp]))
    expected = {
        10: {'A': 1},
        11: {'C': 2},
        12: {'G': 2},
        13: {'T': 2},
        14: {'T': 1},
    }
    self.assertEqual(expected, alignments.residueCounts())
def testResidueCountsOneReadOneHSPPartialMatch(self):
    """
    With one read and one HSP whose read match region (readStart=0,
    readEnd=2) is shorter than the read, residueCounts must still count
    and return all four bases of the read.
    """
    subjectRead = Read('id', 'ACGT')
    partialHsp = HSP(
        33,
        readStart=0, readEnd=2,
        readStartInSubject=0, readEndInSubject=4,
        subjectStart=0, subjectEnd=4,
        readMatchedSequence='ACGT', subjectMatchedSequence='ACGT')
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(TitleAlignment(subjectRead, [partialHsp]))
    expected = {
        0: {'A': 1},
        1: {'C': 1},
        2: {'G': 1},
        3: {'T': 1},
    }
    self.assertEqual(expected, alignments.residueCounts())
def testCoverageInfoNoReads(self):
    """
    For a title with no alignments, coverageInfo must return something
    equal to an empty dict.
    """
    emptyAlignments = TitleAlignments('subject title', 55)
    info = emptyAlignments.coverageInfo()
    self.assertEqual({}, info)
def testResidueCountsNoReads(self):
    """
    For a title with no alignments, residueCounts must return a result
    containing no entries.
    """
    emptyAlignments = TitleAlignments('subject title', 55)
    residues = emptyAlignments.residueCounts()
    self.assertEqual(0, len(residues))
def testResidueCountsNoReads(self):
    """
    If nothing has been aligned to a title, the residueCounts method must
    return an empty result.
    """
    emptyAlignments = TitleAlignments('subject title', 55)
    residues = emptyAlignments.residueCounts()
    self.assertEqual(0, len(residues))
def testMedianScoreWithNoHsps(self):
    """
    Asking for the median score when the only alignment added has an
    empty HSP list must result in a ValueError.
    """
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(TitleAlignment(Read('id1', 'AAA'), []))
    expectedMessage = '^arg is an empty sequence$'
    with six.assertRaisesRegex(self, ValueError, expectedMessage):
        alignments.medianScore()
def testAddAlignment(self):
    """
    After addAlignment is called, the added alignment must be retrievable
    by index from the TitleAlignments instance, with its read and HSPs
    intact.
    """
    alignments = TitleAlignments('subject title', 55)
    addedRead = Read('id', 'AAA')
    alignments.addAlignment(TitleAlignment(addedRead, []))
    stored = alignments[0]
    self.assertEqual(addedRead, stored.read)
    self.assertEqual([], stored.hsps)
def testMedianScoreOfTwo(self):
    """
    Given exactly two HSP scores (7 and 15) for a title, medianScore must
    return 11.
    """
    lowHsp = HSP(7)
    highHsp = HSP(15)
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(
        TitleAlignment(Read('id1', 'AAA'), [lowHsp, highHsp]))
    self.assertEqual(11, alignments.medianScore())
def testFullCoverage(self):
    """
    When the HSPs of a title's alignments span the whole subject (here
    offsets 0-50 and 50-100 of a length-100 subject), coverage must
    return 1.0.
    """
    firstHalf = HSP(7, subjectStart=0, subjectEnd=50)
    secondHalf = HSP(8, subjectStart=50, subjectEnd=100)
    alignments = TitleAlignments('subject title', 100)
    alignments.addAlignment(
        TitleAlignment(Read('id1', 'AAA'), [firstHalf, secondHalf]))
    self.assertEqual(1.0, alignments.coverage())
def testFullCoverageCounts(self):
    """
    When the HSPs of a title's alignments span the whole of a length-10
    subject exactly once, coverageCounts must return a count of one for
    each of the offsets 0 to 9.
    """
    firstHalf = HSP(7, subjectStart=0, subjectEnd=5)
    secondHalf = HSP(8, subjectStart=5, subjectEnd=10)
    alignments = TitleAlignments('subject title', 10)
    alignments.addAlignment(
        TitleAlignment(Read('id1', 'AAA'), [firstHalf, secondHalf]))
    expected = Counter(range(10))
    self.assertEqual(expected, alignments.coverageCounts())
def makeTitleAlignments(self, *readIds):
    """
    Build a TitleAlignments instance (title 'subject title', length 55)
    holding one HSP-less alignment per id in C{readIds}.

    @param readIds: Zero or more values (passed positionally). Each is
        converted with C{str} and prefixed with 'id' to form a read id.
    @return: A C{TitleAlignments} instance with one alignment, in order,
        for each of the given ids.
    """
    result = TitleAlignments('subject title', 55)
    for suffix in readIds:
        read = Read('id' + str(suffix), 'A')
        result.addAlignment(TitleAlignment(read, []))
    return result
def makeTitleAlignments(self, *readIds):
    """
    Build a TitleAlignments instance (title "subject title", length 55)
    holding one HSP-less alignment per id in C{readIds}.

    @param readIds: Zero or more values (passed positionally). Each is
        converted with C{str} and prefixed with "id" to form a read id.
    @return: A C{TitleAlignments} instance with one alignment, in order,
        for each of the given ids.
    """
    result = TitleAlignments("subject title", 55)
    for suffix in readIds:
        read = Read("id" + str(suffix), "A")
        result.addAlignment(TitleAlignment(read, []))
    return result
def testCoverageInfoOneReadWithOneHSP(self):
    """
    With a single read and a single HSP, coverageInfo must map each
    subject offset covered by the HSP to a one-element list holding the
    HSP's score and the read base at that offset.
    """
    alignments = TitleAlignments('subject title', 55)
    onlyHsp = HSP(15, subjectStart=3, subjectEnd=6,
                  readMatchedSequence='CGT')
    alignments.addAlignment(
        TitleAlignment(Read('id1', 'AAACGT'), [onlyHsp]))
    expected = {
        3: [(15, 'C')],
        4: [(15, 'G')],
        5: [(15, 'T')],
    }
    self.assertEqual(expected, alignments.coverageInfo())
def testBestHspWithNoHsps(self):
    """
    The bestHsp function must raise ValueError if there are no HSPs in
    any of the alignments for a title.
    """
    titleAlignments = TitleAlignments('subject title', 55)
    read = Read('id1', 'AAA')
    titleAlignment = TitleAlignment(read, [])
    titleAlignments.addAlignment(titleAlignment)
    read = Read('id2', 'AAA')
    titleAlignment = TitleAlignment(read, [])
    titleAlignments.addAlignment(titleAlignment)
    # The expected error text depends on the interpreter: the _pypy flag
    # selects the form without the 'max() ' prefix.
    if _pypy:
        error = '^arg is an empty sequence$'
    else:
        # Raw string: '\(' and '\)' are regex escapes, not valid Python
        # string escapes, so a non-raw literal triggers an invalid escape
        # sequence warning. The runtime value is unchanged.
        error = r'^max\(\) arg is an empty sequence$'
    six.assertRaisesRegex(self, ValueError, error,
                          titleAlignments.bestHsp)
def testExpectedAttributes(self):
    """
    A freshly constructed TitleAlignments must expose the title and
    length it was given and must compare equal to an empty list.
    """
    title, length = 'subject title', 55
    alignments = TitleAlignments(title, length)
    self.assertEqual('subject title', alignments.subjectTitle)
    self.assertEqual(55, alignments.subjectLength)
    self.assertEqual([], alignments)
def testSummaryWhenEmpty(self):
    """
    Calling summary on a TitleAlignments instance that has no alignments
    must raise a ValueError.
    """
    emptyAlignments = TitleAlignments('subject title', 55)
    expectedMessage = r'^max\(\) arg is an empty sequence$'
    with six.assertRaisesRegex(self, ValueError, expectedMessage):
        emptyAlignments.summary()
def testResidueCountsTwoReadsTwoHSPsLeftOverhang(self):
    """
    With two reads, each having one HSP that begins before the left edge
    of the subject, residueCounts must report the overhanging bases at
    negative subject offsets.

    Subject:   GTT
    HSP1:    ACGT
    HSP2:     CGTT
    """
    firstRead = Read('id', 'ACGT')
    firstHsp = HSP(
        33,
        readStart=0, readEnd=4,
        readStartInSubject=-2, readEndInSubject=2,
        subjectStart=0, subjectEnd=2,
        readMatchedSequence='GT', subjectMatchedSequence='GT')
    secondRead = Read('id', 'CGTT')
    secondHsp = HSP(
        33,
        readStart=0, readEnd=4,
        readStartInSubject=-1, readEndInSubject=3,
        subjectStart=0, subjectEnd=3,
        readMatchedSequence='GTT', subjectMatchedSequence='GTT')
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(TitleAlignment(firstRead, [firstHsp]))
    alignments.addAlignment(TitleAlignment(secondRead, [secondHsp]))
    expected = {
        -2: {'A': 1},
        -1: {'C': 2},
        0: {'G': 2},
        1: {'T': 2},
        2: {'T': 1},
    }
    self.assertEqual(expected, alignments.residueCounts())
def testSummary(self):
    """
    For two reads with three HSPs in total, summary must return a dict
    with the expected best score, coverage, HSP count, median score,
    read count, subject length and subject title.
    """
    alignments = TitleAlignments('subject title', 10)

    firstAlignment = TitleAlignment(
        Read('id1', 'ACGT'),
        [HSP(30, subjectStart=0, subjectEnd=2)])
    alignments.addAlignment(firstAlignment)

    secondAlignment = TitleAlignment(
        Read('id2', 'ACGT'),
        [HSP(55, subjectStart=2, subjectEnd=4),
         HSP(40, subjectStart=8, subjectEnd=9)])
    alignments.addAlignment(secondAlignment)

    expected = {
        'bestScore': 55,
        'coverage': 0.5,
        'hspCount': 3,
        'medianScore': 40,
        'readCount': 2,
        'subjectLength': 10,
        'subjectTitle': 'subject title',
    }
    self.assertEqual(expected, alignments.summary())
def testMedianScoreWithNoAlignments(self):
    """
    The medianScore function must raise ValueError (there are no scores
    to take a median of) if no alignments have been added for a title.
    """
    emptyAlignments = TitleAlignments('subject title', 55)
    expectedMessage = '^arg is an empty sequence$'
    with six.assertRaisesRegex(self, ValueError, expectedMessage):
        emptyAlignments.medianScore()
def testCoverageInfoOneReadWithOneHSP(self):
    """
    If exactly one read with exactly one HSP is aligned to a title,
    coverageInfo must contain only the subject offsets of that HSP, each
    paired with the HSP score and the matched read base.
    """
    alignments = TitleAlignments('subject title', 55)
    onlyHsp = HSP(15, readMatchedSequence='CGT',
                  subjectStart=3, subjectEnd=6)
    onlyRead = Read('id1', 'AAACGT')
    alignments.addAlignment(TitleAlignment(onlyRead, [onlyHsp]))
    observed = alignments.coverageInfo()
    expected = {
        3: [(15, 'C')],
        4: [(15, 'G')],
        5: [(15, 'T')],
    }
    self.assertEqual(expected, observed)
def testResidueCountsUnknownCaseConversion(self):
    """
    Passing an unrecognized convertCaseTo value to residueCounts must
    raise a ValueError naming the permitted values.
    """
    alignments = TitleAlignments('subject title', 55)
    expectedMessage = (
        "convertCaseTo must be one of 'none', 'lower', or 'upper'")
    with six.assertRaisesRegex(self, ValueError, expectedMessage):
        alignments.residueCounts(convertCaseTo='xxx')
def testResidueCountsOneReadOneHSP(self):
    """
    With one read and one HSP aligned at the start of the subject,
    residueCounts must report each read base once at its subject offset.
    """
    subjectRead = Read('id', 'ACGT')
    onlyHsp = HSP(
        33,
        readStart=0, readEnd=4,
        readStartInSubject=0, readEndInSubject=4,
        subjectStart=0, subjectEnd=4,
        readMatchedSequence='ACGT', subjectMatchedSequence='ACGT')
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(TitleAlignment(subjectRead, [onlyHsp]))
    expected = {
        0: {'A': 1},
        1: {'C': 1},
        2: {'G': 1},
        3: {'T': 1},
    }
    self.assertEqual(expected, alignments.residueCounts())
def testCoverageInfoOneReadWithTwoHSPs(self):
    """
    With one read carrying two HSPs that overlap at one subject offset,
    coverageInfo must list a (score, base) pair per HSP at each covered
    offset, including the gap character and both HSPs at the shared
    offset.
    """
    alignments = TitleAlignments('subject title', 55)
    gappedHsp = HSP(15, subjectStart=1, subjectEnd=4,
                    readMatchedSequence='A-A')
    plainHsp = HSP(10, subjectStart=3, subjectEnd=6,
                   readMatchedSequence='CGT')
    sharedRead = Read('id1', 'AAACGT')
    alignments.addAlignment(
        TitleAlignment(sharedRead, [gappedHsp, plainHsp]))
    observed = alignments.coverageInfo()
    expected = {
        1: [(15, 'A')],
        2: [(15, '-')],
        3: [(15, 'A'), (10, 'C')],
        4: [(10, 'G')],
        5: [(10, 'T')],
    }
    self.assertEqual(expected, observed)
def testAddTitle(self):
    """
    A title that is not yet in a TitlesAlignments instance must be
    present in it after being passed to addTitle.
    """
    fakeOpen = mockOpen(read_data=(PARAMS + RECORD0))
    # Everything that might read the (mocked) file stays inside the patch.
    with patch.object(builtins, 'open', fakeOpen):
        allTitles = TitlesAlignments(LightReadsAlignments('file.json', DB))
        newTitle = 'Squirrelpox virus 23'
        newTitleAlignments = TitleAlignments(newTitle, 55)
        self.assertNotIn(newTitle, allTitles)
        allTitles.addTitle(newTitle, newTitleAlignments)
        self.assertIn(newTitle, allTitles)
def testAddTitleRepeat(self):
    """
    The addTitle function must raise a C{KeyError} if an attempt is made
    to add a pre-existing title to a TitlesAlignments instance.
    """
    mockOpener = mockOpen(read_data=(PARAMS + RECORD0))
    # Everything that might read the (mocked) file stays inside the patch.
    with patch.object(builtins, 'open', mockOpener):
        readsAlignments = LightReadsAlignments('file.json', DB)
        titlesAlignments = TitlesAlignments(readsAlignments)
        title = SQUIRRELPOX.id
        titleAlignments = TitleAlignments(title, 55)
        # Raw strings: '\.' is a regex escape, not a valid Python string
        # escape, so a non-raw literal triggers an invalid escape
        # sequence warning. The runtime value is unchanged.
        error = (r"Title 'Squirrelpox' already present in "
                 r"TitlesAlignments instance\.")
        six.assertRaisesRegex(
            self, KeyError, error, titlesAlignments.addTitle, title,
            titleAlignments)
def testAddTitle(self):
    """
    A title that is not yet in a TitlesAlignments instance must be
    present in it after being passed to addTitle.
    """
    fileContents = dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n'
    fakeOpen = mock_open(read_data=fileContents)
    # Everything that might read the (mocked) file stays inside the patch.
    with patch.object(builtins, 'open', fakeOpen):
        inputReads = Reads()
        inputReads.add(Read('id0', 'A' * 70))
        allTitles = TitlesAlignments(
            DiamondReadsAlignments(inputReads, 'file.json'))
        newTitle = 'gi|887699|gb|DQ37780 Squirrelpox virus 23'
        newTitleAlignments = TitleAlignments(newTitle, 55)
        self.assertNotIn(newTitle, allTitles)
        allTitles.addTitle(newTitle, newTitleAlignments)
        self.assertIn(newTitle, allTitles)
def testReadCount(self):
    """
    readCount must return the number of reads with alignments to a title
    (two here), not the number of HSPs (three here).
    """
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(
        TitleAlignment(Read('id1', 'AAA'), [HSP(7), HSP(14)]))
    alignments.addAlignment(
        TitleAlignment(Read('id2', 'AAA'), [HSP(21)]))
    self.assertEqual(2, alignments.readCount())
def testBetterThanTrue(self):
    """
    hasScoreBetterThan must return True when at least one HSP (score 21
    here) beats the given score (20 here).
    """
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(
        TitleAlignment(Read('id1', 'AAA'), [HSP(7), HSP(15)]))
    alignments.addAlignment(
        TitleAlignment(Read('id2', 'AAA'), [HSP(21)]))
    self.assertTrue(alignments.hasScoreBetterThan(20))
def testPartialCoverage(self):
    """
    When the HSPs of two reads cover three disjoint 10-base stretches of
    a length-100 subject, coverage must return 0.3.
    """
    firstStretch = HSP(7, subjectStart=10, subjectEnd=20)
    secondStretch = HSP(15, subjectStart=30, subjectEnd=40)
    thirdStretch = HSP(21, subjectStart=50, subjectEnd=60)
    alignments = TitleAlignments('subject title', 100)
    alignments.addAlignment(
        TitleAlignment(Read('id1', 'AAA'), [firstStretch, secondStretch]))
    alignments.addAlignment(
        TitleAlignment(Read('id2', 'AAA'), [thirdStretch]))
    self.assertEqual(0.3, alignments.coverage())
def testHSPs(self):
    """
    hsps must yield every HSP from every alignment for the title, in the
    order the alignments (and their HSPs) were added.
    """
    alignments = TitleAlignments("subject title", 55)
    alignments.addAlignment(
        TitleAlignment(Read("id1", "AAA"), [HSP(7), HSP(14)])
    )
    alignments.addAlignment(TitleAlignment(Read("id2", "AAA"), [HSP(21)]))
    observedScores = [found.score.score for found in alignments.hsps()]
    self.assertEqual([7, 14, 21], observedScores)
def testBetterThanFalse(self):
    """
    hasScoreBetterThan must return False when none of the title's LSPs
    (scores 7, 15 and 21 here) beats the given score (7 here).
    """
    alignments = TitleAlignments("subject title", 55)
    alignments.addAlignment(
        TitleAlignment(Read("id1", "AAA"), [LSP(7), LSP(15)])
    )
    alignments.addAlignment(TitleAlignment(Read("id2", "AAA"), [LSP(21)]))
    self.assertFalse(alignments.hasScoreBetterThan(7))
def testReadIds(self):
    """
    readIds must return the set of ids of all reads that have alignments
    to the title.
    """
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(
        TitleAlignment(Read('id1', 'AAA'), [LSP(7), LSP(15)]))
    alignments.addAlignment(
        TitleAlignment(Read('id2', 'AAA'), [LSP(21)]))
    expected = {'id1', 'id2'}
    self.assertEqual(expected, alignments.readIds())
def testBetterThanTrue(self):
    """
    hasScoreBetterThan must return True when at least one of the title's
    LSPs (scores 7, 15 and 21 here) beats the given score (9 here).
    """
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(
        TitleAlignment(Read('id1', 'AAA'), [LSP(7), LSP(15)]))
    alignments.addAlignment(
        TitleAlignment(Read('id2', 'AAA'), [LSP(21)]))
    self.assertTrue(alignments.hasScoreBetterThan(9))
def testWorstHsp(self):
    """
    worstHsp must return the worst-scoring HSP across all alignments for
    a title; with LSPs scoring 7, 15 and 21 that is the one scoring 21.
    """
    lowLsp = LSP(7)
    middleLsp = LSP(15)
    highLsp = LSP(21)
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(
        TitleAlignment(Read('id1', 'AAA'), [lowLsp, middleLsp]))
    alignments.addAlignment(
        TitleAlignment(Read('id2', 'AAA'), [highLsp]))
    self.assertEqual(highLsp, alignments.worstHsp())
def testReads(self):
    """
    Iterating the result of reads must produce the reads that have
    alignments to the title, in the order they were added.
    """
    alignments = TitleAlignments('subject title', 55)
    firstRead = Read('id1', 'AAA')
    alignments.addAlignment(TitleAlignment(firstRead, [HSP(7), HSP(14)]))
    secondRead = Read('id2', 'AAA')
    alignments.addAlignment(TitleAlignment(secondRead, [HSP(21)]))
    observed = list(alignments.reads())
    self.assertEqual([firstRead, secondRead], observed)
def testHspCount(self):
    """
    hspCount must return the total number of HSPs over all alignments to
    a title (three here, spread over two reads).
    """
    alignments = TitleAlignments('subject title', 55)
    alignments.addAlignment(
        TitleAlignment(Read('id1', 'AAA'), [HSP(7), HSP(14)]))
    alignments.addAlignment(
        TitleAlignment(Read('id2', 'AAA'), [HSP(21)]))
    self.assertEqual(3, alignments.hspCount())