Example #1
0
 def testReadSetFilterAllowAnything(self):
     """
     Filtering with minNewReads=0.0 treats any read set as sufficiently
     novel, so all five titles must be present in the result.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     fileContents = ''.join(dumps(record) + '\n' for record in records)
     opener = mock_open(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id%d' % index, 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         filtered = TitlesAlignments(alignments).filter(minNewReads=0.0)
         expectedTitles = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expectedTitles, sorted(filtered))
Example #2
0
 def testWithScoreBetterThan_EValue(self):
     """
     The filter function must work correctly when passed a value for
     withScoreBetterThan when the scores are e values.
     """
     jsonLines = [dumps(item) + '\n'
                  for item in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)]
     opener = mockOpen(read_data=''.join(jsonLines))
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta',
             scoreClass=LowerIsBetterScore)
         filtered = TitlesAlignments(alignments).filter(
             withScoreBetterThan=1e-10)
         # Only one title is expected to survive the e-value threshold.
         expectedTitles = ['gi|887699|gb|DQ37780 Squirrelpox virus 1296/99']
         self.assertEqual(expectedTitles, list(filtered.keys()))
Example #3
0
 def testMinMedianScore_EValue(self):
     """
     The filter function must work correctly when passed a value for
     minMedianScore when the scores are e values.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     fileContents = '\n'.join(map(dumps, records)) + '\n'
     opener = mockOpen(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id{}'.format(index), 'A' * 70))
         alignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titles = TitlesAlignments(alignments)
         filtered = titles.filter(minMedianScore=1e-9)
         expectedTitles = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expectedTitles, sorted(filtered.keys()))
Example #4
0
 def testCoverageIncludesAll(self):
     """
     Filtering on coverage must return a TitlesAlignments instance holding
     every title when all titles have sufficient coverage (here the
     threshold is minCoverage=0.0).
     """
     jsonLines = [dumps(item) + '\n'
                  for item in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)]
     opener = mock_open(read_data=''.join(jsonLines))
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         filtered = TitlesAlignments(alignments).filter(minCoverage=0.0)
         expectedTitles = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expectedTitles, sorted(filtered))
Example #5
0
 def testFilterWithNoArguments(self):
     """
     Calling filter with no arguments must return a TitlesAlignments
     instance containing all the titles of the original.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2)
     fileContents = ''.join(dumps(record) + '\n' for record in records)
     opener = mockOpen(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(3):
             reads.add(Read('id%d' % index, 'A' * 70))
         alignments = BlastReadsAlignments(reads, 'file.json')
         unfiltered = TitlesAlignments(alignments).filter()
         expectedTitles = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expectedTitles, sorted(unfiltered.keys()))
Example #6
0
 def testCoverageIncludesAll(self):
     """
     Filtering on coverage must return a TitlesAlignments instance holding
     every title when all titles have sufficient coverage (here the
     threshold is minCoverage=0.0).
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     fileContents = '\n'.join(map(dumps, records)) + '\n'
     opener = mockOpen(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id{}'.format(index), 'A' * 70))
         alignments = BlastReadsAlignments(reads, 'file.json')
         titles = TitlesAlignments(alignments)
         filtered = titles.filter(minCoverage=0.0)
         expectedTitles = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expectedTitles, sorted(filtered.keys()))
Example #7
0
 def testLength(self):
     """
     Sorting on sequence length must work, including a secondary sort on
     title.

     The expected order lists longer subjects first (lengths are noted
     next to each title); the two subjects annotated with the same
     length appear in title order.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     # Patch open on the builtins module object. The string target
     # '__builtin__.open' names the Python 2 module, which cannot be
     # imported under Python 3.
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('length')
         self.assertEqual([
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 38000
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 37000
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 35000
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 35000
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 30000
         ], result)
Example #8
0
 def testMedianScore_Bits(self):
     """
     Sorting titles on median score must work when the scores are bit
     scores, with title used as a secondary sort key.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3, RECORD4)
     fileContents = ''.join(dumps(record) + '\n' for record in records)
     opener = mock_open(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(5):
             reads.add(Read('id%d' % index, 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         ordered = TitlesAlignments(alignments).sortTitles('medianScore')
         expectedOrder = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 25
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 20
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 20
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 20
         ]
         self.assertEqual(expectedOrder, ordered)
Example #9
0
 def testTitle(self):
     """
     Sorting titles by 'title' must return them in title order.
     """
     jsonLines = [dumps(item) + '\n'
                  for item in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)]
     opener = mockOpen(read_data=''.join(jsonLines))
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         ordered = TitlesAlignments(alignments).sortTitles('title')
         expectedOrder = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expectedOrder, ordered)
Example #10
0
 def testLength(self):
     """
     Sorting titles on sequence length must work, with title used as a
     secondary sort key.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     fileContents = '\n'.join(map(dumps, records)) + '\n'
     opener = mockOpen(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id{}'.format(index), 'A' * 70))
         alignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         ordered = TitlesAlignments(alignments).sortTitles('length')
         # The trailing numbers are the subject lengths.
         expectedOrder = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',  # 38000
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 37000
             'gi|887699|gb|DQ37780 Monkeypox virus 456',  # 35000
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',  # 35000
             'gi|887699|gb|DQ37780 Cowpox virus 15',  # 30000
         ]
         self.assertEqual(expectedOrder, ordered)
Example #11
0
 def testReadSetFilterAllowAnything(self):
     """
     The filter function must work correctly when passed a 0.0 value for
     minNewReads, i.e. one that considers any read set sufficiently novel.
     """
     jsonLines = [dumps(item) + '\n'
                  for item in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)]
     opener = mockOpen(read_data=''.join(jsonLines))
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = BlastReadsAlignments(reads, 'file.json')
         titles = TitlesAlignments(alignments)
         filtered = titles.filter(minNewReads=0.0)
         expectedTitles = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expectedTitles, sorted(filtered.keys()))
Example #12
0
 def testMaxScore_EValue(self):
     """
     Sorting titles on max score must work when the scores are e values,
     with title used as a secondary sort key.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     fileContents = ''.join(dumps(record) + '\n' for record in records)
     opener = mock_open(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id%d' % index, 'A' * 70))
         alignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         ordered = TitlesAlignments(alignments).sortTitles('maxScore')
         # Going by the e-value annotations, the smallest e value is
         # expected first (the score class is LowerIsBetterScore).
         expectedOrder = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
         ]
         self.assertEqual(expectedOrder, ordered)
Example #13
0
 def testMaxMatchingReads(self):
     """
     The filter function must work correctly when given a value for
     maxMatchingReads.
     """
     jsonLines = [dumps(item) + '\n'
                  for item in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)]
     opener = mock_open(read_data=''.join(jsonLines))
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         filtered = TitlesAlignments(alignments).filter(maxMatchingReads=1)
         # Cowpox virus 15 is not in the results as it is matched by two
         # reads.
         expectedTitles = sorted([
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.'
         ])
         self.assertEqual(expectedTitles, sorted(filtered))
Example #14
0
    def testReadSetFilterStrict(self):
        """
        The filter function must work correctly when passed a 1.0 value
        for minNewReads.
        """
        fileContents = PARAMS + RECORD0 + RECORD1 + RECORD2 + RECORD3
        opener = mockOpen(read_data=fileContents)
        with patch.object(builtins, 'open', opener):
            alignments = LightReadsAlignments('file.json', DB)
            result = TitlesAlignments(alignments).filter(minNewReads=1.0)

            # Either MUMMYPOX.id invalidates MONKEYPOX.id or vice-versa. It
            # depends on Python's dict walking order. Check both
            # directions, making sure exactly one of them holds.
            candidates = (
                (MUMMYPOX.id, MONKEYPOX.id),
                (MONKEYPOX.id, MUMMYPOX.id),
            )

            assertionCount = 0
            for keptId, invalidatedId in candidates:
                if keptId in result:
                    invalidated = result.readSetFilter.invalidates(keptId)
                    self.assertTrue(invalidatedId in invalidated)
                    assertionCount += 1

            self.assertEqual(1, assertionCount)
Example #15
0
 def testTabSeparatedSummary(self):
     """
     tabSeparatedSummary must return the expected text: one line per
     title, with the fields on each line separated by TAB characters.
     """
     fileContents = ''.join(
         dumps(record) + '\n' for record in (PARAMS, RECORD0))
     opener = mock_open(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'f.json')
         titles = TitlesAlignments(alignments)
         summary = titles.tabSeparatedSummary(sortOn='title')
         # Each tuple holds the fields of one expected output line.
         rows = (
             ('0.000297', '20.000000', '20.000000', '1', '1', '37000',
              'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'),
             ('0.000289', '25.000000', '25.000000', '1', '1', '38000',
              'gi|887699|gb|DQ37780 Squirrelpox virus 55'),
         )
         expected = '\n'.join('\t'.join(row) for row in rows)
         self.assertEqual(expected, summary)
Example #16
0
 def testFilterWithNoArguments(self):
     """
     Calling filter with no arguments must return a TitlesAlignments
     instance containing all the titles of the original.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2)
     fileContents = '\n'.join(map(dumps, records)) + '\n'
     opener = mock_open(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         unfiltered = TitlesAlignments(alignments).filter()
         expectedTitles = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expectedTitles, sorted(unfiltered))
Example #17
0
 def testTitle(self):
     """
     Sorting titles by 'title' must return them in title order.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     fileContents = ''.join(dumps(record) + '\n' for record in records)
     opener = mock_open(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id%d' % index, 'A' * 70))
         alignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titles = TitlesAlignments(alignments)
         ordered = titles.sortTitles('title')
         expectedOrder = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expectedOrder, ordered)
Example #18
0
 def testCoverageIncludesSome(self):
     """
     Filtering on coverage must return a TitlesAlignments instance with
     only the expected titles when just some of the titles have
     sufficient coverage.
     """
     jsonLines = [dumps(item) + '\n'
                  for item in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)]
     opener = mockOpen(read_data=''.join(jsonLines))
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id{}'.format(index), 'A' * 70))
         alignments = BlastReadsAlignments(reads, 'file.json')
         titles = TitlesAlignments(alignments)
         # To understand why the following produces the result it does,
         # you need to look at the HSP coverage in sample_data.py and
         # calculate the coverage by hand.
         filtered = titles.filter(minCoverage=0.0011)
         expectedTitles = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         ]
         self.assertEqual(expectedTitles, sorted(filtered.keys()))
Example #19
0
 def testCoverageIncludesSome(self):
     """
     Filtering on coverage must return a TitlesAlignments instance with
     only the expected titles when just some of the titles have
     sufficient coverage.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     fileContents = '\n'.join(map(dumps, records)) + '\n'
     opener = mock_open(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         titles = TitlesAlignments(alignments)
         # To understand why the following produces the result it does,
         # you need to look at the HSP coverage in sample_data.py and
         # calculate the coverage by hand.
         filtered = titles.filter(minCoverage=0.0003)
         expectedTitles = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         ]
         self.assertEqual(expectedTitles, sorted(filtered))
Example #20
0
 def testMinMedianScore_EValue(self):
     """
     The filter function must work correctly when given a value for
     minMedianScore and the scores are e values.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     fileContents = ''.join(dumps(record) + '\n' for record in records)
     opener = mock_open(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id%d' % index, 'A' * 70))
         alignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         filtered = TitlesAlignments(alignments).filter(
             minMedianScore=1e-9)
         expectedTitles = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expectedTitles, sorted(filtered))
Example #21
0
 def testMedianScore_Bits(self):
     """
     Sorting titles on median score must work when the scores are bit
     scores, with title used as a secondary sort key.
     """
     jsonLines = [
         dumps(item) + '\n'
         for item in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3, RECORD4)
     ]
     opener = mockOpen(read_data=''.join(jsonLines))
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3', 'id4'):
             reads.add(Read(readId, 'A' * 70))
         alignments = BlastReadsAlignments(reads, 'file.json')
         titles = TitlesAlignments(alignments)
         ordered = titles.sortTitles('medianScore')
         expectedOrder = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 25
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 20
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 20
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 20
         ]
         self.assertEqual(expectedOrder, ordered)
Example #22
0
 def testMedianScore_EValue(self):
     """
     Sorting on median score must work when scores are e values,
     including a secondary sort on title.

     The alignments are built with scoreClass=LowerIsBetterScore; going
     by the annotations on the expected titles, the smallest e value
     comes first.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n' + dumps(RECORD4) + '\n'))
     # Patch open on the builtins module object. The string target
     # '__builtin__.open' names the Python 2 module, which cannot be
     # imported under Python 3.
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         reads.add(Read('id4', 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('medianScore')
         self.assertEqual([
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # worst :-)
         ], result)
Example #23
0
 def testMaxTitlesTwoSortOnLength(self):
     """
     With maxTitles=2 and sortOn='length', the filter function must
     return the two titles whose sequences are the longest.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     fileContents = '\n'.join(map(dumps, records)) + '\n'
     opener = mockOpen(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id{}'.format(index), 'A' * 70))
         alignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta')
         titles = TitlesAlignments(alignments)
         filtered = titles.filter(maxTitles=2, sortOn='length')
         expectedTitles = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expectedTitles, sorted(filtered.keys()))
Example #24
0
 def testMaxScore_EValue(self):
     """
     Sorting titles on max score must work when the scores are e values,
     with title used as a secondary sort key.
     """
     jsonLines = [dumps(item) + '\n'
                  for item in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)]
     opener = mockOpen(read_data=''.join(jsonLines))
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titles = TitlesAlignments(alignments)
         ordered = titles.sortTitles('maxScore')
         # Going by the e-value annotations, the smallest e value is
         # expected first (the score class is LowerIsBetterScore).
         expectedOrder = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
         ]
         self.assertEqual(expectedOrder, ordered)
Example #25
0
 def testWithScoreBetterThan_EValue(self):
     """
     The filter function must work correctly when given a value for
     withScoreBetterThan and the scores are e values.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     fileContents = ''.join(dumps(record) + '\n' for record in records)
     opener = mockOpen(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id%d' % index, 'A' * 70))
         alignments = DiamondReadsAlignments(
             reads,
             'file.json',
             'database.fasta',
             scoreClass=LowerIsBetterScore)
         titles = TitlesAlignments(alignments)
         filtered = titles.filter(withScoreBetterThan=1e-10)
         # Only one title is expected to survive the e-value threshold.
         expectedTitles = ['gi|887699|gb|DQ37780 Squirrelpox virus 1296/99']
         self.assertEqual(expectedTitles, list(filtered.keys()))
Example #26
0
 def testTabSeparatedSummary(self):
     """
     tabSeparatedSummary must return the expected text: one line per
     title, with the fields on each line separated by TAB characters.
     """
     fileContents = '\n'.join(map(dumps, (PARAMS, RECORD0))) + '\n'
     opener = mockOpen(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'f.json', 'db')
         summary = TitlesAlignments(alignments).tabSeparatedSummary(
             sortOn='title')
         # Each inner list holds the fields of one expected output line.
         expectedLines = [
             ['0.000297', '20.000000', '20.000000', '1', '1', '37000',
              'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'],
             ['0.000289', '25.000000', '25.000000', '1', '1', '38000',
              'gi|887699|gb|DQ37780 Squirrelpox virus 55'],
         ]
         expected = '\n'.join(
             '\t'.join(fields) for fields in expectedLines)
         self.assertEqual(expected, summary)
Example #27
0
 def testSummary(self):
     """
     The summary function must yield the expected per-title dictionaries
     when the titles are sorted on title.
     """
     def summaryEntry(title, length, coverage, score):
         # Every expected entry has one HSP and one read, and its best
         # and median scores are equal.
         return {
             'bestScore': score,
             'coverage': coverage,
             'hspCount': 1,
             'medianScore': score,
             'readCount': 1,
             'subjectLength': length,
             'subjectTitle': title,
         }

     records = (PARAMS, RECORD0, RECORD1)
     fileContents = ''.join(dumps(record) + '\n' for record in records)
     opener = mockOpen(read_data=fileContents)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta')
         titles = TitlesAlignments(alignments)
         expected = [
             summaryEntry(
                 'gi|887699|gb|DQ37780 Monkeypox virus 456',
                 35000, 0.00031428571428571427, 20.0),
             summaryEntry(
                 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
                 35000, 0.00031428571428571427, 20.0),
             summaryEntry(
                 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                 37000, 0.0002972972972972973, 20.0),
             summaryEntry(
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
                 38000, 0.00028947368421052634, 25.0),
         ]
         self.assertEqual(expected, list(titles.summary(sortOn='title')))
Example #28
0
 def testSummary(self):
     """
     The summary function, when sorting on title, must produce the
     expected per-title statistics dictionaries in title order.
     """
     jsonData = '\n'.join(
         [dumps(PARAMS), dumps(RECORD0), dumps(RECORD1)]) + '\n'
     opener = mock_open(read_data=jsonData)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         titles = TitlesAlignments(alignments)
         monkeypox = dict(
             bestScore=20.0,
             coverage=0.00031428571428571427,
             hspCount=1,
             medianScore=20.0,
             readCount=1,
             subjectLength=35000,
             subjectTitle='gi|887699|gb|DQ37780 Monkeypox virus 456')
         mummypox = dict(
             bestScore=20.0,
             coverage=0.00031428571428571427,
             hspCount=1,
             medianScore=20.0,
             readCount=1,
             subjectLength=35000,
             subjectTitle='gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.')
         squirrelpox1296 = dict(
             bestScore=20.0,
             coverage=0.0002972972972972973,
             hspCount=1,
             medianScore=20.0,
             readCount=1,
             subjectLength=37000,
             subjectTitle='gi|887699|gb|DQ37780 Squirrelpox virus 1296/99')
         squirrelpox55 = dict(
             bestScore=25.0,
             coverage=0.00028947368421052634,
             hspCount=1,
             medianScore=25.0,
             readCount=1,
             subjectLength=38000,
             subjectTitle='gi|887699|gb|DQ37780 Squirrelpox virus 55')
         expected = [monkeypox, mummypox, squirrelpox1296, squirrelpox55]
         self.assertEqual(expected, list(titles.summary(sortOn='title')))
Example #29
0
 def testEmpty(self):
     """
     sortTitles must return an empty list when the alignments input
     yields no titles.
     """
     opener = mockOpen(read_data=PARAMS)
     with patch.object(builtins, 'open', opener):
         alignments = LightReadsAlignments('file.json', DB)
         sortedTitles = TitlesAlignments(alignments).sortTitles('title')
         self.assertEqual([], sortedTitles)
Example #30
0
 def testEmpty(self):
     """
     A TitlesAlignments instance built from a readsAlignments instance
     that has no alignments must contain no titles.
     """
     opener = mockOpen(read_data=(PARAMS))
     with patch.object(builtins, 'open', opener):
         titles = TitlesAlignments(LightReadsAlignments('file.json', DB))
         titleList = list(titles.keys())
         self.assertEqual([], titleList)
Example #31
0
 def testEmpty(self):
     """
     When the JSON input holds only the parameters line there are no
     titles, so sorting them must give an empty list.
     """
     paramsOnly = dumps(PARAMS) + '\n'
     with patch.object(builtins, 'open', mock_open(read_data=paramsOnly)):
         alignments = DiamondReadsAlignments(Reads(), 'file.json')
         titles = TitlesAlignments(alignments)
         self.assertEqual([], titles.sortTitles('title'))
Example #32
0
 def testEmpty(self):
     """
     Sorting the titles of a TitlesAlignments instance that has no
     titles must return the empty list.
     """
     jsonData = '%s\n' % dumps(PARAMS)
     opener = mockOpen(read_data=jsonData)
     with patch.object(builtins, 'open', opener):
         noReads = Reads()
         titles = TitlesAlignments(
             BlastReadsAlignments(noReads, 'file.json'))
         sortedTitles = titles.sortTitles('title')
         self.assertEqual([], sortedTitles)
Example #33
0
 def testEmpty(self):
     """
     If the readsAlignments instance it is given is empty, a
     TitlesAlignments instance must have no titles.
     """
     paramsOnly = dumps(PARAMS) + '\n'
     with patch.object(builtins, 'open', mockOpen(read_data=paramsOnly)):
         alignments = BlastReadsAlignments(Reads(), 'file.json')
         titleList = list(TitlesAlignments(alignments).keys())
         self.assertEqual([], titleList)
Example #34
0
 def testCoverageExcludesAll(self):
     """
     Filtering on minCoverage must return a titlesAlignments instance
     with no titles if none of its titles has sufficient coverage.
     """
     data = PARAMS + RECORD0 + RECORD1 + RECORD2 + RECORD3
     with patch.object(builtins, 'open', mockOpen(read_data=data)):
         titles = TitlesAlignments(LightReadsAlignments('file.json', DB))
         survivors = titles.filter(minCoverage=0.1)
         self.assertEqual(0, len(survivors))
Example #35
0
 def testEmpty(self):
     """
     No titles may be present in a TitlesAlignments instance that was
     built from an empty readsAlignments instance.
     """
     jsonData = ''.join([dumps(PARAMS), '\n'])
     opener = mockOpen(read_data=jsonData)
     with patch.object(builtins, 'open', opener):
         alignments = DiamondReadsAlignments(
             Reads(), 'file.json', 'database.fasta')
         titles = TitlesAlignments(alignments)
         self.assertEqual([], list(titles.keys()))
Example #36
0
 def testAddTitle(self):
     """
     A title must be absent from the TitlesAlignments instance before
     addTitle is called for it and present afterwards.
     """
     data = PARAMS + RECORD0
     with patch.object(builtins, 'open', mockOpen(read_data=data)):
         titles = TitlesAlignments(LightReadsAlignments('file.json', DB))
         newTitle = 'Squirrelpox virus 23'
         newAlignments = TitleAlignments(newTitle, 55)
         self.assertNotIn(newTitle, titles)
         titles.addTitle(newTitle, newAlignments)
         self.assertIn(newTitle, titles)
Example #37
0
def main(recordFilenames, fastaFilename, title, xRange, bitRange):
    """
    Print the HSPs of reads matching one subject title, restricted to an
    optional X-axis range and an optional bit score range.

    Exits with status 3 if the title does not appear in the BLAST output.

    @param recordFilenames: A C{list} of C{str} file names containing the
        results of a BLAST run, in JSON format.
    @param fastaFilename: The C{str} name of the FASTA file that was originally
        BLASTed.
    @param title: The C{str} title of the subject sequence, as output by BLAST.
    @param xRange: A (start, end) list of C{int}s, giving an X-axis range or
        C{None} if the entire X axis range should be printed.
    @param bitRange: A (start, end) list of C{int}s, giving a bit score range
        or C{None} if the entire bit score range should be printed.
    """
    allAlignments = BlastReadsAlignments(FastaReads(fastaFilename),
                                         recordFilenames)
    # NOTE(review): presumably the whitelist takes precedence over the
    # match-everything negative regex, leaving only the wanted title --
    # confirm against BlastReadsAlignments.filter.
    titlesAlignments = TitlesAlignments(
        allAlignments.filter(whitelist={title}, negativeTitleRegex='.'))

    if title not in titlesAlignments:
        print('%s: Title %r not found in BLAST output' % (sys.argv[0], title))
        sys.exit(3)

    for titleAlignment in titlesAlignments[title]:
        for hsp in titleAlignment.hsps:
            # Skip HSPs that do not overlap the requested X-axis range.
            if xRange is not None and not (
                    xRange[0] <= hsp.subjectEnd and
                    xRange[1] >= hsp.subjectStart):
                continue
            # Skip HSPs whose score is outside the requested range.
            if bitRange is not None and not (
                    bitRange[0] <= hsp.score.score <= bitRange[1]):
                continue
            details = (titleAlignment.read.id, hsp.subjectStart,
                       hsp.subjectEnd, hsp.score.score)
            print('query: %s, start: %d, end: %d, score: %d' % details)
Example #38
0
    def testTitleCollection(self):
        """
        When several reads align to the same title, the alignments of
        all of those reads must be collected under that title.
        """
        data = PARAMS + RECORD2 + RECORD3
        with patch.object(builtins, 'open', mockOpen(read_data=data)):
            titles = TitlesAlignments(LightReadsAlignments('file.json', DB))

            cowpoxTitle = COWPOX.id
            collected = titles[cowpoxTitle]
            self.assertEqual(2, len(collected))

            self.assertEqual(cowpoxTitle, collected.subjectTitle)
            self.assertEqual(len(COWPOX.sequence), collected.subjectLength)

            # The alignments are expected in the order the reads occur.
            expectedMatches = ((READ2, READ2_COWPOX_SCORE),
                               (READ3, READ3_COWPOX_SCORE))
            for index, (read, score) in enumerate(expectedMatches):
                alignment = collected[index]
                self.assertEqual(read, alignment.read)
                self.assertEqual(score, alignment.hsps[0].score.score)
Example #39
0
 def testMinMatchingReads(self):
     """
     The filter function must work correctly when passed a value for
     minMatchingReads: with a value of 2 only the cowpox title remains.
     """
     data = PARAMS + RECORD0 + RECORD1 + RECORD2 + RECORD3
     with patch.object(builtins, 'open', mockOpen(read_data=data)):
         titles = TitlesAlignments(LightReadsAlignments('file.json', DB))
         survivors = titles.filter(minMatchingReads=2)
         expected = [COWPOX.id]
         self.assertEqual(expected, list(survivors.keys()))
Example #40
0
 def testWithScoreBetterThan(self):
     """
     The filter function must work correctly when passed a value for
     withScoreBetterThan: with 0.9 only the squirrelpox title remains.
     """
     allRecords = PARAMS + RECORD0 + RECORD1 + RECORD2 + RECORD3
     opener = mockOpen(read_data=allRecords)
     with patch.object(builtins, 'open', opener):
         alignments = LightReadsAlignments('file.json', DB)
         filtered = TitlesAlignments(alignments).filter(
             withScoreBetterThan=0.9)
         remaining = list(filtered.keys())
         self.assertEqual([SQUIRRELPOX.id], remaining)
Example #41
0
    def testExpectedTitleDetails(self):
        """
        Each TitleAlignments instance held by a TitlesAlignments instance
        must have the expected title, subject length, read and HSP.
        """
        jsonData = ''.join(
            dumps(record) + '\n' for record in (PARAMS, RECORD0))
        with patch.object(builtins, 'open', mock_open(read_data=jsonData)):
            reads = Reads()
            read = Read('id0', 'A' * 70)
            reads.add(read)
            titles = TitlesAlignments(
                DiamondReadsAlignments(reads, 'file.json'))

            # (title, subject length, HSP score) for each expected title.
            expectedDetails = (
                ('gi|887699|gb|DQ37780 Squirrelpox virus 1296/99', 37000, 20),
                ('gi|887699|gb|DQ37780 Squirrelpox virus 55', 38000, 25),
            )
            for title, subjectLength, score in expectedDetails:
                details = titles[title]
                self.assertEqual(title, details.subjectTitle)
                self.assertEqual(subjectLength, details.subjectLength)
                self.assertEqual(1, len(details))
                self.assertEqual(read, details[0].read)
                self.assertEqual(HSP(score), details[0].hsps[0])
Example #42
0
    def testTitleCollection(self):
        """
        If two reads both align to one title, the data from both reads
        must be collected under that title.
        """
        jsonData = ''.join(
            dumps(record) + '\n' for record in (PARAMS, RECORD2, RECORD3))
        with patch.object(builtins, 'open', mock_open(read_data=jsonData)):
            read2 = Read('id2', 'A' * 70)
            read3 = Read('id3', 'A' * 70)
            reads = Reads()
            for read in (read2, read3):
                reads.add(read)
            titles = TitlesAlignments(
                DiamondReadsAlignments(reads, 'file.json'))

            cowpoxTitle = 'gi|887699|gb|DQ37780 Cowpox virus 15'
            collected = titles[cowpoxTitle]
            self.assertEqual(cowpoxTitle, collected.subjectTitle)
            self.assertEqual(30000, collected.subjectLength)
            self.assertEqual(2, len(collected))

            # Both reads are expected to have a single HSP with score 20.
            for index, expectedRead in enumerate((read2, read3)):
                alignment = collected[index]
                self.assertEqual(expectedRead, alignment.read)
                self.assertEqual(HSP(20), alignment.hsps[0])
Example #43
0
    def testExpectedTitleDetails(self):
        """
        The TitleAlignments instances in a TitlesAlignments instance must
        carry the expected title, subject length, read and score.
        """
        data = PARAMS + RECORD0
        with patch.object(builtins, 'open', mockOpen(read_data=data)):
            titles = TitlesAlignments(LightReadsAlignments('file.json', DB))

            # Each subject is paired with the score READ0 has against it.
            expectedMatches = ((SQUIRRELPOX, READ0_SQUIRRELPOX_SCORE),
                               (CATPOX, READ0_CATPOX_SCORE))
            for subject, score in expectedMatches:
                details = titles[subject.id]
                self.assertEqual(subject.id, details.subjectTitle)
                self.assertEqual(len(subject.sequence),
                                 details.subjectLength)
                self.assertEqual(1, len(details))
                firstAlignment = details[0]
                self.assertEqual(READ0, firstAlignment.read)
                self.assertEqual(score, details[0].hsps[0].score.score)
Example #44
0
 def testExpectedTitles(self):
     """
     The keys of a TitlesAlignments instance must be the expected
     titles.
     """
     data = PARAMS + RECORD0 + RECORD1 + RECORD2
     with patch.object(builtins, 'open', mockOpen(read_data=data)):
         titles = TitlesAlignments(LightReadsAlignments('file.json', DB))
         subjects = (COWPOX, MONKEYPOX, MUMMYPOX, SQUIRRELPOX, CATPOX)
         expected = sorted(subject.id for subject in subjects)
         self.assertEqual(expected, sorted(titles.keys()))
Example #45
0
 def testTitle(self):
     """
     Sorting on title must return the titles in the expected order.
     """
     allRecords = PARAMS + RECORD0 + RECORD1 + RECORD2 + RECORD3
     opener = mockOpen(read_data=allRecords)
     with patch.object(builtins, 'open', opener):
         alignments = LightReadsAlignments('file.json', DB)
         sortedTitles = TitlesAlignments(alignments).sortTitles('title')
         inOrder = (CATPOX, COWPOX, MONKEYPOX, MUMMYPOX, SQUIRRELPOX)
         expected = [subject.id for subject in inOrder]
         self.assertEqual(expected, sortedTitles)
Example #46
0
 def testAddTitle(self):
     """
     Calling addTitle must make a previously absent title a member of
     the TitlesAlignments instance.
     """
     jsonData = ''.join(dumps(record) + '\n' for record in (PARAMS, RECORD0))
     with patch.object(builtins, 'open', mockOpen(read_data=jsonData)):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         titles = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         newTitle = 'gi|887699|gb|DQ37780 Squirrelpox virus 23'
         newAlignments = TitleAlignments(newTitle, 55)
         self.assertFalse(newTitle in titles)
         titles.addTitle(newTitle, newAlignments)
         self.assertTrue(newTitle in titles)
Example #47
0
 def testAddTitle(self):
     """
     The addTitle function must add the given title, which is not
     present beforehand, to the TitlesAlignments instance.
     """
     jsonData = '\n'.join([dumps(PARAMS), dumps(RECORD0)]) + '\n'
     opener = mock_open(read_data=jsonData)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         titles = TitlesAlignments(alignments)
         addedTitle = 'gi|887699|gb|DQ37780 Squirrelpox virus 23'
         self.assertNotIn(addedTitle, titles)
         titles.addTitle(addedTitle, TitleAlignments(addedTitle, 55))
         self.assertIn(addedTitle, titles)
Example #48
0
 def testMaxTitlesZero(self):
     """
     Filtering with maxTitles set to zero must give an empty result.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonData = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonData)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titles = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         kept = titles.filter(maxTitles=0, sortOn='maxScore')
         self.assertEqual(0, len(kept))
Example #49
0
 def testCoverageExcludesAll(self):
     """
     If no title has sufficient coverage, filtering on minCoverage must
     return a titlesAlignments instance that has no titles.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonData = ''.join(dumps(record) + '\n' for record in records)
     opener = mockOpen(read_data=jsonData)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = BlastReadsAlignments(reads, 'file.json')
         survivors = TitlesAlignments(alignments).filter(minCoverage=0.1)
         self.assertEqual(0, len(survivors))
Example #50
0
 def testHsps(self):
     """
     The hsps function must yield every HSP of every title in a
     TitlesAlignments instance.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2)
     jsonData = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonData)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2'):
             reads.add(Read(readId, 'A' * 70))
         titles = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         found = list(titles.hsps())
         # Both sides are sorted, so the order of the yielded HSPs is
         # not checked.
         expected = sorted(HSP(score) for score in (20, 25, 20, 20, 20))
         self.assertEqual(expected, sorted(found))
Example #51
0
    def testReadSetFilterStrict(self):
        """
        The filter function must work correctly when passed a 1.0 value
        for minNewReads.
        """
        records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
        jsonData = ''.join(dumps(record) + '\n' for record in records)
        with patch.object(builtins, 'open', mockOpen(read_data=jsonData)):
            reads = Reads()
            for readId in ('id0', 'id1', 'id2', 'id3'):
                reads.add(Read(readId, 'A' * 70))
            titles = TitlesAlignments(
                DiamondReadsAlignments(reads, 'file.json', 'database.fasta'))
            result = titles.filter(minNewReads=1.0)

            mummypox = 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.'
            monkeypox = 'gi|887699|gb|DQ37780 Monkeypox virus 456'

            # Which of the two titles invalidates the other depends on
            # Python's dict walking order, so accept either outcome but
            # insist that exactly one of them survived the filter.
            survivorCount = 0
            for kept, dropped in ((mummypox, monkeypox),
                                  (monkeypox, mummypox)):
                if kept in result:
                    invalidated = result.readSetFilter.invalidates(kept)
                    self.assertTrue(dropped in invalidated)
                    survivorCount += 1

            self.assertEqual(1, survivorCount)
Example #52
0
 def testExpectedTitles(self):
     """
     The titles held by a TitlesAlignments instance must be the
     expected ones.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2)
     jsonData = ''.join(dumps(record) + '\n' for record in records)
     opener = mockOpen(read_data=jsonData)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2'):
             reads.add(Read(readId, 'A' * 70))
         alignments = BlastReadsAlignments(reads, 'file.json')
         foundTitles = sorted(TitlesAlignments(alignments).keys())
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, foundTitles)
Example #53
0
 def testWithScoreBetterThan_Bits(self):
     """
     The filter function must work correctly when passed a value for
     withScoreBetterThan when using bit scores.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonData = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonData)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titles = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         survivors = titles.filter(withScoreBetterThan=24)
         expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 55']
         self.assertEqual(expected, list(survivors.keys()))
Example #54
0
 def testMinMedianScore_Bits(self):
     """
     The filter function must work correctly when passed a value for
     minMedianScore when using bit scores.
     """
     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     # Patch open through the builtins module object, not through the
     # '__builtin__.open' dotted path: there is no '__builtin__' module
     # on Python 3, so patching by that name fails there.
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(minMedianScore=22)
         # keys() returns a view on Python 3, which never compares
         # equal to a list, so convert it before comparing.
         self.assertEqual(
             [
                 'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             ],
             list(result.keys()))
Example #55
0
 def testMaxTitlesOne(self):
     """
     When maxTitles is one, the filter function must return only the
     best title.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonData = ''.join(dumps(record) + '\n' for record in records)
     opener = mockOpen(read_data=jsonData)
     with patch.object(builtins, 'open', opener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = BlastReadsAlignments(reads, 'file.json')
         best = TitlesAlignments(alignments).filter(
             maxTitles=1, sortOn='maxScore')
         expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 55']
         self.assertEqual(expected, sorted(best.keys()))
Example #56
0
 def testMinMatchingReads(self):
     """
     The filter function must work correctly when passed a value for
     minMatchingReads: with a value of 2 only the cowpox title remains.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonData = ''.join(dumps(record) + '\n' for record in records)
     with patch.object(builtins, 'open', mockOpen(read_data=jsonData)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         titles = TitlesAlignments(
             BlastReadsAlignments(reads, 'file.json'))
         survivors = titles.filter(minMatchingReads=2)
         remaining = list(survivors.keys())
         self.assertEqual(
             ['gi|887699|gb|DQ37780 Cowpox virus 15'], remaining)
Example #57
0
        maxSequenceLen=args.maxSequenceLen,
        minStart=args.minStart,
        maxStop=args.maxStop,
        oneAlignmentPerRead=args.oneAlignmentPerRead,
        maxHspsPerHit=args.maxHspsPerHit,
        scoreCutoff=args.scoreCutoff,
        whitelist=set(args.whitelist) if args.whitelist else None,
        blacklist=set(args.blacklist) if args.blacklist else None,
        titleRegex=args.titleRegex,
        negativeTitleRegex=args.negativeTitleRegex,
        truncateTitlesAfter=args.truncateTitlesAfter,
        taxonomy=args.taxonomy)

    titlesAlignments = TitlesAlignments(readsAlignments).filter(
        minMatchingReads=args.minMatchingReads,
        minMedianScore=args.minMedianScore,
        withScoreBetterThan=args.withScoreBetterThan,
        minNewReads=args.minNewReads)

    nTitles = len(titlesAlignments)
    print('Found %d interesting title%s.' % (nTitles,
                                             '' if nTitles == 1 else 's'))

    if args.earlyExit:
        print('Matched titles (sorted by best score, descending):')
        print('\n'.join(titlesAlignments.sortTitles('maxScore')))
        sys.exit(0)

    alignmentPanel(titlesAlignments, sortOn=args.sortOn, interactive=True,
                   outputDir=args.outputDir,
                   idList=parseColors(args.color) if args.color else None,
            reads, jsonFiles, args.diamondDatabaseFastaFilename)

    readsAlignments.filter(
        minSequenceLen=args.minSequenceLen,
        maxSequenceLen=args.maxSequenceLen,
        minStart=args.minStart, maxStop=args.maxStop,
        oneAlignmentPerRead=args.oneAlignmentPerRead,
        maxHspsPerHit=args.maxHspsPerHit,
        scoreCutoff=args.scoreCutoff,
        whitelist=whitelist, blacklist=blacklist,
        titleRegex=args.titleRegex, negativeTitleRegex=args.negativeTitleRegex,
        truncateTitlesAfter=args.truncateTitlesAfter, taxonomy=args.taxonomy)

    titlesAlignments = TitlesAlignments(readsAlignments).filter(
        minMatchingReads=args.minMatchingReads,
        minMedianScore=args.minMedianScore,
        withScoreBetterThan=args.withScoreBetterThan,
        minNewReads=args.minNewReads, maxTitles=args.maxTitles,
        sortOn=args.sortOn, minCoverage=args.minCoverage)

    nTitles = len(titlesAlignments)
    print('Found %d interesting title%s.' %
          (nTitles, '' if nTitles == 1 else 's'), file=sys.stderr)

    if nTitles:
        print(titlesAlignments.tabSeparatedSummary(sortOn=args.sortOn))

    if args.earlyExit:
        sys.exit(0)

    if nTitles == 0:
        print('No alignment panel generated due to no matching titles.',
        scoreCutoff=args.scoreCutoff,
        whitelist=args.whitelist,
        blacklist=args.blacklist,
        titleRegex=args.titleRegex,
        negativeTitleRegex=args.negativeTitleRegex,
        truncateTitlesAfter=args.truncateTitlesAfter,
        taxonomy=args.taxonomy)

    reads = Reads()

    if (args.minMatchingReads is None and args.minMedianScore is None and
            args.withScoreBetterThan is None and args.minNewReads is None):
        # No need to collect into titles, just get the read ids from
        # the matching alignments.
        for readAlignment in readsAlignments:
            reads.add(readAlignment.read)
    else:
        # We need to collect alignments into titles.
        titlesAlignments = TitlesAlignments(readsAlignments).filter(
            minMatchingReads=args.minMatchingReads,
            minMedianScore=args.minMedianScore,
            withScoreBetterThan=args.withScoreBetterThan,
            minNewReads=args.minNewReads)

        for titleAlignments in titlesAlignments.values():
            for alignment in titleAlignments.alignments:
                reads.add(alignment.read)

    reads.save(sys.stdout)
    print('Found %d matching reads.' % len(reads), file=sys.stderr)
        scoreCutoff=args.scoreCutoff,
        whitelist=args.whitelist,
        blacklist=args.blacklist,
        titleRegex=args.titleRegex,
        negativeTitleRegex=args.negativeTitleRegex,
        truncateTitlesAfter=args.truncateTitlesAfter,
        taxonomy=args.taxonomy)

    reads = Reads()

    if (args.minMatchingReads is None and args.minMedianScore is None and
            args.withScoreBetterThan is None and args.minNewReads is None):
        # No need to collect into titles, just get the read ids from
        # the matching alignments.
        for readAlignment in readsAlignments:
            reads.add(readAlignment.read)
    else:
        # We need to collect alignments into titles.
        titlesAlignments = TitlesAlignments(readsAlignments).filter(
            minMatchingReads=args.minMatchingReads,
            minMedianScore=args.minMedianScore,
            withScoreBetterThan=args.withScoreBetterThan,
            minNewReads=args.minNewReads)

        for titleAlignments in titlesAlignments.itervalues():
            for alignment in titleAlignments.alignments:
                reads.add(alignment.read)

    reads.save(sys.stdout)
    print >>sys.stderr, 'Found %d matching reads.' % len(reads)