def testReadSetFilterAllowAnything(self):
    """
    Filtering with a minNewReads value of 0.0 (i.e., one that considers
    any read set sufficiently novel) must keep every title.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mock_open(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(queries, 'file.json')
        filtered = TitlesAlignments(alignments).filter(minNewReads=0.0)
        expected = [
            'gi|887699|gb|DQ37780 Cowpox virus 15',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, sorted(filtered))
def testWithScoreBetterThan_EValue(self):
    """
    The filter function must work correctly when passed a value for
    withScoreBetterThan and the scores are e values.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(
            queries, 'file.json', 'database.fasta',
            scoreClass=LowerIsBetterScore)
        filtered = TitlesAlignments(alignments).filter(
            withScoreBetterThan=1e-10)
        expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 1296/99']
        self.assertEqual(expected, list(filtered.keys()))
def testMinMedianScore_EValue(self):
    """
    The filter function must work correctly when passed a value for
    minMedianScore and the scores are e values.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = BlastReadsAlignments(
            queries, 'file.json', scoreClass=LowerIsBetterScore)
        filtered = TitlesAlignments(alignments).filter(minMedianScore=1e-9)
        expected = [
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, sorted(filtered.keys()))
def testCoverageIncludesAll(self):
    """
    Filtering on coverage must return a TitlesAlignments instance holding
    all titles when every title has sufficient coverage.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mock_open(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(queries, 'file.json')
        filtered = TitlesAlignments(alignments).filter(minCoverage=0.0)
        expected = [
            'gi|887699|gb|DQ37780 Cowpox virus 15',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, sorted(filtered))
def testFilterWithNoArguments(self):
    """
    Calling filter with no arguments must return a TitlesAlignments
    instance containing all the titles of the original.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(3):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = BlastReadsAlignments(queries, 'file.json')
        unfiltered = TitlesAlignments(alignments).filter()
        expected = [
            'gi|887699|gb|DQ37780 Cowpox virus 15',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, sorted(unfiltered.keys()))
def testCoverageIncludesAll(self):
    """
    Filtering on coverage must return a TitlesAlignments instance holding
    all titles when every title has sufficient coverage.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = BlastReadsAlignments(queries, 'file.json')
        filtered = TitlesAlignments(alignments).filter(minCoverage=0.0)
        expected = [
            'gi|887699|gb|DQ37780 Cowpox virus 15',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, sorted(filtered.keys()))
def testLength(self):
    """
    Sorting on sequence length must work, including a secondary sort on
    title.
    """
    mockOpener = mockOpen(read_data=''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)))
    # Patch open on the builtins module object instead of the dotted
    # '__builtin__.open' path: that module name only exists on Python 2,
    # so the string target cannot be resolved on Python 3. This also
    # matches how the other tests in this file patch open.
    with patch.object(builtins, 'open', mockOpener):
        reads = Reads()
        for index in range(4):
            reads.add(Read('id%d' % index, 'A' * 70))
        readsAlignments = BlastReadsAlignments(
            reads, 'file.json', scoreClass=LowerIsBetterScore)
        titlesAlignments = TitlesAlignments(readsAlignments)
        result = titlesAlignments.sortTitles('length')
        # The trailing annotations are the subject sequence lengths.
        self.assertEqual([
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 38000
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 37000
            'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 35000
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 35000
            'gi|887699|gb|DQ37780 Cowpox virus 15',            # 30000
        ], result)
def testMedianScore_Bits(self):
    """
    Sorting on median score must work when the scores are bit scores,
    including a secondary sort on title.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3,
                       RECORD4))
    opener = mock_open(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(5):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(queries, 'file.json')
        ordered = TitlesAlignments(alignments).sortTitles('medianScore')
        expected = [
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 25
            'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 20
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 20
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
            'gi|887699|gb|DQ37780 Cowpox virus 15',            # 20
        ]
        self.assertEqual(expected, ordered)
def testTitle(self):
    """
    Sorting on title must work.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = BlastReadsAlignments(
            queries, 'file.json', scoreClass=LowerIsBetterScore)
        ordered = TitlesAlignments(alignments).sortTitles('title')
        expected = [
            'gi|887699|gb|DQ37780 Cowpox virus 15',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, ordered)
def testLength(self):
    """
    Sorting on sequence length must work, including a secondary sort on
    title.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = BlastReadsAlignments(
            queries, 'file.json', scoreClass=LowerIsBetterScore)
        ordered = TitlesAlignments(alignments).sortTitles('length')
        # The trailing annotations are the subject sequence lengths.
        expected = [
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 38000
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 37000
            'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 35000
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 35000
            'gi|887699|gb|DQ37780 Cowpox virus 15',            # 30000
        ]
        self.assertEqual(expected, ordered)
def testReadSetFilterAllowAnything(self):
    """
    Filtering with a minNewReads value of 0.0 (i.e., one that considers
    any read set sufficiently novel) must keep every title.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = BlastReadsAlignments(queries, 'file.json')
        filtered = TitlesAlignments(alignments).filter(minNewReads=0.0)
        expected = [
            'gi|887699|gb|DQ37780 Cowpox virus 15',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, sorted(filtered.keys()))
def testMaxScore_EValue(self):
    """
    Sorting on max score must work when the scores are e values,
    including a secondary sort on title.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mock_open(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(
            queries, 'file.json', scoreClass=LowerIsBetterScore)
        ordered = TitlesAlignments(alignments).sortTitles('maxScore')
        # The expected order runs from the lowest annotated e value to
        # the highest (see the per-title annotations).
        expected = [
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
            'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
            'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
        ]
        self.assertEqual(expected, ordered)
def testMaxMatchingReads(self):
    """
    The filter function must work correctly when passed a value for
    maxMatchingReads.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mock_open(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(queries, 'file.json')
        filtered = TitlesAlignments(alignments).filter(maxMatchingReads=1)
        # Cowpox virus 15 is not in the results as it is matched by two
        # reads.
        expected = [
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
        ]
        self.assertEqual(sorted(expected), sorted(filtered))
def testReadSetFilterStrict(self):
    """
    The filter function must work correctly when passed a 1.0 value for
    minNewReads.
    """
    mockOpener = mockOpen(
        read_data=(PARAMS + RECORD0 + RECORD1 + RECORD2 + RECORD3))
    with patch.object(builtins, 'open', mockOpener):
        readsAlignments = LightReadsAlignments('file.json', DB)
        titlesAlignments = TitlesAlignments(readsAlignments)
        result = titlesAlignments.filter(minNewReads=1.0)

        # Either MUMMYPOX.id invalidates MONKEYPOX.id or vice-versa. It
        # depends on Python's dict walking order. Check for both,
        # making sure just one of them is true.
        assertionCount = 0
        for kept, invalidated in ((MUMMYPOX.id, MONKEYPOX.id),
                                  (MONKEYPOX.id, MUMMYPOX.id)):
            if kept in result:
                # assertIn reports both operands on failure, unlike
                # assertTrue(x in y).
                self.assertIn(
                    invalidated, result.readSetFilter.invalidates(kept))
                assertionCount += 1

        self.assertEqual(1, assertionCount)
def testTabSeparatedSummary(self):
    """
    The tabSeparatedSummary function must return the correct result.
    """
    jsonLines = ''.join(
        dumps(record) + '\n' for record in (PARAMS, RECORD0))
    opener = mock_open(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        queries.add(Read('id0', 'A' * 70))
        alignments = DiamondReadsAlignments(queries, 'f.json')
        actual = TitlesAlignments(alignments).tabSeparatedSummary(
            sortOn='title')
        # One tuple per expected output line; fields are TAB-joined and
        # lines are newline-joined (with no trailing newline).
        rows = (
            ('0.000297', '20.000000', '20.000000', '1', '1', '37000',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'),
            ('0.000289', '25.000000', '25.000000', '1', '1', '38000',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55'),
        )
        wanted = '\n'.join('\t'.join(row) for row in rows)
        self.assertEqual(wanted, actual)
def testFilterWithNoArguments(self):
    """
    Calling filter with no arguments must return a TitlesAlignments
    instance containing all the titles of the original.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2))
    opener = mock_open(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(3):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(queries, 'file.json')
        unfiltered = TitlesAlignments(alignments).filter()
        expected = [
            'gi|887699|gb|DQ37780 Cowpox virus 15',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, sorted(unfiltered))
def testTitle(self):
    """
    Sorting on title must work.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mock_open(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(
            queries, 'file.json', scoreClass=LowerIsBetterScore)
        ordered = TitlesAlignments(alignments).sortTitles('title')
        expected = [
            'gi|887699|gb|DQ37780 Cowpox virus 15',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, ordered)
def testCoverageIncludesSome(self):
    """
    Filtering on coverage must return a TitlesAlignments instance with
    only the expected titles when only some titles have sufficient
    coverage.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = BlastReadsAlignments(queries, 'file.json')
        # To understand why the following produces the result it does,
        # you need to look at the HSP coverage in sample_data.py and
        # calculate the coverage by hand.
        filtered = TitlesAlignments(alignments).filter(minCoverage=0.0011)
        expected = [
            'gi|887699|gb|DQ37780 Cowpox virus 15',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
        ]
        self.assertEqual(expected, sorted(filtered.keys()))
def testCoverageIncludesSome(self):
    """
    Filtering on coverage must return a TitlesAlignments instance with
    only the expected titles when only some titles have sufficient
    coverage.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mock_open(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(queries, 'file.json')
        # To understand why the following produces the result it does,
        # you need to look at the HSP coverage in sample_data.py and
        # calculate the coverage by hand.
        filtered = TitlesAlignments(alignments).filter(minCoverage=0.0003)
        expected = [
            'gi|887699|gb|DQ37780 Cowpox virus 15',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
        ]
        self.assertEqual(expected, sorted(filtered))
def testMinMedianScore_EValue(self):
    """
    The filter function must work correctly when passed a value for
    minMedianScore and the scores are e values.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mock_open(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(
            queries, 'file.json', scoreClass=LowerIsBetterScore)
        filtered = TitlesAlignments(alignments).filter(minMedianScore=1e-9)
        expected = [
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, sorted(filtered))
def testMedianScore_Bits(self):
    """
    Sorting on median score must work when the scores are bit scores,
    including a secondary sort on title.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3,
                       RECORD4))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(5):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = BlastReadsAlignments(queries, 'file.json')
        ordered = TitlesAlignments(alignments).sortTitles('medianScore')
        expected = [
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 25
            'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 20
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 20
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
            'gi|887699|gb|DQ37780 Cowpox virus 15',            # 20
        ]
        self.assertEqual(expected, ordered)
def testMedianScore_EValue(self):
    """
    Sorting on median score must work when scores are e values,
    including a secondary sort on title.
    """
    mockOpener = mockOpen(read_data=''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3,
                       RECORD4)))
    # Patch open on the builtins module object instead of the dotted
    # '__builtin__.open' path: that module name only exists on Python 2,
    # so the string target cannot be resolved on Python 3. This also
    # matches how the other tests in this file patch open.
    with patch.object(builtins, 'open', mockOpener):
        reads = Reads()
        for index in range(5):
            reads.add(Read('id%d' % index, 'A' * 70))
        readsAlignments = BlastReadsAlignments(
            reads, 'file.json', scoreClass=LowerIsBetterScore)
        titlesAlignments = TitlesAlignments(readsAlignments)
        result = titlesAlignments.sortTitles('medianScore')
        self.assertEqual([
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
            'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
            'gi|887699|gb|DQ37780 Cowpox virus 15',          # worst :-)
        ], result)
def testMaxTitlesTwoSortOnLength(self):
    """
    With maxTitles=2 and sortOn='length', the filter function must
    return the two titles whose sequences are the longest.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(
            queries, 'file.json', 'database.fasta')
        filtered = TitlesAlignments(alignments).filter(
            maxTitles=2, sortOn='length')
        expected = [
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, sorted(filtered.keys()))
def testMaxScore_EValue(self):
    """
    Sorting on max score must work when the scores are e values,
    including a secondary sort on title.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = BlastReadsAlignments(
            queries, 'file.json', scoreClass=LowerIsBetterScore)
        ordered = TitlesAlignments(alignments).sortTitles('maxScore')
        # The expected order runs from the lowest annotated e value to
        # the highest (see the per-title annotations).
        expected = [
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
            'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
            'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
        ]
        self.assertEqual(expected, ordered)
def testWithScoreBetterThan_EValue(self):
    """
    The filter function must work correctly when passed a value for
    withScoreBetterThan and the scores are e values.
    """
    jsonLines = ''.join(
        dumps(record) + '\n'
        for record in (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(4):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(
            queries, 'file.json', 'database.fasta',
            scoreClass=LowerIsBetterScore)
        filtered = TitlesAlignments(alignments).filter(
            withScoreBetterThan=1e-10)
        expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 1296/99']
        self.assertEqual(expected, list(filtered.keys()))
def testTabSeparatedSummary(self):
    """
    The tabSeparatedSummary function must return the correct result.
    """
    jsonLines = ''.join(
        dumps(record) + '\n' for record in (PARAMS, RECORD0))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        queries.add(Read('id0', 'A' * 70))
        alignments = DiamondReadsAlignments(queries, 'f.json', 'db')
        actual = TitlesAlignments(alignments).tabSeparatedSummary(
            sortOn='title')
        # One tuple per expected output line; fields are TAB-joined and
        # lines are newline-joined (with no trailing newline).
        rows = (
            ('0.000297', '20.000000', '20.000000', '1', '1', '37000',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'),
            ('0.000289', '25.000000', '25.000000', '1', '1', '38000',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55'),
        )
        wanted = '\n'.join('\t'.join(row) for row in rows)
        self.assertEqual(wanted, actual)
def testSummary(self):
    """
    The summary function must return the correct result.
    """
    def summaryRow(title, length, coverage, score):
        # In every expected row the best and median scores coincide and
        # there is exactly one HSP from one read.
        return {
            'bestScore': score,
            'coverage': coverage,
            'hspCount': 1,
            'medianScore': score,
            'readCount': 1,
            'subjectLength': length,
            'subjectTitle': title,
        }

    jsonLines = ''.join(
        dumps(record) + '\n' for record in (PARAMS, RECORD0, RECORD1))
    opener = mockOpen(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(2):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(
            queries, 'file.json', 'database.fasta')
        titles = TitlesAlignments(alignments)
        expected = [
            summaryRow('gi|887699|gb|DQ37780 Monkeypox virus 456',
                       35000, 0.00031428571428571427, 20.0),
            summaryRow('gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
                       35000, 0.00031428571428571427, 20.0),
            summaryRow('gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                       37000, 0.0002972972972972973, 20.0),
            summaryRow('gi|887699|gb|DQ37780 Squirrelpox virus 55',
                       38000, 0.00028947368421052634, 25.0),
        ]
        self.assertEqual(expected, list(titles.summary(sortOn='title')))
def testSummary(self):
    """
    The summary function must return the correct result.
    """
    def summaryRow(title, length, coverage, score):
        # In every expected row the best and median scores coincide and
        # there is exactly one HSP from one read.
        return {
            'bestScore': score,
            'coverage': coverage,
            'hspCount': 1,
            'medianScore': score,
            'readCount': 1,
            'subjectLength': length,
            'subjectTitle': title,
        }

    jsonLines = ''.join(
        dumps(record) + '\n' for record in (PARAMS, RECORD0, RECORD1))
    opener = mock_open(read_data=jsonLines)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        for index in range(2):
            queries.add(Read('id%d' % index, 'A' * 70))
        alignments = DiamondReadsAlignments(queries, 'file.json')
        titles = TitlesAlignments(alignments)
        expected = [
            summaryRow('gi|887699|gb|DQ37780 Monkeypox virus 456',
                       35000, 0.00031428571428571427, 20.0),
            summaryRow('gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
                       35000, 0.00031428571428571427, 20.0),
            summaryRow('gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
                       37000, 0.0002972972972972973, 20.0),
            summaryRow('gi|887699|gb|DQ37780 Squirrelpox virus 55',
                       38000, 0.00028947368421052634, 25.0),
        ]
        self.assertEqual(expected, list(titles.summary(sortOn='title')))
def testEmpty(self):
    """
    Sorting when there are no titles must return the empty list.
    """
    opener = mockOpen(read_data=PARAMS)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        ordered = TitlesAlignments(alignments).sortTitles('title')
        self.assertEqual([], ordered)
def testEmpty(self):
    """
    A TitlesAlignments instance built from an empty readsAlignments
    instance must have no titles.
    """
    opener = mockOpen(read_data=PARAMS)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        titles = TitlesAlignments(alignments)
        self.assertEqual([], list(titles.keys()))
def testEmpty(self):
    """
    Sorting when there are no titles must return the empty list.
    """
    # The mocked file holds only the parameters line, no records.
    paramsOnly = dumps(PARAMS) + '\n'
    opener = mock_open(read_data=paramsOnly)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        alignments = DiamondReadsAlignments(queries, 'file.json')
        ordered = TitlesAlignments(alignments).sortTitles('title')
        self.assertEqual([], ordered)
def testEmpty(self):
    """
    Sorting when there are no titles must return the empty list.
    """
    # The mocked file holds only the parameters line, no records.
    paramsOnly = dumps(PARAMS) + '\n'
    opener = mockOpen(read_data=paramsOnly)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        alignments = BlastReadsAlignments(queries, 'file.json')
        ordered = TitlesAlignments(alignments).sortTitles('title')
        self.assertEqual([], ordered)
def testEmpty(self):
    """
    A TitlesAlignments instance built from an empty readsAlignments
    instance must have no titles.
    """
    # The mocked file holds only the parameters line, no records.
    paramsOnly = dumps(PARAMS) + '\n'
    opener = mockOpen(read_data=paramsOnly)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        alignments = BlastReadsAlignments(queries, 'file.json')
        titles = TitlesAlignments(alignments)
        self.assertEqual([], list(titles.keys()))
def testCoverageExcludesAll(self):
    """
    Filtering on coverage must return a TitlesAlignments instance with
    no titles when none of its titles has sufficient coverage.
    """
    fileContents = PARAMS + RECORD0 + RECORD1 + RECORD2 + RECORD3
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        filtered = TitlesAlignments(alignments).filter(minCoverage=0.1)
        self.assertEqual(0, len(filtered))
def testEmpty(self):
    """
    A TitlesAlignments instance built from an empty readsAlignments
    instance must have no titles.
    """
    # The mocked file holds only the parameters line, no records.
    paramsOnly = dumps(PARAMS) + '\n'
    opener = mockOpen(read_data=paramsOnly)
    with patch.object(builtins, 'open', opener):
        queries = Reads()
        alignments = DiamondReadsAlignments(
            queries, 'file.json', 'database.fasta')
        titles = TitlesAlignments(alignments)
        self.assertEqual([], list(titles.keys()))
def testAddTitle(self):
    """
    The addTitle function must add a title to the TitlesAlignments
    instance.
    """
    mockOpener = mockOpen(read_data=(PARAMS + RECORD0))
    with patch.object(builtins, 'open', mockOpener):
        readsAlignments = LightReadsAlignments('file.json', DB)
        titlesAlignments = TitlesAlignments(readsAlignments)
        title = 'Squirrelpox virus 23'
        titleAlignments = TitleAlignments(title, 55)
        # assertNotIn / assertIn report the member and the container on
        # failure, which assertTrue(x in y) does not.
        self.assertNotIn(title, titlesAlignments)
        titlesAlignments.addTitle(title, titleAlignments)
        self.assertIn(title, titlesAlignments)
def main(recordFilenames, fastaFilename, title, xRange, bitRange):
    """
    Print reads that match in a specified X-axis and bit score range.

    If C{title} is not among the titles found in the BLAST output, a
    message is printed and the process exits with status 3.

    @param recordFilenames: A C{list} of C{str} file names containing the
        results of a BLAST run, in JSON format.
    @param fastaFilename: The C{str} name of the FASTA file that was
        originally BLASTed.
    @param title: The C{str} title of the subject sequence, as output by
        BLAST.
    @param xRange: A (start, end) list of C{int}s, giving an X-axis range
        or C{None} if the entire X axis range should be printed.
    @param bitRange: A (start, end) list of C{int}s, giving a bit score
        range or C{None} if the entire bit score range should be printed.
    """
    reads = FastaReads(fastaFilename)
    blastReadsAlignments = BlastReadsAlignments(reads, recordFilenames)
    filtered = blastReadsAlignments.filter(whitelist={title},
                                           negativeTitleRegex='.')
    titlesAlignments = TitlesAlignments(filtered)

    if title not in titlesAlignments:
        print('%s: Title %r not found in BLAST output' % (
            sys.argv[0], title))
        sys.exit(3)

    for titleAlignment in titlesAlignments[title]:
        for hsp in titleAlignment.hsps:
            # Skip HSPs whose subject span does not overlap the requested
            # X-axis range.
            if xRange is not None and not (
                    xRange[0] <= hsp.subjectEnd and
                    xRange[1] >= hsp.subjectStart):
                continue
            # Skip HSPs whose score lies outside the (inclusive) bit
            # score range.
            if bitRange is not None and not (
                    bitRange[0] <= hsp.score.score <= bitRange[1]):
                continue
            print('query: %s, start: %d, end: %d, score: %d' % (
                titleAlignment.read.id, hsp.subjectStart, hsp.subjectEnd,
                hsp.score.score))
def testTitleCollection(self):
    """
    When a title occurs in the alignments of multiple reads, the data
    from all those reads must be collected properly under that title.
    """
    fileContents = PARAMS + RECORD2 + RECORD3
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        titles = TitlesAlignments(alignments)
        wantedTitle = COWPOX.id
        collected = titles[wantedTitle]
        self.assertEqual(2, len(collected))
        self.assertEqual(wantedTitle, collected.subjectTitle)
        self.assertEqual(len(COWPOX.sequence), collected.subjectLength)
        # Each collected alignment must carry its read and that read's
        # score against the title, in the order the reads were given.
        expectedPairs = ((READ2, READ2_COWPOX_SCORE),
                         (READ3, READ3_COWPOX_SCORE))
        for position, (read, score) in enumerate(expectedPairs):
            self.assertEqual(read, collected[position].read)
            self.assertEqual(score,
                             collected[position].hsps[0].score.score)
def testMinMatchingReads(self):
    """
    The filter function must work correctly when passed a value for
    minMatchingReads.
    """
    fileContents = PARAMS + RECORD0 + RECORD1 + RECORD2 + RECORD3
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        kept = TitlesAlignments(alignments).filter(minMatchingReads=2)
        expected = [COWPOX.id]
        self.assertEqual(expected, list(kept.keys()))
def testWithScoreBetterThan(self):
    """
    The filter function must work correctly when passed a value for
    withScoreBetterThan.
    """
    fileContents = PARAMS + RECORD0 + RECORD1 + RECORD2 + RECORD3
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        kept = TitlesAlignments(alignments).filter(
            withScoreBetterThan=0.9)
        expected = [SQUIRRELPOX.id]
        self.assertEqual(expected, list(kept.keys()))
def testExpectedTitleDetails(self):
    """
    Each TitleAlignments instance held by a TitlesAlignments instance
    must expose the expected attributes.
    """
    fileContents = ''.join(
        dumps(record) + '\n' for record in (PARAMS, RECORD0))
    opener = mock_open(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        read = Read('id0', 'A' * 70)
        reads.add(read)
        alignments = DiamondReadsAlignments(reads, 'file.json')
        collected = TitlesAlignments(alignments)

        # (title, subject length, HSP score) expected for each subject.
        expectations = (
            ('gi|887699|gb|DQ37780 Squirrelpox virus 1296/99', 37000, 20),
            ('gi|887699|gb|DQ37780 Squirrelpox virus 55', 38000, 25),
        )
        for title, subjectLength, score in expectations:
            details = collected[title]
            self.assertEqual(title, details.subjectTitle)
            self.assertEqual(subjectLength, details.subjectLength)
            self.assertEqual(1, len(details))
            self.assertEqual(read, details[0].read)
            self.assertEqual(HSP(score), details[0].hsps[0])
def testTitleCollection(self):
    """
    When two reads have alignments against the same title, the data from
    both reads must be gathered under that title.
    """
    fileContents = ''.join(
        dumps(record) + '\n' for record in (PARAMS, RECORD2, RECORD3))
    opener = mock_open(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        read2 = Read('id2', 'A' * 70)
        read3 = Read('id3', 'A' * 70)
        for read in (read2, read3):
            reads.add(read)
        alignments = DiamondReadsAlignments(reads, 'file.json')
        collected = TitlesAlignments(alignments)

        title = 'gi|887699|gb|DQ37780 Cowpox virus 15'
        cowpox = collected[title]
        self.assertEqual(title, cowpox.subjectTitle)
        self.assertEqual(30000, cowpox.subjectLength)
        self.assertEqual(2, len(cowpox))

        # Both reads must be present, in file order, with the same HSP.
        for index, expectedRead in enumerate((read2, read3)):
            self.assertEqual(expectedRead, cowpox[index].read)
            self.assertEqual(HSP(20), cowpox[index].hsps[0])
def testExpectedTitleDetails(self):
    """
    Each TitleAlignments instance held by a TitlesAlignments instance
    must expose the expected attributes.
    """
    fileContents = PARAMS + RECORD0
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        collected = TitlesAlignments(alignments)

        squirrelpox = collected[SQUIRRELPOX.id]
        self.assertEqual(SQUIRRELPOX.id, squirrelpox.subjectTitle)
        self.assertEqual(len(SQUIRRELPOX.sequence),
                         squirrelpox.subjectLength)
        self.assertEqual(1, len(squirrelpox))
        self.assertEqual(READ0, squirrelpox[0].read)
        self.assertEqual(READ0_SQUIRRELPOX_SCORE,
                         squirrelpox[0].hsps[0].score.score)

        catpox = collected[CATPOX.id]
        self.assertEqual(CATPOX.id, catpox.subjectTitle)
        self.assertEqual(len(CATPOX.sequence), catpox.subjectLength)
        self.assertEqual(1, len(catpox))
        self.assertEqual(READ0, catpox[0].read)
        self.assertEqual(READ0_CATPOX_SCORE,
                         catpox[0].hsps[0].score.score)
def testExpectedTitles(self):
    """
    A TitlesAlignments instance must contain exactly the expected titles.
    """
    fileContents = PARAMS + RECORD0 + RECORD1 + RECORD2
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        collected = TitlesAlignments(alignments)
        # Both sides are sorted, so the comparison ignores ordering.
        expected = sorted([
            COWPOX.id,
            MONKEYPOX.id,
            MUMMYPOX.id,
            SQUIRRELPOX.id,
            CATPOX.id,
        ])
        self.assertEqual(expected, sorted(collected.keys()))
def testTitle(self):
    """
    Sorting titles by title must work.
    """
    fileContents = PARAMS + RECORD0 + RECORD1 + RECORD2 + RECORD3
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        ordered = TitlesAlignments(alignments).sortTitles('title')
        expected = [
            CATPOX.id,
            COWPOX.id,
            MONKEYPOX.id,
            MUMMYPOX.id,
            SQUIRRELPOX.id,
        ]
        self.assertEqual(expected, ordered)
def testAddTitle(self):
    """
    Calling addTitle must make the new title a member of the
    TitlesAlignments instance.
    """
    fileContents = ''.join(
        dumps(record) + '\n' for record in (PARAMS, RECORD0))
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        reads.add(Read('id0', 'A' * 70))
        alignments = BlastReadsAlignments(reads, 'file.json')
        collected = TitlesAlignments(alignments)

        newTitle = 'gi|887699|gb|DQ37780 Squirrelpox virus 23'
        newTitleAlignments = TitleAlignments(newTitle, 55)
        # The title must be absent before the call and present after it.
        self.assertTrue(newTitle not in collected)
        collected.addTitle(newTitle, newTitleAlignments)
        self.assertTrue(newTitle in collected)
def testAddTitle(self):
    """
    Calling addTitle must make the new title a member of the
    TitlesAlignments instance.
    """
    fileContents = ''.join(
        dumps(record) + '\n' for record in (PARAMS, RECORD0))
    opener = mock_open(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        reads.add(Read('id0', 'A' * 70))
        alignments = DiamondReadsAlignments(reads, 'file.json')
        collected = TitlesAlignments(alignments)

        newTitle = 'gi|887699|gb|DQ37780 Squirrelpox virus 23'
        newTitleAlignments = TitleAlignments(newTitle, 55)
        # The title must be absent before the call and present after it.
        self.assertTrue(newTitle not in collected)
        collected.addTitle(newTitle, newTitleAlignments)
        self.assertTrue(newTitle in collected)
def testMaxTitlesZero(self):
    """
    With maxTitles set to zero, the filter function must return an empty
    result.
    """
    records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
    fileContents = ''.join(dumps(record) + '\n' for record in records)
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        for readId in ('id0', 'id1', 'id2', 'id3'):
            reads.add(Read(readId, 'A' * 70))
        alignments = BlastReadsAlignments(reads, 'file.json')
        kept = TitlesAlignments(alignments).filter(
            maxTitles=0, sortOn='maxScore')
        self.assertEqual(0, len(kept))
def testCoverageExcludesAll(self):
    """
    Filtering on coverage must return a TitlesAlignments instance with no
    titles if none of its titles has sufficient coverage.
    """
    records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
    fileContents = ''.join(dumps(record) + '\n' for record in records)
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        for readId in ('id0', 'id1', 'id2', 'id3'):
            reads.add(Read(readId, 'A' * 70))
        alignments = BlastReadsAlignments(reads, 'file.json')
        kept = TitlesAlignments(alignments).filter(minCoverage=0.1)
        self.assertEqual(0, len(kept))
def testHsps(self):
    """
    The hsps function must yield every HSP of every title held by a
    TitlesAlignments instance.
    """
    records = (PARAMS, RECORD0, RECORD1, RECORD2)
    fileContents = ''.join(dumps(record) + '\n' for record in records)
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        for readId in ('id0', 'id1', 'id2'):
            reads.add(Read(readId, 'A' * 70))
        alignments = BlastReadsAlignments(reads, 'file.json')
        collected = TitlesAlignments(alignments)
        found = list(collected.hsps())
        # Compare sorted lists: the order in which HSPs are yielded is
        # not part of what is being checked.
        expected = sorted(HSP(score) for score in (20, 25, 20, 20, 20))
        self.assertEqual(expected, sorted(found))
def testReadSetFilterStrict(self):
    """
    The filter function must work correctly when passed a 1.0 value for
    minNewReads.
    """
    records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
    fileContents = ''.join(dumps(record) + '\n' for record in records)
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        for readId in ('id0', 'id1', 'id2', 'id3'):
            reads.add(Read(readId, 'A' * 70))
        alignments = DiamondReadsAlignments(reads, 'file.json',
                                            'database.fasta')
        result = TitlesAlignments(alignments).filter(minNewReads=1.0)

        # Either 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.'
        # invalidates 'gi|887699|gb|DQ37780 Monkeypox virus 456' or
        # vice-versa. It depends on Python's dict walking order. Check
        # both directions, making sure exactly one of them holds.
        mummypox = 'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.'
        monkeypox = 'gi|887699|gb|DQ37780 Monkeypox virus 456'
        assertionCount = 0
        for survivor, loser in ((mummypox, monkeypox),
                                (monkeypox, mummypox)):
            if survivor in result:
                invalidated = result.readSetFilter.invalidates(survivor)
                self.assertTrue(loser in invalidated)
                assertionCount += 1
        self.assertEqual(1, assertionCount)
def testExpectedTitles(self):
    """
    A TitlesAlignments instance must contain exactly the expected titles.
    """
    records = (PARAMS, RECORD0, RECORD1, RECORD2)
    fileContents = ''.join(dumps(record) + '\n' for record in records)
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        for readId in ('id0', 'id1', 'id2'):
            reads.add(Read(readId, 'A' * 70))
        alignments = BlastReadsAlignments(reads, 'file.json')
        collected = TitlesAlignments(alignments)
        # The expected list is written in sorted order.
        expected = [
            'gi|887699|gb|DQ37780 Cowpox virus 15',
            'gi|887699|gb|DQ37780 Monkeypox virus 456',
            'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
            'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
            'gi|887699|gb|DQ37780 Squirrelpox virus 55',
        ]
        self.assertEqual(expected, sorted(collected.keys()))
def testWithScoreBetterThan_Bits(self):
    """
    The filter function must work correctly when passed a value for
    withScoreBetterThan when using bit scores.
    """
    records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
    fileContents = ''.join(dumps(record) + '\n' for record in records)
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        for readId in ('id0', 'id1', 'id2', 'id3'):
            reads.add(Read(readId, 'A' * 70))
        alignments = BlastReadsAlignments(reads, 'file.json')
        kept = TitlesAlignments(alignments).filter(withScoreBetterThan=24)
        expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 55']
        self.assertEqual(expected, list(kept.keys()))
def testMinMedianScore_Bits(self):
    """
    The filter function must work correctly when passed a value for
    minMedianScore when using bit scores.
    """
    mockOpener = mockOpen(read_data=(
        dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
        dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
        dumps(RECORD3) + '\n'))
    # Patch open on the builtins module object. The dotted name
    # '__builtin__.open' only exists on Python 2 and cannot be imported
    # on Python 3.
    with patch.object(builtins, 'open', mockOpener):
        reads = Reads()
        reads.add(Read('id0', 'A' * 70))
        reads.add(Read('id1', 'A' * 70))
        reads.add(Read('id2', 'A' * 70))
        reads.add(Read('id3', 'A' * 70))
        readsAlignments = BlastReadsAlignments(reads, 'file.json')
        titlesAlignments = TitlesAlignments(readsAlignments)
        result = titlesAlignments.filter(minMedianScore=22)
        # On Python 3 keys() returns a view, which never compares equal
        # to a list, so materialize it before comparing.
        self.assertEqual(
            [
                'gi|887699|gb|DQ37780 Squirrelpox virus 55',
            ],
            list(result.keys()))
def testMaxTitlesOne(self):
    """
    With maxTitles set to one, the filter function must return just the
    best title.
    """
    records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
    fileContents = ''.join(dumps(record) + '\n' for record in records)
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        for readId in ('id0', 'id1', 'id2', 'id3'):
            reads.add(Read(readId, 'A' * 70))
        alignments = BlastReadsAlignments(reads, 'file.json')
        kept = TitlesAlignments(alignments).filter(
            maxTitles=1, sortOn='maxScore')
        expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 55']
        self.assertEqual(expected, sorted(kept.keys()))
def testMinMatchingReads(self):
    """
    The filter function must work correctly when passed a value for
    minMatchingReads.
    """
    records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
    fileContents = ''.join(dumps(record) + '\n' for record in records)
    opener = mockOpen(read_data=fileContents)
    with patch.object(builtins, 'open', opener):
        reads = Reads()
        for readId in ('id0', 'id1', 'id2', 'id3'):
            reads.add(Read(readId, 'A' * 70))
        alignments = BlastReadsAlignments(reads, 'file.json')
        kept = TitlesAlignments(alignments).filter(minMatchingReads=2)
        expected = ['gi|887699|gb|DQ37780 Cowpox virus 15']
        self.assertEqual(expected, list(kept.keys()))
maxSequenceLen=args.maxSequenceLen, minStart=args.minStart, maxStop=args.maxStop, oneAlignmentPerRead=args.oneAlignmentPerRead, maxHspsPerHit=args.maxHspsPerHit, scoreCutoff=args.scoreCutoff, whitelist=set(args.whitelist) if args.whitelist else None, blacklist=set(args.blacklist) if args.blacklist else None, titleRegex=args.titleRegex, negativeTitleRegex=args.negativeTitleRegex, truncateTitlesAfter=args.truncateTitlesAfter, taxonomy=args.taxonomy) titlesAlignments = TitlesAlignments(readsAlignments).filter( minMatchingReads=args.minMatchingReads, minMedianScore=args.minMedianScore, withScoreBetterThan=args.withScoreBetterThan, minNewReads=args.minNewReads) nTitles = len(titlesAlignments) print('Found %d interesting title%s.' % (nTitles, '' if nTitles == 1 else 's')) if args.earlyExit: print('Matched titles (sorted by best score, descending):') print('\n'.join(titlesAlignments.sortTitles('maxScore'))) sys.exit(0) alignmentPanel(titlesAlignments, sortOn=args.sortOn, interactive=True, outputDir=args.outputDir, idList=parseColors(args.color) if args.color else None,
reads, jsonFiles, args.diamondDatabaseFastaFilename) readsAlignments.filter( minSequenceLen=args.minSequenceLen, maxSequenceLen=args.maxSequenceLen, minStart=args.minStart, maxStop=args.maxStop, oneAlignmentPerRead=args.oneAlignmentPerRead, maxHspsPerHit=args.maxHspsPerHit, scoreCutoff=args.scoreCutoff, whitelist=whitelist, blacklist=blacklist, titleRegex=args.titleRegex, negativeTitleRegex=args.negativeTitleRegex, truncateTitlesAfter=args.truncateTitlesAfter, taxonomy=args.taxonomy) titlesAlignments = TitlesAlignments(readsAlignments).filter( minMatchingReads=args.minMatchingReads, minMedianScore=args.minMedianScore, withScoreBetterThan=args.withScoreBetterThan, minNewReads=args.minNewReads, maxTitles=args.maxTitles, sortOn=args.sortOn, minCoverage=args.minCoverage) nTitles = len(titlesAlignments) print('Found %d interesting title%s.' % (nTitles, '' if nTitles == 1 else 's'), file=sys.stderr) if nTitles: print(titlesAlignments.tabSeparatedSummary(sortOn=args.sortOn)) if args.earlyExit: sys.exit(0) if nTitles == 0: print('No alignment panel generated due to no matching titles.',
scoreCutoff=args.scoreCutoff, whitelist=args.whitelist, blacklist=args.blacklist, titleRegex=args.titleRegex, negativeTitleRegex=args.negativeTitleRegex, truncateTitlesAfter=args.truncateTitlesAfter, taxonomy=args.taxonomy) reads = Reads() if (args.minMatchingReads is None and args.minMedianScore is None and args.withScoreBetterThan is None and args.minNewReads is None): # No need to collect into titles, just get the read ids from # the matching alignments. for readAlignment in readsAlignments: reads.add(readAlignment.read) else: # We need to collect alignments into titles. titlesAlignments = TitlesAlignments(readsAlignments).filter( minMatchingReads=args.minMatchingReads, minMedianScore=args.minMedianScore, withScoreBetterThan=args.withScoreBetterThan, minNewReads=args.minNewReads) for titleAlignments in titlesAlignments.values(): for alignment in titleAlignments.alignments: reads.add(alignment.read) reads.save(sys.stdout) print('Found %d matching reads.' % len(reads), file=sys.stderr)
scoreCutoff=args.scoreCutoff, whitelist=args.whitelist, blacklist=args.blacklist, titleRegex=args.titleRegex, negativeTitleRegex=args.negativeTitleRegex, truncateTitlesAfter=args.truncateTitlesAfter, taxonomy=args.taxonomy) reads = Reads() if (args.minMatchingReads is None and args.minMedianScore is None and args.withScoreBetterThan is None and args.minNewReads is None): # No need to collect into titles, just get the read ids from # the matching alignments. for readAlignment in readsAlignments: reads.add(readAlignment.read) else: # We need to collect alignments into titles. titlesAlignments = TitlesAlignments(readsAlignments).filter( minMatchingReads=args.minMatchingReads, minMedianScore=args.minMedianScore, withScoreBetterThan=args.withScoreBetterThan, minNewReads=args.minNewReads) for titleAlignments in titlesAlignments.itervalues(): for alignment in titleAlignments.alignments: reads.add(alignment.read) reads.save(sys.stdout) print >>sys.stderr, 'Found %d matching reads.' % len(reads)