def testLimitZero(self):
    """
    Passing a limit of zero to the L{LightReadsAlignments} filter must
    result in no read alignments being produced.
    """
    fileData = PARAMS + RECORD0
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        filtered = alignments.filter(limit=0)
        self.assertEqual(0, len(list(filtered)))
def testReadIdCaseSensitive(self):
    """
    A read id regex must be matched case sensitively: an upper case
    pattern must not select the lower case read ids.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        filtered = alignments.filter(readIdRegex='^READ0$')
        self.assertEqual(0, len(list(filtered)))
def testReadIdNoMatches(self):
    """
    A read id regex that matches none of the read ids must yield an
    empty result.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        filtered = alignments.filter(readIdRegex='blah')
        self.assertEqual(0, len(list(filtered)))
def testTitleByNegativeRegexMatchesAll(self):
    """
    A negative title regex that matches every alignment must filter
    everything out, leaving an empty result.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        filtered = alignments.filter(negativeTitleRegex='pox')
        self.assertEqual(0, len(list(filtered)))
def testGetSubjectSequence(self):
    """
    The subject returned by getSubjectSequence must carry the sequence
    and the id of the subject that was asked for.
    """
    opener = mockOpen(read_data=PARAMS)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        found = alignments.getSubjectSequence(COWPOX.id)
        self.assertEqual(COWPOX.sequence, found.sequence)
        self.assertEqual(COWPOX.id, found.id)
def testNoResultNoFilteringArgs(self):
    """
    Calling the L{LightReadsAlignments} filter function without any
    arguments on input that has no hits must yield nothing.
    """
    opener = mockOpen(read_data=PARAMS)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        unfiltered = alignments.filter()
        self.assertEqual(0, len(list(unfiltered)))
def testReadIdAnchored(self):
    """
    A read id regex anchored at both its start and end must select
    just the read whose id it matches.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        matched = list(alignments.filter(readIdRegex='^read0$'))
        self.assertEqual(1, len(matched))
        onlyMatch = matched[0]
        self.assertEqual('read0', onlyMatch.read.id)
def testMinTitleSequenceLengthNoHits(self):
    """
    When filtering on a minimum hit sequence length that none of the
    hit sequences reach, an empty result must be returned.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        filtered = alignments.filter(minSequenceLen=1000000)
        self.assertEqual(0, len(list(filtered)))
def testTitleByRegexCaseInvariant(self):
    """
    Title regex filtering must ignore case: a mixed-case pattern must
    still select the matching title.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        matched = list(alignments.filter(titleRegex='sqUIRRel'))
        self.assertEqual(1, len(matched))
        onlyMatch = matched[0]
        self.assertEqual('read0', onlyMatch.read.id)
        self.assertEqual(SQUIRRELPOX.id, onlyMatch[0].subjectTitle)
def testMaxStopNoHits(self):
    """
    When filtering on a maximum offset in the hit sequence that no HSP
    satisfies, an empty result set must be returned.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        filtered = alignments.filter(maxStop=100)
        self.assertEqual(0, len(list(filtered)))
def testLimitOne(self):
    """
    Limiting L{LightReadsAlignments} to a single hit must produce just
    the first read, even though the input holds two records.
    """
    fileData = PARAMS + RECORD0 + RECORD1
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        limited = list(alignments.filter(limit=1))
        self.assertEqual(1, len(limited))
        first = limited[0]
        self.assertEqual('read0', first.read.id)
def testMinStartAndMaxstop(self):
    """
    Filtering on a minimum and a maximum offset in the hit sequence at
    the same time must be possible.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        matched = list(alignments.filter(minStart=9000, maxStop=12000))
        self.assertEqual(1, len(matched))
        onlyMatch = matched[0]
        self.assertEqual('read1', onlyMatch.read.id)
        self.assertEqual(2, len(onlyMatch))
def testTitleByRegexOneAlignments(self):
    """
    Title regex filtering must work when just some of the alignments
    of a hit have titles that match the regex.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        matched = list(alignments.filter(titleRegex='Mummy'))
        self.assertEqual(1, len(matched))
        onlyMatch = matched[0]
        self.assertEqual('read1', onlyMatch.read.id)
        self.assertEqual(MUMMYPOX.id, onlyMatch[0].subjectTitle)
def testReadId(self):
    """
    Filtering alignments with a regex for read ids must keep exactly
    the reads whose ids match, in their original order.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        matched = list(alignments.filter(readIdRegex='read[12]'))
        self.assertEqual(2, len(matched))
        first, second = matched
        self.assertEqual('read1', first.read.id)
        self.assertEqual('read2', second.read.id)
def testOneAlignmentPerRead(self):
    """
    Asking L{LightReadsAlignments} for one alignment per read must
    leave the read with a single alignment (here, the Squirrelpox one).
    """
    fileData = PARAMS + RECORD0
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        kept = list(alignments.filter(oneAlignmentPerRead=True))
        self.assertEqual(1, len(kept))
        readAlignments = kept[0]
        self.assertEqual(1, len(readAlignments))
        self.assertEqual(SQUIRRELPOX.id, readAlignments[0].subjectTitle)
def testOneHitNoFilteringArgs(self):
    """
    Calling the L{LightReadsAlignments} filter function without any
    arguments on input holding one hit must yield just that hit.
    """
    fileData = PARAMS + RECORD0
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        unfiltered = list(alignments.filter())
        self.assertEqual(1, len(unfiltered))
        onlyHit = unfiltered[0]
        self.assertEqual('read0', onlyHit.read.id)
def testMaxTitleSequenceLengthNoHits(self):
    """
    When filtering on a maximum hit sequence length that is one less
    than the length of the cowpox sequence, no alignments must be
    returned.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        tooShort = len(COWPOX.sequence) - 1
        filtered = alignments.filter(maxSequenceLen=tooShort)
        self.assertEqual(0, len(list(filtered)))
def testMaxStop(self):
    """
    Filtering on a maximum offset in the hit sequence must keep only
    the alignments that satisfy it.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        matched = list(alignments.filter(maxStop=1500))
        self.assertEqual(1, len(matched))
        onlyMatch = matched[0]
        self.assertEqual('read2', onlyMatch.read.id)
        self.assertEqual(1, len(onlyMatch))
        self.assertEqual(COWPOX.id, onlyMatch[0].subjectTitle)
def testTitleByRegexMatchingAllWithBlacklist(self):
    """
    A title regex that matches all alignments must keep everything
    apart from the titles that are blacklisted.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        excluded = [SQUIRRELPOX.id, CATPOX.id]
        filtered = alignments.filter(titleRegex='pox', blacklist=excluded)
        kept = list(filtered)
        self.assertEqual(2, len(kept))
        first, second = kept
        self.assertEqual('read1', first.read.id)
        self.assertEqual('read2', second.read.id)
def testMinStart(self):
    """
    Filtering on a minimum offset in the hit sequence must keep only
    the alignments that satisfy it.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        matched = list(alignments.filter(minStart=15300))
        self.assertEqual(1, len(matched))
        onlyMatch = matched[0]
        self.assertEqual('read0', onlyMatch.read.id)
        self.assertEqual(1, len(onlyMatch))
        self.assertEqual('Squirrelpox virus 1296/99',
                         onlyMatch[0].subjectTitle)
def testTitleByNegativeRegexOneAlignment(self):
    """
    When a negative title regex rules out only some of the alignments
    of a hit, just those alignments must be dropped and the hit itself
    must remain in the result.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        kept = list(alignments.filter(negativeTitleRegex='Mummy'))
        self.assertEqual(3, len(kept))
        # The second hit is the one that loses an alignment.
        second = kept[1]
        self.assertEqual('read1', second.read.id)
        self.assertEqual(1, len(second))
        self.assertEqual(MONKEYPOX.id, second[0].subjectTitle)
def testRepeatedFilter_MinStartThenMaxstop(self):
    """
    Calling filter more than once, with different parameters each
    time, must apply all the requested filtering when the alignments
    are subsequently iterated.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        # The return values are deliberately ignored: the instance
        # itself is iterated below.
        alignments.filter(minStart=9000)
        alignments.filter(maxStop=12000)
        matched = list(alignments)
        self.assertEqual(1, len(matched))
        onlyMatch = matched[0]
        self.assertEqual('read1', onlyMatch.read.id)
        self.assertEqual(2, len(onlyMatch))
def testMinTitleSequenceLength(self):
    """
    Filtering on a minimum hit sequence length must keep only the
    alignments against sufficiently long sequences.
    """
    fileData = PARAMS + RECORD0 + RECORD1 + RECORD2
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        threshold = len(MUMMYPOX.sequence)
        matched = list(alignments.filter(minSequenceLen=threshold))
        self.assertEqual(1, len(matched))
        onlyMatch = matched[0]
        self.assertEqual(READ1.id, onlyMatch.read.id)
        self.assertEqual(1, len(onlyMatch))
        self.assertEqual(MUMMYPOX.id, onlyMatch[0].subjectTitle)
def testScoreCutoffRemovesEntireAlignment(self):
    """
    When the L{LightReadsAlignments} filter function is given a
    scoreCutoff that leaves an alignment without any HSPs, that
    alignment must be dropped completely. Here only the Squirrelpox
    alignment of the read is expected to remain.
    """
    fileData = PARAMS + RECORD0
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        cutoff = READ0_SQUIRRELPOX_SCORE - 0.01
        kept = list(alignments.filter(scoreCutoff=cutoff))
        self.assertEqual(1, len(kept))
        readAlignments = kept[0]
        self.assertEqual(1, len(readAlignments))
        self.assertEqual(SQUIRRELPOX.id, readAlignments[0].subjectTitle)
def testReadSetFilterStrict(self):
    """
    The filter function must work correctly when passed a 1.0 value
    for minNewReads.
    """
    mockOpener = mockOpen(read_data=(PARAMS + RECORD0 + RECORD1 +
                                     RECORD2 + RECORD3))
    with patch.object(builtins, 'open', mockOpener):
        readsAlignments = LightReadsAlignments('file.json', DB)
        titlesAlignments = TitlesAlignments(readsAlignments)
        result = titlesAlignments.filter(minNewReads=1.0)

        # Either MUMMYPOX.id invalidates MONKEYPOX.id or vice-versa. It
        # depends on Python's dict walking order. Check for both,
        # making sure just one of them is true.
        assertionCount = 0
        if MUMMYPOX.id in result:
            # assertIn (rather than assertTrue on an 'in' expression)
            # reports both operands if the check fails.
            self.assertIn(MONKEYPOX.id,
                          result.readSetFilter.invalidates(MUMMYPOX.id))
            assertionCount += 1
        if MONKEYPOX.id in result:
            self.assertIn(MUMMYPOX.id,
                          result.readSetFilter.invalidates(MONKEYPOX.id))
            assertionCount += 1

        # Exactly one of the two titles may have survived the filter.
        self.assertEqual(1, assertionCount)
def testTwoCompressedJSONInputs(self):
    """
    When L{LightReadsAlignments} is given two compressed (bz2) JSON
    files, each holding a parameters section followed by one record,
    both records must be read, giving a result with 2 records.
    """

    class FakeBZ2Opener(object):
        """
        Hand out the contents of the first file on the first call and
        the contents of the second file on every later call.
        """
        def __init__(self):
            self.unused = True

        def sideEffect(self, _ignoredFilename):
            if not self.unused:
                return BZ2([PARAMS, RECORD1])
            self.unused = False
            return BZ2([PARAMS, RECORD0])

    opener = FakeBZ2Opener()
    with patch.object(bz2, 'BZ2File') as mockMethod:
        mockMethod.side_effect = opener.sideEffect
        filenames = ['file1.json.bz2', 'file2.json.bz2']
        alignments = LightReadsAlignments(filenames, DB)
        records = list(alignments)
        self.assertEqual(2, len(records))
        first, second = records
        self.assertEqual('read0', first.read.id)
        self.assertEqual('read1', second.read.id)
def testTitleCollection(self):
    """
    When a title appears in the alignments of more than one read, the
    data from all of those reads must be gathered under that title.
    """
    fileData = PARAMS + RECORD2 + RECORD3
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        byTitle = TitlesAlignments(alignments)
        cowpoxAlignments = byTitle[COWPOX.id]

        self.assertEqual(2, len(cowpoxAlignments))
        self.assertEqual(COWPOX.id, cowpoxAlignments.subjectTitle)
        self.assertEqual(len(COWPOX.sequence),
                         cowpoxAlignments.subjectLength)

        first = cowpoxAlignments[0]
        self.assertEqual(READ2, first.read)
        self.assertEqual(READ2_COWPOX_SCORE, first.hsps[0].score.score)

        second = cowpoxAlignments[1]
        self.assertEqual(READ3, second.read)
        self.assertEqual(READ3_COWPOX_SCORE, second.hsps[0].score.score)
def testIncompatibleParameters(self):
    """
    If two compressed (bz2) JSON files with incompatible parameters
    are given to L{LightReadsAlignments}, a C{ValueError} must be
    raised when the files are read.
    """

    class SideEffect(object):
        """
        Return the unmodified parameters (plus a record) on the first
        call, and parameters with a changed 'limitPerLandmark' value
        on every later call.
        """
        def __init__(self):
            self._first = True

        def sideEffect(self, _ignoredFilename):
            if self._first:
                self._first = False
                return BZ2([PARAMS, RECORD0])
            else:
                dbParams = loads(PARAMS)
                dbParams['limitPerLandmark'] = 100
                return BZ2([dumps(dbParams) + '\n', RECORD1])

    sideEffect = SideEffect()
    with patch.object(bz2, 'BZ2File') as mockMethod:
        mockMethod.side_effect = sideEffect.sideEffect
        # The segments containing '\.' are raw strings so that the
        # backslash is not an invalid string escape sequence. The
        # segment containing '\n' is left as a normal string, so the
        # resulting pattern is exactly what it was before.
        error = (r"^Incompatible light matter parameters found\. The "
                 r"parameters in file2\.json\.bz2 differ from those "
                 r"originally found in file1\.json\.bz2\. Summary of "
                 "differences:\n Param 'limitPerLandmark' values 10 "
                 r"and 100 differ\.$")
        readsAlignments = LightReadsAlignments(
            ['file1.json.bz2', 'file2.json.bz2'], DB)
        # Reading happens when the instance is iterated, hence 'list'.
        six.assertRaisesRegex(self, ValueError, error, list,
                              readsAlignments)
def testExpectedTitleDetails(self):
    """
    Each TitleAlignments instance held by a TitlesAlignments instance
    must have the expected title, subject length, read and score.
    """
    fileData = PARAMS + RECORD0
    with patch.object(builtins, 'open', mockOpen(read_data=fileData)):
        alignments = LightReadsAlignments('file.json', DB)
        byTitle = TitlesAlignments(alignments)

        # Each subject, with the score READ0 is expected to have
        # against it.
        expected = (
            (SQUIRRELPOX, READ0_SQUIRRELPOX_SCORE),
            (CATPOX, READ0_CATPOX_SCORE),
        )

        for subject, score in expected:
            details = byTitle[subject.id]
            self.assertEqual(subject.id, details.subjectTitle)
            self.assertEqual(len(subject.sequence),
                             details.subjectLength)
            self.assertEqual(1, len(details))
            self.assertEqual(READ0, details[0].read)
            self.assertEqual(score, details[0].hsps[0].score.score)
def testScoreTitle(self):
    """
    The scoreTitle found on the params of a L{LightReadsAlignments}
    instance must be 'Score'.
    """
    opener = mockOpen(read_data=PARAMS)
    with patch.object(builtins, 'open', opener):
        alignments = LightReadsAlignments('file.json', DB)
        title = alignments.params.scoreTitle
        self.assertEqual('Score', title)