Example #1
0
    def testTitleCollection(self):
        """
        When one title is matched by several reads, the alignments stored
        for that title must hold the data of every matching read.
        """
        jsonLines = ''.join(
            dumps(item) + '\n' for item in (PARAMS, RECORD2, RECORD3))
        mockOpener = mockOpen(read_data=jsonLines)
        with patch.object(builtins, 'open', mockOpener):
            reads = Reads()
            read2 = Read('id2', 'A' * 70)
            read3 = Read('id3', 'A' * 70)
            for read in (read2, read3):
                reads.add(read)
            titlesAlignments = TitlesAlignments(
                BlastReadsAlignments(reads, 'file.json'))

            title = 'gi|887699|gb|DQ37780 Cowpox virus 15'
            titleAlignments = titlesAlignments[title]
            self.assertEqual(title, titleAlignments.subjectTitle)
            self.assertEqual(30000, titleAlignments.subjectLength)
            self.assertEqual(2, len(titleAlignments))

            # The alignments are expected in the order in which the reads
            # were added, each with a single HSP scoring 20.
            for index, expectedRead in enumerate((read2, read3)):
                self.assertEqual(expectedRead, titleAlignments[index].read)
                self.assertEqual(HSP(20), titleAlignments[index].hsps[0])
Example #2
0
 def testFilterWithNoArguments(self):
     """
     Calling filter with no arguments must give back a TitlesAlignments
     instance that holds every title of the original.
     """
     expected = [
         'gi|887699|gb|DQ37780 Cowpox virus 15',
         'gi|887699|gb|DQ37780 Monkeypox virus 456',
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',
     ]
     records = (PARAMS, RECORD0, RECORD1, RECORD2)
     mockOpener = mockOpen(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         result = TitlesAlignments(readsAlignments).filter()
         self.assertEqual(expected, sorted(result.keys()))
Example #3
0
 def testMaxScore_Bits(self):
     """
     Sorting on max score must work when scores are bit scores, including a
     secondary sort on title.
     """
     # Find the module that really owns open(). Patching the hard-coded
     # name '__builtin__.open' only works on Python 2; trying __builtin__
     # first and falling back to builtins works on Python 2 and 3.
     try:
         import __builtin__ as builtinsModule
     except ImportError:
         import builtins as builtinsModule

     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtinsModule, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('maxScore')
         # The trailing comments give each title's best bit score. Titles
         # with equal scores are expected in title order.
         self.assertEqual([
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 25
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 20
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 20
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 20
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
         ], result)
Example #4
0
 def testMinMedianScore_EValue(self):
     """
     The filter function must work correctly when given a minMedianScore
     value and the scores are e values.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mockOpen(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id%d' % index, 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         filtered = TitlesAlignments(readsAlignments).filter(
             minMedianScore=1e-9)
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered.keys()))
Example #5
0
 def testCoverageIncludesAll(self):
     """
     Filtering with a minCoverage of 0.0 must return a TitlesAlignments
     instance with all titles, as every title has sufficient coverage.
     """
     allTitles = [
         'gi|887699|gb|DQ37780 Cowpox virus 15',
         'gi|887699|gb|DQ37780 Monkeypox virus 456',
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',
     ]
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mockOpen(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(minCoverage=0.0)
         self.assertEqual(allTitles, sorted(result.keys()))
Example #6
0
 def testWithScoreBetterThan_EValue(self):
     """
     The filter function must work correctly when given a
     withScoreBetterThan value and the scores are e values.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mockOpen(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id%d' % index, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta',
             scoreClass=LowerIsBetterScore)
         filtered = TitlesAlignments(readsAlignments).filter(
             withScoreBetterThan=1e-10)
         # Only one title is expected to survive, so no sorting is needed.
         expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 1296/99']
         self.assertEqual(expected, list(filtered.keys()))
Example #7
0
    def add(self, virusTitle, sampleName):
        """
        Add a virus title, sample name combination and get its FASTA file
        name. Write the FASTA file if it does not already exist.

        @param virusTitle: A C{str} virus title.
        @param sampleName: A C{str} sample name.
        @raise KeyError: If the protein grouper has no entry for the virus
            title / sample name combination.
        @raise ValueError: If the protein grouper has an entry for the
            combination but it contains no protein matches (there would be
            nothing to save and no output directory to save into).
        @return: A C{str} FASTA file name holding all the reads (without
            duplicates) from the sample that matched the proteins in the given
            virus.
        """
        # Each new virus title / sample name gets the next unused index.
        virusIndex = self._viruses.setdefault(virusTitle, len(self._viruses))
        sampleIndex = self._samples.setdefault(sampleName, len(self._samples))
        key = (virusIndex, sampleIndex)

        try:
            # The file for this combination has already been written.
            return self._fastaFilenames[key]
        except KeyError:
            pass

        proteinMatches = list(
            self._proteinGrouper.virusTitles[virusTitle][sampleName])
        if not proteinMatches:
            # Without this check the code below would fail with an obscure
            # error when looking for the output directory.
            raise ValueError(
                'No protein matches found for virus title %r and sample '
                'name %r.' % (virusTitle, sampleName))

        result = Reads()
        for proteinMatch in proteinMatches:
            for read in FastaReads(proteinMatch['fastaFilename'],
                                   checkAlphabet=0):
                result.add(read)

        # The file is written to the output directory of the last protein
        # match.
        saveFilename = join(
            proteinMatches[-1]['outDir'],
            'virus-%d-sample-%d.fasta' % (virusIndex, sampleIndex))
        result.filter(removeDuplicates=True).save(saveFilename)
        self._fastaFilenames[key] = saveFilename
        return saveFilename
Example #8
0
 def testTitle(self):
     """
     Sorting titles by title must work.
     """
     titlesInOrder = [
         'gi|887699|gb|DQ37780 Cowpox virus 15',
         'gi|887699|gb|DQ37780 Monkeypox virus 456',
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',
     ]
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mockOpen(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id%d' % index, 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         result = TitlesAlignments(readsAlignments).sortTitles('title')
         self.assertEqual(titlesInOrder, result)
Example #9
0
 def testMaxScore_EValue(self):
     """
     Sorting on max score must work when scores are e values, including a
     secondary sort on title.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mockOpen(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         result = TitlesAlignments(readsAlignments).sortTitles('maxScore')
         # With LowerIsBetterScore the title with the smallest e value is
         # expected first. The trailing comments give each title's best
         # e value.
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
         ]
         self.assertEqual(expected, result)
Example #10
0
    def testExpectedTitleDetails(self):
        """
        Each TitleAlignments instance held by a TitlesAlignments instance
        must have the expected attributes.
        """
        # (title, subject length, HSP score) expected for each title.
        expectations = (
            ('gi|887699|gb|DQ37780 Squirrelpox virus 1296/99', 37000, 20),
            ('gi|887699|gb|DQ37780 Squirrelpox virus 55', 38000, 25),
        )
        mockOpener = mockOpen(
            read_data=''.join(dumps(item) + '\n'
                              for item in (PARAMS, RECORD0)))
        with patch.object(builtins, 'open', mockOpener):
            reads = Reads()
            read = Read('id0', 'A' * 70)
            reads.add(read)
            titlesAlignments = TitlesAlignments(
                BlastReadsAlignments(reads, 'file.json'))

            for title, subjectLength, score in expectations:
                titleAlignments = titlesAlignments[title]
                self.assertEqual(title, titleAlignments.subjectTitle)
                self.assertEqual(subjectLength,
                                 titleAlignments.subjectLength)
                self.assertEqual(1, len(titleAlignments))
                self.assertEqual(read, titleAlignments[0].read)
                self.assertEqual(HSP(score), titleAlignments[0].hsps[0])
Example #11
0
 def testLengthOne(self):
     """
     De-duping a FASTA list holding a single item must give back that same
     single item.
     """
     reads = Reads()
     reads.add(Read('id', 'GGG'))
     deduped = list(dedupFasta(reads))
     expected = [Read('id', 'GGG')]
     self.assertEqual(deduped, expected)
Example #12
0
 def testLength(self):
     """
     Sorting on sequence length must work, including a secondary sort on
     title.
     """
     # Find the module that really owns open(). Patching the hard-coded
     # name '__builtin__.open' only works on Python 2; trying __builtin__
     # first and falling back to builtins works on Python 2 and 3.
     try:
         import __builtin__ as builtinsModule
     except ImportError:
         import builtins as builtinsModule

     mockOpener = mockOpen(read_data=(
         dumps(PARAMS) + '\n' + dumps(RECORD0) + '\n' +
         dumps(RECORD1) + '\n' + dumps(RECORD2) + '\n' +
         dumps(RECORD3) + '\n'))
     with patch.object(builtinsModule, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         reads.add(Read('id1', 'A' * 70))
         reads.add(Read('id2', 'A' * 70))
         reads.add(Read('id3', 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.sortTitles('length')
         # The trailing comments give each subject's length. Titles with
         # equal lengths are expected in title order.
         self.assertEqual([
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 38000
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 37000
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 35000
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 35000
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 30000
         ], result)
Example #13
0
 def testMaxTitlesTwoSortOnLength(self):
     """
     With maxTitles set to 2 and sortOn set to 'length', the filter
     function must return the two titles with the longest sequences.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mockOpen(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id%d' % index, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta')
         filtered = TitlesAlignments(readsAlignments).filter(
             maxTitles=2, sortOn='length')
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered.keys()))
Example #14
0
 def testTabSeparatedSummary(self):
     """
     The tab-separated summary function must return the correct result.
     """
     # One tuple of fields per expected output line. Fields are joined by
     # TABs and lines by newlines, with no newline after the last line.
     rows = (
         ('0.000297', '20.000000', '20.000000', '1', '1', '37000',
          'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'),
         ('0.000289', '25.000000', '25.000000', '1', '1', '38000',
          'gi|887699|gb|DQ37780 Squirrelpox virus 55'),
     )
     expected = '\n'.join('\t'.join(row) for row in rows)
     mockOpener = mockOpen(
         read_data=''.join(dumps(item) + '\n'
                           for item in (PARAMS, RECORD0)))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         titlesAlignments = TitlesAlignments(
             DiamondReadsAlignments(reads, 'f.json', 'db'))
         summary = titlesAlignments.tabSeparatedSummary(sortOn='title')
         self.assertEqual(expected, summary)
Example #15
0
 def testReadSetFilterAllowAnything(self):
     """
     The filter function must work correctly when given a minNewReads value
     of 0.0, i.e., one that considers any read set sufficiently novel.
     """
     allTitles = [
         'gi|887699|gb|DQ37780 Cowpox virus 15',
         'gi|887699|gb|DQ37780 Monkeypox virus 456',
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',
     ]
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mockOpen(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         result = titlesAlignments.filter(minNewReads=0.0)
         self.assertEqual(allTitles, sorted(result.keys()))
Example #16
0
 def testCoverageIncludesSome(self):
     """
     Filtering on minCoverage must return a TitlesAlignments instance with
     only the expected titles when only some titles have sufficient
     coverage.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mockOpen(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for index in range(4):
             reads.add(Read('id%d' % index, 'A' * 70))
         readsAlignments = BlastReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         # To understand why the following produces the result it does,
         # you need to look at the HSP coverage in sample_data.py and
         # calculate the coverage by hand.
         result = titlesAlignments.filter(minCoverage=0.0011)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         ]
         self.assertEqual(expected, sorted(result.keys()))
Example #17
0
 def testRemovalOfIdenticalSequences(self):
     """
     De-duping a list holding two copies of the same sequence must leave
     just one copy.
     """
     reads = Reads()
     for _ in range(2):
         reads.add(Read('id', 'GGG'))
     deduped = list(dedupFasta(reads))
     expected = [Read('id', 'GGG')]
     self.assertEqual(deduped, expected)
Example #18
0
 def __init__(self, _files, readClass=SSAARead, upperCase=False):
     """
     Store the construction arguments and initialize the Reads base class.

     @param _files: Either a single item or a C{list} / C{tuple} of items.
         A single item is wrapped in a list so that C{self._files} is
         always a sequence. (Presumably these are file names or file
         objects - confirm against the code that reads C{self._files}.)
     @param readClass: Stored as C{self._readClass}; defaults to
         C{SSAARead}.
     @param upperCase: Stored as C{self._upperCase}; defaults to C{False}.
     """
     if not isinstance(_files, (list, tuple)):
         _files = [_files]
     self._files = _files
     self._readClass = readClass
     self._upperCase = upperCase
     # The base class initializer is reached in a Python-version specific
     # way.
     if not PY3:
         Reads.__init__(self)
     else:
         super().__init__()
Example #19
0
 def testSummary(self):
     """
     The summary function must return the correct result.
     """
     def entry(subjectTitle, subjectLength, coverage, score):
         # Every expected title has one read and one HSP, so its best and
         # median scores are the same.
         return {
             'bestScore': score,
             'coverage': coverage,
             'hspCount': 1,
             'medianScore': score,
             'readCount': 1,
             'subjectLength': subjectLength,
             'subjectTitle': subjectTitle,
         }

     expected = [
         entry('gi|887699|gb|DQ37780 Monkeypox virus 456',
               35000, 0.00031428571428571427, 20.0),
         entry('gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
               35000, 0.00031428571428571427, 20.0),
         entry('gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
               37000, 0.0002972972972972973, 20.0),
         entry('gi|887699|gb|DQ37780 Squirrelpox virus 55',
               38000, 0.00028947368421052634, 25.0),
     ]
     records = (PARAMS, RECORD0, RECORD1)
     mockOpener = mockOpen(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         self.assertEqual(
             expected, list(titlesAlignments.summary(sortOn='title')))
Example #20
0
 def testManuallyAddedReadsLength(self):
     """
     The length of a Reads instance must equal the number of reads that
     were manually added to it.
     """
     reads = Reads()
     for readId, sequence in (('id1', 'AT'), ('id2', 'AC')):
         reads.add(Read(readId, sequence))
     self.assertEqual(2, len(reads))
Example #21
0
 def testRemovalOfIdenticalSequencesWithDifferingIds(self):
     """
     De-duping a list holding two copies of the same sequence must leave
     just one copy, even when the two reads have different ids.
     """
     reads = Reads()
     for readId in ('id1', 'id2'):
         reads.add(Read(readId, 'GGG'))
     deduped = list(dedupFasta(reads))
     # The read that is kept is expected to be the first one added.
     expected = [Read('id1', 'GGG')]
     self.assertEqual(deduped, expected)
Example #22
0
    def reads(self):
        """
        Collect the read of every alignment of this title.

        @return: An instance of C{dark.reads.Reads} to which the read of
            each alignment found by iterating over C{self} has been added.
        """
        matched = Reads()
        for alignment in self:
            read = alignment.read
            matched.add(read)
        return matched
Example #23
0
 def testManuallyAddedReads(self):
     """
     Listing a Reads instance must produce the reads that were manually
     added to it, in the order they were added.
     """
     reads = Reads()
     read1 = Read('id1', 'AT')
     read2 = Read('id2', 'AC')
     expected = [read1, read2]
     for read in expected:
         reads.add(read)
     self.assertEqual(expected, list(reads))
Example #24
0
 def testFilterOnMaxLength(self):
     """
     Filtering on a maximum length must drop reads that are too long.
     """
     reads = Reads()
     tooLong = Read('id1', 'ATCG')
     shortEnough = Read('id2', 'ACG')
     for read in (tooLong, shortEnough):
         reads.add(read)
     remaining = list(reads.filter(maxLength=3))
     self.assertEqual([shortEnough], remaining)
Example #25
0
 def testFilterWithMinLengthEqualToMaxLength(self):
     """
     When the minimum and maximum lengths passed to filter are equal, a
     read of exactly that length must be returned.
     """
     reads = Reads()
     lengthFour = Read('id1', 'ATCG')
     lengthThree = Read('id2', 'ACG')
     for read in (lengthFour, lengthThree):
         reads.add(read)
     remaining = list(reads.filter(minLength=4, maxLength=4))
     self.assertEqual([lengthFour], remaining)
Example #26
0
 def testFilterOnLengthEverythingMatches(self):
     """
     Filtering on length must return all the reads when every one of them
     satisfies the length requirements.
     """
     reads = Reads()
     allReads = [Read('id1', 'ATCG'), Read('id2', 'ACG')]
     for read in allReads:
         reads.add(read)
     remaining = list(reads.filter(minLength=2, maxLength=5))
     self.assertEqual(allReads, remaining)
Example #27
0
 def testFilterOnLengthNothingMatches(self):
     """
     Filtering on length must return no reads when none of them satisfies
     the length requirements.
     """
     reads = Reads()
     for readId, sequence in (('id1', 'ATCG'), ('id2', 'ACG')):
         reads.add(Read(readId, sequence))
     remaining = list(reads.filter(minLength=10, maxLength=15))
     self.assertEqual([], remaining)
Example #28
0
 def testSaveWithUnknownFormat(self):
     """
     A Reads instance must raise ValueError if asked to save in an unknown
     format.
     """
     reads = Reads()
     read1 = Read('id1', 'AT', '!!')
     read2 = Read('id2', 'AC')
     reads.add(read1)
     reads.add(read2)
     # The message is used as a regular expression, so the final period is
     # escaped to match literally.
     error = "Save format must be either 'fasta' or 'fastq'\\."
     # assertRaisesRegexp is a deprecated alias that no longer exists in
     # Python 3.12, while Python 2 only has the old name. Use the modern
     # name when it is available.
     assertRaisesRegex = (getattr(self, 'assertRaisesRegex', None) or
                          self.assertRaisesRegexp)
     assertRaisesRegex(ValueError, error, reads.save, 'file', 'xxx')
Example #29
0
 def testSaveAsFASTQFailsOnReadWithNoQuality(self):
     """
     A Reads instance must raise a ValueError if asked to save in FASTQ
     format and there is a read with no quality present.
     """
     reads = Reads()
     read1 = Read('id1', 'AT', '!!')
     # The second read is created without a quality string.
     read2 = Read('id2', 'AC')
     reads.add(read1)
     reads.add(read2)
     error = "Read 'id2' has no quality information"
     # assertRaisesRegexp is a deprecated alias that no longer exists in
     # Python 3.12, while Python 2 only has the old name. Use the modern
     # name when it is available.
     assertRaisesRegex = (getattr(self, 'assertRaisesRegex', None) or
                          self.assertRaisesRegexp)
     assertRaisesRegex(ValueError, error, reads.save, 'file', 'fastq')
Example #30
0
 def testSaveToFileDescriptor(self):
     """
     When save is given something other than a string filename, a Reads
     instance must write to it as a file-like object.
     """
     reads = Reads()
     for readId, sequence in (('id1', 'AT'), ('id2', 'AC')):
         reads.add(Read(readId, sequence))
     fp = StringIO()
     reads.save(fp)
     written = fp.getvalue()
     self.assertEqual('>id1\nAT\n>id2\nAC\n', written)
Example #31
0
 def testUnknown(self):
     """
     Asking to sort on an unknown attribute must raise C{ValueError}.
     """
     paramsOnly = dumps(PARAMS) + '\n'
     mockOpener = mockOpen(read_data=paramsOnly)
     with patch.object(builtins, 'open', mockOpener):
         readsAlignments = DiamondReadsAlignments(
             Reads(), 'file.json', 'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         with self.assertRaises(ValueError):
             titlesAlignments.sortTitles('xxx')
Example #32
0
 def testEmpty(self):
     """
     titleCounts must return an empty dictionary when it is passed a
     readsAlignments that is empty.
     """
     paramsOnly = dumps(PARAMS) + '\n'
     mockOpener = mockOpen(read_data=paramsOnly)
     with patch.object(builtins, 'open', mockOpener):
         readsAlignments = DiamondReadsAlignments(
             Reads(), 'file.json', 'database.fasta')
         counts = titleCounts(readsAlignments)
         self.assertEqual({}, counts)
Example #33
0
 def testAddTitleRepeat(self):
     """
     The addTitle function must raise a C{KeyError} if an attempt is made
     to add a pre-existing title to a TitlesAlignments instance.
     """
     mockOpener = mockOpen(read_data=(dumps(PARAMS) + '\n' +
                                      dumps(RECORD0) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json',
                                                  'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         title = 'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99'
         titleAlignments = TitleAlignments(title, 55)
         # The message is a regular expression, so the literal '|' and '.'
         # characters are escaped. Raw strings keep the backslashes
         # without relying on invalid string escape sequences.
         error = (r"Title 'gi\|887699\|gb\|DQ37780 Squirrelpox virus "
                  r"1296/99' already present in TitlesAlignments instance\.")
         six.assertRaisesRegex(self, KeyError, error,
                               titlesAlignments.addTitle, title,
                               titleAlignments)
Example #34
0
 def testUnknownSortOn(self):
     """
     The filter function must raise a ValueError if the passed sortOn
     value isn't recognized.
     """
     mockOpener = mockOpen(read_data=(dumps(PARAMS) + '\n' +
                                      dumps(RECORD0) + '\n'))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         reads.add(Read('id0', 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json',
                                                  'database.fasta')
         titlesAlignments = TitlesAlignments(readsAlignments)
         # The message is a regular expression anchored at both ends. Raw
         # strings keep the backslash that escapes the final period
         # without relying on an invalid string escape sequence.
         error = (r'^Sort attribute must be one of "length", "maxScore", '
                  r'"medianScore", "readCount", "title"\.$')
         six.assertRaisesRegex(self,
                               ValueError,
                               error,
                               titlesAlignments.filter,
                               maxTitles=0,
                               sortOn='unknown')
Example #35
0
    def testHeterogeneousReadsTwoDifferences(self):
        """
        heterogeneousSites must return the expected result, with two
        entries in each dictionary, when the given reads differ at two
        sites.
        """
        read = Read('id', 'ACCG')
        reads = Reads([read, Read('id2', 'TCCC')])

        # The reads differ at offsets 0 and 3.
        baseCounts = {
            0: {'A': 1, 'T': 1},
            3: {'G': 1, 'C': 1},
        }
        readIdsByBase = {
            0: {'A': ['id'], 'T': ['id2']},
            3: {'C': ['id2'], 'G': ['id']},
        }
        sites = [0, 3]
        self.assertEqual((baseCounts, readIdsByBase, sites),
                         heterogeneousSites(reads, len(read), 1))
Example #36
0
 def testEmpty(self):
     """
     sortTitles must return an empty list when there are no titles to
     sort.
     """
     paramsOnly = dumps(PARAMS) + '\n'
     mockOpener = mockOpen(read_data=paramsOnly)
     with patch.object(builtins, 'open', mockOpener):
         readsAlignments = DiamondReadsAlignments(
             Reads(), 'file.json', 'database.fasta')
         sortedTitles = TitlesAlignments(readsAlignments).sortTitles(
             'title')
         self.assertEqual([], sortedTitles)
Example #37
0
 def testRecombinantFile(self):
     """
     After a run, recombinantFile must return the expected file name,
     located in C{self.ra.tmpDir}.
     """
     sequences = (
         ('id1', 'A' * 200 + 'G' * 200),
         ('id2', 'A' * 400),
         ('id3', 'G' * 400),
     )
     reads = Reads([Read(readId, sequence)
                    for readId, sequence in sequences])
     self.ra.run(reads)
     expected = join(self.ra.tmpDir, _OUTPUT_PREFIX + '.3s.rec')
     self.assertEqual(expected, self.ra.recombinantFile())
Example #38
0
 def testEmpty(self):
     """
     A TitlesAlignments instance built from an empty readsAlignments
     instance must have no titles.
     """
     paramsOnly = dumps(PARAMS) + '\n'
     mockOpener = mockOpen(read_data=paramsOnly)
     with patch.object(builtins, 'open', mockOpener):
         readsAlignments = DiamondReadsAlignments(
             Reads(), 'file.json', 'database.fasta')
         titles = list(TitlesAlignments(readsAlignments).keys())
         self.assertEqual([], titles)
Example #39
0
 def testRunThresholdString(self):
     """
     The run function must give an exit status of 0 when it is passed a
     non-default threshold argument as a C{str}.
     """
     sequences = (
         ('id1', 'A' * 200 + 'G' * 200),
         ('id2', 'A' * 400),
         ('id3', 'G' * 400),
     )
     reads = Reads([Read(readId, sequence)
                    for readId, sequence in sequences])
     completed = self.ra.run(reads, t='0.0')
     self.assertEqual(0, completed.returncode)
Example #40
0
    def testHeterogeneousReadsFractionHigh(self):
        """
        heterogeneousSites must return the expected result, with one entry
        in each dictionary, when the given reads differ and are less
        homogeneous than specified by the homogeneity cutoff fraction.
        """
        read = Read('id', 'ACCG')
        reads = Reads([read, Read('id2', 'ACCC'), Read('id3', 'ACCC')])

        # The reads differ only at offset 3.
        baseCounts = {3: {'C': 2, 'G': 1}}
        readIdsByBase = {3: {'G': ['id'], 'C': ['id2', 'id3']}}
        sites = [3]
        self.assertEqual((baseCounts, readIdsByBase, sites),
                         heterogeneousSites(reads, len(read), 0.7))
Example #41
0
    def testTwoJSONInputsWithSubjectInCommon(self):
        """
        When L{BlastReadsAlignments} is given two JSON files that share a
        matched subject and a TitlesAlignments is built from it, the entry
        for that title must combine the information from both reads,
        including the correct HSP scores.
        """
        title = 'gi|887699|gb|DQ37780 Cowpox virus 15'
        callCount = [0]

        def fakeOpen(_ignoredFilename, **kwargs):
            # The first open serves RECORD2; any later open serves RECORD4.
            callCount[0] += 1
            record = RECORD2 if callCount[0] == 1 else RECORD4
            return File([dumps(PARAMS) + '\n', dumps(record) + '\n'])

        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = fakeOpen
            reads = Reads()
            for readId in ('id2', 'id4'):
                reads.add(Read(readId, 'A' * 70))
            readsAlignments = BlastReadsAlignments(
                reads, ['file1.json', 'file2.json'])
            titleAlignments = TitlesAlignments(readsAlignments)[title]
            self.assertEqual(title, titleAlignments.subjectTitle)
            self.assertEqual(4, titleAlignments.hspCount())
            self.assertEqual('id2', titleAlignments[0].read.id)
            self.assertEqual('id4', titleAlignments[1].read.id)
            # The first matching read contributes a single HSP.
            self.assertEqual(HSP(20), titleAlignments[0].hsps[0])
            # The second matching read contributes three HSPs.
            for index, score in enumerate((10, 5, 3)):
                self.assertEqual(HSP(score), titleAlignments[1].hsps[index])
Example #42
0
 def testMaxScore_EValue(self):
     """
     Sorting titles on max score must work when the scores are e values,
     including a secondary sort on title.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     # Expected order, annotated with the e value of each title.
     expected = [
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
         'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
         'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
     ]
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         sortedTitles = TitlesAlignments(alignments).sortTitles('maxScore')
         self.assertEqual(expected, sortedTitles)
Example #43
0
    def testFromThreeSequences(self):
        """
        An NJTree instance made from three sequences that have no features
        must 1) have a distance matrix with zeroes on the diagonal and ones
        everywhere else, 2) keep the labels it was given, and 3) produce a
        simple tree that has three children.
        """
        labels = ['x', 'y', 'z']
        sequences = Reads()
        for readId in ('id1', 'id2', 'id3'):
            sequences.add(AARead(readId, 'A'))
        njtree = NJTree.fromSequences(labels, sequences,
                                      landmarks=['AlphaHelix'])

        expectedDistance = [
            [0, 1, 1],
            [1, 0, 1],
            [1, 1, 0],
        ]
        self.assertTrue(np.array_equal(expectedDistance, njtree.distance))

        # The very same list object must be kept, not a copy of it.
        self.assertIs(labels, njtree.labels)

        childStrings = sorted(map(str, njtree.tree.children))
        self.assertEqual(['x:0.5;\n', 'y:0.5;\n', 'z:0.5;\n'], childStrings)
Example #44
0
 def testMaxScore_Bits(self):
     """
     Sorting titles on max score must work when the scores are bit scores,
     including a secondary sort on title.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     # Expected order, annotated with the bit score of each title.
     expected = [
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',  # 25
         'gi|887699|gb|DQ37780 Cowpox virus 15',  # 20
         'gi|887699|gb|DQ37780 Monkeypox virus 456',  # 20
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',  # 20
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
     ]
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = BlastReadsAlignments(reads, 'file.json')
         sortedTitles = TitlesAlignments(alignments).sortTitles('maxScore')
         self.assertEqual(expected, sortedTitles)
Example #45
0
 def testWithScoreBetterThan_EValue(self):
     """
     The filter function must work correctly when passed a value for
     withScoreBetterThan when using e values.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     expected = ['gi|887699|gb|DQ37780 Squirrelpox virus 1296/99']
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(
             reads, 'file.json', 'database.fasta',
             scoreClass=LowerIsBetterScore)
         filtered = TitlesAlignments(alignments).filter(
             withScoreBetterThan=1e-10)
         self.assertEqual(expected, list(filtered.keys()))
Example #46
0
 def testLength(self):
     """
     Sorting titles on sequence length must work, including a secondary
     sort on title.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     # Expected order, annotated with the length of each title.
     expected = [
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',  # 38000
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 37000
         'gi|887699|gb|DQ37780 Monkeypox virus 456',  # 35000
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',  # 35000
         'gi|887699|gb|DQ37780 Cowpox virus 15',  # 30000
     ]
     with patch.object(builtins, 'open', mockOpen(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         sortedTitles = TitlesAlignments(alignments).sortTitles('length')
         self.assertEqual(expected, sortedTitles)
Example #47
0
 def testMaxMatchingReads(self):
     """
     The filter function must work correctly when it is passed a value
     for maxMatchingReads.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     # Cowpox virus 15 is absent because two reads match it.
     expected = sorted([
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         'gi|887699|gb|DQ37780 Monkeypox virus 456',
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.'
     ])
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         filtered = TitlesAlignments(alignments).filter(maxMatchingReads=1)
         self.assertEqual(expected, sorted(filtered))
Example #48
0
 def testMinMedianScore_EValue(self):
     """
     The filter function must work correctly when it is passed a value
     for minMedianScore and the scores are e values.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     expected = [
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',
     ]
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         filtered = TitlesAlignments(alignments).filter(minMedianScore=1e-9)
         self.assertEqual(expected, sorted(filtered))
Example #49
0
 def testReadSetFilterAllowAnything(self):
     """
     The filter function must work correctly when minNewReads is 0.0,
     i.e. when any read set is considered sufficiently novel.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     expected = [
         'gi|887699|gb|DQ37780 Cowpox virus 15',
         'gi|887699|gb|DQ37780 Monkeypox virus 456',
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',
     ]
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         filtered = TitlesAlignments(alignments).filter(minNewReads=0.0)
         self.assertEqual(expected, sorted(filtered))
Example #50
0
 def testCoverageIncludesSome(self):
     """
     Filtering on coverage must return a titlesAlignments instance that
     holds only the expected titles when just some of the titles have
     sufficient coverage.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     expected = [
         'gi|887699|gb|DQ37780 Cowpox virus 15',
         'gi|887699|gb|DQ37780 Monkeypox virus 456',
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
     ]
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         # Understanding this result requires looking at the HSP coverage
         # in sample_data.py and working out the coverage by hand.
         filtered = TitlesAlignments(alignments).filter(minCoverage=0.0003)
         self.assertEqual(expected, sorted(filtered))
Example #51
0
 def testFilterWithNoArguments(self):
     """
     Calling the filter function with no arguments must return a
     TitlesAlignments instance holding all the titles of the original.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     expected = [
         'gi|887699|gb|DQ37780 Cowpox virus 15',
         'gi|887699|gb|DQ37780 Monkeypox virus 456',
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',
     ]
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         filtered = TitlesAlignments(alignments).filter()
         self.assertEqual(expected, sorted(filtered))
Example #52
0
 def testTitle(self):
     """
     Sorting titles on title must work.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     expected = [
         'gi|887699|gb|DQ37780 Cowpox virus 15',
         'gi|887699|gb|DQ37780 Monkeypox virus 456',
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',
     ]
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         sortedTitles = TitlesAlignments(alignments).sortTitles('title')
         self.assertEqual(expected, sortedTitles)
Example #53
0
 def testCoverageIncludesAll(self):
     """
     Filtering on coverage must return a titlesAlignments instance that
     holds all titles when all of the titles have sufficient coverage.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     jsonLines = ''.join(dumps(record) + '\n' for record in records)
     expected = [
         'gi|887699|gb|DQ37780 Cowpox virus 15',
         'gi|887699|gb|DQ37780 Monkeypox virus 456',
         'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
         'gi|887699|gb|DQ37780 Squirrelpox virus 55',
     ]
     with patch.object(builtins, 'open', mock_open(read_data=jsonLines)):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         alignments = DiamondReadsAlignments(reads, 'file.json')
         filtered = TitlesAlignments(alignments).filter(minCoverage=0.0)
         self.assertEqual(expected, sorted(filtered))
Example #54
0
 def testRemoveOutput(self, rmtreeMock):
     """
     After a run, calling removeOutput must result in the passed mock
     being called exactly once, with the temporary directory name.
     """
     sequences = (
         ('id1', 'A' * 200 + 'G' * 200),
         ('id2', 'A' * 400),
         ('id3', 'G' * 400),
     )
     reads = Reads([Read(readId, sequence) for readId, sequence in sequences])
     analysis = self.ra
     analysis.run(reads)
     analysis.removeOutput()
     rmtreeMock.assert_called_once_with(analysis.tmpDir)
Example #55
0
    def testHeterogeneousReadsFractionLowWithOneDifference(self):
        """
        heterogeneousSites must return results with one entry if the reads
        given are evenly split between two nucleotides at one site, making
        that site less homogeneous than specified by the homogeneity cutoff
        fraction.
        """
        read = Read('id', 'ACCG')
        others = [Read('id2', 'TCCG'), Read('id3', 'TCCG'),
                  Read('id4', 'ACCG')]
        reads = Reads([read] + others)

        # The reads only disagree at offset 0: two 'A's versus two 'T's.
        expected = (
            {0: {'A': 2, 'T': 2}},
            {0: {'A': ['id', 'id4'], 'T': ['id2', 'id3']}},
            [0],
        )
        self.assertEqual(expected,
                         heterogeneousSites(reads, len(read), 0.6))
Example #56
0
 def testExpectedAttrs(self):
     """
     A ReadsAlignments instance must expose the reads and params objects
     it was given and must have HigherIsBetterScore as its score class.
     """
     params = {'application': 'app name'}
     reads = Reads()
     alignments = ReadsAlignments(reads, params)
     self.assertIs(alignments.reads, reads)
     self.assertEqual('app name', alignments.params['application'])
     # The params dict must be the identical object, not a copy.
     self.assertIs(params, alignments.params)
     self.assertIs(HigherIsBetterScore, alignments.scoreClass)
Example #57
0
 def testTwoByThreeWithRepeatedQueryAndSubjectIds(self):
     """
     Calling affinityMatrix with two reads and three subjects must give
     a 2x3 matrix, and it must not matter that the query and subject ids
     are not all different (as it would if affinityMatrix were called
     with returnDict=True).
     """
     sequence = 'FRRRFRRRFAAAFRRRFRRRF'
     reads = Reads()
     for readId in ('id1', 'id1'):
         reads.add(AARead(readId, sequence))
     subjects = Reads()
     for subjectId in ('id2', 'id3', 'id3'):
         subjects.add(AARead(subjectId, sequence))
     matrix = affinityMatrix(reads, landmarks=['AlphaHelix'],
                             subjects=subjects, computeDiagonal=True)
     expected = [
         [1.0, 1.0, 1.0],
         [1.0, 1.0, 1.0]
     ]
     self.assertEqual(expected, matrix)
Example #58
0
 def testPopulationNotAllowed(self):
     """
     If database population has not been enabled, passing a subjects
     keyword must result in a ValueError.
     """
     expectedMessage = '^Database population is not enabled.$'
     specifier = DatabaseSpecifier(allowPopulation=False)
     six.assertRaisesRegex(
         self, ValueError, expectedMessage,
         specifier.getDatabaseFromKeywords, subjects=Reads())
Example #59
0
 def testIdenticalMatrixIsReturnedOnRepeatedRequest(self):
     """
     Asking an AffinityMatrices instance for the same affinity matrix
     twice must return the identical object both times.
     """
     testParameters = {
         'dbParams': DatabaseParameters(),
         'findParams': FindParameters(),
     }
     matrices = AffinityMatrices(
         Reads(), parameterSets={'test': testParameters}, returnDict=True)
     firstRequest = matrices['test']
     secondRequest = matrices['test']
     self.assertIs(firstRequest, secondRequest)
Example #60
0
    def testPopulationFromInMemoryAndFastaFile(self):
        """
        When both the subjects and databaseFasta keywords are passed, the
        returned database must contain all the subjects given in memory
        as well as all those found in the file.
        """
        subject1 = AARead('id1', 'FFF')
        subject2 = AARead('id2', 'RRR')
        subjects = Reads()
        for inMemorySubject in (subject1, subject2):
            subjects.add(inMemorySubject)

        # The mocked FASTA file supplies two more subjects (id3 and id4).
        fastaText = '\n'.join(['>id3', 'FFFF', '>id4', 'RRRR'])
        with patch.object(builtins, 'open', mockOpen(read_data=fastaText)):
            db = DatabaseSpecifier().getDatabaseFromKeywords(
                subjects=subjects, databaseFasta='file.fasta')

        found = set(subject.read for subject in db.getSubjects())
        expected = {
            subject1,
            subject2,
            AARead('id3', 'FFFF'),
            AARead('id4', 'RRRR'),
        }
        self.assertEqual(expected, found)