Exemple #1
0
 def testNoOffsets(self):
     """
     Passing an empty set of wanted offsets must produce a result in which
     all counts are zero and all offset lists are empty.
     """
     expectedMatch = {
         'identicalMatchCount': 0,
         'ambiguousMatchCount': 0,
         'gapMismatchCount': 0,
         'gapGapMismatchCount': 0,
         'nonGapMismatchCount': 0,
     }
     expectedRead1 = {
         'ambiguousOffsets': [],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expectedRead2 = {
         'ambiguousOffsets': [],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expected = {
         'match': expectedMatch,
         'read1': expectedRead1,
         'read2': expectedRead2,
     }
     first = Read('id1', 'ATT-T')
     second = Read('id2', 'A-GTC')
     # An empty set (as opposed to no offsets argument at all) is passed.
     self.assertEqual(expected,
                      compareDNAReads(first, second, offsets=set()))
Exemple #2
0
 def testLengthOne(self):
     """
     De-duplicating a FASTA collection that holds a single read must give
     back just that one read.
     """
     singleton = Reads()
     singleton.add(Read('id', 'GGG'))
     deduped = list(dedupFasta(singleton))
     self.assertEqual(deduped, [Read('id', 'GGG')])
 def testWikiAnswerWithMatchOneAsDict(self):
     """
     Check the Smith-Waterman example from Wikipedia
     http://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
     Wikipedia uses a match score of two, whereas this test uses a match
     score of one. The alignment is checked in its dict form.
     """
     first = Read('seq1', 'ACACACTA')
     second = Read('seq2', 'AGCACACA')
     aligner = LocalAlignment(first, second, match=1)
     alignment = aligner.createAlignment()
     expected = {
         'cigar': '5=1D1=',
         'sequence1Start': 2,
         'sequence1End': 8,
         'sequence2Start': 3,
         'sequence2End': 8,
         'text': [
             'seq1 2 CACACTA 8',
             '       ||||| |',
             'seq2 3 CACAC-A 8',
         ],
     }
     self.assertEqual(expected, alignment)
Exemple #4
0
 def testCoverageIncludesSome(self):
     """
     Filtering on minCoverage must keep only the expected titles when just
     some of the titles have sufficient coverage.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     # One JSON document per line, each line newline-terminated.
     mockOpener = mock_open(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         # To understand why the following produces the result it does,
         # you need to look at the HSP coverage in sample_data.py and
         # calculate the coverage by hand.
         filtered = titlesAlignments.filter(minCoverage=0.0003)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
         ]
         self.assertEqual(expected, sorted(filtered))
Exemple #5
0
 def testMaxScore_EValue(self):
     """
     Sorting titles on max score must work when the scores are e values,
     including a secondary sort on title.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mock_open(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         ordered = titlesAlignments.sortTitles('maxScore')
         # The scores here are e values (LowerIsBetterScore), so the
         # expected order runs from the smallest e value to the largest.
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 1e-11
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 1e-10
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 1e-8
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 1e-7
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 1e-6
         ]
         self.assertEqual(expected, ordered)
Exemple #6
0
 def testMaxMatchingReads(self):
     """
     Filtering must work correctly when a maxMatchingReads value is
     given.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mock_open(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         filtered = titlesAlignments.filter(maxMatchingReads=1)
         # Cowpox virus 15 is not in the results as it is matched by two
         # reads.
         expected = sorted([
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.'
         ])
         self.assertEqual(expected, sorted(filtered))
Exemple #7
0
 def testReadSetFilterAllowAnything(self):
     """
     Filtering with a minNewReads value of 0.0 (i.e., one that considers
     any read set sufficiently novel) must work correctly.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mock_open(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         filtered = titlesAlignments.filter(minNewReads=0.0)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered))
Exemple #8
0
 def testNonDefaultGapChars(self):
     """
     It must be possible to specify which characters count as gaps.
     """
     # The same outcome is expected whichever of the two gap characters
     # is placed in the sequences.
     expected = {
         'match': {
             'identicalMatchCount': 3,
             'ambiguousMatchCount': 0,
             'gapMismatchCount': 2,
             'gapGapMismatchCount': 0,
             'nonGapMismatchCount': 0,
         },
         'read1': {
             'ambiguousOffsets': [],
             'extraCount': 0,
             'gapOffsets': [2],
         },
         'read2': {
             'ambiguousOffsets': [],
             'extraCount': 0,
             'gapOffsets': [0],
         },
     }
     for gap in '+$':
         first = Read('id1', 'AC%sTT' % gap)
         second = Read('id2', '%sCGTT' % gap)
         self.assertEqual(
             expected, compareDNAReads(first, second, gapChars='+$'))
Exemple #9
0
 def testGapAmbiguous(self):
     """
     Ambiguous characters that are paired with gaps must be reported in
     ambiguousOffsets as expected.
     """
     expectedMatch = {
         'identicalMatchCount': 2,
         'ambiguousMatchCount': 0,
         'gapMismatchCount': 2,
         'gapGapMismatchCount': 1,
         'nonGapMismatchCount': 0,
     }
     expectedRead1 = {
         'ambiguousOffsets': [1],
         'extraCount': 0,
         'gapOffsets': [2, 3],
     }
     expectedRead2 = {
         'ambiguousOffsets': [3],
         'extraCount': 0,
         'gapOffsets': [1, 2],
     }
     expected = {
         'match': expectedMatch,
         'read1': expectedRead1,
         'read2': expectedRead2,
     }
     first = Read('id1', 'AN--T')
     second = Read('id2', 'A--NT')
     self.assertEqual(expected, compareDNAReads(first, second))
Exemple #10
0
 def testMatchWithIncompatibleAmbiguityInBoth(self):
     """
     Two sequences that are identical except for one (incompatible)
     ambiguous code at the same offset in each must compare as expected.
     """
     expectedMatch = {
         'identicalMatchCount': 5,
         'ambiguousMatchCount': 0,
         'gapMismatchCount': 0,
         'gapGapMismatchCount': 0,
         'nonGapMismatchCount': 1,
     }
     expectedRead1 = {
         'ambiguousOffsets': [5],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expectedRead2 = {
         'ambiguousOffsets': [5],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expected = {
         'match': expectedMatch,
         'read1': expectedRead1,
         'read2': expectedRead2,
     }
     first = Read('id1', 'ACGTTW')
     second = Read('id2', 'ACGTTS')
     self.assertEqual(expected, compareDNAReads(first, second))
Exemple #11
0
 def testMatchWithIdenticalAmbiguityButStrict(self):
     """
     Two sequences that are identical, including one (identical) ambiguous
     code at the same offset in each, must compare as expected when
     matching is strict (matchAmbiguous=False).
     """
     expectedMatch = {
         'identicalMatchCount': 5,
         'ambiguousMatchCount': 0,
         'gapMismatchCount': 0,
         'gapGapMismatchCount': 0,
         'nonGapMismatchCount': 1,
     }
     expectedRead1 = {
         'ambiguousOffsets': [5],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expectedRead2 = {
         'ambiguousOffsets': [5],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expected = {
         'match': expectedMatch,
         'read1': expectedRead1,
         'read2': expectedRead2,
     }
     first = Read('id1', 'ACGTTN')
     second = Read('id2', 'ACGTTN')
     self.assertEqual(
         expected, compareDNAReads(first, second, matchAmbiguous=False))
Exemple #12
0
 def testNonMatchingAmbiguityInFirst(self):
     """
     Two sequences that are identical except for one (incompatible)
     ambiguous code in the first sequence must compare as expected.
     """
     expectedMatch = {
         'identicalMatchCount': 5,
         'ambiguousMatchCount': 0,
         'gapMismatchCount': 0,
         'gapGapMismatchCount': 0,
         'nonGapMismatchCount': 1,
     }
     # Only the first read contains an ambiguous code (at offset 5).
     expectedRead1 = {
         'ambiguousOffsets': [5],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expectedRead2 = {
         'ambiguousOffsets': [],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expected = {
         'match': expectedMatch,
         'read1': expectedRead1,
         'read2': expectedRead2,
     }
     first = Read('id1', 'ACGTTW')
     second = Read('id2', 'ACGTTC')
     self.assertEqual(expected, compareDNAReads(first, second))
Exemple #13
0
 def testMatchWithAmbiguityButStrict(self):
     """
     Two sequences that are identical except for one ambiguous code in the
     first sequence must compare as expected when matchAmbiguous=False is
     given to disallow ambiguous matching.
     """
     expectedMatch = {
         'identicalMatchCount': 5,
         'ambiguousMatchCount': 0,
         'gapMismatchCount': 0,
         'gapGapMismatchCount': 0,
         'nonGapMismatchCount': 1,
     }
     expectedRead1 = {
         'ambiguousOffsets': [5],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expectedRead2 = {
         'ambiguousOffsets': [],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expected = {
         'match': expectedMatch,
         'read1': expectedRead1,
         'read2': expectedRead2,
     }
     first = Read('id1', 'ACGTTS')
     second = Read('id2', 'ACGTTC')
     self.assertEqual(
         expected, compareDNAReads(first, second, matchAmbiguous=False))
Exemple #14
0
 def testOffsets(self):
     """
     When a set of wanted offsets is given, the result must be restricted
     to just those offsets.
     """
     expectedMatch = {
         'identicalMatchCount': 1,
         'ambiguousMatchCount': 0,
         'gapMismatchCount': 0,
         'gapGapMismatchCount': 0,
         'nonGapMismatchCount': 1,
     }
     expectedRead1 = {
         'ambiguousOffsets': [],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expectedRead2 = {
         'ambiguousOffsets': [],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expected = {
         'match': expectedMatch,
         'read1': expectedRead1,
         'read2': expectedRead2,
     }
     first = Read('id1', 'ATT-T')
     second = Read('id2', 'A-GTC')
     # Only the first and last offsets are wanted; the gaps at offsets 1
     # and 3 are therefore not expected to be reported.
     wanted = {0, 4}
     self.assertEqual(expected,
                      compareDNAReads(first, second, offsets=wanted))
Exemple #15
0
    def testTitleCollection(self):
        """
        When a title occurs in the alignments of more than one read, the
        data from all those reads must be collected properly.
        """
        records = (PARAMS, RECORD2, RECORD3)
        mockOpener = mock_open(
            read_data=''.join(dumps(record) + '\n' for record in records))
        with patch.object(builtins, 'open', mockOpener):
            reads = Reads()
            read2 = Read('id2', 'A' * 70)
            read3 = Read('id3', 'A' * 70)
            reads.add(read2)
            reads.add(read3)
            readsAlignments = DiamondReadsAlignments(reads, 'file.json')
            titlesAlignments = TitlesAlignments(readsAlignments)

            title = 'gi|887699|gb|DQ37780 Cowpox virus 15'
            collected = titlesAlignments[title]
            self.assertEqual(title, collected.subjectTitle)
            self.assertEqual(30000, collected.subjectLength)
            self.assertEqual(2, len(collected))

            # The alignments are expected in the order the reads were
            # added, each with a first HSP equal to HSP(20).
            for index, read in enumerate((read2, read3)):
                self.assertEqual(read, collected[index].read)
                self.assertEqual(HSP(20), collected[index].hsps[0])
Exemple #16
0
 def testExtraInSecond(self):
     """
     Extra bases at the end of the second sequence must be counted in its
     extraCount.
     """
     expectedMatch = {
         'identicalMatchCount': 5,
         'ambiguousMatchCount': 0,
         'gapMismatchCount': 0,
         'gapGapMismatchCount': 0,
         'nonGapMismatchCount': 0,
     }
     expectedRead1 = {
         'ambiguousOffsets': [],
         'extraCount': 0,
         'gapOffsets': [],
     }
     # The second read is two bases longer than the first.
     expectedRead2 = {
         'ambiguousOffsets': [],
         'extraCount': 2,
         'gapOffsets': [],
     }
     expected = {
         'match': expectedMatch,
         'read1': expectedRead1,
         'read2': expectedRead2,
     }
     first = Read('id1', 'ACGTT')
     second = Read('id2', 'ACGTTCC')
     self.assertEqual(expected, compareDNAReads(first, second))
Exemple #17
0
 def testFilterWithNoArguments(self):
     """
     Calling filter with no arguments must give a result containing all
     the titles of the original.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2)
     mockOpener = mock_open(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         unfiltered = titlesAlignments.filter()
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(unfiltered))
Exemple #18
0
 def testExtraAmbiguous(self):
     """
     If the first sequence has extra trailing characters, they must be
     counted in its extraCount, and an ambiguous code among them must
     appear in its ambiguousOffsets.
     """
     expectedMatch = {
         'identicalMatchCount': 5,
         'ambiguousMatchCount': 0,
         'gapMismatchCount': 0,
         'gapGapMismatchCount': 0,
         'nonGapMismatchCount': 0,
     }
     # The two extra characters in the first read are a gap (offset 5)
     # and an ambiguous code (offset 6).
     expectedRead1 = {
         'ambiguousOffsets': [6],
         'extraCount': 2,
         'gapOffsets': [5],
     }
     expectedRead2 = {
         'ambiguousOffsets': [],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expected = {
         'match': expectedMatch,
         'read1': expectedRead1,
         'read2': expectedRead2,
     }
     first = Read('id1', 'ACGTT-N')
     second = Read('id2', 'ACGTT')
     self.assertEqual(expected, compareDNAReads(first, second))
Exemple #19
0
 def testMinMedianScore_EValue(self):
     """
     Filtering must work correctly when a minMedianScore value is given
     and the scores are e values.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mock_open(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         filtered = titlesAlignments.filter(minMedianScore=1e-9)
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered))
Exemple #20
0
 def testMismatch(self):
     """
     Mismatched (non-ambiguous) bases must be counted correctly in
     nonGapMismatchCount.
     """
     expectedMatch = {
         'identicalMatchCount': 3,
         'ambiguousMatchCount': 0,
         'gapMismatchCount': 0,
         'gapGapMismatchCount': 0,
         'nonGapMismatchCount': 2,
     }
     expectedRead1 = {
         'ambiguousOffsets': [],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expectedRead2 = {
         'ambiguousOffsets': [],
         'extraCount': 0,
         'gapOffsets': [],
     }
     expected = {
         'match': expectedMatch,
         'read1': expectedRead1,
         'read2': expectedRead2,
     }
     first = Read('id1', 'ACGTT')
     second = Read('id2', 'ACGCC')
     self.assertEqual(expected, compareDNAReads(first, second))
Exemple #21
0
 def testCoverageIncludesAll(self):
     """
     Filtering on minCoverage must keep all titles when every title has
     sufficient coverage.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mock_open(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         filtered = titlesAlignments.filter(minCoverage=0.0)
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, sorted(filtered))
Exemple #22
0
    def testMatchWithAmbiguityAndNotStrict(self):
        """
        Two sequences that match exactly, apart from one ambiguity in the first
        sequence, must compare as expected when we specify matchAmbiguous=True
        to allow ambiguous matching.
        """
        read1 = Read('id1', 'ACGTTS')
        read2 = Read('id2', 'ACGTTC')
        match = compareDNAReads(read1, read2, matchAmbiguous=True)

        self.assertEqual(
            '''\
Exact matches: 5/6 (83.33%)
Ambiguous matches: 1/6 (16.67%)
Exact or ambiguous matches: 6/6 (100.00%)
Mismatches: 0
  Not involving gaps (i.e., conflicts): 0
  Involving a gap in one sequence: 0
  Involving a gap in both sequences: 0
  Id: id1
    Length: 6
    Gaps: 0
    Ambiguous: 1/6 (16.67%)
  Id: id2
    Length: 6
    Gaps: 0
    Ambiguous: 0''', matchToString(match, read1, read2, matchAmbiguous=True))
Exemple #23
0
 def testMedianScore_Bits(self):
     """
     Sorting titles on median score must work when the scores are bit
     scores, including a secondary sort on title.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3, RECORD4)
     mockOpener = mock_open(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3', 'id4'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(reads, 'file.json')
         titlesAlignments = TitlesAlignments(readsAlignments)
         ordered = titlesAlignments.sortTitles('medianScore')
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',       # 25
             'gi|887699|gb|DQ37780 Monkeypox virus 456',        # 20
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',   # 20
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 20
             'gi|887699|gb|DQ37780 Cowpox virus 15',            # 20
         ]
         self.assertEqual(expected, ordered)
Exemple #24
0
    def testResidueCountsTwoReadsTwoHSPsLeftOverhang(self):
        """
        The residueCounts method must give the correct result when two
        reads, each with one HSP, are aligned to a title and the leftmost
        HSP starts before the left edge of the subject (i.e., the result
        will include negative subject offsets).

        Subject:      GTT
        HSP1:       ACGT
        HSP2:        CGTT
        """
        firstRead = Read('id', 'ACGT')
        firstHsp = HSP(33, readStart=0, readEnd=4, readStartInSubject=-2,
                       readEndInSubject=2, subjectStart=0, subjectEnd=2,
                       readMatchedSequence='GT', subjectMatchedSequence='GT')
        secondRead = Read('id', 'CGTT')
        secondHsp = HSP(33, readStart=0, readEnd=4, readStartInSubject=-1,
                        readEndInSubject=3, subjectStart=0, subjectEnd=3,
                        readMatchedSequence='GTT',
                        subjectMatchedSequence='GTT')
        titleAlignments = TitleAlignments('subject title', 55)
        titleAlignments.addAlignment(TitleAlignment(firstRead, [firstHsp]))
        titleAlignments.addAlignment(TitleAlignment(secondRead, [secondHsp]))
        # Keys are subject offsets; the negative ones lie to the left of
        # the start of the subject.
        expected = {
            -2: {'A': 1},
            -1: {'C': 2},
            0: {'G': 2},
            1: {'T': 2},
            2: {'T': 1},
        }
        self.assertEqual(expected, titleAlignments.residueCounts())
Exemple #25
0
 def testTitle(self):
     """
     Sorting titles on title must work.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mock_open(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = DiamondReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         ordered = titlesAlignments.sortTitles('title')
         expected = [
             'gi|887699|gb|DQ37780 Cowpox virus 15',
             'gi|887699|gb|DQ37780 Monkeypox virus 456',
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',
         ]
         self.assertEqual(expected, ordered)
Exemple #26
0
 def testSummary(self):
     """
     The summary method must give the correct result for a title with two
     reads and three HSPs in total.
     """
     titleAlignments = TitleAlignments('subject title', 10)

     singleHspAlignment = TitleAlignment(
         Read('id1', 'ACGT'), [HSP(30, subjectStart=0, subjectEnd=2)])
     titleAlignments.addAlignment(singleHspAlignment)

     doubleHspAlignment = TitleAlignment(
         Read('id2', 'ACGT'),
         [
             HSP(55, subjectStart=2, subjectEnd=4),
             HSP(40, subjectStart=8, subjectEnd=9),
         ])
     titleAlignments.addAlignment(doubleHspAlignment)

     expected = {
         'bestScore': 55,
         'coverage': 0.5,
         'hspCount': 3,
         'medianScore': 40,
         'readCount': 2,
         'subjectLength': 10,
         'subjectTitle': 'subject title',
     }
     self.assertEqual(expected, titleAlignments.summary())
 def testWikiAnswerAsDict(self):
     """
     Check the Smith-Waterman example from Wikipedia:
     http://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
     using a match score of two. The alignment is checked in its dict
     form.
     """
     first = Read('seq1', 'ACACACTA')
     second = Read('seq2', 'AGCACACA')
     aligner = LocalAlignment(first, second, match=2)
     alignment = aligner.createAlignment()
     expected = {
         'cigar': '1=1I5=1D1=',
         'sequence1Start': 1,
         'sequence1End': 8,
         'sequence2Start': 1,
         'sequence2End': 8,
         'text': [
             'seq1 1 A-CACACTA 8',
             '       | ||||| |',
             'seq2 1 AGCACAC-A 8',
         ],
     }
     self.assertEqual(expected, alignment)
Exemple #28
0
 def testLength(self):
     """
     Sorting titles on sequence length must work, including a secondary
     sort on title.
     """
     records = (PARAMS, RECORD0, RECORD1, RECORD2, RECORD3)
     mockOpener = mockOpen(
         read_data=''.join(dumps(record) + '\n' for record in records))
     with patch.object(builtins, 'open', mockOpener):
         reads = Reads()
         for readId in ('id0', 'id1', 'id2', 'id3'):
             reads.add(Read(readId, 'A' * 70))
         readsAlignments = BlastReadsAlignments(
             reads, 'file.json', scoreClass=LowerIsBetterScore)
         titlesAlignments = TitlesAlignments(readsAlignments)
         ordered = titlesAlignments.sortTitles('length')
         expected = [
             'gi|887699|gb|DQ37780 Squirrelpox virus 55',  # 38000
             'gi|887699|gb|DQ37780 Squirrelpox virus 1296/99',  # 37000
             'gi|887699|gb|DQ37780 Monkeypox virus 456',  # 35000
             'gi|887699|gb|DQ37780 Mummypox virus 3000 B.C.',  # 35000
             'gi|887699|gb|DQ37780 Cowpox virus 15',  # 30000
         ]
         self.assertEqual(expected, ordered)
Exemple #29
0
    def testYP_009259545(self):
        """
        A query built from part of the YP_009259545 protein must match that
        protein as expected.
        """
        proteinAccession = 'YP_009259545.1'
        proteinInfo = SAMPLE_DATA['proteins'][proteinAccession]
        proteinSequence = proteinInfo['protein']
        proteinId = proteinInfo['id']

        qid = 'query'
        # Build the query from the first codon listed for each of amino
        # acids 10 to 49 (0-based) of the protein.
        codons = [CODONS[aa][0] for aa in proteinSequence[10:50]]
        qseq = ''.join(codons)
        qqual = 'E' * len(qseq)

        with DiamondExecutor() as de:
            de.addSubject(Read(proteinId, proteinSequence))
            queries = Reads([Read(qid, qseq, qqual)])
            # Unpacking insists that exactly one result is produced.
            (result, ) = list(de.search(queries))

        expected = {
            'bitscore': 83.6,
            'btop': '40',
            'qframe': 1,
            'qend': 120,
            'full_qqual': qqual,
            'qlen': len(qseq),
            'full_qseq': qseq,
            'qseqid': 'query',
            'qstart': 1,
            'slen': len(proteinSequence),
            'sstart': 11,
            'stitle': proteinId,
        }
        self.assertEqual(expected, result)
Exemple #30
0
    def testHardClippingInCIGARButQueryNotHardClipped(self):
        """
        As documented in https://github.com/acorg/dark-matter/issues/630 we
        must correctly handle the case where a CIGAR string says a query is
        hard-clipped but the query sequence in the SAM file actually isn't.
        This can be due to a prior alignment with a soft clip, in which
        case the full query sequence has to be given before the secondary
        alignment with the hard clip.
        """
        samLines = [
            '@SQ SN:Chimp-D00220 LN:8',
            '@SQ SN:D-AM494716 LN:8',
            '@SQ SN:D-XXX LN:8',
            '@SQ SN:Chimp-YYY LN:8',
            'query1 0 Chimp-D00220 1 0 3S5M * 0 0 TTTTGGTT 12345678',
            'query1 256 D-AM494716 1 0 3H5M * 0 0 * *',
            'query1 256 D-XXX 1 0 5H3M * 0 0 * *',
            'query1 0 Chimp-YYY 1 0 8M * 0 0 * *',
        ]
        # The lines are written with spaces for legibility and converted to
        # tab-separated fields here.
        data = '\n'.join(samLines).replace(' ', '\t')

        with dataFile(data) as filename:
            ps = PaddedSAM(SAMFilter(filename))
            # Unpacking insists that exactly four reads are produced.
            (read1, read2, read3, read4) = list(ps.queries(addAlignment=True))

            # Each entry: the read obtained, the padded read expected and
            # the expected query sequence of its alignment.
            expectations = (
                (read1, Read('query1', 'TGGTT---', '45678!!!'), 'TTTTGGTT'),
                (read2, Read('query1/1', 'TGGTT---', '45678!!!'), 'TGGTT'),
                (read3, Read('query1/2', 'GTT-----', '678!!!!!'), 'GTT'),
                (read4, Read('query1/3', 'TTTTGGTT', '12345678'), 'TTTTGGTT'),
            )
            for read, expectedRead, expectedQuery in expectations:
                self.assertEqual(expectedRead, read)
                self.assertEqual(expectedQuery,
                                 read.alignment.query_sequence)