Python DNARead Examples, dark.reads.DNARead Python Examples

Example #1

0

Show file

File: test_fasta.py Project: akifoss/dark-matter

    def testTwoFiles(self):
        """
        It must be possible to read from two FASTA files.

        The built-in C{open} is patched so that each expected file name is
        checked, in order, and canned FASTA content is returned for it.
        """
        class SideEffect(object):
            """
            A stateful replacement for C{open} that serves the two expected
            files in order and fails the test if called a third time.

            @param test: The test case instance used to make assertions.
            """
            def __init__(self, test):
                self.test = test
                self.count = 0

            def sideEffect(self, filename, **kwargs):
                if self.count == 0:
                    self.test.assertEqual('file1.fasta', filename)
                    self.count += 1
                    return File(['>id1\n', 'ACTG\n'])
                elif self.count == 1:
                    self.test.assertEqual('file2.fasta', filename)
                    self.count += 1
                    return File(['>id2\n', 'CAGT\n'])
                else:
                    # Report through the test case: this helper class has
                    # no fail method of its own, so self.fail would raise
                    # AttributeError instead of a test failure.
                    self.test.fail('We are only supposed to be called twice!')

        sideEffect = SideEffect(self)
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect.sideEffect
            reads = FastaReads(['file1.fasta', 'file2.fasta'])
            self.assertEqual([
                DNARead('id1', 'ACTG'),
                DNARead('id2', 'CAGT'),
            ], list(reads))

Example #2

0

Show file

File: test_genome.py Project: VirologyCharite/sars2seq

    def testNtSequencesChangesTuple(self):
        """
        It must be possible to retrieve aligned nucleotide sequences
        and check on changes using a tuple specification.
        """
        spike = {
            'name': 'spike',
            'sequence': 'ATTC',
            'start': 0,
            'stop': 4,
        }
        features = Features({'spike': spike}, DNARead('refId', 'ATTC'))
        genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)

        # Note: the offsets in the change tuples are 0-based.
        changes = (('A', 0, 'A'), ('T', 1, 'A'), ('A', 2, 'T'), ('T', 3, 'T'))

        # The result expected for each change, in the same order as above.
        outcomes = (
            (True, 'A', True, 'A'),
            (True, 'T', False, 'T'),
            (False, 'T', True, 'T'),
            (False, 'C', False, 'C'),
        )

        testCount, errorCount, result = genome.checkFeature(
            'spike', changes, True)

        self.assertEqual(4, testCount)
        self.assertEqual(3, errorCount)
        for change, outcome in zip(changes, outcomes):
            self.assertEqual(outcome, result[change])

Example #3

0

Show file

File: test_conversion.py Project: akifoss/dark-matter

    def testCorrectNumberOfAlignments(self):
        """
        A JSONRecordsReader must return the expected number of alignments.

        Four reads are supplied and four alignments must be read back.
        """
        idsAndSequences = (
            ('BIOMICS-HISEQTP:140:HJFH5BCXX:1:1101:9489:4234 1:N:0:TGACCA',
             'AGGGCTCGGATGCTGTGGGTGTTTGTGTGGAGTTGGGTGTGTTTTCGGGG'
             'GTGGTTGAGTGGAGGGATTGCTGTTGGATTGTGTGTTTTGTTGTGGTTGCG'),
            ('BIOMICS-HISEQTP:140:HJFH5BCXX:1:1101:19964:6287 1:N:0:TGACCA',
             'TTTTTCTCCTGCGTAGATGAACCTACCCATGGCTTAGTAGGTCCTCTTTC'
             'ACCACGAGTTAAACCATTAACATTATATTTTTCTATAATTATACCACTGGC'),
            ('BIOMICS-HISEQTP:140:HJFH5BCXX:1:1101:11488:7488 1:N:0:TGACCA',
             'ACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTTAGCCTCCTGAATA'
             'GCTGGGATTACAGGTATGCAGGAGGCTAAGGCAGGAGAATTGCTTGAACCT'),
            ('BIOMICS-HISEQTP:140:HJFH5BCXX:1:1101:14734:7512 1:N:0:TGACCA',
             'GAGGGTGGAGGTAACTGAGGAAGCAAAGGCTTGGAGACAGGGCCCCTCAT'
             'AGCCAGTGAGTGCGCCATTTTCTTTGGAGCAATTGGGTGGGGAGATGGGGC'),
        )
        reads = Reads([DNARead(readId, sequence)
                       for readId, sequence in idsAndSequences])

        # The file the reader opens is replaced by the canned JSON data.
        with patch.object(builtins, 'open', mockOpen(read_data=JSON)):
            reader = JSONRecordsReader('file.json')
            alignmentCount = len(list(reader.readAlignments(reads)))

        self.assertEqual(4, alignmentCount)

Example #4

0

Show file

    def testDictLookupWithTwoFiles(self):
        """
        The __getitem__ method (i.e., dictionary-like lookup) must return the
        expected reads when sequences are added from two files.
        """
        class Open(object):
            """
            A stateful replacement for C{open} that checks which file is
            requested on each call and returns canned FASTA content for it.

            @param test: The test case instance used to make assertions.
            """
            def __init__(self, test):
                self.test = test
                self.count = 0

            def sideEffect(self, filename, *args, **kwargs):
                # Calls 0, 2 and 3 must be for the first file; calls 1 and
                # 4 for the second. Any further call fails the test.
                if self.count in (0, 2, 3):
                    self.test.assertEqual('filename1.fasta', filename)
                    self.count += 1
                    return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')
                elif self.count in (1, 4):
                    self.test.assertEqual('filename2.fasta', filename)
                    self.count += 1
                    return StringIO('>seq3\nAAACCC\n')
                else:
                    self.test.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        sideEffect = Open(self).sideEffect
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect
            index = SqliteIndex(':memory:')
            try:
                index.addFile('filename1.fasta')
                index.addFile('filename2.fasta')
                self.assertEqual(DNARead('id1', 'ACTG'), index['id1'])
                self.assertEqual(DNARead('id2', 'AACCTTGG'), index['id2'])
                self.assertEqual(DNARead('seq3', 'AAACCC'), index['seq3'])
            finally:
                # Close the index even if an assertion above fails.
                index.close()

Example #5

0

Show file

File: test_conversion.py Project: akifoss/dark-matter

    def testCorrectNumberOfAlignmentsWhenReadIdsAreAbbreviated(self):
        """
        A JSONRecordsReader must return the expected number of alignments
        when read ids are truncated at the first space. That is, the BLAST
        output has query names that are long and which contain a space but
        the reads in the FASTA have just the first part of those names (up to
        the first space).
        """
        idsAndSequences = (
            ('BIOMICS-HISEQTP:140:HJFH5BCXX:1:1101:9489:4234',
             'AGGGCTCGGATGCTGTGGGTGTTTGTGTGGAGTTGGGTGTGTTTTCGGGG'
             'GTGGTTGAGTGGAGGGATTGCTGTTGGATTGTGTGTTTTGTTGTGGTTGCG'),
            ('BIOMICS-HISEQTP:140:HJFH5BCXX:1:1101:19964:6287',
             'TTTTTCTCCTGCGTAGATGAACCTACCCATGGCTTAGTAGGTCCTCTTTC'
             'ACCACGAGTTAAACCATTAACATTATATTTTTCTATAATTATACCACTGGC'),
            ('BIOMICS-HISEQTP:140:HJFH5BCXX:1:1101:11488:7488',
             'ACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTTAGCCTCCTGAATA'
             'GCTGGGATTACAGGTATGCAGGAGGCTAAGGCAGGAGAATTGCTTGAACCT'),
            ('BIOMICS-HISEQTP:140:HJFH5BCXX:1:1101:14734:7512',
             'GAGGGTGGAGGTAACTGAGGAAGCAAAGGCTTGGAGACAGGGCCCCTCAT'
             'AGCCAGTGAGTGCGCCATTTTCTTTGGAGCAATTGGGTGGGGAGATGGGGC'),
        )
        reads = Reads([DNARead(readId, sequence)
                       for readId, sequence in idsAndSequences])

        # The file the reader opens is replaced by the canned JSON data.
        with patch.object(builtins, 'open', mockOpen(read_data=JSON)):
            reader = JSONRecordsReader('file.json')
            alignmentCount = len(list(reader.readAlignments(reads)))

        self.assertEqual(4, alignmentCount)

Example #6

0

Show file

File: test_genome.py Project: VirologyCharite/sars2seq

    def testNtSequencesChangesIndexErrorIgnore(self):
        """
        If we check on nucleotide sequences with an out-of-range
        check, no error should be printed if we pass onError='ignore'
        and the expected error result must be returned.
        """
        spike = {
            'name': 'spike',
            'sequence': 'ATTC',
            'start': 0,
            'stop': 4,
        }
        features = Features({'spike': spike}, DNARead('refId', 'ATTC'))
        genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)

        # Location 100000 is far beyond the 4-nucleotide feature.
        change = 'A100000A'
        errFp = StringIO()
        testCount, errorCount, result = genome.checkFeature(
            'spike', change, nt=True, onError='ignore', errFp=errFp)

        # Nothing may have been written to the error stream.
        self.assertEqual('', errFp.getvalue())

        self.assertEqual(1, testCount)
        self.assertEqual(1, errorCount)
        self.assertEqual((False, None, False, None), result[change])

Example #7

0

Show file

File: test_genome.py Project: VirologyCharite/sars2seq

    def testAaSequencesChangesTranslationErrorIgnore(self):
        """
        Check that no error is printed when checking AA sequences and
        onError='ignore' and that the expected result is returned.
        """
        orf1ab = {
            'name': 'ORF1ab polyprotein',
            'sequence': 'ATTC',
            'start': 0,
            'stop': 4,
        }
        features = Features({'orf1ab': orf1ab}, DNARead('refId', 'ATTC'))
        genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)

        change = 'A100000A'
        errFp = StringIO()
        testCount, errorCount, result = genome.checkFeature(
            'orf1ab', change, nt=False, onError='ignore', errFp=errFp)

        # Nothing may have been written to the error stream.
        self.assertEqual('', errFp.getvalue())

        self.assertEqual(1, testCount)
        self.assertEqual(1, errorCount)
        self.assertEqual((False, None, False, None), result[change])

Example #8

0

Show file

File: test_genome.py Project: VirologyCharite/sars2seq

    def testNtSequencesChangesString(self):
        """
        It must be possible to retrieve aligned nucleotide sequences
        and check on changes using a string specification.
        """
        spike = {
            'name': 'spike',
            'sequence': 'ATTC',
            'start': 0,
            'stop': 4,
        }
        features = Features({'spike': spike}, DNARead('refId', 'ATTC'))
        genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)

        # Note: the locations in the change string are 1-based.
        testCount, errorCount, result = genome.checkFeature(
            'spike', 'A1A T2A A3T T4T', True)

        self.assertEqual(4, testCount)
        self.assertEqual(3, errorCount)

        # The result expected for each individual change in the string.
        expected = (
            ('A1A', (True, 'A', True, 'A')),
            ('T2A', (True, 'T', False, 'T')),
            ('A3T', (False, 'T', True, 'T')),
            ('T4T', (False, 'C', False, 'C')),
        )
        for change, outcome in expected:
            self.assertEqual(outcome, result[change])

Example #9

0

Show file

    def _checkTranslation(self, genome, ranges, protein):
        """
        Check that translating the protein's nucleotide ranges, taken from
        the genome, gives the protein's sequence followed by a '*'.

        @param genome: A C{dict} with genome information from our sqlite3
            protein/genome database, as returned by
            C{dark.civ.proteins.SqliteIndex.findGenome}.
        @param ranges: A C{list} of (start, stop, forward) nucleotide ranges
            for the protein in the genome.
        @param protein: A C{dict} with protein information from our sqlite3
            protein/genome database, as returned by
            C{dark.civ.proteins.SqliteIndex.findProtein}.
        @raise ValueError: If the selected translation does not match the
            expected protein sequence.
        """
        expected = protein['sequence'] + '*'

        # Concatenate the nucleotides of all ranges. The per-range forward
        # flag is not used here.
        nucleotides = ''.join(
            genome['sequence'][start:stop] for start, stop, _ in ranges)

        candidates = list(DNARead('id', nucleotides).translations())

        # Translation 0 is used for forward proteins, translation 3
        # otherwise (presumably the first reverse-complement frame --
        # TODO confirm against DNARead.translations).
        frame = 0 if protein['forward'] else 3

        if candidates[frame].sequence == expected:
            return

        # TODO: improve this error to show what actually went wrong.
        raise ValueError(
            'Could not translate genome range to get protein sequence')

Example #10

0

Show file

File: test_dna.py Project: terrycojones/dark-matter

    def testFindTwoKozakConsensi(self):
        """
        In a given sequence with two Kozak consensuses with different offsets
        and qualities, the output should be as expected.
        """
        sequence = 'ATTGCCGCCATGGGGGGCCATGG'
        read = DNARead('id', sequence)

        # Each expected result wraps its own, equal, copy of the read.
        expected = [
            DNAKozakRead(DNARead('id', sequence), 3, 13, 100.0),
            DNAKozakRead(DNARead('id', sequence), 13, 23, 60.0),
        ]

        self.assertEqual(expected, list(findKozakConsensus(read)))

Example #11

0

Show file

 def testTwoReads(self):
     """
     A FASTQ file with two reads must be read properly and its
     sequences must be returned in the correct order.
     """
     lines = ['@id1', 'ACGT', '+', '!!!!',
              '@id2', 'TGCA', '+', '????']
     opener = mockOpen(read_data='\n'.join(lines))

     with patch.object(builtins, 'open', opener):
         reads = list(FastqReads('filename.fastq'))
         self.assertEqual(2, len(reads))
         expected = [
             DNARead('id1', 'ACGT', '!!!!'),
             DNARead('id2', 'TGCA', '????'),
         ]
         self.assertEqual(expected, reads)

Example #12

0

Show file

    def testDictLookupSequenceCrossesNewlines(self):
        """
        The __getitem__ method (i.e., dictionary-like lookup) must return the
        expected read when the sequence spans multiple lines of the input file,
        including lines ending in a newline (LF) and in a carriage return
        plus newline (CRLF).
        """
        class Open(object):
            """
            A stateful replacement for C{open} that returns the same canned
            FASTA content for the first two calls and fails the test after
            that.

            @param test: The test case instance used to make assertions.
            """
            def __init__(self, test):
                self.test = test
                self.count = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.count in (0, 1):
                    self.test.assertEqual('filename.fasta', filename)
                    self.count += 1
                    return StringIO('>id1\nACTG\r\nCCCC\nGGG\n>id2\nAACCTG\n')
                else:
                    self.test.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        sideEffect = Open(self).sideEffect
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect
            index = SqliteIndex(':memory:')
            try:
                index.addFile('filename.fasta')
                self.assertEqual(DNARead('id1', 'ACTGCCCCGGG'), index['id1'])
            finally:
                # Close the index even if the assertion above fails.
                index.close()

Example #13

0

Show file

    def testDictLookupGzipDataWithBGZsuffix(self):
        """
        The __getitem__ method (i.e., dictionary-like lookup) must return the
        expected read when the index file is in BGZF format and has a .bgz
        suffix.
        """
        class Open(object):
            """
            A stateful replacement for C{bgzf.open} that returns a BGZF
            reader over freshly compressed FASTA data for the first two
            calls and fails the test after that.

            @param test: The test case instance used to make assertions.
            """
            def __init__(self, test):
                self.test = test
                self.count = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.count <= 1:
                    self.test.assertEqual('filename.fasta.bgz', filename)
                    self.count += 1
                    # Compress the FASTA into an in-memory buffer, then hand
                    # back a reader over a copy of the compressed bytes.
                    writerIO = BytesIO()
                    writer = bgzf.BgzfWriter(fileobj=writerIO)
                    writer.write(b'>id0\nAC\n')
                    writer.flush()
                    fileobj = BytesIO(writerIO.getvalue())
                    fileobj.mode = 'rb'
                    return bgzf.BgzfReader(fileobj=fileobj)
                else:
                    self.test.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        sideEffect = Open(self).sideEffect
        with patch.object(bgzf, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect
            index = SqliteIndex(':memory:')
            try:
                index.addFile('filename.fasta.bgz')
                self.assertEqual(DNARead('id0', 'AC'), index['id0'])
            finally:
                # Close the index even if the assertion above fails.
                index.close()

Example #14

0

Show file

File: test_dna.py Project: terrycojones/dark-matter

 def testKozakConsensusAtEndPart(self):
     """
     When no Kozak consensus is found in a sequence, the result must be
     empty.
     """
     found = list(findKozakConsensus(DNARead('id', 'AAAAAAATTGCCGCCATG')))
     self.assertEqual([], found)

Example #15

0

Show file

    def testDictLookupWithFastaDirectory(self):
        """
        The __getitem__ method (i.e., dictionary-like lookup) must return the
        expected read, obtained from the expected file name, when a FASTA base
        directory is specified.
        """
        class Open(object):
            """
            A stateful replacement for C{open} that expects the original
            path on the first call and the path under the FASTA directory
            on the second, and fails the test on any further call.

            @param test: The test case instance used to make assertions.
            """
            def __init__(self, test):
                self.test = test
                self.count = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.count == 0:
                    self.test.assertEqual('/tmp/f.fasta', filename)
                    self.count += 1
                    return StringIO('>id1\nACTG\r\nCCCC\nGGG\n>id2\nAACCTG\n')
                elif self.count == 1:
                    self.test.assertEqual(
                        os.path.join('/usr/local/fasta', 'f.fasta'), filename)
                    self.count += 1
                    return StringIO('>id1\nACTG\r\nCCCC\nGGG\n>id2\nAACCTG\n')
                else:
                    self.test.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        sideEffect = Open(self).sideEffect
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect
            index = SqliteIndex(':memory:', fastaDirectory='/usr/local/fasta')
            try:
                index.addFile('/tmp/f.fasta')
                self.assertEqual(DNARead('id1', 'ACTGCCCCGGG'), index['id1'])
            finally:
                # Close the index even if the assertion above fails.
                index.close()

Example #16

0

Show file

 def testPassingRefence(self):
     """
     A reference passed to Features must be available, as the very same
     object, via the reference attribute.
     """
     ref = DNARead('refId', 'ATTC')
     self.assertIs(ref, Features({}, ref).reference)

Example #17

0

Show file

    def testTwoReads(self):
        """
        It must be possible to access a FASTA file with two reads like a dict.
        """
        # Collects whatever is written through the context manager that the
        # third call to open returns.
        pyfaidxIndex = StringIO()

        @contextmanager
        def manager():
            yield pyfaidxIndex

        class Open(object):
            """
            A stateful replacement for C{open} that returns a different
            canned object on each of the first four calls and fails the
            test on any further call.

            @param test: The test case instance used to make assertions.
            @param manager: The object to return on the third call.
            """
            def __init__(self, test, manager):
                self.test = test
                self.manager = manager
                self.count = 0

            def sideEffect(self, filename, *args, **kwargs):
                call = self.count

                if call in (0, 1):
                    # The first two calls must be for the FASTA file: the
                    # first gets bytes, the second gets text.
                    self.test.assertEqual('filename.fasta', filename)
                    self.count += 1
                    if call == 0:
                        return BytesIO(b'>id1\nACTG\n>id2\nAACCTTGG\n')
                    return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')

                if call == 2:
                    self.count += 1
                    return self.manager

                if call == 3:
                    # Serve back what has been collected in the index so far.
                    self.count += 1
                    return StringIO(pyfaidxIndex.getvalue())

                self.test.fail(
                    'Open called too many times. Filename: %r, Args: %r, '
                    'Keyword args: %r.' % (filename, args, kwargs))

        opener = Open(self, manager())
        with patch.object(builtins, 'open') as mockMethod:
            mockMethod.side_effect = opener.sideEffect
            reads = FastaFaiReads('filename.fasta')
            self.assertEqual(DNARead('id1', 'ACTG'), reads['id1'])
            self.assertEqual(DNARead('id2', 'AACCTTGG'), reads['id2'])
            # Check that the fai index was built correctly.
            self.assertEqual(pyfaidxIndex.getvalue(),
                             'id1\t4\t5\t4\t5\nid2\t8\t15\t8\t9\n')

Example #18

0

Show file

File: test_fastq.py Project: TaliVeith/dark-matter

 def testOneRead(self):
     """
     A FASTQ file with one read must be read properly.
     """
     lines = ['@id1', 'ACGT', '+', '!!!!']
     opener = mock_open(read_data='\n'.join(lines))
     with patch.object(builtins, 'open', opener):
         reads = list(FastqReads('filename.fastq'))
         expected = [DNARead('id1', 'ACGT', '!!!!')]
         self.assertEqual(expected, reads)

Example #19

0

Show file

 def testInvalidMinLength(self):
     """
     If a minLength value less than 1 is passed, a ValueError must be
     raised.
     """
     emptyRead = DNARead('id', '')
     assertRaisesRegex(
         self, ValueError, '^minLength must be at least 1$',
         sequenceCategoryLengths, emptyRead, {}, minLength=0)

Example #20

0

Show file

File: test_dna.py Project: terrycojones/dark-matter

 def testOneKozakConsensus(self):
     """
     In a given sequence with an exact Kozak consensus sequence, the offset
     and quality percentage should be as expected.
     """
     read = DNARead('id', 'ATTGCCGCCATGGGGG')
     expected = DNAKozakRead(read, 3, 13, 100.0)

     # Unpacking requires that exactly one result was found.
     [found] = list(findKozakConsensus(read))
     self.assertEqual(expected, found)

Example #21

0

Show file

File: test_dna.py Project: terrycojones/dark-matter

 def testKozakConsensusAtEnd(self):
     """
     In a given sequence with a Kozak consensus at its very end, the single
     result must have the expected offsets and quality percentage.
     """
     read = DNARead('id', 'AAAAAAATTGCCGCCATGG')
     expected = DNAKozakRead(read, 9, 19, 100.0)

     # Unpacking requires that exactly one result was found.
     [found] = list(findKozakConsensus(read))
     self.assertEqual(expected, found)

Example #22

0

Show file

File: genome.py Project: VirologyCharite/sars2seq

    def ntSequences(self, featureName):
        """
        Get the aligned nucleotide sequences.

        The result is cached per feature name, so the sequences for a
        feature are only extracted once.

        @param featureName: A C{str} feature name.
        @raise ReferenceInsertionError: If the aligned reference sequence for
            the feature contains a gap and the feature is not named
            'surface glycoprotein'.
        @return: A 2-C{tuple} of C{dark.reads.DNARead} instances, holding
            the nucleotides for the feature as located in the reference
            genome and then the corresponding nucleotides from the genome being
            examined.
        """
        try:
            return self._cache['nt'][featureName]
        except KeyError:
            # Nothing cached for this feature yet. Compute it below.
            pass

        info = self.features[featureName]
        name = info['name']
        length = info['stop'] - info['start']
        offset = self.offsetMap[info['start']]
        end = alignmentEnd(self.referenceAligned.sequence, offset, length)

        # The same region is extracted from both aligned sequences.
        region = slice(offset, end)

        referenceId = self.features.reference.id + f' ({name})'
        referenceNt = DNARead(referenceId,
                              self.referenceAligned.sequence[region])

        # In general, there should not be insertions to the reference. There
        # are lineages with insertions in the Spike (e.g. B.1.214.2) that we
        # can correct for in the downstream processing, therefore the error is
        # not raised for the Spike.
        if name != 'surface glycoprotein' and '-' in referenceNt.sequence:
            raise ReferenceInsertionError(
                f'MAFFT suggests a reference insertion into {featureName!r}.')

        genomeId = self.genome.id + f' ({name})'
        genomeNt = DNARead(genomeId, self.genomeAligned.sequence[region])

        if DEBUG:
            print('NT MATCH:')
            print('ref  nt:', referenceNt.sequence[SLICE])
            print('gen  nt:', genomeNt.sequence[SLICE])

        result = referenceNt, genomeNt
        self._cache['nt'][featureName] = result

        return result

Example #23

0

Show file

File: test_genome.py Project: VirologyCharite/sars2seq

    def testNtSequencesGenomeGap(self):
        """
        The genome must be able to have a gap relative to the reference.
        """
        # The genome has one T fewer than the reference in its run of Ts.
        referenceSequence = 'TGGCGTGGA' + ('T' * 20) + 'CAAATCGG'
        genomeFeature = 'TGGA' + ('T' * 19) + 'CAAATCGG'
        genomeSequence = 'CCCGGTGGCG' + genomeFeature + 'CCCCCCC'

        spike = {
            'name': 'spike',
            'sequence': referenceSequence,
            'start': 5,
            'stop': len(referenceSequence),
        }
        features = Features({'spike': spike},
                            DNARead('refId', referenceSequence))
        genome = SARS2Genome(DNARead('genId', genomeSequence), features)

        # NOTE: checks on a genome offset (None before ntSequences is
        # called, 5 afterwards) were disabled in the original test and
        # remain disabled here.

        referenceNt, genomeNt = genome.ntSequences('spike')

        self.assertEqual(referenceSequence[5:], referenceNt.sequence)
        self.assertEqual('refId (spike)', referenceNt.id)

        # The missing T shows up as a gap character in the aligned genome.
        self.assertEqual('TGGA-' + ('T' * 19) + 'CAAATCGG',
                         genomeNt.sequence)
        self.assertEqual('genId (spike)', genomeNt.id)

        change = 'T5-'
        testCount, errorCount, result = genome.checkFeature(
            'spike', change, True)

        self.assertEqual(1, testCount)
        self.assertEqual(0, errorCount)
        self.assertEqual((True, 'T', True, '-'), result[change])

Example #24

0

Show file

    def testDictLookupGzipData(self):
        """
        The __getitem__ method (i.e., dictionary-like lookup) must return the
        expected reads when sequences span multiple lines of the input file,
        and include lines ending in a newline (LF) and in a carriage return
        plus newline (CRLF) and have been compressed with bgzip, including
        when sequences are more than 64K bytes into the input file.
        """
        class Open(object):
            """
            A stateful replacement for C{bgzf.open} that returns a BGZF
            reader over freshly compressed FASTA data for the first five
            calls and fails the test after that.

            @param test: The test case instance used to make assertions.
            """
            def __init__(self, test):
                self.test = test
                self.count = 0

            def sideEffect(self, filename, *args, **kwargs):
                if self.count <= 4:
                    self.test.assertEqual('filename.fasta.gz', filename)
                    self.count += 1
                    # Compress the FASTA into an in-memory buffer, then hand
                    # back a reader over a copy of the compressed bytes. The
                    # 70000-base sequence puts the later records more than
                    # 64K bytes into the uncompressed data.
                    writerIO = BytesIO()
                    writer = bgzf.BgzfWriter(fileobj=writerIO)
                    writer.write(b'>id0\nAC\n' + b'>id1\n' + (b'A' * 70000) +
                                 b'\n' + b'>id2\r\nACTG\r\nCCCC\r\nGGG\r\n' +
                                 b'>id3\nAACCTG\n')
                    writer.flush()
                    fileobj = BytesIO(writerIO.getvalue())
                    fileobj.mode = 'rb'
                    return bgzf.BgzfReader(fileobj=fileobj)
                else:
                    self.test.fail(
                        'Open called too many times. Filename: %r, Args: %r, '
                        'Keyword args: %r.' % (filename, args, kwargs))

        sideEffect = Open(self).sideEffect
        with patch.object(bgzf, 'open') as mockMethod:
            mockMethod.side_effect = sideEffect
            index = SqliteIndex(':memory:')
            try:
                index.addFile('filename.fasta.gz')
                self.assertEqual(DNARead('id0', 'AC'), index['id0'])
                self.assertEqual(DNARead('id1', 'A' * 70000), index['id1'])
                self.assertEqual(DNARead('id2', 'ACTGCCCCGGG'), index['id2'])
                self.assertEqual(DNARead('id3', 'AACCTG'), index['id3'])
            finally:
                # Close the index even if an assertion above fails.
                index.close()

Example #25

0

Show file

File: test_genome.py Project: VirologyCharite/sars2seq

    def testAaSequencesTranslationNoSlipperySequenceRaise(self):
        """
        The aaSequences function must raise if it can't translate an
        'ORF1ab polyprotein' sequence due to a missing slippery sequence.
        """
        featureName = 'ORF1ab polyprotein'
        orf1ab = {
            'name': 'ORF1ab polyprotein',
            'sequence': 'ATTC',
            'start': 0,
            'stop': 4,
        }
        features = Features({featureName: orf1ab}, DNARead('refId', 'ATTC'))
        genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)

        with self.assertRaisesRegex(NoSlipperySequenceError,
                                    r'^No slippery sequence found\.$'):
            genome.aaSequences(featureName)

Example #26

0

Show file

    def testUnspecifiedReference(self):
        """
        If a reference is not specified and the BAM file mentions more than
        one, UnspecifiedReference must be raised.
        """
        template = ('ACGTTCCG', )

        bamReferences = [DNARead('ref-1', template[0]), DNARead('ref-2', 'AA')]
        fastaReferences = [DNARead('ref-3', 'AAA')]
        with makeBAM(template,
                     bamReferences=bamReferences,
                     fastaReferences=fastaReferences) as (fastaFilename,
                                                          bamFilename):
            # Both full stops are escaped so that each matches only a
            # literal '.', not an arbitrary character.
            error = (
                r'^Could not infer a BAM reference\. Available references '
                r'are: ref-1, ref-2\.$')
            self.assertRaisesRegex(UnspecifiedReference,
                                   error,
                                   consensusFromBAM,
                                   bamFilename,
                                   referenceFasta=fastaFilename)

Example #27

0

Show file

File: test_genome.py Project: VirologyCharite/sars2seq

    def testNtSequencesChangesIndexErrorPrint(self):
        """
        If we check on nucleotide sequences with an out-of-range
        check, an error must be printed if we pass onError='print'
        and the expected error result must be returned.
        """
        spike = {
            'name': 'spike',
            'sequence': 'ATTC',
            'start': 0,
            'stop': 4,
        }
        features = Features({'spike': spike}, DNARead('refId', 'ATTC'))
        genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)

        errFp = StringIO()

        # Two lines of error output are expected: the first names the
        # reference sequence, the second the genome sequence.
        expectedOutput = (
            r"Index 99999 out of range trying to access feature "
            r"'spike' of length 4 sequence 'refId (spike)' via "
            r"expected change specification 'A100000A'."
            "\n"
            r"Index 99999 out of range trying to access feature "
            r"'spike' of length 4 sequence 'genId (spike)' via "
            r"expected change specification 'A100000A'."
            "\n"
        )

        change = 'A100000A'
        testCount, errorCount, result = genome.checkFeature(
            'spike', change, nt=True, onError='print', errFp=errFp)
        self.assertEqual(expectedOutput, errFp.getvalue())

        self.assertEqual(1, testCount)
        self.assertEqual(1, errorCount)
        self.assertEqual((False, None, False, None), result[change])

Example #28

0

Show file

 def testUnknownCategory(self):
     """
     If a base has no category, the summary must have C{None} as the
     category for those bases.
     """
     # 'C' is deliberately left without a category.
     categories = dict(A='a', G='g', T='t')
     summary = sequenceCategoryLengths(DNARead('id', 'ACCGGTTT'),
                                       categories)
     self.assertEqual([('a', 1), (None, 2), ('g', 2), ('t', 3)], summary)

Example #29

0

Show file

 def testUnknownCategoryWithDefault(self):
     """
     If a base has no category, the summary must have the passed default
     category as the category for those bases.
     """
     # 'C' is deliberately left without a category.
     categories = dict(A='a', G='g', T='t')
     summary = sequenceCategoryLengths(DNARead('id', 'ACCGGTTT'),
                                       categories, 'xxx')
     self.assertEqual([('a', 1), ('xxx', 2), ('g', 2), ('t', 3)], summary)

Example #30

0

Show file

File: test_genome.py Project: VirologyCharite/sars2seq

    def testAaSequencesChangesTranslationErrorRaise(self):
        """
        Check that a NoSlipperySequenceError is raised when checking AA
        sequences for a feature named 'ORF1ab polyprotein' in a genome
        where no slippery sequence is found.
        """
        orf1ab = {
            'name': 'ORF1ab polyprotein',
            'sequence': 'ATTC',
            'start': 0,
            'stop': 4,
        }
        features = Features({'orf1ab': orf1ab}, DNARead('refId', 'ATTC'))
        genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)

        with self.assertRaisesRegex(NoSlipperySequenceError,
                                    r"^No slippery sequence found\.$"):
            genome.checkFeature('orf1ab', 'A100000A', False)