def testTwoFiles(self):
    """
    It must be possible to read from two FASTA files.
    """
    class SideEffect(object):
        """
        Stand in for C{open}, returning canned FASTA content for the two
        expected file names, in the order they must be opened.

        @param test: The C{TestCase} instance used to make assertions.
        """
        def __init__(self, test):
            self.test = test
            self.count = 0

        def sideEffect(self, filename, **kwargs):
            if self.count == 0:
                self.test.assertEqual('file1.fasta', filename)
                self.count += 1
                return File(['>id1\n', 'ACTG\n'])
            elif self.count == 1:
                self.test.assertEqual('file2.fasta', filename)
                self.count += 1
                return File(['>id2\n', 'CAGT\n'])
            else:
                # Report the problem via the test case. This helper
                # object has no 'fail' method of its own, so calling
                # self.fail here would raise an AttributeError instead
                # of producing a test failure.
                self.test.fail('We are only supposed to be called twice!')

    sideEffect = SideEffect(self)
    with patch.object(builtins, 'open') as mockMethod:
        mockMethod.side_effect = sideEffect.sideEffect
        reads = FastaReads(['file1.fasta', 'file2.fasta'])
        self.assertEqual(
            [
                DNARead('id1', 'ACTG'),
                DNARead('id2', 'CAGT'),
            ],
            list(reads))
def testNtSequencesChangesTuple(self):
    """
    Aligned nucleotide sequences must be retrievable, and changes in them
    must be checkable when the changes are given as a tuple specification.
    """
    spike = {
        'name': 'spike',
        'sequence': 'ATTC',
        'start': 0,
        'stop': 4,
    }
    features = Features({'spike': spike}, DNARead('refId', 'ATTC'))
    genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)

    # Each pair holds a change (note: tuple specifications use 0-based
    # offsets) and the result expected for that change.
    expectations = (
        (('A', 0, 'A'), (True, 'A', True, 'A')),
        (('T', 1, 'A'), (True, 'T', False, 'T')),
        (('A', 2, 'T'), (False, 'T', True, 'T')),
        (('T', 3, 'T'), (False, 'C', False, 'C')),
    )
    changes = tuple(change for change, _ in expectations)

    testCount, errorCount, result = genome.checkFeature(
        'spike', changes, True)

    self.assertEqual(4, testCount)
    self.assertEqual(3, errorCount)
    for change, outcome in expectations:
        self.assertEqual(outcome, result[change])
def testCorrectNumberOfAlignments(self):
    """
    Reading alignments with a JSONRecordsReader must produce the expected
    number of alignments (four, for the four reads given here).
    """
    # All read ids share the same instrument prefix and index suffix and
    # differ only in the trailing coordinates.
    idTemplate = 'BIOMICS-HISEQTP:140:HJFH5BCXX:1:1101:%s 1:N:0:TGACCA'
    readData = (
        ('9489:4234',
         'AGGGCTCGGATGCTGTGGGTGTTTGTGTGGAGTTGGGTGTGTTTTCGGGG'
         'GTGGTTGAGTGGAGGGATTGCTGTTGGATTGTGTGTTTTGTTGTGGTTGCG'),
        ('19964:6287',
         'TTTTTCTCCTGCGTAGATGAACCTACCCATGGCTTAGTAGGTCCTCTTTC'
         'ACCACGAGTTAAACCATTAACATTATATTTTTCTATAATTATACCACTGGC'),
        ('11488:7488',
         'ACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTTAGCCTCCTGAATA'
         'GCTGGGATTACAGGTATGCAGGAGGCTAAGGCAGGAGAATTGCTTGAACCT'),
        ('14734:7512',
         'GAGGGTGGAGGTAACTGAGGAAGCAAAGGCTTGGAGACAGGGCCCCTCAT'
         'AGCCAGTGAGTGCGCCATTTTCTTTGGAGCAATTGGGTGGGGAGATGGGGC'),
    )
    reads = Reads([
        DNARead(idTemplate % coordinates, sequence)
        for coordinates, sequence in readData
    ])

    with patch.object(builtins, 'open', mockOpen(read_data=JSON)):
        reader = JSONRecordsReader('file.json')
        alignmentCount = len(list(reader.readAlignments(reads)))
        self.assertEqual(4, alignmentCount)
def testDictLookupWithTwoFiles(self):
    """
    Dictionary-style lookup (via __getitem__) must return the expected
    reads when sequences have been added from two files.
    """
    firstFile = ('filename1.fasta', '>id1\nACTG\n>id2\nAACCTTGG\n')
    secondFile = ('filename2.fasta', '>seq3\nAAACCC\n')

    # The file name (and the content to hand back) expected on each
    # successive call to open. Any further call is a test failure.
    expectedCalls = (firstFile, secondFile, firstFile, firstFile,
                     secondFile)
    callCount = [0]

    def fakeOpen(filename, *args, **kwargs):
        if callCount[0] >= len(expectedCalls):
            self.fail(
                'Open called too many times. Filename: %r, Args: %r, '
                'Keyword args: %r.' % (filename, args, kwargs))
        expectedFilename, content = expectedCalls[callCount[0]]
        self.assertEqual(expectedFilename, filename)
        callCount[0] += 1
        return StringIO(content)

    with patch.object(builtins, 'open', side_effect=fakeOpen):
        index = SqliteIndex(':memory:')
        for fastaFilename in 'filename1.fasta', 'filename2.fasta':
            index.addFile(fastaFilename)
        self.assertEqual(DNARead('id1', 'ACTG'), index['id1'])
        self.assertEqual(DNARead('id2', 'AACCTTGG'), index['id2'])
        self.assertEqual(DNARead('seq3', 'AAACCC'), index['seq3'])
        index.close()
def testCorrectNumberOfAlignmentsWhenReadIdsAreAbbreviated(self):
    """
    A JSONRecordsReader must return the expected number of alignments
    when read ids are truncated at the first space. That is, the BLAST
    output has long query names containing a space, whereas the reads
    from the FASTA carry only the first part of those names (up to the
    first space).
    """
    # The ids below are the part of the full query names before the
    # first space. They differ only in the trailing coordinates.
    idPrefix = 'BIOMICS-HISEQTP:140:HJFH5BCXX:1:1101:'
    readData = (
        ('9489:4234',
         'AGGGCTCGGATGCTGTGGGTGTTTGTGTGGAGTTGGGTGTGTTTTCGGGG'
         'GTGGTTGAGTGGAGGGATTGCTGTTGGATTGTGTGTTTTGTTGTGGTTGCG'),
        ('19964:6287',
         'TTTTTCTCCTGCGTAGATGAACCTACCCATGGCTTAGTAGGTCCTCTTTC'
         'ACCACGAGTTAAACCATTAACATTATATTTTTCTATAATTATACCACTGGC'),
        ('11488:7488',
         'ACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTTAGCCTCCTGAATA'
         'GCTGGGATTACAGGTATGCAGGAGGCTAAGGCAGGAGAATTGCTTGAACCT'),
        ('14734:7512',
         'GAGGGTGGAGGTAACTGAGGAAGCAAAGGCTTGGAGACAGGGCCCCTCAT'
         'AGCCAGTGAGTGCGCCATTTTCTTTGGAGCAATTGGGTGGGGAGATGGGGC'),
    )
    reads = Reads([
        DNARead(idPrefix + coordinates, sequence)
        for coordinates, sequence in readData
    ])

    with patch.object(builtins, 'open', mockOpen(read_data=JSON)):
        reader = JSONRecordsReader('file.json')
        alignmentCount = len(list(reader.readAlignments(reads)))
        self.assertEqual(4, alignmentCount)
def testNtSequencesChangesIndexErrorIgnore(self):
    """
    Checking nucleotide sequences with an out-of-range change must print
    nothing when onError='ignore' is passed, and the expected error
    result must still be returned.
    """
    change = 'A100000A'
    spike = {
        'name': 'spike',
        'sequence': 'ATTC',
        'start': 0,
        'stop': 4,
    }
    features = Features({'spike': spike}, DNARead('refId', 'ATTC'))
    genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)
    errorOutput = StringIO()

    testCount, errorCount, result = genome.checkFeature(
        'spike', change, nt=True, onError='ignore', errFp=errorOutput)

    # Nothing may have been written to the error stream.
    self.assertEqual('', errorOutput.getvalue())
    self.assertEqual(1, testCount)
    self.assertEqual(1, errorCount)
    self.assertEqual((False, None, False, None), result[change])
def testAaSequencesChangesTranslationErrorIgnore(self):
    """
    Checking AA sequences with onError='ignore' must print no error and
    must return the expected result.
    """
    change = 'A100000A'
    orf1ab = {
        'name': 'ORF1ab polyprotein',
        'sequence': 'ATTC',
        'start': 0,
        'stop': 4,
    }
    features = Features({'orf1ab': orf1ab}, DNARead('refId', 'ATTC'))
    genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)
    errorOutput = StringIO()

    testCount, errorCount, result = genome.checkFeature(
        'orf1ab', change, nt=False, onError='ignore', errFp=errorOutput)

    # Nothing may have been written to the error stream.
    self.assertEqual('', errorOutput.getvalue())
    self.assertEqual(1, testCount)
    self.assertEqual(1, errorCount)
    self.assertEqual((False, None, False, None), result[change])
def testNtSequencesChangesString(self):
    """
    Aligned nucleotide sequences must be retrievable, and changes in them
    must be checkable when the changes are given as a string
    specification.
    """
    spike = {
        'name': 'spike',
        'sequence': 'ATTC',
        'start': 0,
        'stop': 4,
    }
    features = Features({'spike': spike}, DNARead('refId', 'ATTC'))
    genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)

    # Each pair holds a change (note: string specifications use 1-based
    # locations) and the result expected for that change.
    expectations = (
        ('A1A', (True, 'A', True, 'A')),
        ('T2A', (True, 'T', False, 'T')),
        ('A3T', (False, 'T', True, 'T')),
        ('T4T', (False, 'C', False, 'C')),
    )
    # The specification string is the changes separated by single spaces.
    specification = ' '.join(change for change, _ in expectations)

    testCount, errorCount, result = genome.checkFeature(
        'spike', specification, True)

    self.assertEqual(4, testCount)
    self.assertEqual(3, errorCount)
    for change, outcome in expectations:
        self.assertEqual(outcome, result[change])
def _checkTranslation(self, genome, ranges, protein):
    """
    Verify that a protein's stored sequence can be obtained by
    translating the nucleotide ranges it occupies in its genome.

    @param genome: A C{dict} with genome information from our sqlite3
        protein/genome database, as returned by
        C{dark.civ.proteins.SqliteIndex.findGenome}.
    @param ranges: A C{list} of (start, stop, forward) nucleotide ranges
        for the protein in the genome.
    @param protein: A C{dict} with protein information from our sqlite3
        protein/genome database, as returned by
        C{dark.civ.proteins.SqliteIndex.findProtein}.
    @raise ValueError: If the selected translation of the joined genome
        ranges is not equal to the protein sequence followed by '*'.
    """
    # The translation is expected to end with a '*' that the stored
    # protein sequence does not include.
    expected = protein['sequence'] + '*'

    # Join the nucleotides from all ranges (the per-range direction flag
    # is not used here).
    nucleotides = ''.join(
        genome['sequence'][start:stop] for start, stop, _ in ranges)
    candidates = list(DNARead('id', nucleotides).translations())

    # Translation 0 is used for forward proteins and translation 3
    # otherwise. NOTE(review): presumably these are the first forward and
    # first reverse-complement reading frames - confirm against
    # DNARead.translations.
    if protein['forward']:
        frame = 0
    else:
        frame = 3

    if candidates[frame].sequence == expected:
        return

    # TODO: improve this error to show what actually went wrong.
    raise ValueError(
        'Could not translate genome range to get protein sequence')
def testFindTwoKozakConsensi(self):
    """
    When a sequence holds two Kozak consensuses at different offsets and
    with different qualities, both must be found, with the expected
    values.
    """
    sequence = 'ATTGCCGCCATGGGGGGCCATGG'
    # Each expected Kozak read wraps its own (equal) copy of the read.
    expected = [
        DNAKozakRead(DNARead('id', sequence), 3, 13, 100.0),
        DNAKozakRead(DNARead('id', sequence), 13, 23, 60.0),
    ]
    found = list(findKozakConsensus(DNARead('id', sequence)))
    self.assertEqual(expected, found)
def testTwoReads(self):
    """
    Two reads in a FASTQ file must both be read, and must be returned in
    the order in which they appear in the file.
    """
    # Two four-line FASTQ records, with no newline after the last line.
    fastq = '@id1\nACGT\n+\n!!!!\n@id2\nTGCA\n+\n????'
    expected = [
        DNARead('id1', 'ACGT', '!!!!'),
        DNARead('id2', 'TGCA', '????'),
    ]
    with patch.object(builtins, 'open', mockOpen(read_data=fastq)):
        found = list(FastqReads('filename.fastq'))
        self.assertEqual(2, len(found))
        self.assertEqual(expected, found)
def testDictLookupSequenceCrossesNewlines(self):
    """
    Dictionary-style lookup (via __getitem__) must return the expected
    read when its sequence spans several lines of the input file,
    including lines terminated by LF and by CR LF.
    """
    fasta = '>id1\nACTG\r\nCCCC\nGGG\n>id2\nAACCTG\n'
    callCount = [0]

    def fakeOpen(filename, *args, **kwargs):
        # Only the first two calls to open are expected.
        if callCount[0] > 1:
            self.fail(
                'Open called too many times. Filename: %r, Args: %r, '
                'Keyword args: %r.' % (filename, args, kwargs))
        self.assertEqual('filename.fasta', filename)
        callCount[0] += 1
        return StringIO(fasta)

    with patch.object(builtins, 'open', side_effect=fakeOpen):
        index = SqliteIndex(':memory:')
        index.addFile('filename.fasta')
        self.assertEqual(DNARead('id1', 'ACTGCCCCGGG'), index['id1'])
        index.close()
def testDictLookupGzipDataWithBGZsuffix(self):
    """
    Dictionary-style lookup (via __getitem__) must return the expected
    read when the added file is in BGZF format and has a .bgz suffix.
    """
    callCount = [0]

    def fakeBgzfOpen(filename, *args, **kwargs):
        # Only the first two calls to bgzf.open are expected.
        if callCount[0] > 1:
            self.fail(
                'Open called too many times. Filename: %r, Args: %r, '
                'Keyword args: %r.' % (filename, args, kwargs))
        self.assertEqual('filename.fasta.bgz', filename)
        callCount[0] += 1

        # Write the FASTA as BGZF into memory, then hand back a reader
        # over a fresh copy of the compressed bytes.
        compressed = BytesIO()
        bgzfWriter = bgzf.BgzfWriter(fileobj=compressed)
        bgzfWriter.write(b'>id0\nAC\n')
        bgzfWriter.flush()
        source = BytesIO(compressed.getvalue())
        source.mode = 'rb'
        return bgzf.BgzfReader(fileobj=source)

    with patch.object(bgzf, 'open', side_effect=fakeBgzfOpen):
        index = SqliteIndex(':memory:')
        index.addFile('filename.fasta.bgz')
        self.assertEqual(DNARead('id0', 'AC'), index['id0'])
        index.close()
def testKozakConsensusAtEndPart(self):
    """
    No Kozak consensus must be reported for this sequence, which ends
    immediately after ATG.
    """
    sequence = 'AAAAAAATTGCCGCCATG'
    found = list(findKozakConsensus(DNARead('id', sequence)))
    self.assertEqual([], found)
def testDictLookupWithFastaDirectory(self):
    """
    Dictionary-style lookup (via __getitem__) must return the expected
    read, obtained from the expected file name, when a FASTA base
    directory is specified.
    """
    fasta = '>id1\nACTG\r\nCCCC\nGGG\n>id2\nAACCTG\n'

    # The first call to open must use the name the file was added under.
    # The second must use the same base name, located in the FASTA
    # directory given to the index.
    expectedFilenames = (
        '/tmp/f.fasta',
        os.path.join('/usr/local/fasta', 'f.fasta'),
    )
    callCount = [0]

    def fakeOpen(filename, *args, **kwargs):
        if callCount[0] >= len(expectedFilenames):
            self.fail(
                'Open called too many times. Filename: %r, Args: %r, '
                'Keyword args: %r.' % (filename, args, kwargs))
        self.assertEqual(expectedFilenames[callCount[0]], filename)
        callCount[0] += 1
        return StringIO(fasta)

    with patch.object(builtins, 'open', side_effect=fakeOpen):
        index = SqliteIndex(':memory:', fastaDirectory='/usr/local/fasta')
        index.addFile('/tmp/f.fasta')
        self.assertEqual(DNARead('id1', 'ACTGCCCCGGG'), index['id1'])
        index.close()
def testPassingRefence(self):
    """
    A reference passed to Features must be available, as the very same
    object, via the 'reference' attribute.
    """
    ref = DNARead('refId', 'ATTC')
    self.assertIs(ref, Features({}, ref).reference)
def testTwoReads(self):
    """
    A FASTA file containing two reads must be accessible like a dict.
    """
    # Collects whatever is written via the context manager that the
    # third call to open returns.
    pyfaidxIndex = StringIO()

    @contextmanager
    def indexContext():
        yield pyfaidxIndex

    indexManager = indexContext()
    callCount = [0]

    def fakeOpen(filename, *args, **kwargs):
        call = callCount[0]
        if call > 3:
            self.fail(
                'Open called too many times. Filename: %r, Args: %r, '
                'Keyword args: %r.' % (filename, args, kwargs))
        # The file name is only checked on the first two calls.
        if call < 2:
            self.assertEqual('filename.fasta', filename)
        callCount[0] = call + 1

        if call == 0:
            # First the FASTA is handed back as bytes...
            return BytesIO(b'>id1\nACTG\n>id2\nAACCTTGG\n')
        if call == 1:
            # ... then as text.
            return StringIO('>id1\nACTG\n>id2\nAACCTTGG\n')
        if call == 2:
            return indexManager
        # Finally, give read access to what has been written to the
        # index so far.
        return StringIO(pyfaidxIndex.getvalue())

    with patch.object(builtins, 'open', side_effect=fakeOpen):
        reads = FastaFaiReads('filename.fasta')
        self.assertEqual(DNARead('id1', 'ACTG'), reads['id1'])
        self.assertEqual(DNARead('id2', 'AACCTTGG'), reads['id2'])
        # Check that the fai index was built correctly.
        self.assertEqual(pyfaidxIndex.getvalue(),
                         'id1\t4\t5\t4\t5\nid2\t8\t15\t8\t9\n')
def testOneRead(self):
    """
    A single read in a FASTQ file must be read properly.
    """
    # One four-line FASTQ record, with no newline after the last line.
    fastq = '@id1\nACGT\n+\n!!!!'
    opener = mock_open(read_data=fastq)
    with patch.object(builtins, 'open', opener):
        found = list(FastqReads('filename.fastq'))
        self.assertEqual([DNARead('id1', 'ACGT', '!!!!')], found)
def testInvalidMinLength(self):
    """
    Passing a minLength value that is less than 1 must result in a
    ValueError.
    """
    emptyRead = DNARead('id', '')
    expectedMessage = '^minLength must be at least 1$'
    noCategories = {}
    assertRaisesRegex(
        self, ValueError, expectedMessage,
        sequenceCategoryLengths, emptyRead, noCategories, minLength=0)
def testOneKozakConsensus(self):
    """
    When a sequence holds an exact Kozak consensus sequence, exactly one
    result must be found, with the expected offsets and quality
    percentage.
    """
    dnaRead = DNARead('id', 'ATTGCCGCCATGGGGG')
    # The unpacking fails unless exactly one Kozak read is found.
    [found] = list(findKozakConsensus(dnaRead))
    self.assertEqual(DNAKozakRead(dnaRead, 3, 13, 100.0), found)
def testKozakConsensusAtEnd(self):
    """
    When a sequence ends with a Kozak consensus, exactly one result must
    be found, with the expected offsets and quality percentage.
    """
    dnaRead = DNARead('id', 'AAAAAAATTGCCGCCATGG')
    # The unpacking fails unless exactly one Kozak read is found.
    [found] = list(findKozakConsensus(dnaRead))
    self.assertEqual(DNAKozakRead(dnaRead, 9, 19, 100.0), found)
def ntSequences(self, featureName):
    """
    Get the aligned nucleotide sequences for a feature.

    The result for each feature is cached, so the sequences are only
    extracted from the alignment the first time a feature is requested.

    @param featureName: A C{str} feature name.
    @raise ReferenceInsertionError: If the aligned reference sequence for
        the feature contains a gap ('-') and the feature's name is not
        'surface glycoprotein'.
    @return: A 2-C{tuple} of C{dark.reads.DNARead} instances, holding the
        nucleotides for the feature as located in the reference genome
        and then the corresponding nucleotides from the genome being
        examined.
    """
    try:
        cached = self._cache['nt'][featureName]
    except KeyError:
        pass
    else:
        return cached

    feature = self.features[featureName]
    name = feature['name']
    idSuffix = f' ({name})'
    featureLength = feature['stop'] - feature['start']

    # Find where the feature lies in the aligned sequences.
    begin = self.offsetMap[feature['start']]
    end = alignmentEnd(self.referenceAligned.sequence, begin,
                       featureLength)
    region = slice(begin, end)

    referenceNt = DNARead(self.features.reference.id + idSuffix,
                          self.referenceAligned.sequence[region])

    # In general, there should not be insertions to the reference. There
    # are lineages with insertions in the Spike (e.g. B.1.214.2) that we
    # can correct for in the downstream processing, therefore the error
    # is not raised for the Spike.
    if name != 'surface glycoprotein':
        if '-' in referenceNt.sequence:
            raise ReferenceInsertionError(
                f'MAFFT suggests a reference insertion into '
                f'{featureName!r}.')

    genomeNt = DNARead(self.genome.id + idSuffix,
                       self.genomeAligned.sequence[region])

    if DEBUG:
        print('NT MATCH:')
        print('ref nt:', referenceNt.sequence[SLICE])
        print('gen nt:', genomeNt.sequence[SLICE])

    result = (referenceNt, genomeNt)
    self._cache['nt'][featureName] = result
    return result
def testNtSequencesGenomeGap(self):
    """
    It must be possible for the genome to have a gap relative to the
    reference.
    """
    tail = 'CAAATCGG'
    referenceSequence = 'TGGCGTGGA' + ('T' * 20) + tail
    # The genome's version of the feature has one fewer T than the
    # reference has.
    genomeFeature = 'TGGA' + ('T' * 19) + tail
    genomeSequence = 'CCCGGTGGCG' + genomeFeature + 'CCCCCCC'

    spike = {
        'name': 'spike',
        'sequence': referenceSequence,
        'start': 5,
        'stop': len(referenceSequence),
    }
    features = Features({'spike': spike},
                        DNARead('refId', referenceSequence))
    genome = SARS2Genome(DNARead('genId', genomeSequence), features)

    referenceNt, genomeNt = genome.ntSequences('spike')

    self.assertEqual(referenceSequence[5:], referenceNt.sequence)
    self.assertEqual('refId (spike)', referenceNt.id)

    # The missing T must appear as a gap in the aligned genome.
    expectedGenome = 'TGGA-' + ('T' * 19) + tail
    self.assertEqual(expectedGenome, genomeNt.sequence)
    self.assertEqual('genId (spike)', genomeNt.id)

    change = 'T5-'
    testCount, errorCount, result = genome.checkFeature(
        'spike', change, True)

    self.assertEqual(1, testCount)
    self.assertEqual(0, errorCount)
    self.assertEqual((True, 'T', True, '-'), result[change])
def testDictLookupGzipData(self):
    """
    Dictionary-style lookup (via __getitem__) must return the expected
    reads when sequences span multiple lines of the input file, with
    lines terminated by LF and by CR LF, when the file has been
    compressed with bgzip, including when sequences are more than 64K
    bytes into the input file.
    """
    callCount = [0]

    def fakeBgzfOpen(filename, *args, **kwargs):
        # Only the first five calls to bgzf.open are expected.
        if callCount[0] > 4:
            self.fail(
                'Open called too many times. Filename: %r, Args: %r, '
                'Keyword args: %r.' % (filename, args, kwargs))
        self.assertEqual('filename.fasta.gz', filename)
        callCount[0] += 1

        fasta = b''.join([
            b'>id0\nAC\n',
            # A 70000 nucleotide sequence puts the records that follow
            # it more than 64K bytes into the data.
            b'>id1\n', b'A' * 70000, b'\n',
            b'>id2\r\nACTG\r\nCCCC\r\nGGG\r\n',
            b'>id3\nAACCTG\n',
        ])

        # Write the FASTA as BGZF into memory, then hand back a reader
        # over a fresh copy of the compressed bytes.
        compressed = BytesIO()
        bgzfWriter = bgzf.BgzfWriter(fileobj=compressed)
        bgzfWriter.write(fasta)
        bgzfWriter.flush()
        source = BytesIO(compressed.getvalue())
        source.mode = 'rb'
        return bgzf.BgzfReader(fileobj=source)

    with patch.object(bgzf, 'open', side_effect=fakeBgzfOpen):
        index = SqliteIndex(':memory:')
        index.addFile('filename.fasta.gz')
        self.assertEqual(DNARead('id0', 'AC'), index['id0'])
        self.assertEqual(DNARead('id1', 'A' * 70000), index['id1'])
        self.assertEqual(DNARead('id2', 'ACTGCCCCGGG'), index['id2'])
        self.assertEqual(DNARead('id3', 'AACCTG'), index['id3'])
        index.close()
def testAaSequencesTranslationNoSlipperySequenceRaise(self):
    """
    Calling aaSequences must raise NoSlipperySequenceError when an
    'ORF1ab polyprotein' sequence cannot be translated because no
    slippery sequence is found.
    """
    featureName = 'ORF1ab polyprotein'
    orf1ab = {
        'name': 'ORF1ab polyprotein',
        'sequence': 'ATTC',
        'start': 0,
        'stop': 4,
    }
    features = Features({featureName: orf1ab}, DNARead('refId', 'ATTC'))
    genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)

    expectedMessage = r'^No slippery sequence found\.$'
    with self.assertRaisesRegex(NoSlipperySequenceError, expectedMessage):
        genome.aaSequences(featureName)
def testUnspecifiedReference(self):
    """
    If a reference is not specified and the BAM file mentions more than
    one, UnspecifiedReference must be raised.
    """
    template = ('ACGTTCCG', )
    bamReferences = [DNARead('ref-1', template[0]), DNARead('ref-2', 'AA')]
    fastaReferences = [DNARead('ref-3', 'AAA')]

    with makeBAM(template, bamReferences=bamReferences,
                 fastaReferences=fastaReferences) as (fastaFilename,
                                                      bamFilename):
        # Every literal period is escaped so the pattern matches only the
        # exact message text. An unescaped '.' would match any character
        # in that position. Only the BAM references (not the FASTA one)
        # are expected to be listed.
        error = (
            r'^Could not infer a BAM reference\. Available references '
            r'are: ref-1, ref-2\.$')
        self.assertRaisesRegex(UnspecifiedReference, error,
                               consensusFromBAM, bamFilename,
                               referenceFasta=fastaFilename)
def testNtSequencesChangesIndexErrorPrint(self):
    """
    Checking nucleotide sequences with an out-of-range change must print
    an error when onError='print' is passed, and the expected error
    result must be returned.
    """
    change = 'A100000A'
    spike = {
        'name': 'spike',
        'sequence': 'ATTC',
        'start': 0,
        'stop': 4,
    }
    features = Features({'spike': spike}, DNARead('refId', 'ATTC'))
    genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)
    errorOutput = StringIO()

    # Two lines of error output are expected: one for the reference
    # sequence, then one for the genome sequence. They differ only in
    # the sequence id.
    expectedOutput = ''.join(
        ("Index 99999 out of range trying to access feature "
         "'spike' of length 4 sequence '%s (spike)' via "
         "expected change specification 'A100000A'.\n" % sequenceId)
        for sequenceId in ('refId', 'genId'))

    testCount, errorCount, result = genome.checkFeature(
        'spike', change, nt=True, onError='print', errFp=errorOutput)

    self.assertEqual(expectedOutput, errorOutput.getvalue())
    self.assertEqual(1, testCount)
    self.assertEqual(1, errorCount)
    self.assertEqual((False, None, False, None), result[change])
def testUnknownCategory(self):
    """
    Bases that have no category must be summarized with C{None} as their
    category.
    """
    # Note that there is no category for 'C'.
    categories = dict(A='a', G='g', T='t')
    expected = [('a', 1), (None, 2), ('g', 2), ('t', 3)]
    summary = sequenceCategoryLengths(DNARead('id', 'ACCGGTTT'),
                                      categories)
    self.assertEqual(expected, summary)
def testUnknownCategoryWithDefault(self):
    """
    Bases that have no category must be summarized with the passed
    default category as their category.
    """
    # Note that there is no category for 'C'.
    categories = dict(A='a', G='g', T='t')
    expected = [('a', 1), ('xxx', 2), ('g', 2), ('t', 3)]
    summary = sequenceCategoryLengths(DNARead('id', 'ACCGGTTT'),
                                      categories, 'xxx')
    self.assertEqual(expected, summary)
def testAaSequencesChangesTranslationErrorRaise(self):
    """
    Checking AA sequences must raise NoSlipperySequenceError when no
    slippery sequence is found.
    """
    orf1ab = {
        'name': 'ORF1ab polyprotein',
        'sequence': 'ATTC',
        'start': 0,
        'stop': 4,
    }
    features = Features({'orf1ab': orf1ab}, DNARead('refId', 'ATTC'))
    genome = SARS2Genome(DNARead('genId', 'GGATTCGG'), features)

    expectedMessage = r"^No slippery sequence found\.$"
    with self.assertRaisesRegex(NoSlipperySequenceError, expectedMessage):
        genome.checkFeature('orf1ab', 'A100000A', False)