def test_no_labels(self):
    """parse_fasta should raise or yield nothing for a file w/o seqs"""
    # strict=True (the default): consuming the parser must raise
    strict_parse = parse_fasta(self.labels, strict=True)
    self.assertRaises(ValueError, list, strict_parse)

    # strict=False: the records are skipped, leaving nothing to yield
    lenient_records = list(parse_fasta(self.labels, strict=False))
    self.assertEqual(lenient_records, [])
def test_empty(self):
    """parse_fasta should return empty list from 'file' w/o labels"""
    # an empty input yields no records
    from_empty = list(parse_fasta(self.empty))
    self.assertEqual(from_empty, [])

    # input without labels yields no records when strict is turned off
    lenient_records = list(parse_fasta(self.nolabels, strict=False))
    self.assertEqual(lenient_records, [])

    # ... and raises when parsed with the default arguments
    default_parse = parse_fasta(self.nolabels)
    self.assertRaises(ValueError, list, default_parse)
def test_single(self):
    """parse_fasta should read single record as (label, seq) tuple"""
    # each fixture is expected to produce exactly one record
    cases = (
        (self.oneseq, ('abc', 'UCAG')),
        (self.multiline, ('xyz', 'UUUUCCAAAAAG')),
    )
    for lines, expected in cases:
        records = list(parse_fasta(lines))
        self.assertEqual(len(records), 1)
        self.assertEqual(records[0], expected)
def test_parse_fasta_ignore_comment(self):
    """parse_fasta correctly ignores label comments when requested"""
    lines = '>1\nCAG\n>2 some other info\nCCAG\n>3 \nA'.split('\n')

    # ignore_comment = False: the text after the id stays in the label
    with_comments = list(parse_fasta(lines))
    self.assertEqual(
        with_comments,
        [('1', 'CAG'), ('2 some other info', 'CCAG'), ('3', 'A')])

    # ignore_comment = True: only the id part of each label is expected
    without_comments = list(parse_fasta(lines, ignore_comment=True))
    self.assertEqual(
        without_comments,
        [('1', 'CAG'), ('2', 'CCAG'), ('3', 'A')])
def test_multiple(self):
    """parse_fasta should read multiple (multiline) records correctly"""
    expected = [('123', 'a'), ('abc', 'caggac'), ('456', 'cg')]
    records = list(parse_fasta(self.threeseq))
    self.assertEqual(len(records), 3)
    # compare record by record so a failure points at the offending one
    for observed, wanted in zip(records, expected):
        self.assertEqual(observed, wanted)
def test_parse_fasta_label_to_name(self):
    """parse_fasta should report labels as converted by label_to_name"""
    # the most powerful fasta label converter known to mankind
    def to_brofist(_):
        return 'brofist'

    observed = list(parse_fasta(self.threeseq, label_to_name=to_brofist))
    expected = [('brofist', 'a'), ('brofist', 'caggac'), ('brofist', 'cg')]
    self.assertEqual(observed, expected)
def test_gt_bracket_in_seq(self):
    """parse_fasta handles alternate finder function

    This test also illustrates how to use parse_fasta to handle
    "sequences" that start with a > symbol, which can happen when we
    abuse parse_fasta to parse fasta-like sequence quality files.
    """
    lines_with_gt = '>abc\n>CAG\n'.split('\n')

    def get_two_line_records(infile):
        # Pair consecutive lines into (first, second) tuples; a trailing
        # unpaired line is never yielded.
        held = None
        for line in infile:
            if held is not None:
                yield (held, line)
                held = None
            else:
                held = line

    records = list(parse_fasta(lines_with_gt, finder=get_two_line_records))
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0], ('abc', '>CAG'))
def test_multiple_bad_not_strict(self):
    """parse_fasta with strict=False should yield two records from twogood"""
    records = list(parse_fasta(self.twogood, strict=False))
    self.assertEqual(len(records), 2)
    # NOTE(review): only the first record is asserted; the expected value
    # of the second depends on the self.twogood fixture (defined outside
    # this test) -- consider adding a check for it.
    self.assertEqual(records[0], ('abc', 'caggac'))
def test_multiple_bad_strict(self):
    """parse_fasta should raise ValueError on twogood with default args"""
    with self.assertRaises(ValueError):
        # creation and consumption both sit inside the context, so the
        # error is caught wherever it is raised
        parsed = parse_fasta(self.twogood)
        list(parsed)