def test_no_labels(self):
    """parse_fasta raises on input w/o seqs when strict, else yields nothing"""
    # Strict mode (the default) must reject the input once it is consumed.
    strict_records = parse_fasta(self.labels, strict=True)
    self.assertRaises(RecordError, list, strict_records)

    # Non-strict mode skips the offending records, so nothing is produced.
    lenient_records = list(parse_fasta(self.labels, strict=False))
    self.assertEqual(lenient_records, [])
def test_empty(self):
    """parse_fasta yields no records from empty or label-less input"""
    # Completely empty input: nothing to parse, nothing to complain about.
    from_empty = list(parse_fasta(self.empty))
    self.assertEqual(from_empty, [])

    # Input without labels is silently skipped when strict checking is off...
    from_nolabels = list(parse_fasta(self.nolabels, strict=False))
    self.assertEqual(from_nolabels, [])

    # ...and rejected under the default settings once it is consumed.
    default_records = parse_fasta(self.nolabels)
    self.assertRaises(RecordError, list, default_records)
def test_parse_fasta_ignore_comment(self):
    """parse_fasta correctly ignores label comments when requested"""
    in_ = '>1\nCAG\n>2 some other info\nCCAG\n>3 \nA'.split('\n')

    # Default call (no ignore_comment): the full label text is retained.
    with_comments = [('1', 'CAG'), ('2 some other info', 'CCAG'), ('3', 'A')]
    self.assertEqual(list(parse_fasta(in_)), with_comments)

    # ignore_comment=True: only the leading identifier of each label is kept.
    without_comments = [('1', 'CAG'), ('2', 'CCAG'), ('3', 'A')]
    self.assertEqual(
        list(parse_fasta(in_, ignore_comment=True)), without_comments)
def test_single(self):
    """parse_fasta reads a lone record as a single (label, seq) tuple"""
    # A record whose sequence sits on one line.
    records = list(parse_fasta(self.oneseq))
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0], ('abc', 'UCAG'))

    # A record whose sequence spans several lines is returned as one string.
    records = list(parse_fasta(self.multiline))
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0], ('xyz', 'UUUUCCAAAAAG'))
def combine_alignments(fp1, fp2):
    """Merge the FASTA records read from two inputs into one dict.

    Both inputs are passed to ``parse_fasta`` and loaded as
    ``{label: sequence}`` mappings.

    Raises ``ValueError`` if any label occurs in both inputs.
    Returns a dict holding every record from ``fp1`` and ``fp2``.
    """
    first = dict(parse_fasta(fp1))
    second = dict(parse_fasta(fp2))

    # Refuse to merge when the two inputs share a label: one record would
    # silently overwrite the other.
    if not set(first).isdisjoint(second):
        raise ValueError("Conflicting sequence ids in fp1 and fp2")

    first.update(second)
    return first
def test_generate_lane_mask(self):
    """generate_lane_mask returns the expected mask at each threshold"""
    sample_alignment = ">1\nAAAAT\n>2\nAAAGG\n>3\nAACCC\n>4\nA----".split('\n')
    aln = Alignment.from_fasta_records(parse_fasta(sample_alignment), DNA)

    # (threshold passed as second argument, expected lane mask string)
    cases = [
        (0.00, "11111"),
        (0.10, "11100"),
        (0.20, "11100"),
        (0.40, "11000"),
        (0.60, "11000"),
        (0.80, "10000"),
        (1.00, "00000"),
    ]
    for threshold, expected_lanemask in cases:
        actual_lanemask = generate_lane_mask(aln, threshold)
        self.assertEqual(actual_lanemask, expected_lanemask)
def test_parse_fasta_label_to_name(self):
    """parse_fasta applies the label_to_name callable to every label"""
    def to_brofist(_):
        # the most powerful fasta label converter known to mankind
        return 'brofist'

    expected = [('brofist', 'a'), ('brofist', 'caggac'), ('brofist', 'cg')]
    observed = list(parse_fasta(self.threeseq, label_to_name=to_brofist))
    self.assertEqual(observed, expected)
def test_multiple(self):
    """parse_fasta returns every record of a multi-record input, in order"""
    records = list(parse_fasta(self.threeseq))
    self.assertEqual(len(records), 3)

    # Check each record in turn so a failure pinpoints the offending one.
    expected_records = [('123', 'a'), ('abc', 'caggac'), ('456', 'cg')]
    for observed, expected in zip(records, expected_records):
        self.assertEqual(observed, expected)
def test_parse_fasta_label_to_name(self):
    """parse_fasta replaces each label with label_to_name's return value"""
    # the most powerful fasta label converter known to mankind
    converter = lambda _: 'brofist'

    parsed = parse_fasta(self.threeseq, label_to_name=converter)
    self.assertEqual(
        list(parsed),
        [('brofist', 'a'), ('brofist', 'caggac'), ('brofist', 'cg')])
def test_gt_bracket_in_seq(self):
    """parse_fasta handles alternate finder function

    Besides checking the ``finder`` hook, this test shows how parse_fasta
    can cope with "sequences" that begin with a > symbol, which can happen
    when parse_fasta is (ab)used to read fasta-like sequence quality files.
    """
    oneseq_w_gt = '>abc\n>CAG\n'.split('\n')

    def get_two_line_records(infile):
        # Pair consecutive lines into records of exactly two lines each; a
        # trailing unpaired line is never yielded.
        pending = None
        for line in infile:
            if pending is not None:
                yield (pending, line)
                pending = None
            else:
                pending = line

    records = list(parse_fasta(oneseq_w_gt, finder=get_two_line_records))
    self.assertEqual(len(records), 1)
    self.assertEqual(records[0], ('abc', '>CAG'))
def test_multiple_bad_strict(self):
    """parse_fasta raises RecordError on self.twogood with default settings"""
    with self.assertRaises(RecordError):
        # Creating the parser and consuming it both happen inside the
        # context, so the error is caught wherever it is raised.
        records = parse_fasta(self.twogood)
        list(records)
def test_multiple_bad_not_strict(self):
    """parse_fasta with strict=False yields two records from self.twogood"""
    records = list(parse_fasta(self.twogood, strict=False))
    self.assertEqual(len(records), 2)

    # Only the first record's content is pinned by this test.
    self.assertEqual(records[0], ('abc', 'caggac'))