def test_multiple_bad(self):
    """Strict parsing raises on the bad record; lenient parsing skips it."""
    # The parser object is built first; RecordError is expected only once
    # list() starts consuming it inside assertRaises.
    strict_parser = MinimalFastaParser(self.twogood)
    self.assertRaises(RecordError, list, strict_parser)

    # With strict=False only the well-formed records should come through.
    records = list(MinimalFastaParser(self.twogood, strict=False))
    expected = [("abc", "caggac"), ("456", "cg")]
    self.assertEqual(len(records), 2)
    first, second = records
    for got, want in zip((first, second), expected):
        self.assertEqual(got, want)
def test_no_labels(self):
    """Input without seqs: RecordError when strict, empty list otherwise."""
    # Explicit strict=True: consuming the parser must raise.
    strict_parser = MinimalFastaParser(self.labels, strict=True)
    self.assertRaises(RecordError, list, strict_parser)

    # strict=False: the incomplete records are skipped, leaving nothing.
    lenient_records = list(MinimalFastaParser(self.labels, strict=False))
    self.assertEqual(lenient_records, [])
def test_read(self):
    """Data store read() yields the same records as the FASTA file on disk."""
    # Reference: parse the file directly. The dict is built inside the
    # ``with`` block so the handle is still open while it is consumed.
    reference_path = os.path.join("data", "brca1.fasta")
    with open(reference_path) as infile:
        expect = dict(MinimalFastaParser(infile))

    # Same content fetched through the data store under test, then split
    # into lines before parsing.
    store = self.Class(self.basedir, suffix=".fasta")
    lines = store.read(self.basedir).splitlines()
    got = dict(MinimalFastaParser(lines))

    self.assertEqual(got, expect)
def test_single(self):
    """A lone record is parsed into exactly one (label, seq) tuple."""
    # Record with the sequence on a single line.
    single_line = list(MinimalFastaParser(self.oneseq))
    self.assertEqual(len(single_line), 1)
    self.assertEqual(single_line[0], ("abc", "UCAG"))

    # Record from the multiline fixture: still exactly one result tuple.
    multi_line = list(MinimalFastaParser(self.multiline))
    self.assertEqual(len(multi_line), 1)
    self.assertEqual(multi_line[0], ("xyz", "UUUUCCAAAAAG"))
def test_multiple(self):
    """Three records in one input are returned in order as (label, seq)."""
    records = list(MinimalFastaParser(self.threeseq))
    expected = [("123", "a"), ("abc", "caggac"), ("456", "cg")]

    self.assertEqual(len(records), 3)
    first, second, third = records
    # Check each record separately so a failure pinpoints the offender.
    for got, want in zip((first, second, third), expected):
        self.assertEqual(got, want)
def test_gt_bracket_in_seq(self):
    """MinimalFastaParser handles alternate finder function

    This test also illustrates how to use the MinimalFastaParser
    to handle "sequences" that start with a > symbol, which can
    happen when we abuse the MinimalFastaParser to parse
    fasta-like sequence quality files.
    """
    # split("\n") on text ending in a newline leaves a trailing "" entry:
    # [">abc", ">CAG", ""].
    oneseq_w_gt = ">abc\n>CAG\n".split("\n")

    def get_two_line_records(infile):
        """Yield consecutive lines of ``infile`` as (first, second) pairs.

        A final unpaired line (here the trailing "") is never yielded.
        """
        line1 = None
        for line in infile:
            # Identity test: None marks "no first line pending". An empty
            # string is a legitimate pending line and must not match.
            if line1 is None:
                line1 = line
            else:
                yield (line1, line)
                line1 = None

    f = list(MinimalFastaParser(oneseq_w_gt, finder=get_two_line_records))
    self.assertEqual(len(f), 1)
    a = f[0]
    # The second line keeps its leading ">" because the custom finder,
    # not the ">" marker, decides where records begin.
    self.assertEqual(a, ("abc", ">CAG"))
def test_empty(self):
    """Empty input yields []; input w/o labels yields [] unless strict."""
    # Default (strict) parsing of the empty fixture gives no records.
    from_empty = list(MinimalFastaParser(self.empty))
    self.assertEqual(from_empty, [])

    # Lenient parsing of label-less input also gives no records.
    lenient = list(MinimalFastaParser(self.nolabels, strict=False))
    self.assertEqual(lenient, [])

    # Default (strict) parsing of the same label-less input must raise once
    # the parser is consumed by list().
    strict_parser = MinimalFastaParser(self.nolabels)
    self.assertRaises(RecordError, list, strict_parser)
def test_parser_from_file(self):
    """passing path should work"""
    # Hand the parser a path string rather than an open file or lines.
    path = os.path.join(data_path, "brca1.fasta")
    # The parser yields (label, seq) pairs, so dict() can consume it
    # directly; no pass-through generator is needed.
    seqs = dict(MinimalFastaParser(path))
    # assertIn reports the missing key and the container on failure,
    # unlike assertTrue on a pre-evaluated boolean.
    self.assertIn("Human", seqs)
def load_from_fasta(filename):
    """Load the FASTA file ``filename`` into a DNA ``ArrayAlignment``.

    The file is opened in text mode via ``open_``, parsed with
    ``MinimalFastaParser`` into (name, sequence) pairs, and those pairs are
    passed as ``data`` to ``ArrayAlignment`` with ``moltype=DNA``.

    The file handle is closed even if parsing raises part-way through.
    """
    infile = open_(filename, mode='rt')
    try:
        # Materialise every record while the handle is still open.
        seqs = [(name, seq) for name, seq in MinimalFastaParser(infile)]
    finally:
        infile.close()
    return ArrayAlignment(data=seqs, moltype=DNA)