def test_single(self):
    """RdbParser should yield one sequence, with its info, for a single record."""
    # Each case pairs a fixture with the sequence string expected both as
    # the parsed sequence and as the recorded "OriginalSeq".
    cases = (
        (self.oneseq, "AGUCAUCUAGAUHCAUHC"),
        (self.multiline, "AGUCAUUAGAUHCAUHC"),
    )
    for lines, expected_seq in cases:
        records = list(RdbParser(lines))
        self.assertEqual(len(records), 1)
        record = records[0]
        self.assertEqual(record, Sequence(expected_seq))
        expected_info = Info({"Species": "H.Sapiens", "OriginalSeq": expected_seq})
        self.assertEqual(record.info, expected_info)
def test_only_sequences(self):
    """RdbParser should raise, or yield nothing, for a file without labels."""
    # Strict parsing must reject the unlabelled records outright.
    strict_parser = RdbParser(self.nolabels, strict=True)
    self.assertRaises(RecordError, list, strict_parser)

    # Non-strict parsing skips those records, leaving nothing to yield.
    lenient_records = list(RdbParser(self.nolabels, strict=False))
    self.assertEqual(lenient_records, [])
def test_single_constructor(self):
    """RdbParser should use constructors if supplied.

    Checks a custom sequence constructor on its own, then the same
    sequence constructor combined with a custom header (info) constructor.
    """

    def to_dna(x, info):
        # Sequence constructor: convert the parsed sequence to DNA letters.
        return DnaSequence(str(x).replace("U", "T"), info=info)

    def alternativeConstr(header_lines):
        # Header constructor: upper-case every "name: value" header line.
        info = Info()
        for line in header_lines:
            fields = line.strip().split(":", 1)
            # strip out empty lines, lines without name, lines without
            # colon
            if not fields[0] or len(fields) != 2:
                continue
            name = fields[0].upper()
            value = fields[1].strip().upper()
            info[name] = value
        return info

    f = list(RdbParser(self.oneseq, to_dna))
    self.assertEqual(len(f), 1)
    a = f[0]
    self.assertEqual(a, "AGTCATCTAGATHCATHC")
    self.assertEqual(
        a.info, Info({"Species": "H.Sapiens", "OriginalSeq": "AGUCAUCUAGAUHCAUHC"})
    )

    f = list(RdbParser(self.oneseq, to_dna, alternativeConstr))
    self.assertEqual(len(f), 1)
    a = f[0]
    self.assertEqual(a, "AGTCATCTAGATHCATHC")
    exp_info = Info(
        {"OriginalSeq": "AGUCAUCUAGAUHCAUHC", "Refs": {}, "SEQ": "H.SAPIENS"}
    )
    # Compare against the named expectation rather than a duplicated literal.
    self.assertEqual(a.info, exp_info)
def test_full(self):
    """RdbParser: full data, valid and invalid.

    Good-only input must parse to the same two records whether or not
    ``strict`` is set. Input containing an invalid record must raise
    ``RecordError`` when strict and yield only the two good records
    otherwise.
    """
    r1 = RnaSequence(
        "-??GG-UGAA--CGCU---ACGU-N???---",
        info=Info(
            {
                "Species": "unidentified Thermus OPB AF027020",
                "Refs": {"rRNA": ["AF027020"]},
                "OriginalSeq": "-o[oGG-U{G}AA--C^GC]U---ACGU-Nooo---",
            }
        ),
    )
    r2 = RnaSequence(
        "---CGAUCG--UAUACG-N???-",
        info=Info(
            {
                "Species": "Thermus silvanus X84211",
                "Refs": {"rRNA": ["X84211"]},
                "OriginalSeq": "---CGAU[C(G){--UA}U]ACG-Nooo-",
            }
        ),
    )
    expected = (r1, r2)

    def check_both_records(text, strict):
        # Parse ``text`` and compare every record with its expectation by
        # equality, string form and info, so no record goes unchecked.
        obs = list(RdbParser(text.split("\n"), strict=strict))
        self.assertEqual(len(obs), 2)
        for got, exp in zip(obs, expected):
            self.assertEqual(got, exp)
            self.assertEqual(str(got), str(exp))
            self.assertEqual(got.info, exp.info)

    # when only good records, should work independent of strict
    check_both_records(RDB_LINES_ONLY_GOOD, strict=True)
    check_both_records(RDB_LINES_ONLY_GOOD, strict=False)

    # when strict, should raise error on invalid record
    f = RdbParser(RDB_LINES_GOOD_BAD.split("\n"), strict=True)
    self.assertRaises(RecordError, list, f)

    # when not strict, the invalid record is skipped
    check_both_records(RDB_LINES_GOOD_BAD, strict=False)
def test_multiple_constructor_bad(self):
    """RdbParser should complain or skip bad records w/ constructor.

    With the default strictness a constructor failure must surface as
    ``RecordError``; with ``strict=False`` the failing record is dropped
    and the remaining two records are returned.
    """

    def dnastrict(x, **kwargs):
        # Turn any construction failure into RecordError, keeping the
        # original exception chained as the cause for debugging.
        try:
            return DnaSequence(x, **kwargs)
        except Exception as err:
            raise RecordError("Could not convert sequence") from err

    self.assertRaises(RecordError, list, RdbParser(self.oneX, dnastrict))

    f = list(RdbParser(self.oneX, dnastrict, strict=False))
    self.assertEqual(len(f), 2)
    a, b = f
    self.assertEqual(a, "ACT")
    self.assertEqual(a.info, Info({"Species": "mit", "OriginalSeq": "ACT"}))
    self.assertEqual(b, "AAA")
    self.assertEqual(b.info, Info({"Species": "pla", "OriginalSeq": "AAA"}))
def test_empty(self):
    """RdbParser should yield nothing for empty input or skipped unlabelled records."""
    from_empty = list(RdbParser(self.empty))
    self.assertEqual(from_empty, [])

    # Without strict checking the unlabelled records produce no output ...
    from_unlabelled = list(RdbParser(self.nolabels, strict=False))
    self.assertEqual(from_unlabelled, [])

    # ... whereas the default settings raise on them.
    default_parser = RdbParser(self.nolabels)
    self.assertRaises(RecordError, list, default_parser)