def test_single_constructor(self):
    """RdbParser should use constructors if supplied.

    Parses ``self.oneseq`` twice: first with only a custom sequence
    constructor, then with a custom header constructor as well, and
    checks the single resulting sequence and its ``Info`` each time.
    """
    def to_dna(x, Info):
        # Sequence constructor: build a DnaSequence after replacing U by T.
        # The parameter must stay named ``Info`` because it is forwarded
        # by keyword to DnaSequence and may be passed by keyword here too.
        return DnaSequence(str(x).replace('U', 'T'), Info=Info)

    f = list(RdbParser(self.oneseq, to_dna))
    self.assertEqual(len(f), 1)
    a = f[0]
    self.assertEqual(a, 'AGTCATCTAGATHCATHC')
    self.assertEqual(a.Info, Info({'Species': 'H.Sapiens',
                                   'OriginalSeq': 'AGUCAUCUAGAUHCAUHC'}))

    def alternativeConstr(header_lines):
        # Header constructor: store each "name:value" line upper-cased.
        info = Info()
        for line in header_lines:
            fields = line.strip().split(':', 1)
            # strip out empty lines, lines without name, lines without colon
            if not fields[0] or len(fields) != 2:
                continue
            name = fields[0].upper()
            value = fields[1].strip().upper()
            info[name] = value
        return info

    f = list(RdbParser(self.oneseq, to_dna, alternativeConstr))
    self.assertEqual(len(f), 1)
    a = f[0]
    self.assertEqual(a, 'AGTCATCTAGATHCATHC')
    exp_info = Info({'OriginalSeq': 'AGUCAUCUAGAUHCAUHC',
                     'Refs': {}, 'SEQ': 'H.SAPIENS'})
    self.assertEqual(a.Info, exp_info)
def test_only_sequences(self):
    """RdbParser should return empty list from file w/o labels.

    With ``strict=True`` consuming the parser must raise RecordError;
    with ``strict=False`` no records are produced.
    """
    # strict: exhausting the parser has to fail
    strict_parser = RdbParser(self.nolabels, strict=True)
    self.assertRaises(RecordError, list, strict_parser)

    # not strict: the records are skipped, leaving nothing
    lenient_records = list(RdbParser(self.nolabels, strict=False))
    self.assertEqual(lenient_records, [])
def test_single(self):
    """RdbParser should read a single record as one sequence with Info.

    Checked against both ``self.oneseq`` and ``self.multiline``.
    """
    def check_single(lines, raw_seq):
        # Exactly one record, equal to the raw sequence, whose Info holds
        # the species and the original sequence string.
        records = list(RdbParser(lines))
        self.assertEqual(len(records), 1)
        record = records[0]
        self.assertEqual(record, Sequence(raw_seq))
        expected_info = Info({'Species': 'H.Sapiens',
                              'OriginalSeq': raw_seq})
        self.assertEqual(record.Info, expected_info)

    check_single(self.oneseq, 'AGUCAUCUAGAUHCAUHC')
    check_single(self.multiline, 'AGUCAUUAGAUHCAUHC')
def test_full(self):
    """RdbParser: full data, valid and invalid.

    Parses RDB_LINES_ONLY_GOOD and RDB_LINES_GOOD_BAD in strict and
    non-strict mode and compares the results with two hand-built
    RnaSequence objects.
    """
    def check_record(observed, expected):
        # Compare the sequence itself, its string form and its Info.
        self.assertEqual(observed, expected)
        self.assertEqual(str(observed), str(expected))
        self.assertEqual(observed.Info, expected.Info)

    first_info = Info({
        'Species': "unidentified Thermus OPB AF027020",
        'Refs': {'rRNA': ['AF027020']},
        'OriginalSeq': '-o[oGG-U{G}AA--C^GC]U---ACGU-Nooo---',
    })
    r1 = RnaSequence("-??GG-UGAA--CGCU---ACGU-N???---", Info=first_info)
    second_info = Info({
        'Species': 'Thermus silvanus X84211',
        'Refs': {'rRNA': ['X84211']},
        'OriginalSeq': '---CGAU[C(G){--UA}U]ACG-Nooo-',
    })
    r2 = RnaSequence("---CGAUCG--UAUACG-N???-", Info=second_info)

    # only good records, strict: both records are returned
    parsed = list(RdbParser(RDB_LINES_ONLY_GOOD.split('\n'), strict=True))
    self.assertEqual(len(parsed), 2)
    check_record(parsed[0], r1)
    check_record(parsed[1], r2)

    # only good records, not strict: same count, first record compared
    parsed = list(RdbParser(RDB_LINES_ONLY_GOOD.split('\n'), strict=False))
    self.assertEqual(len(parsed), 2)
    check_record(parsed[0], r1)

    # good and bad records, strict: consuming the parser raises
    strict_parser = RdbParser(RDB_LINES_GOOD_BAD.split('\n'), strict=True)
    self.assertRaises(RecordError, list, strict_parser)

    # good and bad records, not strict: the invalid record is skipped
    parsed = list(RdbParser(RDB_LINES_GOOD_BAD.split('\n'), strict=False))
    self.assertEqual(len(parsed), 2)
    check_record(parsed[0], r1)
    check_record(parsed[1], r2)
def test_multiple_constructor_bad(self):
    """RdbParser should complain or skip bad records w/ constructor.

    Uses a sequence constructor that raises RecordError whenever
    DnaSequence construction fails. Parsing ``self.oneX`` without a
    ``strict`` argument must raise RecordError; with ``strict=False``
    two records ('ACT' and 'AAA') must be returned.
    """
    def dnastrict(x, **kwargs):
        # Map any failure while building the DnaSequence to RecordError,
        # which is the exception the assertions below look for.
        try:
            return DnaSequence(x, **kwargs)
        except Exception:
            # Call form instead of ``raise E, "msg"``: same behavior on
            # Python 2, and not a syntax error on Python 3.
            raise RecordError("Could not convert sequence")

    self.assertRaises(RecordError, list, RdbParser(self.oneX, dnastrict))

    f = list(RdbParser(self.oneX, dnastrict, strict=False))
    self.assertEqual(len(f), 2)
    a, b = f
    self.assertEqual(a, 'ACT')
    self.assertEqual(a.Info, Info({'Species': 'mit', 'OriginalSeq': 'ACT'}))
    self.assertEqual(b, 'AAA')
    self.assertEqual(b.Info, Info({'Species': 'pla', 'OriginalSeq': 'AAA'}))
def test_empty(self):
    """RdbParser should return empty list from 'file' w/o labels.

    Covers ``self.empty`` with no ``strict`` argument and
    ``self.nolabels`` with ``strict=False``, both of which give no
    records, and ``self.nolabels`` with no ``strict`` argument, which
    must raise RecordError when consumed.
    """
    from_empty = list(RdbParser(self.empty))
    self.assertEqual(from_empty, [])

    from_nolabels = list(RdbParser(self.nolabels, strict=False))
    self.assertEqual(from_nolabels, [])

    default_parser = RdbParser(self.nolabels)
    self.assertRaises(RecordError, list, default_parser)