def test_init_empty(self):
    """A LineOrientedConstructor built without arguments has the expected defaults"""
    loc = LineOrientedConstructor()
    self.assertEqual(loc.Lines, [])
    # The default label splitter turns ' ab cd ' into its two bare words.
    split_labels = loc.LabelSplitter(' ab cd ')
    self.assertEqual(split_labels, ['ab', 'cd'])
    # Remaining defaults: no field handlers, MappedRecord records, non-strict.
    remaining_defaults = (
        ('FieldMap', {}),
        ('Constructor', MappedRecord),
        ('Strict', False),
    )
    for attr_name, default in remaining_defaults:
        self.assertEqual(getattr(loc, attr_name), default)
def test_empty_LOC(self):
    """Default LOC gives {} for blank input and fills fields from real lines"""
    loc = LineOrientedConstructor()

    # No argument, an empty list and whitespace-only lines all give {}.
    self.assertEqual(loc(), {})
    for blank_lines in ([], [' ', '\n\t ']):
        self.assertEqual(loc(blank_lines), {})

    # 'abc' appears twice and the later value is the one expected;
    # 'fgh' has a label but no value and is expected to map to None.
    data = [
        "abc def",
        "3 n",
        "\t abc \txyz\n\n",
        "fgh ",
    ]
    expected = {'abc': 'xyz', '3': 'n', 'fgh': None}
    self.assertEqual(loc(data), expected)
def test_full_LOC(self):
    """LOC should behave as expected when initialized with rich data

    The same lines, splitter, field map and record class are used twice:
    with strict=True calling the constructor must raise FieldError, and
    with strict=False it must return a ``rec`` instance holding the
    expected field values.
    """
    data = [
        "abc\t def",
        " 3 \t n",
        " abc \txyz\n\n",
        "x\t5",
        "fgh ",
        "x\t3 ",
    ]

    class rec(MappedRecord):
        Required = {'abc': []}

    maps = {'abc': list_adder, 'x': int_setter, 'fgh': bool_setter}
    label_splitter = DelimitedSplitter('\t')
    constructor = rec

    # NOTE(review): presumably the label '3', which has no entry in maps,
    # is what triggers FieldError in strict mode -- confirm against LOC.
    strict = True
    loc_bad = LineOrientedConstructor(data, label_splitter, maps,
                                      constructor, strict)
    self.assertRaises(FieldError, loc_bad)

    strict = False
    loc_good = LineOrientedConstructor(data, label_splitter, maps,
                                       constructor, strict)
    result = loc_good()
    # Use the unittest API rather than a bare assert: bare asserts are
    # removed under ``python -O`` and would silently skip this check.
    self.assertTrue(isinstance(result, rec))
    # 'abc' accumulates both values, 'x' keeps the last one as an int,
    # 'fgh' (no value) becomes False, and the unmapped '3' is kept as-is.
    self.assertEqual(result,
                     {'abc':['def','xyz'], '3':'n','fgh':False,'x':3})
def _sts_adder(obj, field, val):
    """Appends the current STS-type record to specified field

    val is converted with _read_sts and the result is passed, together
    with obj and field, to list_adder.
    """
    list_adder(obj, field, _read_sts(val))


def _seq_adder(obj, field, val):
    """Appends the current Sequence-type record to specified field

    val is converted with _read_seq and the result is passed, together
    with obj and field, to list_adder.
    """
    list_adder(obj, field, _read_seq(val))


def _protsim_adder(obj, field, val):
    """Appends the current ProtSim record to specified field

    val is converted with _read_protsim and the result is passed, together
    with obj and field, to list_adder.
    """
    list_adder(obj, field, _read_protsim(val))


# Line-oriented constructor configured for UniGene: it builds UniGene objects
# and maps the labels below to their handlers. Labels not listed here get
# whatever LineOrientedConstructor does by default.
LinesToUniGene = LineOrientedConstructor()
LinesToUniGene.Constructor = UniGene
LinesToUniGene.FieldMap = {
    'LOCUSLINK': int_setter,
    'EXPRESS': _expressions_setter,
    'PROTSIM': _protsim_adder,
    'SCOUNT': int_setter,
    'SEQUENCE': _seq_adder,
    'STS': _sts_adder,
}


def UniGeneParser(lines):
    """Treats lines as a stream of unigene records"""
    # GbFinder is defined elsewhere; presumably it groups lines into one
    # chunk per record -- TODO confirm.
    for record in GbFinder(lines):
        # Build one UniGene object from the lines of this record.
        curr = LinesToUniGene(record)
# NOTE(review): the next two statements are the body of a function whose
# 'def' line precedes this excerpt (presumably _expressions_setter, which the
# field map below references) -- confirm their indentation against that line.
"""Sets specified field to a list of expressions"""
# semi_splitter is defined elsewhere; presumably it splits val on ';'.
setattr(obj, field, semi_splitter(val))


def _sts_adder(obj, field, val):
    """Appends the current STS-type record to specified field

    val is converted with _read_sts and the result is passed, together
    with obj and field, to list_adder.
    """
    list_adder(obj, field, _read_sts(val))


def _seq_adder(obj, field, val):
    """Appends the current Sequence-type record to specified field

    val is converted with _read_seq and the result is passed, together
    with obj and field, to list_adder.
    """
    list_adder(obj, field, _read_seq(val))


def _protsim_adder(obj, field, val):
    """Appends the current ProtSim record to specified field

    val is converted with _read_protsim and the result is passed, together
    with obj and field, to list_adder.
    """
    list_adder(obj, field, _read_protsim(val))


# Line-oriented constructor configured for UniGene: it builds UniGene objects
# and maps the labels below to their handlers.
LinesToUniGene = LineOrientedConstructor()
LinesToUniGene.Constructor = UniGene
LinesToUniGene.FieldMap = {
    'LOCUSLINK':int_setter,
    'EXPRESS':_expressions_setter,
    'PROTSIM':_protsim_adder,
    'SCOUNT':int_setter,
    'SEQUENCE':_seq_adder,
    'STS':_sts_adder,
}


def UniGeneParser(lines):
    """Treats lines as a stream of unigene records"""
    # GbFinder is defined elsewhere; presumably it groups lines into one
    # chunk per record -- TODO confirm.
    for record in GbFinder(lines):
        # Build one UniGene object from the lines of this record.
        curr = LinesToUniGene(record)
        del curr['//']  # clean up delimiter: drop the '//' key from the record
# Labels that each have their own dedicated handler.
_ll_fieldmap['GO'] = _go_adder
_ll_fieldmap['EXTANNOT'] = _extannot_adder
_ll_fieldmap['MAP'] = _map_adder
_ll_fieldmap['CDD'] = _cdd_adder
_ll_fieldmap['ASSEMBLY'] = _assembly_adder
_ll_fieldmap['CONTIG'] = _contig_adder
# Groups of labels that share a single handler.
for field in 'NG ACCNUM'.split():
    _ll_fieldmap[field] = _accnum_adder
for field in 'NR NM NC NP XG XR XM XP PROT'.split():
    _ll_fieldmap[field] = _accession_adder
# Every label in _ll_multi that still has no handler falls back to
# _generic_adder; handlers registered above are left untouched.
for field in _ll_multi:
    if field not in _ll_fieldmap:
        _ll_fieldmap[field] = _generic_adder

# Line-oriented constructor configured for LocusLink: it builds LocusLink
# objects, dispatches labels through _ll_fieldmap and splits labels with
# first_colon (defined elsewhere; presumably splits at the first ':').
LinesToLocusLink = LineOrientedConstructor()
LinesToLocusLink.Constructor = LocusLink
LinesToLocusLink.FieldMap = _ll_fieldmap
LinesToLocusLink.LabelSplitter = first_colon


def LocusLinkParser(lines):
    """Treats lines as a stream of LocusLink records

    Generator: for each chunk produced by LLFinder(lines), yields the
    object that LinesToLocusLink builds from that chunk.
    """
    for record in LLFinder(lines):
        curr = LinesToLocusLink(record)
        yield curr


if __name__ == '__main__':
    from sys import argv, stdout
    # Script mode: the first command-line argument names the file to parse.
    filename = argv[1]
    count = 0
    for record in LocusLinkParser(open(filename)):