def test_full_LOC(self):
    """LOC should behave as expected when initialized with rich data"""
    # Tab-delimited "label<TAB>value" lines with stray whitespace/newlines;
    # 'abc' and 'x' each appear twice, 'fgh' has no value at all.
    data = [
        "abc\t def",
        " 3 \t n",
        " abc \txyz\n\n",
        "x\t5",
        "fgh ",
        "x\t3 ",
    ]

    class rec(MappedRecord):
        Required = {'abc': []}

    # Per-label handlers; the label '3' deliberately has no entry here.
    maps = {'abc': list_adder, 'x': int_setter, 'fgh': bool_setter}
    label_splitter = DelimitedSplitter('\t')
    constructor = rec

    # In strict mode, building a record from this data must raise FieldError.
    strict = True
    loc_bad = LineOrientedConstructor(
        data, label_splitter, maps, constructor, strict)
    self.assertRaises(FieldError, loc_bad)

    # In non-strict mode the same data must yield a populated ``rec``.
    strict = False
    loc_good = LineOrientedConstructor(
        data, label_splitter, maps, constructor, strict)
    result = loc_good()
    # Use the unittest assertion rather than a bare ``assert`` so the check
    # still runs under ``python -O`` and reports a useful failure message.
    self.assertIsInstance(result, rec)
    self.assertEqual(result, {
        'abc': ['def', 'xyz'],
        '3': 'n',
        'fgh': False,
        'x': 3,
    })
def test_parsers(self):
    """DelimitedSplitter should return function with correct behavior

    Exercises splitters built with no arguments, an explicit ``None``
    delimiter, and a ';' delimiter combined with positive, ``None`` and
    negative split counts.
    """
    empty = DelimitedSplitter()
    space = DelimitedSplitter(None)
    semicolon = DelimitedSplitter(';')
    twosplits = DelimitedSplitter(';', 2)
    allsplits = DelimitedSplitter(';', None)
    lastone = DelimitedSplitter(';', -1)
    lasttwo = DelimitedSplitter(';', -2)

    expect = self.assertEqual

    # Default splitter: (input, expected fields).
    default_cases = (
        ('a b c', ['a', 'b c']),
        ('abc', ['abc']),
        (' ', []),
    )
    for text, wanted in default_cases:
        expect(empty(text), wanted)

    # An explicit None delimiter must behave like the default one.
    expect(empty('a b c'), space('a b c'))

    # ';'-delimited splitters: (splitter, input, expected fields).
    semicolon_cases = (
        (semicolon, ' a ; b ; c d', ['a', 'b ; c d']),
        (twosplits, ' a ; b ; c d', ['a', 'b', 'c d']),
        (allsplits, ' a ; b ; c;;d;e ;',
         ['a', 'b', 'c', '', 'd', 'e', '']),
        (lastone, ' a ; b ; c;;d;e ;', ['a ; b ; c;;d;e', '']),
        (lasttwo, ' a ; b ; c;;d;e ;', ['a ; b ; c;;d', 'e', '']),
        (lasttwo, '', []),
        (lasttwo, 'x', ['x']),
        (lasttwo, 'x;', ['x', '']),
    )
    for split, text, wanted in semicolon_cases:
        expect(split(text), wanted)
def test_label_line_parser(self):
    """_label_line_parser should map labels to data and reject bad lines"""
    last_space = DelimitedSplitter(None, -1)

    # A well-formed "label<whitespace>data" line yields the label -> data
    # mapping plus the list of labels in the order encountered.
    # NOTE: ``assertEqual`` replaces the deprecated ``assertEquals`` alias,
    # which was removed from unittest in Python 3.12.
    self.assertEqual(
        _label_line_parser(StringIO('abc\tucag'), last_space),
        ({"abc": ["ucag"]}, ['abc']))

    # A line with no whitespace separating label from data must be rejected.
    with self.assertRaises(ClustalFormatError):
        _label_line_parser(StringIO('abctucag'), last_space)
def test_splitter(self):
    """StrictFieldWrapper with splitter should use that splitter

    The wrapper is given a ':' splitter with a split count of -1 and the
    field names 'label' and 'count'.
    """
    wrap = StrictFieldWrapper(
        ['label', 'count'], DelimitedSplitter(':', -1))

    parsed = wrap('n:k:n:a:sd ')
    self.assertEqual(parsed, {'label': 'n:k:n:a', 'count': 'sd'})

    parsed = wrap('nknasd:')
    self.assertEqual(parsed, {'label': 'nknasd', 'count': ''})

    # An empty line cannot supply both fields, so it must be rejected.
    with self.assertRaises(FieldError):
        wrap('')
else: result[key] = [val] labels.append(key) return result, labels def _is_clustal_seq_line(line): """Returns True if line starts with a non-blank character but not 'CLUSTAL' Useful for filtering other lines out of the file. """ return line and (not line[0].isspace()) and\ (not line.startswith('CLUSTAL')) and (not line.startswith('MUSCLE')) last_space = DelimitedSplitter(None, -1) def _delete_trailing_number(line): """Deletes trailing number from a line. WARNING: does not preserve internal whitespace when a number is removed! (converts each whitespace run to a single space). Returns the original line if it didn't end in a number. """ pieces = line.split() try: int(pieces[-1]) return ' '.join(pieces[:-1]) except ValueError: # no trailing numbers return line