def test_make_iterparsers2():
    # A HeaderFooter expression must hand back an IterHeaderFooter
    # iterator.  The "record" group matches a whole run of "b" lines at
    # once, so iterating over the text below must produce exactly one
    # record.
    exp = Martel.HeaderFooter(
        "dataset", {},
        Martel.Group("header", Martel.Re(r"(a*\R)*")),
        RecordReader.Until, ("b",),
        Martel.Group("record", Martel.Re(r"(b*\R)*")),
        RecordReader.Until, ("c",),
        Martel.Group("footer", Martel.Re(r"(c*\R)*")),
        RecordReader.Everything, (),
    )

    iterator = exp.make_iterator("record")
    assert isinstance(iterator, IterParser.IterHeaderFooter), iterator

    # NOTE: the original list read ["a" "aa", ...]; the missing comma made
    # Python join the two literals into a single "aaa" header line.
    lines = [
        "a",
        "aa",
        "aaaaaaa",
        "b",
        "bb",
        "bbbb",
        "bbbbbbbb",
        "bbbbbbbbbbbbbbbb",
        "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
        "cccc",
        "cc",
        "c",
    ]
    text = "\n".join(lines) + "\n"

    record_count = 0
    for rec in iterator.iterateString(text):
        record_count = record_count + 1
    assert record_count == 1, record_count
# Test: input that has a header and a footer but no record data.  The
# assertions expect zero errors, zero fatal errors and zero "record"
# elements counted by CountRecords.
# NOTE(review): this definition appears collapsed onto a single line in this
# view, so the line breaks inside the triple-quoted input cannot be recovered
# from here; the code is deliberately left untouched.
def test_header_footer7(): # header and footer but with no record data s = """\ This is some misc. header text that goes on until the end. FOOTER """ header = Martel.Group("header", Martel.Re(r"(.|\n)*")) record = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*")) footer = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*")) header = header.make_parser() record = record.make_parser() footer = footer.make_parser() hf = Parser.HeaderFooterParser("hf", {}, RecordReader.CountLines, (2, ), header.tagtable, RecordReader.EndsWith, ("//", ), record.tagtable, RecordReader.StartsWith, ("FOOTER", ), footer.tagtable, (0, 1, {})) count = CountRecords("record") hf.setContentHandler(count) err = CountErrors() hf.setErrorHandler(err) hf.parseFile(StringIO(s)) assert err.error_count == 0, err.error_count assert err.fatal_error_count == 0, err.fatal_error_count assert count.count == 0, count.count
def test_header_footer_parser():
    # The header, record and footer formats all emit a "term" element and
    # differ only in its "pos" attribute.  Reusing the tag name across the
    # three sections must not make them collide.
    header_re = Martel.Re("(?P<term?pos=header>a+)\R")
    body_re = Martel.Re("(?P<term?pos=body>b+)\R")
    footer_re = Martel.Re("(?P<term?pos=footer>c+)\R")

    expression = Martel.HeaderFooter(
        "all", {"state": "New Mexico"},
        header_re, RecordReader.CountLines, (1, ),
        body_re, RecordReader.CountLines, (1, ),
        footer_re, RecordReader.CountLines, (1, ),
    )

    sax_parser = expression.make_parser()
    collector = GrabElements()
    sax_parser.setContentHandler(collector)
    sax_parser.parseString("a\nbb\nbb\nccc\n")

    # One enclosing element, one header term, two body terms, one footer
    # term - in document order.
    expected = [
        ("all", {"state": "New Mexico"}),
        ("term", {"pos": "header"}),
        ("term", {"pos": "body"}),
        ("term", {"pos": "body"}),
        ("term", {"pos": "footer"}),
    ]
    found = collector.elements
    assert len(found) == 5, len(found)
    for got, want in zip(found, expected):
        check_element(got, want)
def test_hf4():
    # Header plus records, with all three footer slots set to None.
    # The n-th record is expected to be n "b"s on one line followed by
    # n "c"s on the next.
    header_parser = Martel.Re(r"a*\R").make_parser()
    record_parser = Martel.Group("spam", Martel.Re(r"b*\Rc*\R")).make_parser()
    ip = IterParser.IterHeaderFooter(
        header_parser, RecordReader.CountLines, (1,),
        record_parser, RecordReader.CountLines, (2,),
        None, None, None,
        "spam")

    text = "\n".join(["aaaaaaaaa",
                      "b", "c",
                      "bb", "cc",
                      "bbb", "ccc"]) + "\n"

    n = 0
    for x in ip.iterateString(text):
        n = n + 1
        assert x["spam"][0] == "b" * n + "\n" + "c" * n + "\n"
# Test: records that do not match the record format are skipped even when a
# footer is present.  The record format requires "ID" followed by digits; the
# assertions expect 3 recoverable errors, 0 fatal errors and 4 "record"
# elements counted.
# NOTE(review): this definition appears collapsed onto a single line in this
# view, so the line breaks inside the triple-quoted input cannot be recovered
# from here; the code is deliberately left untouched.
def test_header_footer6(): # Make sure I can skip records when there are footer records s = """ This is some misc. header text that goes on until the end. ID 1 This is some data // ID A This is some more data // ID 3 This is again some more data // ID Q This blah // ID W QWE // ID 987 To be // ID 897 Or not to be // FOOTER """ header = Martel.Group("header", Martel.Re(r"(.|\n)*")) record = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*")) footer = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*")) header = header.make_parser() record = record.make_parser() footer = footer.make_parser() hf = Parser.HeaderFooterParser("hf", {}, RecordReader.Until, ("ID", ), header.tagtable, RecordReader.EndsWith, ("//", ), record.tagtable, RecordReader.StartsWith, ("FOOTER", ), footer.tagtable, (0, 1, {})) count = CountRecords("record") hf.setContentHandler(count) err = CountErrors() hf.setErrorHandler(err) hf.parseFile(StringIO(s)) assert err.error_count == 3, err.error_count assert err.fatal_error_count == 0, err.fatal_error_count assert count.count == 4, count.count
# Test: parse input with a header, three records and a footer through a
# HeaderFooterParser, write the SAX events out with saxutils.XMLGenerator and
# compare the generated XML text with the expected "gold" document.
# NOTE(review): this definition appears collapsed onto a single line in this
# view, so the line breaks inside the triple-quoted input and gold strings
# cannot be recovered from here; the code is deliberately left untouched.
def test_header_footer1(): s = """\ header XX record 1 // record 2 // record 3 // footer """ gold = """\ <?xml version="1.0" encoding="iso-8859-1"?> <hf><header>header XX </header><record>record 1 // </record><record>record 2 // </record><record>record 3 // </record><footer>footer </footer></hf>""" debug_level = 1 # Don't use regexps like these in your code - for testing only! header = Martel.Group("header", Martel.Re(r"header(.|\n)*")) record = Martel.Group("record", Martel.Re(r"rec(.|\n)*")) footer = Martel.Group("footer", Martel.Re(r"footer(.|\n)*")) header = header.make_parser(debug_level=debug_level) record = record.make_parser(debug_level=debug_level) footer = footer.make_parser(debug_level=debug_level) hf = Parser.HeaderFooterParser("hf", {}, RecordReader.EndsWith, ("XX\n", ), header.tagtable, RecordReader.EndsWith, ("//\n", ), record.tagtable, RecordReader.StartsWith, ("f", ), footer.tagtable, (0, debug_level, {})) outfile = StringIO() hf.setContentHandler(saxutils.XMLGenerator(outfile)) hf.setErrorHandler(handler.ErrorHandler()) hf.parseFile(StringIO(s)) result = outfile.getvalue() assert result == gold, (result, gold)
def gen_iterator():
    # Build an IterHeaderFooter that returns the "spam" records.  The header
    # and footer are each read one line at a time, the records two lines at
    # a time; only the record parser is created with debug_level = 1.
    header_parser = Martel.Re(r"a*\R").make_parser()
    record_parser = Martel.Group(
        "spam", Martel.Re(r"b*\Rc*\R")).make_parser(debug_level = 1)
    footer_parser = Martel.Re(r"d*\R").make_parser()

    return IterParser.IterHeaderFooter(
        header_parser, RecordReader.CountLines, (1,),
        record_parser, RecordReader.CountLines, (2,),
        footer_parser, RecordReader.CountLines, (1,),
        "spam")
def test_missing_end2(): # Same as the test_missing_end1 but using HeaderFooter exp = Martel.HeaderFooter("dataset", {}, None, None, None, Martel.Group("record", Martel.Re(r"(b*\R)*a\R")), RecordReader.EndsWith, ("a",), None, None, None ) lines = [ "bbb", "bb", "a", "b", "a", "a", ] text = "\n".join(lines) + "\n" iterator = exp.make_iterator("record") # This should work for x in iterator.iterateString(text): pass # This should not lines.append("c") text = "\n".join(lines) + "\n" try: for x in iterator.iterateString(text): pass raise AssertionError except Parser.ParserPositionException, exc: assert exc.pos == 15, exc.pos
def test_multi():
    # Several "&"-separated attributes in one group name; the expected
    # result shows "%7E" coming back as "~".
    expression = Martel.Re("(?P<qwe?def=%7Edalke&a=1&b=2&cc=33>X)")
    expected_attrs = {
        "def": "~dalke",
        "a": "1",
        "b": "2",
        "cc": "33",
    }
    check_element(get_element(expression), ("qwe", expected_attrs))
def test_same_tag():
    # Two groups share the tag name "char" and differ only in their "type"
    # attribute; both must be reported, in order.
    sax_parser = Martel.Re("(?P<char?type=a>a+)(?P<char?type=b>b+)").make_parser()
    collector = GrabElements()
    sax_parser.setContentHandler(collector)
    sax_parser.parseString("aaabb")

    expected = [
        ("char", {"type": "a"}),
        ("char", {"type": "b"}),
    ]
    assert len(collector.elements) == 2, len(collector.elements)
    for position in (0, 1):
        check_element(collector.elements[position], expected[position])
# Test: input with records and a footer but no header.  The header slot uses
# RecordReader.Nothing with empty arguments and an empty tag table, the footer
# reader is RecordReader.Everything, and the XML produced by
# saxutils.XMLGenerator is compared with the expected "gold" document.
# NOTE(review): this definition appears collapsed onto a single line in this
# view, so the line breaks inside the triple-quoted input and gold strings
# cannot be recovered from here; the code is deliberately left untouched.
def test_header_footer3(): # Have a footer but no header s = """\ ID 1 This is some data // ID 2 This is some more data // Okay, that was all of the data. """ gold = """\ <?xml version="1.0" encoding="iso-8859-1"?> <hf><record>ID 1 This is some data // </record><record>ID 2 This is some more data // </record><footer>Okay, that was all of the data. </footer></hf>""" # Don't use a regexp like this in your code - for testing only! record = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*")) # Require at least 5 characters (just to be safe) footer = Martel.Group("footer", Martel.Re(r".....(.|\n)*")) record = record.make_parser() footer = footer.make_parser() hf = Parser.HeaderFooterParser("hf", {}, RecordReader.Nothing, (), (), RecordReader.EndsWith, ("//\n", ), record.tagtable, RecordReader.Everything, (), footer.tagtable, (0, 1, {})) outfile = StringIO() hf.setContentHandler(saxutils.XMLGenerator(outfile)) hf.setErrorHandler(handler.ErrorHandler()) hf.parseFile(StringIO(s)) text = outfile.getvalue() assert text == gold, (text, gold)
# Test: input with a header and records but no footer.  The footer slot uses
# RecordReader.Nothing with empty arguments and an empty tag table, and the
# XML produced by saxutils.XMLGenerator is compared with the expected "gold"
# document.
# NOTE(review): this definition appears collapsed onto a single line in this
# view, so the line breaks inside the triple-quoted input and gold strings
# cannot be recovered from here; the code is deliberately left untouched.
def test_header_footer2(): # Have a header but no footer s = """ This is some misc. header text that goes on until the end. ID 1 This is some data ID 2 This is some more data """ gold = """\ <?xml version="1.0" encoding="iso-8859-1"?> <hf><header> This is some misc. header text that goes on until the end. </header><record>ID 1 This is some data </record><record>ID 2 This is some more data </record></hf>""" # Don't use a regexp like this in your code - for testing only! header = Martel.Group("header", Martel.Re(r"(.|\n)*")) record = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*")) header = header.make_parser() record = record.make_parser() hf = Parser.HeaderFooterParser("hf", {}, RecordReader.Until, ("ID", ), header.tagtable, RecordReader.StartsWith, ("ID", ), record.tagtable, RecordReader.Nothing, (), (), (0, 1, {})) outfile = StringIO() hf.setContentHandler(saxutils.XMLGenerator(outfile)) hf.setErrorHandler(handler.ErrorHandler()) hf.parseFile(StringIO(s)) text = outfile.getvalue() assert text == gold, (text, gold)
def test_header_footer5(): exp = Martel.HeaderFooter( "dataset", {}, None, None, None, Martel.Re("a(?P<b>b*)a\R"), RecordReader.CountLines, (1, ), Martel.Re("end\R"), RecordReader.CountLines, (1, ), ) lines = ["aa", "aba", "abba", "END"] text = "\n".join(lines) + "\n" try: for info in exp.make_iterator("b").iterateString(text): assert len(info["b"]) == 1 except Parser.ParserPositionException, exc: assert exc.pos == 12, exc.pos
def test_filter():
    # The data holds
    #    8 stretches of "a"s
    #   10 stretches of "b"s
    #    4 stretches of "c"s
    # but LAX is only asked to keep "b" and "c", so "a" must be absent.
    text = "ababcbaaaababbbabccbaabcabcba"
    expression = Martel.Re("((?P<a>a+)|(?P<b>b+)|(?P<c>c+))+")

    lax = LAX.LAX(["b", "c"])
    sax_parser = expression.make_parser()
    sax_parser.setContentHandler(lax)
    sax_parser.parseString(text)

    assert lax.has_key("a") == 0
    for tag, stretches in (("b", 10), ("c", 4)):
        assert len(lax[tag]) == stretches
def test_header_footer2():
    # No header, one-line records, one-line footer.  Record number n
    # (counting from 0) must capture exactly n "b"s in group "b", and there
    # must be three records before the "end" footer.
    record_format = Martel.Re("a(?P<b>b*)a\R")
    footer_format = Martel.Re("end\R")
    exp = Martel.HeaderFooter(
        "dataset", {},
        None, None, None,
        record_format, RecordReader.CountLines, (1, ),
        footer_format, RecordReader.CountLines, (1, ),
    )

    text = "\n".join(["aa", "aba", "abba", "end"]) + "\n"

    seen = 0
    for info in exp.make_iterator("b").iterateString(text):
        assert len(info["b"]) == 1
        assert len(info["b"][0]) == seen, (info["b"][0], seen)
        seen = seen + 1
    assert seen == 3, seen
def test_header_footer4(): # Test that the errors are correct exp = Martel.HeaderFooter( "dataset", {}, Martel.Re("header\R"), RecordReader.CountLines, (1, ), Martel.Re("a(?P<b>b*)a\R"), RecordReader.CountLines, (1, ), Martel.Re("end\R"), RecordReader.CountLines, (1, ), ) lines = ["HEADER", "aa", "aba", "abba", "end"] text = "\n".join(lines) + "\n" try: for info in exp.make_iterator("b").iterateString(text): pass except Parser.ParserPositionException, exc: assert exc.pos == 0
def test4():
    # Make sure the default returns LAX items
    exp = Martel.Re("(?P<term>(?P<a>a+)(?P<b>b+))+")
    x = exp.make_iterator("term").iterateString("aabbabaaaabb")

    # Each "term" holds exactly one run of "a"s and one run of "b"s.
    for want_a, want_b in (("aa", "bb"), ("a", "b"), ("aaaa", "bb")):
        term = x.next()
        assert len(term["a"]) == 1 and term["a"][0] == want_a, term["a"]
        assert len(term["b"]) == 1 and term["b"][0] == want_b, term["b"]

    # Calling next() again once the data is used up must return None.
    term = x.next()
    assert term is None, "Did not stop correctly"
def test_missing_end3(): # This one is missing the footer exp = Martel.HeaderFooter("dataset", {}, None, None, None, Martel.Group("record", Martel.Re(r"(b*\R)*a\R")), RecordReader.EndsWith, ("a",), Martel.Group("footer", Martel.Re(r"c\R")), RecordReader.CountLines, (1,) ) lines = [ "bbb", "bb", "a", "b", "a", "a", "c", # This will be removed for the test ] text = "\n".join(lines) + "\n" iterator = exp.make_iterator("record") # This should work for x in iterator.iterateString(text): pass # This should not lines.pop() text = "\n".join(lines) + "\n" try: for x in iterator.iterateString(text): pass raise AssertionError except Parser.ParserPositionException, exc: assert exc.pos == 15, exc.pos
def test_make_iterparsers1():
    # A ParseRecords expression must hand back an IterRecords iterator.
    # Line n (counting from 0) holds n "a"s, so the n-th "spam" record,
    # minus its trailing newline, must have length n.
    exp = Martel.ParseRecords(
        "dataset", {},
        Martel.Group("spam", Martel.Re(r"a*\R")),
        RecordReader.CountLines, (1,))

    iterator = exp.make_iterator("spam")
    assert isinstance(iterator, IterParser.IterRecords)

    text = "".join(["a" * n + "\n" for n in range(0, 10)])

    count = 0
    for rec in iterator.iterateString(text):
        assert len(rec["spam"][0][:-1]) == count, (count, rec["spam"][0])
        count = count + 1
    assert count == 10
def test_header_footer7(): exp = Martel.HeaderFooter("dataset", {}, None, None, None, Martel.Re("a(?P<b>b*)a\R"), RecordReader.CountLines, (1, ), None, None, None) lines = [ "aa", "aBbbba", "abba", ] text = "\n".join(lines) + "\n" try: for info in exp.make_iterator("b").iterateString(text): pass except Parser.ParserPositionException, exc: assert exc.pos == 4, exc.pos
def test_record_parser():
    # The input holds six "X" lines, each followed by one more line.  Two
    # of those following lines ("b" and "q") do not fit the record format,
    # so two recoverable errors and four good "A" records are expected.
    record_format = Martel.Group("A", Martel.Str("X\n") + Martel.Re("a*\n"))
    record_parser = record_format.make_parser()

    parser = Parser.RecordParser(
        "blah", {},
        record_parser.tagtable, (0, 1, {}),
        RecordReader.StartsWith, ("X", ))

    err = CountErrors()
    parser.setErrorHandler(err)
    count = CountRecords("A")
    parser.setContentHandler(count)

    parser.parseString("X\na\nX\nb\nX\naaa\nX\naaaa\nX\nq\nX\na\n")

    assert err.fatal_error_count == 0, err.fatal_error_count
    assert err.error_count == 2, err.error_count
    assert count.count == 4, count.count
def test_record_parser():
    # Each input line is split into two "term" elements (told apart by
    # their "field" attribute) and one "last" element.  Two lines therefore
    # give the enclosing "all" element plus six children.
    line_format = Martel.Re("(?P<term?field=first>...)"
                            "(?P<term?field=second>...)"
                            "(?P<last>.)\R")
    records = Martel.ParseRecords(
        "all", {"author": "guido"},
        line_format,
        RecordReader.CountLines, (1, ))

    sax_parser = records.make_parser()
    collector = GrabElements()
    sax_parser.setContentHandler(collector)
    sax_parser.parseString("aaabbbZ\ncccdddZ\n")

    expected = [
        ("all", {"author": "guido"}),
        ("term", {"field": "first"}),
        ("term", {"field": "second"}),
        ("last", {}),
        ("term", {"field": "first"}),
        ("term", {"field": "second"}),
        ("last", {}),
    ]
    found = collector.elements
    assert len(found) == 7
    for got, want in zip(found, expected):
        check_element(got, want)
def test_ri3(): # error in the second record ip = IterParser.IterRecords( Martel.Group("spam", Martel.Re(r"b*\Rc*\R")).make_parser(debug_level = 1), RecordReader.CountLines, (2,), "spam") lines = ["b", "c", "b-", "cc", "bbb", "ccc", ] text = "\n".join(lines) + "\n" try: for x in ip.iterateString(text): pass except Parser.ParserPositionException, exc: assert exc.pos == 5, exc.pos
def test_escape():
    # Each case pairs a pattern whose group name carries %-escaped
    # attribute values with the attribute dictionary expected from it.
    cases = (
        # upper- and lower-case hex digits both decode
        ("(?P<qwe?a=%7E>X)", {"a": "~"}),
        ("(?P<qwe?a=%7e>X)", {"a": "~"}),
        # empty value, and no attributes at all
        ("(?P<qwe?a=>X)", {"a": ""}),
        ("(?P<qwe?>X)", {}),
        # several escapes in one value
        ("(?P<qwe?a=%48%65%6c%6c%6f>X)", {"a": "Hello"}),
        # several attributes
        ("(?P<qwe?a=%7e%7E&b=%7e%7E>X)", {"a": "~~", "b": "~~"}),
        ("(?P<qwe?a=%7e%7E&b=>X)", {"a": "~~", "b": ""}),
    )
    for pattern, expected_attrs in cases:
        name, attrs = get_element(Martel.Re(pattern))
        check_dicts(expected_attrs, attrs)
def test5(): # Does 'iter' work? try: iter except NameError: print "Test skipped - missing 'iter' builtin from Python 2.2." return exp = Martel.Re("(?P<term>(?P<a>a+)(?P<b>b+))+") x = exp.make_iterator("term") it = iter(x.iterateString("aabbabaaaabb")) term = it.next() assert len(term["a"]) == 1 and term["a"][0] == "aa", term["a"] assert len(term["b"]) == 1 and term["b"][0] == "bb", term["b"] term = it.next() assert len(term["a"]) == 1 and term["a"][0] == "a", term["a"] assert len(term["b"]) == 1 and term["b"][0] == "b", term["b"] term = it.next() assert len(term["a"]) == 1 and term["a"][0] == "aaaa", term["a"] assert len(term["b"]) == 1 and term["b"][0] == "bb", term["b"] try: it.next() raise AssertionError("Did not stop correctly") except StopIteration: pass
"""Martel definitions for the output files produced by primer3.
"""
import Martel

# Small building blocks reused by the line definitions below.
any_space = Martel.Re("[ ]+")
blank_line = Martel.AnyEol()
comment_line = Martel.Str("#") + Martel.ToEol()

# comments and blank lines in the file
comments = Martel.Group("comments",
                        blank_line + comment_line +
                        blank_line + comment_line)

# 1 PRODUCT SIZE: 289
# (regular expressions are raw strings so "\d" reaches Martel untouched
# instead of relying on Python leaving an unknown escape alone)
product_size = Martel.Group("product_size", Martel.Re(r"[\d]+"))
start_primer = Martel.Group(
    "start_primer",
    any_space + Martel.Re(r"[\d]+") + Martel.Str(" PRODUCT SIZE: "))
primer_start_line = Martel.Group(
    "primer_start_line",
    start_primer + product_size + Martel.AnyEol())

# a blank line that signifies a new primer is coming up
single_primer_line = Martel.Group("single_primer_line", blank_line)

# FORWARD PRIMER 1725 20 59.96 55.00 AGGGAAGGGATGCTAGGTGT
primer_space = Martel.Str(" " * 5)
any_integer = Martel.Re(r"[\d]+")
any_float = Martel.Re(r"[\d\.]+")
sequence = Martel.Re("[GATCN]+")
""" import Martel from Martel import RecordReader def Simple(tag, tag_data): return Martel.Group( tag, Martel.Str(tag + " ") + Martel.Group(tag_data, Martel.Re("[^\R]*")) + Martel.AnyEol()) #--- ID ID = Martel.Group("ID", Martel.Re( r"ID (?P<entry_name>\w+) +(?P<data_class_table>\w+); +" \ r"(?P<molecule_type>\w+); +(?P<sequence_length>\d+) AA\.\R" )) #--- AC AC = Martel.Group( "AC", Martel.Re(r"AC (?P<ac_number>\w+);( (?P<ac_number>\w+);)*\R")) AC_block = Martel.Group("AC_block", Martel.Rep1(AC)) #--- DT DT_created = Martel.Group("DT_created", Martel.Re( r"DT (?P<day>\d\d)-(?P<month>...)-(?P<year>\d{4}) \(Rel. "\ r"(?P<release>\d\d), Created\)\R" )) DT_seq_update = Martel.Group("DT_seq_update", Martel.Re(
def Simple(tag, tag_data):
    """Return the Martel expression for a simple one-line tagged field.

    The expression is a group named `tag` that contains, in order: the
    literal text of `tag` followed by a space, a group named `tag_data`
    holding the characters matched by "[^\\R]*", and an end-of-line.

    tag -- line prefix; also used as the name of the outer group
    tag_data -- name of the inner group that captures the field text
    """
    # Raw string: the backslash in "\R" must reach Martel as written rather
    # than depend on Python passing an unrecognised escape through.
    field_text = Martel.Group(tag_data, Martel.Re(r"[^\R]*"))
    return Martel.Group(
        tag,
        Martel.Str(tag + " ") + field_text + Martel.AnyEol())
# Test: for every (pattern, good_list, bad_list) entry, build a parser from
# Martel.Re(pattern) and check that each word in good_list is accepted and
# each word in bad_list is rejected.  Acceptance is tracked by the local
# _Test handler: startDocument sets good_parse to 1 and a fatalError carrying
# a ParserPositionException resets it to 0.  The same words are also matched
# against re.compile(str(tree)) and, for patterns that do not contain the
# Martel-specific "\R", against re.compile(pattern) itself.
# NOTE(review): this definition appears collapsed onto two lines in this
# view, so the nesting of the statements after each "if pat is not None:"
# cannot be confirmed from here; the code is deliberately left untouched.
def test(): class _Test(handler.ContentHandler, handler.ErrorHandler): def __init__(self): handler.ContentHandler.__init__(self) self.good_parse = 0 def startDocument(self): self.good_parse = 1 def fatalError(self, exc): if isinstance(exc, Parser.ParserPositionException): # Called when there aren't enough characters self.good_parse = 0 def error(self, exc): # shouldn't be called with these parsers raise exc cb = _Test() patterns = ( ("a", ("a",), ("A", "", "Z")), ("[a-z]", ("a", "b", "q"), ("A", "-")), ("[^abc]", ("A", "d", "f"), ("a", "b", "c", "ab", "")), ("a+", ("a", "aaa"), ("A", "baa")), ("a*", ("", "a", "aaa"), ()), ("\\]", ("]",), ("a",)), ("a*$", ("a", "aaa"), ("A", "baa", "aaaaab")), ("(ab|ac)", ("ab", "ac"), ("aa", "A", "a", "cb")), ("(ab|ac)*$", ("", "ab", "ac", "abacabac", "ababab"), ("aa", "A", "a", "cb", "ababababaca")), ("ab{3}$", ("abbb",), ("abb", "bbb", "abbbb")), ("ab{3,}$", ("abbb", "abbbb", "abbbbbbbbb"), ("abb", "bbb", "abbbc")), ("ab{3,}", ("abbb", "abbbb", "abbbbbbbbb"), ("abb", "bbb")), ("abc$|abcd|bc|d", ("abc", "abcd", "bc", "d"), ("xabc", "ab", "a", "", "abce")), ("^a.*", ("a", "aa"), ("b", "ba", "c", "")), ("^[^b]+", ("a", "aa", "c"), ("b", "ba", "")), ("a(?!b).b?", ("aa", "ac", "aab"), ("a", "ab", "abc")), ("a(?=[bc])..", ("abx", "acx", "aba"), ("ac", "ab", "adb")), ("ab?[bc]?", ("a", "ab", "abb", "ac"), ("", "cab", "x")), ("ab{2,4}c?", ("abb", "abbb", "abbbb", "abbbbc"), ("ab", "abc", "xabbb")), ("ab{2,4}$", ("abb", "abbb", "abbbb"), ("ab", "abc", "xabbb", "abbbbc", "abbbbbb")), ("ab{2,4}cd?", ("abbc", "abbbc", "abbbbc", "abbbbcd"), ("abc", "abbbbbc", "abcbbc")), ("ab?c", ("ac", "abc"), ("abb", "abbc", "abbbc")), (r"\R", ("\n", "\r", "\r\n"), (" ", "\r\r", "\n\n", "\r\n ")), (r"a\Rb\R", ("a\nb\n", "a\rb\r", "a\r\nb\r\n", "a\rb\r\n"), ("ab", "a", "a\n\nb\n", "a\nb", "a\r\nb")), (r"ID [^\R]+\R", ("ID A123\n", "ID A123\r", "ID A123\r\n"), ("ID A123\n\n", "ID A123\r\r", "ID A123", "ID \n")), # named group backreference 
(r"(?P<name>A+)B(?P=name)A", ("ABAA", "AABAAA", "AAABAAAA"), ("ABA", "AB", "ABAAA", "AABA", "AABAA", "AABAAAA")), # named group backreference which can be empty (r"(?P<name>A*)B(?P=name)A", ("BA", "ABAA", "AABAAA", "AAABAAAA"), ("BAA", "ABA", "AB", "ABAAA", "AABA", "AABAA", "AABAAAA")), ) for re_pat, good_list, bad_list in patterns: tree = Martel.Re(re_pat) exp = tree.make_parser() exp.setContentHandler(cb) exp.setErrorHandler(cb) if string.find(re_pat, r"\R") == -1: # \R is a Martel-specific flag pat = re.compile(re_pat) else: pat = None pat2 = re.compile(str(tree)) for word in good_list: exp.parseString(word) if pat is not None: m = pat.match(word) assert m, "Re problem recognizing " + repr(word) assert m.end() == len(word), "Did not parse all of %s: %d" % \ (repr(word), m.end()) m = pat2.match(word) assert m, "created Re problem recognizing " + repr(word) assert m.end() == len(word), "Did not parse all of created %s: %d"\ % (repr(word), m.end()) assert cb.good_parse, "Problem not recognizing %s with %s" % \ (repr(word), repr(re_pat)) for word in bad_list: exp.parseString(word) if pat is not None: m = pat.match(word) assert not m or m.end() != len(word), \ "Re should not recognize " + repr(word) m = pat2.match(word) assert not m or m.end() != len(word), \ "created Re should not recognize " + repr(word) assert not cb.good_parse, \ "Should not recognize %s\ntagtable is %s" % \ (repr(word), repr(exp.tagtable))
# Test: header, records and footer, with some inputs carrying extra or
# malformed data.  Each dataset entry is (input text, expected "record" count,
# expected recoverable-error count, expected fatal-error count); the same
# HeaderFooterParser is reused for every entry with fresh CountRecords and
# CountErrors handlers.
# NOTE(review): this definition appears collapsed onto a single line in this
# view, so the line breaks inside the triple-quoted inputs cannot be recovered
# from here; the code is deliberately left untouched.
def test_header_footer8(): # header, record and footer, but with extra data s1 = """Two lines in the header. Data 1 Data 2 Data Q Data 4 FOOTER Abc FOOTER B """ s2 = """Two lines in the header. Data 1 Data 2 Data Q Data 4 FOOTER Abc """ s3 = """Two lines in the header. Data 1 Data 4 FOOTER Abc """ s4 = """Two lines in the header. Data Q FOOTER Abc """ s5 = """Two lines in the header. FOOTER Abc """ dataset = ( (s1, 3, 1, 1), (s2, 3, 1, 0), (s3, 2, 0, 0), (s4, 0, 1, 0), (s5, 0, 0, 0), ) header = Martel.Group("header", Martel.Re(r"(.|\n)*")) record = Martel.Group("record", Martel.Re(r"Data \d+\n")) footer = Martel.Group("footer", Martel.Re("FOOTER \w+\n")) header = header.make_parser() record = record.make_parser() footer = footer.make_parser() hf = Parser.HeaderFooterParser("hf", {}, RecordReader.CountLines, (2, ), header.tagtable, RecordReader.CountLines, (1, ), record.tagtable, RecordReader.CountLines, (1, ), footer.tagtable, (0, 1, {})) for s, rec_count, err_count, fatal_count in dataset: count = CountRecords("record") hf.setContentHandler(count) err = CountErrors() hf.setErrorHandler(err) hf.parseFile(StringIO(s)) assert err.error_count == err_count, (s, err.error_count, err_count) assert err.fatal_error_count == fatal_count, \ (s, err.fatal_error_count, fatal_count) assert count.count == rec_count, (s, count.count, rec_count)