Exemple #1
0
def test_make_iterparsers2():
    """Check that a HeaderFooter expression builds an IterHeaderFooter.

    The input is made of "a" lines, then "b" lines, then "c" lines, which
    the header, record and footer patterns match respectively.  Iterating
    over the "record" tag is asserted to yield exactly one item.
    """
    exp = Martel.HeaderFooter("dataset", {},
                              Martel.Group("header", Martel.Re(r"(a*\R)*")),
                              RecordReader.Until, ("b",),

                              Martel.Group("record", Martel.Re(r"(b*\R)*")),
                              RecordReader.Until, ("c",),

                              Martel.Group("footer", Martel.Re(r"(c*\R)*")),
                              RecordReader.Everything, (),)

    iterator = exp.make_iterator("record")
    assert isinstance(iterator, IterParser.IterHeaderFooter), iterator
    # One entry per input line.  The comma after "a" matters: without it
    # the adjacent literals "a" "aa" are silently merged into "aaa".
    lines = ["a",
             "aa",
             "aaaaaaa",
             "b",
             "bb",
             "bbbb",
             "bbbbbbbb",
             "bbbbbbbbbbbbbbbb",
             "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
             "cccc",
             "cc",
             "c",
             ]

    text = "\n".join(lines) + "\n"

    # Count how many "record" items the iterator produces.
    i = 0
    for rec in iterator.iterateString(text):
        i = i + 1
    assert i == 1, i
Exemple #2
0
def test_header_footer7():
    """Parse input that has a header and a footer but no record data.

    Asserts that no errors, no fatal errors and no "record" elements are
    reported.
    """
    text = """\
This is some misc. header text
that goes on until the end.
FOOTER
"""
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    footer_exp = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    # One (reader, reader args, tagtable) triple per section.
    hf_parser = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.CountLines, (2, ), header_parser.tagtable,
        RecordReader.EndsWith, ("//", ), record_parser.tagtable,
        RecordReader.StartsWith, ("FOOTER", ), footer_parser.tagtable,
        (0, 1, {}))

    record_counter = CountRecords("record")
    hf_parser.setContentHandler(record_counter)
    error_counter = CountErrors()
    hf_parser.setErrorHandler(error_counter)
    hf_parser.parseFile(StringIO(text))

    assert error_counter.error_count == 0, error_counter.error_count
    assert error_counter.fatal_error_count == 0, \
           error_counter.fatal_error_count
    assert record_counter.count == 0, record_counter.count
Exemple #3
0
def test_header_footer_parser():
    """Use the same tag name in header, record and footer without collision.

    Every section emits a "term" element; they are told apart by their
    "pos" attribute.
    """
    header_format = Martel.Re("(?P<term?pos=header>a+)\R")
    record_format = Martel.Re("(?P<term?pos=body>b+)\R")
    footer_format = Martel.Re("(?P<term?pos=footer>c+)\R")

    expression = Martel.HeaderFooter(
        "all", {"state": "New Mexico"},
        header_format, RecordReader.CountLines, (1, ),
        record_format, RecordReader.CountLines, (1, ),
        footer_format, RecordReader.CountLines, (1, ),
    )

    parser = expression.make_parser()
    grab = GrabElements()
    parser.setContentHandler(grab)
    parser.parseString("a\nbb\nbb\nccc\n")

    # Expected (name, attributes) pairs, in the order they are checked.
    expected = [
        ("all", {"state": "New Mexico"}),
        ("term", {"pos": "header"}),
        ("term", {"pos": "body"}),
        ("term", {"pos": "body"}),
        ("term", {"pos": "footer"}),
    ]
    elements = grab.elements
    assert len(elements) == 5, len(elements)
    for element, spec in zip(elements, expected):
        check_element(element, spec)
Exemple #4
0
def test_hf4():
    """Iterate over records of an IterHeaderFooter that has no footer.

    The input is one header line followed by three two-line records; the
    n-th record is expected to be n "b"s, a newline, n "c"s and a newline.
    """
    ip = IterParser.IterHeaderFooter(
        Martel.Re(r"a*\R").make_parser(),
        RecordReader.CountLines,
        (1,),

        Martel.Group("spam", Martel.Re(r"b*\Rc*\R")).make_parser(),
        RecordReader.CountLines,
        (2,),

        # No footer parser, reader or reader arguments.
        None,
        None,
        None,

        "spam")

    lines = ["aaaaaaaaa",
             "b",
             "c",
             "bb",
             "cc",
             "bbb",
             "ccc",
             ]
    text = "\n".join(lines) + "\n"

    i = 1
    for x in ip.iterateString(text):
        assert x["spam"][0] == "b" * i + "\n" + "c" * i + "\n"
        i = i + 1
    # Without this check the test would pass even if nothing was yielded:
    # three records means the counter ends one past the last record.
    assert i == 4, i
Exemple #5
0
def test_header_footer6():
    """Make sure bad records can be skipped when a footer is present.

    Asserts three recoverable errors, no fatal errors and four "record"
    elements for the input below.
    """
    text = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
//
ID A
This is some more data
//
ID 3
This is again some more data
//
ID Q
This blah
//
ID W
QWE
//
ID 987
To be
//
ID 897
Or not to be
//
FOOTER
"""
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    footer_exp = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    # One (reader, reader args, tagtable) triple per section.
    hf_parser = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.EndsWith, ("//", ), record_parser.tagtable,
        RecordReader.StartsWith, ("FOOTER", ), footer_parser.tagtable,
        (0, 1, {}))

    record_counter = CountRecords("record")
    hf_parser.setContentHandler(record_counter)
    error_counter = CountErrors()
    hf_parser.setErrorHandler(error_counter)
    hf_parser.parseFile(StringIO(text))

    assert error_counter.error_count == 3, error_counter.error_count
    assert error_counter.fatal_error_count == 0, \
           error_counter.fatal_error_count
    assert record_counter.count == 4, record_counter.count
Exemple #6
0
def test_header_footer1():
    """Parse header, three records and footer, and compare the XML output.

    The SAX events from the parser are fed to an XMLGenerator and the
    generated document must equal the expected text exactly.
    """
    text = """\
header
XX
record 1
//
record 2
//
record 3
//
footer
"""
    expected_xml = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>header
XX
</header><record>record 1
//
</record><record>record 2
//
</record><record>record 3
//
</record><footer>footer
</footer></hf>"""

    debug_level = 1

    # Don't use regexps like these in your code - for testing only!
    header_exp = Martel.Group("header", Martel.Re(r"header(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"rec(.|\n)*"))
    footer_exp = Martel.Group("footer", Martel.Re(r"footer(.|\n)*"))

    header_parser = header_exp.make_parser(debug_level=debug_level)
    record_parser = record_exp.make_parser(debug_level=debug_level)
    footer_parser = footer_exp.make_parser(debug_level=debug_level)

    # One (reader, reader args, tagtable) triple per section.
    hf_parser = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.EndsWith, ("XX\n", ), header_parser.tagtable,
        RecordReader.EndsWith, ("//\n", ), record_parser.tagtable,
        RecordReader.StartsWith, ("f", ), footer_parser.tagtable,
        (0, debug_level, {}))

    xml_out = StringIO()
    hf_parser.setContentHandler(saxutils.XMLGenerator(xml_out))
    hf_parser.setErrorHandler(handler.ErrorHandler())
    hf_parser.parseFile(StringIO(text))

    produced_xml = xml_out.getvalue()
    assert produced_xml == expected_xml, (produced_xml, expected_xml)
Exemple #7
0
def gen_iterator():
    """Return an IterHeaderFooter with header, record and footer parsers.

    The header and footer readers each count one line; the record reader
    counts two lines.  The iterator is created for the "spam" tag, which
    is the group wrapping the record pattern.
    """
    header_parser = Martel.Re(r"a*\R").make_parser()
    record_parser = Martel.Group(
        "spam", Martel.Re(r"b*\Rc*\R")).make_parser(debug_level = 1)
    footer_parser = Martel.Re(r"d*\R").make_parser()

    return IterParser.IterHeaderFooter(
        header_parser, RecordReader.CountLines, (1,),
        record_parser, RecordReader.CountLines, (2,),
        footer_parser, RecordReader.CountLines, (1,),
        "spam")
Exemple #8
0
def test_missing_end2():
    # Same as the test_missing_end1 but using HeaderFooter
    exp = Martel.HeaderFooter("dataset", {},
                              None, None, None,
                              Martel.Group("record", Martel.Re(r"(b*\R)*a\R")),
                              RecordReader.EndsWith, ("a",),
                              None, None, None
                              )
    lines = [
        "bbb",
        "bb",
        "a",
        "b",
        "a",
        "a",
        ]
    text = "\n".join(lines) + "\n"

    iterator = exp.make_iterator("record")
    # This should work
    for x in iterator.iterateString(text):
        pass

    # This should not
    lines.append("c")
    text = "\n".join(lines) + "\n"
    try:
        for x in iterator.iterateString(text):
            pass
        raise AssertionError
    except Parser.ParserPositionException, exc:
        assert exc.pos == 15, exc.pos
Exemple #9
0
def test_multi():
    """Check a tag that carries several attributes, one of them escaped."""
    expected_attrs = {"def": "~dalke", "a": "1", "b": "2", "cc": "33"}
    pattern = Martel.Re("(?P<qwe?def=%7Edalke&a=1&b=2&cc=33>X)")
    element = get_element(pattern)
    check_element(element, ("qwe", expected_attrs))
Exemple #10
0
def test_same_tag():
    """Two groups sharing the tag "char" must yield two distinct elements."""
    expression = Martel.Re("(?P<char?type=a>a+)(?P<char?type=b>b+)")
    parser = expression.make_parser()
    grab = GrabElements()
    parser.setContentHandler(grab)
    parser.parseString("aaabb")

    found = grab.elements
    assert len(found) == 2, len(found)

    first, second = found[0], found[1]
    check_element(first, ("char", {"type": "a"}))
    check_element(second, ("char", {"type": "b"}))
Exemple #11
0
def test_header_footer3():
    """Parse input that has a footer but no header; compare the XML output."""
    text = """\
ID 1
This is some data
//
ID 2
This is some more data
//
Okay, that was all of the data.
"""
    expected_xml = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><record>ID 1
This is some data
//
</record><record>ID 2
This is some more data
//
</record><footer>Okay, that was all of the data.
</footer></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    # Require at least 5 characters (just to be safe)
    footer_exp = Martel.Group("footer", Martel.Re(r".....(.|\n)*"))

    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    # The header slot gets the Nothing reader and an empty tagtable.
    hf_parser = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Nothing, (), (),
        RecordReader.EndsWith, ("//\n", ), record_parser.tagtable,
        RecordReader.Everything, (), footer_parser.tagtable,
        (0, 1, {}))

    xml_out = StringIO()
    hf_parser.setContentHandler(saxutils.XMLGenerator(xml_out))
    hf_parser.setErrorHandler(handler.ErrorHandler())
    hf_parser.parseFile(StringIO(text))

    produced_xml = xml_out.getvalue()
    assert produced_xml == expected_xml, (produced_xml, expected_xml)
Exemple #12
0
def test_header_footer2():
    """Parse input that has a header but no footer; compare the XML output."""
    text = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
ID 2
This is some more data
"""
    expected_xml = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>
This is some misc. header text
that goes on until the end.
</header><record>ID 1
This is some data
</record><record>ID 2
This is some more data
</record></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()

    # The footer slot gets the Nothing reader and an empty tagtable.
    hf_parser = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.StartsWith, ("ID", ), record_parser.tagtable,
        RecordReader.Nothing, (), (),
        (0, 1, {}))

    xml_out = StringIO()
    hf_parser.setContentHandler(saxutils.XMLGenerator(xml_out))
    hf_parser.setErrorHandler(handler.ErrorHandler())
    hf_parser.parseFile(StringIO(text))

    produced_xml = xml_out.getvalue()
    assert produced_xml == expected_xml, (produced_xml, expected_xml)
def test_header_footer5():
    exp = Martel.HeaderFooter(
        "dataset",
        {},
        None,
        None,
        None,
        Martel.Re("a(?P<b>b*)a\R"),
        RecordReader.CountLines,
        (1, ),
        Martel.Re("end\R"),
        RecordReader.CountLines,
        (1, ),
    )
    lines = ["aa", "aba", "abba", "END"]
    text = "\n".join(lines) + "\n"

    try:
        for info in exp.make_iterator("b").iterateString(text):
            assert len(info["b"]) == 1
    except Parser.ParserPositionException, exc:
        assert exc.pos == 12, exc.pos
Exemple #14
0
def test_filter():
    """A LAX handler restricted to "b" and "c" must not collect "a"."""
    # The data holds 8 runs of "a", 10 runs of "b" and 4 runs of "c".
    data = "ababcbaaaababbbabccbaabcabcba"
    expression = Martel.Re("((?P<a>a+)|(?P<b>b+)|(?P<c>c+))+")
    parser = expression.make_parser()

    collected = LAX.LAX(["b", "c"])
    parser.setContentHandler(collected)
    parser.parseString(data)

    assert collected.has_key("a") == 0
    assert len(collected["b"]) == 10
    assert len(collected["c"]) == 4
def test_header_footer2():
    """Iterate "b" groups of a HeaderFooter that has records and a footer.

    Record number n (counting from zero) must hold exactly one "b" match
    of length n, and three records must be seen in total.
    """
    record_format = Martel.Re("a(?P<b>b*)a\R")
    footer_format = Martel.Re("end\R")
    exp = Martel.HeaderFooter(
        "dataset", {},
        None, None, None,
        record_format, RecordReader.CountLines, (1, ),
        footer_format, RecordReader.CountLines, (1, ),
    )
    text = "\n".join(["aa", "aba", "abba", "end"]) + "\n"

    seen = 0
    for info in exp.make_iterator("b").iterateString(text):
        matches = info["b"]
        assert len(matches) == 1
        assert len(matches[0]) == seen, (matches[0], seen)
        seen = seen + 1
    assert seen == 3, seen
def test_header_footer4():
    # Test that the errors are correct
    exp = Martel.HeaderFooter(
        "dataset",
        {},
        Martel.Re("header\R"),
        RecordReader.CountLines,
        (1, ),
        Martel.Re("a(?P<b>b*)a\R"),
        RecordReader.CountLines,
        (1, ),
        Martel.Re("end\R"),
        RecordReader.CountLines,
        (1, ),
    )
    lines = ["HEADER", "aa", "aba", "abba", "end"]
    text = "\n".join(lines) + "\n"

    try:
        for info in exp.make_iterator("b").iterateString(text):
            pass
    except Parser.ParserPositionException, exc:
        assert exc.pos == 0
def test4():
    """Make sure the default iterator returns LAX-style items.

    Each "term" must hold exactly one "a" match and one "b" match, and
    next() must return None once the terms are exhausted.
    """
    exp = Martel.Re("(?P<term>(?P<a>a+)(?P<b>b+))+")
    terms = exp.make_iterator("term").iterateString("aabbabaaaabb")

    # Expected ("a" text, "b" text) for each term, in input order.
    for a_text, b_text in (("aa", "bb"), ("a", "b"), ("aaaa", "bb")):
        term = terms.next()
        assert len(term["a"]) == 1 and term["a"][0] == a_text, term["a"]
        assert len(term["b"]) == 1 and term["b"][0] == b_text, term["b"]

    term = terms.next()
    assert term is None, "Did not stop correctly"
Exemple #18
0
def test_missing_end3():
    # This one is missing the footer
    exp = Martel.HeaderFooter("dataset", {},
                              None, None, None,
                              
                              Martel.Group("record", Martel.Re(r"(b*\R)*a\R")),
                              RecordReader.EndsWith, ("a",),

                              Martel.Group("footer", Martel.Re(r"c\R")),
                              RecordReader.CountLines, (1,)
                              )
    lines = [
        "bbb",
        "bb",
        "a",
        "b",
        "a",
        "a",
        "c",  # This will be removed for the test
        ]
    text = "\n".join(lines) + "\n"

    iterator = exp.make_iterator("record")
    # This should work
    for x in iterator.iterateString(text):
        pass

    # This should not
    lines.pop()
    text = "\n".join(lines) + "\n"
    try:
        for x in iterator.iterateString(text):
            pass
        raise AssertionError
    except Parser.ParserPositionException, exc:
        assert exc.pos == 15, exc.pos
Exemple #19
0
def test_make_iterparsers1():
    """Check that a ParseRecords expression builds an IterRecords.

    The input has ten lines holding 0 to 9 "a" characters; record n must
    contain n characters before its trailing newline.
    """
    exp = Martel.ParseRecords(
        "dataset", {},
        Martel.Group("spam", Martel.Re(r"a*\R")),
        RecordReader.CountLines, (1,))
    iterator = exp.make_iterator("spam")
    assert isinstance(iterator, IterParser.IterRecords)

    text = "".join(["a" * n + "\n" for n in range(0, 10)])

    seen = 0
    for rec in iterator.iterateString(text):
        line = rec["spam"][0]
        # Drop the final character (the newline) before measuring.
        assert len(line[:-1]) == seen, (seen, line)
        seen = seen + 1
    assert seen == 10
def test_header_footer7():
    exp = Martel.HeaderFooter("dataset", {}, None, None, None,
                              Martel.Re("a(?P<b>b*)a\R"),
                              RecordReader.CountLines, (1, ), None, None, None)
    lines = [
        "aa",
        "aBbbba",
        "abba",
    ]
    text = "\n".join(lines) + "\n"

    try:
        for info in exp.make_iterator("b").iterateString(text):
            pass
    except Parser.ParserPositionException, exc:
        assert exc.pos == 4, exc.pos
Exemple #21
0
def test_record_parser():
    """Run a RecordParser over six records, two of which are malformed.

    Asserts no fatal errors, two recoverable errors and four "A" records.
    """
    record_exp = Martel.Group("A", Martel.Str("X\n") + Martel.Re("a*\n"))
    record_parser = record_exp.make_parser()

    parser = Parser.RecordParser(
        "blah", {}, record_parser.tagtable, (0, 1, {}),
        RecordReader.StartsWith, ("X", ))

    error_counter = CountErrors()
    parser.setErrorHandler(error_counter)
    record_counter = CountRecords("A")
    parser.setContentHandler(record_counter)

    parser.parseString("X\na\nX\nb\nX\naaa\nX\naaaa\nX\nq\nX\na\n")

    assert error_counter.fatal_error_count == 0, \
           error_counter.fatal_error_count
    assert error_counter.error_count == 2, error_counter.error_count
    assert record_counter.count == 4, record_counter.count
Exemple #22
0
def test_record_parser():
    """Check element names and attributes produced through ParseRecords.

    Two one-line records are parsed; each yields two "term" elements
    (told apart by their "field" attribute) and one "last" element, all
    after the enclosing "all" element.
    """
    line_format = Martel.Re("(?P<term?field=first>...)"
                            "(?P<term?field=second>...)"
                            "(?P<last>.)\R")
    records_format = Martel.ParseRecords(
        "all", {"author": "guido"}, line_format,
        RecordReader.CountLines, (1, ))
    parser = records_format.make_parser()
    grab = GrabElements()
    parser.setContentHandler(grab)
    parser.parseString("aaabbbZ\ncccdddZ\n")

    # Elements expected from one record, in order.
    per_record = [
        ("term", {"field": "first"}),
        ("term", {"field": "second"}),
        ("last", {}),
    ]
    expected = [("all", {"author": "guido"})] + per_record + per_record

    elements = grab.elements
    assert len(elements) == 7
    for element, spec in zip(elements, expected):
        check_element(element, spec)
Exemple #23
0
def test_ri3():
    # error in the second record
    ip = IterParser.IterRecords(
        Martel.Group("spam", Martel.Re(r"b*\Rc*\R")).make_parser(debug_level = 1),
        RecordReader.CountLines,
        (2,),
        
        "spam")

    lines = ["b",
             "c",
             "b-",
             "cc",
             "bbb",
             "ccc",
             ]
    text = "\n".join(lines) + "\n"
    try:
        for x in ip.iterateString(text):
            pass
    except Parser.ParserPositionException, exc:
        assert exc.pos == 5, exc.pos
Exemple #24
0
def test_escape():
    """Check decoding of %-escaped attribute values in group names."""
    # (pattern, expected attribute dictionary), checked in this order.
    cases = (
        ("(?P<qwe?a=%7E>X)", {"a": "~"}),
        ("(?P<qwe?a=%7e>X)", {"a": "~"}),
        ("(?P<qwe?a=>X)", {"a": ""}),
        ("(?P<qwe?>X)", {}),
        ("(?P<qwe?a=%48%65%6c%6c%6f>X)", {"a": "Hello"}),
        ("(?P<qwe?a=%7e%7E&b=%7e%7E>X)", {"a": "~~", "b": "~~"}),
        ("(?P<qwe?a=%7e%7E&b=>X)", {"a": "~~", "b": ""}),
    )
    for pattern, expected_attrs in cases:
        name, attrs = get_element(Martel.Re(pattern))
        check_dicts(expected_attrs, attrs)
def test5():
    # Does 'iter' work?
    try:
        iter
    except NameError:
        print "Test skipped - missing 'iter' builtin from Python 2.2."
        return
    exp = Martel.Re("(?P<term>(?P<a>a+)(?P<b>b+))+")
    x = exp.make_iterator("term")
    it = iter(x.iterateString("aabbabaaaabb"))
    term = it.next()
    assert len(term["a"]) == 1 and term["a"][0] == "aa", term["a"]
    assert len(term["b"]) == 1 and term["b"][0] == "bb", term["b"]
    term = it.next()
    assert len(term["a"]) == 1 and term["a"][0] == "a", term["a"]
    assert len(term["b"]) == 1 and term["b"][0] == "b", term["b"]
    term = it.next()
    assert len(term["a"]) == 1 and term["a"][0] == "aaaa", term["a"]
    assert len(term["b"]) == 1 and term["b"][0] == "bb", term["b"]
    try:
        it.next()
        raise AssertionError("Did not stop correctly")
    except StopIteration:
        pass
Exemple #26
0
"""Martel definitions for the output files produced by primer3.
"""
import Martel

# Building blocks shared by the line definitions below.
any_space = Martel.Re("[ ]+")
blank_line = Martel.AnyEol()

# A line starting with "#", matched through to the end of the line.
comment_line = Martel.Str("#") + Martel.ToEol()

# comments and blank lines in the file
comments = Martel.Group("comments",
                        blank_line + comment_line + blank_line + comment_line)

#   1 PRODUCT SIZE: 289
# Regular expressions are written as raw strings so that backslash
# sequences such as \d reach the pattern parser unchanged.
product_size = Martel.Group("product_size", Martel.Re(r"[\d]+"))
start_primer = Martel.Group(
    "start_primer",
    any_space + Martel.Re(r"[\d]+") + Martel.Str(" PRODUCT SIZE: "))
primer_start_line = Martel.Group("primer_start_line",
                                 start_primer + product_size + Martel.AnyEol())

# a blank line that signifies a new primer is coming up
single_primer_line = Martel.Group("single_primer_line", blank_line)

#      FORWARD PRIMER    1725   20  59.96  55.00  AGGGAAGGGATGCTAGGTGT
primer_space = Martel.Str(" " * 5)

# Field patterns: digits, digits with dots, and the letters G, A, T, C, N.
any_integer = Martel.Re(r"[\d]+")
any_float = Martel.Re(r"[\d\.]+")
sequence = Martel.Re("[GATCN]+")
Exemple #27
0
"""
import Martel
from Martel import RecordReader


def Simple(tag, tag_data):
    """Return the expression for a one-line field introduced by `tag`.

    The expression is a group named `tag` that matches, in order: the
    literal tag followed by three spaces, a nested group named `tag_data`
    matching the pattern [^\\R]* (presumably the rest of the line - \\R
    is a Martel-specific escape), and Martel.AnyEol().

    tag      -- literal line prefix, also used as the outer group name
    tag_data -- name of the nested group
    """
    # Raw string so the backslash in \R reaches Martel unchanged.
    return Martel.Group(
        tag,
        Martel.Str(tag + "   ") + Martel.Group(tag_data, Martel.Re(r"[^\R]*")) +
        Martel.AnyEol())


#--- ID
# Named groups: entry_name, data_class_table, molecule_type and
# sequence_length.

ID = Martel.Group(
    "ID",
    Martel.Re(
        r"ID   (?P<entry_name>\w+) +(?P<data_class_table>\w+); +"
        r"(?P<molecule_type>\w+); +(?P<sequence_length>\d+) AA\.\R"))

#--- AC
# One or more "ac_number" entries per line, each ended by ";".
# AC_block is one or more AC lines.

AC = Martel.Group(
    "AC",
    Martel.Re(r"AC   (?P<ac_number>\w+);( (?P<ac_number>\w+);)*\R"))
AC_block = Martel.Group("AC_block", Martel.Rep1(AC))

#--- DT
# Named groups: day, month, year and release.

DT_created = Martel.Group(
    "DT_created",
    Martel.Re(
        r"DT   (?P<day>\d\d)-(?P<month>...)-(?P<year>\d{4}) \(Rel. "
        r"(?P<release>\d\d), Created\)\R"))
DT_seq_update = Martel.Group("DT_seq_update", Martel.Re(
Exemple #28
0
def Simple(tag, tag_data):
    """Return the expression for a one-line field introduced by `tag`.

    The expression is a group named `tag` that matches, in order: the
    literal tag followed by three spaces, a nested group named `tag_data`
    matching the pattern [^\\R]* (presumably the rest of the line - \\R
    is a Martel-specific escape), and Martel.AnyEol().

    tag      -- literal line prefix, also used as the outer group name
    tag_data -- name of the nested group
    """
    # Raw string so the backslash in \R reaches Martel unchanged.
    return Martel.Group(
        tag,
        Martel.Str(tag + "   ") + Martel.Group(tag_data, Martel.Re(r"[^\R]*")) +
        Martel.AnyEol())
def test():
    """Check Martel.Re parsers against tables of good and bad inputs.

    Each entry of `patterns` is (regular expression, words that must be
    matched completely, words that must not).  Every word is checked in
    up to three ways:

      * with the parser made from Martel.Re(pattern),
      * with Python's `re` module on the original pattern (skipped when
        the pattern uses the Martel-specific \\R escape),
      * with Python's `re` module on str() of the Martel expression tree.
    """
    class _Test(handler.ContentHandler, handler.ErrorHandler):
        """SAX handler that records whether the latest parse succeeded."""
        def __init__(self):
            handler.ContentHandler.__init__(self)
            self.good_parse = 0
        def startDocument(self):
            # Every parse starts out as good ...
            self.good_parse = 1
        def fatalError(self, exc):
            # ... and is marked bad if a position error is reported.
            if isinstance(exc, Parser.ParserPositionException):
                # Called when there aren't enough characters
                self.good_parse = 0

        def error(self, exc):
            # shouldn't be called with these parsers
            raise exc

    cb = _Test()
    
    patterns = (
        ("a", ("a",), ("A", "", "Z")),
        ("[a-z]", ("a", "b", "q"), ("A", "-")),
        ("[^abc]", ("A", "d", "f"), ("a", "b", "c", "ab", "")),
        ("a+", ("a", "aaa"), ("A", "baa")),
        ("a*", ("", "a", "aaa"), ()),
        ("\\]", ("]",), ("a",)),
        ("a*$", ("a", "aaa"), ("A", "baa", "aaaaab")),
        ("(ab|ac)", ("ab", "ac"), ("aa", "A", "a", "cb")),
        ("(ab|ac)*$", ("", "ab", "ac", "abacabac", "ababab"),
                      ("aa", "A", "a", "cb", "ababababaca")),
        ("ab{3}$", ("abbb",), ("abb", "bbb", "abbbb")),
        ("ab{3,}$", ("abbb", "abbbb", "abbbbbbbbb"), ("abb", "bbb", "abbbc")),
        ("ab{3,}", ("abbb", "abbbb", "abbbbbbbbb"), ("abb", "bbb")),
        ("abc$|abcd|bc|d", ("abc", "abcd", "bc", "d"),
                           ("xabc", "ab", "a", "", "abce")),
        ("^a.*", ("a", "aa"), ("b", "ba", "c", "")),
        ("^[^b]+", ("a", "aa", "c"), ("b", "ba", "")),
        ("a(?!b).b?", ("aa", "ac", "aab"), ("a", "ab", "abc")),
        ("a(?=[bc])..", ("abx", "acx", "aba"), ("ac", "ab", "adb")),

        ("ab?[bc]?", ("a", "ab", "abb", "ac"), ("", "cab", "x")),
        ("ab{2,4}c?", ("abb", "abbb", "abbbb", "abbbbc"),
         ("ab", "abc", "xabbb")),
        ("ab{2,4}$", ("abb", "abbb", "abbbb"),
         ("ab", "abc", "xabbb", "abbbbc", "abbbbbb")),
        ("ab{2,4}cd?", ("abbc", "abbbc", "abbbbc", "abbbbcd"),
         ("abc", "abbbbbc", "abcbbc")),
        ("ab?c", ("ac", "abc"), ("abb", "abbc", "abbbc")),

        (r"\R", ("\n", "\r", "\r\n"), (" ", "\r\r", "\n\n", "\r\n ")),
        (r"a\Rb\R", ("a\nb\n", "a\rb\r", "a\r\nb\r\n", "a\rb\r\n"),
         ("ab", "a", "a\n\nb\n", "a\nb", "a\r\nb")),
        (r"ID [^\R]+\R", ("ID A123\n", "ID A123\r", "ID A123\r\n"),
         ("ID A123\n\n", "ID A123\r\r", "ID A123", "ID \n")),

        # named group backreference
        (r"(?P<name>A+)B(?P=name)A", ("ABAA", "AABAAA", "AAABAAAA"),
         ("ABA", "AB", "ABAAA", "AABA", "AABAA", "AABAAAA")),
        # named group backreference which can be empty
        (r"(?P<name>A*)B(?P=name)A", ("BA", "ABAA", "AABAAA", "AAABAAAA"),
         ("BAA", "ABA", "AB", "ABAAA", "AABA", "AABAA", "AABAAAA")),
        )
    for re_pat, good_list, bad_list in patterns:
        tree = Martel.Re(re_pat)
        exp = tree.make_parser()
        exp.setContentHandler(cb)
        exp.setErrorHandler(cb)
        # Use the string method rather than the legacy string.find()
        # module function; the result is the same.
        if re_pat.find(r"\R") == -1:  # \R is a Martel-specific flag
            pat = re.compile(re_pat)
        else:
            # The re module cannot compile a pattern containing \R.
            pat = None
        # The pattern regenerated from the Martel tree is always checked.
        pat2 = re.compile(str(tree))
        
        for word in good_list:
            exp.parseString(word)

            if pat is not None:
                m = pat.match(word)
                assert m, "Re problem recognizing " + repr(word)
                assert m.end() == len(word), "Did not parse all of %s: %d" % \
                       (repr(word), m.end())

            m = pat2.match(word)
            assert m, "created Re problem recognizing " + repr(word)
            assert m.end() == len(word), "Did not parse all of created %s: %d"\
                   % (repr(word), m.end())
            
            assert cb.good_parse, "Problem not recognizing %s with %s" % \
                   (repr(word), repr(re_pat))
            
        for word in bad_list:
            exp.parseString(word)

            # A bad word may match a prefix, but never the whole word.
            if pat is not None:
                m = pat.match(word)
                assert not m or m.end() != len(word), \
                       "Re should not recognize " + repr(word)

            m = pat2.match(word)
            assert not m or m.end() != len(word), \
                   "created Re should not recognize " + repr(word)
                   
            assert not cb.good_parse, \
                   "Should not recognize %s\ntagtable is %s" % \
                   (repr(word), repr(exp.tagtable))
Exemple #30
0
def test_header_footer8():
    """Header, record and footer, but with extra or malformed data.

    One parser is run over five inputs; each input has its own expected
    number of records, recoverable errors and fatal errors.
    """
    two_footers = """Two lines in
the header.
Data 1
Data 2
Data Q
Data 4
FOOTER Abc
FOOTER B
"""
    one_bad_record = """Two lines in
the header.
Data 1
Data 2
Data Q
Data 4
FOOTER Abc
"""
    all_good = """Two lines in
the header.
Data 1
Data 4
FOOTER Abc
"""
    only_bad_record = """Two lines in
the header.
Data Q
FOOTER Abc
"""
    no_records = """Two lines in
the header.
FOOTER Abc
"""
    # (input text, expected records, expected errors, expected fatal errors)
    cases = (
        (two_footers, 3, 1, 1),
        (one_bad_record, 3, 1, 0),
        (all_good, 2, 0, 0),
        (only_bad_record, 0, 1, 0),
        (no_records, 0, 0, 0),
    )

    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"Data \d+\n"))
    footer_exp = Martel.Group("footer", Martel.Re("FOOTER \w+\n"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    # One (reader, reader args, tagtable) triple per section.
    hf_parser = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.CountLines, (2, ), header_parser.tagtable,
        RecordReader.CountLines, (1, ), record_parser.tagtable,
        RecordReader.CountLines, (1, ), footer_parser.tagtable,
        (0, 1, {}))

    for text, want_records, want_errors, want_fatal in cases:
        # Fresh handlers for every input so that counts do not carry over.
        record_counter = CountRecords("record")
        hf_parser.setContentHandler(record_counter)
        error_counter = CountErrors()
        hf_parser.setErrorHandler(error_counter)
        hf_parser.parseFile(StringIO(text))

        assert error_counter.error_count == want_errors, \
               (text, error_counter.error_count, want_errors)
        assert error_counter.fatal_error_count == want_fatal, \
               (text, error_counter.fatal_error_count, want_fatal)
        assert record_counter.count == want_records, \
               (text, record_counter.count, want_records)