Example #1
0
def test_parse_list_of_integers():
    """
    Test parsing of a non-string input (a list of integers) using a
    custom token recognizer.
    """

    grammar = """
    Numbers: all_less_than_five EOF;
    all_less_than_five: all_less_than_five int_less_than_five
                      | int_less_than_five;

    int_less_than_five:;
    """

    def int_less_than_five(inp, pos):
        # Recognize a single integer token if it is less than five.
        # (Renamed parameter so the builtin `input` is not shadowed.)
        if inp[pos] < 5:
            return [inp[pos]]

    recognizers = {'int_less_than_five': int_less_than_five}
    g = Grammar.from_string(grammar, recognizers=recognizers, debug=True)

    actions = {
        'Numbers': pass_single,
        'all_less_than_five': collect,
        'int_less_than_five': pass_single
    }
    parser = Parser(g, actions=actions)

    ints = [3, 4, 1, 4]
    p = parser.parse(ints)
    assert p == ints

    # Test that error is correctly reported.
    # NOTE: check `e.value` -- `str(e)` on pytest's ExceptionInfo is the
    # wrapper's representation, not the exception message.
    with pytest.raises(ParseError) as e:
        parser.parse([4, 2, 1, 6, 3])
    assert 'Error at position 1,3 => "[4, 2, 1]*[6, 3]".' in str(e.value)
    assert 'int_less_than_five' in str(e.value)
def test_repeatable_zero_or_more_with_separator():
    """
    Check the `*[separator]` form: zero or more `b` separated by commas.
    """

    grammar = """
    S: "2" b*[comma] "3";

    terminals
    b: "1";
    comma: ",";
    """

    g = Grammar.from_string(grammar)
    # The repetition generates a helper non-terminal named after the
    # repeated symbol and the separator.
    assert g.get_nonterminal('b_0_comma')

    parser = Parser(g)

    # Multiple repetitions are collected into a list.
    assert parser.parse('2 1, 1 , 1 3') == ["2", ["1", "1", "1"], "3"]

    # Zero repetitions produce an empty list.
    assert parser.parse('2 3') == ["2", [], "3"]
def test_repeatable_one_or_more_with_separator():
    """
    Tests one or more repeatable operator with separator.
    """

    grammar = """
    S: "2" b+[comma] "3";

    terminals
    b: "1";
    comma: ",";
    """

    g = Grammar.from_string(grammar)
    assert g.get_nonterminal('b_1_comma')

    p = Parser(g)

    input_str = '2 1, 1 , 1 3'
    result = p.parse(input_str)
    assert result == ["2", ["1", "1", "1"], "3"]

    # `+` requires at least one `b`, so this input must fail.
    input_str = '2 3'
    with pytest.raises(ParseError) as e:
        p.parse(input_str)
    # Check the message on the exception itself; `str(e)` is the
    # ExceptionInfo representation, not the error message.
    assert 'Expected: b' in str(e.value)
def test_reduce_enough_empty():
    """Unlimited-lookahead check: in this unambiguous grammar the parser
    has to reduce exactly as many empty A productions as there are "b"
    tokens still ahead in order to finish successfully.

    Language is: xb^n, n>=0

    References:

    Nozohoor-Farshi, Rahman: "GLR Parsing for ε-Grammers", Generalized LR
    parsing, Springer, 1991.

    Rekers, Joan Gerard: "Parser generation for interactive environments",
    phD thesis, Universiteit van Amsterdam, 1992.

    """
    g = Grammar.from_string("""
    S: A S "b";
    S: "x";
    A: EMPTY;
    """)

    forest = GLRParser(g, debug=True).parse("xbbb")

    # The grammar is unambiguous so exactly one derivation is expected.
    assert len(forest) == 1
def test_failed_disambiguation(cf):
    """
    When several terminals of equal priority match the same longest
    input span, a DisambiguationError must be raised naming the
    conflicting terminals.
    """

    grammar = r"""
    S: First | Second | Third;

    terminals
    First: /\d+\.\d+/ {15};
    Second: '14.7';
    Third: /\d+\.75/ {15};
    """

    g = Grammar.from_string(grammar)
    parser = Parser(g, actions=actions, debug=True)

    # All rules will match but First and Third have higher priority.
    # Both are regexes so longest match will be used.
    # Both have the same length.

    with pytest.raises(DisambiguationError) as e:
        parser.parse('14.75')

    # Inspect the raised exception's message (`e.value`); `str(e)` is
    # the ExceptionInfo wrapper, not the error message.
    assert 'disambiguate' in str(e.value)
    assert 'First' in str(e.value)
    assert 'Second' not in str(e.value)
    assert 'Third' in str(e.value)
Example #6
0
def test_lr2_grammar():
    """
    An LR(2) grammar cannot be handled by the LR(1) parser but parses
    unambiguously with GLR.
    """

    # Raw string: the grammar embeds the regex escape ``\w`` which would
    # otherwise be an invalid escape sequence in a plain string literal.
    grammar = r"""
    Model: Prods EOF;
    Prods: Prod | Prods Prod;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;
    ID: /\w+/;
    """

    g = Grammar.from_string(grammar)

    # This grammar is not LR(1) as it requires
    # at least two tokens of lookahead to decide
    # what to do on each ID from the right side.
    # If '=' is after ID than it should reduce "Prod"
    # else it should reduce ID as ProdRefs.
    with pytest.raises(SRConflicts):
        Parser(g)

    # But it can be parsed unambiguously by GLR.
    p = GLRParser(g)

    txt = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    results = p.parse(txt)
    assert len(results) == 1
def test_first_empty_in_rhs():
    """
    Verify FIRST set computation when the RHS of a production starts
    with a symbol that can derive the empty string.
    """

    grammar = """
    S: A C;
    A: B | EMPTY;

    terminals
    B: "b";
    C: "c";
    """

    g = Grammar.from_string(grammar)
    first_set = first(g)

    S = g.get_nonterminal('S')
    A = g.get_nonterminal('A')
    B = g.get_terminal('B')
    C = g.get_terminal('C')

    # 'A' derives both "b" and the empty string.
    assert EMPTY in first_set[A]
    assert B in first_set[A]

    # Whatever can start 'A' can also start 'S'.
    assert B in first_set[S]

    # Since 'A' may vanish, 'C' can start 'S' as well.
    assert C in first_set[S]
Example #8
0
def test_parglare_builtin_action_override_repetition():
    """
    A user-supplied "collect" action must take precedence over the
    built-in action attached to repetition-generated rules.
    """
    # B+ will produce a B_1 rule wired to the common `collect` action.
    grammar = """
    S: B+;
    B: "b";
    """

    called = [False]

    def my_collect(_, __):
        called[0] = True
        return "pass"

    parser = Parser(Grammar.from_string(grammar),
                    actions={"collect": my_collect})

    # The override must both run and supply the result.
    assert parser.parse("b b") == 'pass'
    assert called[0]
Example #9
0
def test_layout_nested_comments(parser_class):
    """
    A LAYOUT rule implementing whitespace, line comments, and (nested)
    block comments must be transparently skipped by both parser kinds.
    """
    # Raw string: the grammar embeds regex escapes (``\s``, ``\/``,
    # ``\*``) which would be invalid escape sequences in a plain literal.
    grammar = r"""
    S: K EOF;
    K: 'a' B | 'a' C;
    B: 'b' | B 'b';
    C: 'c' | C 'c';

    LAYOUT: LayoutItem | LAYOUT LayoutItem;
    LayoutItem: WS | Comment | EMPTY;
    WS: /\s+/;
    Comment: '/*' CorNCs '*/' | /\/\/.*/;
    CorNCs: CorNC | CorNCs CorNC | EMPTY;
    CorNC: Comment | NotComment | WS;
    NotComment: /((\*[^\/])|[^\s*\/]|\/[^\*])+/;
    """
    g = Grammar.from_string(grammar)

    in_str = """//Line comment at beginning
    a  b b b   b // This is line comment
    b b b b b b  /* This is block
    comment */

    bbbb  b b b b b
    /* Another block comment
       // With nested line comment
       /* And nested block
    comment */
    */

    bbbb b b b
    """

    parser = parser_class(g)
    parser.parse(in_str)
Example #10
0
def test_cyclic_grammar_3():
    """
    Grammar with indirect cycle.
    r:EMPTY->A ; r:A->S; r:EMPTY->A; r:SA->S; r:EMPTY->A; r:SA->S;...
    """
    grammar = """
    S: S A | A;
    A: "a" | EMPTY;
    """

    g = Grammar.from_string(grammar)

    # Three S/R conflicts exist, each with an EMPTY reduction. If the
    # prefer-shifts-over-empty strategy is disabled, LR table
    # construction reports them.
    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts_over_empty=False)

    # The default strategy (prefer shifts over empty) resolves them all.
    Parser(g)

    forest = GLRParser(g).parse('aa')

    # The cycle makes the forest infinite; enumerating it must raise.
    with pytest.raises(LoopError):
        len(forest)
Example #11
0
def test_user_grammar_actions():
    """
    Actions referenced by name in the grammar (`@name` syntax) must be
    resolved from the user-supplied actions dict and invoked.
    """
    grammar = """
    S: A B C;
    @nonterm_action
    C: A B;
    A: "a";
    @term_action
    B: "b";
    """

    called = [False, False]

    def nonterm_action(_, __):
        called[0] = True

    def term_action(_, __):
        called[1] = True

    parser = Parser(Grammar.from_string(grammar),
                    actions={
                        "nonterm_action": nonterm_action,
                        "term_action": term_action,
                    })

    assert parser.parse("a b a b")
    # Both the non-terminal and the terminal action must have fired.
    assert all(called)
Example #12
0
def test_rule_meta():
    """
    Rule- and production-level meta-data handling: user meta-data is
    exposed as attributes, built-in disambiguation keywords are not.
    """

    grammar_str = r'''
    MyRule {label: 'My Label', nops}: 'a' {left, 1, dynamic};
    '''

    my_rule = Grammar.from_string(grammar_str).get_nonterminal('MyRule')

    # User-defined meta-data is reachable on the non-terminal.
    assert my_rule.label == 'My Label'

    # Built-in disambiguation keywords are not exposed that way...
    with pytest.raises(AttributeError):
        assert my_rule.nops

    # ...and neither is anything that was never declared.
    with pytest.raises(AttributeError):
        assert my_rule.nonexisting

    # Production-level meta-data lives on the production object.
    prod = my_rule.productions[0]
    assert prod.assoc == ASSOC_LEFT
    assert prod.prior == 1
    assert prod.dynamic

    # Rule-level meta-data propagates down to each production.
    assert prod.label == 'My Label'
Example #13
0
def test_rule_meta_override():
    """
    Rule-level meta-data propagates to productions, and any production
    may override it locally.
    """

    grammar_str = r'''
    MyRule {label: 'My Label', left}: 'a' {right, label: 'My overriden label'}
                                    | 'b';
    '''

    my_rule = Grammar.from_string(grammar_str).get_nonterminal('MyRule')

    # The rule keeps its own meta-data.
    assert my_rule.label == 'My Label'

    # The first production supplies overrides for label and assoc.
    prods = my_rule.productions
    assert prods[0].label == 'My overriden label'
    assert prods[0].assoc == ASSOC_RIGHT

    # The second production falls back to the rule-level values.
    assert prods[1].label == 'My Label'
    assert prods[1].assoc == ASSOC_LEFT
Example #14
0
def test_issue31_glr_drop_parses_on_lexical_ambiguity():
    """
    Regression test for issue #31: the GLR parser must not drop valid
    parses when tokens are lexically ambiguous.
    """
    grammar = """
    model: element+;
    element: title
           | table_with_note
           | table_with_title;
    table_with_title: table_title table_with_note;
    table_with_note: table note*;

    terminals
    title: /title/;   // <-- This is lexically ambiguous with the next.
    table_title: /title/;
    table: "table";
    note: "note";
    """

    # this input should yield 4 parse trees.
    # (Renamed from `input` to avoid shadowing the builtin.)
    input_str = "title table title table"

    g = Grammar.from_string(grammar)
    parser = GLRParser(g, debug=True, debug_colors=True)
    results = parser.parse(input_str)

    # We should have 4 solutions for the input.
    assert len(results) == 4
Example #15
0
def test_non_eof_grammar_nonempty():
    """
    Grammar that is not anchored by EOF at the end might
    result in multiple trees that are produced by successful
    parses of the incomplete input.
    """
    # Raw string: the grammar embeds the regex escape ``\w``.
    grammar_nonempty = r"""
    Model: Prods;
    Prods: Prod | Prods Prod;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;
    ID: /\w+/;
    """

    g_nonempty = Grammar.from_string(grammar_nonempty)

    txt = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    p = GLRParser(g_nonempty, debug=True)
    results = p.parse(txt)
    # There are three successful parses.
    # e.g. one would be the production 'First = One Two three Second' and the
    # parser could not continue as the next token is '=' but it succeeds as
    # we haven't terminated our model with EOF so we allow partial parses.
    assert len(results) == 3
Example #16
0
def test_infinite_recursions():
    """
    If rule have no recursion termination alternative as for example:

    Elements: Elements Element;

    instead of:
    Elements: Elements Element | Element;

    first set of "Elements" will be empty. GrammarError will be produced during
    parser construction.
    """

    grammar = """
    Elements: Elements Element;
    Element: "a" | "b";
    """

    g = Grammar.from_string(grammar)

    with pytest.raises(GrammarError) as e:
        Parser(g)

    # Check the raised exception's message (`e.value`); `str(e)` is the
    # ExceptionInfo wrapper, not the error message.
    assert 'First set empty for grammar symbol "Elements"' in str(e.value)
    assert 'infinite recursion' in str(e.value)
Example #17
0
def test_non_eof_grammar_empty():
    """
    Grammar that is not anchored by EOF at the end might
    result in multiple trees that are produced by successful
    parses of the incomplete input.
    """
    # Raw string: the grammar embeds the regex escape ``\w``.
    grammar_empty = r"""
    Model: Prods;
    Prods: Prod | Prods Prod | EMPTY;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;
    ID: /\w+/;
    """

    g_empty = Grammar.from_string(grammar_empty)

    txt = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    p = GLRParser(g_empty, debug=True)

    results = p.parse(txt)
    assert len(results) == 3

    # EMPTY alternative makes the empty input valid as well.
    results = p.parse("")
    assert len(results) == 1
Example #18
0
def test_epsilon_grammar():
    """
    A grammar whose start symbol may derive the empty string must parse
    both regular and completely empty input.
    """
    grammar = r"""
    Model: Prods;
    Prods: Prod | Prods Prod | EMPTY;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;

    terminals
    ID: /\w+/;
    """

    parser = GLRParser(Grammar.from_string(grammar), debug=True)

    txt = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    # Regular non-empty input yields a single solution.
    assert len(parser.parse(txt)) == 1

    # Empty input is accepted thanks to the EMPTY alternative.
    assert len(parser.parse("")) == 1
Example #19
0
def test_invalid_number_of_actions():
    """
    A rule may be given a list of actions, one per production; the
    parser must reject lists whose length differs from the number of
    productions.
    """
    grammar = '''
    S: A+ | B+;
    A: 'a';
    B: 'b';
    '''
    g = Grammar.from_string(grammar)

    def some_action(_, nodes):
        return nodes[0]

    # 'S' has two productions, so a list of two actions is accepted.
    Parser(g, actions={'S': [some_action, some_action]})

    # Too few actions.
    with pytest.raises(ParserInitError,
                       match=r'Lenght of list of actions must match.*'):
        Parser(g, actions={'S': [some_action]})

    # Too many actions.
    with pytest.raises(ParserInitError,
                       match=r'Lenght of list of actions must match.*'):
        Parser(g, actions={'S': [some_action, some_action, some_action]})
Example #20
0
def test_action_override():
    """
    Explicitely provided action in `actions` param overrides default or
    grammar provided.
    """
    grammar = """
    S: Foo Bar;
    @pass_nochange
    Bar: "1" a;

    terminals
    @pass_nochange
    Foo: 'foo';
    a: "a";
    """

    g = Grammar.from_string(grammar)
    input_str = "foo 1 a"

    # Without overrides the grammar-provided actions apply.
    assert Parser(g).parse(input_str) == ["foo", ["1", "a"]]

    overrides = {"Foo": lambda _, __: "eggs",
                 "Bar": lambda _, __: "bar reduce"}

    # Overrides win when actions run during parsing...
    parser = Parser(g, actions=overrides)
    assert parser.parse(input_str) == ["eggs", "bar reduce"]

    # ...and also when action calls are postponed via build_tree.
    parser = Parser(g, build_tree=True, actions=overrides)
    tree = parser.parse(input_str)
    assert parser.call_actions(tree) == ["eggs", "bar reduce"]
def test_highly_ambiguous_grammar():
    """
    This grammar has both Shift/Reduce and Reduce/Reduce conflicts and
    thus can't be parsed by a deterministic LR parsing.
    Shift/Reduce can be resolved by prefer_shifts strategy.
    """
    g = Grammar.from_string("""
    S: "b" | S S | S S S;
    """)

    # Without prefer_shifts the S/R conflicts are reported.
    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts=False)

    # prefer_shifts removes the S/R conflicts, but R/R conflicts remain.
    with pytest.raises(RRConflicts):
        Parser(g, prefer_shifts=True)

    # GLR copes by producing every valid derivation.
    glr = GLRParser(g)

    # Three tokens -> 3 valid derivations/trees.
    assert len(glr.parse("bbb")) == 3

    # Four tokens -> 10 valid derivations.
    assert len(glr.parse("bbbb")) == 10
Example #22
0
def test_assignment_of_repetition():
    """
    A named assignment of a repetition must be delivered to the rule
    action as a keyword argument holding the collected matches.
    """

    grammar = """
    S: "1" first=some_match+ "3";

    terminals
    some_match: "2";
    """

    g = Grammar.from_string(grammar)
    assert assignment_in_productions(g.productions, 'S', 'first')

    called = [False]

    def act_s(_, nodes, first):
        called[0] = True
        # `first` receives the list collected by `some_match+`.
        assert first == ["2", "2"]
        return nodes

    parser = Parser(g, actions={"S": act_s})

    assert parser.parse('1 2 2 3') == ["1", ["2", "2"], "3"]
    # The action must actually have run.
    assert called[0]
def test_nondeterministic_LR_raise_error():
    """Language of even-length palindromes over {0, 1}.

    The grammar is non-deterministic although the language itself is
    unambiguous: to accept a palindrome the parser must reduce EMPTY
    exactly at the middle of the string and then start reducing by A
    and B.

    Deterministic LR cannot do that -- its implicit disambiguation of
    favouring shifts over empty reductions postpones the EMPTY
    reduction until all input is consumed, so parsing fails.

    GLR, on the other hand, forks at every step, trying both the empty
    reduction and the shift; only the fork that reduced EMPTY at the
    middle of the input succeeds.
    """
    g = Grammar.from_string("""
    S: A | B | EMPTY;
    A: '1' S '1';
    B: '0' S '0';
    """)

    palindrome = '0101000110001010'

    # Deterministic LR fails on this language.
    with pytest.raises(ParseError):
        Parser(g).parse(palindrome)

    # GLR finds the single valid derivation.
    assert len(GLRParser(g).parse(palindrome)) == 1
Example #24
0
def test_multiple_assignment_with_repetitions():
    """
    Several assignments in one production: a plain assignment of a
    separated repetition plus a bool assignment of a plain repetition.
    """

    grammar = """
    S: "1" first=some_match+[comma] second?=some_match* "3";

    terminals
    some_match: "2";
    comma: ",";
    """

    g = Grammar.from_string(grammar)
    for name in ('first', 'second'):
        assert assignment_in_productions(g.productions, 'S', name)

    called = [False]

    def act_s(_, nodes, first, second):
        called[0] = True
        # `first` collects the comma-separated matches.
        assert first == ["2", "2"]
        # `second?=` yields a bool flag -- True since matches exist.
        assert second is True
        return nodes

    parser = Parser(g, actions={"S": act_s})

    assert parser.parse('1 2, 2 2 2 2 3') == \
        ["1", ["2", "2"], ["2", "2", "2"], "3"]
    assert called[0]
def test_optional():
    """
    Tests optional operator.
    """

    grammar = """
    S: "2" b? "3"? EOF;

    terminals
    b: "1";
    """

    g = Grammar.from_string(grammar)
    assert g.get_nonterminal('b_opt')

    p = Parser(g)

    input_str = '2 1 3'
    result = p.parse(input_str)
    assert result == ["2", "1", "3", None]

    # Missing optionals are reported as None.
    input_str = '2 3'
    result = p.parse(input_str)
    assert result == ["2", None, "3", None]

    input_str = '2 1'
    result = p.parse(input_str)
    assert result == ["2", "1", None, None]

    # "2" is mandatory so omitting it is an error.
    input_str = ' 1 3'
    with pytest.raises(ParseError) as e:
        p.parse(input_str)
    # Check the message on the raised exception (`e.value`); `str(e)` is
    # the ExceptionInfo wrapper, not the error message.
    assert 'Expected: 2' in str(e.value)
Example #26
0
def test_object_children_order():
    """Children may depend on the concrete production that matched. Test that order
    given in `_pg_children` is the same as the order provided in the grammar
    (this may be important for tree traversal order).

    """
    grammar = r'''
    S: a=A b=B
     | b=B a=A
     | b=B;
    A: val="a";
    B: val="b";
    '''
    parser = Parser(Grammar.from_string(grammar))

    def check(text, expected_vals):
        # Children must appear exactly in grammar (production) order.
        ast = parser.parse(text)
        assert [child.val for child in ast._pg_children] == expected_vals

    check('a b', ['a', 'b'])
    check('b a', ['b', 'a'])
    check('b', ['b'])
def test_repeatable_zero_or_more():
    """
    `b*` matches any number of `b` tokens, including none.
    """

    grammar = """
    S: "2" b* "3";

    terminals
    b: "1";
    """

    g = Grammar.from_string(grammar)
    # `*` is implemented via generated zero-or-more/one-or-more rules.
    assert g.get_nonterminal('b_0')
    assert g.get_nonterminal('b_1')

    parser = Parser(g)

    # Several matches are collected into a list.
    assert parser.parse('2 1 1 1 3') == ["2", ["1", "1", "1"], "3"]

    # No matches yield an empty list.
    assert parser.parse('2 3') == ["2", [], "3"]
Example #28
0
def test_obj_position():
    """
    Dynamically created objects must carry correct start/end input
    positions.
    """
    grammar = r"""
    S: "first" seconds=Second+;
    Second: value=digits;

    terminals
    digits:/\d+/;
    """
    parser = Parser(Grammar.from_string(grammar))

    result = parser.parse("""
    first 45 56
    66 3434342
    """)

    # '56' spans input offsets [14, 16).
    second = result.seconds[1]
    assert second._pg_start_position == 14
    assert second._pg_end_position == 16

    # '3434342' spans input offsets [24, 31).
    second = result.seconds[3]
    assert second._pg_start_position == 24
    assert second._pg_end_position == 31
Example #29
0
def test_assignment_bool():
    """
    A `name?=` assignment passes a boolean flag to the rule action.
    """

    grammar = """
    S: "1" first?=some_match "3";
    some_match: "2";
    """

    g = Grammar.from_string(grammar)
    assert assignment_in_productions(g.productions, 'S', 'first')

    called = [False]

    def act_s(_, nodes, first):
        called[0] = True
        # The match is present, so the flag is True.
        assert first is True
        return nodes

    parser = Parser(g, actions={"S": act_s})

    assert parser.parse('1 2 3') == ["1", "2", "3"]
    # The action must actually have run.
    assert called[0]
Example #30
0
def test_prefer_shifts_no_sr_conflicts():
    """
    A grammar with an S/R conflict becomes deterministic when the
    prefer_shifts option resolves every conflict in favour of SHIFT.
    """
    # The conflict: since "b" is optional, B+ may swallow several runs
    # of single-"a" A's, so the parser can't decide whether to shift
    # another "a" or to reduce by 'B: "b"? A+' (and later 'S: B+').
    # Greedy behavior is usually wanted, so with prefer_shifts the
    # parser keeps shifting: it first consumes all "a" via A+ and
    # reduces B at the end.
    grammar = r"""
    S: B+;
    B: "b"? A+;

    terminals
    A: "a";
    """
    g = Grammar.from_string(grammar)

    # Plain LR table construction reports the conflict...
    assert len(create_table(g).sr_conflicts) == 1

    # ...which the prefer_shifts strategy eliminates.
    assert len(create_table(g, prefer_shifts=True).sr_conflicts) == 0

    # With prefer_shifts we get the single greedy LR parse.
    input_str = 'b a a a b a a'
    output = [['b', ['a', 'a', 'a']], ['b', ['a', 'a']]]
    assert Parser(g, prefer_shifts=True).parse(input_str) == output

    # GLR can parse without prefer_shifts; the ambiguous grammar yields
    # 11 solutions for this input.
    expected = [
        [['b', ['a']], [None, ['a']], [None, ['a']], ['b', ['a', 'a']]],
        [['b', ['a', 'a']], [None, ['a']], ['b', ['a', 'a']]],
        [['b', ['a', 'a', 'a']], ['b', ['a', 'a']]],
        [['b', ['a']], [None, ['a', 'a']], ['b', ['a', 'a']]],
        [['b', ['a']], [None, ['a', 'a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a', 'a', 'a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a', 'a']], [None, ['a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a']], [None, ['a', 'a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a', 'a', 'a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a', 'a']], [None, ['a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a']], [None, ['a']], [None, ['a']], ['b', ['a']], [None, ['a']]]  # noqa
    ]
    assert GLRParser(g).parse(input_str) == expected

    # With prefer_shifts even GLR collapses to the one greedy solution.
    result = GLRParser(g, prefer_shifts=True).parse(input_str)
    assert len(result) == 1
    assert result[0] == output