Ejemplo n.º 1
0
def test_undefined_grammar_symbol():
    """Tests that an undefined grammar symbol raises GrammarError."""
    grammar = """
    S: A B;
    A: "a" | B;
    B: id;
    """
    with pytest.raises(GrammarError) as e:
        Grammar.from_string(grammar)

    # Inspect the raised exception itself: str() of the pytest ExceptionInfo
    # wrapper is not guaranteed to contain the exception message.
    message = str(e.value)
    assert 'Unknown symbol' in message
    assert 'id' in message
Ejemplo n.º 2
0
def test_keyword_must_be_regex():
    """Tests that a KEYWORD rule given as a string match raises an error."""
    grammar = r"""
    S: "for" name=ID "=" from=INT "to" to=INT;
    KEYWORD: "id";
    ID: /\w+/;
    INT: /\d+/;
    """

    with pytest.raises(GrammarError) as e:
        Grammar.from_string(grammar)

    # Check the message of the raised exception, not the str() of the
    # pytest ExceptionInfo wrapper.
    assert 'must have a regex recognizer defined' in str(e.value)
Ejemplo n.º 3
0
def test_no_terminal_associavitity():
    """Tests that terminals can't have associativity defined."""
    grammar = """
    S: A | B;
    A: 'a' {15, left};
    B: 'b';
    """

    with pytest.raises(ParseError) as e:
        Grammar.from_string(grammar)

    # Check the message of the raised exception, not the str() of the
    # pytest ExceptionInfo wrapper.
    assert 'Error at position 3,16' in str(e.value)
Ejemplo n.º 4
0
def test_multiple_terminal_definition():
    """Tests that defining the same terminal rule twice raises an error."""
    duplicated_terminal = """
    S: A A;
    terminals
    A: "a";
    A: "b";
    """
    expected_message = r'.*Multiple definitions of terminal rule.*'

    with pytest.raises(GrammarError, match=expected_message):
        Grammar.from_string(duplicated_terminal)
Ejemplo n.º 5
0
def test_no_terminal_associavitity():
    """Tests that terminals can't have associativity defined."""
    grammar = """
    S: A | B;
    terminals
    A: 'a' {15, left};
    B: 'b';
    """

    with pytest.raises(ParseError) as e:
        Grammar.from_string(grammar)

    # Check the message of the raised exception, not the str() of the
    # pytest ExceptionInfo wrapper.
    assert 'Expected: Prior or dynamic or finish or nofinish or prefer' \
        in str(e.value)
Ejemplo n.º 6
0
def test_no_terminal_associavitity():
    """Tests that terminals can't have associativity defined."""
    grammar = """
    S: A | B;
    terminals
    A: 'a' {15, left};
    B: 'b';
    """

    with pytest.raises(ParseError) as e:
        Grammar.from_string(grammar)

    # Check the message of the raised exception, not the str() of the
    # pytest ExceptionInfo wrapper.
    assert 'Expected: : but found <NotComment(};)> or <}(})>' \
        in str(e.value)
Ejemplo n.º 7
0
def test_terminal_nonterminal():
    """
    Tests which rules are registered as terminals and which as
    non-terminals for several small grammars.
    """

    # A has a string-match alternative ("a") but also references B, so it
    # must be treated as a non-terminal. B stays a terminal.
    mixed = Grammar.from_string("""
    S: A B;
    A: "a" | B;
    B: "b";
    """)
    assert NonTerminal("A") in mixed.nonterminals
    assert Terminal("A") not in mixed.terminals
    assert Terminal("B") in mixed.terminals
    assert NonTerminal("B") not in mixed.nonterminals

    # A only references B: A is a non-terminal, B is a terminal.
    chained = Grammar.from_string("""
    S: A B;
    A: B;
    B: "b";
    """)

    assert NonTerminal("A") in chained.nonterminals
    assert Terminal("A") not in chained.terminals
    assert Terminal("B") in chained.terminals
    assert NonTerminal("B") not in chained.nonterminals

    # A is defined by two rules, one of them referencing S back.
    cyclic = Grammar.from_string("""
    S: A;
    A: S;
    A: 'x';
    """)
    assert NonTerminal("S") in cyclic.nonterminals
    assert NonTerminal("A") in cyclic.nonterminals
    assert Terminal("A") not in cyclic.terminals
    assert Terminal("x") in cyclic.terminals

    # S is defined by three rules, including an EMPTY one.
    with_empty = Grammar.from_string("""
    S: S S;
    S: 'x';
    S: EMPTY;
    """)
    assert NonTerminal("S") in with_empty.nonterminals
    assert Terminal("x") in with_empty.terminals
    assert NonTerminal("x") not in with_empty.nonterminals
    assert Terminal("S") not in with_empty.terminals
Ejemplo n.º 8
0
def test_optional_no_modifiers():
    """
    Tests that optional operator doesn't allow modifiers.
    """

    grammar = """
    S: "2" b?[comma] "3"? EOF;
    b: "1";
    comma: ",";
    """

    with pytest.raises(GrammarError) as e:
        Grammar.from_string(grammar)

    # Check the message of the raised exception, not the str() of the
    # pytest ExceptionInfo wrapper.
    assert "Repetition modifier not allowed" in str(e.value)
def test_imported_actions_override_by_grammar_actions():
    """
    Test that actions loaded from `*_actions.py` files can override actions
    imported from other grammar files.

    The same check is run against two grammar files, one per directory.
    """
    grammar_files = (
        'in_grammar_by_symbol_name/model.pg',
        'in_grammar_by_action_name/model.pg',
    )

    for relative_path in grammar_files:
        grammar_path = os.path.join(this_folder, relative_path)
        parser = Parser(Grammar.from_file(grammar_path))
        model = parser.parse(model_str)
        assert model.modelID == 43
Ejemplo n.º 10
0
def test_action_override():
    """
    Actions passed explicitly through the `actions` parameter take
    precedence over the default ones and over those given in the grammar.
    """
    g = Grammar.from_string("""
    S: Foo Bar;
    @pass_nochange
    Bar: "1" a;

    terminals
    @pass_nochange
    Foo: 'foo';
    a: "a";
    """)
    input_str = "foo 1 a"

    # No explicit actions: the grammar-provided ones are used.
    plain_parser = Parser(g)
    assert plain_parser.parse(input_str) == ["foo", ["1", "a"]]

    actions = {"Foo": lambda _, __: "eggs", "Bar": lambda _, __: "bar reduce"}
    overridden = ["eggs", "bar reduce"]

    # Explicit actions applied during parsing.
    direct_parser = Parser(g, actions=actions)
    assert direct_parser.parse(input_str) == overridden

    # Explicit actions applied afterwards on a built tree.
    tree_parser = Parser(g, build_tree=True, actions=actions)
    tree = tree_parser.parse(input_str)
    assert tree_parser.call_actions(tree) == overridden
Ejemplo n.º 11
0
def test_group_complex():
    """
    Tests the rules generated for nested groups with repetitions and
    separators, and the result of parsing with them.
    """
    grammar = Grammar.from_string(r'''
    @obj
    s: (b c)*[comma];
    s: (b c)*[comma] a=(a+ (b | c)*)+[comma];
    terminals
    a: "a";
    b: "b";
    c: "c";
    comma: ",";
    ''')

    assert len(grammar.get_productions('s_g1')) == 1

    # The `b | c` group yields one production per alternative.
    choice_prods = grammar.get_productions('s_g3')
    assert len(choice_prods) == 2
    first_choice, second_choice = choice_prods[0], choice_prods[1]
    assert first_choice.rhs[0].name == 'b'
    assert second_choice.rhs[0].name == 'c'

    # The outer group refers to the rules generated for its nested parts.
    nested_prods = grammar.get_productions('s_g2')
    assert len(nested_prods) == 1
    nested_rhs = nested_prods[0].rhs
    assert nested_rhs[0].name == 'a_1'
    assert nested_rhs[1].name == 's_g3_0'
    assert grammar.get_productions('s')[1].rhs[1].name == 's_g2_1_comma'

    assert 's_g5' not in grammar

    glr = GLRParser(grammar)

    forest = glr.parse('b c, b c a a a b c c b, a b b')
    obj = glr.call_actions(forest[0])
    assert obj.a == [[['a', 'a', 'a'], ['b', 'c', 'c', 'b']],
                     [['a'], ['b', 'b']]]
Ejemplo n.º 12
0
def test_invalid_number_of_actions():
    """
    Test that a parser init error is raised when a rule is given a list of
    actions whose length differs from the number of the rule's productions.
    """
    g = Grammar.from_string('''
    S: A+ | B+;
    A: 'a';
    B: 'b';
    ''')

    def some_action(_, nodes):
        return nodes[0]

    # Two actions are given for rule S here and the parser is accepted.
    Parser(g, actions={'S': [some_action, some_action]})

    # One action too few, then one too many.
    for wrong_count in (1, 3):
        actions = {'S': [some_action] * wrong_count}
        with pytest.raises(ParserInitError,
                           match=r'Length of list of actions must match.*'):
            Parser(g, actions=actions)
Ejemplo n.º 13
0
def test_multiple_assignment_with_repetitions():
    """
    Test a rule with two assignments over repetitions: a plain one and a
    bool (`?=`) one.
    """
    g = Grammar.from_string("""
    S: "1" first=some_match+[comma] second?=some_match* "3";

    terminals
    some_match: "2";
    comma: ",";
    """)
    for attr_name in ('first', 'second'):
        assert assignment_in_productions(g.productions, 'S', attr_name)

    # Mutable cell flipped by the action to prove that it has been called.
    called = [False]

    def act_s(_, nodes, first, second):
        called[0] = True
        assert first == ["2", "2"]
        assert second is True
        return nodes

    parser = Parser(g, actions={"S": act_s})
    parsed = parser.parse('1 2, 2 2 2 2 3')

    assert parsed == ["1", ["2", "2"], ["2", "2", "2"], "3"]
    assert called[0]
def get_grammar():
    """
    Build the expression grammar (numbers may have a fractional part) from
    its struct description and return the first item of the result of
    `Grammar.from_struct`.
    """
    productions = {
        'E': [
            ['E', 'PLUS', 'T'],
            ['T'],
        ],
        'T': [
            ['T', 'MULT', 'F'],
            ['F'],
        ],
        'F': [
            ['OPEN', 'E', 'CLOSE'],
            ['number'],
        ],
    }
    terminals = {
        'PLUS': ('string', '+'),
        'MULT': ('string', '*'),
        'OPEN': ('string', '('),
        'CLOSE': ('string', ')'),
        'number': ('regexp', r'\d+(\.\d+)?'),
    }
    start_symbol = 'E'

    built = Grammar.from_struct(productions, terminals, start_symbol)
    return built[0]
Ejemplo n.º 15
0
def test_non_eof_grammar_empty():
    """
    Grammar that is not anchored by EOF at the end might
    result in multiple trees that are produced by successful
    parses of the incomplete input.
    """
    # Raw string: `\w` is not a valid escape sequence in a regular string
    # literal and triggers a warning on current Python versions.
    grammar_empty = r"""
    Model: Prods;
    Prods: Prod | Prods Prod | EMPTY;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;
    ID: /\w+/;
    """

    g_empty = Grammar.from_string(grammar_empty)

    txt = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    p = GLRParser(g_empty, debug=True)

    results = p.parse(txt)
    assert len(results) == 3

    # Empty input is expected to give exactly one result.
    results = p.parse("")
    assert len(results) == 1
Ejemplo n.º 16
0
def test_non_eof_grammar_nonempty():
    """
    Grammar that is not anchored by EOF at the end might
    result in multiple trees that are produced by successful
    parses of the incomplete input.
    """
    # Raw string: `\w` is not a valid escape sequence in a regular string
    # literal and triggers a warning on current Python versions.
    grammar_nonempty = r"""
    Model: Prods;
    Prods: Prod | Prods Prod;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;
    ID: /\w+/;
    """

    g_nonempty = Grammar.from_string(grammar_nonempty)

    txt = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    p = GLRParser(g_nonempty, debug=True)
    results = p.parse(txt)
    # There are three successful parses.
    # e.g. one would be the production 'First = One Two three Second' and the
    # parser could not continue as the next token is '=' but it succeeds as
    # we haven't terminated our model with EOF so we allow partial parses.
    assert len(results) == 3
Ejemplo n.º 17
0
def test_first_empty_in_rhs():
    """
    Test the FIRST sets computed for a grammar in which a symbol on the
    right-hand side of a production can derive empty.
    """
    g = Grammar.from_string("""
    S: A C;
    A: B | EMPTY;

    terminals
    B: "b";
    C: "c";
    """)

    first_sets = first(g)

    nt_a = g.get_nonterminal('A')
    term_b = g.get_terminal('B')
    term_c = g.get_terminal('C')
    nt_s = g.get_nonterminal('S')

    firsts_of_a = first_sets[nt_a]
    assert EMPTY in firsts_of_a
    assert term_b in firsts_of_a

    firsts_of_s = first_sets[nt_s]
    assert term_b in firsts_of_s

    # Since 'A' can derive empty, 'C' may start 'S' as well.
    assert term_c in firsts_of_s
Ejemplo n.º 18
0
def test_issue31_glr_drop_parses_on_lexical_ambiguity():
    """
    Tests that the GLR parser keeps all parses when two terminals are
    lexically ambiguous (both match the same input).
    """
    grammar = """
    model: element+ EOF;
    element: title
           | table_with_note
           | table_with_title;
    table_with_title: table_title table_with_note;
    table_with_note: table note*;

    terminals
    title: /title/;   // <-- This is lexically ambiguous with the next.
    table_title: /title/;
    table: "table";
    note: "note";
    """

    # This input should yield 4 parse trees.
    # Named `input_str` so that the builtin `input` is not shadowed.
    input_str = "title table title table"

    g = Grammar.from_string(grammar)
    parser = GLRParser(g, debug=True, debug_colors=True)
    results = parser.parse(input_str)

    # We should have 4 solutions for the input.
    assert len(results) == 4
Ejemplo n.º 19
0
def test_failed_disambiguation(cf):
    """
    Tests that DisambiguationError is raised, naming the competing
    terminals, when lexical disambiguation cannot pick a single one.
    """
    g = Grammar.from_string(r"""
    S: First | Second | Third;

    terminals
    First: /\d+\.\d+/ {15};
    Second: '14.7';
    Third: /\d+\.75/ {15};
    """)
    parser = Parser(g, actions=actions, debug=True)

    # Every rule matches the input, but First and Third have the higher
    # priority. Both are regexes, so the longest match would decide, and
    # both matches have the same length.
    with pytest.raises(DisambiguationError) as e:
        parser.parse('14.75')

    message = str(e.value)
    assert 'disambiguate' in message
    assert 'First' in message
    assert 'Second' not in message
    assert 'Third' in message
Ejemplo n.º 20
0
def test_cyclic_grammar_2():
    """
    Grammar taken from the paper "GLR Parsing for e-Grammers" by
    Rahman Nozohoor-Farshi.
    """
    g = Grammar.from_string("""
    S: S S;
    S: 'x';
    S: EMPTY;
    """)

    # The LR parser is expected to report shift/reduce conflicts here.
    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts=False)

    expected = [
        ['x', 'x'],
        [[[], 'x'], 'x'],
        [[[], [[], 'x']], 'x'],
        ['x', [[], 'x']],
        [[[], 'x'], [[], 'x']],
        [[], ['x', 'x']],
        [[], [[], ['x', 'x']]],
        ['x', [[], 'x']],
        [[[], 'x'], [[], 'x']],
        [[[], [[], 'x']], [[], 'x']],
        [[], [[[], 'x'], 'x']]
    ]

    glr = GLRParser(g)
    results = glr.parse('xx')

    # 11 solutions are expected for this input.
    assert len(results) == 11
    assert expected == results
Ejemplo n.º 21
0
def test_assignment_of_repetition():
    """
    Test that a one-or-more repetition can be assigned to a name and is
    passed to the rule action under that name.
    """
    g = Grammar.from_string("""
    S: "1" first=some_match+ "3";

    terminals
    some_match: "2";
    """)
    assert assignment_in_productions(g.productions, 'S', 'first')

    # Mutable cell flipped by the action to prove that it has been called.
    called = [False]

    def act_s(_, nodes, first):
        called[0] = True
        assert first == ["2", "2"]
        return nodes

    parser = Parser(g, actions={"S": act_s})
    parsed = parser.parse('1 2 2 3')

    assert parsed == ["1", ["2", "2"], "3"]
    assert called[0]
Ejemplo n.º 22
0
def test_reduce_enough_empty():
    """
    The grammar is unambiguous, but to finish successfully the parser has
    to reduce exactly as many empty A productions as there are "b" tokens
    ahead, which requires unlimited lookahead.

    Language is: xb^n, n>=0

    References:

    Nozohoor-Farshi, Rahman: "GLR Parsing for ε-Grammers", Generalized LR
    parsing, Springer, 1991.

    Rekers, Joan Gerard: "Parser generation for interactive environments",
    phD thesis, Universiteit van Amsterdam, 1992.

    """
    g = Grammar.from_string("""
    S: A S "b";
    S: "x";
    A: EMPTY;
    """)

    glr = GLRParser(g)
    solutions = glr.parse("xbbb")

    assert len(solutions) == 1
Ejemplo n.º 23
0
def test_optional():
    """
    Tests optional operator.
    """

    grammar = """
    S: "2" b? "3"? EOF;

    terminals
    b: "1";
    """

    g = Grammar.from_string(grammar)
    assert g.get_nonterminal('b_opt')

    p = Parser(g)

    # Both optional parts present.
    input_str = '2 1 3'
    result = p.parse(input_str)
    assert result == ["2", "1", "3", None]

    # A missing optional part is reported as None.
    input_str = '2 3'
    result = p.parse(input_str)
    assert result == ["2", None, "3", None]

    input_str = '2 1'
    result = p.parse(input_str)
    assert result == ["2", "1", None, None]

    # The leading "2" is mandatory.
    input_str = ' 1 3'
    with pytest.raises(ParseError) as e:
        p.parse(input_str)
    # Check the message of the raised exception, not the str() of the
    # pytest ExceptionInfo wrapper.
    assert 'Expected: 2' in str(e.value)
Ejemplo n.º 24
0
def test_assignment_bool():
    """
    Test that a bool assignment (`?=`) passes True to the rule action when
    the assigned element is matched.
    """
    g = Grammar.from_string("""
    S: "1" first?=some_match "3";
    some_match: "2";
    """)
    assert assignment_in_productions(g.productions, 'S', 'first')

    # Mutable cell flipped by the action to prove that it has been called.
    called = [False]

    def act_s(_, nodes, first):
        called[0] = True
        assert first is True
        return nodes

    parser = Parser(g, actions={"S": act_s})
    parsed = parser.parse('1 2 3')

    assert parsed == ["1", "2", "3"]
    assert called[0]
Ejemplo n.º 25
0
def test_obj_position():
    """
    Test that the start/end positions recorded on parsed objects match
    their place in the input text.
    """
    g = Grammar.from_string(r"""
    S: "first" seconds=Second+;
    Second: value=digits;

    terminals
    digits:/\d+/;
    """)

    # The asserted positions are offsets into this exact text, leading
    # whitespace included.
    text = """
    first 45 56
    66 3434342
    """
    result = Parser(g).parse(text)

    second_item = result.seconds[1]
    assert second_item._pg_start_position == 14
    assert second_item._pg_end_position == 16

    last_item = result.seconds[3]
    assert last_item._pg_start_position == 24
    assert last_item._pg_end_position == 31
Ejemplo n.º 26
0
def test_repeatable_zero_or_more_with_separator():
    """
    Tests the zero-or-more repetition operator combined with a separator
    modifier.
    """
    g = Grammar.from_string("""
    S: "2" b*[comma] "3";

    terminals
    b: "1";
    comma: ",";
    """)
    assert g.get_nonterminal('b_0_comma')

    parser = Parser(g)

    # Separated items are collected into a list.
    several = parser.parse('2 1, 1 , 1 3')
    assert several == ["2", ["1", "1", "1"], "3"]

    # No items at all gives an empty list.
    none_matched = parser.parse('2 3')
    assert none_matched == ["2", [], "3"]
Ejemplo n.º 27
0
def test_highly_ambiguous_grammar():
    """
    The grammar has Shift/Reduce as well as Reduce/Reduce conflicts, so a
    deterministic LR parser can't handle it. The prefer_shifts strategy
    resolves only the Shift/Reduce ones.
    """
    g = Grammar.from_string("""
    S: "b" | S S | S S S;
    """)

    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts=False)

    # With prefer_shifts the S/R conflicts are resolved, the R/R ones stay.
    with pytest.raises(RRConflicts):
        Parser(g, prefer_shifts=True)

    # The GLR parser accepts the grammar.
    glr = GLRParser(g, build_tree=True)

    # Three tokens: 3 derivations/trees expected.
    three_tokens = glr.parse("bbb")
    assert len(three_tokens) == 3

    # Four tokens: 10 derivations expected.
    four_tokens = glr.parse("bbbb")
    assert len(four_tokens) == 10
Ejemplo n.º 28
0
def test_repeatable_zero_or_more():
    """
    Tests the zero-or-more repetition operator.
    """
    g = Grammar.from_string("""
    S: "2" b* "3";

    terminals
    b: "1";
    """)
    # Both generated rules must be present in the grammar.
    for generated_name in ('b_0', 'b_1'):
        assert g.get_nonterminal(generated_name)

    parser = Parser(g)

    # Repeated items are collected into a list.
    several = parser.parse('2 1 1 1 3')
    assert several == ["2", ["1", "1", "1"], "3"]

    # No items at all gives an empty list.
    none_matched = parser.parse('2 3')
    assert none_matched == ["2", [], "3"]
Ejemplo n.º 29
0
def test_nondeterministic_LR_raise_error():
    """Language of even length palindromes.

    This is a non-deterministic grammar and the language is non-ambiguous.

    If the string is an even length palindrome the parser should reduce EMPTY
    at the middle of the string and start to reduce by A and B.

    LR parsing is deterministic so this grammar can't parse the input as the
    EMPTY reduction will be tried only after consuming all the input by
    implicit disambiguation strategy of favouring shifts over empty reductions.

    OTOH, GLR parser can handle this by forking parser at each step and trying
    both empty reductions and shifts. Only the parser that has reduced empty at
    the middle of the input will succeed.

    """
    grammar = """
    S: A | B | EMPTY;
    A: '1' S '1';
    B: '0' S '0';
    """

    g = Grammar.from_string(grammar)

    # Build the parser outside the `raises` block so that only the parse
    # call itself is expected to raise ParseError.
    p = Parser(g)
    with pytest.raises(ParseError):
        p.parse('0101000110001010')

    p = GLRParser(g)
    results = p.parse('0101000110001010')

    assert len(results) == 1
Ejemplo n.º 30
0
def test_repeatable_one_or_more_with_separator():
    """
    Tests one or more repeatable operator with separator.
    """

    grammar = """
    S: "2" b+[comma] "3";

    terminals
    b: "1";
    comma: ",";
    """

    g = Grammar.from_string(grammar)
    assert g.get_nonterminal('b_1_comma')

    p = Parser(g)

    input_str = '2 1, 1 , 1 3'
    result = p.parse(input_str)
    assert result == ["2", ["1", "1", "1"], "3"]

    # At least one `b` is required, so this input must be rejected.
    input_str = '2 3'
    with pytest.raises(ParseError) as e:
        p.parse(input_str)
    # Check the message of the raised exception, not the str() of the
    # pytest ExceptionInfo wrapper.
    assert 'Expected: b' in str(e.value)