Example #1
0
def test_non_eof_grammar_nonempty():
    """
    A grammar that is not anchored by EOF at the end may result in
    multiple trees produced by successful parses of the incomplete input.
    """
    # Raw string so that \w in the ID regex is not treated as an
    # (invalid) Python string escape sequence.
    grammar_nonempty = r"""
    Model: Prods;
    Prods: Prod | Prods Prod;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;
    ID: /\w+/;
    """

    g_nonempty = Grammar.from_string(grammar_nonempty)

    txt = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    p = GLRParser(g_nonempty, debug=True)
    results = p.parse(txt)
    # There are three successful parses.
    # E.g. one would be the production 'First = One Two three Second' where
    # the parser could not continue as the next token is '=', but it
    # succeeds as the model is not terminated with EOF, so partial parses
    # are allowed.
    assert len(results) == 3
def test_epsilon_grammar():
    """
    A grammar whose Prods rule may derive EMPTY accepts both regular
    input and the empty string, each with a single tree.
    """
    grammar = r"""
    Model: Prods EOF;
    Prods: Prod | Prods Prod | EMPTY;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;

    terminals
    ID: /\w+/;
    """

    g = Grammar.from_string(grammar)
    p = GLRParser(g, debug=True)

    txt = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    # Non-empty input: anchored by EOF, so exactly one parse.
    assert len(p.parse(txt)) == 1

    # Empty input is accepted through the EMPTY alternative.
    assert len(p.parse("")) == 1
def test_glr_recovery_default():
    """
    Default error recovery in GLR parsing: each error is reported, the
    input at the error position is dropped and parsing resumes. A run of
    consecutive erroneous characters produces a single error report.
    """
    parser = GLRParser(g, actions=actions, error_recovery=True)

    results = parser.parse('1 + 2 + * 3 & 89 - 5')

    assert len(parser.errors) == 2
    first_err, second_err = parser.errors

    # First error is '*' at position 8, spanning two characters.
    assert (first_err.location.start_position,
            first_err.location.end_position) == (8, 10)

    # Second error is '& 89' at position 12, ending at position 17.
    assert (second_err.location.start_position,
            second_err.location.end_position) == (12, 17)

    # The recovered input '1 + 2 + 3 - 5' yields 5 trees which all
    # evaluate to the same value.
    assert len(results) == 5
    evaluated = {parser.call_actions(tree) for tree in results}
    assert len(evaluated) == 1
    assert 1 in evaluated
Example #4
0
def test_non_eof_grammar_empty():
    """
    A grammar that is not anchored by EOF at the end may result in
    multiple trees produced by successful parses of the incomplete input.
    With an EMPTY alternative the empty input is accepted as well.
    """
    # Raw string so that \w in the ID regex is not treated as an
    # (invalid) Python string escape sequence.
    grammar_empty = r"""
    Model: Prods;
    Prods: Prod | Prods Prod | EMPTY;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;
    ID: /\w+/;
    """

    g_empty = Grammar.from_string(grammar_empty)

    txt = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    p = GLRParser(g_empty, debug=True)

    # Partial parses are allowed without EOF anchoring.
    results = p.parse(txt)
    assert len(results) == 3

    # Empty input is accepted via the EMPTY alternative.
    results = p.parse("")
    assert len(results) == 1
Example #5
0
def test_lr2_grammar():
    """
    An LR(2) grammar is rejected by the LR(1) parser but handled
    unambiguously by GLR.
    """
    # Raw string so that \w in the ID regex is not treated as an
    # (invalid) Python string escape sequence.
    grammar = r"""
    Model: Prods EOF;
    Prods: Prod | Prods Prod;
    Prod: ID "=" ProdRefs;
    ProdRefs: ID | ProdRefs ID;
    ID: /\w+/;
    """

    g = Grammar.from_string(grammar)

    # This grammar is not LR(1) as it requires
    # at least two tokens of lookahead to decide
    # what to do on each ID from the right side.
    # If '=' is after ID then it should reduce "Prod"
    # else it should reduce ID as ProdRefs.
    with pytest.raises(SRConflicts):
        Parser(g)

    # But it can be parsed unambiguously by GLR.
    p = GLRParser(g)

    txt = """
    First = One Two three
    Second = Foo Bar
    Third = Baz
    """

    results = p.parse(txt)
    assert len(results) == 1
Example #6
0
def test_group_complex():
    """Grouping constructs in rules, including nested groups and repetitions."""
    grammar_str = r'''
    @obj
    s: (b c)*[comma];
    s: (b c)*[comma] a=(a+ (b | c)*)+[comma];
    terminals
    a: "a";
    b: "b";
    c: "c";
    comma: ",";
    '''
    grammar = Grammar.from_string(grammar_str)

    # The first (b c) group is registered as s_g1 with one production.
    assert len(grammar.get_productions('s_g1')) == 1

    # The (b | c) group has one production per alternative.
    alt_prods = grammar.get_productions('s_g3')
    assert len(alt_prods) == 2
    assert [prod.rhs[0].name for prod in alt_prods] == ['b', 'c']

    # Nesting: the outer group references the repetitions of the inner ones.
    nested = grammar.get_productions('s_g2')
    assert len(nested) == 1
    assert nested[0].rhs[0].name == 'a_1'
    assert nested[0].rhs[1].name == 's_g3_0'
    assert grammar.get_productions('s')[1].rhs[1].name == 's_g2_1_comma'

    # No fifth group symbol exists.
    assert 's_g5' not in grammar

    parser = GLRParser(grammar)

    forest = parser.parse('b c, b c a a a b c c b, a b b')
    result = parser.call_actions(forest[0])
    assert result.a == [[['a', 'a', 'a'], ['b', 'c', 'c', 'b']],
                        [['a'], ['b', 'b']]]
Example #7
0
def test_cyclic_grammar_2():
    """
    Cyclic grammar where S derives S S, 'x' or EMPTY.

    From the paper: "GLR Parsing for e-Grammers" by Rahman Nozohoor-Farshi

    """
    grammar = """
    S: S S;
    S: 'x';
    S: EMPTY;
    """
    g = Grammar.from_string(grammar)

    # Deterministic LR cannot handle this grammar without prefer-shifts.
    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts=False)

    p = GLRParser(g)
    results = p.parse('xx')

    # We have 11 valid solutions
    assert len(results) == 11
    # Each [] marks an EMPTY reduction of S.
    # NOTE(review): entries 3/7 and 4/8 are identical values — presumably
    # distinct derivations that build equal lists; confirm if intended.
    expected = [
        ['x', 'x'],
        [[[], 'x'], 'x'],
        [[[], [[], 'x']], 'x'],
        ['x', [[], 'x']],
        [[[], 'x'], [[], 'x']],
        [[], ['x', 'x']],
        [[], [[], ['x', 'x']]],
        ['x', [[], 'x']],
        [[[], 'x'], [[], 'x']],
        [[[], [[], 'x']], [[], 'x']],
        [[], [[[], 'x'], 'x']]
    ]

    # Order-sensitive comparison; presumably the solution order follows
    # the parser's head-processing order — TODO confirm.
    assert expected == results
Example #8
0
def test_glr_recovery_default():
    """
    Default error recovery in GLR parsing: each error is reported, the
    input at the error position is dropped and parsing resumes. A run of
    consecutive erroneous characters produces a single error report.
    """
    parser = GLRParser(g, actions=actions, error_recovery=True, debug=True)

    results = parser.parse('1 + 2 + * 3 & 89 - 5')

    assert len(parser.errors) == 2
    first_err, second_err = parser.errors

    # First error is '*' at position 8, one character long.
    assert (first_err.position, first_err.length) == (8, 1)

    # Second error is '& 89' at position 12, four characters long.
    assert (second_err.position, second_err.length) == (12, 4)

    # The recovered input '1 + 2 + 3 - 5' yields 5 trees which all
    # evaluate to the same value.
    assert len(results) == 5
    distinct = set(results)
    assert len(distinct) == 1
    assert 1 in distinct
Example #9
0
def test_highly_ambiguous_grammar():
    """
    A grammar with both Shift/Reduce and Reduce/Reduce conflicts cannot be
    parsed deterministically: prefer_shifts resolves only the S/R part,
    while GLR enumerates every derivation.
    """
    grammar = """
    S: "b" | S S | S S S;
    """

    g = Grammar.from_string(grammar)

    # Without prefer_shifts the S/R conflicts are reported.
    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts=False)

    # prefer_shifts removes S/R conflicts, but R/R conflicts remain.
    with pytest.raises(RRConflicts):
        Parser(g, prefer_shifts=True)

    # GLR parser handles this fine.
    p = GLRParser(g, build_tree=True)

    # Three tokens give 3 valid derivations/trees.
    assert len(p.parse("bbb")) == 3

    # Four tokens give 10 valid derivations.
    assert len(p.parse("bbbb")) == 10
Example #10
0
def test_cyclic_grammar_3():
    """
    Grammar with an indirect cycle.
    r:EMPTY->A ; r:A->S; r:EMPTY->A; r:SA->S; r:EMPTY->A; r:SA->S;...
    """
    grammar = """
    S: S A | A;
    A: "a" | EMPTY;
    """

    g = Grammar.from_string(grammar)

    # Turning off the prefer-shifts-over-empty strategy exposes the three
    # S/R conflicts where each reduction is by EMPTY.
    with pytest.raises(SRConflicts):
        Parser(g, prefer_shifts_over_empty=False)

    # The default strategy (prefer shifts over empty) resolves them.
    Parser(g)

    glr = GLRParser(g)
    forest = glr.parse('aa')

    # The forest is cyclic, so enumerating its solutions raises.
    with pytest.raises(LoopError):
        len(forest)
Example #11
0
def test_reduce_enough_empty():
    """
    Unambiguous grammar requiring unlimited lookahead: the parser must
    reduce exactly as many empty A productions as there are "b" tokens
    ahead to finish successfully.

    Language is: xb^n, n>=0

    References:

    Nozohoor-Farshi, Rahman: "GLR Parsing for ε-Grammers", Generalized LR
    parsing, Springer, 1991.

    Rekers, Joan Gerard: "Parser generation for interactive environments",
    phD thesis, Universiteit van Amsterdam, 1992.

    """
    grammar = """
    S: A S "b";
    S: "x";
    A: EMPTY;
    """
    parser = GLRParser(Grammar.from_string(grammar))

    # Unambiguous: exactly one tree for 'x' followed by three 'b's.
    assert len(parser.parse("xbbb")) == 1
Example #12
0
def test_issue31_glr_drop_parses_on_lexical_ambiguity():
    """
    Regression test for issue #31: lexically ambiguous terminals
    (`title` and `table_title` match the same text) must not cause the
    GLR parser to drop valid parses.
    """
    grammar = """
    model: element+ EOF;
    element: title
           | table_with_note
           | table_with_title;
    table_with_title: table_title table_with_note;
    table_with_note: table note*;

    terminals
    title: /title/;   // <-- This is lexically ambiguous with the next.
    table_title: /title/;
    table: "table";
    note: "note";
    """

    # Renamed from `input` to avoid shadowing the builtin.
    input_str = "title table title table"

    g = Grammar.from_string(grammar)
    parser = GLRParser(g, debug=True, debug_colors=True)
    results = parser.parse(input_str)

    # We should have 4 solutions for the input.
    assert len(results) == 4
Example #13
0
def test_prefer_shifts_no_sr_conflicts():
    """
    Test that a grammar with S/R conflicts will be resolved to SHIFT actions
    if the prefer_shifts option is used, and that GLR enumerates all
    solutions when it is not.
    """
    # This grammar has an S/R conflict as B+ may consume multiple single "a" A
    # because "b" is optional. Thus, the parser can't decide if it should
    # shift "a" or reduce by 'B: "b"? A+' and later by 'S: B+'. Most of the
    # time we want greedy behavior, so in case of doubt the parser will choose
    # shift if prefer_shifts is set to `True`. This means that the parser will
    # first consume all "a" using A+ and then reduce B at the end.
    grammar = r"""
    S: B+;
    B: "b"? A+;

    terminals
    A: "a";
    """
    g = Grammar.from_string(grammar)

    # There is a shift/reduce conflict so we can't use the LR parser.
    table = create_table(g)
    assert len(table.sr_conflicts) == 1

    # But we can eliminate the conflict by the prefer_shifts strategy.
    table = create_table(g, prefer_shifts=True)
    assert len(table.sr_conflicts) == 0

    # With prefer_shifts we get greedy behavior.
    input_str = 'b a a a b a a'
    output = [['b', ['a', 'a', 'a']], ['b', ['a', 'a']]]
    parser = Parser(g, prefer_shifts=True)
    result = parser.parse(input_str)
    assert result == output

    # GLR parser can parse without the prefer_shifts strategy. This grammar
    # is ambiguous and yields 11 solutions for the given input.
    # None marks the absent optional "b".
    parser = GLRParser(g)
    results = parser.parse(input_str)
    # NOTE(review): order-sensitive comparison — presumably solution order
    # follows the parser's head-processing order; confirm before reordering.
    expected = [
        [['b', ['a']], [None, ['a']], [None, ['a']], ['b', ['a', 'a']]],
        [['b', ['a', 'a']], [None, ['a']], ['b', ['a', 'a']]],
        [['b', ['a', 'a', 'a']], ['b', ['a', 'a']]],
        [['b', ['a']], [None, ['a', 'a']], ['b', ['a', 'a']]],
        [['b', ['a']], [None, ['a', 'a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a', 'a', 'a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a', 'a']], [None, ['a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a']], [None, ['a', 'a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a', 'a', 'a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a', 'a']], [None, ['a']], ['b', ['a']], [None, ['a']]],
        [['b', ['a']], [None, ['a']], [None, ['a']], ['b', ['a']], [None, ['a']]]  # noqa
    ]
    assert results == expected

    # But if `prefer_shifts` is used we get only one solution.
    parser = GLRParser(g, prefer_shifts=True)
    result = parser.parse(input_str)
    assert len(result) == 1
    assert result[0] == output
Example #14
0
def test_empty_terminal():
    """
    A terminal whose regex can match the empty string (/b*/) must not make
    the parser loop; unparsable input still raises ParseError.
    """
    g = Grammar.from_string("""
    a: a t | t;
    terminals
    t: /b*/;
    """)
    parser = GLRParser(g)
    with pytest.raises(ParseError):
        parser.parse("a")
Example #15
0
def test_glr_last_heads_in_error():
    """A ParseError raised by the GLR parser carries the last active heads."""
    parser = GLRParser(get_grammar())

    with pytest.raises(ParseError) as excinfo:
        parser.parse("id+id*+id")

    assert len(excinfo.value.last_heads) == 1
def test_positions_glr():
    """
    Check that start/end positions are set on the root of a GLR-built tree.

    See https://github.com/igordejanovic/parglare/issues/110

    NOTE(review): relies on module-level `grammar` and `expression` defined
    elsewhere in this module — verify they are the expression fixtures.
    """
    g = Grammar.from_string(grammar)
    parser = GLRParser(g, build_tree=True)
    result = parser.parse(expression)

    # The root node of the first tree spans positions 0..2 of the input.
    assert result[0].start_position == 0
    assert result[0].end_position == 2
Example #17
0
def main(debug=False):
    """Parse the preprocessed C example file and report forest statistics."""
    here = os.path.dirname(__file__)
    grammar = Grammar.from_file(os.path.join(here, 'c.pg'),
                                re_flags=re.MULTILINE | re.VERBOSE)
    parser = GLRParser(grammar, debug=debug, debug_colors=True)

    # The input is C code after preprocessing
    forest = parser.parse_file(os.path.join(here, 'example.c'))

    print('Solutions: ', len(forest))
    print('Ambiguities: ', forest.ambiguities)
Example #18
0
def parse(ctx, grammar_file, input_file, input, glr, recovery, dot, positions):
    """
    CLI command: parse `input_file` (or the `input` string) with the grammar
    from `grammar_file`, using either GLR or LR parsing. Optionally reports
    recovery errors and writes a dot visualization of the result.
    """
    if not (input_file or input):
        prints('Expected either input_file or input string.')
        sys.exit(1)
    colors = ctx.obj['colors']
    debug = ctx.obj['debug']
    prefer_shifts = ctx.obj['prefer_shifts']
    prefer_shifts_over_empty = ctx.obj['prefer_shifts_over_empty']
    grammar = Grammar.from_file(grammar_file, debug=debug, debug_colors=colors)
    # NOTE(review): parser debug is hard-coded to False although the `debug`
    # flag is read above — confirm whether parser-level debug is intentionally
    # disabled here.
    if glr:
        parser = GLRParser(grammar,
                           debug=False,
                           debug_colors=colors,
                           error_recovery=recovery,
                           prefer_shifts=prefer_shifts,
                           prefer_shifts_over_empty=prefer_shifts_over_empty)
    else:
        parser = Parser(grammar,
                        build_tree=True,
                        debug=False,
                        debug_colors=colors,
                        error_recovery=recovery,
                        prefer_shifts=prefer_shifts,
                        prefer_shifts_over_empty=prefer_shifts_over_empty)

    if input:
        result = parser.parse(input)
    else:
        result = parser.parse_file(input_file)

    if glr:
        print(f'Solutions:{result.solutions}')
        print(f'Ambiguities:{result.ambiguities}')

    if recovery:
        print(f'Errors: {len(parser.errors)}')
        for error in parser.errors:
            print('\t', str(error))

    # Removed a dead no-op (`result = result`) from the ambiguous branch —
    # only the banner differs between the two cases.
    if glr and result.solutions > 1:
        print('Printing the forest:\n')
    else:
        print('Printing the parse tree:\n')

    print(result.to_str())

    if dot:
        f_name = 'forest.dot' if glr and result.solutions > 1 else 'tree.dot'
        with open(f_name, 'w') as f:
            f.write(result.to_dot(positions))
        print('Created dot file ', f_name)
def test_regex_alternative_match_bug():
    """
    Regression test: a terminal regex with alternatives (/=|EQ/) must match
    either alternative during GLR parsing.
    """

    grammar = """
    A: "Begin" Eq "End";

    terminals
    Eq: /=|EQ/;
    """
    g = Grammar.from_string(grammar)
    parser = GLRParser(g)
    # Succeeds if no ParseError is raised while matching the 'EQ' alternative.
    parser.parse('Begin EQ End')
Example #20
0
def test_issue_112_fail_on_empty():
    """
    Regression test for issue #112: rules with optional/empty parts
    (subordinateClause*, determiner?, verbSuffix*) must not make the
    parse fail; a single tree with zero-length spans for the empty
    parts is expected.
    """

    grammar = r'''
    sentence1:              subordinateClause* clause sentenceEnd;

    subordinateClause:      clause clauseConnector;

    clause:                 singleNounPhrase* verbPhrase;

    // ----------
    singleNounPhrase:       determiner? simpleNoun;
    // ----------

    verbPhrase:             simpleVerb verbSuffix* predicateEndingSuffix?;

    terminals
        sentenceEnd:            /[^:]+:(SF);/;
        clauseConnector:        /[^:]+:(EC|CCF|CCMOD|CCNOM);/;
        determiner:             /[^:]+:(MM);/;
        simpleNoun:             /[^:]+:(NNG|NNP|NNB|NR|SL|NP|SN);/;
        simpleVerb:             /[^:]+:(VV|VVD|VHV);/;
        verbSuffix:             /[^:]+:(EP|TNS);/;
        predicateEndingSuffix:  /[^:]+:(SEF|EF);/;
    '''

    g = Grammar.from_string(grammar)
    parser = GLRParser(g, build_tree=True)

    # Morphologically annotated Korean input: word:TAG; tokens.
    results = parser.parse('자전거:NNG; 있:VV; 어요:SEF; .:SF;')

    # Expected tree: empty spans (e.g. [0->0]) mark empty optional parts.
    expected = r'''
sentence1[0->28]
subordinateClause_0[0->0]
clause[0->22]
  singleNounPhrase_0[0->8]
    singleNounPhrase_1[0->8]
      singleNounPhrase[0->8]
        determiner_opt[0->0]
        simpleNoun[0->8, "자전거:NNG;"]
  verbPhrase[9->22]
    simpleVerb[9->14, "있:VV;"]
    verbSuffix_0[15->15]
    predicateEndingSuffix_opt[15->22]
      predicateEndingSuffix[15->22, "어요:SEF;"]
sentenceEnd[23->28, ".:SF;"]

'''

    assert len(results) == 1
    assert results[0].tree_str().strip() == expected.strip()
Example #21
0
def test_glr_list_building_bug():
    """
    Regression test for a bug in building lists from default `collect`
    actions in GLR parsing.
    """
    grammar = r"""
        S: B+;
        B: "b"? A+;
        A: "a";
    """
    parser = GLRParser(Grammar.from_string(grammar))

    forest = parser.parse('b a b a a a')
    assert len(forest) == 1
    assert forest[0] == [['b', ['a']], ['b', ['a', 'a', 'a']]]
def test_glr_recovery_custom_unsuccessful():
    """
    When a custom recovery callable refuses to recover (returns False),
    the parser raises ParseError at the failure position.
    """
    def refuse_recovery(head, error):
        # Always decline to recover.
        return False

    parser = GLRParser(g, actions=actions, error_recovery=refuse_recovery)

    with pytest.raises(ParseError) as excinfo:
        parser.parse('1 + 5 8 - 2')

    assert excinfo.value.location.start_position == 6
def test_cyclic_grammar_3():
    """Indirectly cyclic grammar handled by both LR (default strategy) and GLR."""
    grammar = """
    S: S A | A;
    A: "a" | EMPTY;
    """

    g = Grammar.from_string(grammar)

    # LR table construction succeeds with default conflict resolution.
    Parser(g)

    glr = GLRParser(g, debug=True)
    assert len(glr.parse('aa')) == 1
Example #24
0
    def _setup_parser(self):
        """Set up the GLR parser for the C grammar with a dynamic
        disambiguation filter for typedef names."""
        # Locate cparser/cgrammar.pg relative to this file's parent directory.
        file_path = os.path.realpath(os.path.dirname(__file__))
        root_path = os.path.split(os.path.abspath(os.path.join(file_path)))[0]
        grammar_path = os.path.join(root_path, "cparser", "cgrammar.pg")

        grammar = Grammar.from_file(grammar_path)

        def typedef_filter(context, action, subresults):
            """Filter for dynamic disambiguation

            Solves problems with typedef_name disambiguation. Whenever the
            REDUCE is called on typedef_name rule, we first check if the
            ID that is trying to be reduced is actually a user-defined type
            (struct, union, typedef). If yes, then the REDUCE will be called.

            """
            # No action given: nothing to veto.
            if action is None:
                return

            production = context.production

            if action is REDUCE and production.symbol.fqn == "typedef_name":

                # Only allow reducing an ID to typedef_name if it is a
                # registered user-defined type.
                var_name = subresults[0].value
                if var_name not in self.user_defined_types:
                    return False

            if action is REDUCE and production.symbol.fqn == "primary_exp":
                # Conversely, an ID that names a user-defined type must not
                # be reduced as a primary expression.
                child = subresults[0]
                if child.symbol.fqn == "id":
                    if child.value in self.user_defined_types:
                        return False

            if action is REDUCE and production.symbol.fqn == "iteration_stat":
                # Reject a for-loop body misparsed as a declaration with an
                # empty init-declarator list.
                if isrule(subresults[2], "decl_body"):
                    init_declarator_list_opt = subresults[2].children[1]
                    if len(init_declarator_list_opt.children) == 0:
                        return False

            # Default: permit the action.
            return True

        self._glr = GLRParser(grammar,
                              build_tree=True,
                              call_actions_during_tree_build=True,
                              dynamic_filter=typedef_filter,
                              actions=self._setup_actions(),
                              ws='\n\r\t ')
Example #25
0
def test_nops():
    """
    Test that nops (no prefer shifts) will be honored per rule.
    """
    grammar = """
    Program: "begin"
             statements=Statements
             ProgramEnd EOF;
    Statements: Statements1 | EMPTY;
    Statements1: Statements1 Statement | Statement;
    ProgramEnd: End;
    Statement: End "transaction" | "command";
    End: "end";
    """

    g = Grammar.from_string(grammar, ignore_case=True)
    parser = GLRParser(g, build_tree=True, prefer_shifts=True)

    # Here we have "end transaction" which is a statement and "end" which
    # finishes the program. Prefer-shifts strategy will make the parser always
    # choose to shift "end" in anticipation of an "end transaction" statement
    # instead of reducing by "Statements" and finishing.
    with pytest.raises(ParseError):
        parser.parse("""
        begin
            command
            end transaction
            command
            end transaction
            command
        end
        """)

    # When {nops} is used, the GLR parser will investigate both possibilities
    # at this place and find the correct interpretation while still using the
    # prefer_shifts strategy globally.
    grammar = """
    Program: "begin"
             statements=Statements
             ProgramEnd EOF;
    Statements: Statements1 {nops} | EMPTY;
    Statements1: Statements1 Statement | Statement;
    ProgramEnd: End;
    Statement: End "transaction" | "command";
    End: "end";
    """

    g = Grammar.from_string(grammar, ignore_case=True)
    parser = GLRParser(g, build_tree=True, prefer_shifts=True)
    # Must now succeed on the same input.
    parser.parse("""
    begin
        command
        end transaction
        command
        end transaction
        command
    end
    """)
Example #26
0
def test_nondeterministic_LR_raise_error():
    """Language of even length palindromes.

    This is a non-deterministic grammar and the language is non-ambiguous.

    To accept an even-length palindrome the parser must reduce EMPTY at
    the middle of the string and then start reducing by A and B.

    LR parsing is deterministic, so it cannot parse this input: its
    implicit disambiguation favours shifts over empty reductions and
    EMPTY is only tried after all input is consumed.

    OTOH, the GLR parser forks at each step, trying both the empty
    reduction and the shift; only the fork that reduced EMPTY at the
    middle of the input succeeds.
    """
    grammar = """
    S: A | B | EMPTY;
    A: '1' S '1';
    B: '0' S '0';
    """

    g = Grammar.from_string(grammar)

    # Deterministic LR fails on this palindrome.
    with pytest.raises(ParseError):
        Parser(g).parse('0101000110001010')

    # GLR finds the unique successful derivation.
    glr = GLRParser(g)
    assert len(glr.parse('0101000110001010')) == 1
Example #27
0
def test_number_of_trees():
    """
    number_of_trees on the shared forest head must equal the ambiguity
    count reported by len().
    """
    g = Grammar.from_string(r"""
    E: E '+' E | E '-' E | number;
    terminals
    number: /\d+/;
    """)

    parser = GLRParser(g, build_tree=True)

    trees = parser.parse('1 + 2 + 3 - 7')
    assert len(trees) == 5
    # Every tree shares the same head, which carries the tree count.
    shared_head = trees[0].context.head
    assert all(tree.context.head == shared_head for tree in trees)
    assert shared_head.number_of_trees == 5
Example #28
0
def test_g7():
    """
    Grammar G7 from: Nozohoor-Farshi, Rahman: "GLR Parsing for ε-Grammers"
    """
    grammar = """
    S: "a" S "a" | B S "b" | C S "c" | "x";
    B: "a";
    C: "a";
    """

    parser = GLRParser(Grammar.from_string(grammar))

    # Unambiguous despite B and C both matching "a": a single tree.
    assert len(parser.parse("aaaaaaaaxbbcaacaa")) == 1
Example #29
0
def parser():
    """Build a GLR parser for an ambiguous arithmetic expression grammar."""
    grammar = r"""
    E: E "+" E | E "*" E | "(" E ")" | Number;
    terminals
    Number: /\d+/;
    """
    g = Grammar.from_string(grammar)
    return GLRParser(g)
Example #30
0
def test_ambiguous_glr():
    """
    An unknown token ('/') aborts GLR parsing with a ParseError carrying
    the error position and the symbols seen before it.
    """
    grammar = r"""
    E: E '+' E
     | E '*' E
     | number;

    terminals
    number: /\d+(\.\d+)?/;
    """
    parser = GLRParser(Grammar.from_string(grammar))

    with pytest.raises(ParseError) as excinfo:
        parser.parse("1 + 2 * 3 / 5")

    err = excinfo.value
    assert err.location.start_position == 10
    assert 'number' in [sym.name for sym in err.symbols_before]