Example #1
0
def fix_class_scope(rules):
    """
    Inline the "class_type" rule into the "class_scope" rule only.

    Because otherwise class_type consumes the last id after ::
    and it is not possible to recover.

    :param rules: collection of grammar rules which is searched by name
    """
    class_scope = rule_by_name(rules, "class_scope")
    class_type = rule_by_name(rules, "class_type")
    # only class_scope is handed over as the set of rules to rewrite,
    # the class_type rule itself stays in the grammar
    _inline_rule([class_scope], class_type)
Example #2
0
def split_rule(rules, rule_name, symbols_to_extract: List[str], subrule_name: str):
    """
    Keep in the rule only the options which start with one of the symbols
    from symbols_to_extract and move all remaining options to a new subrule.

    A reference to the subrule is put in front of the kept options and the
    subrule is inserted into rules right before the original rule.

    :param rules: list of grammar rules (modified in place)
    :param rule_name: name of the rule to split, its body has to be
        an Antlr4Selection
    :param symbols_to_extract: start symbols of the options which stay
        in the original rule
    :param subrule_name: name of the newly created rule
    :return: the newly created subrule
    """
    rule = rule_by_name(rules, rule_name)

    assert isinstance(rule.body, Antlr4Selection), rule

    # options whose collected start symbols contain none of the
    # requested symbols
    moved = Antlr4Selection([])
    for option in rule.body:
        left_corner = set()
        _direct_left_corner(option, left_corner, allow_eps_in_sel=True)
        if left_corner.intersection(symbols_to_extract):
            continue
        moved.append(option)

    kept = Antlr4Selection(
        [option for option in rule.body if option not in moved])
    kept.insert(0, Antlr4Symbol(subrule_name, False))
    # a selection with a single option is unwrapped
    rule.body = kept[0] if len(kept) == 1 else kept

    assert len(moved) > 0
    subrule_body = moved[0] if len(moved) == 1 else Antlr4Selection(moved)

    subrule = Antlr4Rule(subrule_name, subrule_body)
    rules.insert(rules.index(rule), subrule)
    return subrule
Example #3
0
def iterate_everything_except_first(rules, rule_name):
    """
    Apply _iterate_everything_except_first_and_replace_first to the body
    of the named rule, using its own first non-visual element as
    the replacement for the first element.

    :param rules: collection of grammar rules which is searched by name
    :param rule_name: name of the rule to rewrite
    :raises NotImplementedError: if the rule body is not an Antlr4Sequence
    """
    body = rule_by_name(rules, rule_name).body
    if not isinstance(body, Antlr4Sequence):
        raise NotImplementedError()

    first = next(iter_non_visuals(body))
    _iterate_everything_except_first_and_replace_first(body, first)
Example #4
0
def left_recurse_remove(rules):
    """
    Removing Left Recursion from Context-Free Grammars
    https://www.microsoft.com/en-us/research/wp-content/uploads/2000/04/naacl2k-proc-rev.pdf

    http://web.science.mq.edu.au/~mjohnson/papers/johnson-left-corner.pdf

    The function is a fixed pipeline of rewrites applied to hard-coded rule
    names; the order of the steps matters because later steps look up rules
    created or renamed by the earlier ones.

    :param rules: grammar rules to rewrite
    :return: the rules after the rewrite. The name is rebound to the result
        of optimise_selections(), so callers should use the returned value
        (NOTE(review): whether optimise_selections returns the same list
        object is not visible here - confirm)

    :note: supports the '|',?,* in rules
    """
    # :note: higher priority = sooner in parse tree

    rules = optimise_selections(rules)
    # rules with a direct left recursion which can be removed generically
    direct_left_recurse_rm(rules, 'block_event_expression')
    direct_left_recurse_rm(rules, 'event_expression')
    # direct_left_recurse_rm(rules, 'constant_expression')
    # constant_expression needs the operator precedence aware variant
    solve_left_recurse_and_op_precedence_for_constant_expression(rules)
    # method_call_root - only in method_call
    # method_call      - only in subroutine_call
    inline_rule(rules, "method_call")
    inline_rule(rules, "method_call_root")
    # creates the primary_no_cast_no_call rule which is later referenced
    # from subroutine_call_rm_lr()
    split_rule(rules, "primary", ["cast", "subroutine_call"],
               "primary_no_cast_no_call")

    split_rule(rules, "constant_primary", ["constant_cast", "subroutine_call"],
               "constant_primary_no_cast_no_call")

    # inline_rule(rules, "cast")
    # inline_rule(rules, "constant_cast")
    #iterate_everything_except_first(
    #   rules, "cast")
    #iterate_everything_except_first(
    #   rules, "constant_cast")
    # [TODO] check if really all combinations of cast/call are possible
    # casting_type is switched to the subrule created by the split above
    replace_symbol_in_rule(rules, "casting_type", "constant_primary",
                           "constant_primary_no_cast_no_call")

    # solve expression - conditional_expression left recurse
    # copy cond_predicate

    inline_rule(rules, "inside_expression")
    subroutine_call_rm_lr(rules)

    inline_rule(rules, "module_path_conditional_expression")
    direct_left_recurse_rm(rules, 'module_path_expression')

    # inline_rule(rules, "inside_expression")
    inline_rule(rules, "expression_or_cond_pattern")
    inline_rule(rules, "cond_pattern")
    # inline_rule(rules, "conditional_expression")
    # second optimisation pass after the inlining above, done before
    # the expression rule is rewritten
    rules = optimise_selections(rules)

    solve_left_recurse_and_op_precedence_for_expression(rules)
    # the binary_operator rule is dropped from the grammar at this point
    # (presumably no longer referenced after the expression rewrite
    # - TODO confirm)
    binary_operator = rule_by_name(rules, "binary_operator")
    rules.remove(binary_operator)

    return rules
Example #5
0
def rm_semi_from_cross_body_item(rules):
    """
    Ask replace_item_by_sequence to substitute an empty sequence for
    the SEMI symbol in the first item of the "cross_body" rule body.

    Because SEMI is already part of cross_body_item

    :param rules: collection of grammar rules which is searched by name
    """
    cross_body = rule_by_name(rules, "cross_body")
    semi = Antlr4Symbol("SEMI", False)

    def match_replace_fn(o):
        # None (no replacement) for everything which is not SEMI
        return Antlr4Sequence([]) if o == semi else None

    replace_item_by_sequence(cross_body.body[0], match_replace_fn)
Example #6
0
def rm_ambiguity(rules):
    """
    In the "variable_decl_assignment" rule replace the optional
    "ASSIGN class_new" by its body (the plain "ASSIGN class_new" sequence).

    :param rules: collection of grammar rules which is searched by name
    """
    target_rule = rule_by_name(rules, "variable_decl_assignment")
    optional_assign_new = Antlr4Option(
        Antlr4Sequence([
            Antlr4Symbol("ASSIGN", False),
            Antlr4Symbol("class_new", False),
        ]))

    def match_replace_fn(o):
        # None (no replacement) for everything which is not the option
        return o.body if o == optional_assign_new else None

    replace_item_by_sequence(target_rule, match_replace_fn)
Example #7
0
def direct_left_recurse_rm(rules, rule_name):
    """
    Remove the direct left recursion from the rule named rule_name.

    The original rule is renamed to "<rule_name>_item" and keeps only
    the choices which do not start with rule_name. A new rule named
    rule_name is inserted right before it; its body is made of the removed
    (left recursive) choices after they were rewritten by
    _iterate_everything_except_first_and_replace_first() with
    the "<rule_name>_item" symbol as the replacement of the first element.

    :param rules: list of grammar rules (modified in place)
    :param rule_name: name of the directly left recursive rule
    :raises NotImplementedError: if the rule body is neither a selection
        nor a sequence, if some choice is not a sequence or if no
        non-recursive choice is left
    :raises AssertionError: if no choice is left recursive or if a rule
        named "<rule_name>_item" already exists

    :note: a rule whose body is a single sequence always ends in one of
        the errors above (either it is not left recursive or no other
        choice is left)
    """
    r = rule_by_name(rules, rule_name)

    if isinstance(r.body, Antlr4Selection):
        choices = r.body
    elif isinstance(r.body, Antlr4Sequence):
        choices = [r.body, ]
    else:
        raise NotImplementedError()

    # find choices which start with this rule non terminal
    lr_choices = []
    for c in choices:
        if not isinstance(c, Antlr4Sequence):
            raise NotImplementedError()
        # an empty choice has no first element and so it can not be
        # left recursive (a bare next() would leak StopIteration)
        first = next(iter_non_visuals(c), None)
        if isinstance(first, Antlr4Symbol) and first.symbol == rule_name:
            lr_choices.append(c)

    # remove choices which are causing left recursion
    assert len(lr_choices) >= 1, rule_name
    for lr_choice in lr_choices:
        choices.remove(lr_choice)

    if len(choices) == 0:
        raise NotImplementedError()
    elif len(choices) == 1:
        r.body = choices[0]

    # rename this rule to rule_item
    r_base_name = r.name + "_item"
    # the new name must not collide with any existing rule
    # (the check has to look at every rule, not at the renamed one)
    for _r in rules:
        assert _r.name != r_base_name, r_base_name
    r.name = r_base_name

    # create new rule which will implement removed choices and also expands to rule_item
    choices_new = Antlr4Selection([])
    for lr_choice in lr_choices:
        first = next(iter_non_visuals(lr_choice))
        assert isinstance(first, Antlr4Symbol) and first.symbol == rule_name
        repl = Antlr4Symbol(r_base_name, False)
        _iterate_everything_except_first_and_replace_first(lr_choice, repl)

        if not choices_new:
            # only the first choice gets the leading newline + indent
            # elements
            lr_choice.insert(0, Antlr4Newline())
            lr_choice.insert(1, Antlr4Indent(1))
        choices_new.append(lr_choice)

    # a selection with a single choice is unwrapped
    body_new = choices_new[0] if len(choices_new) == 1 else choices_new
    r_new = Antlr4Rule(rule_name, body_new)
    rules.insert(rules.index(r), r_new)
Example #8
0
def subroutine_call_rm_lr(rules):
    """
    Rewrite the beginning of the third option of the "subroutine_call" rule.

    The first non-visual element of that option is emptied and refilled
    with the symbols primary_no_cast_no_call, cast and
    implicit_class_handle.

    :param rules: collection of grammar rules which is searched by name
    :raises AssertionError: if the rule body is not a selection or if
        the option does not end with the method_call_body symbol
    """
    subroutine_call = rule_by_name(rules, "subroutine_call")
    assert isinstance(subroutine_call.body, Antlr4Selection)

    # the option is selected by its position in the selection
    option = subroutine_call.body[2]
    items = list(iter_non_visuals(option))
    assert items[-1].symbol == "method_call_body", items[-1].symbol

    head: Antlr4Selection = items[0]
    head.clear()
    head.extend([
        Antlr4Symbol(symbol_name, False)
        for symbol_name in ("primary_no_cast_no_call",
                            "cast",
                            "implicit_class_handle")
    ])
Example #9
0
def wrap_in_lexer_mode(rules, mode_name, enter_tokens, exit_tokens, tokens,
                       shared_tokens):
    """
    Move lexer token rules into the lexer mode mode_name.

    * the rule of every enter token gets a pushMode(mode_name) action
    * every token from tokens which is not shared has its rule moved to
      the mode (its lexer_mode is overwritten)
    * for every token from shared_tokens the original rule is kept and
      a copy named "<mode_name>_<token>" is appended to rules in the mode;
      unless the copied actions contain skip, the copy gets a type(<token>)
      action which translates it back to the original token
    * the mode rule of every processed token which is also in exit_tokens
      gets a popMode() action

    :param rules: list of grammar rules (modified in place)
    :param mode_name: name of the lexer mode
    :param enter_tokens: names of the tokens which enter the mode
    :param exit_tokens: names of the tokens which leave the mode, only
        those which are also in tokens or shared_tokens are affected
    :param tokens: set of token names which exist only in the mode
    :param shared_tokens: token names which exist in the mode and
        outside of it as well
    """
    for enter_token in enter_tokens:
        enter_rule = rule_by_name(rules, enter_token)
        enter_rule.lexer_actions.append(Antlr4LexerAction.pushMode(mode_name))

    # built once, the membership is tested for every processed token
    exit_token_names = set(exit_tokens)

    # sorted to have the deterministic order of the appended rules
    for t_name in sorted(tokens.union(shared_tokens)):
        t_rule = rule_by_name(rules, t_name)
        if t_name in shared_tokens:
            # copy the rule
            # translate mode specific token to a original token
            actions = deepcopy(t_rule.lexer_actions)
            if Antlr4LexerAction.skip() not in actions:
                actions.append(Antlr4LexerAction.type(t_name))
            mode_specific_t_rule = Antlr4Rule(mode_name + "_" + t_name,
                                              deepcopy(t_rule.body),
                                              lexer_mode=mode_name,
                                              lexer_actions=actions)
            rules.append(mode_specific_t_rule)
            t_rule = mode_specific_t_rule

        t_rule.lexer_mode = mode_name
        if t_name in exit_token_names:
            t_rule.lexer_actions.append(Antlr4LexerAction.popMode())
Example #10
0
def solve_left_recurse_and_op_precedence_for_constant_expression(rules):
    """
    Replace the left recursive "constant_expression" rule by a chain of
    rules, one for each group returned by get_operator_precedence_groups().

    The rule built for the last group is named "constant_expression",
    the others "constant_expression_<n>" (n counted from 1) and the chain
    starts from "constant_expression_0" which holds the first two options
    of the original rule.

    :param rules: list of grammar rules (modified in place)
    """
    # constant_expression:
    #       constant_primary
    #       | unary_operator ( attribute_instance )* constant_primary
    #       | constant_expression binary_operator ( attribute_instance )* constant_expression
    #       | constant_expression QUESTIONMARK ( attribute_instance )* constant_expression COLON constant_expression;

    base_rule = extract_option_as_rule(
        rules, "constant_expression", [0, 1], "constant_expression_0")

    # constant_expression_0:
    #       constant_primary
    #       | unary_operator ( attribute_instance )* constant_primary

    def handle_conditional_fn(bin_op_choices, current_expr_rule):
        """
        Add the operand symbol followed by the iteration of
        "? (attribute_instance)* constant_expression : constant_expression"
        """
        operand = Antlr4Symbol(current_expr_rule.name, False)
        conditional_tail = Antlr4Iteration(
            Antlr4Sequence([
                Antlr4Symbol("QUESTIONMARK", False),
                Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
                Antlr4Symbol("constant_expression", False),
                Antlr4Symbol("COLON", False),
                Antlr4Symbol("constant_expression", False),
            ]))
        bin_op_choices.extend([operand, conditional_tail])

    def handle_inside_fn(bin_op_choices, current_expr_rule):
        """
        Nothing is added for the inside operator in constant_expression
        """

    rules.remove(rule_by_name(rules, "constant_expression"))

    precedence_groups = get_operator_precedence_groups()
    group_count = len(precedence_groups)
    current_expr_rule = base_rule
    for level, prec_group in enumerate(precedence_groups, start=1):
        # the rule of the last group takes over the original name
        if level == group_count:
            new_rule_name = "constant_expression"
        else:
            new_rule_name = "constant_expression_%d" % level
        current_expr_rule = extract_bin_ops(
            rules, current_expr_rule, prec_group, new_rule_name,
            "constant_expression",
            handle_conditional_fn,
            handle_inside_fn)
Example #11
0
def extract_option_as_rule(rules, rule_name, options_i, new_rule_name):
    """
    Move the options selected by their indexes from the rule to a new rule.

    In the original rule the first extracted option is replaced by
    a reference to the new rule (followed by a newline and indent) and
    the other extracted options are removed. The new rule is inserted
    into rules right before the original rule.

    :param rules: list of grammar rules (modified in place)
    :param rule_name: name of the rule to extract from, its body has to be
        an Antlr4Selection
    :param options_i: indexes of the options to extract
    :param new_rule_name: name of the newly created rule
    :return: the newly created rule
    """
    rule = rule_by_name(rules, rule_name)
    assert isinstance(rule.body, Antlr4Selection)

    # has to be collected before the original body is modified
    extracted = Antlr4Selection([rule.body[i] for i in options_i])

    reference = Antlr4Sequence([
        Antlr4Symbol(new_rule_name, False),
        Antlr4Newline(),
        Antlr4Indent(1),
    ])
    rule.body[options_i[0]] = reference

    dropped_i = options_i[1:]
    rule.body = Antlr4Selection([
        option
        for index, option in enumerate(rule.body)
        if index not in dropped_i
    ])

    # a selection with a single option is unwrapped
    new_rule_body = extracted[0] if len(extracted) == 1 else extracted
    new_rule = Antlr4Rule(new_rule_name, new_rule_body)
    rules.insert(rules.index(rule), new_rule)
    return new_rule
Example #12
0
def replace_symbol_in_rule(rules,
                           rule_name,
                           symbol_name,
                           symbol_name_replace,
                           only_first=False):
    """
    Rename the symbols named symbol_name to symbol_name_replace inside of
    the rule named rule_name.

    :param rules: collection of grammar rules which is searched by name
    :param rule_name: name of the rule to walk
    :param symbol_name: name of the symbol to search for
    :param symbol_name_replace: new name of the found symbols
    :param only_first: if True the walk is stopped after the first rename
    """
    target = rule_by_name(rules, rule_name)

    class FirstFound(Exception):
        """Raised from the walk callback to stop the walk early"""

    def renamer(obj):
        if not isinstance(obj, Antlr4Symbol) or obj.symbol != symbol_name:
            return
        obj.symbol = symbol_name_replace
        if only_first:
            raise FirstFound()

    try:
        target.walk(renamer)
    except FirstFound:
        # the walk was interrupted on purpose after the first rename
        pass
Example #13
0
def get_all_used_lexer_tokens(rules, rule_name):
    """
    Collect the names of the lexer nonterminals referenced from the rule
    named rule_name and from all rules transitively referenced from it.

    :param rules: collection of grammar rules which is searched by name
    :param rule_name: name of the rule where the search starts
    :return: set of the names of the found lexer nonterminals
    """
    tokens = set()
    visited = set()
    # names of the rules which are waiting to be walked
    pending = {rule_name}

    def collect(obj: iAntlr4GramElem):
        if not isinstance(obj, Antlr4Symbol) or obj.symbol in visited:
            return
        if obj.is_lexer_nonterminal():
            tokens.add(obj.symbol)
        elif not obj.is_terminal:
            pending.add(obj.symbol)

    while pending:
        rule = rule_by_name(rules, pending.pop())
        # marked before the walk so that a recursive reference
        # does not queue the rule again
        visited.add(rule.name)
        rule.walk(collect)

    return tokens
Example #14
0
    def handle_conditional_fn(bin_op_choices, current_expr_rule):
        """
        Rewrite conditional_expression so that it starts with
        cond_expr_predicate (a copy of cond_predicate whose first
        "expression" symbol is replaced by the current expression rule)
        and add the current expression rule and conditional_expression
        to bin_op_choices.

        :note: uses the rules from the enclosing scope
        """
        # rm left recursion from cond_predicate/conditional_expression
        replace_symbol_in_rule(
            rules, "conditional_expression",
            "cond_predicate", "cond_expr_predicate",
            only_first=True)
        iterate_everything_except_first(rules, "conditional_expression")

        # create new cond_predicate (cond_expr_predicate) without left recursion
        predicate = rule_by_name(rules, "cond_predicate")
        predicate_clone = deepcopy(predicate)
        predicate_clone.name = "cond_expr_predicate"
        rules.insert(rules.index(predicate), predicate_clone)
        replace_symbol_in_rule(
            rules, "cond_expr_predicate",
            "expression", current_expr_rule.name,
            only_first=True)

        new_choices = [
            Antlr4Symbol(current_expr_rule.name, False),
            Antlr4Symbol("conditional_expression", False),
        ]
        bin_op_choices.extend(new_choices)
Example #15
0
def solve_left_recurse_and_op_precedence_for_expression(rules):
    """
    Replace the left recursive "expression" rule by a chain of rules,
    one for each group returned by get_operator_precedence_groups().

    The rule built for the last group is named "expression", the others
    "expression_<n>" (n counted from 1) and the chain starts from
    "expression_0" which holds the options of the original rule which
    are not left recursive.

    :param rules: list of grammar rules (modified in place)

    :note: an alternative based on split_rule() of the expression to
        expression_no_inside/expression_no_conditional existed here only
        as commented-out code
    """
    # expression only from rules for highest precedence ops etc.

    # expression:
    #   primary
    #   | unary_operator ( attribute_instance )* primary
    #   | inc_or_dec_expression
    #   | LPAREN operator_assignment RPAREN
    #   | expression binary_operator ( attribute_instance )* expression
    #   | conditional_expression
    #   | expression KW_INSIDE LBRACE open_range_list RBRACE
    #   | tagged_union_expression;
    base_rule = extract_option_as_rule(
        rules, "expression", [0, 1, 2, 3, 7], "expression_0")

    # expression:
    #   | expression binary_operator ( attribute_instance )* expression
    #   | conditional_expression
    #   | expression KW_INSIDE LBRACE open_range_list RBRACE;

    def handle_conditional_fn(bin_op_choices, current_expr_rule):
        """
        Rewrite conditional_expression so that it starts with
        cond_expr_predicate (a copy of cond_predicate whose first
        "expression" symbol is replaced by the current expression rule)
        and add the current expression rule and conditional_expression
        to bin_op_choices.
        """
        # rm left recursion from cond_predicate/conditional_expression
        replace_symbol_in_rule(
            rules, "conditional_expression",
            "cond_predicate", "cond_expr_predicate",
            only_first=True)
        iterate_everything_except_first(rules, "conditional_expression")

        # create new cond_predicate (cond_expr_predicate) without left recursion
        predicate = rule_by_name(rules, "cond_predicate")
        predicate_clone = deepcopy(predicate)
        predicate_clone.name = "cond_expr_predicate"
        rules.insert(rules.index(predicate), predicate_clone)
        replace_symbol_in_rule(
            rules, "cond_expr_predicate",
            "expression", current_expr_rule.name,
            only_first=True)

        new_choices = [
            Antlr4Symbol(current_expr_rule.name, False),
            Antlr4Symbol("conditional_expression", False),
        ]
        bin_op_choices.extend(new_choices)

    def handle_inside_fn(bin_op_choices, current_expr_rule):
        """
        Add the "<current rule> (KW_INSIDE LBRACE open_range_list RBRACE)*"
        choice; the previously last choice gets a trailing newline and
        indent first.
        """
        bin_op_choices[-1].extend([Antlr4Newline(), Antlr4Indent(1)])
        # expression (KW_INSIDE LBRACE open_range_list RBRACE)*;
        operand = Antlr4Symbol(current_expr_rule.name, False)
        inside_tail = Antlr4Iteration(
            Antlr4Sequence([
                Antlr4Symbol("KW_INSIDE", False),
                Antlr4Symbol("LBRACE", False),
                Antlr4Symbol("open_range_list", False),
                Antlr4Symbol("RBRACE", False),
            ]))
        bin_op_choices.append(Antlr4Sequence([operand, inside_tail]))

    rules.remove(rule_by_name(rules, "expression"))

    precedence_groups = get_operator_precedence_groups()
    group_count = len(precedence_groups)
    current_expr_rule = base_rule
    for level, prec_group in enumerate(precedence_groups, start=1):
        # the rule of the last group takes over the original name
        if level == group_count:
            new_rule_name = "expression"
        else:
            new_rule_name = "expression_%d" % level
        current_expr_rule = extract_bin_ops(
            rules, current_expr_rule, prec_group, new_rule_name,
            "expression",
            handle_conditional_fn,
            handle_inside_fn)
Example #16
0
def remove_simple_rule(name, p):
    """
    Inline the rule which consists of exactly one symbol.

    :param name: name of the rule to inline
    :param p: object holding the grammar rules in its "rules" attribute
    :raises AssertionError: if the rule was not found or if its body is
        not made of a single Antlr4Symbol
    """
    rules = p.rules
    simple_rule = rule_by_name(rules, name)
    # the rule has to exist and to be just an alias for a single symbol
    assert simple_rule is not None, name
    assert len(simple_rule.body) == 1, simple_rule
    assert isinstance(simple_rule.body[0], Antlr4Symbol)
    inline_rule(rules, name)
Example #17
0
def inline_rule(rules, rule_name):
    """
    Inline the named rule into the rules by _inline_rule() and then
    remove the rule itself from the rules.

    :param rules: list of grammar rules (modified in place)
    :param rule_name: name of the rule to inline and remove
    """
    inlined = rule_by_name(rules, rule_name)
    _inline_rule(rules, inlined)
    # the rule is removed only after it was inlined
    rules.remove(inlined)