def fix_class_scope(rules):
    """
    Apply the "class_type" rule inline, but only inside "class_scope".

    Because otherwise class_type consumes the last id after :: and it is
    not possible to recover.

    :param rules: list of grammar rules, the affected rule is modified in place
    """
    class_scope_rule = rule_by_name(rules, "class_scope")
    class_type_rule = rule_by_name(rules, "class_type")
    # only class_scope is passed as the set of rules to process,
    # class_type itself stays in the grammar
    _inline_rule([class_scope_rule], class_type_rule)
def split_rule(rules, rule_name, symbols_to_extract: List[str], subrule_name: str):
    """
    Keep in the rule only the options which are starting with a symbol from
    symbols_to_extract. Move the rest to a new subrule which is referenced
    as the first option of the original rule.

    :param rules: list of grammar rules, the new subrule is inserted
        just before the original rule
    :param rule_name: name of the rule to split (its body has to be
        an Antlr4Selection)
    :param symbols_to_extract: start symbols of the options which should
        stay in the original rule
    :param subrule_name: name for the newly created subrule
    :return: the newly created subrule
    """
    rule = rule_by_name(rules, rule_name)
    assert isinstance(rule.body, Antlr4Selection), rule

    # collect the options which do not start with any of the requested symbols
    moved = Antlr4Selection([])
    for option in rule.body:
        first_symbols = set()
        _direct_left_corner(option, first_symbols, allow_eps_in_sel=True)
        if first_symbols.isdisjoint(symbols_to_extract):
            moved.append(option)

    # the original rule: reference to the subrule followed by the kept options
    rule.body = Antlr4Selection(
        [option for option in rule.body if option not in moved])
    rule.body.insert(0, Antlr4Symbol(subrule_name, False))
    if len(rule.body) == 1:
        # selection with a single item is not required
        rule.body = rule.body[0]

    assert len(moved) > 0
    sub_body = moved[0] if len(moved) == 1 else Antlr4Selection(moved)

    sub_rule = Antlr4Rule(subrule_name, sub_body)
    rules.insert(rules.index(rule), sub_rule)
    return sub_rule
def iterate_everything_except_first(rules, rule_name):
    """
    Call _iterate_everything_except_first_and_replace_first() on the body
    of the rule, using its own first non-visual item as the replacement.

    :param rules: list of grammar rules
    :param rule_name: name of the rule to modify
    :raise NotImplementedError: if the body of the rule
        is not an Antlr4Sequence
    """
    body = rule_by_name(rules, rule_name).body
    if not isinstance(body, Antlr4Sequence):
        raise NotImplementedError()

    first = next(iter_non_visuals(body))
    _iterate_everything_except_first_and_replace_first(body, first)
def left_recurse_remove(rules):
    """
    Removing Left Recursion from Context-Free Grammars
    https://www.microsoft.com/en-us/research/wp-content/uploads/2000/04/naacl2k-proc-rev.pdf
    http://web.science.mq.edu.au/~mjohnson/papers/johnson-left-corner.pdf

    :note: supports the '|',?,* in rules
    :note: the order of the steps matters, later steps rely on the rule
        shapes produced by the earlier ones
    :param rules: list of grammar rules
    :return: the list of rules returned from the last optimise_selections()
        call, after the rewrites
    """
    # :note: higher priority = sooner in parse tree
    rules = optimise_selections(rules)

    for name in ('block_event_expression', 'event_expression'):
        direct_left_recurse_rm(rules, name)
    # disabled: direct_left_recurse_rm(rules, 'constant_expression')
    solve_left_recurse_and_op_precedence_for_constant_expression(rules)

    # method_call_root - only in method_call
    # method_call      - only in subroutine_call
    for name in ("method_call", "method_call_root"):
        inline_rule(rules, name)

    primary_splits = (
        ("primary", ["cast", "subroutine_call"],
         "primary_no_cast_no_call"),
        ("constant_primary", ["constant_cast", "subroutine_call"],
         "constant_primary_no_cast_no_call"),
    )
    for name, symbols_to_extract, subrule_name in primary_splits:
        split_rule(rules, name, symbols_to_extract, subrule_name)

    # disabled:
    #   inline_rule(rules, "cast")
    #   inline_rule(rules, "constant_cast")
    #   iterate_everything_except_first(rules, "cast")
    #   iterate_everything_except_first(rules, "constant_cast")
    # [TODO] check if really all combinations of cast/call are possible
    replace_symbol_in_rule(
        rules, "casting_type",
        "constant_primary",
        "constant_primary_no_cast_no_call")

    # solve expression - conditional_expression left recurse
    # copy cond_predicate
    inline_rule(rules, "inside_expression")

    subroutine_call_rm_lr(rules)
    inline_rule(rules, "module_path_conditional_expression")
    direct_left_recurse_rm(rules, 'module_path_expression')
    # disabled: inline_rule(rules, "inside_expression")
    for name in ("expression_or_cond_pattern", "cond_pattern"):
        inline_rule(rules, name)
    # disabled: inline_rule(rules, "conditional_expression")

    rules = optimise_selections(rules)
    solve_left_recurse_and_op_precedence_for_expression(rules)

    # the binary_operator rule is dropped from the grammar at the end
    rules.remove(rule_by_name(rules, "binary_operator"))
    return rules
def rm_semi_from_cross_body_item(rules):
    """
    Replace the SEMI symbol in the first item of the "cross_body" rule body
    with an empty sequence.

    Because SEMI is already part of cross_body_item.

    :param rules: list of grammar rules, "cross_body" is modified in place
    """
    cross_body = rule_by_name(rules, "cross_body")
    semi = Antlr4Symbol("SEMI", False)

    def match_replace_fn(o):
        # None is returned for everything which is not the SEMI symbol
        return Antlr4Sequence([]) if o == semi else None

    replace_item_by_sequence(cross_body.body[0], match_replace_fn)
def rm_ambiguity(rules):
    """
    In the "variable_decl_assignment" rule replace the option
    ( ASSIGN class_new )? with its body.

    :param rules: list of grammar rules, the rule is modified in place
    """
    rule = rule_by_name(rules, "variable_decl_assignment")
    assign_class_new = Antlr4Sequence([
        Antlr4Symbol("ASSIGN", False),
        Antlr4Symbol("class_new", False),
    ])
    to_repl = Antlr4Option(assign_class_new)

    def match_replace_fn(o):
        # None is returned for everything which is not the searched option
        return o.body if o == to_repl else None

    replace_item_by_sequence(rule, match_replace_fn)
def direct_left_recurse_rm(rules, rule_name):
    """
    Remove the direct left recursion from the rule.

    The choices which are starting with the rule itself are removed from the
    rule and the rule is renamed to "<rule_name>_item". The removed choices
    are processed by _iterate_everything_except_first_and_replace_first()
    (with the reference to "<rule_name>_item" as a replacement) and they are
    used as a body of a new rule named rule_name, which is inserted just
    before the renamed rule.

    :param rules: list of grammar rules, modified in place
    :param rule_name: name of the rule with the direct left recursion
    :raise NotImplementedError: if the body of the rule is not a selection
        or a sequence, if any choice is not a sequence or if there is not
        any choice without the left recursion
    """
    r = rule_by_name(rules, rule_name)

    if isinstance(r.body, Antlr4Selection):
        choices = r.body
    elif isinstance(r.body, Antlr4Sequence):
        choices = [r.body, ]
    else:
        raise NotImplementedError()

    # find choices which start with this rule non terminal
    lr_choices = []
    for c in choices:
        if isinstance(c, Antlr4Sequence):
            first = next(iter_non_visuals(c))
            if isinstance(first, Antlr4Symbol) and first.symbol == rule_name:
                lr_choices.append(c)
        else:
            raise NotImplementedError()

    # remove choices which are causing left recursion
    assert len(lr_choices) >= 1, rule_name
    for lr_choice in lr_choices:
        choices.remove(lr_choice)

    if len(choices) == 0:
        raise NotImplementedError()
    elif len(choices) == 1:
        r.body = choices[0]

    # rename this rule to rule_item
    r_base_name = r.name + "_item"
    for other in rules:
        # the new name must not collide with any existing rule
        # (the check has to use the iterated rule, not the renamed one)
        assert other.name != r_base_name, r_base_name
    r.name = r_base_name

    # create new rule which will implement removed choices
    # and also expands to rule_item
    choices_new = Antlr4Selection([])
    for lr_choice in lr_choices:
        first = next(iter_non_visuals(lr_choice))
        assert isinstance(first, Antlr4Symbol) and first.symbol == rule_name
        repl = Antlr4Symbol(r_base_name, False)
        _iterate_everything_except_first_and_replace_first(lr_choice, repl)

        if not choices_new:
            # formatting items only for the first choice
            lr_choice.insert(0, Antlr4Newline())
            lr_choice.insert(1, Antlr4Indent(1))
        choices_new.append(lr_choice)

    body_new = choices_new[0] if len(choices_new) == 1 else choices_new
    r_new = Antlr4Rule(rule_name, body_new)
    rules.insert(rules.index(r), r_new)
def subroutine_call_rm_lr(rules):
    """
    Rewrite the start of the third option of the "subroutine_call" rule
    to a selection of primary_no_cast_no_call | cast | implicit_class_handle.

    :note: the option has to end with "method_call_body" and its first
        non-visual item is expected to be a selection, which is overwritten
        in place
    :param rules: list of grammar rules
    """
    rule = rule_by_name(rules, "subroutine_call")
    assert isinstance(rule.body, Antlr4Selection)

    option = rule.body[2]
    items = list(iter_non_visuals(option))
    assert items[-1].symbol == "method_call_body", items[-1].symbol

    start: Antlr4Selection = items[0]
    new_start_names = ("primary_no_cast_no_call", "cast", "implicit_class_handle")
    start.clear()
    start.extend([Antlr4Symbol(name, False) for name in new_start_names])
def wrap_in_lexer_mode(rules, mode_name, enter_tokens, exit_tokens, tokens, shared_tokens):
    """
    Move the token rules to a lexer mode.

    :param rules: list of grammar rules, the rules are modified in place and
        the mode specific copies of the shared tokens are appended
    :param mode_name: name of the lexer mode
    :param enter_tokens: names of the token rules which get
        a pushMode(mode_name) lexer action
    :param exit_tokens: names of the token rules which get a popMode()
        lexer action (on the mode specific copy for the shared tokens)
    :param tokens: set of names of the token rules which are moved
        to the mode
    :param shared_tokens: names of the token rules which are kept as they
        are and copied to the mode under the name
        "<mode_name>_<token name>"
    """
    for enter_token in enter_tokens:
        enter_rule = rule_by_name(rules, enter_token)
        enter_rule.lexer_actions.append(Antlr4LexerAction.pushMode(mode_name))

    # sorted to have a deterministic order of the appended rules
    for t_name in sorted(tokens.union(shared_tokens)):
        t_rule = rule_by_name(rules, t_name)
        if t_name in shared_tokens:
            # copy the rule
            # translate mode specific token to an original token
            actions = deepcopy(t_rule.lexer_actions)
            if Antlr4LexerAction.skip() not in actions:
                actions.append(Antlr4LexerAction.type(t_name))
            mode_specific_t_rule = Antlr4Rule(
                mode_name + "_" + t_name, deepcopy(t_rule.body),
                lexer_mode=mode_name,
                lexer_actions=actions)
            rules.append(mode_specific_t_rule)
            t_rule = mode_specific_t_rule

        t_rule.lexer_mode = mode_name
        # plain membership test, sorting of exit_tokens on every iteration
        # is not required for it
        if t_name in exit_tokens:
            t_rule.lexer_actions.append(Antlr4LexerAction.popMode())
def solve_left_recurse_and_op_precedence_for_constant_expression(rules):
    """
    Replace the "constant_expression" rule with a chain of rules
    constant_expression_0, constant_expression_1, ..., constant_expression
    built by extract_bin_ops(), one rule for each group from
    get_operator_precedence_groups().

    :param rules: list of grammar rules, modified in place
    """
    # constant_expression:
    #       constant_primary
    #       | unary_operator ( attribute_instance )* constant_primary
    #       | constant_expression binary_operator ( attribute_instance )* constant_expression
    #       | constant_expression QUESTIONMARK ( attribute_instance )* constant_expression COLON constant_expression;
    base_rule_name = "constant_expression"
    c_expression_0 = extract_option_as_rule(
        rules, base_rule_name, [0, 1], "constant_expression_0")
    # constant_expression_0:
    #       constant_primary
    #       | unary_operator ( attribute_instance )* constant_primary

    def handle_conditional_fn(bin_op_choices, current_expr_rule):
        # current_expr_rule
        #  ( QUESTIONMARK ( attribute_instance )*
        #    constant_expression COLON constant_expression )*
        conditional_tail = Antlr4Sequence([
            Antlr4Symbol("QUESTIONMARK", False),
            Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
            Antlr4Symbol("constant_expression", False),
            Antlr4Symbol("COLON", False),
            Antlr4Symbol("constant_expression", False),
        ])
        bin_op_choices.extend([
            Antlr4Symbol(current_expr_rule.name, False),
            Antlr4Iteration(conditional_tail),
        ])

    def handle_inside_fn(bin_op_choices, current_expr_rule):
        # nothing is added for the inside operator in constant_expression
        pass

    rules.remove(rule_by_name(rules, base_rule_name))

    current_expr_rule = c_expression_0
    op_group = get_operator_precedence_groups()
    last_i = len(op_group) - 1
    for i, prec_group in enumerate(op_group):
        # the rule for the last group takes the name of the original rule
        new_rule_name = (
            "constant_expression" if i == last_i
            else "constant_expression_%d" % (i + 1))
        current_expr_rule = extract_bin_ops(
            rules, current_expr_rule, prec_group, new_rule_name,
            "constant_expression",
            handle_conditional_fn, handle_inside_fn)
def extract_option_as_rule(rules, rule_name, options_i, new_rule_name):
    """
    Move the selected options of the rule to a new rule and reference
    the new rule from the original one.

    The first extracted option is replaced with the reference to the new
    rule (followed by a newline and an indent), the remaining extracted
    options are removed from the original rule.

    :param rules: list of grammar rules, the new rule is inserted
        just before the original rule
    :param rule_name: name of the rule to extract from (its body
        has to be an Antlr4Selection)
    :param options_i: indexes of the options to extract
    :param new_rule_name: name for the newly created rule
    :return: the newly created rule
    """
    rule = rule_by_name(rules, rule_name)
    assert isinstance(rule.body, Antlr4Selection)

    extracted = Antlr4Selection([])
    for option_i in options_i:
        extracted.append(rule.body[option_i])

    reference = Antlr4Sequence([
        Antlr4Symbol(new_rule_name, False),
        Antlr4Newline(),
        Antlr4Indent(1),
    ])
    rule.body[options_i[0]] = reference

    # the first index now holds the reference, drop only the others
    dropped_i = options_i[1:]
    rule.body = Antlr4Selection([
        option for option_i, option in enumerate(rule.body)
        if option_i not in dropped_i
    ])

    new_body = extracted[0] if len(extracted) == 1 else extracted
    new_rule = Antlr4Rule(new_rule_name, new_body)
    rules.insert(rules.index(rule), new_rule)
    return new_rule
def replace_symbol_in_rule(rules, rule_name, symbol_name, symbol_name_replace, only_first=False):
    """
    Rename the symbols named symbol_name to symbol_name_replace
    in the specified rule.

    :param rules: list of grammar rules
    :param rule_name: name of the rule where the symbols are renamed
    :param symbol_name: name of the symbols to rename
    :param symbol_name_replace: new name for the symbols
    :param only_first: if True only the first symbol found by the walk
        is renamed
    """
    rule = rule_by_name(rules, rule_name)

    class FirstFound(Exception):
        """Used to stop the walk after the first renamed symbol."""

    def renamer(obj):
        if not isinstance(obj, Antlr4Symbol) or obj.symbol != symbol_name:
            return
        obj.symbol = symbol_name_replace
        if only_first:
            raise FirstFound()

    try:
        rule.walk(renamer)
    except FirstFound:
        # expected, the walk was stopped on purpose
        pass
def get_all_used_lexer_tokens(rules, rule_name):
    """
    Collect the names of the lexer tokens which are used in the specified
    rule and in all parser rules which are (transitively) referenced from it.

    :param rules: list of grammar rules
    :param rule_name: name of the rule where the search starts
    :return: set of names of the used lexer tokens
    """
    tokens = set()
    seen = set()
    # names of the parser rules which are waiting to be walked
    pending = {rule_name}

    def walk(obj: iAntlr4GramElem):
        if not isinstance(obj, Antlr4Symbol) or obj.symbol in seen:
            return
        if obj.is_lexer_nonterminal():
            tokens.add(obj.symbol)
        elif not obj.is_terminal:
            pending.add(obj.symbol)

    while pending:
        rule = rule_by_name(rules, pending.pop())
        # marked as seen before the walk so the rule is not scheduled again
        seen.add(rule.name)
        rule.walk(walk)

    return tokens
def handle_conditional_fn(bin_op_choices, current_expr_rule):
    """
    Remove the left recursion from cond_predicate/conditional_expression
    and add the conditional_expression to bin_op_choices.

    :note: `rules` is not a parameter of this function, it is taken
        from the surrounding scope
    :param bin_op_choices: container extended with the reference
        to current_expr_rule and to "conditional_expression"
    :param current_expr_rule: rule which is used in place of the first
        "expression" in the copy of cond_predicate
    """
    new_predicate_name = "cond_expr_predicate"

    # rm left recursion from cond_predicate/conditional_expression
    replace_symbol_in_rule(
        rules, "conditional_expression",
        "cond_predicate", new_predicate_name,
        only_first=True)
    iterate_everything_except_first(rules, "conditional_expression")

    # create new cond_predicate (cond_expr_predicate) without left recursion
    cond_predicate = rule_by_name(rules, "cond_predicate")
    cond_expr_predicate = deepcopy(cond_predicate)
    cond_expr_predicate.name = new_predicate_name
    rules.insert(rules.index(cond_predicate), cond_expr_predicate)
    replace_symbol_in_rule(
        rules, new_predicate_name,
        "expression", current_expr_rule.name,
        only_first=True)

    current_expr = Antlr4Symbol(current_expr_rule.name, False)
    conditional_expr = Antlr4Symbol("conditional_expression", False)
    bin_op_choices.extend([current_expr, conditional_expr])
def solve_left_recurse_and_op_precedence_for_expression(rules):
    """
    Replace the "expression" rule with a chain of rules
    expression_0, expression_1, ..., expression built by extract_bin_ops(),
    one rule for each group from get_operator_precedence_groups().

    :param rules: list of grammar rules, modified in place
    """
    # disabled alternative:
    # split_rule(rules, "expression",
    #            ["inside_expression"],
    #            "expression_no_inside")
    # replace_symbol_in_rule(
    #     rules, "inside_expression",
    #     "expression",
    #     "expression_no_inside")
    # iterate_everything_except_first(
    #     rules, "inside_expression")
    #
    # # cond_predicate starting with expression_no_conditional instead of expression
    # # expression_no_conditional
    # split_rule(rules, "expression_no_inside",
    #            ["conditional_expression"],
    #            "expression_no_conditional")

    # expression only from rules for highest precedence ops etc.
    # expression:
    #       primary
    #       | unary_operator ( attribute_instance )* primary
    #       | inc_or_dec_expression
    #       | LPAREN operator_assignment RPAREN
    #       | expression binary_operator ( attribute_instance )* expression
    #       | conditional_expression
    #       | expression KW_INSIDE LBRACE open_range_list RBRACE
    #       | tagged_union_expression;
    base_rule_name = "expression"
    expression_0 = extract_option_as_rule(
        rules, base_rule_name, [0, 1, 2, 3, 7], "expression_0")
    # expression:
    #       | expression binary_operator ( attribute_instance )* expression
    #       | conditional_expression
    #       | expression KW_INSIDE LBRACE open_range_list RBRACE;

    def handle_conditional_fn(bin_op_choices, current_expr_rule):
        new_predicate_name = "cond_expr_predicate"

        # rm left recursion from cond_predicate/conditional_expression
        replace_symbol_in_rule(
            rules, "conditional_expression",
            "cond_predicate", new_predicate_name,
            only_first=True)
        iterate_everything_except_first(rules, "conditional_expression")

        # create new cond_predicate (cond_expr_predicate) without left recursion
        cond_predicate = rule_by_name(rules, "cond_predicate")
        cond_expr_predicate = deepcopy(cond_predicate)
        cond_expr_predicate.name = new_predicate_name
        rules.insert(rules.index(cond_predicate), cond_expr_predicate)
        replace_symbol_in_rule(
            rules, new_predicate_name,
            "expression", current_expr_rule.name,
            only_first=True)

        current_expr = Antlr4Symbol(current_expr_rule.name, False)
        conditional_expr = Antlr4Symbol("conditional_expression", False)
        bin_op_choices.extend([current_expr, conditional_expr])

    def handle_inside_fn(bin_op_choices, current_expr_rule):
        # formatting items after the previous choice
        bin_op_choices[-1].extend([Antlr4Newline(), Antlr4Indent(1)])

        # expression (KW_INSIDE LBRACE open_range_list RBRACE)*;
        inside_tail = Antlr4Sequence([
            Antlr4Symbol("KW_INSIDE", False),
            Antlr4Symbol("LBRACE", False),
            Antlr4Symbol("open_range_list", False),
            Antlr4Symbol("RBRACE", False),
        ])
        bin_op_choice = Antlr4Sequence([
            Antlr4Symbol(current_expr_rule.name, False),
            Antlr4Iteration(inside_tail),
        ])
        bin_op_choices.append(bin_op_choice)

    rules.remove(rule_by_name(rules, base_rule_name))

    current_expr_rule = expression_0
    op_group = get_operator_precedence_groups()
    last_i = len(op_group) - 1
    for i, prec_group in enumerate(op_group):
        # the rule for the last group takes the name of the original rule
        new_rule_name = (
            "expression" if i == last_i
            else "expression_%d" % (i + 1))
        current_expr_rule = extract_bin_ops(
            rules, current_expr_rule, prec_group, new_rule_name,
            "expression",
            handle_conditional_fn, handle_inside_fn)
def remove_simple_rule(name, p):
    """
    Inline and remove a rule whose body is just a single symbol.

    :param name: name of the rule to remove
    :param p: object with the `rules` attribute (list of grammar rules)
    """
    rules = p.rules
    rule = rule_by_name(rules, name)
    assert rule is not None, name

    body = rule.body
    assert len(body) == 1, rule
    assert isinstance(body[0], Antlr4Symbol)

    inline_rule(rules, name)
def inline_rule(rules, rule_name):
    """
    Apply _inline_rule() for the rule on all rules
    and then remove the rule from the list.

    :param rules: list of grammar rules, modified in place
    :param rule_name: name of the rule to inline and remove
    """
    inlined = rule_by_name(rules, rule_name)
    _inline_rule(rules, inlined)
    rules.remove(inlined)