def optimise_subroutine_call(rules):
    """
    Rebuild the "subroutine_call" rule into a form without the
    primary/cast recursion: the call chain becomes an iterated
    "( DOT ... subroutine_call_args )*" tail.

    :param rules: list of Antlr4Rule objects, modified in place
    """
    r = rule_by_name(rules, "subroutine_call")
    Antlr4GenericOptimizer().optimize([r, ])
    # guard: verify the first option still has the expected original shape
    # before it is replaced (this transform targets one specific grammar)
    c0 = Antlr4parser().from_str(""" ( class_qualifier | ( primary | implicit_class_handle ) DOT )? ( identifier ( attribute_instance )* ( LPAREN list_of_arguments RPAREN )? | array_method_name ( attribute_instance )* ( LPAREN list_of_arguments RPAREN )? ( KW_WITH LPAREN expression RPAREN )? | randomize_call ) """)
    assert r.body[0].eq_relaxed(c0), r.body[0]
    # factor the "( attr )* ( LPAREN args RPAREN )? ( KW_WITH ... )?" tail
    # into a reusable rule, inserted just before "subroutine_call"
    subroutine_call_args = Antlr4Rule(
        "subroutine_call_args",
        Antlr4parser().from_str(""" ( attribute_instance )* ( LPAREN list_of_arguments RPAREN )? ( KW_WITH LPAREN expression RPAREN )? """))
    rules.insert(rules.index(r), subroutine_call_args)
    # new first option: a primary/cast head followed by iterated DOT-chained
    # method calls, each reusing subroutine_call_args
    new_c0 = Antlr4parser().from_str(""" ( primary_no_cast_no_call | cast ) subroutine_call_args ( DOT ( array_method_name | randomize_call | primary_no_cast_no_call | cast ) subroutine_call_args )* """)
    r.body[0] = new_c0
    # "primary" used to start with primary_no_cast_no_call; that option is now
    # reachable through the new subroutine_call head, so drop it from primary
    primary = rule_by_name(rules, "primary")
    assert primary.body[0].eq_relaxed(
        Antlr4Symbol("primary_no_cast_no_call", False))
    del primary.body[0]
    # simplify the system task/function call option (body index 2)
    c2 = Antlr4parser().from_str(""" any_system_tf_identifier ( LPAREN ( list_of_arguments | data_type ( COMMA expression )? | expression ( COMMA ( expression )? )* ( COMMA ( clocking_event )? )? ) RPAREN )? """)
    assert r.body[2].eq_relaxed(c2)
    r.body[2] = Antlr4parser().from_str(""" any_system_tf_identifier ( LPAREN ( ( data_type )? list_of_arguments ( COMMA clocking_event )? ) RPAREN )? """)
    # the plain hierarchical-identifier call option (body index 1) is now
    # covered by the rebuilt first option; remove it
    c1 = Antlr4parser().from_str(""" ps_or_hierarchical_identifier ( attribute_instance )* ( LPAREN list_of_arguments RPAREN )? """)
    assert r.body[1].eq_relaxed(c1), r.body[1]
    del r.body[1]
def fix_class_scope(rules):
    """
    Inline "class_type" into "class_scope".

    Without this, class_type consumes the last identifier before "::" and
    the parser cannot recover it.
    """
    class_scope = rule_by_name(rules, "class_scope")
    class_type = rule_by_name(rules, "class_type")
    _inline_rule([class_scope], class_type)
def handle_conditional_fn(bin_op_choices, current_expr_rule):
    # rm left recursion from cond_predicate/conditional_expression
    # NOTE(review): "rules" is a free variable here and "current_expr_rule"
    # is unused - the signature matches the callback expected by
    # extract_bin_ops, so this function appears intended to be nested inside
    # a scope that defines "rules"; verify it is not called at module level.
    cond_predicate = rule_by_name(rules, "cond_predicate")
    conditional_expression = rule_by_name(rules, "conditional_expression")
    rules.remove(conditional_expression)
    # inline cond_predicate into conditional_expression, then append the
    # conditional tail (everything after the first body element) as an option
    _inline_rule([conditional_expression, ], cond_predicate)
    bin_op_choices.append(Antlr4Sequence(conditional_expression.body[1:]))
def fix_lexer_for_table_def(p):
    """
    Merge OUTPUT_SYMBOL into LEVEL_SYMBOL (OUTPUT_SYMBOL is a special case
    of it) and move all tokens used inside UDP table bodies into a
    dedicated TABLE_MODE lexer mode.
    """
    output_symbol = Antlr4Symbol("OUTPUT_SYMBOL", False)

    def rename_output_to_level(obj):
        # retarget every OUTPUT_SYMBOL reference to LEVEL_SYMBOL
        if obj == output_symbol:
            obj.symbol = "LEVEL_SYMBOL"

    for rule in p.rules:
        rule.body.walk(rename_output_to_level)
    p.rules.remove(rule_by_name(p.rules, "OUTPUT_SYMBOL"))

    # every lexer token reachable from the table constructs
    table_tokens = (get_all_used_lexer_tokens(p.rules, "combinational_body")
                    | get_all_used_lexer_tokens(p.rules, "sequential_entry"))
    # [TODO] += comments, whitespaces
    table_tokens.remove("KW_TABLE")

    table_shared_tokens = {
        'SEMI', 'RPAREN', 'COLON', 'LPAREN', 'MINUS',
        *COMMENT_AND_WS_TOKENS,
    }
    wrap_in_lexer_mode(p.rules, "TABLE_MODE",
                       {"KW_TABLE", }, {"KW_ENDTABLE", },
                       table_tokens, table_shared_tokens)
def split_rule(rules, rule_name, symbols_to_extract: List[str],
               subrule_name: str):
    """
    Keep in the rule only the options which start with a symbol from
    symbols_to_extract; every other option is moved into a new subrule
    (inserted just before the original rule) and referenced from it.

    :return: the newly created subrule
    """
    r = rule_by_name(rules, rule_name)
    assert isinstance(r.body, Antlr4Selection), r

    # options whose left corner does not intersect symbols_to_extract
    extracted = Antlr4Selection([])
    for option in r.body:
        first_symbols = set()
        _direct_left_corner(option, first_symbols, allow_eps_in_sel=True)
        if not first_symbols.intersection(symbols_to_extract):
            extracted.append(option)

    remaining = [o for o in r.body if not (o in extracted)]
    r.body = Antlr4Selection(remaining)
    # the subrule reference becomes the first option of the original rule
    r.body.insert(0, Antlr4Symbol(subrule_name, False))
    if len(r.body) == 1:
        # single option left -> collapse the selection
        r.body = r.body[0]

    assert len(extracted) > 0
    if len(extracted) == 1:
        new_body = extracted[0]
    else:
        new_body = Antlr4Selection(extracted)
    sub_r = Antlr4Rule(subrule_name, new_body)
    rules.insert(rules.index(r), sub_r)
    return sub_r
def iterate_everything_except_first(rules, rule_name):
    """
    Wrap everything after the first (non-visual) item of the rule body in
    an iteration.
    """
    rule = rule_by_name(rules, rule_name)
    if not isinstance(rule.body, Antlr4Sequence):
        raise NotImplementedError()
    first = next(iter_non_visuals(rule.body))
    _iterate_everything_except_first_and_replace_first(rule.body, first)
def remove_simple_rule(name, p):
    """
    Inline (and thereby remove) a rule whose body is just a single
    symbol reference.
    """
    rule = rule_by_name(p.rules, name)
    assert rule is not None, name
    # sanity: the rule really is trivial before inlining it away
    assert len(rule.body) == 1, rule
    assert isinstance(rule.body[0], Antlr4Symbol)
    inline_rule(p.rules, name)
def numbers_add_whitespace_after_base(rules):
    """
    Allow optional whitespace around the base specifier of sized numbers.

    Number-with-base rules lose their leading "( SIZE )?" element and are
    turned into fragments; the *_BASE rules get optional WHITE_SPACE
    inserted around their characters.  All based-number fragments are then
    grouped under a new ANY_BASED_NUMBER token and "integral_number" is
    rebuilt on top of it.

    :param rules: list of Antlr4Rule objects, modified in place
    """
    number_rules = set([
        "DECIMAL_NUMBER_WITH_BASE",
        "DECIMAL_INVALID_NUMBER_WITH_BASE",
        "DECIMAL_TRISTATE_NUMBER_WITH_BASE",
        "BINARY_NUMBER",
        "OCTAL_NUMBER",
        "HEX_NUMBER",
    ])
    number_base_rules = set([
        "DECIMAL_BASE",
        "BINARY_BASE",
        "OCTAL_BASE",
        "HEX_BASE",
    ])
    # decimal_number is used only in integral_number
    inline_rule(rules, "decimal_number")

    def opt_ws():
        # "( WHITE_SPACE )?"
        return Antlr4Option(Antlr4Symbol("WHITE_SPACE", False))

    # BUGFIX: removed a stray no-op statement that built an
    # "(Antlr4Option(...),)" tuple and immediately discarded it.
    for r in rules:
        if r.name in number_rules:
            # ( SIZE )? *_BASE ....
            assert r.body[0].body.symbol == "SIZE", r
            assert r.body[1].symbol.endswith("_BASE"), r
            del r.body[0]
            r.is_fragment = True
        elif r.name in number_base_rules:
            # APOSTROPHE ( [sS] )? [dD];
            r.body.insert(2, opt_ws())
            r.body.insert(1, opt_ws())
            r.body.append(opt_ws())

    any_based_number = Antlr4Rule(
        "ANY_BASED_NUMBER",
        Antlr4Selection([Antlr4Symbol(n, False) for n in number_rules]))
    rules.insert(rules.index(rule_by_name(rules, "HEX_NUMBER")),
                 any_based_number)

    integral_number = rule_by_name(rules, "integral_number")
    integral_number.body = Antlr4parser().from_str(""" ( UNSIGNED_NUMBER )? ANY_BASED_NUMBER | UNSIGNED_NUMBER """)
def optimize_action_block(rules):
    """
    Flatten "action_block" into three explicit options so the parser does
    not have to decide about the optional leading statement.

    :param rules: list of Antlr4Rule objects, modified in place
    """
    action_block = rule_by_name(rules, "action_block")
    # guard: only rewrite the expected original shape
    assert action_block.body.eq_relaxed(Antlr4parser().from_str("( ( statement )? KW_ELSE )? statement_or_null"))
    action_block.body = Antlr4parser().from_str(""" ( attribute_instance )* SEMI | KW_ELSE statement_or_null | statement ( KW_ELSE statement_or_null )? """)
def fix_subroutine_call(rules):
    """
    Prepend "( class_qualifier )? method_call_body" as the first option of
    the "subroutine_call" rule.
    """
    subroutine_call = rule_by_name(rules, "subroutine_call")
    new_option = Antlr4Sequence([
        Antlr4Option(Antlr4Symbol("class_qualifier", False)),
        Antlr4Symbol("method_call_body", False),
    ])
    subroutine_call.body.insert(0, new_option)
def rm_ambiguity(rules):
    # Remove a parse ambiguity in variable_decl_assignment by replacing the
    # optional "( ASSIGN class_new )?" with its body.
    # NOTE(review): another function named rm_ambiguity appears later in this
    # file and shadows this definition - verify which one is intended.
    rule = rule_by_name(rules, "variable_decl_assignment")
    to_repl = Antlr4parser().from_str("( ASSIGN class_new )?")

    def match_replace_fn(o):
        # replace the matched optional element with its body
        if o == to_repl:
            return o.body

    replace_item_by_sequence(rule, match_replace_fn)
def _optimize_ps_parameter_identifier(rules): ps_parameter_identifier = rule_by_name(rules, "ps_parameter_identifier") # ( ( package_scope | class_scope )? | ( # identifier ( LSQUARE_BR constant_expression RSQUARE_BR )? DOT )* # ) identifier ps_parameter_identifier.body = Antlr4parser().from_str(""" package_or_class_scoped_id ( DOT identifier ( LSQUARE_BR constant_expression RSQUARE_BR )? )* """)
def left_recurse_remove(rules): """ Removing Left Recursion from Context-Free Grammars https://www.microsoft.com/en-us/research/wp-content/uploads/2000/04/naacl2k-proc-rev.pdf http://web.science.mq.edu.au/~mjohnson/papers/johnson-left-corner.pdf :note: supports the '|',?,* in rules """ # :note: higher priority = sooner in parse tree rules = optimise_selections(rules) direct_left_recurse_rm(rules, 'block_event_expression') direct_left_recurse_rm(rules, 'event_expression') # direct_left_recurse_rm(rules, 'constant_expression') solve_left_recurse_and_op_precedence_for_constant_expression(rules) # method_call_root - only in method_call # method_call - only in subroutine_call inline_rule(rules, "method_call") inline_rule(rules, "method_call_root") split_rule(rules, "primary", ["cast", "subroutine_call"], "primary_no_cast_no_call") split_rule(rules, "constant_primary", ["constant_cast", "subroutine_call"], "constant_primary_no_cast_no_call") # inline_rule(rules, "cast") # inline_rule(rules, "constant_cast") # iterate_everything_except_first( # rules, "cast") # iterate_everything_except_first( # rules, "constant_cast") # [TODO] check if really all combinations of cast/call are possible replace_symbol_in_rule(rules, "casting_type", "constant_primary", "constant_primary_no_cast_no_call") # solve expression - conditional_expression left recurse # copy cond_predicate inline_rule(rules, "inside_expression") subroutine_call_rm_lr(rules) inline_rule(rules, "module_path_conditional_expression") direct_left_recurse_rm(rules, 'module_path_expression') # inline_rule(rules, "inside_expression") inline_rule(rules, "expression_or_cond_pattern") inline_rule(rules, "cond_pattern") # inline_rule(rules, "conditional_expression") rules = optimise_selections(rules) solve_left_recurse_and_op_precedence_for_expression(rules) binary_operator = rule_by_name(rules, "binary_operator") rules.remove(binary_operator) return rules
def replace_symbol_in_rule(rules, rule_name, symbol_name,
                           symbol_name_replace, only_first=False):
    """
    Rename occurrences of symbol_name to symbol_name_replace inside the
    named rule (delegates to _replace_symbol_in_rule).

    NOTE(review): a second function with this same name is defined later in
    this file and shadows this one - verify which definition should survive.
    """
    rule = rule_by_name(rules, rule_name)
    _replace_symbol_in_rule(rule, symbol_name, symbol_name_replace,
                            only_first=only_first)
def optimize_item_rules(rules):
    """
    Inline the various *_item wrapper rules and adjust "generate_item".

    :param rules: list of Antlr4Rule objects, modified in place
    """
    for r in ["package_or_generate_item_declaration",
              "module_or_generate_item",
              "module_or_generate_item_declaration",
              "module_common_item",
              "interface_or_generate_item",
              "checker_or_generate_item_declaration",
              ]:
        inline_rule(rules, r)

    generate_item = rule_by_name(rules, "generate_item")
    # guard: the last option must still be checker_or_generate_item
    assert generate_item.body[-1].eq_relaxed(Antlr4Symbol("checker_or_generate_item", False))
    # replace it with "KW_RAND data_declaration" and append
    # program_generate_item as a new option
    # NOTE(review): presumably the remaining parts of checker_or_generate_item
    # are covered by the previously inlined rules - verify
    generate_item.body[-1] = Antlr4parser().from_str("KW_RAND data_declaration")
    generate_item.body.append(Antlr4parser().from_str("program_generate_item"))
def fix_dpi_import_export(rules):
    """
    Extend every C_IDENTIFIER occurrence in "dpi_import_export" to also
    accept ESCAPED_IDENTIFIER.
    """
    C_IDENTIFIER = Antlr4Symbol("C_IDENTIFIER", False)

    def match_replace_fn(o):
        # replace the symbol with "C_IDENTIFIER | ESCAPED_IDENTIFIER"
        # NOTE(review): the selection reuses the single closure-level
        # C_IDENTIFIER instance; if the rule contains several occurrences
        # they end up sharing one object - verify this is safe for any
        # later in-place mutation of the tree.
        if o == C_IDENTIFIER:
            return Antlr4Selection(
                [C_IDENTIFIER, Antlr4Symbol("ESCAPED_IDENTIFIER", False)])

    r = rule_by_name(rules, "dpi_import_export")
    replace_item_by_sequence(r.body, match_replace_fn)
def direct_left_recurse_rm(rules, rule_name):
    """
    Remove direct left recursion from the named rule.

    The rule is renamed to "<rule_name>_item" with its left-recursive
    options removed; a new rule named rule_name is created from the removed
    options, each rewritten to start with "<rule_name>_item" and iterate
    its tail.

    :param rules: list of Antlr4Rule objects, modified in place
    :raises NotImplementedError: on rule shapes this transform cannot handle
    """
    r = rule_by_name(rules, rule_name)
    if isinstance(r.body, Antlr4Selection):
        choices = r.body
    elif isinstance(r.body, Antlr4Sequence):
        choices = [r.body, ]
    else:
        raise NotImplementedError()

    # find the choices which start with this rule's own non-terminal
    lr_choices = []
    for c in choices:
        if isinstance(c, Antlr4Sequence):
            first = next(iter_non_visuals(c))
            if isinstance(first, Antlr4Symbol) and first.symbol == rule_name:
                lr_choices.append(c)
        else:
            raise NotImplementedError()

    # remove the choices which are causing the left recursion
    assert len(lr_choices) >= 1, rule_name
    for lr_choice in lr_choices:
        choices.remove(lr_choice)

    if len(choices) == 0:
        raise NotImplementedError()
    elif len(choices) == 1:
        r.body = choices[0]

    # rename this rule to "<rule_name>_item"
    r_base_name = r.name + "_item"
    for _r in rules:
        # BUGFIX: the original asserted on "r.name" (loop-invariant and
        # always true); the intent is to check that no existing rule
        # already uses the new name
        assert _r.name != r_base_name, r_base_name
    r.name = r_base_name

    # create a new rule which implements the removed choices and also
    # expands to "<rule_name>_item"
    choices_new = Antlr4Selection([])
    for lr_choice in lr_choices:
        first = next(iter_non_visuals(lr_choice))
        assert isinstance(first, Antlr4Symbol) and first.symbol == rule_name
        repl = Antlr4Symbol(r_base_name, False)
        _iterate_everything_except_first_and_replace_first(lr_choice, repl)
        if not choices_new:
            # visual formatting for the first option only
            lr_choice.insert(0, Antlr4Newline())
            lr_choice.insert(1, Antlr4Indent(1))
        choices_new.append(lr_choice)

    body_new = choices_new[0] if len(choices_new) == 1 else choices_new
    r_new = Antlr4Rule(rule_name, body_new)
    rules.insert(rules.index(r), r_new)
def rm_ambiguity(rules):
    # Remove a parse ambiguity in variable_decl_assignment by replacing the
    # optional "( ASSIGN class_new )?" with its body (the ASSIGN class_new
    # sequence itself).
    # NOTE(review): this re-definition shadows an earlier rm_ambiguity in
    # this file which builds to_repl via Antlr4parser().from_str() instead -
    # verify which variant is intended.
    rule = rule_by_name(rules, "variable_decl_assignment")
    to_repl = Antlr4Option(
        Antlr4Sequence(
            [Antlr4Symbol("ASSIGN", False),
             Antlr4Symbol("class_new", False)]))

    def match_replace_fn(o):
        # unwrap the option: return its inner sequence
        if o == to_repl:
            return o.body

    replace_item_by_sequence(rule, match_replace_fn)
def rm_semi_from_cross_body_item(rules):
    """
    Drop the SEMI from the first option of "cross_body"
    (SEMI is already part of cross_body_item).
    """
    cross_body = rule_by_name(rules, "cross_body")
    semi = Antlr4Symbol("SEMI", False)

    def drop_semi(o):
        if o == semi:
            # an empty sequence effectively deletes the matched item
            return Antlr4Sequence([])

    replace_item_by_sequence(cross_body.body[0], drop_semi)
def subroutine_call_rm_lr(rules):
    """
    Remove left recursion from the third option of "subroutine_call" by
    replacing the head selection (the part before method_call_body) with
    non-recursive alternatives.
    """
    r = rule_by_name(rules, "subroutine_call")
    assert isinstance(r.body, Antlr4Selection)
    c = r.body[2]
    _body = list(iter_non_visuals(c))
    # guard: the option must end with method_call_body
    assert _body[-1].symbol == "method_call_body", _body[-1].symbol
    # rewrite the head selection in place
    start: Antlr4Selection = _body[0]
    start.clear()
    start.extend([
        Antlr4Symbol("primary_no_cast_no_call", False),
        Antlr4Symbol("cast", False),
        Antlr4Symbol("implicit_class_handle", False)
    ])
def replace_same_rules(rules, rules_to_replace: List[str], replacement: str):
    """
    Remove all but the first of several identical rules and redirect every
    reference to any of them to the rule named "replacement".
    """
    first = None
    for name in rules_to_replace:
        current = rule_by_name(rules, name)
        if first is None:
            first = current
        else:
            # the rules being merged really must be identical
            assert (first.body == current.body
                    or first.body.toAntlr4() == current.body.toAntlr4()
                    ), (first, current)
            rules.remove(current)
    for rule in rules:
        for symbol_name in rules_to_replace:
            _replace_symbol_in_rule(rule, symbol_name, replacement, False)
def optimize_primary(rules):
    """
    Simplify "primary_no_cast_no_call": merge the hierarchical-id + select
    option into a single combined rule reference and drop the
    let_expression option (it is just a call).
    """
    primary_no_cast_no_call = rule_by_name(rules, "primary_no_cast_no_call")

    def assert_eq(index, s):
        # guard: verify the option at "index" still has the expected shape
        elm = Antlr4parser().from_str(s)
        assert (primary_no_cast_no_call.body[index].eq_relaxed(elm)
                ), primary_no_cast_no_call.body[index]

    assert_eq(5, "package_or_class_scoped_hier_id_with_const_select select")
    assert_eq(8, "let_expression")  # is just call
    primary_no_cast_no_call.body[5] = Antlr4parser().from_str(""" package_or_class_scoped_hier_id_with_select """)
    del primary_no_cast_no_call.body[8]
def fix_implicit_data_type(rules):
    """
    Rewrite "implicit_data_type" so it can no longer match an empty string:

        : (signing)? (packed_dimension)*
        ->
        : signing (packed_dimension)*
        | (packed_dimension)+
        ;
    """
    rule = rule_by_name(rules, "implicit_data_type")
    with_signing = Antlr4Sequence([
        Antlr4Symbol("signing", False),
        Antlr4Iteration(Antlr4Symbol("packed_dimension", False)),
    ])
    dims_only = Antlr4Iteration(Antlr4Symbol("packed_dimension", False),
                                positive=True)
    rule.body = Antlr4Selection([with_signing, dims_only])
def selection_extract_common(rules, rule_name_a, rule_name_b, new_rule_name):
    """
    Extract the common sequence prefix of two rules into a new rule.

    a0: a b c
    b0: a b d
    ->
    new_rule_name: a b
    a0: new_rule_name c
    b0: new_rule_name d

    :return: nothing; rules list is modified in place
    """
    a = rule_by_name(rules, rule_name_a)
    b = rule_by_name(rules, rule_name_b)
    assert isinstance(a.body, Antlr4Sequence), a
    assert isinstance(b.body, Antlr4Sequence), b

    # length of the common prefix
    # BUGFIX: the original used the index of the first mismatch as the
    # prefix length, which silently dropped the last common element when
    # one body was entirely a prefix of the other (loop ended w/o mismatch)
    prefix_len = 0
    for _a, _b in zip(a.body, b.body):
        if _a != _b:
            break
        prefix_len += 1
    # a one-element "common prefix" is not worth extracting
    assert prefix_len > 1

    body = Antlr4Sequence(a.body[:prefix_len])
    c = Antlr4Rule(new_rule_name, body)
    rules.insert(rules.index(a), c)

    # replace the prefix in both rules with a reference to the new rule
    a.body[:prefix_len] = [Antlr4Symbol(new_rule_name, False)]
    b.body[:prefix_len] = [Antlr4Symbol(new_rule_name, False)]
def wrap_in_lexer_mode(rules, mode_name, enter_tokens, exit_tokens, tokens,
                       shared_tokens):
    """
    Move a set of lexer tokens into a dedicated lexer mode.

    :param enter_tokens: tokens which push the mode
    :param exit_tokens: tokens which pop the mode
    :param tokens: tokens used only inside the mode (moved into it)
    :param shared_tokens: tokens also used outside the mode; these are
        copied into the mode under a "<mode_name>_<token>" name with a
        type() action translating them back to the original token type
    """
    for enter_token in enter_tokens:
        enter_rule = rule_by_name(rules, enter_token)
        enter_rule.lexer_actions.append(Antlr4LexerAction.pushMode(mode_name))

    for t_name in sorted(tokens.union(shared_tokens)):
        t_rule = rule_by_name(rules, t_name)
        if t_name in shared_tokens:
            # copy the rule and translate the mode-specific token
            # back to the original token type (unless it is skipped)
            actions = deepcopy(t_rule.lexer_actions)
            if Antlr4LexerAction.skip() not in actions:
                actions.append(Antlr4LexerAction.type(t_name))
            mode_specific_t_rule = Antlr4Rule(
                mode_name + "_" + t_name, deepcopy(t_rule.body),
                lexer_mode=mode_name,
                lexer_actions=actions)
            rules.append(mode_specific_t_rule)
            t_rule = mode_specific_t_rule
        t_rule.lexer_mode = mode_name
        # BUGFIX(idiom): test membership on the set directly; the original
        # sorted exit_tokens before every "in" test for no reason
        if t_name in exit_tokens:
            t_rule.lexer_actions.append(Antlr4LexerAction.popMode())
def solve_left_recurse_and_op_precedence_for_constant_expression(rules):
    """
    Replace the left-recursive "constant_expression" with a chain of rules
    (constant_expression_0, constant_expression_1, ...) encoding operator
    precedence, the last one named "constant_expression" again.
    """
    # constant_expression:
    #     constant_primary
    #     | unary_operator ( attribute_instance )* constant_primary
    #     | constant_expression binary_operator ( attribute_instance )* constant_expression
    #     | constant_expression QUESTIONMARK ( attribute_instance )* constant_expression COLON constant_expression;
    c_expression_0 = extract_option_as_rule(
        rules, "constant_expression", [0, 1], "constant_expression_0")
    # constant_expression_0:
    #     constant_primary
    #     | unary_operator ( attribute_instance )* constant_primary

    def handle_conditional_fn(bin_op_choices, current_expr_rule):
        # rewrite the ternary operator as an iteration to avoid recursion:
        # current ( QUESTIONMARK attr* const_expr COLON const_expr )*
        bin_op_choices.extend([
            Antlr4Symbol(current_expr_rule.name, False),
            Antlr4Iteration(
                Antlr4Sequence([
                    Antlr4Symbol("QUESTIONMARK", False),
                    Antlr4Iteration(Antlr4Symbol("attribute_instance",
                                                 False)),
                    Antlr4Symbol("constant_expression", False),
                    Antlr4Symbol("COLON", False),
                    Antlr4Symbol("constant_expression", False),
                ]))
        ])

    def handle_inside_fn(bin_op_choices, current_expr_rule):
        # constant_expression has no "inside" form - nothing to add
        pass

    rules.remove(rule_by_name(rules, "constant_expression"))
    current_expr_rule = c_expression_0
    op_group = get_operator_precedence_groups()
    for i, prec_group in enumerate(op_group):
        is_last = i == len(op_group) - 1
        if is_last:
            # the last precedence level takes the original rule name back
            new_rule_name = "constant_expression"
        else:
            new_rule_name = "constant_expression_%d" % (i + 1)
        current_expr_rule = extract_bin_ops(
            rules, current_expr_rule, prec_group, new_rule_name,
            handle_conditional_fn, handle_inside_fn)
def replace_and_rename_same(rules: List[Antlr4Rule],
                            rules_to_replace: List[str],
                            name_of_new_rule: str):
    """
    Merge several identical rules into one: all but the first are removed,
    the first is renamed to name_of_new_rule and every reference to any of
    the old names is redirected to the new name.

    :param rules: list of Antlr4Rule objects, modified in place
    """
    r = None
    for name in rules_to_replace:
        _r = rule_by_name(rules, name)
        if r is None:
            r = _r
        else:
            # the rules being merged really must be identical
            assert (r.body == _r.body
                    or r.body.toAntlr4() == _r.body.toAntlr4()), (r, _r)
            rules.remove(_r)

    # BUGFIX: the original asserted on "r.name" (trivially true before the
    # rename); the intent is that no existing rule already uses the new name
    for rule in rules:
        assert rule.name != name_of_new_rule, name_of_new_rule
    r.name = name_of_new_rule

    for rule in rules:
        for symbol_name in rules_to_replace:
            _replace_symbol_in_rule(rule, symbol_name, name_of_new_rule,
                                    False)
def move_iteration_up_in_parse_tree(rules, rule_name):
    """
    Strip the top-level "( ... )*" from a rule's body and instead wrap
    every occurrence of the rule (in all rules) in "( ... )*".
    """
    rule = rule_by_name(rules, rule_name)
    # unwrap the ()* from the rule body itself
    if isinstance(rule.body, Antlr4Sequence):
        assert len(rule.body) == 1, rule.body
        rule.body = rule.body[0]
    assert isinstance(rule.body, Antlr4Iteration) and not rule.body.positive
    rule.body = rule.body.body

    # wrap each appearance of the rule in ()*
    rule_ref = Antlr4Symbol(rule_name, False)

    def wrap_in_iteration(o):
        if o == rule_ref:
            return Antlr4Iteration(o, positive=False)

    for other in rules:
        replace_item_by_sequence(other.body, wrap_in_iteration)
def extract_option_as_rule(rules, rule_name, options_i, new_rule_name):
    """
    Move the selection options at indexes "options_i" out of the rule into
    a new rule (inserted just before it); the first extracted option is
    replaced by a reference to the new rule, the rest are dropped.

    :return: the newly created rule
    """
    rule = rule_by_name(rules, rule_name)
    assert isinstance(rule.body, Antlr4Selection)

    # collect the options before mutating the body
    extracted = Antlr4Selection([rule.body[i] for i in options_i])

    # the first extracted slot becomes a reference to the new rule
    rule.body[options_i[0]] = Antlr4Sequence([
        Antlr4Symbol(new_rule_name, False),
        Antlr4Newline(),
        Antlr4Indent(1),
    ])
    # drop the remaining extracted slots
    keep = [x for i, x in enumerate(rule.body) if i not in options_i[1:]]
    rule.body = Antlr4Selection(keep)

    body = extracted[0] if len(extracted) == 1 else extracted
    new_rule = Antlr4Rule(new_rule_name, body)
    rules.insert(rules.index(rule), new_rule)
    return new_rule
def replace_symbol_in_rule(rules, rule_name, symbol_name,
                           symbol_name_replace, only_first=False):
    """
    Rename occurrences of symbol_name to symbol_name_replace inside the
    named rule, optionally stopping after the first match.

    NOTE(review): this re-definition shadows an earlier function of the
    same name in this file (which delegates to _replace_symbol_in_rule) -
    verify that only one of them is intended to survive.
    """
    r = rule_by_name(rules, rule_name)

    class FirstFound(Exception):
        # sentinel exception used to abort the walk after the first rename
        pass

    def renamer(obj):
        if isinstance(obj, Antlr4Symbol) and obj.symbol == symbol_name:
            obj.symbol = symbol_name_replace
            if only_first:
                raise FirstFound()

    try:
        r.walk(renamer)
    except FirstFound:
        pass