def add_comments_and_ws(rules):
    r"""Append the lexer rules for comments and whitespace to ``rules``.

    Three rules are added:
      * ONE_LINE_COMMENT: '//' .*? '\r'? '\n' -> channel(HIDDEN);
      * BLOCK_COMMENT: '/*' .*? '*/' -> channel(HIDDEN);
      * WHITE_SPACE: [ \t\n\r] + -> skip;

    :param rules: list of Antlr4Rule instances, mutated in place
    """
    one_line_comment = Antlr4Rule(
        "ONE_LINE_COMMENT",
        Antlr4Sequence([
            Antlr4Symbol("//", True),
            Antlr4Symbol(".*?", True, is_regex=True),
            Antlr4Option(Antlr4Symbol("\r", True)),
            Antlr4Symbol("\n", True),
        ]),
        lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    block_comment = Antlr4Rule(
        "BLOCK_COMMENT",
        Antlr4Sequence([
            Antlr4Symbol("/*", True),
            Antlr4Symbol(".*?", True, is_regex=True),
            Antlr4Symbol("*/", True),
        ]),
        lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    white_space = Antlr4Rule(
        "WHITE_SPACE",
        Antlr4Sequence([
            Antlr4Symbol("[ \\t\\n\\r] +", True, is_regex=True),
        ]),
        lexer_actions=[Antlr4LexerAction.skip()])
    for new_rule in (one_line_comment, block_comment, white_space):
        rules.append(new_rule)
def parse_element_optional(self, ctx: Element_optionalContext) -> Antlr4Option:
    """Parse an optional element.

    Grammar: element_optional: '[' element ']';

    :param ctx: parse-tree context for the optional element
    :return: the inner element wrapped in an Antlr4Option
    """
    inner = ctx.element()
    return Antlr4Option(self.parse_element(inner))
def rm_ambiguity(rules):
    """Remove an ambiguity from the ``variable_decl_assignment`` rule.

    The optional "ASSIGN class_new" group is unwrapped: the Antlr4Option
    node is replaced by its body everywhere it appears in the rule.

    :param rules: list of Antlr4Rule instances to search
    """
    target_rule = rule_by_name(rules, "variable_decl_assignment")
    optional_assign = Antlr4Option(Antlr4Sequence([
        Antlr4Symbol("ASSIGN", False),
        Antlr4Symbol("class_new", False),
    ]))

    def unwrap_optional(item):
        # Returning the body makes the caller substitute it for the option;
        # returning None (implicitly) leaves other items untouched.
        if item == optional_assign:
            return item.body

    replace_item_by_sequence(target_rule, unwrap_optional)
def match_replace_fn(o):
    # Left-factoring pass for an Antlr4Selection: when one alternative is a
    # prefix of the next, merge them into "prefix (suffix)?" and drop the
    # longer alternative. Mutates `o` in place; returns a replacement
    # Antlr4Sequence only when the selection collapses to a single item.
    if isinstance(o, Antlr4Selection):
        potential_prefix = None
        potential_prefix_i = None
        to_remove = []
        for i, c in enumerate(o):
            if potential_prefix is None:
                # start a new candidate prefix from this alternative
                potential_prefix = c
                potential_prefix_i = i
            else:
                # check if the potential_prefix is really a prefix of this rule
                is_prefix, suffix = is_prefix_of_elem(potential_prefix, c)
                if is_prefix:
                    # put suffix as a optional to a prefix
                    # (only if the suffix has any non-visual content)
                    if list(iter_non_visuals(suffix)):
                        if not isinstance(potential_prefix, Antlr4Sequence):
                            # wrap the bare prefix in a sequence so we can
                            # append the optional suffix to it; the assert
                            # guarantees the in-place replacement is aliased
                            assert o[potential_prefix_i] is potential_prefix
                            potential_prefix = Antlr4Sequence([
                                potential_prefix,
                            ])
                            o[potential_prefix_i] = potential_prefix
                        if len(suffix) == 1:
                            suffix = suffix[0]
                        else:
                            suffix = Antlr4Sequence(suffix)
                        potential_prefix.append(Antlr4Option(suffix))
                    to_remove.append(c)
                    potential_prefix = None
                    potential_prefix_i = None
                    # NOTE(review): assigns a local here; presumably meant to
                    # flag an enclosing-scope `modified` (nonlocal) — confirm
                    # against the enclosing function.
                    modified = True
                else:
                    # not a prefix; this alternative becomes the new candidate
                    potential_prefix = c
                    potential_prefix_i = i
        for c in to_remove:
            o.remove(c)
        if len(o) == 1:
            # selection degenerated to one alternative -> replace with sequence
            return Antlr4Sequence([
                o[0],
            ])
def match_replace_fn(o):
    # Rewrite "a? | b? | c?" into "( a | b | c )?": if every alternative of a
    # selection is an Antlr4Option (possibly wrapped in a sequence with only
    # visual items around it), strip the options and wrap the whole selection
    # in a single option. A bare `return` means "pattern does not apply".
    if isinstance(o, Antlr4Selection):
        non_optional_items = []
        for c in o:
            if isinstance(c, Antlr4Sequence):
                # unwrap a sequence that holds exactly one non-visual item
                c = list(iter_non_visuals(c))
                if len(c) != 1:
                    return
                c = c[0]
            if not isinstance(c, Antlr4Option):
                # at least one alternative is mandatory -> no rewrite
                return
            non_optional_items.append(c.body)
        # replace the selection's contents with the unwrapped bodies, in place
        o.clear()
        o.extend(non_optional_items)
        # NOTE(review): assigns a local here; presumably meant to flag an
        # enclosing-scope `modified` (nonlocal) — confirm against the
        # enclosing function.
        modified = True
        return Antlr4Sequence([
            Antlr4Option(o),
        ])
def add_file_path_literal_rules(p):
    """Register the lexer rules used to match file path literals.

    Adds a FILE_PATH_SPEC_CHAR fragment (a single allowed path character,
    plain or backslash-escaped) and a FILE_PATH_SPEC rule built as a
    positive iteration of those characters, optionally separated by SEMI.

    :param p: parser/grammar object whose ``rules`` list is extended
    """
    path_char_fragment = Antlr4Rule(
        "FILE_PATH_SPEC_CHAR",
        Antlr4Symbol(
            "[^ !$`&()+] | ( '\\\\' [ !$`&*()+] )",
            True, True),
        is_fragment=True)
    p.rules.append(path_char_fragment)

    path_body = Antlr4Sequence([
        Antlr4Symbol("FILE_PATH_SPEC_CHAR", False),
        Antlr4Option(Antlr4Sequence([
            Antlr4Symbol('SEMI', False),
            Antlr4Symbol("FILE_PATH_SPEC_CHAR", False),
        ])),
    ])
    path_rule = Antlr4Rule(
        "FILE_PATH_SPEC",
        Antlr4Iteration(path_body, positive=True))
    p.rules.append(path_rule)
def remove_useless_and_normalize_names(p):
    """Simplify the parsed SystemVerilog grammar and normalize rule names.

    Performs, in order:
      * seeds a rename map from ``SvRule2Antlr4Rule.SPEC_SYMB``,
      * replaces overspecified rules with more general ones,
      * moves selected parser rules to the lexer (uppercased names),
      * marks fragment rules and uppercases their names,
      * removes redundant/equivalent rules (identifier aliases, comments,
        whitespace, single-use wrappers),
      * applies all renames, appends the special-symbol lexer rules and
        lets ``identifier`` also match C_IDENTIFIER.

    Fixes vs. earlier revision: the fragment-marking loop used
    ``for fr in fragments: if r.name in fragments`` (inner condition did not
    use ``fr``), re-renaming every fragment once per matching rule; replaced
    by a single membership test plus one unconditional rename pass. The
    ``to_lexer`` list also contained duplicate entries; deduplicated
    (renames are idempotent, behavior unchanged).

    :param p: parser/grammar object with a mutable ``rules`` list
    """
    renames = dict(SvRule2Antlr4Rule.SPEC_SYMB)
    # rm_newline_from_simple_rules(p.rules)
    # nts = get_used_non_terminals(p.rules)
    # def_nts = get_defined_non_terminals(p.rules)

    # overspecified rules
    # finish_number 0 - 2
    replace_rule("finish_number", "UNSIGNED_NUMBER", renames, p)
    # scalar_constant 1b number
    replace_rule("scalar_constant", "integral_number", renames, p)
    # init_val 1b value
    replace_rule("init_val", "integral_number", renames, p)
    # edge_descriptor 2 tristate digits
    # edge_descriptor: '01' | '10' | Z_OR_X ZERO_OR_ONE | ZERO_OR_ONE Z_OR_X;
    # dpi_spec_string two concrete strings
    replace_rule("dpi_spec_string", "STRING_LITERAL", renames, p)

    # #0 -> # UNSIGNED_NUMBER
    primitive_delay = Antlr4Rule(
        "primitive_delay", Antlr4Sequence([
            Antlr4Symbol("HASH", False),
            Antlr4Symbol("UNSIGNED_NUMBER", False),
        ]))
    p.rules.append(primitive_delay)
    replace_rule("#0", "primitive_delay", renames, p)

    # all equivalent to ps_identifier
    ps_identifier_rules = [
        "ps_class_identifier", "ps_covergroup_identifier",
        "ps_checker_identifier",
    ]
    for name in ps_identifier_rules:
        replace_rule(name, "ps_identifier", renames, p)

    # all equivalent to a common ps_or_hierarchical_identifier rule
    ps_or_hierarchical_id_rules = [
        "ps_or_hierarchical_net_identifier",
        "ps_or_hierarchical_property_identifier",
        "ps_or_hierarchical_sequence_identifier",
        "ps_or_hierarchical_tf_identifier",
    ]
    ps_or_hierarchical_identifier = Antlr4Rule(
        "ps_or_hierarchical_identifier",
        Antlr4Selection([
            Antlr4Sequence([
                Antlr4Option(Antlr4Symbol("package_scope", False)),
                Antlr4Symbol("identifier", False)
            ]),
            Antlr4Symbol("hierarchical_identifier", False),
        ]))
    p.rules.append(ps_or_hierarchical_identifier)
    for name in ps_or_hierarchical_id_rules:
        replace_rule(name, "ps_or_hierarchical_identifier", renames, p)

    # parser rules that become lexer rules (names are uppercased)
    to_lexer = [
        "c_identifier",
        "unsigned_number",
        "simple_identifier",
        "system_tf_identifier",
        "string_literal",
        "binary_number",
        "octal_number",
        "hex_number",
        "fixed_point_number",
        "escaped_identifier",
        "unbased_unsized_literal",
        "time_literal",
        # because it is very hard to switch mode to parse
        # edge_descriptor and it is easy to just parse coma separated list of 2 chars
        "edge_control_specifier",
        "level_symbol",
        "output_symbol",
        "edge_symbol",
        "file_path_spec",
    ]
    for tl in to_lexer:
        renames[tl] = tl.upper()

    fragments = {
        "binary_value", "octal_value", "hex_value", "decimal_base",
        "binary_base", "octal_base", "hex_base", "non_zero_unsigned_number",
        "size", "sign", "edge_descriptor", "non_zero_decimal_digit",
        "decimal_digit", "binary_digit", "octal_digit", "hex_digit",
        "x_digit", "z_digit", "exp", 'white_space', 'zero_or_one', 'z_or_x',
        'Any_ASCII_Characters',
        "any_printable_ASCII_character_except_white_space", "time_unit"
    }
    # every fragment name is renamed to its uppercase form
    for fr in fragments:
        renames[fr] = fr.upper()
    for r in p.rules:
        if r.name.startswith("$"):
            renames[r.name] = r.name.replace("$", "dolar_")
        if r.name in fragments:
            r.is_fragment = True

    for r in p.rules:
        rm_redunt_whitespaces_on_end(r)

    identifier_rule_equivalents = {
        r.name for r in collect_simple_rules(p.rules, "identifier")
    }
    hierarchical_identifier_rule_equivalents = {
        r.name for r in collect_simple_rules(p.rules,
                                             "hierarchical_identifier")
    }

    to_remove = {
        "comment", "one_line_comment", "block_comment", "comment_text",
        "white_space",
    }
    to_remove.update(identifier_rule_equivalents)
    to_remove.update(hierarchical_identifier_rule_equivalents)

    simple_rules_to_remove = [
        "default_clause",  # default kw
        "variable_port_type",
        "limit_value",  # used only in more specific limit values
        "dpi_function_proto",  # used only in dpi block so we already know
        "dpi_task_proto",  # used only in dpi block so we already know
        "property_lvar_port_direction",  # used only in property so we already know
        # "consecutive_repetition",  # useless
        "trans_item",
        "ordered_parameter_assignment",
        "function_statement",
        "case_expression",
        "case_item_expression",
        "open_value_range",  # used only in open_range_list so we already know
        "constant_assignment_pattern_expression",  # parser do not see the difference between const/non const
        "clockvar",  # used only in clockvar_expression
        "path_delay_expression",  # used only in more specific rules
        "constant_function_call",  # parser do not see the difference between const/non const
        "function_subroutine_call",
        "constant_let_expression",  # parser do not see the difference between const/non const
        "attr_name",  # used only in attr_spec
        "array_identifier",  # never used
        "checker_identifier",  # used only in rule with same name
        "class_identifier",
        "class_variable_identifier",
        "clocking_identifier",
        "config_identifier",
        "const_identifier",
        "constraint_identifier",
        "covergroup_identifier",
        "covergroup_variable_identifier",
        "cover_point_identifier",
        "cross_identifier",
        "enum_identifier",
        "formal_identifier",
        "function_identifier",
        "generate_block_identifier",
        "genvar_identifier",
        "hierarchical_array_identifier",
        "hierarchical_block_identifier",
        "hierarchical_event_identifier",
        "hierarchical_net_identifier",
        "hierarchical_parameter_identifier",
        "hierarchical_property_identifier",
        "hierarchical_sequence_identifier",
        "hierarchical_task_identifier",
        "hierarchical_tf_identifier",
        "hierarchical_variable_identifier",
        "index_variable_identifier",
        "interface_identifier",
        "interface_instance_identifier",
        # "inout_port_identifier",
        # "input_port_identifier",
        "instance_identifier",
        "member_identifier",
        "method_identifier",
        "modport_identifier",
        "module_identifier",
        "net_identifier",
        # "output_port_identifier"
        "package_identifier",
        "parameter_identifier",
        "port_identifier",
        "production_identifier",
        "program_identifier",
        "property_identifier",
        "sequence_identifier",
        "signal_identifier",
        "specparam_identifier",
        "task_identifier",
        "tf_identifier",
        "terminal_identifier",
        "topmodule_identifier",
        "udp_identifier",
        "variable_identifier",
    ]
    for sr in simple_rules_to_remove:
        remove_simple_rule(sr, p)
    p.rules = [r for r in p.rules if r.name not in to_remove]

    # redirect all identifier aliases to the two canonical rules
    for idname in identifier_rule_equivalents:
        renames[idname] = "identifier"
    for idname in hierarchical_identifier_rule_equivalents:
        renames[idname] = "hierarchical_identifier"

    apply_rename = generate_renamer(renames, True)
    for r in p.rules:
        r.walk(apply_rename)
        r.walk(mark_regex)

    # emit a lexer rule for each special symbol
    for k, v in SvRule2Antlr4Rule.SPEC_SYMB.items():
        body = Antlr4Symbol(k, True)
        r = Antlr4Rule(v, body)
        p.rules.append(r)

    # because C_IDENTIFIER is just normal identifier without $ and can match identifiers
    for r in p.rules:
        if r.name == "identifier":
            r.body.insert(0, Antlr4Symbol("C_IDENTIFIER", False))