Example 1
 def parse_element_text(self, ctx: Element_textContext) -> Antlr4Symbol:
     """
     element_text: NAME | TERMINAL;
     """
     n = ctx.NAME()
     if n is not None:
         return Antlr4Symbol(n.getText(), False)
     else:
         n = ctx.TERMINAL().getText()
         n = n[len("<b>"):-len("</b>")]
         return Antlr4Symbol(n, True)
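The TERMINAL text apparently arrives wrapped in literal <b>...</b> markup, which is exactly what the slicing strips; a quick check of that slice (the token text is illustrative only):

t = "<b>module</b>"
assert t[len("<b>"):-len("</b>")] == "module"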
Example 2
def rm_ambiguity(rules):
    rule = rule_by_name(rules, "variable_decl_assignment")
    to_repl = Antlr4Option(
        Antlr4Sequence(
            [Antlr4Symbol("ASSIGN", False),
             Antlr4Symbol("class_new", False)]))

    def match_replace_fn(o):
        if o == to_repl:
            return o.body

    replace_item_by_sequence(rule, match_replace_fn)
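The match/replace callback convention used here and throughout this listing appears to be: return None to keep the visited element, return a grammar element to substitute it, or return an empty sequence to delete it. A minimal sketch combining the three cases (to_repl built as in rm_ambiguity above):

def match_replace_fn_sketch(o):
    if o == to_repl:
        return o.body                      # unwrap: replace the optional by its body
    if o == Antlr4Symbol("SEMI", False):
        return Antlr4Sequence([])          # an empty sequence removes the element
    return None                            # everything else is left untouched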
Example 3
def extract_bin_ops(rules, current_expr_rule, ops_to_extrat, new_rule_name,
                    top_rule_name, handle_conditional_fn, handle_inside_fn):
    # find option with binary op rule
    # expr = rule_by_name(rules, "expression")
    ops_no_special = [
        o for o in ops_to_extrat if o not in [
            "KW_INSIDE",
            "KW_DIST",
            "QUESTIONMARK",
        ]
    ]

    bin_op_choices = []
    if len(ops_no_special) > 0:
        if len(ops_no_special) == 1:
            op = Antlr4Symbol(ops_no_special[0], False)
        else:
            op = Antlr4Selection(
                [Antlr4Symbol(o, False) for o in ops_no_special])

        # expression (binary_operator ( attribute_instance )* expression)*
        bin_op_choice = Antlr4Sequence([
            Antlr4Symbol(current_expr_rule.name, False),
            Antlr4Iteration(
                Antlr4Sequence([
                    op,
                    Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
                    Antlr4Symbol(top_rule_name, False)
                ]))
        ])
        bin_op_choices.append(bin_op_choice)

    if "KW_INSIDE" in ops_to_extrat:
        handle_inside_fn(bin_op_choices, current_expr_rule)

    if "KW_DIST" in ops_to_extrat:
        # handled differently, only allowed in specific places
        pass

    if "QUESTIONMARK" in ops_to_extrat:
        handle_conditional_fn(bin_op_choices, current_expr_rule)

    # create a new rule which contains the choices for the extracted binary operators
    if len(bin_op_choices) > 1:
        new_body = Antlr4Selection(bin_op_choices)
    else:
        new_body = bin_op_choices[0]

    new_r = Antlr4Rule(new_rule_name, new_body)
    rules.insert(rules.index(current_expr_rule), new_r)

    return new_r
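A hypothetical call for one precedence level (all rule and token names below are assumptions for illustration): extract PLUS/MINUS from the expression chain into a new rule expression_add, using the handler callbacks shown later in this listing:

add_expr = extract_bin_ops(
    rules, rule_by_name(rules, "expression_mul"),   # assumed lower-precedence rule
    ["PLUS", "MINUS"],                              # assumed operator token names
    "expression_add", "expression",
    handle_conditional_fn, handle_inside_fn)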
Example 4
def subroutine_call_rm_lr(rules):
    r = rule_by_name(rules, "subroutine_call")
    assert isinstance(r.body, Antlr4Selection)
    c = r.body[2]
    _body = list(iter_non_visuals(c))
    assert _body[-1].symbol == "method_call_body", _body[-1].symbol
    start: Antlr4Selection = _body[0]
    start.clear()
    start.extend([
        Antlr4Symbol("primary_no_cast_no_call", False),
        Antlr4Symbol("cast", False),
        Antlr4Symbol("implicit_class_handle", False)
    ])
Example 5
def add_interface_class_declaration(rules):
    """
    Because interface_class_declaration is not used anywhere
    (it is missing from the specified rules), add it as an alternative
    wherever class_declaration appears
    """
    intf = Antlr4Symbol("interface_class_declaration", False)
    cls = Antlr4Symbol("class_declaration", False)

    def match_replace_fn(o):
        if o == cls:
            return Antlr4Selection([o, deepcopy(intf)])

    for rule in rules:
        replace_item_by_sequence(rule, match_replace_fn)
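The expected effect of the replacement above, sketched in grammar terms: every class_declaration reference becomes a selection that also allows interface_class_declaration.

# before:  ... class_declaration ...
# after:   ... ( class_declaration | interface_class_declaration ) ...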
Example 6
def add_comments_and_ws(rules):
    # ONE_LINE_COMMENT: '//' .*? '\\r'? '\\n' -> channel(HIDDEN);
    olc = Antlr4Rule("ONE_LINE_COMMENT",
                     Antlr4Sequence([
                         Antlr4Symbol("//", True),
                         Antlr4Symbol(".*?", True, is_regex=True),
                         Antlr4Option(Antlr4Symbol("\r", True)),
                         Antlr4Symbol("\n", True),
                     ]),
                     lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    rules.append(olc)
    # BLOCK_COMMENT: '/*' .*? '*/' -> channel (HIDDEN);
    bc = Antlr4Rule("BLOCK_COMMENT",
                    Antlr4Sequence([
                        Antlr4Symbol("/*", True),
                        Antlr4Symbol(".*?", True, is_regex=True),
                        Antlr4Symbol("*/", True),
                    ]),
                    lexer_actions=[Antlr4LexerAction.channel("HIDDEN")])
    rules.append(bc)
    # WHITE_SPACE: [ \\t\\n\\r] + -> skip;
    ws = Antlr4Rule("WHITE_SPACE",
                    Antlr4Sequence([
                        Antlr4Symbol("[ \\t\\n\\r] +", True, is_regex=True),
                    ]),
                    lexer_actions=[Antlr4LexerAction.skip()])
    rules.append(ws)
Example 7
 def handle_inside_fn(bin_op_choices, current_expr_rule):
     bin_op_choices[-1].extend([Antlr4Newline(), Antlr4Indent(1)])
     # expression (KW_INSIDE LBRACE open_range_list RBRACE)*;
     bin_op_choice = Antlr4Sequence([
         Antlr4Symbol(current_expr_rule.name, False),
         Antlr4Iteration(
             Antlr4Sequence([
                 Antlr4Symbol("KW_INSIDE", False),
                 Antlr4Symbol("LBRACE", False),
                 Antlr4Symbol("open_range_list", False),
                 Antlr4Symbol("RBRACE", False),
             ]))
     ])
     bin_op_choices.append(bin_op_choice)
Example 8
    def match_replace_fn(o):
        if isinstance(o, Antlr4Selection):
            char_symb_to_replace = []
            for orig_c in o:
                c = orig_c
                c = list(iter_non_visuals(c))
                if len(c) > 1:
                    continue
                c = c[0]
                if isinstance(c, Antlr4Symbol) and c.is_terminal and len(
                        c.symbol) == 1:
                    char_symb_to_replace.append((orig_c, c))
            if len(char_symb_to_replace) > 1:
                # build a regex out of them
                # and replace them by the regex
                for c, _ in char_symb_to_replace:
                    o.remove(c)

                re_str = "[%s]" % ("".join(
                    [c._escaped() for _, c in char_symb_to_replace]))
                re = Antlr4Symbol(re_str, True, is_regex=True)
                if len(list(iter_non_visuals(o))):
                    o.append(re)
                else:
                    return Antlr4Sequence([
                        re,
                    ])
Example 9
def fix_priority_of__class_scope__package_scope(rules):
    orig = Antlr4Selection([
        Antlr4Symbol("class_scope", False),
        Antlr4Symbol("package_scope", False)
    ])
    repl = Antlr4Selection([
        Antlr4Symbol("package_scope", False),
        Antlr4Symbol("class_scope", False)
    ])

    def match_replace_fn(o):
        if o == orig:
            return deepcopy(repl)

    for rule in rules:
        replace_item_by_sequence(rule, match_replace_fn)
Example 10
def extract_keywords_to_specific_rule(p: SvRule2Antlr4Rule):
    keywords = set()

    def collect_keywords(obj):
        if isinstance(obj, Antlr4Symbol) and obj.is_terminal:
            s = obj.symbol
            keywords.add(s)

    for r in p.rules:
        if not r.is_lexer_rule():
            r.walk(collect_keywords)

    def get_kw_name(k):
        return "KW_" + k.replace("$", "DOLAR_").upper()

    def renamer(obj: iAntlr4GramElem):
        if isinstance(obj, Antlr4Symbol) and obj.is_terminal\
                and obj.symbol in keywords:
            obj.is_terminal = False
            obj.symbol = get_kw_name(obj.symbol)

    for r in p.rules:
        if not r.is_lexer_rule():
            r.walk(renamer)

    for k in sorted(keywords):
        kw_name = get_kw_name(k)
        kw_rule = Antlr4Rule(kw_name, Antlr4Symbol(k, True))
        p.rules.append(kw_rule)
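For intuition, a terminal such as 'module' used inside a parser rule should end up rewritten to the nonterminal KW_MODULE, and a lexer rule KW_MODULE: 'module'; gets appended; a quick check of the naming scheme used above (the keyword is illustrative):

example_kw = "module"
assert "KW_" + example_kw.replace("$", "DOLAR_").upper() == "KW_MODULE"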
Example 11
def split_rule(rules, rule_name, symbols_to_extract: List[str], subrule_name: str):
    """
    Keep only the options which start with symbols from symbols_to_extract.
    Move the rest into a new subrule.
    """
    r = rule_by_name(rules, rule_name)

    assert isinstance(r.body, Antlr4Selection), r

    sub_options = Antlr4Selection([])
    for o in r.body:
        start_symbols = set()
        _direct_left_corner(o, start_symbols, allow_eps_in_sel=True)
        if not start_symbols.intersection(symbols_to_extract):
            sub_options.append(o)
    r.body = Antlr4Selection([o for o in r.body if not (o in sub_options)])
    r.body.insert(0, Antlr4Symbol(subrule_name, False))
    if len(r.body) == 1:
        r.body = r.body[0]

    assert len(sub_options) > 0
    if len(sub_options) == 1:
        sub_options = sub_options[0]
    else:
        sub_options = Antlr4Selection(sub_options)

    sub_r = Antlr4Rule(subrule_name, sub_options)
    rules.insert(rules.index(r), sub_r)
    return sub_r
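Hypothetical usage (the rule and token names are assumptions for illustration): keep only the options of statement_item that start with KW_IF or KW_CASE and move every other option into a new rule statement_item_other, which the original rule then references:

sub = split_rule(rules, "statement_item",
                 ["KW_IF", "KW_CASE"], "statement_item_other")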
Example 12
def rm_semi_from_cross_body_item(rules):
    """
    Because SEMI is already part of cross_body_item, remove the redundant SEMI
    from the cross_body rule
    """
    rule = rule_by_name(rules, "cross_body")
    semi = Antlr4Symbol("SEMI", False)

    def match_replace_fn(o):
        if o == semi:
            return Antlr4Sequence([])

    replace_item_by_sequence(rule.body[0], match_replace_fn)
Example 13
def direct_left_recurse_rm(rules, rule_name):
    r = rule_by_name(rules, rule_name)

    if isinstance(r.body, Antlr4Selection):
        choices = r.body
    elif isinstance(r.body, Antlr4Sequence):
        choices = [r.body, ]
    else:
        raise NotImplementedError()

    # find the choices which start with this rule's nonterminal
    lr_choices = []
    for c in choices:
        if isinstance(c, Antlr4Sequence):
            first = next(iter_non_visuals(c))
            if isinstance(first, Antlr4Symbol) and first.symbol == rule_name:
                lr_choices.append(c)
        else:
            raise NotImplementedError()

    # remove choices which are causing left recursion
    assert len(lr_choices) >= 1, rule_name
    for lr_choice in lr_choices:
        choices.remove(lr_choice)

    if len(choices) == 0:
        raise NotImplementedError()
    elif len(choices) == 1:
        r.body = choices[0]

    # rename this rule to rule_item
    r_base_name = r.name + "_item"
    for _r in rules:
        assert _r.name != r_base_name, r_base_name
    r.name = r_base_name

    # create a new rule which implements the removed choices and also expands to rule_item
    choices_new = Antlr4Selection([])
    for lr_choice in lr_choices:
        first = next(iter_non_visuals(lr_choice))
        assert isinstance(first, Antlr4Symbol) and first.symbol == rule_name
        repl = Antlr4Symbol(r_base_name, False)
        _iterate_everything_except_first_and_replace_first(lr_choice, repl)

        if not choices_new:
            lr_choice.insert(0, Antlr4Newline())
            lr_choice.insert(1, Antlr4Indent(1))
        choices_new.append(lr_choice)

    body_new = choices_new[0] if len(choices_new) == 1 else choices_new
    r_new = Antlr4Rule(rule_name, body_new)
    rules.insert(rules.index(r), r_new)
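In grammar terms, the transformation above is meant to turn a directly left-recursive rule into an item rule plus an iteration, roughly as sketched below (the rule name a is illustrative; the exact handling of the tail depends on _iterate_everything_except_first_and_replace_first):

# before:  a: a X | Y;
# after:   a_item: Y;
#          a: a_item ( X )*;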
Example 14
def add_file_path_literal_rules(p):
    FILE_PATH_SPEC_CHAR = Antlr4Rule(
        "FILE_PATH_SPEC_CHAR",
        Antlr4Symbol(
            "[^ !$`&()+] | ( '\\\\' [ !$`&*()+] )",
            True, True),
        is_fragment=True)
    p.rules.append(FILE_PATH_SPEC_CHAR)

    file_spec_path = Antlr4Rule(
        "FILE_PATH_SPEC",
        Antlr4Iteration(Antlr4Sequence([
                Antlr4Symbol("FILE_PATH_SPEC_CHAR", False),
                Antlr4Option(Antlr4Sequence([
                    Antlr4Symbol('SEMI', False),
                    Antlr4Symbol("FILE_PATH_SPEC_CHAR", False),
                ])),
            ]),
            positive=True
        )
    )
    p.rules.append(file_spec_path)
Example 15
    def handle_conditional_fn(bin_op_choices, current_expr_rule):
        # rm left recursion from cond_predicate/conditional_expression
        replace_symbol_in_rule(rules,
                               "conditional_expression",
                               "cond_predicate",
                               "cond_expr_predicate",
                               only_first=True)
        iterate_everything_except_first(rules, "conditional_expression")
        # create a new cond_predicate (cond_expr_predicate) without left recursion
        cond_predicate = rule_by_name(rules, "cond_predicate")
        cond_expr_predicate = deepcopy(cond_predicate)
        cond_expr_predicate.name = "cond_expr_predicate"
        rules.insert(rules.index(cond_predicate), cond_expr_predicate)
        replace_symbol_in_rule(rules,
                               "cond_expr_predicate",
                               "expression",
                               current_expr_rule.name,
                               only_first=True)

        bin_op_choices.extend([
            Antlr4Symbol(current_expr_rule.name, False),
            Antlr4Symbol("conditional_expression", False)
        ])
Example 16
 def handle_conditional_fn(bin_op_choices, current_expr_rule):
     bin_op_choices.extend([
         Antlr4Symbol(current_expr_rule.name, False),
         Antlr4Iteration(
             Antlr4Sequence([
                 Antlr4Symbol("QUESTIONMARK", False),
                 Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
                 Antlr4Symbol("constant_expression", False),
                 Antlr4Symbol("COLON", False),
                 Antlr4Symbol("constant_expression", False),
             ]))
     ])
Example 17
def extract_option_as_rule(rules, rule_name, options_i, new_rule_name):
    r = rule_by_name(rules, rule_name)
    assert isinstance(r.body, Antlr4Selection)
    new_body = Antlr4Selection([])
    for i in options_i:
        new_body.append(r.body[i])

    r.body[options_i[0]] = Antlr4Sequence(
        [Antlr4Symbol(new_rule_name, False),
         Antlr4Newline(),
         Antlr4Indent(1)])
    r.body = Antlr4Selection(
        [x for i, x in enumerate(r.body) if i not in options_i[1:]])

    if len(new_body) == 1:
        new_body = new_body[0]

    new_r = Antlr4Rule(new_rule_name, new_body)
    rules.insert(rules.index(r), new_r)
    return new_r
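Hypothetical usage (names and indices are assumptions for illustration): move the options at indices 1 and 3 of module_item into a new rule module_item_extracted, leaving a single reference to it in the original rule:

new_r = extract_option_as_rule(rules, "module_item", [1, 3],
                               "module_item_extracted")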
Example 18
def remove_useless_and_normalize_names(p):
    renames = {}
    for k, v in SvRule2Antlr4Rule.SPEC_SYMB.items():
        renames[k] = v
    # rm_newline_from_simple_rules(p.rules)
    # nts = get_used_non_terminals(p.rules)
    # def_nts = get_defined_non_terminals(p.rules)

    # overspecified
    # finish_number 0 - 2
    replace_rule("finish_number", "UNSIGNED_NUMBER", renames, p)

    # scalar_constant 1b number
    replace_rule("scalar_constant", "integral_number", renames, p)

    # init_val 1b value
    replace_rule("init_val", "integral_number", renames, p)

    # edge_descriptor 2 tristate digits
    # edge_descriptor: '01' | '10' | Z_OR_X ZERO_OR_ONE | ZERO_OR_ONE Z_OR_X;
    # dpi_spec_string  two concrete strings
    replace_rule("dpi_spec_string", "STRING_LITERAL", renames, p)

    # #0 -> # UNSIGNED_NUMBER
    primitive_delay = Antlr4Rule(
        "primitive_delay",
        Antlr4Sequence([
            Antlr4Symbol("HASH", False),
            Antlr4Symbol("UNSIGNED_NUMBER", False),
        ]))
    p.rules.append(primitive_delay)
    replace_rule("#0", "primitive_delay", renames, p)

    # all same
    ps_identifier_rules = [
        "ps_class_identifier",
        "ps_covergroup_identifier",
        "ps_checker_identifier",
    ]
    for name in ps_identifier_rules:
        replace_rule(name, "ps_identifier", renames, p)

    ps_or_hierarchical_id_rules = [
        "ps_or_hierarchical_net_identifier",
        "ps_or_hierarchical_property_identifier",
        "ps_or_hierarchical_sequence_identifier",
        "ps_or_hierarchical_tf_identifier",
    ]

    ps_or_hierarchical_identifier = Antlr4Rule(
        "ps_or_hierarchical_identifier",
        Antlr4Selection([
            Antlr4Sequence([
                Antlr4Option(Antlr4Symbol("package_scope", False)),
                Antlr4Symbol("identifier", False)
            ]),
            Antlr4Symbol("hierarchical_identifier", False),
        ]))
    p.rules.append(ps_or_hierarchical_identifier)
    for name in ps_or_hierarchical_id_rules:
        replace_rule(name, "ps_or_hierarchical_identifier", renames, p)

    to_lexer = [
        "c_identifier",
        "unsigned_number",
        "simple_identifier",
        "system_tf_identifier",
        "unsigned_number",
        "string_literal",
        "binary_number",
        "octal_number",
        "hex_number",
        "octal_number",
        "hex_number",
        "fixed_point_number",
        "escaped_identifier",
        "unbased_unsized_literal",
        "time_literal",

        # because it is very hard to switch the lexer mode to parse
        # edge_descriptor and it is easy to just parse a comma separated list of 2 chars
        "edge_control_specifier",
        "level_symbol",
        "output_symbol",
        "edge_symbol",
        "file_path_spec",
    ]
    for tl in to_lexer:
        renames[tl] = tl.upper()

    fragments = {
        "binary_value", "octal_value", "hex_value", "decimal_base",
        "binary_base", "octal_base", "hex_base", "non_zero_unsigned_number",
        "size", "sign", "edge_descriptor", "non_zero_decimal_digit",
        "decimal_digit", "binary_digit", "octal_digit", "hex_digit", "x_digit",
        "z_digit", "exp", 'white_space', 'zero_or_one', 'z_or_x',
        'Any_ASCII_Characters',
        "any_printable_ASCII_character_except_white_space", "time_unit"
    }

    for r in p.rules:
        if r.name.startswith("$"):
            renames[r.name] = r.name.replace("$", "dolar_")
        for fr in fragments:
            if r.name in fragments:
                r.is_fragment = True
                renames[fr] = fr.upper()

    for r in p.rules:
        rm_redunt_whitespaces_on_end(r)

    identifier_rule_equivalents = {
        r.name
        for r in collect_simple_rules(p.rules, "identifier")
    }
    hierarchical_identifier_rule_equivalents = {
        r.name
        for r in collect_simple_rules(p.rules, "hierarchical_identifier")
    }

    to_remove = {
        "comment",
        "one_line_comment",
        "block_comment",
        "comment_text",
        "white_space",
    }
    to_remove.update(identifier_rule_equivalents)
    to_remove.update(hierarchical_identifier_rule_equivalents)
    simple_rules_to_remove = [
        "default_clause",  # default kw
        "variable_port_type",
        "limit_value",  # used only in more specific limit values
        "dpi_function_proto",  # used only in dpi block so we already know
        "dpi_task_proto",  # used only in dpi block so we already know
        "property_lvar_port_direction",  # used only in property so we already know
        # "consecutive_repetition",  # useless
        "trans_item",
        "ordered_parameter_assignment",
        "function_statement",
        "case_expression",
        "case_item_expression",
        "open_value_range",  # used only in open_range_list so we already know
        "constant_assignment_pattern_expression",  # parser do not see the difference between const/non const
        "clockvar",  # used only in clockvar_expression
        "path_delay_expression",  # used only in more specific rules
        "constant_function_call",  # parser do not see the difference between const/non const
        "function_subroutine_call",
        "constant_let_expression",  # parser do not see the difference between const/non const
        "attr_name",  # used only in attr_spec
        "array_identifier",  # never used
        "checker_identifier",  # used only in rule with same name
        "class_identifier",
        "class_variable_identifier",
        "clocking_identifier",
        "config_identifier",
        "const_identifier",
        "constraint_identifier",
        "covergroup_identifier",
        "covergroup_variable_identifier",
        "cover_point_identifier",
        "cross_identifier",
        "enum_identifier",
        "formal_identifier",
        "function_identifier",
        "generate_block_identifier",
        "genvar_identifier",
        "hierarchical_array_identifier",
        "hierarchical_block_identifier",
        "hierarchical_event_identifier",
        "hierarchical_net_identifier",
        "hierarchical_parameter_identifier",
        "hierarchical_property_identifier",
        "hierarchical_sequence_identifier",
        "hierarchical_task_identifier",
        "hierarchical_tf_identifier",
        "hierarchical_variable_identifier",
        "index_variable_identifier",
        "interface_identifier",
        "interface_instance_identifier",
        # "inout_port_identifier",
        # "input_port_identifier",
        "instance_identifier",
        "member_identifier",
        "method_identifier",
        "modport_identifier",
        "module_identifier",
        "net_identifier",
        # "output_port_identifier"
        "package_identifier",
        "parameter_identifier",
        "port_identifier",
        "production_identifier",
        "program_identifier",
        "property_identifier",
        "sequence_identifier",
        "signal_identifier",
        "specparam_identifier",
        "task_identifier",
        "tf_identifier",
        "terminal_identifier",
        "topmodule_identifier",
        "udp_identifier",
        "variable_identifier",
    ]
    for sr in simple_rules_to_remove:
        remove_simple_rule(sr, p)
    p.rules = [r for r in p.rules if r.name not in to_remove]

    for idname in identifier_rule_equivalents:
        renames[idname] = "identifier"

    for idname in hierarchical_identifier_rule_equivalents:
        renames[idname] = "hierarchical_identifier"

    apply_rename = generate_renamer(renames, True)
    for r in p.rules:
        r.walk(apply_rename)
        r.walk(mark_regex)

    for k, v in SvRule2Antlr4Rule.SPEC_SYMB.items():
        body = Antlr4Symbol(k, True)
        r = Antlr4Rule(v, body)
        p.rules.append(r)

    # because C_IDENTIFIER is just a normal identifier without '$' and can also match identifiers
    for r in p.rules:
        if r.name == "identifier":
            r.body.insert(0, Antlr4Symbol("C_IDENTIFIER", False))
Example 19
    renames = {
        k: k.upper()
        for k in [
            "base_specifier", "lower_case_letter", "upper_case_letter",
            'special_character', 'other_special_character', 'digit',
            'format_effector', 'space_character', 'underline'
        ]
    }
    renames["mode"] = "signal_mode"
    renames["E"] = "E_SIGN"
    renames["NULL"] = "NULL_SYM"
    for k, v in VhdlRule2Antlr4Rule.SPEC_SYMB.items():
        renames[k] = v

    IGNORED = [
        Antlr4Symbol(s, False) for s in [
            "Property_Declaration",
            "Sequence_Declaration",
            "Clock_Declaration",
            "PSL_Directive",
            "Verification_Unit",
        ]
    ]
    with open("vhdl2008.g4_proto") as f:
        p = VhdlRule2Antlr4Rule()
        p.convert(f)
        rm_newline_from_simple_rules(p.rules)
        nts = get_used_non_terminals(p.rules)
        def_nts = get_defined_non_terminals(p.rules)

        keywords = set()
Example 20
def add_string_literal_rules(p):
    string_char = Antlr4Rule("ANY_ASCII_CHARACTERS", Antlr4Selection([
        Antlr4Symbol('~["\\\\\\r\\n]', True, True),
        Antlr4Symbol('\\\n', True),
        Antlr4Symbol('\\\r\n', True),
        Antlr4Symbol('\t', True),
        Antlr4Symbol('\\\\', True),
        Antlr4Symbol('\v', True),
        Antlr4Symbol('\f', True),
        Antlr4Symbol('\a', True),
        Antlr4Symbol("'\\\\' [0-9] [0-9]? [0-9]?", True, True),
        Antlr4Symbol("'\\\\' 'x' [0-9A-Fa-f] [0-9A-Fa-f]?", True, True),
        ]), is_fragment=True)
    p.rules.append(string_char)

    any_printable_ASCII_character_except_white_space = Antlr4Rule(
        "ANY_PRINTABLE_ASCII_CHARACTER_EXCEPT_WHITE_SPACE",
        Antlr4Symbol("'\\u0021'..'\\u007E'", True, True),
        is_fragment=True)
    p.rules.append(any_printable_ASCII_character_except_white_space)