def _selection_flatten(sel: Antlr4Selection):
    assert isinstance(sel, Antlr4Selection), sel
    new_choices = []
    use_new_choices = False
    for c in sel:
        _c = c
        # unwrap single-item sequences to reach the element they wrap
        while isinstance(_c, Antlr4Sequence) and len(_c) == 1:
            _c = _c[0]
        if isinstance(_c, Antlr4Selection):
            # nested selection: merge its options into the parent selection
            use_new_choices = True
            new_choices.extend(_c)
        else:
            new_choices.append(c)
    if use_new_choices:
        return Antlr4Selection(new_choices), True
    else:
        return sel, False
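
# Usage sketch (hedged: relies only on the Antlr4* constructors already used in
# these examples; the symbols "a", "b" and "c" are made-up placeholders):
def _example_selection_flatten():
    a, b, c = (Antlr4Symbol(n, False) for n in ("a", "b", "c"))
    # a choice whose first option is a single-item sequence wrapping another selection
    nested = Antlr4Selection([Antlr4Sequence([Antlr4Selection([a, b])]), c])
    flat, changed = _selection_flatten(nested)
    assert changed
    # flat now lists the three options a, b and c directly; the nesting is gone
    return flat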
Example #2
def fix_SYSTEM_TF_IDENTIFIER(rules):
    kws = collect_keywords(rules)
    SYSTEM_TF_IDENTIFIER = Antlr4Symbol("SYSTEM_TF_IDENTIFIER", False)
    any_system_tf_identifier = Antlr4Symbol("any_system_tf_identifier", False)

    def match_replace_fn(o):
        if o == SYSTEM_TF_IDENTIFIER:
            return deepcopy(any_system_tf_identifier)

    for rule in rules:
        replace_item_by_sequence(rule, match_replace_fn)

    rules.append(
        Antlr4Rule(
            "any_system_tf_identifier",
            Antlr4Selection([
                SYSTEM_TF_IDENTIFIER, *[
                    Antlr4Symbol(kw.replace("$", "KW_DOLAR_").upper(), False)
                    for kw in kws if kw.startswith("$")
                ]
            ])))
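
# Summary: after fix_SYSTEM_TF_IDENTIFIER runs, every SYSTEM_TF_IDENTIFIER
# reference points to the new any_system_tf_identifier parser rule, which
# accepts either the generic token or any keyword token derived from a
# "$"-prefixed keyword (e.g. a "$setup" keyword, if present, would appear
# here as KW_DOLAR_SETUP).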
def add_string_literal_rules(p):
    string_char = Antlr4Rule(
        "ANY_ASCII_CHARACTERS",
        Antlr4Selection([
            Antlr4Symbol('~["\\\\\\r\\n]', True, True),
            Antlr4Symbol('\\\n', True),
            Antlr4Symbol('\\\r\n', True),
            Antlr4Sequence([
                Antlr4Symbol("\\", True),
                Antlr4Symbol('[nt\\\\"vfa%]', True, is_regex=True),
            ]),
            Antlr4Symbol("'\\\\' [0-9] [0-9]? [0-9]?", True, True),
            Antlr4Symbol("'\\\\' 'x' [0-9A-Fa-f] [0-9A-Fa-f]?", True, True),
        ]),
        is_fragment=True)
    p.rules.append(string_char)

    any_printable_ASCII_character_except_white_space = Antlr4Rule(
        "ANY_PRINTABLE_ASCII_CHARACTER_EXCEPT_WHITE_SPACE",
        Antlr4Symbol("'\\u0021'..'\\u007E'", True, True),
        is_fragment=True)
    p.rules.append(any_printable_ASCII_character_except_white_space)
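
    # Roughly, ANY_ASCII_CHARACTERS matches one string-literal character:
    # anything except '"', '\', CR or LF; a backslash line continuation;
    # a simple escape such as \n, \t, \\, \", \v, \f, \a or \%; a numeric
    # escape of one to three digits; or a hex escape "\x" with one or two
    # hex digits. ANY_PRINTABLE_ASCII_CHARACTER_EXCEPT_WHITE_SPACE covers
    # the printable ASCII range 0x21-0x7E.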
    def _selection(self):
        """Parse a '|' separated list of sequences into an Antlr4Selection."""
        options = []
        while True:
            try:
                o = self._sequence()
            except StopIteration:
                break
            options.append(o)
            try:
                n = self.next()
            except StopIteration:
                break
            if n != "|":
                self.back(n)
                break

        if not options:
            raise StopIteration()
        elif len(options) == 1:
            return options[0]
        else:
            return Antlr4Selection(options)
Example #5
def simplify_select_rule(rules, rule_name):
    """
    ( ( KW0 a0 ( a1 )* )* KW0 a0 )? ( a1 )* ...
    ->
    ( KW0 a0 | a1 )* ...
    """
    r = rule_by_name(rules, rule_name)
    g0 = r.body[0]  # the optional group "( ( KW0 a0 ( a1 )* )* KW0 a0 )?"
    g1 = r.body[1]  # the iteration "( a1 )*"
    # "KW0 a0" (the last two items of g0) and "a1" become options of one loop
    first_part = Antlr4Iteration(
        Antlr4Selection([Antlr4Sequence(g0.body[-2:]), g1.body]),
        positive=False)
    if len(r.body) > 2:
        # keep whatever follows the two rewritten groups
        rest = r.body[2:] if len(r.body) > 3 else [r.body[2], ]
        new_body = Antlr4Sequence([
            first_part,
            *rest
        ])
    else:
        new_body = first_part

    r.body = new_body
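
# A concrete (hedged) instance of the rewrite above, in grammar notation with
# illustrative names only:
#     some_select: ( ( DOT member ( bit_select )* )* DOT member )? ( bit_select )* tail;
# becomes, after simplify_select_rule(rules, "some_select"):
#     some_select: ( DOT member | bit_select )* tail;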
def remove_useless_and_normalize_names(p):
    renames = {}
    for k, v in SvRule2Antlr4Rule.SPEC_SYMB.items():
        renames[k] = v
    # rm_newline_from_simple_rules(p.rules)
    # nts = get_used_non_terminals(p.rules)
    # def_nts = get_defined_non_terminals(p.rules)

    # overspecified
    # finish_number 0 - 2
    replace_rule("finish_number", "UNSIGNED_NUMBER", renames, p)

    # scalar_constant 1b number
    replace_rule("scalar_constant", "integral_number", renames, p)

    # init_val 1b value
    replace_rule("init_val", "integral_number", renames, p)

    # edge_descriptor 2 tristate digits
    # edge_descriptor: '01' | '10' | Z_OR_X ZERO_OR_ONE | ZERO_OR_ONE Z_OR_X;
    # dpi_spec_string  two concrete strings
    replace_rule("dpi_spec_string", "STRING_LITERAL", renames, p)

    # #0 -> # UNSIGNED_NUMBER
    primitive_delay = Antlr4Rule(
        "primitive_delay",
        Antlr4Sequence([
            Antlr4Symbol("HASH", False),
            Antlr4Symbol("UNSIGNED_NUMBER", False),
        ]))
    p.rules.append(primitive_delay)
    replace_rule("#0", "primitive_delay", renames, p)

    # all same
    ps_identifier_rules = [
        "ps_class_identifier",
        "ps_covergroup_identifier",
        "ps_checker_identifier",
    ]
    for name in ps_identifier_rules:
        replace_rule(name, "ps_identifier", renames, p)

    ps_or_hierarchical_id_rules = [
        "ps_or_hierarchical_net_identifier",
        "ps_or_hierarchical_property_identifier",
        "ps_or_hierarchical_sequence_identifier",
        "ps_or_hierarchical_tf_identifier",
    ]

    ps_or_hierarchical_identifier = Antlr4Rule(
        "ps_or_hierarchical_identifier",
        Antlr4Selection([
            Antlr4Sequence([
                Antlr4Symbol("package_scope", False),
                Antlr4Symbol("identifier", False)
            ]),
            # can be only identifier
            Antlr4Symbol("hierarchical_identifier", False),
        ]))
    p.rules.append(ps_or_hierarchical_identifier)
    for name in ps_or_hierarchical_id_rules:
        replace_rule(name, "ps_or_hierarchical_identifier", renames, p)

    to_lexer = [
        "c_identifier",
        "unsigned_number",
        "simple_identifier",
        "system_tf_identifier",
        "string_literal",
        "binary_number",
        "octal_number",
        "hex_number",
        "fixed_point_number",
        "escaped_identifier",
        "unbased_unsized_literal",
        "time_literal",

        # because it is very hard to switch lexer mode to parse an
        # edge_descriptor, it is easier to just parse a comma separated
        # list of 2-char items
        "edge_control_specifier",
        "level_symbol",
        "output_symbol",
        "edge_symbol",
        "file_path_spec",
    ]
    for tl in to_lexer:
        renames[tl] = tl.upper()

    fragments = {
        "binary_value", "octal_value", "hex_value", "decimal_base",
        "binary_base", "octal_base", "hex_base", "non_zero_unsigned_number",
        "size", "sign", "edge_descriptor", "non_zero_decimal_digit",
        "decimal_digit", "binary_digit", "octal_digit", "hex_digit", "x_digit",
        "z_digit", "exp", 'white_space', 'zero_or_one', 'z_or_x',
        'Any_ASCII_Characters',
        "any_printable_ASCII_character_except_white_space", "time_unit"
    }

    for r in p.rules:
        if r.name.startswith("$"):
            renames[r.name] = r.name.replace("$", "dolar_")
        if r.name in fragments:
            r.is_fragment = True
    # lexer fragments get uppercase names
    for fr in fragments:
        renames[fr] = fr.upper()

    identifier_rule_equivalents = {
        r.name
        for r in collect_simple_rules(p.rules, "identifier")
    }
    hierarchical_identifier_rule_equivalents = {
        r.name
        for r in collect_simple_rules(p.rules, "hierarchical_identifier")
    }

    to_remove = {
        "comment",
        "one_line_comment",
        "block_comment",
        "comment_text",
        "white_space",

        # library rules
        "library_text",
        "library_description",
        "library_declaration",
        "include_statement",
        "file_path_spec",
    }
    to_remove.update(identifier_rule_equivalents)
    to_remove.update(hierarchical_identifier_rule_equivalents)
    simple_rules_to_remove = [
        "default_clause",  # default kw
        "variable_port_type",
        "limit_value",  # used only in more specific limit values
        "dpi_function_proto",  # used only in dpi block so we already know
        "dpi_task_proto",  # used only in dpi block so we already know
        "property_lvar_port_direction",  # used only in property so we already know
        # "consecutive_repetition",  # useless
        "trans_item",
        "ordered_parameter_assignment",
        "function_statement",
        "case_expression",
        "case_item_expression",
        "open_value_range",  # used only in open_range_list so we already know
        "constant_assignment_pattern_expression",  # parser do not see the difference between const/non const
        "clockvar",  # used only in clockvar_expression
        "path_delay_expression",  # used only in more specific rules
        "constant_function_call",  # parser do not see the difference between const/non const
        "function_subroutine_call",
        "constant_let_expression",  # parser do not see the difference between const/non const
        "attr_name",  # used only in attr_spec
        "array_identifier",  # never used
        "checker_identifier",  # used only in rule with same name
        "class_identifier",
        "class_variable_identifier",
        "clocking_identifier",
        "config_identifier",
        "const_identifier",
        "constraint_identifier",
        "covergroup_identifier",
        "covergroup_variable_identifier",
        "cover_point_identifier",
        "cross_identifier",
        "enum_identifier",
        "formal_identifier",
        "function_identifier",
        "generate_block_identifier",
        "genvar_identifier",
        "hierarchical_array_identifier",
        "hierarchical_block_identifier",
        "hierarchical_event_identifier",
        "hierarchical_net_identifier",
        "hierarchical_parameter_identifier",
        "hierarchical_property_identifier",
        "hierarchical_sequence_identifier",
        "hierarchical_task_identifier",
        "hierarchical_tf_identifier",
        "hierarchical_variable_identifier",
        "index_variable_identifier",
        "interface_identifier",
        "interface_instance_identifier",
        # "inout_port_identifier",
        # "input_port_identifier",
        "instance_identifier",
        "member_identifier",
        "method_identifier",
        "modport_identifier",
        "module_identifier",
        "net_identifier",
        # "output_port_identifier"
        "package_identifier",
        "parameter_identifier",
        "port_identifier",
        "production_identifier",
        "program_identifier",
        "property_identifier",
        "sequence_identifier",
        "signal_identifier",
        "specparam_identifier",
        "task_identifier",
        "tf_identifier",
        "terminal_identifier",
        "topmodule_identifier",
        "udp_identifier",
        "variable_identifier",
        "let_identifier",
        "type_identifier",

        # covergroup_expression
        "with_covergroup_expression",
        "set_covergroup_expression",
        "integer_covergroup_expression",
        "cross_set_expression",
        "data_event",
        "reference_event",
    ]
    for sr in simple_rules_to_remove:
        remove_simple_rule(sr, p)
    p.rules = [r for r in p.rules if r.name not in to_remove]

    for idname in identifier_rule_equivalents:
        renames[idname] = "identifier"

    for idname in hierarchical_identifier_rule_equivalents:
        renames[idname] = "hierarchical_identifier"

    apply_rename = generate_renamer(renames, True)
    for r in p.rules:
        r.walk(apply_rename)
        r.walk(mark_regex)

    for k, v in SvRule2Antlr4Rule.SPEC_SYMB.items():
        body = Antlr4Symbol(k, True)
        r = Antlr4Rule(v, body)
        if k in ['"', "_"]:
            r.is_fragment = True
        p.rules.append(r)

    # C_IDENTIFIER is just a normal identifier without "$", so it can also match plain identifiers
    identifier = rule_by_name(p.rules, "identifier")
    identifier.body.insert(0, Antlr4Symbol("C_IDENTIFIER", False))

    kws = collect_keywords(p.rules)
    for kw in kws:
        if kw not in IEEE1800_2017_KEYWORDS and kw != "1step" and "$" not in kw:
            identifier.body.append(Antlr4Symbol("KW_" + kw.upper(), False))
Example #7
                                    and not isinstance(s2, Antlr4Indent)):
                                all_to_remove = False
                        if _s and all_to_remove:
                            s.pop()
                            continue
                break
            if r.name == "signature":
                # remove the outer ()? as this rule is already wrapped in ()? everywhere it is used
                a, b = r.body[0].body
                a = a.body
                b = b.body
                # ( ( type_mark ( COMMA type_mark )* )? ( RETURN type_mark )? )?
                r.body = Antlr4Selection([
                    Antlr4Sequence([a, Antlr4Newline(),
                                    Antlr4Indent(1)]),
                    Antlr4Sequence([a, b,
                                    Antlr4Newline(),
                                    Antlr4Indent(1)]),
                    Antlr4Sequence([b, Antlr4Newline()]),
                ])

    HEADER = """/*
 * Grammar extracted from the VHDL 1993, 2002, 2008, 2018 standard and then merged together
 * (the standard is selected by parser property)
 */

grammar vhdl;
"""
    with open("vhdl.g4", "w") as f:
        f.write("\n\n")
        f.write(HEADER)
        for kw in keywords:
Example #8
def _optimise_selections(elm: iAntlr4GramElem):
    """
    Reduce selection options which differ only in single item
    to a sequence with selection of different items.

    Example:
    a:
        b c d
      | b e d;

    to a: b (c | e) d;

    :note: ignores visuals
    :note: similar sequences have to be directly after each other
        because if they were not the priority of choices would
        be changed
    """
    if isinstance(elm, Antlr4Sequence):
        modified = False
        for e in elm:
            # evaluate the recursive call first so it is not short-circuited
            # away once "modified" is already True
            modified = _optimise_selections(e) or modified
        return modified
    elif isinstance(elm, Antlr4Selection):
        # List[Tuple[index of different item,
        #            List[Tuple[index in choices, selection options to replace]]]]
        to_reduce = []
        # tuple (index in choices, value)
        similar_choices = []
        diff_in = None
        for c_i, choice in enumerate(elm):
            if not similar_choices:
                if isinstance(choice, Antlr4Sequence) and len_without_visuals(choice) > 1:
                    similar_choices.append((c_i, choice))
                continue
            else:
                _, prev = similar_choices[0]
                compatible = True
                if (isinstance(prev, Antlr4Sequence)
                        and isinstance(choice, Antlr4Sequence)
                        and len_without_visuals(prev) == len_without_visuals(choice)):
                    # check if differs in a single item
                    for i, (prev_item, current_item) in enumerate(
                            zip(iter_non_visuals(prev), iter_non_visuals(choice))):
                        if prev_item != current_item:
                            if diff_in == i or diff_in is None:
                                diff_in = i
                            else:
                                compatible = False
                                break
                    if compatible:
                        similar_choices.append((c_i, choice))
                else:
                    compatible = False

                if not compatible:
                    if len(similar_choices) > 1:
                        to_reduce.append((diff_in, similar_choices))
                    # reset search
                    if isinstance(choice, Antlr4Sequence) and len_without_visuals(choice) > 1:
                        similar_choices = [(c_i, choice)]
                    else:
                        similar_choices = []
                    diff_in = None

        if len(similar_choices) > 1:
            to_reduce.append((diff_in, similar_choices))

        offset = 0
        for diff_in, _choices in to_reduce:
            choices = [c[1] for c in _choices]
            start_i = _choices[0][0] + offset
            assert len(_choices) > 1
            assert elm[start_i] is choices[0]
            diff_item_substitution = Antlr4Selection([
                index_non_visual(c, diff_in) for c in choices
            ])
            part_to_exclude = index_non_visual(choices[0], diff_in)
            new_choice = Antlr4Sequence([
                (e if e is not part_to_exclude else diff_item_substitution)
                for e in choices[0]
            ])
            elm[start_i] = new_choice
            del elm[start_i + 1: start_i + len(choices)]

            offset -= len(choices) - 1
        return len(to_reduce)
    return False
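
# Usage sketch (hedged: uses only the Antlr4* constructors already shown in
# these examples; the symbols b, c, d and e are made-up placeholders):
def _example_optimise_selections():
    b, c, d, e = (Antlr4Symbol(n, False) for n in ("b", "c", "d", "e"))
    sel = Antlr4Selection([
        Antlr4Sequence([b, c, d]),
        Antlr4Sequence([deepcopy(b), e, deepcopy(d)]),
    ])
    _optimise_selections(sel)
    # sel now holds a single option equivalent to "b ( c | e ) d"
    return sel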
Example #9
def match_replace_fn(o):
    # nested callback excerpt: "cls" and "intf" come from the enclosing function
    if o == cls:
        return Antlr4Selection([o, deepcopy(intf)])
Example #10
def match_replace_fn(o):
    # nested callback excerpt: C_IDENTIFIER comes from the enclosing function
    if o == C_IDENTIFIER:
        return Antlr4Selection(
            [C_IDENTIFIER,
             Antlr4Symbol("ESCAPED_IDENTIFIER", False)])