Exemple #1
0
def optimise_subroutine_call(rules):
    """
    Rewrite the "subroutine_call" rule and the rules coupled to it.

    The rewrite is done in place and every step is guarded by an assert
    which checks that the part being replaced still has the expected form:

    * "subroutine_call" is passed to Antlr4GenericOptimizer
    * a new rule "subroutine_call_args" is inserted before "subroutine_call"
    * item 0 of the "subroutine_call" body is replaced
    * the leading "primary_no_cast_no_call" item is removed from "primary"
    * item 2 of the "subroutine_call" body is replaced
    * item 1 of the "subroutine_call" body is deleted

    :param rules: list of grammar rules, modified in place
    """
    r = rule_by_name(rules, "subroutine_call")
    Antlr4GenericOptimizer().optimize([
        r,
    ])
    # expected form of the first item of subroutine_call after optimization
    c0 = Antlr4parser().from_str("""
    ( class_qualifier | ( primary | implicit_class_handle ) DOT )?
    ( 
        identifier ( attribute_instance )* ( LPAREN list_of_arguments RPAREN )? 
        |  array_method_name ( attribute_instance )* ( LPAREN list_of_arguments RPAREN )?
            ( KW_WITH LPAREN expression RPAREN )? 
        | randomize_call 
    )

    """)
    assert r.body[0].eq_relaxed(c0), r.body[0]
    # new helper rule holding the attribute/argument/"with" suffix
    subroutine_call_args = Antlr4Rule(
        "subroutine_call_args",
        Antlr4parser().from_str("""
        ( attribute_instance )* ( LPAREN list_of_arguments RPAREN )? 
        ( KW_WITH LPAREN expression RPAREN )?
    """))
    rules.insert(rules.index(r), subroutine_call_args)
    # replacement of the first item: a head followed by DOT separated items
    new_c0 = Antlr4parser().from_str("""
    ( primary_no_cast_no_call
      | cast 
    )
    subroutine_call_args
    ( 
       DOT ( array_method_name | randomize_call | primary_no_cast_no_call | cast )
       subroutine_call_args
    )*
    """)
    r.body[0] = new_c0
    # primary_no_cast_no_call is now referenced from subroutine_call,
    # drop it from the beginning of primary
    primary = rule_by_name(rules, "primary")
    assert primary.body[0].eq_relaxed(
        Antlr4Symbol("primary_no_cast_no_call", False))
    del primary.body[0]

    # expected form of the system task/function call item
    c2 = Antlr4parser().from_str("""
        any_system_tf_identifier ( LPAREN ( list_of_arguments 
                                      | data_type ( COMMA expression )? 
                                      | expression ( COMMA ( expression )? )* ( COMMA 
                                      ( clocking_event )? )? 
                                      ) RPAREN )? 
    """)
    assert r.body[2].eq_relaxed(c2)
    r.body[2] = Antlr4parser().from_str("""
        any_system_tf_identifier ( LPAREN (
                                     ( data_type )? list_of_arguments
                                     ( COMMA clocking_event )?
                                 ) RPAREN )?
    """)

    # item 1 is deleted last so the index 2 used above stays valid
    c1 = Antlr4parser().from_str("""
        ps_or_hierarchical_identifier ( attribute_instance )*
        ( LPAREN list_of_arguments RPAREN )?
    """)
    assert r.body[1].eq_relaxed(c1), r.body[1]
    del r.body[1]
Exemple #2
0
def fix_class_scope(rules):
    """
    Call _inline_rule for "class_type" with "class_scope" as the only target
    (presumably this expands class_type inside class_scope - confirm against
    _inline_rule).

    Needed because otherwise class_type consumes the last id after ::
    and it is not possible to recover.

    :param rules: list of grammar rules
    """
    class_scope = rule_by_name(rules, "class_scope")
    class_type = rule_by_name(rules, "class_type")
    _inline_rule([class_scope], class_type)
Exemple #3
0
    def handle_conditional_fn(bin_op_choices, current_expr_rule):
        # Remove the left recursion between cond_predicate and
        # conditional_expression: conditional_expression is taken out of
        # the rule list and cond_predicate is passed to _inline_rule with
        # conditional_expression as the only target.
        cond_pred = rule_by_name(rules, "cond_predicate")
        cond_expr = rule_by_name(rules, "conditional_expression")
        rules.remove(cond_expr)
        _inline_rule([cond_expr], cond_pred)

        # everything after the first item becomes a new choice
        tail = cond_expr.body[1:]
        bin_op_choices.append(Antlr4Sequence(tail))
Exemple #4
0
def fix_lexer_for_table_def(p):
    """
    Replace every use of OUTPUT_SYMBOL by LEVEL_SYMBOL, remove the
    OUTPUT_SYMBOL rule and wrap the tokens used by "combinational_body" and
    "sequential_entry" in the lexer mode TABLE_MODE
    (entered on KW_TABLE, left on KW_ENDTABLE).

    :param p: object with a "rules" list, modified in place
    """
    # OUTPUT_SYMBOL is a special case of LEVEL_SYMBOL
    output_symbol = Antlr4Symbol("OUTPUT_SYMBOL", False)

    def rename_output_symbol(node):
        if node == output_symbol:
            node.symbol = "LEVEL_SYMBOL"

    for rule in p.rules:
        rule.body.walk(rename_output_symbol)
    p.rules.remove(rule_by_name(p.rules, "OUTPUT_SYMBOL"))

    combinational = get_all_used_lexer_tokens(p.rules, "combinational_body")
    sequential = get_all_used_lexer_tokens(p.rules, "sequential_entry")
    mode_tokens = combinational.union(sequential)

    # [TODO] += comments, whitespaces
    # KW_TABLE enters the mode, it is not part of it
    mode_tokens.remove("KW_TABLE")
    shared_with_default_mode = {
        'SEMI', 'RPAREN', 'COLON', 'LPAREN', 'MINUS', *COMMENT_AND_WS_TOKENS
    }
    wrap_in_lexer_mode(
        p.rules,
        "TABLE_MODE",
        {"KW_TABLE"},
        {"KW_ENDTABLE"},
        mode_tokens,
        shared_with_default_mode)
Exemple #5
0
def split_rule(rules, rule_name, symbols_to_extract: List[str],
               subrule_name: str):
    """
    Keep in the rule only the options whose direct left corner contains
    a symbol from symbols_to_extract and move all other options
    to a new rule.

    The new rule is referenced as the first option of the original rule
    and it is inserted into rules just before the original rule.

    :param rules: list of grammar rules, modified in place
    :param rule_name: name of the rule to split (its body has to be
        Antlr4Selection)
    :param symbols_to_extract: start symbols of options which should stay
    :param subrule_name: name of the newly created rule
    :return: the newly created rule
    """
    rule = rule_by_name(rules, rule_name)
    assert isinstance(rule.body, Antlr4Selection), rule

    # collect options which do not start with any of the requested symbols
    moved = Antlr4Selection([])
    for option in rule.body:
        left_corner = set()
        _direct_left_corner(option, left_corner, allow_eps_in_sel=True)
        if left_corner.isdisjoint(symbols_to_extract):
            moved.append(option)

    remaining = Antlr4Selection(
        [option for option in rule.body if option not in moved])
    remaining.insert(0, Antlr4Symbol(subrule_name, False))
    # a selection with a single option is replaced by that option
    rule.body = remaining[0] if len(remaining) == 1 else remaining

    assert len(moved) > 0
    sub_body = moved[0] if len(moved) == 1 else Antlr4Selection(moved)

    sub_rule = Antlr4Rule(subrule_name, sub_body)
    rules.insert(rules.index(rule), sub_rule)
    return sub_rule
Exemple #6
0
def iterate_everything_except_first(rules, rule_name):
    """
    Apply _iterate_everything_except_first_and_replace_first on the body
    of the rule, using its first non visual item as the replacement.

    :param rules: list of grammar rules
    :param rule_name: name of the rule to transform
    :raises NotImplementedError: if the rule body is not Antlr4Sequence
    """
    rule = rule_by_name(rules, rule_name)
    if not isinstance(rule.body, Antlr4Sequence):
        raise NotImplementedError()

    first_item = next(iter_non_visuals(rule.body))
    _iterate_everything_except_first_and_replace_first(rule.body, first_item)
def remove_simple_rule(name, p):
    """
    Inline a rule whose body is just a single symbol.

    :param name: name of the rule to inline
    :param p: object with a "rules" list
    """
    rule = rule_by_name(p.rules, name)
    assert rule is not None, name

    items = rule.body
    assert len(items) == 1, rule
    only_item = items[0]
    assert isinstance(only_item, Antlr4Symbol)

    inline_rule(p.rules, name)
def numbers_add_whitespace_after_base(rules):
    """
    Allow an optional WHITE_SPACE around the parts of a number base and
    move the optional size prefix from the lexer number rules to the
    "integral_number" rule.

    * "decimal_number" is inlined
    * every *_NUMBER rule listed below loses its leading ( SIZE )? item
      and becomes a fragment
    * every *_BASE rule gets an optional WHITE_SPACE inserted at the
      indexes 2 and 1 and appended at the end
    * a new rule ANY_BASED_NUMBER (selection of all number rules) is
      inserted before HEX_NUMBER
    * the body of "integral_number" is replaced

    :param rules: list of grammar rules, modified in place
    """
    # A tuple (not a set) so the options of ANY_BASED_NUMBER are always
    # generated in this order, independent of string hash randomization.
    number_rules = (
        "DECIMAL_NUMBER_WITH_BASE",
        "DECIMAL_INVALID_NUMBER_WITH_BASE",
        "DECIMAL_TRISTATE_NUMBER_WITH_BASE",
        "BINARY_NUMBER",
        "OCTAL_NUMBER",
        "HEX_NUMBER",
    )
    number_base_rules = {
        "DECIMAL_BASE",
        "BINARY_BASE",
        "OCTAL_BASE",
        "HEX_BASE",
    }
    # used only in integral_number
    inline_rule(rules, "decimal_number")

    def opt_ws():
        # a fresh object for every use, the items are not shared
        return Antlr4Option(Antlr4Symbol("WHITE_SPACE", False))

    for r in rules:
        if r.name in number_rules:
            # ( SIZE )? *_BASE ....
            assert r.body[0].body.symbol == "SIZE", r
            assert r.body[1].symbol.endswith("_BASE"), r
            del r.body[0]
            r.is_fragment = True

        elif r.name in number_base_rules:
            # APOSTROPHE ( [sS] )? [dD];
            # the higher index is inserted first so the lower one stays valid
            r.body.insert(2, opt_ws())
            r.body.insert(1, opt_ws())
            r.body.append(opt_ws())

    any_based_number = Antlr4Rule(
        "ANY_BASED_NUMBER",
        Antlr4Selection([Antlr4Symbol(n, False) for n in number_rules]))
    rules.insert(rules.index(rule_by_name(rules, "HEX_NUMBER")),
                 any_based_number)

    integral_number = rule_by_name(rules, "integral_number")
    integral_number.body = Antlr4parser().from_str("""
    ( UNSIGNED_NUMBER )? ANY_BASED_NUMBER
    | UNSIGNED_NUMBER
    """)
Exemple #9
0
def optimize_action_block(rules):
    """
    Replace the body of "action_block" by a selection of three explicit
    options, after checking that the rule still has the expected form.

    :param rules: list of grammar rules
    """
    rule = rule_by_name(rules, "action_block")

    expected = Antlr4parser().from_str(
        "( ( statement )? KW_ELSE )? statement_or_null")
    assert rule.body.eq_relaxed(expected)

    replacement = Antlr4parser().from_str("""
          ( attribute_instance )* SEMI
        | KW_ELSE statement_or_null
        | statement ( KW_ELSE statement_or_null )?
    """)
    rule.body = replacement
Exemple #10
0
def fix_subroutine_call(rules):
    """
    Prepend the option "( class_qualifier )? method_call_body"
    to the body of "subroutine_call".

    :param rules: list of grammar rules
    """
    subroutine_call = rule_by_name(rules, "subroutine_call")
    qualified_method_call = Antlr4Sequence([
        Antlr4Option(Antlr4Symbol("class_qualifier", False)),
        Antlr4Symbol("method_call_body", False),
    ])
    subroutine_call.body.insert(0, qualified_method_call)
Exemple #11
0
def rm_ambiguity(rules):
    """
    In "variable_decl_assignment" replace every "( ASSIGN class_new )?"
    by the body of that option.

    :param rules: list of grammar rules
    """
    target_rule = rule_by_name(rules, "variable_decl_assignment")
    optional_assign_new = Antlr4parser().from_str("( ASSIGN class_new )?")

    def unwrap_option(item):
        # None is returned for every item other than the searched option
        return item.body if item == optional_assign_new else None

    replace_item_by_sequence(target_rule, unwrap_option)
Exemple #12
0
def _optimize_ps_parameter_identifier(rules):
    """
    Replace the body of "ps_parameter_identifier".

    Original form noted in the source:
        ( ( package_scope | class_scope )?
          | ( identifier ( LSQUARE_BR constant_expression RSQUARE_BR )? DOT )*
        ) identifier

    :param rules: list of grammar rules
    """
    rule = rule_by_name(rules, "ps_parameter_identifier")
    new_body = Antlr4parser().from_str("""
        package_or_class_scoped_id
        ( DOT identifier ( LSQUARE_BR constant_expression RSQUARE_BR )? )*
    """)
    rule.body = new_body
Exemple #13
0
def left_recurse_remove(rules):
    """
    Run the sequence of grammar transformations which remove the left
    recursion from the rules. The order of the steps is significant.

    Removing Left Recursion from Context-Free Grammars
    https://www.microsoft.com/en-us/research/wp-content/uploads/2000/04/naacl2k-proc-rev.pdf

    http://web.science.mq.edu.au/~mjohnson/papers/johnson-left-corner.pdf

    :note: supports the '|',?,* in rules
    :param rules: list of grammar rules
    :return: the rules as returned by the last optimise_selections() call,
        with "binary_operator" removed
    """
    # :note: higher priority = sooner in parse tree

    rules = optimise_selections(rules)
    direct_left_recurse_rm(rules, 'block_event_expression')
    direct_left_recurse_rm(rules, 'event_expression')
    # direct_left_recurse_rm(rules, 'constant_expression')
    solve_left_recurse_and_op_precedence_for_constant_expression(rules)
    # method_call_root - only in method_call
    # method_call      - only in subroutine_call
    inline_rule(rules, "method_call")
    inline_rule(rules, "method_call_root")
    # creates primary_no_cast_no_call from the options of primary
    # which do not start with cast/subroutine_call
    split_rule(rules, "primary", ["cast", "subroutine_call"],
               "primary_no_cast_no_call")

    # the same for constant_primary
    split_rule(rules, "constant_primary", ["constant_cast", "subroutine_call"],
               "constant_primary_no_cast_no_call")

    # inline_rule(rules, "cast")
    # inline_rule(rules, "constant_cast")
    # iterate_everything_except_first(
    #   rules, "cast")
    # iterate_everything_except_first(
    #   rules, "constant_cast")
    # [TODO] check if really all combinations of cast/call are possible
    replace_symbol_in_rule(rules, "casting_type", "constant_primary",
                           "constant_primary_no_cast_no_call")

    # solve expression - conditional_expression left recurse
    # copy cond_predicate

    inline_rule(rules, "inside_expression")
    subroutine_call_rm_lr(rules)

    inline_rule(rules, "module_path_conditional_expression")
    direct_left_recurse_rm(rules, 'module_path_expression')

    # inline_rule(rules, "inside_expression")
    inline_rule(rules, "expression_or_cond_pattern")
    inline_rule(rules, "cond_pattern")
    # inline_rule(rules, "conditional_expression")
    rules = optimise_selections(rules)

    solve_left_recurse_and_op_precedence_for_expression(rules)
    # the binary_operator rule itself is dropped at the end
    binary_operator = rule_by_name(rules, "binary_operator")
    rules.remove(binary_operator)

    return rules
def replace_symbol_in_rule(rules,
                           rule_name,
                           symbol_name,
                           symbol_name_replace,
                           only_first=False):
    """
    Look up the rule by its name and delegate to _replace_symbol_in_rule.

    :param rules: list of grammar rules
    :param rule_name: name of the rule in which the symbol is replaced
    :param symbol_name: name of the symbol to replace
    :param symbol_name_replace: name of the replacement symbol
    :param only_first: forwarded to _replace_symbol_in_rule
    """
    target_rule = rule_by_name(rules, rule_name)
    _replace_symbol_in_rule(
        target_rule,
        symbol_name,
        symbol_name_replace,
        only_first=only_first)
Exemple #15
0
def optimize_item_rules(rules):
    """
    Inline several *_item rules and replace the last option of
    "generate_item" (checker_or_generate_item) by
    "KW_RAND data_declaration", followed by a new option
    "program_generate_item".

    :param rules: list of grammar rules
    """
    rules_to_inline = (
        "package_or_generate_item_declaration",
        "module_or_generate_item",
        "module_or_generate_item_declaration",
        "module_common_item",
        "interface_or_generate_item",
        "checker_or_generate_item_declaration",
    )
    for name in rules_to_inline:
        inline_rule(rules, name)

    generate_item = rule_by_name(rules, "generate_item")
    last_option = generate_item.body[-1]
    assert last_option.eq_relaxed(
        Antlr4Symbol("checker_or_generate_item", False))

    rand_data_decl = Antlr4parser().from_str("KW_RAND data_declaration")
    generate_item.body[-1] = rand_data_decl
    program_item = Antlr4parser().from_str("program_generate_item")
    generate_item.body.append(program_item)
Exemple #16
0
def fix_dpi_import_export(rules):
    """
    In "dpi_import_export" replace every C_IDENTIFIER
    by "C_IDENTIFIER | ESCAPED_IDENTIFIER".

    :param rules: list of grammar rules
    """
    c_identifier = Antlr4Symbol("C_IDENTIFIER", False)

    def c_or_escaped_identifier(item):
        # None is returned for every item other than C_IDENTIFIER
        if not (item == c_identifier):
            return None
        escaped = Antlr4Symbol("ESCAPED_IDENTIFIER", False)
        return Antlr4Selection([c_identifier, escaped])

    dpi_rule = rule_by_name(rules, "dpi_import_export")
    replace_item_by_sequence(dpi_rule.body, c_or_escaped_identifier)
Exemple #17
0
def direct_left_recurse_rm(rules, rule_name):
    """
    Remove the direct left recursion from a rule.

    The choices of the rule which start with the rule itself are removed
    from it and the rule is renamed to "<rule_name>_item". The removed
    choices are transformed by
    _iterate_everything_except_first_and_replace_first (the leading
    self-reference is replaced by "<rule_name>_item") and they become
    the body of a new rule with the original name. The new rule is inserted
    just before the renamed one.

    :param rules: list of grammar rules, modified in place
    :param rule_name: name of the directly left recursive rule
    :raises NotImplementedError: if the rule body is not a selection or
        a sequence, if some choice is not a sequence or if no choice would
        remain in the rule
    """
    r = rule_by_name(rules, rule_name)

    if isinstance(r.body, Antlr4Selection):
        choices = r.body
    elif isinstance(r.body, Antlr4Sequence):
        choices = [
            r.body,
        ]
    else:
        raise NotImplementedError()

    # find choices which start with the non terminal of this rule
    lr_choices = []
    for c in choices:
        if isinstance(c, Antlr4Sequence):
            first = next(iter_non_visuals(c))
            if isinstance(first, Antlr4Symbol) and first.symbol == rule_name:
                lr_choices.append(c)
        else:
            raise NotImplementedError()

    # remove choices which are causing the left recursion
    assert len(lr_choices) >= 1, rule_name
    for lr_choice in lr_choices:
        choices.remove(lr_choice)

    if len(choices) == 0:
        raise NotImplementedError()
    elif len(choices) == 1:
        r.body = choices[0]

    # rename this rule to rule_item, the name must not be used
    # by any existing rule
    r_base_name = r.name + "_item"
    for _r in rules:
        assert _r.name != r_base_name, r_base_name
    r.name = r_base_name

    # create the new rule which implements the removed choices and also
    # expands to rule_item
    choices_new = Antlr4Selection([])
    for lr_choice in lr_choices:
        first = next(iter_non_visuals(lr_choice))
        assert isinstance(first, Antlr4Symbol) and first.symbol == rule_name
        repl = Antlr4Symbol(r_base_name, False)
        _iterate_everything_except_first_and_replace_first(lr_choice, repl)

        if not choices_new:
            # formatting of the first choice
            lr_choice.insert(0, Antlr4Newline())
            lr_choice.insert(1, Antlr4Indent(1))
        choices_new.append(lr_choice)

    body_new = choices_new[0] if len(choices_new) == 1 else choices_new
    r_new = Antlr4Rule(rule_name, body_new)
    rules.insert(rules.index(r), r_new)
Exemple #18
0
def rm_ambiguity(rules):
    """
    In "variable_decl_assignment" replace every "( ASSIGN class_new )?"
    by the body of that option.

    :param rules: list of grammar rules
    """
    target_rule = rule_by_name(rules, "variable_decl_assignment")
    assign_new = Antlr4Sequence([
        Antlr4Symbol("ASSIGN", False),
        Antlr4Symbol("class_new", False),
    ])
    optional_assign_new = Antlr4Option(assign_new)

    def unwrap_option(item):
        # None is returned for every item other than the searched option
        return item.body if item == optional_assign_new else None

    replace_item_by_sequence(target_rule, unwrap_option)
Exemple #19
0
def rm_semi_from_cross_body_item(rules):
    """
    Replace every SEMI in the first item of the "cross_body" rule body
    by an empty sequence, because SEMI is already part of cross_body_item.

    :param rules: list of grammar rules
    """
    cross_body = rule_by_name(rules, "cross_body")
    semi = Antlr4Symbol("SEMI", False)

    def drop_semi(item):
        # None is returned for every item other than SEMI
        return Antlr4Sequence([]) if item == semi else None

    replace_item_by_sequence(cross_body.body[0], drop_semi)
Exemple #20
0
def subroutine_call_rm_lr(rules):
    """
    Replace the content of the first non visual item of the option 2 of
    "subroutine_call" (the option which ends with method_call_body) by
    primary_no_cast_no_call, cast and implicit_class_handle.

    :param rules: list of grammar rules
    """
    rule = rule_by_name(rules, "subroutine_call")
    assert isinstance(rule.body, Antlr4Selection)

    items = list(iter_non_visuals(rule.body[2]))
    last_item = items[-1]
    assert last_item.symbol == "method_call_body", last_item.symbol

    # the object is modified in place so it stays a part of the rule
    start: Antlr4Selection = items[0]
    start.clear()
    new_starts = ("primary_no_cast_no_call", "cast", "implicit_class_handle")
    start.extend([Antlr4Symbol(name, False) for name in new_starts])
Exemple #21
0
def replace_same_rules(rules, rules_to_replace: List[str], replacement: str):
    """
    Remove all rules named in rules_to_replace (their bodies have to be
    the same) and replace every reference to them by the replacement symbol.

    :param rules: list of grammar rules, modified in place
    :param rules_to_replace: names of the rules to remove
    :param replacement: name of the symbol used instead of the removed rules
    """
    reference = None
    for name in rules_to_replace:
        candidate = rule_by_name(rules, name)
        if reference is not None:
            # every other rule has to match the first one
            same_body = (
                reference.body == candidate.body
                or reference.body.toAntlr4() == candidate.body.toAntlr4()
            )
            assert same_body, (reference, candidate)
        else:
            reference = candidate
        rules.remove(candidate)

    for remaining_rule in rules:
        for old_name in rules_to_replace:
            _replace_symbol_in_rule(remaining_rule, old_name, replacement, False)
Exemple #22
0
def optimize_primary(rules):
    """
    In "primary_no_cast_no_call" replace the option 5 by
    "package_or_class_scoped_hier_id_with_select" and delete the option 8
    (let_expression), after checking that both have the expected form.

    :param rules: list of grammar rules
    """
    rule = rule_by_name(rules, "primary_no_cast_no_call")

    def assert_eq(index, s):
        expected = Antlr4parser().from_str(s)
        actual = rule.body[index]
        assert (actual.eq_relaxed(expected)), actual

    # both checks run before any modification
    assert_eq(5, "package_or_class_scoped_hier_id_with_const_select select")
    assert_eq(8, "let_expression")  # is just call

    replacement = Antlr4parser().from_str("""
        package_or_class_scoped_hier_id_with_select
    """)
    rule.body[5] = replacement
    del rule.body[8]
Exemple #23
0
def fix_implicit_data_type(rules):
    """
    Rewrite the body of "implicit_data_type"::

        : (signing)? (packed_dimension)*
        ->
        : signing (packed_dimension)*
        | (packed_dimension)+
        ;

    :param rules: list of grammar rules
    """
    rule = rule_by_name(rules, "implicit_data_type")

    signed_form = Antlr4Sequence([
        Antlr4Symbol("signing", False),
        Antlr4Iteration(Antlr4Symbol("packed_dimension", False)),
    ])
    dimensions_only = Antlr4Iteration(
        Antlr4Symbol("packed_dimension", False), positive=True)

    rule.body = Antlr4Selection([signed_form, dimensions_only])
def selection_extract_common(rules, rule_name_a, rule_name_b, new_rule_name):
    """
    Extract the common prefix of two sequence rules to a new rule::

        a0: a b c
        b0: a b d
        ->
        new_rule_name: a b
        a0: new_rule_name c
        b0: new_rule_name d

    The new rule is inserted just before the rule rule_name_a.
    The common prefix has to contain at least 2 items.

    :param rules: list of grammar rules, modified in place
    :param rule_name_a: name of the first rule (body has to be
        Antlr4Sequence)
    :param rule_name_b: name of the second rule (body has to be
        Antlr4Sequence)
    :param new_rule_name: name of the rule created for the common prefix
    """

    a = rule_by_name(rules, rule_name_a)
    b = rule_by_name(rules, rule_name_b)
    assert isinstance(a.body, Antlr4Sequence), a
    assert isinstance(b.body, Antlr4Sequence), b

    # Count the equal leading items. Counting (instead of reusing the loop
    # index) gives the correct length also if one body is a prefix of
    # the other and the loop ends without finding a difference.
    common_len = 0
    for _a, _b in zip(a.body, b.body):
        if _a != _b:
            break
        common_len += 1
    assert common_len > 1, (a, b)

    body = Antlr4Sequence(a.body[:common_len])
    c = Antlr4Rule(new_rule_name, body)
    rules.insert(rules.index(a), c)
    # each rule gets its own reference object
    a.body[:common_len] = [Antlr4Symbol(new_rule_name, False)]
    b.body[:common_len] = [Antlr4Symbol(new_rule_name, False)]
Exemple #25
0
def wrap_in_lexer_mode(rules, mode_name, enter_tokens, exit_tokens, tokens,
                       shared_tokens):
    """
    Move lexer rules to a lexer mode.

    :param rules: list of grammar rules, modified in place
    :param mode_name: name of the lexer mode
    :param enter_tokens: names of the rules which get a pushMode(mode_name)
        action
    :param exit_tokens: names of the rules which get a popMode() action
    :param tokens: set of names of the rules which are moved to the mode
    :param shared_tokens: names of the rules which stay as they are and are
        copied into the mode as "<mode_name>_<name>"; the copy gets
        a type(<name>) action unless the rule has the skip() action
    """
    # computed once, the membership is tested for every token
    exit_token_names = set(exit_tokens)

    for enter_token in enter_tokens:
        enter_rule = rule_by_name(rules, enter_token)
        enter_rule.lexer_actions.append(Antlr4LexerAction.pushMode(mode_name))

    # sorted so the copies of the shared rules are appended
    # in a deterministic order
    for t_name in sorted(tokens.union(shared_tokens)):
        t_rule = rule_by_name(rules, t_name)
        if t_name in shared_tokens:
            # copy the rule
            # translate mode specific token to a original token
            actions = deepcopy(t_rule.lexer_actions)
            if Antlr4LexerAction.skip() not in actions:
                actions.append(Antlr4LexerAction.type(t_name))
            mode_specific_t_rule = Antlr4Rule(mode_name + "_" + t_name,
                                              deepcopy(t_rule.body),
                                              lexer_mode=mode_name,
                                              lexer_actions=actions)
            rules.append(mode_specific_t_rule)
            # the original rule stays untouched, continue with the copy
            t_rule = mode_specific_t_rule

        t_rule.lexer_mode = mode_name
        if t_name in exit_token_names:
            t_rule.lexer_actions.append(Antlr4LexerAction.popMode())
Exemple #26
0
def solve_left_recurse_and_op_precedence_for_constant_expression(rules):
    """
    Replace the left recursive "constant_expression" by a chain of rules,
    one for each operator precedence group.

    Expected original form::

        constant_expression:
              constant_primary
              | unary_operator ( attribute_instance )* constant_primary
              | constant_expression binary_operator ( attribute_instance )* constant_expression
              | constant_expression QUESTIONMARK ( attribute_instance )* constant_expression COLON constant_expression;

    The first two options are extracted as "constant_expression_0",
    the original rule is removed and extract_bin_ops is called for every
    precedence group. The rule for the last group is named
    "constant_expression", the others "constant_expression_<index + 1>".

    :param rules: list of grammar rules, modified in place
    """
    # constant_expression_0:
    #       constant_primary
    #       | unary_operator ( attribute_instance )* constant_primary
    base_rule = extract_option_as_rule(
        rules, "constant_expression", [0, 1], "constant_expression_0")

    def handle_conditional_fn(bin_op_choices, current_expr_rule):
        # <current rule> ( QUESTIONMARK ( attribute_instance )*
        #                  constant_expression COLON constant_expression )*
        operand = Antlr4Symbol(current_expr_rule.name, False)
        ternary_tail = Antlr4Sequence([
            Antlr4Symbol("QUESTIONMARK", False),
            Antlr4Iteration(Antlr4Symbol("attribute_instance", False)),
            Antlr4Symbol("constant_expression", False),
            Antlr4Symbol("COLON", False),
            Antlr4Symbol("constant_expression", False),
        ])
        bin_op_choices.extend([operand, Antlr4Iteration(ternary_tail)])

    def handle_inside_fn(bin_op_choices, current_expr_rule):
        # nothing is added for the inside operator
        pass

    rules.remove(rule_by_name(rules, "constant_expression"))

    precedence_groups = get_operator_precedence_groups()
    last_index = len(precedence_groups) - 1
    previous_rule = base_rule
    for index, group in enumerate(precedence_groups):
        if index == last_index:
            name = "constant_expression"
        else:
            name = "constant_expression_%d" % (index + 1)
        previous_rule = extract_bin_ops(
            rules, previous_rule, group, name,
            handle_conditional_fn, handle_inside_fn)
Exemple #27
0
def replace_and_rename_same(rules: List[Antlr4Rule],
                            rules_to_replace: List[str],
                            name_of_new_rule: str):
    """
    Merge several rules with the same body into a single rule.

    The first rule named in rules_to_replace is kept and renamed
    to name_of_new_rule, the others are removed. Every reference to any
    of the replaced rules is renamed to name_of_new_rule.

    :param rules: list of grammar rules, modified in place
    :param rules_to_replace: names of the rules to merge, the bodies
        have to be the same
    :param name_of_new_rule: the new name, it must not be used
        by any rule
    """
    r = None
    for name in rules_to_replace:
        _r = rule_by_name(rules, name)
        if r is None:
            r = _r
        else:
            assert r.body == _r.body or r.body.toAntlr4() == _r.body.toAntlr4(), (r, _r)
            rules.remove(_r)

    # the new name must not collide with any existing rule
    for rule in rules:
        assert rule.name != name_of_new_rule, name_of_new_rule
    r.name = name_of_new_rule

    for rule in rules:
        for symbol_name in rules_to_replace:
            _replace_symbol_in_rule(rule, symbol_name, name_of_new_rule, False)
Exemple #28
0
def move_iteration_up_in_parse_tree(rules, rule_name):
    """
    Remove the ( )* which wraps the whole body of the rule and wrap
    every reference to the rule in ( )* instead.

    :param rules: list of grammar rules, modified in place
    :param rule_name: name of the rule whose body is a single non positive
        iteration
    """
    target = rule_by_name(rules, rule_name)

    # strip ()* from the rule body
    if isinstance(target.body, Antlr4Sequence):
        assert len(target.body) == 1, target.body
        target.body = target.body[0]
    iteration = target.body
    assert isinstance(iteration, Antlr4Iteration) and not iteration.positive
    target.body = iteration.body

    # wrap every appearance of the rule in ()*
    rule_ref = Antlr4Symbol(rule_name, False)

    def wrap_in_iteration(item):
        if item == rule_ref:
            return Antlr4Iteration(item, positive=False)
        return None

    for rule in rules:
        replace_item_by_sequence(rule.body, wrap_in_iteration)
Exemple #29
0
def extract_option_as_rule(rules, rule_name, options_i, new_rule_name):
    """
    Move the selected options of a rule to a new rule.

    The first selected option is replaced by a reference to the new rule,
    the other selected options are removed. The new rule is inserted
    just before the original one.

    :param rules: list of grammar rules, modified in place
    :param rule_name: name of the rule (body has to be Antlr4Selection)
    :param options_i: indexes of the options to extract
    :param new_rule_name: name of the newly created rule
    :return: the newly created rule
    """
    rule = rule_by_name(rules, rule_name)
    assert isinstance(rule.body, Antlr4Selection)

    extracted = Antlr4Selection([rule.body[i] for i in options_i])

    first_i = options_i[0]
    dropped = options_i[1:]
    reference = Antlr4Sequence([
        Antlr4Symbol(new_rule_name, False),
        Antlr4Newline(),
        Antlr4Indent(1),
    ])
    rule.body[first_i] = reference
    rule.body = Antlr4Selection([
        option
        for i, option in enumerate(rule.body)
        if i not in dropped
    ])

    # a selection with a single option is replaced by that option
    new_body = extracted[0] if len(extracted) == 1 else extracted
    new_rule = Antlr4Rule(new_rule_name, new_body)
    rules.insert(rules.index(rule), new_rule)
    return new_rule
Exemple #30
0
def replace_symbol_in_rule(rules,
                           rule_name,
                           symbol_name,
                           symbol_name_replace,
                           only_first=False):
    """
    Rename the symbol in the rule.

    :param rules: list of grammar rules
    :param rule_name: name of the rule which is walked
    :param symbol_name: name of the symbol to rename
    :param symbol_name_replace: the new name of the symbol
    :param only_first: if True the walk is stopped after the first rename
    """
    target = rule_by_name(rules, rule_name)

    class _StopAfterFirst(Exception):
        """Used only to escape from the walk."""

    def rename(node):
        if not (isinstance(node, Antlr4Symbol) and node.symbol == symbol_name):
            return
        node.symbol = symbol_name_replace
        if only_first:
            raise _StopAfterFirst()

    try:
        target.walk(rename)
    except _StopAfterFirst:
        pass