def reduce_selection_from_optional(rule: Antlr4Rule):
    """
    a? | b? -> (a | b)?

    Rewrite each selection whose alternatives are all optional into one
    optional selection of the alternatives' bodies.

    An alternative qualifies when it is an Antlr4Option, or an
    Antlr4Sequence whose only non-visual item is an Antlr4Option.

    :param rule: object passed to replace_item_by_sequence together with
        the matcher defined below
    :return: True if at least one selection was rewritten, otherwise False
    """
    modified = False

    def match_replace_fn(o):
        # "modified" belongs to the enclosing function; without this
        # declaration the assignment below would bind a new local name and
        # the outer flag would stay False forever.
        nonlocal modified

        if not isinstance(o, Antlr4Selection):
            return None

        non_optional_items = []
        for c in o:
            if isinstance(c, Antlr4Sequence):
                # look through a sequence which wraps a single real item
                c = list(iter_non_visuals(c))
                if len(c) != 1:
                    return None
                c = c[0]
            if not isinstance(c, Antlr4Option):
                # at least one alternative is mandatory, nothing to do
                return None
            non_optional_items.append(c.body)

        # reuse the selection object itself: swap the optional alternatives
        # for their bodies and make the selection as a whole optional
        o.clear()
        o.extend(non_optional_items)
        modified = True
        return Antlr4Sequence([
            Antlr4Option(o),
        ])

    replace_item_by_sequence(rule, match_replace_fn)
    return modified
def char_options_to_regex(r: Antlr4Rule):
    """
    '0' | '1' -> [01]

    In each selection, collect the alternatives which consist of exactly
    one terminal symbol of length 1 and, if there are at least two of them,
    replace them with a single regex symbol "[...]".

    :param r: object passed to replace_item_by_sequence together with the
        matcher defined below
    :return: None, the selections are modified in place or replaced by the
        value returned from the matcher
    """

    def match_replace_fn(o):
        if not isinstance(o, Antlr4Selection):
            return None

        # pairs of (alternative as stored in the selection, its char symbol)
        char_symb_to_replace = []
        for orig_c in o:
            items = list(iter_non_visuals(orig_c))
            # Exactly one non-visual item is required. The check must also
            # reject an alternative with no such item, otherwise items[0]
            # below raises IndexError.
            if len(items) != 1:
                continue
            c = items[0]
            if (isinstance(c, Antlr4Symbol) and c.is_terminal
                    and len(c.symbol) == 1):
                char_symb_to_replace.append((orig_c, c))

        if len(char_symb_to_replace) <= 1:
            # a single character is not worth a character class
            return None

        # build a regex out of the characters and replace them by it
        for orig_c, _ in char_symb_to_replace:
            o.remove(orig_c)

        re_str = "[%s]" % "".join(
            c._escaped() for _, c in char_symb_to_replace)
        regex_symbol = Antlr4Symbol(re_str, True, is_regex=True)

        if list(iter_non_visuals(o)):
            # some other alternatives remain, the regex becomes one more
            o.append(regex_symbol)
            return None

        # nothing else is left, the regex replaces the whole selection
        return Antlr4Sequence([
            regex_symbol,
        ])

    replace_item_by_sequence(r, match_replace_fn)
def reduce_optionality(rules):
    """
    (a? b?)? -> a? b?
    (a?)? -> a?

    Remove an option wrapper whose body is already optional on its own:
    either the body is an option itself, or it is a sequence made only of
    options.

    :param rules: iterable of objects, each is passed to
        replace_item_by_sequence together with the matcher defined below
    """

    def only_options(seq):
        # Collect the members of seq if every one of them is an option
        # (possibly wrapped in a sequence with a single non-visual item),
        # return None as soon as any member does not qualify.
        collected = []
        for member in seq:
            if isinstance(member, Antlr4Sequence):
                significant = list(iter_non_visuals(member))
                if len(significant) != 1:
                    return None
                member = significant[0]
            if not isinstance(member, Antlr4Option):
                return None
            collected.append(member)
        return collected

    def match_replace_fn(o):
        if not isinstance(o, Antlr4Option):
            return None

        body = o.body
        if isinstance(body, Antlr4Sequence):
            options = only_options(body)
            if options is None:
                return None
            # the outer option is dropped, its optional members stay
            return Antlr4Sequence(list(options))

        if isinstance(body, Antlr4Option):
            # an option of an option collapses to the inner one
            return body

        return None

    for rule in rules:
        replace_item_by_sequence(rule, match_replace_fn)
def extract_optional_suffix(r):
    """
    a | a b -> a b?

    In each selection, look for an alternative which is a prefix of the
    alternative that follows it and merge the two: the extra suffix of the
    second alternative is appended to the first one as an option, and the
    second alternative is removed.

    :param r: object passed to replace_item_by_sequence together with the
        matcher defined below
    :return: True if at least one alternative was merged, otherwise False
    """
    modified = False

    def match_replace_fn(o):
        # "modified" belongs to the enclosing function; without this
        # declaration the assignment below would bind a new local name and
        # the outer flag would stay False forever.
        nonlocal modified

        if not isinstance(o, Antlr4Selection):
            return None

        potential_prefix = None
        potential_prefix_i = None
        to_remove = []
        for i, c in enumerate(o):
            if potential_prefix is None:
                potential_prefix = c
                potential_prefix_i = i
                continue

            # check if the potential_prefix is really a prefix of this item
            is_prefix, suffix = is_prefix_of_elem(potential_prefix, c)
            if is_prefix:
                # put the suffix as an option after the prefix
                if list(iter_non_visuals(suffix)):
                    if not isinstance(potential_prefix, Antlr4Sequence):
                        # the prefix has to be a sequence so that the
                        # optional suffix can be appended to it
                        assert o[potential_prefix_i] is potential_prefix
                        potential_prefix = Antlr4Sequence([
                            potential_prefix,
                        ])
                        o[potential_prefix_i] = potential_prefix

                    if len(suffix) == 1:
                        suffix = suffix[0]
                    else:
                        suffix = Antlr4Sequence(suffix)

                    potential_prefix.append(Antlr4Option(suffix))

                # this alternative is now covered by the extended prefix;
                # removal is postponed because o is being iterated
                to_remove.append(c)
                potential_prefix = None
                potential_prefix_i = None
                modified = True
            else:
                # not a prefix, this item becomes the next candidate
                potential_prefix = c
                potential_prefix_i = i

        for c in to_remove:
            o.remove(c)

        if len(o) == 1:
            # a selection with a single alternative is just that alternative
            return Antlr4Sequence([
                o[0],
            ])
        return None

    replace_item_by_sequence(r, match_replace_fn)
    return modified
def rm_semi_from_cross_body_item(rules):
    """
    Because SEMI is already part of cross_body_item

    Within the first body item of the rule "cross_body", replace every item
    equal to the SEMI symbol with an empty sequence.

    :param rules: collection of rules searched by rule_by_name
    """
    cross_body = rule_by_name(rules, "cross_body")
    semi_symbol = Antlr4Symbol("SEMI", False)

    def match_replace_fn(o):
        # the empty sequence is the replacement for a matched SEMI
        return Antlr4Sequence([]) if o == semi_symbol else None

    first_item = cross_body.body[0]
    replace_item_by_sequence(first_item, match_replace_fn)
def rm_ambiguity(rules):
    """
    (ASSIGN class_new)? -> ASSIGN class_new

    In the rule "variable_decl_assignment", replace the optional
    "ASSIGN class_new" with its body, which makes it mandatory.

    :param rules: collection of rules searched by rule_by_name
    """
    target_rule = rule_by_name(rules, "variable_decl_assignment")
    assign_class_new = Antlr4Sequence([
        Antlr4Symbol("ASSIGN", False),
        Antlr4Symbol("class_new", False),
    ])
    optional_assign = Antlr4Option(assign_class_new)

    def match_replace_fn(o):
        if o == optional_assign:
            # keep the matched content, only drop the option around it
            return o.body
        return None

    replace_item_by_sequence(target_rule, match_replace_fn)
def add_interface_class_declaration(rules):
    """
    class_declaration -> class_declaration | interface_class_declaration

    Because interface_class_declaration is not used anywhere
    (is missing in specified rules)

    :param rules: iterable of objects, each is passed to
        replace_item_by_sequence together with the matcher defined below
    """
    class_decl = Antlr4Symbol("class_declaration", False)
    interface_decl = Antlr4Symbol("interface_class_declaration", False)

    def match_replace_fn(o):
        if o == class_decl:
            # the matched symbol is reused as the first alternative, the
            # interface symbol is copied so each selection gets its own
            alternatives = [o, deepcopy(interface_decl)]
            return Antlr4Selection(alternatives)
        return None

    for current_rule in rules:
        replace_item_by_sequence(current_rule, match_replace_fn)
def fix_priority_of__class_scope__package_scope(rules):
    """
    class_scope | package_scope -> package_scope | class_scope

    Swap the order of the two alternatives wherever this exact selection
    appears.

    :param rules: iterable of objects, each is passed to
        replace_item_by_sequence together with the matcher defined below
    """

    def scope_selection(first, second):
        # selection of two non-terminal symbols in the given order
        return Antlr4Selection([
            Antlr4Symbol(first, False),
            Antlr4Symbol(second, False),
        ])

    original_order = scope_selection("class_scope", "package_scope")
    swapped_order = scope_selection("package_scope", "class_scope")

    def match_replace_fn(o):
        if o == original_order:
            # every match gets its own copy of the replacement
            return deepcopy(swapped_order)
        return None

    for current_rule in rules:
        replace_item_by_sequence(current_rule, match_replace_fn)