def inline_add_rule(self, match: typing.Match):
    """Register a rule described by an inline ``add_rule`` command.

    The match must carry exactly four groups: rule name, regular expression
    source, transformation selector and an optional argument.

    Selector handling:
        ``=``  -> ``transf.format_replace_closure(arg)`` (argument required)
        ``+``  -> ``transf.copy_then_format_closure(arg)`` (argument required)
        ``#``  -> ``transf.comment_then_format_closure`` when an argument is
                  given, otherwise ``transf.comment_out_closure``
        other  -> the selector value itself is handed to ``self.add_rule``

    Raises:
        ValueError: If the match does not have four groups, or ``=`` / ``+``
            is used with an empty argument.
    """
    groups = match.groups()
    if len(groups) != 4:
        raise ValueError("'add_rule' command expects 4 matched groups.")

    name, pattern_source, selector, raw_argument = groups
    pattern = re.compile(pattern_source)
    # An empty argument is treated exactly like a missing one.
    argument = raw_argument or None

    if selector == '=':
        if argument is None:
            raise ValueError(
                "Empty fourth group on 'format_replace' transformation.")
        transform = transf.format_replace_closure(argument)
    elif selector == '+':
        if argument is None:
            raise ValueError(
                "Empty fourth group on 'copy_then_format' transformation.")
        transform = transf.copy_then_format_closure(argument)
    elif selector == '#':
        if argument is None:
            transform = transf.comment_out_closure(
                self.comment_begin, self.comment_end)
        else:
            transform = transf.comment_then_format_closure(
                argument, self.comment_begin, self.comment_end)
    else:
        transform = selector

    self.add_rule(name, transform, pattern)
def _get_replace_group( match: typing.Match, idx: int, replace_groups: typing.Union[ReplaceGroup, typing.List[str]] ) -> typing.Optional[str]: """Gets the replace group value Args: match (Match): Regex match idx (int): Index of match replace_groups (ReplaceGroup): Replace group Returns: str: Replace group value that matches """ if isinstance(replace_groups, dict): val = replace_groups.get(str(idx)) if val is not None: return val group = get_named_group_at_index(match, idx) if group is not None: if group in replace_groups: return replace_groups[group] for key in replace_groups: if group in str(key).split(','): return replace_groups[key] return _get_group_value(len(match.groups()), replace_groups, idx) if isinstance(replace_groups, list) and idx <= len(replace_groups): return replace_groups[idx - 1] return None
def match_group_replace(
        match: typing.Match,
        replace_func: typing.Callable[[typing.Match, int, int], str]) -> str:
    """Rebuild the matched text with every participating group replaced.

    Args:
        match (Match): Regex match
        replace_func (function): Called as ``replace_func(match, idx, offset)``
            for each group that took part in the match; ``offset`` is the
            group's start position minus the length of the output built so far

    Returns:
        str: The text of the whole match with the groups substituted
    """
    whole = match.group(0)
    base = match.start()
    pieces = []
    written = 0   # length of the output assembled so far
    cursor = 0    # position inside ``whole`` up to which text was consumed

    for idx in range(1, len(match.groups()) + 1):
        group_start = match.start(idx)
        if group_start == -1:
            # the group did not participate in the match
            continue

        literal = whole[cursor:group_start - base]
        pieces.append(literal)
        written += len(literal)

        replacement = replace_func(match, idx, group_start - written)
        pieces.append(replacement)
        written += len(replacement)

        # never move backwards
        cursor = max(match.end(idx) - base, cursor)

    pieces.append(whole[cursor:])
    return ''.join(pieces)
def inline_command(self, match: typing.Match):
    """Execute an inline command captured by ``match``.

    Group 1 is the command name and group 2 (optional) its argument.

    Commands with an argument:
        ``enable`` / ``disable``: call ``self.enable_rule`` /
            ``self.disable_rule`` once per comma separated rule name
        ``skip``: store ``int(arg)`` in ``self.n_skip``
        ``discard``: set ``self.discard`` to 1 (``on``), 2 (``on+``) or
            False (anything else)

    Commands without an argument:
        ``on`` / ``off``: call ``self.on()`` / ``self.off()``

    Raises:
        ValueError: If the command is unknown for the given form, or the
            ``skip`` argument is not an integer.
    """
    gs = match.groups()
    command = gs[0]
    arg = gs[1] if len(gs) >= 2 else None
    cmd = None

    if arg:
        arg = arg.strip()
        if command == 'enable':
            cmd = self.enable_rule
        elif command == 'disable':
            cmd = self.disable_rule

        if cmd:
            for rule_name in arg.split(','):
                cmd(rule_name.strip())
        elif command == 'skip':
            self.n_skip = int(arg)
        elif command == 'discard':
            if arg == 'on':
                self.discard = 1
            elif arg == 'on+':
                self.discard = 2
            else:
                self.discard = False
        else:
            raise ValueError(
                "Invalid command with argument '{}'".format(command))
    else:
        if command == 'on':
            cmd = self.on
        elif command == 'off':
            cmd = self.off
        if cmd is None:
            raise ValueError(
                "Invalid command without arguments '{}'".format(command))
        # The handler used to be looked up but never invoked, which turned
        # 'on' / 'off' into silent no-ops.
        cmd()
    return
def match_to_degrees(match: Match) -> float:
    """Convert a regex match into a value in degrees.

    The match must expose exactly four groups, in this order: degrees,
    minutes, seconds and the sign of the zodiac. Each sign contributes
    30 degrees times its position in ``ZODIAC_ZET9``.
    """
    degrees_text, minutes_text, seconds_text, sign_name = match.groups()
    total_degrees = int(degrees_text)
    total_degrees += 30 * ZODIAC_ZET9.index(sign_name)
    return dms_to_deg(total_degrees, int(minutes_text), float(seconds_text))
async def execute(self, evt: MessageEvent, match: Match) -> None:
    """Render this rule's template for ``evt`` and send it to the event's room.

    The groups of ``match`` are passed to the template as extra variables
    keyed by their zero-based position rendered as a string ("0", "1", ...).
    The event type is ``self.type`` when set, otherwise the template's type.
    """
    positional_vars = {}
    for position, value in enumerate(match.groups()):
        positional_vars[str(position)] = value

    content = self.template.execute(
        evt=evt,
        rule_vars=self.variables,
        extra_vars=positional_vars,
    )
    await evt.client.send_message_event(
        evt.room_id, self.type or self.template.type, content)
def plain_mention_to_html(match: Match) -> str:
    """Turn a plain-text mention into a matrix.to HTML link.

    Group 2 of the match is looked up as a puppet display name. When a puppet
    is found, group 1 is kept as-is and followed by an anchor pointing at the
    puppet's mxid; otherwise the concatenation of all groups is returned.
    """
    puppet = pu.Puppet.find_by_displayname(match.group(2))
    if not puppet:
        return "".join(match.groups())

    leading = f"{match.group(1)}"
    opening = f"<a href='https://matrix.to/#/{puppet.mxid}'>"
    label = f"{puppet.displayname}"
    return leading + opening + label + "</a>"
def python_warning_parser(match: typing.Match) -> LivyLogParseResult:
    """Special case derived from stdout: output of `warnings.warn`.

    Expects four groups (path, line number, warning type, message) and
    produces a WARNING level result named after the warning type.
    """
    source_path, line_number, category, text = match.groups()
    logger_name = f"stdout.warning.{category}"
    rendered = f"{text} (from line {line_number} @{source_path})"
    return LivyLogParseResult(
        created=None,
        level=logging.WARNING,
        name=logger_name,
        message=rendered,
    )
def python_argerror_parser(match: typing.Match) -> LivyLogParseResult:
    """Special case derived from stdout: argument error from argparse.

    Expects two groups (usage text, error message); the resulting message
    places the error first and the usage text on the following line.
    """
    usage_text, error_text = match.groups()
    rendered = f"{error_text}\n{usage_text}"
    return LivyLogParseResult(
        created=None,
        level=logging.ERROR,
        name="stdout.argerror",
        message=rendered,
    )
def replace(match: Match) -> str:
    """Return the text that should replace a matched mention.

    Role mentions (``<@&id>``) become ``@name`` when the role exists and is
    mentionable; user mentions (``<@id>``) become ``@name`` when the member
    exists, is not a bot and can read ``channel``. A resulting ``@everyone``
    or ``@here`` gets a zero width space inserted after the ``@``.
    """
    mention = match.group()
    is_role_mention = "<@&" in mention
    is_user_mention = not is_role_mention and "<@" in mention

    if is_role_mention:
        role = discord.utils.get(guild.roles, id=int(match.group(1)))
        if role is not None and role.mentionable:
            mention = f"@{role.name}"
    elif is_user_mention:
        member = guild.get_member(int(match.group(1)))
        if (member is not None
                and not member.bot
                and channel.permissions_for(member).read_messages):
            mention = f"@{member.name}"

    if mention in ("@everyone", "@here"):
        mention = mention.replace("@", "@\N{ZERO WIDTH SPACE}")
    return mention
def get_named_group_index_list(match: typing.Match) -> typing.List[typing.Optional[str]]:
    """Get the names of the groups

    The names come from the compiled pattern's ``groupindex`` mapping rather
    than from comparing spans: span comparison mislabels groups that share a
    span (nested groups, or any two groups that did not participate in the
    match and therefore both report ``(-1, -1)``).

    Args:
        match (Match): The regex match

    Returns:
        list: The names of the groups by index; entry 0 (the whole match) and
            the entries of unnamed groups are None
    """
    group_names: typing.List[typing.Optional[str]] = [None] * (len(match.groups()) + 1)
    for name, index in match.re.groupindex.items():
        group_names[index] = name
    return group_names
def _get_module_name(re_match: Match) -> str: """ Get the name of the module for a match of the folowing structure: #export[i] module_name :param re_match: The regular expression match :return: The name of the module """ return re_match.groups()[0]
def transformation(match: Match, s=spec.decode(self.codec)):
    """Format one match and return it as a labelled item.

    The format call receives the whole match followed by all groups as
    positional arguments and the named groups as symbols. Variables reported
    as used by the formatter are dropped from the symbols; the match offset
    and any entries of ``meta`` that are not already present are then
    attached as labels.
    """
    symbols: dict = match.groupdict()
    positional: list = [match.group(0)]
    positional.extend(match.groups())

    # the formatter fills this set with the names it consumed
    consumed = set()
    item = meta.format(s, self.codec, positional, symbols, True, True, consumed)

    for key in consumed:
        symbols.pop(key, None)
    symbols['offset'] = match.start()
    for key, value in meta.items():
        symbols.setdefault(key, value)

    return self.labelled(item, **symbols)
def repl(group: Match):
    """Replace a shortcode with its rendered template.

    Group 1 of the match has the form ``name key='value' other="value"``.
    The name selects the template source in ``shortcodes``; the ``key=value``
    pairs become the render arguments. A shortcode without any arguments is
    rendered with no arguments.

    Raises:
        KeyError: If no shortcode with that name is registered.
    """
    shortcode = group.groups()[0]
    # partition() tolerates a bare name, where split(' ', 1) failed to unpack
    func, _, arg_text = shortcode.partition(' ')

    # Splitting on "key=" (kept through the capture group) yields alternating
    # keys and values; quotes and surrounding blanks are trimmed afterwards.
    tokens = [t.strip("""'" """) for t in re.split(r'(\w+=)', arg_text) if t]
    kwargs = {tokens[i].strip('='): tokens[i + 1]
              for i in range(0, len(tokens), 2)}

    # Only the lookup is guarded, so a KeyError raised while rendering is not
    # misreported as a missing shortcode.
    try:
        source = shortcodes[func]
    except KeyError:
        raise KeyError('shortcode {} not found'.format(func)) from None
    return Template(source).render(**kwargs)
def replace_link(match: Match) -> str:
    """Wrap a matched file reference in an anchor pointing at its URL.

    When ``is_args`` is set the first group is the file reference itself;
    otherwise the reference is the second group and the first and third
    groups are kept around the generated anchor. Every linked file is
    recorded in ``seen``.
    """
    groups = match.groups()

    def anchor_for(label):
        # resolve the reference, remember it, and build the anchor tag
        file = url_map[label]
        seen.add(file)
        return f'<a href={repr(file.url)} class="file-link" ' \
               f'target="_blank">{label}</a>'

    if is_args:
        return anchor_for(groups[0])

    link = anchor_for(groups[1])
    return f'{groups[0]}{link}{groups[2]}'
def _replace_clitic(self, match: Match) -> str: # Group 1: - (of -t) # Group 2: t (of -t) # Group 3: - (of clitic) # Group 4: t (the clitic) # Group 5: apostrophe or word boundary g0, g1, g2, g3, g4, g5 = match.groups() before = "" if g0: before = self.char_registry["-"]+"t" return self.space_before + before + self.char_registry["-"] + g4 + g5 + self.space_after
def process_amp(data: TreeManager, spec: TreeManager, properties: PropertyManager, match: Match, lookup_offset=0) -> str: """ Process Ampersand matches and replace the & expression with the resolved value match: is the regular expression match and groups should 3 match groups &{0} | &({0},{0}) """ # Process the [] operator if match.group()[0] == '[': rematch = re.match(r'\[#([0-9]+)\]', match.group()) # Default to "up 2 levels" which uses data[:-1] to generate the key value for the array t = tuple(data.path[:-int(rematch.groups()[0]) + 1]) if rematch else tuple(data.path[:-1]), tuple( spec.path[:-1]) # Use a default dict in the property class to return the index return properties[t].array_bind[data.current_key] elif match.group()[0] == '\\': # Catch the case where \ is used to escape an operator []@#$& or \ itself return match.group()[1:] ascend = int(match.groups()[0] or match.groups()[1] or 0) - lookup_offset descend = int(match.groups()[2] or 0) if ( match.groups()[2] or '0').isnumeric() else match.groups()[2] # Return the processed &,@ pattern result by ascending and descending the data tree if isinstance(descend, int): if descend == 0: return get_operator_value(data.ascend(ascend), spec.ascend(ascend), properties, match) return properties[data.ascend(ascend).path].matches[descend] elif isinstance(descend, str): # Spec is not defined for string key descent return get_operator_value( data.ascend(ascend - 1)[descend], None, properties, match) elif isinstance(descend, list): return reduce(operator.getitem, [data.ascend(ascend)] + descend) raise JoltException()
def _handle_match(html: str, match: Match, nested: bool) -> Tuple[str, int]:
    """Replace one formatting match inside ``html`` with its HTML tag.

    The match must have four groups: prefix, sigil, text and suffix. The
    sigil selects the tag from ``tags``; with ``nested`` set, the text is
    first run through ``_convert_formatting``.

    Returns the updated html and the position just after the closing tag,
    i.e. before the suffix.
    """
    prefix, sigil, text, suffix = match.groups()
    start, end = match.span()
    if nested:
        text = _convert_formatting(text)
    tag = tags[sigil]

    # We don't want to include the whitespace suffix length, as that could be
    # used as the whitespace prefix right after this formatting block.
    # "<tag>" plus "</tag>" add up to 2 * len(tag) + 5 characters.
    pos = start + len(prefix) + (2 * len(tag) + 5) + len(text)

    before = html[:start]
    after = html[end:]
    html = f"{before}{prefix}<{tag}>{text}</{tag}>{suffix}{after}"
    return html, pos
def generate_specialized_response(parts: Match):
    """Print the response template filled with the reflected user input.

    The last capture group of ``parts`` (the whole match when the pattern has
    no groups) is lower-cased, each word found in ``pronouns`` is swapped for
    its counterpart, and the result replaces ``{}`` in ``eliza_resp``. Runs
    of whitespace are collapsed before printing.
    """
    fragment = parts.group(len(parts.groups()))

    # replace pronouns with correct ones
    reflected = [
        pronouns[token] if token in pronouns else token
        for token in fragment.lower().split()
    ]

    # combine parts and print response
    component = " ".join(reflected)
    finished_resp = eliza_resp.replace("{}", component)
    print(re.sub(r'\s+', ' ', finished_resp))
def get_named_group_index(match: typing.Match, name: str) -> typing.Optional[int]:
    """Get the index of the named group

    The index is read from the compiled pattern's ``groupindex`` mapping.
    Comparing spans instead returns the wrong index whenever another group
    with a lower index shares the named group's span (an enclosing group, or
    any group that also did not participate in the match).

    Args:
        match (Match): The regex match
        name (str): The group name

    Returns:
        int: The index of the group, or None if the pattern has no such name
    """
    return match.re.groupindex.get(name)
def replacer(match: Match) -> str:
    """Replace a matched mention and record the matching message entity.

    Group 2 is looked up as a puppet display name. Without a puppet the
    concatenated groups are returned unchanged. Otherwise an entity covering
    the match is appended to ``entities``: a plain mention (text
    ``@username``) when the puppet has a username, or a mention-by-name
    entity carrying the puppet's tgid (text is the display name) otherwise.
    """
    puppet = pu.Puppet.find_by_displayname(match.group(2))
    if not puppet:
        return "".join(match.groups())

    offset = match.start()
    length = match.end() - offset

    if puppet.username:
        entity = MessageEntityMention(offset, length)
        text = f"@{puppet.username}"
    else:
        entity = MessageEntityMentionName(offset, length, user_id=puppet.tgid)
        text = puppet.displayname

    entities.append(entity)
    return text
def get_named_group_at_index(match: typing.Match, idx: int) -> typing.Optional[str]:
    """Get the name of the group

    The name is read from the compiled pattern's ``groupindex`` mapping.
    Comparing spans instead reports a name for unnamed groups that merely
    share a span with a named one (nested groups, or several groups that did
    not participate in the match).

    Args:
        match (Match): The regex match
        idx (int): The group index

    Returns:
        str: The group name, or None if the group at ``idx`` is unnamed or
            does not exist
    """
    for name, index in match.re.groupindex.items():
        if index == idx:
            return name
    return None
def _instantiate_matched(match: Match, group_name: str, inclusion: str) -> Tuple[str, str]: num_perc_s = group_name.count("%s") matches = [g or "" for g in match.groups()] if len(matches) < num_perc_s: raise RuntimeError("Invalid entry in inventory_logwatch_groups: group name " "%r contains %d times '%%s', but regular expression " "%r contains only %d subexpression(s)." % (group_name, num_perc_s, inclusion, len(matches))) if not matches: return group_name, inclusion for num, group in enumerate(matches): inclusion = eval_regex.instantiate_regex_pattern_once(inclusion, group) group_name = group_name.replace("%%%d" % (num + 1), group) return group_name % tuple(matches[:num_perc_s]), inclusion
def md_codeblock(match: typing.Match) -> str:
    """Substitution callback that highlights a markdown code block.

    Intended to be passed to a regex ``sub`` call whose pattern captures the
    language name and the code, in that order.

    Args:
        match: matched block

    Returns:
        The output of ``pygments.highlight`` for the code, using the lexer
        registered for the language or a plain text lexer when the lookup
        raises ValueError.
    """
    language, source = match.groups()
    try:
        chosen_lexer = pygments.lexers.get_lexer_by_name(language)
    except ValueError:
        chosen_lexer = pygments.lexers.TextLexer()
    highlighted = pygments.highlight(source, chosen_lexer, FORMATTER)
    return highlighted
def replace_symbol(chapter_match: Match) -> str:
    """
    Replaces the matched roman numeral with its chapter icon markup

    This function is nested so that it can read ``scale`` from the enclosing
    function; it cannot take extra arguments because it is used as the
    callback of an re.sub() call

    :param chapter_match: The text match to be replaced; group 1 is the
        roman numeral (i-v, case-insensitive)
    :return: An ``<i>`` element carrying the icon classes
    """
    chapter_numbers = {"i": 1, "ii": 2, "iii": 3, "iv": 4, "v": 5}
    numeral = chapter_match.group(1).lower()

    css_classes = ["ms", "ms-saga"]
    # numerals outside the table render as "ms-saga-None"
    css_classes.append("ms-saga-{}".format(chapter_numbers.get(numeral)))
    if scale is not None:
        css_classes.append(f"ms-{scale}")

    joined = " ".join(css_classes)
    return '<i class="{}"></i>'.format(joined)
def get_named_group_index_dict(match: typing.Match) -> typing.Dict[int, str]:
    """Get the name/index map of the groups

    The mapping is the inverse of the compiled pattern's ``groupindex``.
    Comparing spans instead attaches names to the wrong indices when groups
    share a span (nested groups, or several groups that did not participate
    in the match).

    Args:
        match (Match): The regex match

    Returns:
        dict: A mapping of indices to names, containing named groups only
    """
    return {index: name for name, index in match.re.groupindex.items()}
def convert_match(match: Match) -> List[Tuple[str, Set[str]]]:
    """Convert a match into a form for comparison.

    Every group that participated in the match is split into its leading run
    of ASCII uppercase letters and the set of the remaining characters. A
    group without leading uppercase letters is kept whole and paired with an
    empty set.

    :param match: The interpretation to convert.
    :returns: A standard form.
    """
    # The pattern that used to be compiled here on every call was never used.
    new_seq: List[Tuple[str, Set[str]]] = []
    for group in match.groups():
        if group is None:
            continue
        # length of the leading uppercase prefix
        i = 0
        while i < len(group) and group[i] in string.ascii_uppercase:
            i += 1
        if i > 0:
            new_seq.append((group[:i], set(group[i:])))
        else:
            new_seq.append((group, set()))
    return new_seq
def repl(group: Match):
    """Turn a matched (href, text) pair into an anchor with a slugified href.

    When the text group is empty the raw href is used as the link text.
    """
    href, text = group.groups()
    text = text or href

    substitutions = [
        # remove non-alphabetical/whitespace/'-' chars
        (r"[^\w\s-]", ""),
        # strip leading whitespace
        (r"(?u)\A\s*", ""),
        # strip trailing whitespace
        (r"(?u)\s*\Z", ""),
        # reduce multiple whitespace or '-' to single '-'
        (r"[-\s]+", "-"),
    ]
    slug = slugify(href, regex_subs=substitutions)
    return f'<a href="/{slug}/">{text}</a>'
def lex_olist(m: Match) -> Optional[Tuple[str, int]]:
    """
    Attempt to parse the numeral of a list item, be it decimal, roman or
    alphabetical.

    Returns (list_type, number) where list_type is '1' for decimal, 'i'/'I'
    for roman and 'a'/'A' for alphabetical numerals (lower/upper case), or
    None when the numeral contains a character that is not an ASCII letter.
    """
    # TODO: support for non-latin alphabet numbering? HTML doesn't seem to support it
    _, numeral = m.groups()

    # is it an integer?
    try:
        return '1', int(numeral)
    except ValueError:
        pass

    is_lowercase = numeral.lower() == numeral

    # is it a roman numeral?
    try:
        roman_value = from_roman(numeral.upper())
        return ('i' if is_lowercase else 'I'), roman_value
    except InvalidRomanNumeralError:
        pass

    # is it just a letter? (read as a base-26 number with a=1 ... z=26)
    value = 0
    for char in numeral:
        if char not in string.ascii_letters:
            return None
        value = value * 26 + (string.ascii_lowercase.index(char.lower()) + 1)
    return ('a' if is_lowercase else 'A'), value
def _starts_ends_overall( m: Match) -> Tuple[MatchIndexes, MatchIndexes, MatchIndexes]: """ Extracts indices from a match object. Returns (groupstarts, groupends, [overall_start, overall_end]) >>> m = re.match(r'.(.)', 'abc') >>> _starts_ends_overall(m) ([1], [2], [0, 2]) >>> m = re.match(r'.', 'abc') >>> _starts_ends_overall(m) ([], [], [0, 1]) """ overall_start, overall_end = m.span() n_matches = len(m.groups()) spans = [m.span(n) for n in range(1, n_matches + 1)] starts = [span[0] for span in spans] ends = [span[1] for span in spans] return starts, ends, [overall_start, overall_end]