def get_tag_context(match: re.Match, limit: int = 100, full_tag: bool = False) -> str:
    """Return the matched tag together with some surrounding text.

    The result holds up to ``limit`` characters before the match, the match
    itself and up to ``limit`` characters after it.  Unless ``full_tag`` is
    true, a match longer than ``limit`` is abbreviated to its first and last
    ``limit // 2`` characters joined by ``' <...> '``.  ``'...'`` is added on
    either side when the context does not reach that end of the string.
    """
    text = match.string
    tag_start, tag_end = match.span()
    ctx_start = max(0, tag_start - limit)
    ctx_end = min(len(text), tag_end + limit)

    pieces = []
    if ctx_start != 0:
        # context was cropped on the left
        pieces.append('...')

    if full_tag or tag_end - tag_start <= limit:
        pieces.append(text[ctx_start:ctx_end])
    else:
        # tag is longer than the limit: keep only its head and tail
        half = limit // 2
        head = text[ctx_start:tag_start + half]
        tail = text[tag_end - half:ctx_end]
        pieces.append(f'{head} <...> {tail}')

    if ctx_end != len(text):
        # context was cropped on the right
        pieces.append('...')
    return ''.join(pieces)
def _replace(s: str, m: re.Match, title_level: int = 5) -> str: lang = m.group("lang") or "base" mark = m.group("mark") title = m.group("title") body = m.group("body").replace("<", "<").replace(">", ">") attrs: List[str] = [] if mark is not None: marks = mark.split(",") hl_lines = ",".join([f'"{x}"' for x in marks]) attrs.append(f"hl_lines=[{hl_lines}]") attrs.append("linenos=table") block = f""" ```{lang} {{{",".join(attrs)}}} {body} ``` """ if title is not None: heading = "#" * title_level block = f"{heading} {title}\n" + block return s[:m.start()] + block + s[m.end():]
def handleMatch(self, m: re.Match, _data: str) -> Tuple[ElementTree.Element, int, int]:
    """Build ``<blockquote><i>text</i></blockquote>`` from the match.

    The quoted text is taken from the named group ``text``.

    Returns:
        The element plus the start and end offsets of the whole match.
    """
    quote = ElementTree.Element("blockquote")
    ElementTree.SubElement(quote, "i").text = m.groupdict()["text"]
    start, end = m.span(0)
    return quote, start, end
def argsub(m: re.Match):
    """Surround the matched token with `` ## `` where it touches a ``W`` character.

    Inside long macro bodies tokens are interpolated into other names
    without the usual ``##`` pasting syntax, so the paste operator is
    inserted here on whichever side of the match directly abuts a character
    accepted by ``W``.  This could cause problems inside string literals; it
    can be disabled by passing ``autopaste=False`` to ``mkmacro``.
    """
    body = m.string
    first, last = m.start(), m.end()

    # Only look at a neighbour when there is one on that side.
    paste_left = first != 0 and W.match(body[first - 1])
    paste_right = last != len(body) and W.match(body[last])

    prefix = " ## " if paste_left else ""
    suffix = " ## " if paste_right else ""
    return prefix + m.group(0) + suffix
async def handle_link_in_message(message: types.Message, regexp: re.Match) -> None:
    """Catch a link in a message, validate it and store it.

    The matched text is used as the URL; whatever remains of the message
    text once the match is cut out becomes the description (``None`` when
    nothing remains).  Both values go through ``LinkValidator``.  On
    ``ValidationError`` the user receives the formatted error; otherwise the
    link is saved with ``db.add_link`` and a confirmation is sent.

    Args:
        message: Incoming message; ``from_user.id`` and ``text`` are read.
        regexp: Match locating the link inside ``message.text``.
    """
    user_id = message.from_user.id
    message_text = message.text
    url = regexp.group()
    start, end = regexp.start(), regexp.end()

    # Everything around the link is the description; empty means "none".
    description = (message_text[:start] + message_text[end:]) or None

    link_validator = LinkValidator()
    try:
        url = link_validator.validate_link_url(url)
        description = link_validator.validate_link_description(description)
    except ValidationError as error:
        text = f'I`ve caught your link but validation error has occured:\n{get_formatted_error_message(error)}'
        await message.answer(text)
    else:
        link = Link(url=url, description=description, user_id=user_id)
        link_repr = link.short_url_with_description
        async with async_db_sessionmaker() as session:
            await db.add_link(session, link)
        # Emoji are written as named escapes so the literals survive any
        # re-encoding of this file (they had been garbled into Cyrillic
        # mojibake).
        text = md.text(
            '\N{WHITE HEAVY CHECK MARK} I`ve caught your link:',
            link_repr,
            'and added in non-rubric category. \N{WINKING FACE}',
            sep='\n',
        )
        await message.answer(text, disable_web_page_preview=True)
def handleMatch(self, m: Match, data):
    """Render a five-star rating as a ``<span>`` of ``<i>`` icon elements.

    Group 1 of the match is parsed as the integer rating.  The first
    ``rating`` icons get the "checked" classes, the rest the empty-star
    classes.

    Returns:
        The span element plus the start and end offsets of the whole match.
    """
    filled = int(m.group(1))
    stars = etree.Element("span")
    for position in range(5):
        css = "far fa-star" if position >= filled else "fas fa-star star-checked"
        etree.SubElement(stars, "i", {"class": css})
    return stars, m.start(0), m.end(0)
def pretty_match(match: Match, string: str, underline_char: str = "^") -> str:
    """Return ``string`` with an extra line underlining the match location.

    >>> import re
    >>> print(pretty_match(re.search('mange', 'il mange du bacon'), 'il mange du bacon'))
    il mange du bacon
       ^^^^^

    :param match: object returned by re.match, re.search or re.finditer
    :param string: the string the regular expression was applied to in
      order to obtain `match`
    :param underline_char: character used to underline the matched section,
      defaults to the caret '^'
    :return: the original string with an inserted line underlining the
      match location
    """
    match_start, match_end = match.start(), match.end()
    # Every match of _LINE_RGX is replaced by the platform line separator.
    normalized = _LINE_RGX.sub(linesep, string)
    sep_len = len(linesep)

    lines = []
    line_begin = normalized.rfind(linesep, 0, match_start)
    if line_begin == -1:
        # the match sits on the first line
        line_begin = 0
    else:
        # keep everything before the matched line, then step over the separator
        lines.append(normalized[:line_begin])
        line_begin += sep_len

    marker = " " * (match_start - line_begin) + underline_char * (match_end - match_start)

    line_end = normalized.find(linesep, match_end)
    if line_end == -1:
        # the matched line is the last one
        lines += [normalized[line_begin:], marker]
    else:
        trailing = normalized[line_end + sep_len:]
        lines += [normalized[line_begin:line_end], marker, trailing]
    return linesep.join(lines).rstrip()
def __init__(self, matcher: Matcher, match: re.Match, group: int):
    """Record one captured group of ``match`` and try to parse it.

    Args:
        matcher: The matcher that produced ``match``; its ``fmt`` attribute,
            when not ``None``, is used to parse the captured text.
        match: The regex match.
        group: Zero-based index; regex group ``group + 1`` is read.
    """
    target = group + 1
    self.matcher = matcher
    self.match_str = match.group(target)
    self.start = match.start(target)
    self.end = match.end(target)
    self.match_parsed = None

    if matcher.fmt is None:
        return
    try:
        self.match_parsed = matcher.fmt.parse(self.match_str)
    except Exception:
        # Best effort: a parse failure leaves match_parsed as None.
        log.warning('Failed to parse for matcher %s', str(matcher))
def parse_definition(self, re_match: re.Match):
    """Parse the annotation and populate the object's fields.

    Group 1 of the match names the definition function; it is looked up in
    ``self.language.annotation_types`` to set ``self.type_name``.  The text
    after the match is then passed to ``self._parse_body``.

    Args:
        re_match: A Match obtained from the Language's call_detection_regex.
    """
    function_name = re_match.group(1)
    self.type_name = self.language.annotation_types[function_name]

    # The arguments of the definition call start where the match ends.
    remainder = re_match.string[re_match.end():]
    self._parse_body(remainder)
def from_match(cls, match: re.Match) -> "Position":
    """Create a Position object from a regex Match object.

    The instance is built through ``cls``, so calling this on a subclass
    yields an instance of that subclass.

    Args:
        match (re.Match): A Match object.

    Returns:
        position (Position): Object whose ``start_index`` / ``end_index``
        are the span of the whole match and whose ``string_list`` holds the
        matched text as its only item.
    """
    start, end = match.span()
    return cls(start_index=start, end_index=end, string_list=[match.group(0)])
def __init__(self, m: re.Match): self.line = line = m.string parts = [] prev = 0 while True: m1, m2 = m.start(), m.end() if s := line[prev:m1]: parts.append(s) parts.append(('pattern', line[m1:m2])) prev = m2 m = m.re.search(line, m2) if not m: if s := line[prev:len(line)]: parts.append(s) break
def handleMatch(self, m: re.Match, _data: str) -> Tuple[ElementTree.Element, int, int]:
    """Turn a user mention into ``<b class="markdown-mention"><a>@name</a></b>``.

    The user name is the matched text without its first character.  The
    link target is the result of ``url_for("views.user")`` with every
    ``<...>`` placeholder matching ``<[a-z:]+>`` replaced by the user name.

    Returns:
        The element plus the start and end offsets of the whole match.
    """
    username = m.group(0)[1:]
    # presumably url_for returns a route template such as ".../<name>";
    # verify against the route definition
    href = re.sub("<[a-z:]+>", username, url_for("views.user"))

    link = ElementTree.Element("a", attrib={"href": href})
    link.text = "@" + username

    wrapper = ElementTree.Element("b")
    wrapper.append(link)
    wrapper.set("class", "markdown-mention")
    return wrapper, m.start(0), m.end(0)
def change_path_name(catch: re.Match, change: Decimal, file: Path) -> Path:
    """Return ``file`` with every captured number in its name shifted by ``change``.

    Each capture group of ``catch`` is read as a decimal number, ``change``
    is added, and the result is written back into the file name using the
    format detected by ``_get_number_format``.  Groups that are not valid
    numbers are logged and left untouched.

    Args:
        catch: Match whose groups locate the numbers; its offsets are
            applied to ``file.name`` (assumes it was matched against that
            name — TODO confirm with callers).
        change: Amount added to every number (may be negative).
        file: Path whose final component is rewritten.

    Returns:
        The path with the updated name, or ``file`` itself when no group
        could be changed.
    """
    from decimal import InvalidOperation

    replacements = []
    for index, text in enumerate(catch.groups(), start=1):
        number_format = _get_number_format(text)
        try:
            number = Decimal(text)
        except (ValueError, InvalidOperation):
            # Decimal() signals a malformed string with InvalidOperation,
            # which is not a ValueError subclass.
            logger.error(
                '{} is not a string which can be incremented/decremented'.
                format(text))
            continue
        replacement = _format_number_to_string(number_format, number + change)
        replacements.append((catch.start(index), catch.end(index), replacement))

    if not replacements:
        return file

    # Apply right-to-left: a replacement of a different length must not
    # shift the offsets of the groups that still have to be rewritten.
    name = file.name
    for start, end, replacement in sorted(
            replacements, key=lambda item: item[0], reverse=True):
        name = name[:start] + replacement + name[end:]
    return file.with_name(name)
def handleMatch(self, matches: re.Match, data: str) -> Tuple[etree.Element, int, int]:
    """Handle a match by rendering its first capture group.

    Args:
        matches (re.Match): The regular expression match result
        data (str): The text being processed (not used here)

    Returns:
        Tuple[etree.Element, int, int]: The result of ``self._render`` for
        group 1, and the start and end index of the whole match
    """
    rendered = self._render(matches.group(1))
    return rendered, matches.start(0), matches.end(0)
def handleMatch(self, match: re.Match, data: str) -> Tuple[etree.Element, int, int]:
    """Build an ``ac:structured-macro`` "status" element for the match.

    The macro gets two ``ac:parameter`` children: ``title`` (named group
    ``title`` of the match) and ``colour`` (``self._color`` capitalized).

    Returns:
        The macro element plus the start and end offsets of the whole match.
    """
    macro = etree.Element(
        "ac:structured-macro",
        {
            "ac:name": "status",
            "ac:schema-version": "1",
            "ac:macro-id": "d4fcf299-d2f0-4eec-807a-1e4a3c8fe0dc",
        },
    )
    parameters = (
        ("title", match.group("title")),
        ("colour", self._color.capitalize()),
    )
    for name, value in parameters:
        etree.SubElement(macro, "ac:parameter", {"ac:name": name}).text = value
    return macro, match.start(0), match.end(0)
def handleMatch(
        self, m: re.Match,
        data: str) -> Tuple[Union[ElementTree.Element, str], int, int]:
    """Turn a ``category / name / label`` match into a link element.

    The three capture groups are stripped and read as category, name and
    optional label (the name is used when no label is given).  The
    lower-cased category is looked up in ``self.url_patterns``.  When group
    1 is blank or the category is unknown, an empty string is returned in
    place of an element.

    Returns:
        The ``<a>`` element (or ``""``) plus the start and end offsets of
        the whole match.
    """
    start, end = m.start(0), m.end(0)

    if not m.group(1).strip():
        return "", start, end

    category, name, label = (g.strip() if g else None for g in m.groups())
    if label is None:
        label = name

    url = self.url_patterns.get(category.lower())
    if url is None:
        return "", start, end

    if self.config.get("testing"):
        # In testing mode the call is spelled out as text instead of being made.
        full_url = f"url_for('{url}', slug='{slugify(name)}')"
    else:
        full_url = url_for(url, slug=slugify(name))

    anchor = ElementTree.Element("a")
    anchor.text = label
    anchor.set("href", full_url)
    return anchor, start, end
def handleMatch(
    self, m: re.Match, data: str
) -> Tuple[Optional[etree.Element], Optional[int], Optional[int]]:
    """Build an inline-card ``ac:link`` element for a link found in ``data``.

    The link text is read with ``self.getText`` starting at the end of the
    match, then the target with ``self.getLink``.  If either reports that it
    could not handle the input, ``(None, None, None)`` is returned.

    Returns:
        The ``ac:link`` element, the start offset of the match and the index
        reported by ``self.getLink``; or three ``None`` values.
    """
    text, index, handled = self.getText(data, m.end(0))
    if not handled:
        return None, None, None

    href, _title, index, handled = self.getLink(data, index)
    if not handled:
        return None, None, None

    page_attrs = {
        "ri:content-title": html.escape(href, quote=False),
        "ri:version-at-save": "1",
    }
    link = etree.Element("ac:link", {"ac:card-appearance": "inline"})
    etree.SubElement(link, "ri:page", page_attrs)
    body = etree.SubElement(link, "ac:link-body")
    body.text = text
    return link, m.start(0), index
def _entities_from_regex_match(
        match: Match, domain: Domain,
        extractor_name: Optional[Text]) -> List[Dict[Text, Any]]:
    """Extracts the optional entity information from the given pattern match.

    If no entities are specified or if the extraction fails, then an empty
    list is returned.

    Args:
        match: a match produced by `self.pattern`
        domain: the domain; when truthy, entity types that are not in
            `domain.entities` are dropped with a warning
        extractor_name: An extractor name which should be added for the
            entities

    Returns:
        One dictionary per entity value.  Every dictionary carries the span
        of the `ENTITIES` group as its start and end.
    """
    entities_str = match.group(ENTITIES)
    if entities_str is None:
        return []

    try:
        parsed_entities = json.loads(entities_str)
        if not isinstance(parsed_entities, dict):
            raise ValueError(
                f"Parsed value isn't a json object "
                f"(instead parser found '{type(parsed_entities)}')")
    except (JSONDecodeError, ValueError) as e:
        rasa.shared.utils.io.raise_warning(
            f"Failed to parse arguments in line '{match.string}'. "
            f"Failed to decode parameters as a json object (dict). "
            f"Make sure the intent is followed by a proper json object (dict). "
            f"Continuing without entities. "
            f"Error: {e}",
            docs=DOCS_URL_STORIES,
        )
        parsed_entities = dict()

    # validate the given entity types
    if domain:
        entity_types = set(parsed_entities.keys())
        unknown_entity_types = entity_types.difference(domain.entities)
        if unknown_entity_types:
            rasa.shared.utils.io.raise_warning(
                f"Failed to parse arguments in line '{match.string}'. "
                f"Expected entities from {domain.entities} "
                f"but found {unknown_entity_types}. "
                f"Continuing without unknown entity types. ",
                docs=DOCS_URL_STORIES,
            )
            parsed_entities = {
                key: value
                for key, value in parsed_entities.items()
                if key not in unknown_entity_types
            }

    # convert them into the list of dictionaries that we expect
    entities: List[Dict[Text, Any]] = []
    default_properties = {}
    if extractor_name:
        default_properties = {EXTRACTOR: extractor_name}

    for entity_type, entity_values in parsed_entities.items():
        # a single value is treated like a one-element list
        if not isinstance(entity_values, list):
            entity_values = [entity_values]

        for entity_value in entity_values:
            entities.append({
                ENTITY_ATTRIBUTE_TYPE: entity_type,
                ENTITY_ATTRIBUTE_VALUE: entity_value,
                ENTITY_ATTRIBUTE_START: match.start(ENTITIES),
                ENTITY_ATTRIBUTE_END: match.end(ENTITIES),
                **default_properties,
            })
    return entities
def explode(pair: re.Match):
    """Add the first number of the matched pair to the last number before it.

    The text of ``pair`` must contain exactly two runs of digits, otherwise
    the unpacking raises ``ValueError``.  The number located by
    ``LAST_NUMBER`` (group 1) in the text preceding the pair, if any, is
    increased by the pair's first number.

    NOTE(review): the code visible here ends after updating ``before``;
    ``after`` and ``y`` are not used and nothing is returned.  Confirm that
    the remainder of this function was not lost.
    """
    before = pair.string[:pair.start()]
    after = pair.string[pair.end():]
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    x, y = [int(d) for d in re.findall(r"\d+", pair[0])]
    if (n := LAST_NUMBER.search(before)) is not None:
        before = before[:n.start(1)] + f"{int(n[1]) + x}" + before[n.end(1):]
def handleMatch(self, m: re.Match, _data: str) -> Tuple[ElementTree.Element, int, int]:
    """Wrap the matched text in ``<span class="markdown-highlight">``.

    The text is taken from the named group ``text``.

    Returns:
        The element plus the start and end offsets of the whole match.
    """
    highlight = ElementTree.Element("span", {"class": "markdown-highlight"})
    highlight.text = m.groupdict()["text"]
    start, end = m.span(0)
    return highlight, start, end
def handleMatch(self, m: re.Match, _data: str) -> Tuple[ElementTree.Element, int, int]:
    """Wrap the matched text in an ``<s>`` (strikethrough) element.

    The text is taken from the named group ``text``.

    Returns:
        The element plus the start and end offsets of the whole match.
    """
    struck = ElementTree.Element("s")
    struck.text = m.groupdict()["text"]
    start, end = m.span(0)
    return struck, start, end
def to_lowercase(line: str, match: re.Match) -> str:
    """Return ``line`` with the span covered by ``match`` lower-cased.

    Only the offsets of ``match`` are used; they are applied to ``line``.
    """
    begin, stop = match.start(), match.end()
    head, middle, tail = line[:begin], line[begin:stop], line[stop:]
    return head + middle.lower() + tail
def split(number: re.Match):
    """Replace the matched number with a ``[low,high]`` pair of its halves.

    ``low`` is the number halved and rounded down, ``high`` is the rest, so
    the two always add up to the original number.  The text around the match
    is kept unchanged.
    """
    text = number.string
    value = int(number.group(0))
    low = value // 2
    high = value - low
    return f"{text[:number.start()]}[{low},{high}]{text[number.end():]}"
def sub(x: re.Match, s: str):
    """Return the searched string with capture group 1 replaced by ``s``."""
    source = x.string
    head = source[:x.start(1)]
    tail = source[x.end(1):]
    return head + s + tail