def make_link(m: typing.Match) -> str:
    url = m.group(1)
    proto = m.group(2)
    if require_protocol and not proto:
        return url  # not protocol, no linkify

    if proto and proto not in permitted_protocols:
        return url  # bad protocol, no linkify

    href = m.group(1)
    if not proto:
        href = "http://" + href  # no proto specified, use http

    if callable(extra_params):
        params = " " + extra_params(href).strip()
    else:
        params = extra_params

    # clip long urls. max_len is just an approximation
    max_len = 30
    if shorten and len(url) > max_len:
        before_clip = url
        if proto:
            proto_len = len(proto) + 1 + len(m.group(3) or "")  # +1 for :
        else:
            proto_len = 0

        parts = url[proto_len:].split("/")
        if len(parts) > 1:
            # Grab the whole host part plus the first bit of the path
            # The path is usually not that interesting once shortened
            # (no more slug, etc), so it really just provides a little
            # extra indication of shortening.
            url = (
                url[:proto_len]
                + parts[0]
                + "/"
                + parts[1][:8].split("?")[0].split(".")[0]
            )

        if len(url) > max_len * 1.5:  # still too long
            url = url[:max_len]

        if url != before_clip:
            amp = url.rfind("&")
            # avoid splitting html char entities
            if amp > max_len - 5:
                url = url[:amp]
            url += "..."

            if len(url) >= len(before_clip):
                url = before_clip
            else:
                # full url is visible on mouse-over (for those who don't
                # have a status bar, such as Safari by default)
                params += ' title="%s"' % href

    return u'<a href="%s"%s>%s</a>' % (href, params, url)

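# make_link above is written as an re.sub callback and closes over
# require_protocol, permitted_protocols, extra_params and shorten, which are
# free variables in this excerpt (in a Tornado-style linkify they are the
# enclosing function's arguments). A minimal, hedged sketch of the same
# callback pattern, with a deliberately simplified URL regex (the real
# pattern in a linkifier is far more involved):
import re

_SKETCH_URL_RE = re.compile(r"\b((?:(https?)://)?(?:[\w-]+\.)+[a-z]{2,}(?:/\S*)?)")

def linkify_sketch(text: str, require_protocol: bool = False) -> str:
    def repl(m: re.Match) -> str:
        url, proto = m.group(1), m.group(2)
        if require_protocol and not proto:
            return url  # leave bare hostnames alone
        href = url if proto else "http://" + url
        return '<a href="%s">%s</a>' % (href, url)

    return _SKETCH_URL_RE.sub(repl, text)

# linkify_sketch("see example.com/docs") ->
#   'see <a href="http://example.com/docs">example.com/docs</a>'
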
def _replacement_map_func(match_obj: Match) -> str:
    """Maps the replace_from match to the replace_to string.

    :param match_obj: The replacement character as a regex match object,
        to be used as a key.
    :return: The matching value, a string from the replacements dictionary.
    """
    # Preserve the spacing in group one, but swap the matched char(s)
    # with their replacement from the dict
    return match_obj.group(1) + replacement_dict[match_obj.group(2)]

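# A hedged usage sketch for _replacement_map_func above: it closes over a
# module-level replacement_dict, and the compiled pattern must capture the
# spacing in group 1 and the character to replace in group 2. Both names
# below are illustrative assumptions, not the originals:
import re

replacement_dict = {"&": "and", "@": "at"}
pattern = re.compile(r"(\s*)([&@])")
# pattern.sub(_replacement_map_func, "cats & dogs @ home") -> 'cats and dogs at home'
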
def _from_match(
    cls, match: Match,
    *, origin: Optional[str] = None,
) -> 'Target':
    path = RecordPath(match.group('path'))
    flags_group = match.group('flags')
    try:
        flags = FlagContainer.split_flags_string(
            flags_group, relative_flags=False)
    except FlagError as error:
        raise TargetError(
            "Error while parsing target '{}' flags."
            .format(match.group(0))
        ) from error
    if any(flag.startswith('-') for flag in flags):
        raise RuntimeError
    return cls(path, flags, origin=origin)

def _derive_from_match(
    self, match: Match,
    *, origin: Optional[str] = None,
) -> 'Target':
    try:
        subpath = RecordPath(self.path, match.group('path'))
    except RecordPathError as error:
        raise TargetError(self, match.group(0)) from error
    flags_group = match.group('flags')
    try:
        flags = FlagContainer.split_flags_string(flags_group)
    except FlagError as error:
        raise TargetError(self, match.group(0)) from error
    subflags = self.flags.delta_mixed(flags=flags, origin=origin)
    return self.__class__(subpath, subflags)

def escape_repl(m: Match, prefix: str) -> str:
    """Translate a string escape sequence, e.g. \t -> the tab character.

    Assume that the Match object is from escape_re.
    """
    seq = m.group(1)
    if len(seq) == 1 and seq in escape_map:
        # Single-character escape sequence, e.g. \n.
        return escape_map[seq]
    elif seq.startswith('x'):
        # Hexadecimal sequence \xNN.
        return chr(int(seq[1:], 16))
    elif seq.startswith('u'):
        # Unicode sequence \uNNNN.
        if 'b' not in prefix:
            return chr(int(seq[1:], 16))
        else:
            return '\\' + seq
    else:
        # Octal sequence.
        ord = int(seq, 8)
        if 'b' in prefix:
            # Make sure code is no larger than 255 for bytes literals.
            ord = ord % 256
        return chr(ord)

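# escape_repl above expects escape_re and escape_map from its module. A
# hedged sketch of compatible definitions and a call; both names below are
# assumptions, not the originals:
import re
from functools import partial

escape_map = {'n': '\n', 't': '\t', 'r': '\r', '\\': '\\', "'": "'", '"': '"'}
escape_re = re.compile(r"\\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|[0-7]{1,3}|.)")

# escape_re.sub(partial(escape_repl, prefix=''), r"a\tb\x41\u00e9")
#   -> 'a\tbA\xe9'  (tab, capital A, e-acute)
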
def handle_thread(self, source: str, match: Match) -> Generator[List[Content], None, None]:
    """Handles ``thread`` links to 4chan media.

    Args:
        source (str): The source url
        match (Match): The source match regex

    Yields:
        list[Content]: A list of various levels of quality content for \
            the same source url
    """
    matchdict = match.groupdict()
    data = self._get_data(matchdict["board"], matchdict["id"])
    for post in data.get("posts", []):
        if "md5" in post:
            content_list = []
            for (post_type, url_path, quality, extension_type) in self._content_configs:
                # build post_type depending on existing post_type
                post_type = (f"-{post_type}" if post_type else "")
                content_uid = (
                    f'{self.name}-{matchdict["board"]}-'
                    f'{matchdict["id"]}-{post["tim"]}{post_type}')
                content_fragments = [
                    furl(self._img_base).add(path=url_path.format(
                        board=matchdict["board"], post=post)).url
                ]
                content_extension = (extension_type if extension_type
                                     else post["ext"].split(".")[-1])
                content_description = None
                if "com" in post and len(post["com"]) > 0:
                    content_description = HTML(html=post.get("com")).text
                content_list.append(
                    Content(
                        uid=content_uid,
                        source=source,
                        fragments=content_fragments,
                        extractor=self,
                        extension=content_extension,
                        title=post.get("filename"),
                        description=content_description,
                        quality=quality,
                        uploaded_by=post.get("name"),
                        uploaded_date=datetime.datetime.fromtimestamp(
                            int(post.get("time"))),
                        metadata=post,
                    ))
            yield content_list

def get_acronym_words_start(phrase: str, match: Match) -> int:
    """
    Each acronym match should be preceded by capitalized words that start
    with the acronym's letters.
    :param phrase: "rompió el silencio tras ser despedido del Canal del Fútbol (CDF). "
    :param match: "(CDF)" Match object for this example
    :return: index of the start letter (42 in this example), or -1
    """
    proc = UniversalDefinitionsParser.basic_line_processor
    name = match.group().strip('() ').upper()
    start = match.start()
    words = proc.split_text_on_words(phrase[:start])
    if len(words) < 2:
        return -1

    mistakes = 0
    uppercases = 0
    acr_index = len(name) - 1
    acr_start = words[-1].start

    for i in range(len(words) - 1, -1, -1):
        if words[i].is_separator:
            continue
        l = words[i].text[0]
        l_upper = l.upper()
        is_upper = l_upper == l
        if is_upper:
            uppercases += 1
        is_correct = name[acr_index] == l_upper
        if not is_correct:
            mistakes += 1
            if mistakes > 1:
                return -1
            continue
        acr_start = words[i].start
        acr_index -= 1
        if acr_index < 0:
            break

    return acr_start if uppercases > 1 and acr_index < 0 else -1

def handle(bot: Bot, event: events.TextMessage, match: typing.Match):
    if event.uid not in Config.whitelist_admin:
        return

    # Grab client_uid
    try:
        user = bot.exec_("clientgetnamefromdbid", cldbid=match.group(1))
        client_uid = user[0]["cluid"]
    except ts3.query.TS3QueryError:
        bot.send_message(event.id, "user_not_found")
        return

    try:
        json = fetch_api("account", api_key=match.group(2))
        account = models.Account.get_or_create(bot.session, json, match.group(2))
        identity: models.Identity = models.Identity.get_or_create(
            bot.session, client_uid)

        # Save api key in account
        account.api_key = match.group(2)
        account.is_valid = True
        bot.session.commit()

        transfer_registration(
            bot,
            account,
            event,
            is_admin=True,
            target_identity=identity,
            target_dbid=match.group(1),
        )
    except InvalidKeyException:
        logging.info("This seems to be an invalid API key.")
        bot.send_message(event.id, "invalid_token")
        return
    except (RateLimitException, RequestException, ApiErrBadData):
        bot.send_message(event.id, "error_api")

def replace(match: Match) -> str:
    s = match.group(0)
    try:
        return ESCAPE_DICT[s]
    except KeyError:
        n = ord(s)
        if n < 0x10000:
            return '\\u%04x' % (n,)
        else:
            # surrogate pair
            n -= 0x10000
            s1 = 0xd800 | ((n >> 10) & 0x3ff)
            s2 = 0xdc00 | (n & 0x3ff)
            return '\\u%04x\\u%04x' % (s1, s2)

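# replace above mirrors JSON-style \uXXXX escaping (non-BMP code points
# become a UTF-16 surrogate pair). A hedged sketch of a compatible
# ESCAPE_DICT and pattern; both names are assumptions for illustration:
import re

ESCAPE_DICT = {'\\': '\\\\', '"': '\\"', '\n': '\\n', '\t': '\\t', '\r': '\\r'}
ESCAPE_ASCII_RE = re.compile(r'([\\"]|[^\ -~])')

# ESCAPE_ASCII_RE.sub(replace, 'a"b\u00e9 \U0001F600')
#   -> 'a\\"b\\u00e9 \\ud83d\\ude00'
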
def run(self, m: Match, file_reader: FileReader) -> bool:
    if file_reader.status:
        return False
    file_reader.status = 'axiom'
    axiom = Axiom(sideBar=True)
    axiom.name = m.group(1).strip()
    file_reader.objects.append(axiom)

    def normal_line(file_reader: FileReader, line: str) -> None:
        axiom.append(line)

    file_reader.normal_line_handler = normal_line
    file_reader.blank_line_handler = normal_line
    return True

def run(self, m: Match, file_reader: FileReader) -> bool:
    if file_reader.status:
        return False
    file_reader.status = 'tactic'
    tactic = Tactic(sideBar=True)
    tactic.name = m.group(1).strip()
    file_reader.objects.append(tactic)

    def normal_line(file_reader: FileReader, line: str) -> None:
        tactic.append(line)

    file_reader.normal_line_handler = normal_line
    file_reader.blank_line_handler = normal_line
    return True

def hyperlink(match: Match) -> str:
    end_text = ''
    objname = match.group(0)
    if objname.endswith('.'):
        end_text = objname[-1]
        objname = objname[:-1]
    _, short_objname = objname.split('.', 1)
    # Try importing the string, to make sure it's not pointing at
    # a symbol that doesn't actually exist.
    import_string(objname)
    return '<a href="{}reference.html#{}"><code>{}</code></a>{}'.format(
        settings.DOCS_URL, objname, short_objname, end_text)

def _is_valid_match(self, match: Match) -> bool:
    is_valid_tld = False
    is_ip_url = RegExpUtility.get_group(match, 'IPurl')

    if not is_ip_url:
        tld_string = RegExpUtility.get_group(match, 'Tld')
        tld_matches = self.tld_matcher().find(tld_string)
        if any(o.start == 0 and o.end == len(tld_string) for o in tld_matches):
            is_valid_tld = True

    # For cases like "7.am" or "8.pm" which are more likely time terms.
    if re.match(self.ambiguous_time_term.re, match.group(0)) is not None:
        return False

    return is_valid_tld or is_ip_url

def _get_numbered_group(match: typing.Match, name: str, start: int = 0) -> typing.Optional[str]:
    groups = match.groupdict()
    idx = start
    while True:
        key = f'{name}{idx}'
        if key not in groups:
            return None
        if groups[key] is not None:
            return groups[key]
        idx += 1

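# A hedged usage sketch for _get_numbered_group above: it scans named groups
# name0, name1, ... until one of them matched or the numbering runs out.
# The pattern below is an illustrative assumption:
import re

pattern = re.compile(r'(?P<val0>\d+)|(?P<val1>[a-z]+)')
m = pattern.match('hello')
# _get_numbered_group(m, 'val') -> 'hello' (val0 is None, val1 matched)
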
def extract_entity_attributes(match: Match) -> EntityAttributes:
    """Extract the entity attributes, i.e. type, value, etc., from the
    regex match.

    Args:
        match: Regex match to extract the entity attributes from.

    Returns:
        EntityAttributes object.
    """
    entity_text = match.groupdict()[GROUP_ENTITY_TEXT]

    if match.groupdict()[GROUP_ENTITY_DICT]:
        return extract_entity_attributes_from_dict(entity_text, match)

    entity_type = match.groupdict()[GROUP_ENTITY_TYPE]

    if match.groupdict()[GROUP_ENTITY_VALUE]:
        entity_value = match.groupdict()[GROUP_ENTITY_VALUE]
    else:
        entity_value = entity_text

    return EntityAttributes(entity_type, entity_value, entity_text, None, None)

def wombojify_repl(cls, m: Match) -> str:
    s_tag = '<img class="emoji emoji--large" alt="'
    m1_tag = '" title="'
    m2_tag = '" src="'
    e_tag = '">'
    if m.group('animated'):  # animated emoji
        emoji_url = f"https://cdn.discordapp.com/emojis/{m.group('id')}.gif"
    else:
        emoji_url = f"https://cdn.discordapp.com/emojis/{m.group('id')}.png"
    replacement = f"{s_tag}{m.group('name')}{m1_tag}{m.group('name')}{m2_tag}{emoji_url}{e_tag}"
    return replacement

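# A hedged usage sketch for wombojify_repl above: Discord custom emoji appear
# in message text as <:name:id> (or <a:name:id> when animated), so a pattern
# like the one below supplies the named groups the method expects. The regex
# and the class name are illustrative assumptions:
import re

EMOJI_RE = re.compile(r'<(?P<animated>a?):(?P<name>\w+):(?P<id>\d+)>')
# EMOJI_RE.sub(SomeClass.wombojify_repl, 'hi <:wombo:1234>') would yield an
# <img ... src="https://cdn.discordapp.com/emojis/1234.png"> tag.
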
def _starts_ends_overall(
        m: Match) -> Tuple[MatchIndexes, MatchIndexes, MatchIndexes]:
    """
    Extracts indices from a match object.
    Returns (groupstarts, groupends, [overall_start, overall_end])

    >>> m = re.match(r'.(.)', 'abc')
    >>> _starts_ends_overall(m)
    ([1], [2], [0, 2])

    >>> m = re.match(r'.', 'abc')
    >>> _starts_ends_overall(m)
    ([], [], [0, 1])
    """
    overall_start, overall_end = m.span()
    n_matches = len(m.groups())
    spans = [m.span(n) for n in range(1, n_matches + 1)]
    starts = [span[0] for span in spans]
    ends = [span[1] for span in spans]
    return starts, ends, [overall_start, overall_end]

def fixup(m: Match):
    text = m.group(0)
    if text[:2] == "&#":
        # character reference
        with cl.suppress(ValueError):
            if text[:3] == "&#x":
                return chr(int(text[3:-1], 16))
            else:
                return chr(int(text[2:-1]))
    else:
        # named entity
        with cl.suppress(KeyError):
            text = chr(html.entities.name2codepoint[text[1:-1]])
    return text  # leave as is

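# A hedged usage sketch for fixup above: cl is assumed to be contextlib, and
# the entity pattern below is an illustrative assumption:
import contextlib as cl
import html.entities
import re

ENTITY_RE = re.compile(r'&#?\w+;')
# ENTITY_RE.sub(fixup, 'caf&eacute; &#233; &#xE9;') -> 'café é é'
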
def _upper_zero_group(match: typing.Match, /) -> str:
    """
    Uppercases the characters of the captured group `let`.

    Used to convert snake_case to camelCase.

    Arguments:
        match: Regex match produced by `re.sub`

    Returns:
        The same letter from the group, but uppercased
    """
    return match.group("let").upper()

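# A hedged sketch of the snake_case -> camelCase conversion the docstring
# describes; the pattern is an illustrative assumption:
import re

SNAKE_RE = re.compile(r'_(?P<let>[a-z])')
# SNAKE_RE.sub(_upper_zero_group, 'api_key_id') -> 'apiKeyId'
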
def inline_obj(
    self,
    match: Match,
    lineno: int,
    end_pattern: Pattern,
    nodeclass: nodes.TextElement,
    restore_backslashes: bool = False,
):
    """Create the node for an inline class, if the end string match can be found."""
    string = match.string
    matchstart = match.start("start")
    matchend = match.end("start")
    if self.quoted_start(match):
        return (string[:matchend], [], string[matchend:], [], "")
    endmatch = end_pattern.search(string[matchend:])
    if endmatch and endmatch.start(1):  # 1 or more chars
        _text = endmatch.string[: endmatch.start(1)]
        text = unescape(_text, restore_backslashes)
        textend = matchend + endmatch.end(1)
        rawsource = unescape(string[matchstart:textend], True)
        node = nodeclass(rawsource, text)
        node[0].rawsource = unescape(_text, True)
        return (
            string[:matchstart],
            [node],
            string[textend:],
            [],
            endmatch.group(1),
        )
    msg = self.reporter.warning(
        "Inline %s start-string without end-string." % nodeclass.__name__,
        line=lineno,
    )
    text = unescape(string[matchstart:matchend], True)
    rawsource = unescape(string[matchstart:matchend], True)
    prb = self.problematic(text, rawsource, msg)
    return string[:matchstart], [prb], string[matchend:], [msg], ""

def _convert_entity(m: typing.Match) -> str:
    if m.group(1) == "#":
        try:
            if m.group(2)[:1].lower() == 'x':
                return chr(int(m.group(2)[1:], 16))
            else:
                return chr(int(m.group(2)))
        except ValueError:
            return "&#%s;" % m.group(2)
    try:
        return _HTML_UNICODE_MAP[m.group(2)]
    except KeyError:
        return "&%s;" % m.group(2)

def get_object_html(cls, match: Match, use_preretrieved_html: bool = False) -> str:
    """Return the obj's HTML based on a placeholder in the admin."""
    if use_preretrieved_html:
        # Return the pre-retrieved HTML (already included in placeholder)
        preretrieved_html = match.group(PlaceholderGroups.HTML)
        if preretrieved_html:
            return preretrieved_html.strip()

    quote = cls.objects.get(pk=match.group(PlaceholderGroups.PK))
    if isinstance(quote, dict):
        body = quote['text']
        footer = quote.get('citation_html') or quote.get('attributee_string')
    else:
        body = quote.text.html
        footer = quote.citation_html or quote.attributee_string
    return (
        f'<blockquote class="blockquote">'
        f'{body}'
        f'<footer class="blockquote-footer" style="position: relative;">'
        f'{footer}'
        f'</footer>'
        f'</blockquote>')

def _graft_match(
    self,
    graft: pvproject.Graft,
    fbytes: bytes,
    match: Match,
    offset: int,
    project: 'pvproject.Project',
) -> Tuple[bytes, int]:
    """
    :param graft: a graft with a non-null :attr:`pvproject.Graft.subst`
    :return: the substituted fbytes
    """
    subst = graft.subst_resolved(project)
    if subst is not None:
        mstart, mend = match.span()
        new_text = match.expand(subst)
        head = fbytes[:mstart + offset]
        tail = fbytes[mend + offset:]
        fbytes = head + new_text + tail
        offset += len(new_text) - (mend - mstart)

    return fbytes, offset

def from_match(cls, match: Match, mail_dtime: datetime, ttype: TransType) -> 'Transaction':
    groups_dict = match.groupdict()
    transaction = cls.__new__(cls)
    kwargs = {
        'ttype': ttype,
        'mail_dtime': mail_dtime,
    }
    if 'nat' in groups_dict:
        kwargs['amount'] = float(groups_dict['nat'].replace('.', '').replace(',', '.'))
    if 'int' in groups_dict:
        kwargs['int_amount'] = float(groups_dict['int'].replace('.', '').replace(',', '.'))
    if 'rate' in groups_dict:
        kwargs['rate'] = float(groups_dict['rate'].replace('.', '').replace(',', '.'))
    transaction.__init__(**kwargs)
    return transaction

async def reassign_callback(event: slack_util.Event, match: Match) -> None:
    verb = slack_util.VerboseWrapper(event)

    # Find out our two targets
    from_name = match.group(1).strip()
    to_name = match.group(2).strip()

    # Get them as brothers
    from_bro = await verb(scroll_util.find_by_name(from_name, MIN_RATIO))
    to_bro = await verb(scroll_util.find_by_name(to_name, MIN_RATIO))

    # Score by name similarity to the first brother. Don't care if signed off or not,
    # as we want to be able to transfer even after signoffs (why not, amirite?)
    def scorer(assign: house_management.JobAssignment):
        if assign.assignee is not None:
            r = fuzz.ratio(from_bro.name, assign.assignee.name)
            if r > MIN_RATIO:
                return r

    # Change the assignee
    async def modifier(context: _ModJobContext):
        context.assign.assignee = to_bro

        # Say we did it
        reassign_msg = "Job {} reassigned from {} to {}".format(
            context.assign.job.pretty_fmt(), from_bro, to_bro)
        client.get_slack().reply(event, reassign_msg)

        # Tell the people
        await alert_user(from_bro, reassign_msg)
        await alert_user(to_bro, reassign_msg)

    # Fire it off
    await _mod_jobs(event, scorer, modifier)

def process_amp(data: TreeManager, spec: TreeManager, properties: PropertyManager,
                match: Match, lookup_offset=0) -> str:
    """
    Process Ampersand matches and replace the & expression with the resolved value

    match: the regular expression match; it should have 3 match groups,
        matching &{0} | &({0},{0})
    """
    # Process the [] operator
    if match.group()[0] == '[':
        rematch = re.match(r'\[#([0-9]+)\]', match.group())
        # Default to "up 2 levels" which uses data[:-1] to generate the key value for the array
        t = (tuple(data.path[:-int(rematch.groups()[0]) + 1]) if rematch
             else tuple(data.path[:-1])), tuple(spec.path[:-1])
        # Use a default dict in the property class to return the index
        return properties[t].array_bind[data.current_key]
    elif match.group()[0] == '\\':
        # Catch the case where \ is used to escape an operator []@#$& or \ itself
        return match.group()[1:]

    ascend = int(match.groups()[0] or match.groups()[1] or 0) - lookup_offset
    descend = int(match.groups()[2] or 0) if (
        match.groups()[2] or '0').isnumeric() else match.groups()[2]

    # Return the processed &,@ pattern result by ascending and descending the data tree
    if isinstance(descend, int):
        if descend == 0:
            return get_operator_value(data.ascend(ascend), spec.ascend(ascend), properties, match)
        return properties[data.ascend(ascend).path].matches[descend]
    elif isinstance(descend, str):
        # Spec is not defined for string key descent
        return get_operator_value(data.ascend(ascend - 1)[descend], None, properties, match)
    elif isinstance(descend, list):
        return reduce(operator.getitem, [data.ascend(ascend)] + descend)
    raise JoltException()

def replace_contraction_matches(contraction_match: Match) -> str:
    """
    Replaces contraction matches (used as argument to re.sub).

    Parameters
    ----------
    contraction_match : re.Match
        Contraction regex match.

    Returns
    -------
    match_result : str
        Fixed string (mapping from contraction match).
    """
    match = contraction_match.group(0).lower()
    return contractions_dict_lower.get(match)

def _linkify(match: Match, link: Callable[..., str], module: pdoc.Module,
             _is_pyident=re.compile(r'^[a-zA-Z_]\w*(\.\w+)+$').match, **kwargs):
    matched = match.group(0)
    refname = matched.strip('`')
    dobj = module.find_ident(refname)
    if isinstance(dobj, pdoc.External):
        if not _is_pyident(refname):
            return matched
        # If refname in documentation has a typo or is obsolete, warn.
        # XXX: Assume at least the first part of refname, i.e. the package, is correct.
        module_part = module.find_ident(refname.split('.')[0])
        if not isinstance(module_part, pdoc.External):
            warn('Code reference `{}` in module "{}" does not match any '
                 'documented object.'.format(refname, module.refname),
                 ReferenceWarning, stacklevel=3)
    return link(dobj, **kwargs)

def _expand_dollars(match: T.Match) -> str:
    group = match.group(1)
    parts = group.split(".")
    dollars = int(parts[0]) if parts[0] else 0
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    if dollars and cents:
        dollar_unit = "dollar" if dollars == 1 else "dollars"
        cent_unit = "cent" if cents == 1 else "cents"
        return "%s %s, %s %s" % (dollars, dollar_unit, cents, cent_unit)
    if dollars:
        dollar_unit = "dollar" if dollars == 1 else "dollars"
        return "%s %s" % (dollars, dollar_unit)
    if cents:
        cent_unit = "cent" if cents == 1 else "cents"
        return "%s %s" % (cents, cent_unit)
    return "zero dollars"

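# A hedged usage sketch for _expand_dollars above; the money pattern is an
# illustrative assumption (the digits themselves are typically expanded to
# words by a later pass in TTS text-normalization pipelines):
import re

_DOLLARS_RE = re.compile(r'\$([0-9.]+)')
# _DOLLARS_RE.sub(_expand_dollars, 'it costs $2.50') -> 'it costs 2 dollars, 50 cents'
# _DOLLARS_RE.sub(_expand_dollars, '$1')             -> '1 dollar'
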
def _expand_number(m: Match) -> str:
    num = int(m.group(0))
    if num > 1000 and num < 3000:
        if num == 2000:
            return 'two thousand'
        elif num > 2000 and num < 2010:
            return 'two thousand ' + _inflect.number_to_words(num % 100)
        elif num % 100 == 0:
            return _inflect.number_to_words(num // 100) + ' hundred'
        else:
            return _inflect.number_to_words(num, andword='', zero='oh',
                                            group=2).replace(', ', ' ')
    else:
        return _inflect.number_to_words(num, andword='')

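# A hedged usage sketch for _expand_number above, which applies year-style
# reading to numbers between 1000 and 3000. _inflect is assumed to be an
# engine from the inflect package, and the pattern is an assumption:
import re
import inflect

_inflect = inflect.engine()
_NUMBER_RE = re.compile(r'[0-9]+')
# _NUMBER_RE.sub(_expand_number, 'born in 1987') -> 'born in nineteen eighty-seven'
# _NUMBER_RE.sub(_expand_number, '42 items')     -> 'forty-two items'
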
def _map_token_to_re(cls, token_match: ReMatch, indices: Set[AnyStr]) -> AnyStr:
    index, modifier = token_match.group("index", "modifier")
    if index in indices:
        cls._raise_on_duplicate_token_index(token_match)
    else:
        indices.add(index)

    regex = f"(?P<{_TOKEN_GROUP_PREFIX}{index}>%s)"
    if not modifier:
        return regex % ".*?"
    if modifier == "G":
        return regex % ".*"
    elif modifier.startswith("S"):
        total_spaces = modifier[1:]
        return regex % fr"(?:\S*\s\S*){{{total_spaces}}}\S*"

def md_codeblock(match: typing.Match) -> str:
    """Substitution method to replace markdown code blocks with pygmented HTML.

    Should be called from substitution (sub) regex method.

    Args:
        match: matched block

    Returns:
        A string containing the highlighted (HTML) code block.
    """
    lang, code = match.groups()
    try:
        lexer = pygments.lexers.get_lexer_by_name(lang)
    except ValueError:
        lexer = pygments.lexers.TextLexer()
    return pygments.highlight(code, lexer, FORMATTER)

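# A hedged usage sketch for md_codeblock above; the fence regex and FORMATTER
# are illustrative assumptions. (pygments.util.ClassNotFound subclasses
# ValueError, which is why the except clause above catches unknown languages.)
import re
import pygments
import pygments.formatters
import pygments.lexers

FORMATTER = pygments.formatters.HtmlFormatter()
_CODEBLOCK_RE = re.compile(r'```(\w+)\n(.*?)```', re.DOTALL)
# _CODEBLOCK_RE.sub(md_codeblock, '```python\nprint(1)\n```') yields an HTML
# <div class="highlight"> block in place of the fenced code.
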
def datetime_from_string(o: Match) -> datetime.datetime:
    date = date_from_string(o)
    time = time_from_string(o)
    tz = o.group('tz')
    tzi: Optional[datetime.timezone]
    if tz == 'Z':
        tzi = datetime.timezone.utc
    elif tz:
        td = datetime.timedelta(hours=int(tz[1:3]), minutes=int(tz[4:6]))
        if tz[0] == '-':
            td = -td
        tzi = datetime.timezone(td)
    else:
        tzi = None
    rv = datetime.datetime(date.year, date.month, date.day,
                           time.hour, time.minute, time.second,
                           time.microsecond, tzi)
    return rv

def peek_quoted_part(match: Match,
                     start_func: Callable[[Match, Match], int],
                     end_func: Callable[[Match, Match], int],
                     match_prob: int) -> List[DefinitionMatch]:
    defs = []
    text = match.group()
    quoted_entries = [m for m in CommonDefinitionPatterns.reg_quoted.finditer(text)]
    if len(quoted_entries) == 0:
        return defs

    for entry in quoted_entries:
        df = DefinitionMatch()
        df.name = entry.group()
        df.start = start_func(match, entry)
        df.end = end_func(match, entry)
        df.probability = match_prob
        defs.append(df)

    return defs

def _instantiate_matched(match: Match, group_name: str, inclusion: str) -> Tuple[str, str]:
    num_perc_s = group_name.count("%s")
    matches = [g or "" for g in match.groups()]

    if len(matches) < num_perc_s:
        raise RuntimeError("Invalid entry in inventory_logwatch_groups: group name "
                           "%r contains %d times '%%s', but regular expression "
                           "%r contains only %d subexpression(s)." %
                           (group_name, num_perc_s, inclusion, len(matches)))

    if not matches:
        return group_name, inclusion

    for num, group in enumerate(matches):
        inclusion = eval_regex.instantiate_regex_pattern_once(inclusion, group)
        group_name = group_name.replace("%%%d" % (num + 1), group)
    return group_name % tuple(matches[:num_perc_s]), inclusion

def lex_olist(m: Match) -> Optional[Tuple[str, int]]:
    """
    Attempt to parse a numeral on the list item, be it decimal, roman or alphabetical

    returns list_type, number
    """
    # TODO: support for non-latin alphabet numbering? HTML doesn't seem to support it
    _, numeral = m.groups()
    try:
        return '1', int(numeral)  # is it an integer?
    except ValueError:
        try:
            value = from_roman(numeral.upper())  # is it a roman numeral?
            case = 'i' if numeral.lower() == numeral else 'I'
            return case, value
        except InvalidRomanNumeralError:
            value = 0  # is it just a letter?
            for char in numeral:
                if char not in string.ascii_letters:
                    return None
                value = value * 26 + (string.ascii_lowercase.index(char.lower()) + 1)
            case = 'a' if numeral.lower() == numeral else 'A'
            return case, value

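# A hedged usage sketch for lex_olist above: group 2 of the assumed item
# pattern carries the numeral, and from_roman / InvalidRomanNumeralError are
# assumed to come from the `roman` package (where the function is fromRoman):
import re
import string
from roman import fromRoman as from_roman, InvalidRomanNumeralError

_OLIST_RE = re.compile(r'^(\s*)([0-9a-zA-Z]+)[.)]')
# lex_olist(_OLIST_RE.match('3. three'))  -> ('1', 3)
# lex_olist(_OLIST_RE.match('iv) four'))  -> ('i', 4)
# lex_olist(_OLIST_RE.match('B. two'))    -> ('A', 2)
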
def link_to(matchobj: Match) -> str:
    return r"%s<a href='?%s' class='nocode'>%s</a>%s" % (
        matchobj.group(1),
        self.req_qs(link, use_stored=False),
        e_html(link),
        matchobj.group(1))

def from_match(cls, match: Match) -> 'RouteNode':
    place = match.group('place').strip(QUOTES_AND_WHITESPACE)
    visited = bool(match.group('strikethrough'))
    skipped = match.group('skipped') is not None
    skip_reason = match.group('skip_reason') or ('' if skipped else None)
    return cls(name=place, visited=visited, skip_reason=skip_reason)

def substitute_one(m: Match):
    name = m.group(1)
    if name not in args:
        return m.group()
    return self._to_rdf(args[name], prefixes).n3()