def _global_re_handler(match: _re) -> str:
    """Regex-substitution callback: group 1 of *match* names a function
    registered in ``_globals``; its return value replaces the match.

    :param match: match object whose group 1 is the function name
    :return: the registered function's result, or the original matched
        text when the name is not registered
    :raises RuntimeError: if the registered function itself raises
    """
    f_name = match.group(1)
    if f_name not in _globals:
        # Unknown name: leave the matched text untouched.
        return match.group(0)
    try:
        return _globals[f_name]()
    except Exception as e:
        # Chain the original exception so the root cause stays visible.
        raise RuntimeError(
            'Error while calling lang function {}(): {}'.format(f_name, e)
        ) from e
def is_href_matching(url_string: str, regex: re.Pattern) -> bool:
    """
    Check if the regex has any match in the url string.

    :param url_string: URL as string
    :param regex: Compiled regex used to search URL
    :return: boolean
    """
    # url_query_cleaner (defined elsewhere) presumably strips/normalizes the
    # query string before matching — confirm against its definition.
    return regex.search(url_query_cleaner(url_string)) is not None
def _parse_table_lines(lines: List[str], regex: re) -> List[Dict[str, any]]: """Parse the lines using the given regular expression. If a line can't be parsed it is logged and skipped in the output. """ results = [] for line in lines: match = regex.search(line) if not match: _LOGGER.debug("Could not parse line: %s", line) continue results.append(match.groupdict()) return results
def is_querystring_matching(url: URL, regex: re.Pattern) -> bool:
    """
    Check if the regex has any match in the URL query parameters.

    :param url: URL object
    :param regex: Compiled regex used to search query
    :return: boolean
    """
    # Iterating url.query yields the query-parameter names (keys).
    return any(regex.search(key) for key in url.query)
def parse_files(content_list: list, matcher: re) -> List[str]:
    """Collect the first capture group of *matcher* from every matching line.

    Each entry of *content_list* is either a ``Path`` (read as UTF-8 text)
    or a string that already holds the content.  Captured values have
    surrounding whitespace stripped and internal whitespace runs collapsed
    to single spaces.

    :param content_list: mix of ``Path`` objects and content strings
    :param matcher: compiled pattern applied to each line with ``match``
    :return: list of cleaned capture-group-1 values
    """
    values = []
    for entry in content_list:
        if isinstance(entry, Path):
            text = entry.read_text('utf-8')
        else:
            text = entry
        for raw_line in text.splitlines():
            found = matcher.match(raw_line)
            if not found:
                continue
            captured = found.group(1).strip(' \t\n\r')
            # Collapse runs of whitespace/tabs into a single space.
            values.append(re.sub(r'\s+', ' ', captured))
    return values
def scan_recent(self, pattern: re.Pattern, timeout: int = 10) -> bool:
    """Poll the file at ``self.path`` for a line matching *pattern*.

    Each pass seeks back to ``self._last_pos`` and re-reads from there, so
    lines appended between passes are picked up.  ``_last_pos`` itself is
    not advanced by this method.

    :param pattern: compiled regex matched against the start of each line
    :param timeout: seconds to keep polling before giving up
    :return: True once a line matches; False if the file does not exist
    :raises TimeoutError: if no line matches within *timeout* seconds
    """
    if not os.path.isfile(self.path):
        return False
    with open(self.path) as fd:
        deadline = datetime.now() + timedelta(seconds=timeout)
        while True:
            # Rewind to the last known position and scan everything
            # written since then.
            fd.seek(self._last_pos, os.SEEK_SET)
            for line in fd:
                if pattern.match(line):
                    return True
            if datetime.now() > deadline:
                raise TimeoutError(
                    f"pattern not found in error log after {timeout} seconds"
                )
            time.sleep(.1)
    # NOTE: the original ended with an unreachable `return False` after the
    # infinite loop; it has been removed.
def __set_url(self, url_exists: re.Match):
    """Populate ``url``, ``title`` and ``coords``/``coordinates`` by
    scraping the response text held on ``self``.

    :param url_exists: match object locating the place URL in the response
        (falsy when no URL was found — then nothing is set)

    Best-effort: if any of the intermediate ``re.search`` calls fails to
    match (returns None → AttributeError on ``.group()``), the attributes
    are simply left unset.
    """
    if not url_exists:
        return
    try:
        # Turn the original "?q=" query into a "/place/" path prefix,
        # escaping '+' so it is literal inside the regex below.
        # (Fixed: "\+" was an invalid escape sequence; now a raw string.)
        prefix = re.sub(r"\?q=", "/place/", self.__mq).replace('+', r"\+")
        path = re.search(fr'{prefix}.+?/data.+?\\"', self.__resp.text).group()
        # Decode the escaped '=' ("\\u003d") embedded in the scraped path.
        path = re.sub(r"\\\\u003d", '=', path)
        coords = re.search(
            r"@-?\d\d?\.\d{4,8},-?\d\d?\.\d{4,8}", url_exists.group()
        ).group()
        self.coords = self.coordinates = tuple(coords.strip('@').split(','))
        # Inject the coordinates + zoom level into the path.
        path = re.sub(r"/data", f"/{coords},17z/data", path)
        self.url = path[:-11] + "!8m2!3d" + re.sub(r",", "!4d", coords.strip('@'))
        title = re.search(
            r"(?<=https://www.google.com/maps/place/)\w+.*?/@", self.url
        ).group().strip("/@")
        self.title = unquote_plus(title)
    except AttributeError:
        # A failed .search() returned None — leave attributes unset.
        pass
def is_subdomain_matching(url: URL, regex: re.Pattern) -> bool:
    """
    Check if the url subdomain matches the regex

    :param url: URL object
    :param regex: compiled regex object
    :return: boolean
    """
    if not url.host:
        return False
    labels = url.host.split(".")
    if len(labels) <= 2:
        # Bare domain like "example.com" — nothing before the registered
        # domain, so there is no subdomain to match.
        return False
    sub_domains = ".".join(labels[:-2])
    return regex.search(sub_domains) is not None
def expand_corpus(contraction: re) -> str:
    """Return the expanded form of a matched contraction.

    Looks up the lower-cased matched text in ``contraction_map``; an
    unknown contraction yields ``None``.

    :param contraction: match object whose full match is the contraction
    :return: the expansion from ``contraction_map``, or ``None``
    """
    return contraction_map.get(contraction.group(0).lower())
def displaymatch(match: re.Match):
    """Return a readable representation of *match*, or None.

    Mirrors the ``displaymatch`` helper from the Python Regular
    Expression HOWTO.
    """
    if match is None:
        return None
    # Fixed format string: was '<Match: %r, group=%r?>' — the stray '?'
    # and singular 'group' were typos for the conventional 'groups=%r>'.
    return '<Match: %r, groups=%r>' % (match.group(), match.groups())