def replace_any_case(element: NavigableString, target_word: str) -> None:
    """Wrap every occurrence of ``target_word`` in ``element`` with ``<b>``.

    Matching is case-insensitive and the matched text is re-emitted as-is,
    so the original casing of each occurrence is preserved. The text of
    ``element`` is HTML-escaped, the ``<b>`` markup is substituted in, and
    the result is parsed with ``html.parser`` and swapped into the tree in
    place of ``element``.

    Nothing is changed when:

    * ``element`` and ``target_word`` have the same length,
    * ``target_word`` contains no ASCII letter or digit, or
    * ``element`` sits directly inside a ``<style>`` tag.

    Args:
        element: The text node to rewrite in place.
        target_word: The literal word to emphasise. It is treated as plain
            text, not as a regular expression.
    """
    # NOTE(review): presumably a same-length element is just the word
    # itself and is deliberately left unbolded -- confirm with callers.
    if len(element) == len(target_word):
        return

    if not re.match('.*[a-zA-Z0-9].*', target_word) or (
            element.parent and element.parent.name == 'style'):
        return

    # Escape the word so regex metacharacters in it ("c++", "(", "a.b")
    # are matched literally instead of raising re.error or matching
    # unrelated text.
    escaped_word = re.escape(target_word)

    # The lookaheads reject matches that begin with, or are directly
    # followed by, one of the characters { } < > -
    pattern = fr'\b((?![{{}}<>-]){escaped_word}(?![{{}}<>-]))\b'

    bolded_markup = re.sub(
        pattern,
        r'<b>\1</b>',
        html.escape(element),
        flags=re.I,
    )
    element.replace_with(BeautifulSoup(bolded_markup, 'html.parser'))
def replace_text_with_tag(sub_text: str,
                          repl_tag: Tag,
                          text_tag: bs4.NavigableString,
                          article: Article) -> bs4.NavigableString:
    """Replace the first occurrence of ``sub_text`` in ``text_tag`` by a tag.

    ``text_tag`` is split around the first occurrence of ``sub_text``. The
    text before it replaces ``text_tag`` in the tree, ``repl_tag`` is
    inserted right after it, and the text after it is inserted after
    ``repl_tag``. If ``sub_text`` does not occur, the whole text is kept
    before ``repl_tag`` and the trailing string is empty.

    Args:
        sub_text: The text to cut out of ``text_tag``.
        repl_tag: The tag inserted where ``sub_text`` was.
        text_tag: The text node being split.
        article: Supplies ``article.content`` as the container when
            ``text_tag`` has no parent or its parent is the document root.

    Returns:
        The newly inserted string holding the text that followed
        ``sub_text`` (possibly empty).

    Raises:
        ValueError: If ``text_tag`` is not found among the container's
            children, or if ``sub_text`` is empty.
    """
    # If we can't find the parent, assume it's just the document.
    parent: Tag
    if text_tag.parent is None or text_tag.parent.name == '[document]':
        parent = article.content
    else:
        parent = text_tag.parent

    # Locate the node by identity: NavigableString compares by text, so a
    # value-based list.index() would return an earlier sibling that merely
    # has the same text and the tag would land in the wrong place.
    siblings = parent.contents
    tag_idx = next(
        (i for i, child in enumerate(siblings) if child is text_tag),
        None,
    )
    if tag_idx is None:
        # Legacy fallback: value-based lookup (raises ValueError if absent).
        tag_idx = siblings.index(text_tag)

    # Split the text around the first match; a missing match leaves the
    # whole text in the leading part and an empty trailing part.
    leading_text, _, trailing_text = text_tag.partition(sub_text)

    # Convert these strings to tree nodes.
    begin = bs4.NavigableString(leading_text)
    end = bs4.NavigableString(trailing_text)

    text_tag.replace_with(begin)
    parent.insert(tag_idx + 1, repl_tag)
    parent.insert(tag_idx + 2, end)

    return end
def _byte_to_char_index(text, byte_index):
    """Convert an offset into the UTF-8 encoding of ``text`` to a str index.

    An offset that falls inside a multi-byte character is rounded down to
    the start of that character; an offset at or past the end of the
    encoded text maps to ``len(text)``.
    """
    for char_index, char in enumerate(text):
        char_size = len(char.encode('utf-8'))
        if char_size > byte_index:
            return char_index
        byte_index -= char_size
    return len(text)


def _wrap_in_highlight(soup, node):
    """Replace ``node`` by a ``<span class="highlight">`` that contains it."""
    span = soup.new_tag(
        'span',
        attrs={'class': 'highlight'},
    )
    node.replace_with(span)
    span.append(node)
    return span


def highlight(html, highlights):
    """Highlight part of an HTML document.

    The document is parsed with html5lib and walked in document order while
    counting the UTF-8 bytes of its text nodes. Text that falls inside a
    highlighted range is wrapped in ``<span class="highlight">``; text nodes
    straddling a range boundary are split at that boundary first.

    :param html: HTML source to parse.
    :param highlights: Iterable of (start, end) pairs, which are computed
        over UTF-8 bytes and don't count HTML tags.
        NOTE(review): the walk only moves forward, so the pairs presumably
        need to be sorted and non-overlapping -- confirm with callers.
    :return: The contents of the resulting ``<body>`` serialized to a string
        (without the ``<body>`` tag itself).
    """
    highlights = iter(highlights)
    soup = BeautifulSoup(html, 'html5lib')

    pos = 0  # Number of UTF-8 text bytes consumed before `node`
    node = soup
    highlighting = False
    try:
        start, end = next(highlights)
        while True:
            if getattr(node, 'contents', None):
                # Descend into the first child
                node = node.contents[0]
            else:
                if isinstance(node, NavigableString):
                    nb = len(node.string.encode('utf-8'))
                    while True:
                        if not highlighting and start == pos:
                            highlighting = True
                        elif not highlighting and pos + nb > start:
                            # Highlight starts inside this text node: split
                            # it. Offsets are in bytes, so convert to a
                            # character index before slicing the str.
                            parent = node.parent
                            split_at = _byte_to_char_index(
                                node.string, start - pos,
                            )
                            left = node.string[:split_at]
                            right = node.string[split_at:]
                            idx = parent.index(node)
                            node.replace_with(NavigableString(left))
                            node = NavigableString(right)
                            parent.insert(idx + 1, node)
                            nb -= start - pos
                            pos = start
                            # Code below will do the actual highlighting
                            highlighting = True
                        elif highlighting and pos + nb <= end:
                            # Whole node is inside the highlight
                            node = _wrap_in_highlight(soup, node)
                            if pos + nb == end:
                                highlighting = False
                                start, end = next(highlights)
                            break
                        elif highlighting:
                            # Highlight ends inside this text node: wrap
                            # the first part, keep walking from the rest.
                            parent = node.parent
                            split_at = _byte_to_char_index(
                                node.string, end - pos,
                            )
                            left = node.string[:split_at]
                            rest = node.string[split_at:]
                            idx = parent.index(node)
                            left_node = NavigableString(left)
                            node.replace_with(left_node)
                            _wrap_in_highlight(soup, left_node)
                            node = NavigableString(rest)
                            parent.insert(idx + 1, node)
                            nb -= end - pos
                            pos = end
                            highlighting = False
                            start, end = next(highlights)
                        else:
                            break

                    pos += nb
                # Move to the next node in document order, climbing up
                # through the parents when there is no further sibling
                while not node.next_sibling:
                    if not node.parent:
                        raise StopIteration
                    node = node.parent
                node = node.next_sibling
    except StopIteration:
        # Normal termination: either the highlights or the document ran out
        pass

    # Remove everything but body
    body = soup.body
    soup.clear()
    soup.append(body)

    # Remove the body tag itself to only have the contents
    soup.body.unwrap()

    # Back to text
    return str(soup)