def test_multibyte_conservative_truncation():
    """Ensure truncating a multibyte url slug won't massively shorten it."""
    # the comma (6th character) gets converted to an underscore, so if the
    # amount of truncation weren't restricted, the result would collapse to a
    # 46-char slug instead of using the full 100-char limit
    text = 'パイソンは、汎用のプログラミング言語である'
    slug = convert_to_url_slug(text, 100)
    assert len(slug) == 100
def test_multibyte_whole_character_truncation():
    """Ensure truncation happens at the edge of a multibyte character."""
    # each character url-encodes to 3 bytes = 9 characters, so every limit
    # from 9 through 17 should keep exactly the first encoded character
    text = 'コード'
    slugs = {convert_to_url_slug(text, limit) for limit in range(9, 18)}
    assert slugs == {'%E3%82%B3'}
def __init__(self, group: Group, page_name: str, markdown: str, user: User):
    """Create a new wiki page.

    Raises ValueError if the page's slug is the reserved "new_page" name, or
    if a page with the same path already exists on disk.
    """
    self.group = group
    self.page_name = page_name
    self.slug = convert_to_url_slug(page_name)

    # prevent possible conflict with url for creating a new page
    if self.slug == "new_page":
        raise ValueError("Invalid page name")

    # NOTE(review): self.file_path presumably derives from the group/slug
    # attributes assigned above, so this check must stay after them — confirm
    # against the file_path property definition
    if self.file_path.exists():
        raise ValueError("Wiki page already exists")

    # create the directory for the group if it doesn't already exist
    self.file_path.parent.mkdir(mode=0o755, exist_ok=True)

    # write the initial markdown as the page's first edit
    self.edit(markdown, user, "Create page")
def add_anchors_to_headings(html: str) -> str:
    """Replace all heading elements with ones with ids that link to themselves.

    Each <h1>-<h6> in the html fragment is replaced by a same-level heading
    whose id is a url-slug of its text, containing a link to that same anchor.
    Returns the modified fragment as a string.
    """
    soup = BeautifulSoup(html, features="html5lib")
    headings = soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"])
    for heading in headings:
        # generate an anchor from the string contents of the heading
        anchor = convert_to_url_slug("".join(heading.strings))

        # create a link to that anchor, and put the heading's contents inside it
        link = soup.new_tag("a", href=f"#{anchor}")
        # NOTE(review): assigning .contents directly (instead of append())
        # moves the children wholesale — verify the children's parent pointers
        # behave as expected with this BeautifulSoup version
        link.contents = heading.contents

        # put that link in a replacement same-level heading with the anchor as id
        new_heading = soup.new_tag(heading.name, id=anchor)
        new_heading.append(link)
        heading.replace_with(new_heading)

    # html5lib adds <html> and <body> tags around the fragment, strip them back out
    return "".join([str(tag) for tag in soup.body.children])
def url_slug(self) -> str:
    """Return the url slug for this topic."""
    # derive the slug from the topic's title on each access
    slug = convert_to_url_slug(self.title)
    return slug
def test_multibyte_url_slug():
    """Ensure converting/truncating a slug with encoded characters works."""
    text = 'Python ist eine üblicherweise höhere Programmiersprache'
    # truncating at 45 chars must not cut through an encoded umlaut
    assert convert_to_url_slug(text, 45) == 'python_ist_eine_%C3%BCblicherweise'
def test_url_slug_truncation():
    """Ensure a simple url slug truncates as expected."""
    slug = convert_to_url_slug("Here's another string to truncate.", 15)
    assert slug == 'heres_another'
def test_url_slug_with_punctuation():
    """Ensure url slug conversion with punctuation works as expected."""
    text = "Here's a string. It has (some) punctuation!"
    slug = convert_to_url_slug(text)
    assert slug == 'heres_a_string_it_has_some_punctuation'
def test_simple_url_slug_conversion():
    """Ensure that a simple url slug conversion works as expected."""
    result = convert_to_url_slug("A Simple Test")
    assert result == 'a_simple_test'
def test_url_slug_with_apostrophes():
    """Ensure url slugs don't replace apostrophes with underscores."""
    # covers both the ASCII apostrophe and the typographic one
    text = "Here's what we don’t want as underscores"
    slug = convert_to_url_slug(text)
    assert slug == "heres_what_we_dont_want_as_underscores"