def slugify(text, sep='-'):
    """Build a slug from ``text`` with words joined by ``sep``.

    The input is coerced with ``stringify``. Separators already present
    in the value are turned into whitespace before ``normalize`` runs
    (with ``ascii=True``), and the whitespace left in the normalized
    result is then replaced by ``sep``.

    Returns ``None`` when either ``stringify`` or ``normalize`` yields
    ``None``.
    """
    raw = stringify(text)
    if raw is None:
        return None

    # Existing separators become whitespace so they go through the same
    # normalization as every other whitespace character.
    normalized = normalize(raw.replace(sep, WS), ascii=True)
    if normalized is None:
        return None

    return normalized.replace(WS, sep)
def normalize(text: Any, lowercase: bool = True, collapse: bool = True,
              latinize: bool = False, ascii: bool = False,
              encoding_default: Encoding = DEFAULT_ENCODING,
              encoding: Optional[str] = None,
              replace_categories: Categories = UNICODE_CATEGORIES):
    """Apply a chain of normalization steps to ``text``.

    The value is first coerced with ``stringify`` and then transformed so
    that it can be processed more easily afterwards.

    Arguments:

    * ``lowercase``: lowercase the text.
    * ``collapse``: replace multiple whitespace-like characters with a
      single whitespace. This is especially useful with category
      replacement, which can lead to a lot of whitespace.
    * ``latinize``: perform unicode-based transliteration, e.g. of
      cyrillic or CJK scripts into latin. Ignored when ``ascii`` is set.
    * ``ascii``: a stricter form of transliteration that leaves only
      ASCII characters. Takes precedence over ``latinize``.
    * ``encoding_default`` / ``encoding``: forwarded unchanged to
      ``stringify``.
    * ``replace_categories``: perform a replacement of whole classes of
      unicode characters (e.g. symbols, marks, numbers) with a given
      character. It is used to replace any non-text elements of the
      input string.

    Returns the normalized text, or ``None`` when ``stringify`` or the
    transliteration step yields ``None``.
    """
    value = stringify(text, encoding_default=encoding_default,
                      encoding=encoding)
    if value is None:
        return None

    if lowercase:
        value = value.lower()

    # At most one transliterator runs; ``ascii`` wins over ``latinize``.
    transliterate = ascii_text if ascii else (
        latinize_text if latinize else None)
    if transliterate is not None:
        value = transliterate(value)
    if value is None:
        return None

    # Category-based replacement filters out whole classes of characters,
    # such as symbols, punctuation, or whitespace-like characters.
    value = category_replace(value, replace_categories)

    # Collapsing removes consecutive whitespace.
    return collapse_spaces(value) if collapse else value
def _safe_name(file_name: Optional[str], sep: str) -> Optional[str]:
    """Reduce a file name to ASCII and join its parts with ``sep``.

    The name is coerced with ``stringify``, passed through ``ascii_text``,
    ``category_replace`` (using ``UNICODE_CATEGORIES``) and
    ``collapse_spaces``, and its remaining whitespace is replaced by
    ``sep``.

    Returns ``None`` when ``stringify`` yields ``None`` or when the
    cleaned-up name is ``None`` or empty.
    """
    name = stringify(file_name)
    if name is None:
        return None

    cleaned = collapse_spaces(
        category_replace(ascii_text(name), UNICODE_CATEGORIES)
    )
    if not cleaned:
        return None

    return cleaned.replace(WS, sep)
def slugify(value: Any, sep: str = "-") -> Optional[str]:
    """Generate a simple slug from ``value``.

    Slugs are pure ASCII lowercase strings that can be used in URLs and
    other places where a name has to be machine-safe.

    Returns ``None`` when ``stringify`` or ``latinize_text`` yields
    ``None``, or when nothing is left after filtering and collapsing.
    """
    raw = stringify(value)
    if raw is None:
        return None

    # Category replacement runs before transliteration because it gives
    # better results on special characters. Existing separators are
    # turned into whitespace first.
    replaced = category_replace(raw.replace(sep, WS), SLUG_CATEGORIES)
    latin = latinize_text(replaced, ascii=True)
    if latin is None:
        return None

    # Keep only the characters listed in VALID_CHARS.
    kept = "".join(ch for ch in latin.lower() if ch in VALID_CHARS)
    slug = collapse_spaces(kept)
    if not slug:
        return None

    return slug.replace(WS, sep)