def normalize(self, name):
    """Turn ``name`` into a lower-case, underscore-separated identifier.

    The name is first transliterated to ASCII and run through the
    category-based character replacement. A name that is unchanged by
    ``upper()`` (i.e. contains no lower-case letters) has its ``WS``
    separators swapped for underscores and is lower-cased; any other name
    is handed to ``stringcase.snakecase``. Runs of consecutive underscores
    are collapsed into a single one before returning.
    """
    cleaned = category_replace(ascii_text(name), UNICODE_CATEGORIES)
    if cleaned.upper() == cleaned:
        # Already "shouty": only the separators and the case need changing.
        cleaned = cleaned.replace(WS, '_').lower()
    else:
        cleaned = stringcase.snakecase(cleaned)
    # Squeeze repeated underscores produced by either branch.
    return re.sub('_+', '_', cleaned)
def clean_strict(text, boundary=WS):
    """Super-hardcore string scrubbing.

    Transliterates ``text`` to ASCII, strips the characters matched by
    ``CHARACTERS_REMOVE_RE``, applies the category-based replacement and
    collapses whitespace. The result is wrapped in ``boundary`` on both
    sides.

    Returns the padded string, or ``None`` when the input cannot be
    reduced to a string (for example ``None`` input) instead of raising
    ``TypeError`` part-way through the pipeline.
    """
    # transliterate to ascii
    text = ascii_text(text)
    if not isinstance(text, str):
        # The regex substitution below only accepts str input.
        return None
    # replace punctuation and symbols
    text = CHARACTERS_REMOVE_RE.sub('', text)
    text = category_replace(text)
    text = collapse_spaces(text)
    if text is None:
        # Guard in case a helper yielded None; joining None would raise.
        return None
    # pad out for company type replacements
    return ''.join((boundary, text, boundary))
def normalize_strong(text):
    """Heavily normalise ``text`` for comparison and machine analysis.

    The output is not meant to stay human-readable; it is a canonical
    form suitable for matching. The value is converted with
    ``string_value`` and latinized, then lower-cased, passed through the
    category-based replacement and finally has its whitespace collapsed.

    Returns ``None`` when latinization yields ``None``.
    """
    latinized = latinize_text(string_value(text))
    if latinized is None:
        return None
    lowered = latinized.lower()
    return collapse_spaces(category_replace(lowered))
def clean_strict(text: Optional[str], boundary: str = WS) -> Optional[str]:
    """Aggressively scrub ``text`` and pad it with ``boundary``.

    Steps: transliterate to ASCII, delete everything matched by
    ``CHARACTERS_REMOVE_RE``, apply the category-based replacement and
    collapse whitespace. The cleaned text is then wrapped in ``boundary``
    on both sides (padding used for company type replacements).

    Returns ``None`` if the transliteration does not produce a string or
    if collapsing whitespace yields ``None``.
    """
    ascii_form = ascii_text(text)
    if not isinstance(ascii_form, str):
        return None
    # Drop punctuation and symbols, then tidy up the spacing.
    stripped = CHARACTERS_REMOVE_RE.sub("", ascii_form)
    collapsed = collapse_spaces(category_replace(stripped))
    if collapsed is None:
        return None
    # Pad out for company type replacements.
    return "".join([boundary, collapsed, boundary])
def normalize(text):
    """Return a lower-cased, ASCII-transliterated form of ``text``.

    The category-based replacement (using ``UNICODE_CATEGORIES``) is
    applied first, followed by ASCII transliteration. Returns ``None``
    when the transliteration step yields ``None``.
    """
    replaced = category_replace(text, replacements=UNICODE_CATEGORIES)
    ascii_form = ascii_text(replaced)
    if ascii_form is None:
        return None
    return ascii_form.lower()