def normalize_emb_code(emb_code: str):
    """Return a canonical spelling of ``emb_code``.

    The input is trimmed of surrounding whitespace and lower-cased, every
    space, hyphen and dot is removed, and the result is passed through
    ``strip_accents_ascii``. If the cleaned value ends with ``"ce"``, that
    suffix is rewritten to ``"ec"``.

    :param emb_code: the raw code to normalize
    :return: the normalized code
    """
    cleaned = emb_code.strip().lower()
    for separator in (' ', '-', '.'):
        cleaned = cleaned.replace(separator, '')
    cleaned = strip_accents_ascii(cleaned)

    # NOTE(review): presumably unifies the "CE"/"EC" suffix variants of the
    # same code -- confirm with the data owners.
    if cleaned.endswith('ce'):
        stem = cleaned[:-2]
        return stem + 'ec'
    return cleaned
def normalize_emb_code(emb_code: str):
    """Normalize ``emb_code`` so that equivalent spellings compare equal.

    Steps, in order: strip surrounding whitespace, lower-case, delete all
    spaces, hyphens and dots, apply ``strip_accents_ascii``, and finally
    replace a trailing ``"ce"`` with ``"ec"``.

    :param emb_code: the raw code to normalize
    :return: the normalized code
    """
    # Single-pass deletion of the three separator characters.
    separators = str.maketrans({" ": None, "-": None, ".": None})
    compact = emb_code.strip().lower().translate(separators)
    ascii_code = strip_accents_ascii(compact)

    if not ascii_code.endswith("ce"):
        return ascii_code
    # NOTE(review): the "ce" -> "ec" swap looks like it folds two suffix
    # spellings into one -- confirm the intent.
    return ascii_code[:-2] + "ec"
def preprocess_product_name(
    text: str,
    lower: bool,
    strip_accent: bool,
    remove_punct: bool,
    remove_digit: bool,
) -> str:
    """Apply the selected clean-up steps to ``text``.

    The steps run in a fixed order: accent stripping, lower-casing,
    punctuation replacement, digit replacement. Whatever the flags, every
    match of ``MULTIPLE_SPACES_REGEX`` is replaced by a single space at the
    end. The result is not trimmed, so leading or trailing spaces may remain.

    :param text: the product name to process
    :param lower: if True, lower-case the text
    :param strip_accent: if True, apply ``strip_accents_ascii``
    :param remove_punct: if True, replace each ``PUNCTUATION_REGEX`` match
        with a space
    :param remove_digit: if True, replace each ``DIGIT_REGEX`` match with a
        space
    :return: the processed text
    """
    result = strip_accents_ascii(text) if strip_accent else text
    result = result.lower() if lower else result

    # Matches are replaced by a space rather than removed outright; the
    # final substitution below then collapses whatever
    # MULTIPLE_SPACES_REGEX matches.
    if remove_punct:
        result = PUNCTUATION_REGEX.sub(" ", result)
    if remove_digit:
        result = DIGIT_REGEX.sub(" ", result)

    return MULTIPLE_SPACES_REGEX.sub(" ", result)
def get_tag(brand: str) -> str:
    """Build a hyphen-separated tag from ``brand``.

    The brand is passed through ``strip_accents_ascii`` and lower-cased;
    then ``" & "``, single spaces and apostrophes are each replaced by a
    hyphen.

    :param brand: the brand name
    :return: the tag derived from the brand name
    """
    tag = strip_accents_ascii(brand).lower()
    # " & " has to be handled before plain spaces, otherwise it would turn
    # into "-&-" instead of a single hyphen.
    tag = tag.replace(" & ", "-")
    for char in (" ", "'"):
        tag = tag.replace(char, "-")
    return tag
def normalize_text(text: str) -> str:
    """Lower-case ``text``, strip its accents and blank out ``'`` and ``-``.

    The text is lower-cased first, then passed through
    ``strip_accents_ascii``; every apostrophe and hyphen in the result is
    replaced by a space.

    :param text: the text to normalize
    :return: the normalized text
    """
    folded = strip_accents_ascii(text.lower())
    # Both characters map to a space, so one translation pass covers them.
    to_space = str.maketrans({"'": " ", "-": " "})
    return folded.translate(to_space)