Beispiel #1
0
def normalize_emb_code(emb_code: str):
    """Return a canonical, comparison-friendly form of ``emb_code``.

    The value is trimmed, lowercased, and stripped of every space, hyphen
    and dot. Accents are then removed with ``strip_accents_ascii`` (defined
    outside this snippet). Finally, a trailing ``ce`` is rewritten as ``ec``
    so that both spellings of the suffix end up as the same string.
    """
    # A single translation pass deletes all three separator characters.
    separators = str.maketrans('', '', ' -.')
    compact = emb_code.strip().lower().translate(separators)

    ascii_code = strip_accents_ascii(compact)

    if not ascii_code.endswith('ce'):
        return ascii_code
    return ascii_code[:-2] + 'ec'
Beispiel #2
0
def normalize_emb_code(emb_code: str):
    """Normalize ``emb_code`` to a compact, lowercase, accent-free string.

    Steps, in order: trim surrounding whitespace and lowercase; drop all
    spaces, hyphens and dots; remove accents via ``strip_accents_ascii``
    (provided elsewhere); swap a trailing ``"ce"`` for ``"ec"``.
    """
    code = emb_code.strip().lower()
    for separator in (" ", "-", "."):
        code = code.replace(separator, "")

    code = strip_accents_ascii(code)

    # Only the final two characters are affected by the suffix swap.
    return code[:-2] + "ec" if code.endswith("ce") else code
Beispiel #3
0
def preprocess_product_name(text: str, lower: bool, strip_accent: bool,
                            remove_punct: bool, remove_digit: bool) -> str:
    """Apply the selected cleanup steps to ``text`` and collapse spaces.

    Each flag enables one step; the steps always run in this order:

    1. ``strip_accent`` -- pass the text through ``strip_accents_ascii``.
    2. ``lower`` -- lowercase the text.
    3. ``remove_punct`` -- replace ``PUNCTUATION_REGEX`` matches with a space.
    4. ``remove_digit`` -- replace ``DIGIT_REGEX`` matches with a space.

    Regardless of the flags, ``MULTIPLE_SPACES_REGEX`` matches are replaced
    with a single space before returning. The helper function and the three
    patterns are defined outside this snippet.
    """
    cleaned = strip_accents_ascii(text) if strip_accent else text
    cleaned = cleaned.lower() if lower else cleaned
    cleaned = PUNCTUATION_REGEX.sub(" ", cleaned) if remove_punct else cleaned
    cleaned = DIGIT_REGEX.sub(" ", cleaned) if remove_digit else cleaned

    # The substitutions above can leave runs of blanks behind; squeeze them.
    return MULTIPLE_SPACES_REGEX.sub(" ", cleaned)
Beispiel #4
0
def get_tag(brand: str) -> str:
    """Build a lowercase, hyphen-separated tag from ``brand``.

    Accents are removed with ``strip_accents_ascii`` (defined outside this
    snippet), the result is lowercased, and then ``" & "``, single spaces
    and apostrophes are each replaced by a hyphen.
    """
    tag = strip_accents_ascii(brand).lower()
    # " & " goes first so it yields one hyphen; replacing the bare spaces
    # beforehand would leave "-&-" instead.
    for old in (" & ", " ", "'"):
        tag = tag.replace(old, "-")
    return tag
Beispiel #5
0
 def normalize_text(text: str) -> str:
     """Lowercase ``text``, strip its accents, and blank out ``'`` and ``-``.

     Accent removal is delegated to ``strip_accents_ascii`` (defined outside
     this snippet). Apostrophes and hyphens are each replaced by a single
     space; existing whitespace is left as is.
     """
     folded = strip_accents_ascii(text.lower())
     without_apostrophes = folded.replace("'", " ")
     return without_apostrophes.replace("-", " ")