def normalize(text, lang="en-us", remove_articles=True):
    """Normalize an utterance so it is easier to parse.

    The work is delegated to a language-specific helper, which is expected
    to make numbers consistent, get rid of contractions, etc. The helper is
    chosen from the first two characters of the lower-cased language code.

    Args:
        text (str): the string to normalize
        lang (str): the code for the language ``text`` is in; it is
                    converted with ``str()`` and lower-cased before matching
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'). True by default.

    Returns:
        (str): The normalized string, or ``text`` unchanged when no
               normalizer matches ``lang``.
    """
    # Comparing the two-character prefix is the same test as
    # str.startswith() with a two-character argument.
    prefix = str(lang).lower()[:2]

    if prefix == "en":
        return normalize_en(text, remove_articles)
    if prefix == "es":
        return normalize_es(text, remove_articles)
    if prefix == "pt":
        return normalize_pt(text, remove_articles)
    if prefix == "it":
        return normalize_it(text, remove_articles)
    if prefix == "fr":
        return normalize_fr(text, remove_articles)
    if prefix == "sv":
        return normalize_sv(text, remove_articles)
    if prefix == "de":
        return normalize_de(text, remove_articles)

    # TODO: Normalization for other languages
    return text
def normalize(text, lang="en-us", remove_articles=True):
    """Normalize an utterance so it is easier to parse.

    The work is delegated to a language-specific helper, which is expected
    to make numbers consistent, get rid of contractions, etc. The helper is
    chosen from the first two characters of the lower-cased language code.
    When no helper matches, a warning is logged and the input is returned
    untouched.

    Args:
        text (str): the string to normalize
        lang (str): the code for the language ``text`` is in; it is
                    converted with ``str()`` and lower-cased before matching
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'). True by default.

    Returns:
        (str): The normalized string, or ``text`` unchanged when no
               normalizer matches ``lang``.
    """
    lowered = str(lang).lower()
    # Comparing the two-character prefix is the same test as
    # str.startswith() with a two-character argument.
    prefix = lowered[:2]

    if prefix == "en":
        return normalize_en(text, remove_articles)
    if prefix == "es":
        return normalize_es(text, remove_articles)
    if prefix == "pt":
        return normalize_pt(text, remove_articles)
    if prefix == "it":
        return normalize_it(text, remove_articles)
    if prefix == "fr":
        return normalize_fr(text, remove_articles)
    if prefix == "sv":
        return normalize_sv(text, remove_articles)
    if prefix == "de":
        return normalize_de(text, remove_articles)

    # TODO: Normalization for other languages
    # The warning reports the full lower-cased code, not just its prefix.
    message = ('Language "{}" not recognized! Please make sure your '
               'language is one of the following: '
               'en, es, pt, it, fr, sv, de.')
    LOG.warning(message.format(lowered))
    return text
def normalize(text, lang=None, remove_articles=True):
    """Normalize an utterance so it is easier to parse.

    The work is delegated to a language-specific helper, which is expected
    to make numbers consistent, get rid of contractions, etc. The helper is
    selected by the value ``get_primary_lang_code(lang)`` returns. When no
    helper matches, the unsupported code is reported through
    ``_log_unsupported_language`` and the input is returned untouched.

    Args:
        text (str): the string to normalize
        lang (str): the BCP-47 code for the language to use, None uses
                    default
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'). True by default.

    Returns:
        (str): The normalized string, or ``text`` unchanged when the
               language has no normalizer.
    """
    primary = get_primary_lang_code(lang)

    if primary == "en":
        return normalize_en(text, remove_articles)
    if primary == "es":
        return normalize_es(text, remove_articles)
    if primary == "pt":
        return normalize_pt(text, remove_articles)
    if primary == "it":
        return normalize_it(text, remove_articles)
    if primary == "fr":
        return normalize_fr(text, remove_articles)
    if primary == "sv":
        return normalize_sv(text, remove_articles)
    if primary == "de":
        return normalize_de(text, remove_articles)
    if primary == "da":
        return normalize_da(text, remove_articles)
    if primary == "nl":
        return normalize_nl(text, remove_articles)

    # TODO: Normalization for other languages
    # Keep this list in step with the branches above.
    supported = ['en', 'es', 'pt', 'it', 'fr', 'sv', 'de', 'da', 'nl']
    _log_unsupported_language(primary, supported)
    return text
def normalize(text, lang=None, remove_articles=True):
    """Normalize an utterance so it is easier to parse.

    The work is delegated to a language-specific helper, which is expected
    to make numbers consistent, get rid of contractions, etc. The helper is
    selected by the value ``get_primary_lang_code(lang)`` returns. When no
    helper matches, the unsupported code is reported through
    ``_log_unsupported_language`` and the input is returned untouched.

    Args:
        text (str): the string to normalize
        lang (str): the BCP-47 code for the language to use, None uses
                    default
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'). True by default.

    Returns:
        (str): The normalized string, or ``text`` unchanged when the
               language has no normalizer.
    """
    primary = get_primary_lang_code(lang)

    if primary == "en":
        return normalize_en(text, remove_articles)
    if primary == "es":
        return normalize_es(text, remove_articles)
    if primary == "pt":
        return normalize_pt(text, remove_articles)
    if primary == "it":
        return normalize_it(text, remove_articles)
    if primary == "fr":
        return normalize_fr(text, remove_articles)
    if primary == "sv":
        return normalize_sv(text, remove_articles)
    if primary == "de":
        return normalize_de(text, remove_articles)
    if primary == "da":
        return normalize_da(text, remove_articles)

    # TODO: Normalization for other languages
    # Keep this list in step with the branches above.
    supported = ['en', 'es', 'pt', 'it', 'fr', 'sv', 'de', 'da']
    _log_unsupported_language(primary, supported)
    return text