def normalize(text, lang=None, remove_articles=True):
    """Normalize a string so that it is ready for parsing.

    Makes numbers consistent, gets rid of contractions, etc., by handing
    the text to the normalizer that matches the requested language.

    Args:
        text (str): the string to normalize
        lang (str): the BCP-47 code for the language to use, None uses
            the default
        remove_articles (bool): whether to remove articles (like 'a', or
            'the'). True by default.

    Returns:
        (str): The normalized string. When no normalizer exists for the
            language, the input text is returned as-is.
    """
    # Ordered (primary language code, normalizer) pairs. The order also
    # determines the order of the supported-language list reported below.
    normalizers = (
        ("en", normalize_en),
        ("es", normalize_es),
        ("pt", normalize_pt),
        ("it", normalize_it),
        ("fr", normalize_fr),
        ("sv", normalize_sv),
        ("de", normalize_de),
        ("da", normalize_da),
        ("nl", normalize_nl),
    )

    lang_code = get_primary_lang_code(lang)
    for code, normalizer in normalizers:
        if lang_code == code:
            return normalizer(text, remove_articles)

    # TODO: Normalization for other languages
    _log_unsupported_language(lang_code, [code for code, _ in normalizers])
    return text
def normalize(text, lang=None, remove_articles=True):
    """Prepare a string for parsing

    This function prepares the given text for parsing by making
    numbers consistent, getting rid of contractions, etc.

    The text is handed to the language-specific normalizer selected by
    the primary language code of `lang`.

    Args:
        text (str): the string to normalize
        lang (str): the BCP-47 code for the language to use, None uses default
        remove_articles (bool): whether to remove articles (like 'a', or
                                'the'). True by default.

    Returns:
        (str): The normalized string. If the language has no normalizer,
               the unsupported language is reported through
               _log_unsupported_language and `text` is returned unchanged.
    """
    lang_code = get_primary_lang_code(lang)

    # Each branch returns immediately, so the normalizer name is only
    # looked up for the language that was actually requested.
    if lang_code == "en":
        return normalize_en(text, remove_articles)
    if lang_code == "es":
        return normalize_es(text, remove_articles)
    if lang_code == "pt":
        return normalize_pt(text, remove_articles)
    if lang_code == "it":
        return normalize_it(text, remove_articles)
    if lang_code == "fr":
        return normalize_fr(text, remove_articles)
    if lang_code == "sv":
        return normalize_sv(text, remove_articles)
    if lang_code == "de":
        return normalize_de(text, remove_articles)
    if lang_code == "da":
        return normalize_da(text, remove_articles)
    # Dutch: this later definition of `normalize` is the one that ends up
    # bound to the name, so it must not drop a language that the earlier
    # definition in this module already dispatches to.
    if lang_code == "nl":
        return normalize_nl(text, remove_articles)

    # TODO: Normalization for other languages
    _log_unsupported_language(
        lang_code,
        ['en', 'es', 'pt', 'it', 'fr', 'sv', 'de', 'da', 'nl'])
    return text