def _translate_word(name, lang=''):
    """Helper to get word translations.

    Looks up ``text/<lang-code>/<name>.word`` in the package resources and
    returns the first non-comment line of that file.

    Args:
        name (str): Word name. Returned as the default value if not translated
        lang (str): Language code, e.g. "en-us"

    Returns:
        str: translated version of resource name (falls back to ``name``
        when no resource file exists or it cannot be read)
    """
    # Local import avoids a circular dependency at module load time.
    from lingua_franca.internal import resolve_resource_file
    if not lang:
        if lang is None:
            # Passing lang=None is deprecated usage; warn but keep working.
            warn(NoneLangWarning)
        lang = get_default_loc()

    # Normalize to a full lang code (e.g. "pt" -> "pt-pt") when needed.
    lang_code = lang if is_supported_full_lang(lang) else \
        get_full_lang_code(lang)

    filename = resolve_resource_file(join("text", lang_code, name + ".word"))
    if filename:
        try:
            with open(filename, 'r', encoding='utf8') as f:
                for line in f:
                    word = line.strip()
                    if word.startswith("#"):
                        continue  # skip comment lines
                    return word
        except (OSError, UnicodeError):
            # Best-effort lookup: a missing or unreadable resource file
            # falls back to the untranslated name instead of raising.
            # (Narrowed from a bare `except Exception` so programming
            # errors are no longer silently swallowed.)
            pass
    return name  # use resource name as the word
class PortugueseNormalizer(Normalizer):
    """Normalizer preconfigured for Portuguese (pt-pt).

    Default rules are loaded once, at class-creation time, from the
    packaged ``text/pt-pt/normalize.json`` resource.
    """
    with open(resolve_resource_file("text/pt-pt/normalize.json")) as f:
        _default_config = json.load(f)

    @staticmethod
    def tokenize(utterance):
        """Split an utterance into whitespace-delimited tokens.

        Separates numbers from ``%`` and ``#`` symbols and breaks
        hyphenated clitic constructions (e.g. "amo-te" -> "amo - te").

        Args:
            utterance (str): text to tokenize

        Returns:
            list: tokens; empty list for empty/whitespace-only input
        """
        # Split things like 12%
        utterance = re.sub(r"([0-9]+)([\%])", r"\1 \2", utterance)
        # Split things like #1
        utterance = re.sub(r"(\#)([0-9]+\b)", r"\1 \2", utterance)
        # Split things like amo-te
        utterance = re.sub(r"([a-zA-Z]+)(-)([a-zA-Z]+\b)", r"\1 \2 \3",
                           utterance)
        tokens = utterance.split()
        # Guard against empty input: tokens[-1] on [] raises IndexError.
        if tokens and tokens[-1] == '-':
            tokens = tokens[:-1]

        return tokens