# pylint: disable=unnecessary-comprehension
# Mappings from symbol to numeric ID and vice versa:
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
_id_to_symbol = {i: s for i, s in enumerate(symbols)}

# The same two-way mapping, built from `phonemes`.
_phonemes_to_id = {s: i for i, s in enumerate(phonemes)}
_id_to_phonemes = {i: s for i, s in enumerate(phonemes)}

# Module-private aliases of the imported `symbols` / `phonemes` objects.
_symbols = symbols
_phonemes = phonemes

# Regular expression matching text enclosed in curly braces:
# group 1 = text before the braces, group 2 = text inside, group 3 = the rest.
_CURLY_RE = re.compile(r"(.*?)\{(.+?)\}(.*)")

# Regular expression matching punctuations, ignoring empty space.
# The characters are escaped so that "]", "\", "^" or an accidental "a-b"
# range inside `_punctuations` cannot change the meaning of the character class.
PHONEME_PUNCTUATION_PATTERN = r"[" + re.escape(_punctuations.replace(" ", "")) + r"]+"

# Table for str.translate to fix gruut/TTS phoneme mismatch
# (maps ASCII "g" to the IPA letter "ɡ").
GRUUT_TRANS_TABLE = str.maketrans("g", "ɡ")


def text2phone(text, language, use_espeak_phonemes=False):
    """Convert graphemes to phonemes.

    Parameters:
        text (str): text to phonemize
        language (str): language of the text
        use_espeak_phonemes (bool): defaults to False; presumably selects
            espeak-style phoneme output -- TODO confirm against the body.

    Returns:
        ph (str): phonemes as a string separated by "|"
                ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
    """
    # NOTE(review): only the signature and docstring are visible in this
    # fragment; the function body is not reproduced here.
# pylint: disable=unnecessary-comprehension
# Mappings from symbol to numeric ID and vice versa:
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
_id_to_symbol = {i: s for i, s in enumerate(symbols)}

# The same two-way mapping, built from `phonemes`.
_phonemes_to_id = {s: i for i, s in enumerate(phonemes)}
_id_to_phonemes = {i: s for i, s in enumerate(phonemes)}

# Module-private aliases of the imported `symbols` / `phonemes` objects.
_symbols = symbols
_phonemes = phonemes

# Regular expression matching text enclosed in curly braces:
# group 1 = text before the braces, group 2 = text inside, group 3 = the rest.
_CURLY_RE = re.compile(r'(.*?)\{(.+?)\}(.*)')

# Regular expression matching punctuations, ignoring empty space.
# The characters are escaped so that "]", "\", "^" or an accidental "a-b"
# range inside `_punctuations` cannot change the meaning of the character class.
PHONEME_PUNCTUATION_PATTERN = r'[' + re.escape(_punctuations.replace(' ', '')) + r']+'


def text2phone(text, language):
    '''Convert graphemes to phonemes.

    For most of the languages, it calls the phonemizer python library that
    calls espeak/espeak-ng. For Mandarin Chinese, it calls pypinyin plus a
    custom function for phonemizing.

    Parameters:
        text (str): text to phonemize
        language (str): language of the text

    Returns:
        ph (str): phonemes as a string separated by "|"
                ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
    '''
    # TO REVIEW : How to have a good implementation for this?