Example #1
0
# pylint: disable=unnecessary-comprehension
# Lookup tables between each symbol and its numeric ID (the symbol's
# position in ``symbols``), in both directions:
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
_id_to_symbol = dict(enumerate(symbols))

# The same pair of lookup tables for ``phonemes``:
_phonemes_to_id = {s: i for i, s in enumerate(phonemes)}
_id_to_phonemes = dict(enumerate(phonemes))

# Module-private aliases of the symbol and phoneme collections.
_symbols = symbols
_phonemes = phonemes

# Regular expression matching text enclosed in curly braces.
# Groups: (text before the braces, text inside the braces, remaining text).
_CURLY_RE = re.compile(r"(.*?)\{(.+?)\}(.*)")

# Regular expression matching a run of one or more punctuation marks,
# ignoring empty space. The marks are passed through re.escape so that
# characters with a special meaning inside a character class
# ("]", "^", "\" or a "-" between two other marks) are matched literally
# instead of corrupting the pattern or forming an unintended range.
PHONEME_PUNCTUATION_PATTERN = "[" + re.escape(_punctuations.replace(" ", "")) + "]+"

# Table for str.translate to fix gruut/TTS phoneme mismatch:
# replaces every "g" with "ɡ".
GRUUT_TRANS_TABLE = str.maketrans("g", "ɡ")


def text2phone(text, language, use_espeak_phonemes=False):
    """Convert graphemes to phonemes.
    Parameters:
            text (str): text to phonemize
            language (str): language of the text
    Returns:
            ph (str): phonemes as a string separated by "|"
                    ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
    """
Example #2
0
# pylint: disable=unnecessary-comprehension
# Lookup tables between each symbol and its numeric ID (the symbol's
# position in ``symbols``), in both directions:
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
_id_to_symbol = dict(enumerate(symbols))

# The same pair of lookup tables for ``phonemes``:
_phonemes_to_id = {s: i for i, s in enumerate(phonemes)}
_id_to_phonemes = dict(enumerate(phonemes))

# Module-private aliases of the symbol and phoneme collections.
_symbols = symbols
_phonemes = phonemes

# Regular expression matching text enclosed in curly braces.
# Groups: (text before the braces, text inside the braces, remaining text).
_CURLY_RE = re.compile(r'(.*?)\{(.+?)\}(.*)')

# Regular expression matching a run of one or more punctuation marks,
# ignoring empty space. The marks are passed through re.escape so that
# characters with a special meaning inside a character class
# (']', '^', '\' or a '-' between two other marks) are matched literally
# instead of corrupting the pattern or forming an unintended range.
PHONEME_PUNCTUATION_PATTERN = '[' + re.escape(_punctuations.replace(' ', '')) + ']+'


def text2phone(text, language):
    '''Convert graphemes to phonemes. For most of the languages, it calls
    the phonemizer python library that calls espeak/espeak-ng. For chinese
    mandarin, it calls pypinyin + custom function for phonemizing
        Parameters:
                text (str): text to phonemize
                language (str): language of the text
        Returns:
                ph (str): phonemes as a string separated by "|"
                        ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
    '''

    # TO REVIEW : How to have a good implementation for this?