def test_language_switch():
    """Exercise each ``language_switch`` policy of the French espeak backend.

    The same five utterances are phonemized with the 'keep-flags' policy,
    with the default policy (expected to match 'keep-flags'), with
    'remove-flags' and with 'remove-utterance'. An unknown policy name is
    expected to raise a RuntimeError when the backend is built.
    """
    utterances = [
        "j'aime l'anglais",
        "j'aime le football",
        "football",
        "surtout le real madrid",
        "n'utilise pas google"
    ]

    # expected output when the (en)/(fr) switch flags are kept
    with_flags = [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə (en)fʊtbɔːl(fr)',
        '(en)fʊtbɔːl(fr)',
        'syʁtu lə (en)ɹiəl(fr) madʁid',
        'nytiliz pa (en)ɡuːɡəl(fr)'
    ]

    # expected output when the flags are stripped from the phonemes
    without_flags = [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə fʊtbɔːl',
        'fʊtbɔːl',
        'syʁtu lə ɹiəl madʁid',
        'nytiliz pa ɡuːɡəl'
    ]

    def run(backend):
        # phonemize the shared utterances with default separators, stripped
        return backend.phonemize(utterances, separator.Separator(), True)

    assert run(
        EspeakBackend('fr-fr', language_switch='keep-flags')) == with_flags

    # default behavior is to keep the flags
    assert run(EspeakBackend('fr-fr')) == with_flags

    assert run(
        EspeakBackend('fr-fr', language_switch='remove-flags')
    ) == without_flags

    assert run(
        EspeakBackend('fr-fr', language_switch='remove-utterance')
    ) == ['ʒɛm lɑ̃ɡlɛ']

    # an unknown policy is rejected at construction time
    with pytest.raises(RuntimeError):
        EspeakBackend('fr-fr', language_switch='foo')
def test_stress():
    """Check the ``with_stress`` option on an English utterance.

    With ``with_stress=False`` the output carries no stress marks, with
    ``with_stress=True`` the primary stress mark is present.
    """
    cases = (
        (False, ['həloʊ wɜːld']),
        (True, ['həlˈoʊ wˈɜːld']),
    )

    for with_stress, expected in cases:
        backend = EspeakBackend('en-us', with_stress=with_stress)
        phonemized = backend.phonemize(
            ['hello world'], default_separator, True)
        assert phonemized == expected
def test_bad_text():
    """Check that a bare ``str`` input is rejected and a list is accepted.

    Passing a string to ``phonemize`` is expected to raise a RuntimeError
    whose message mentions that the input is a ``str``. The same text
    wrapped in a list is expected to be phonemized normally.
    """
    backend = EspeakBackend('en-us')
    text = 'hello world'

    with pytest.raises(RuntimeError) as err:
        backend.phonemize(text, default_separator, True)
    # ``err`` is pytest's ExceptionInfo wrapper: the raised exception (and
    # therefore its message) is exposed as ``err.value``. Checking the
    # wrapper's own string form depends on how pytest renders it.
    assert 'input text to phonemize() is str' in str(err.value)

    assert backend.phonemize(
        [text], default_separator, True) == ['həloʊ wɜːld']
def test_stress():
    """Check the ``with_stress`` option using plain string input.

    The utterance is phonemized once without and once with stress marks
    and each result is compared to the expected phoneme string.
    """
    cases = (
        (False, 'hello world', u'həloʊ wɜːld'),
        (True, u'hello world', u'həlˈoʊ wˈɜːld'),
    )

    for with_stress, utterance, expected in cases:
        backend = EspeakBackend('en-us', with_stress=with_stress)
        assert expected == backend.phonemize(
            utterance, separator.default_separator, True)
def test_phone_separator_simple():
    """Check the '_' phone separator with and without stripping.

    With ``strip=True`` no separator follows the last phone of a word, with
    ``strip=False`` every phone is followed by '_' and every word by a
    space.
    """
    sentence = 'The lion and the tiger ran'
    underscore = separator.Separator(phone='_')
    backend = EspeakBackend('en-us')

    stripped = backend.phonemize(sentence, separator=underscore, strip=True)
    assert 'ð_ə l_aɪə_n æ_n_d ð_ə t_aɪ_ɡ_ɚ ɹ_æ_n' == stripped

    unstripped = backend.phonemize(
        sentence, separator=underscore, strip=False)
    assert 'ð_ə_ l_aɪə_n_ æ_n_d_ ð_ə_ t_aɪ_ɡ_ɚ_ ɹ_æ_n_ ' == unstripped
def test_tie_utf8():
    """Check the output of the French backend when ``tie=True``.

    NOTE this is a bug in espeak to append ties on (en) language switch
    flags. For now phonemizer does not fix it.
    """
    backend = EspeakBackend('fr-fr', tie=True)

    cases = (
        # used to be 'bɔ̃͡ʒuʁ '
        (['bonjour'], ['bɔ̃ʒuʁ ']),
        # used to be 'ty ɛm lə (͡e͡n͡)fʊtbɔ͡ːl(͡f͡r͡)'
        (['tu aimes le football'], ['ty ɛm lə (͡e͡n)fʊtbɔːl(͡f͡r) ']),
        (['bonjour apple'], ['bɔ̃ʒuʁ (͡e͡n)apə͡l(͡f͡r) ']),
    )

    for utterance, expected in cases:
        assert backend.phonemize(utterance) == expected
def test_french():
    """Phonemize a French sentence with custom word and phone separators.

    Words are terminated by ';eword ' and phones by a space; nothing is
    stripped, so each word (including the last) ends with the word
    separator.
    """
    backend = EspeakBackend('fr-fr')
    custom_sep = separator.Separator(
        word=';eword ', syllable=None, phone=' ')

    phonemized = backend.phonemize(u'bonjour le monde', custom_sep, False)

    assert phonemized == u'b ɔ̃ ʒ u ʁ ;eword l ə ;eword m ɔ̃ d ;eword '
def test_no_switch(policy, caplog):
    """Check that text without language switch logs nothing.

    ``policy`` is the ``language_switch`` value under test (supplied from
    outside this function). Whatever the policy, the two French utterances
    must be phonemized identically and no log message must be captured.
    """
    backend = EspeakBackend('fr-fr', language_switch=policy)

    phonemized = backend.phonemize(
        ["j'aime l'anglais", "tu parles le français"],
        separator=Separator(),
        strip=True)
    assert phonemized == ['ʒɛm lɑ̃ɡlɛ', 'ty paʁl lə fʁɑ̃sɛ']

    # third field of each captured record tuple is the message text
    logged = [record[2] for record in caplog.record_tuples]
    assert not logged
def test_tie_simple(caplog, tie, expected):
    """Check phonemization of 'Jackie Chan' for a given ``tie`` value.

    ``tie`` and ``expected`` are supplied from outside this function. When
    ``tie`` is truthy, a message stating that the phone separator is
    ignored must have been logged.
    """
    backend = EspeakBackend('en-us', tie=tie)
    phonemized = backend.phonemize(
        ['Jackie Chan'], separator=Separator(word=' ', phone='_'))
    assert phonemized[0] == expected

    if not tie:
        return

    # third field of each captured record tuple is the message text
    logged = [record[2] for record in caplog.record_tuples]
    assert (
        'cannot use ties AND phone separation, ignoring phone separator'
        in logged)
def test_arabic():
    """Phonemize an Arabic greeting, expecting version-dependent output.

    The expected phonemes differ before and after espeak-ng 1.49.3, so the
    version string reported by ``EspeakBackend.version()`` selects which
    transcription to compare against.
    """
    # local import so this test does not rely on a file-level ``import re``
    import re

    backend = EspeakBackend('ar')
    text = u'السلام عليكم'
    sep = separator.Separator()

    # Compare the version numerically: comparing the dotted parts as
    # strings is lexicographic and misorders e.g. '100' < '49' or
    # '10' < '3'. Only the leading dotted number is parsed, so a
    # trailing suffix in the version string is ignored.
    match = re.match(r'\s*(\d+(?:\.\d+)*)', EspeakBackend.version())
    version = (
        tuple(int(part) for part in match.group(1).split('.'))
        if match else ())

    # Arabic seems to have changed starting at espeak-ng-1.49.3
    if version >= (1, 49, 3):
        expected = u'ʔassalaːm ʕliːkm '
    else:
        expected = u'ʔassalaam ʕaliijkum '

    out = backend.phonemize(text, sep, False)
    assert out == expected
def test_arabic():
    """Phonemize an Arabic greeting, expecting version-dependent output.

    ``EspeakBackend.version()`` is compared to the tuple ``(1, 49, 3)`` to
    select the expected transcription.
    """
    backend = EspeakBackend('ar')

    # Arabic seems to have changed starting at espeak-ng-1.49.3
    is_recent = EspeakBackend.version() >= (1, 49, 3)
    expected = ['ʔassalaːm ʕliːkm '] if is_recent else ['ʔassalaam ʕaliijkum ']

    phonemized = backend.phonemize(['السلام عليكم'], Separator(), False)
    assert phonemized == expected
def test_language_switch_remove_utterance(caplog, langswitch_text, njobs):
    """Check the 'remove-utterance' language switch policy.

    Utterances containing a language switch are expected to come back as
    empty strings, and the number of removed utterances must be logged.
    An unknown policy name must raise a RuntimeError at construction.
    """
    backend = EspeakBackend('fr-fr', language_switch='remove-utterance')
    phonemized = backend.phonemize(
        langswitch_text, separator=Separator(), strip=True, njobs=njobs)
    assert phonemized == ['ʒɛm lɑ̃ɡlɛ', '', '', '', '']

    # third field of each captured record tuple is the message text
    logged = [record[2] for record in caplog.record_tuples]
    removal_notice = (
        'removed 4 utterances containing language switches '
        '(applying "remove-utterance" policy)')
    assert removal_notice in logged

    with pytest.raises(RuntimeError):
        EspeakBackend('fr-fr', language_switch='foo')
def test_language_switch_remove_flags(caplog, langswitch_text, njobs):
    """Check the 'remove-flags' language switch policy.

    The phonemized output must not contain any (en)/(fr) flag, and two
    messages must be logged: the lines containing a language switch and
    the policy that was applied.
    """
    backend = EspeakBackend('fr-fr', language_switch='remove-flags')
    phonemized = backend.phonemize(
        langswitch_text, separator=Separator(), strip=True, njobs=njobs)
    assert phonemized == [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə fʊtbɔːl',
        'fʊtbɔːl',
        'syʁtu lə ɹiəl madʁid',
        'nytiliz pa ɡuːɡəl'
    ]

    # third field of each captured record tuple is the message text
    logged = [record[2] for record in caplog.record_tuples]

    switch_notice = (
        '4 utterances containing language switches on lines 2, 3, 4, 5')
    assert switch_notice in logged

    policy_notice = (
        'language switch flags have been removed '
        '(applying "remove-flags" policy)')
    assert policy_notice in logged
def test_language_switch_default(caplog, langswitch_text, njobs):
    """Check the language switch policy used when none is specified.

    The output is expected to keep the (en)/(fr) flags and the log must
    report both the affected lines and the "keep-flags" policy.
    """
    # default behavior is to keep the flags
    backend = EspeakBackend('fr-fr')
    phonemized = backend.phonemize(
        langswitch_text, separator=Separator(), strip=True, njobs=njobs)
    assert phonemized == [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə (en)fʊtbɔːl(fr)',
        '(en)fʊtbɔːl(fr)',
        'syʁtu lə (en)ɹiəl(fr) madʁid',
        'nytiliz pa (en)ɡuːɡəl(fr)'
    ]

    # third field of each captured record tuple is the message text
    logged = [record[2] for record in caplog.record_tuples]

    switch_notice = (
        '4 utterances containing language switches on lines 2, 3, 4, 5')
    assert switch_notice in logged

    policy_notice = (
        'language switch flags have been kept (applying "keep-flags" policy)')
    assert policy_notice in logged
def test_english():
    """Phonemize four English utterances given as a list of strings.

    The output is expected to be a list with one stripped phoneme string
    per input utterance, in the same order.
    """
    utterances = ['hello world', 'goodbye', 'third line', 'yet another']
    expected = ['həloʊ wɜːld', 'ɡʊdbaɪ', 'θɜːd laɪn', 'jɛt ɐnʌðɚ']

    backend = EspeakBackend('en-us')
    phonemized = backend.phonemize(utterances, default_separator, True)

    assert phonemized == expected
def test_english():
    """Phonemize a multiline English string.

    The four input lines are separated by newlines and the output is
    expected to be a single string with one phonemized line per input
    line.
    """
    multiline = u'hello world\ngoodbye\nthird line\nyet another'
    expected = u'həloʊ wɜːld\nɡʊdbaɪ\nθɜːd laɪn\njɛt ɐnʌðɚ'

    backend = EspeakBackend('en-us')
    phonemized = backend.phonemize(
        multiline, separator.default_separator, True)

    assert phonemized == expected
class TextFrontend(object):
    """Text preprocessor mapping text to symbol ids, with G2P support.

    The symbol table is made of the pad, end-of-sequence and space symbols,
    the punctuation marks, the numbers and either the phoneme or the
    grapheme set depending on ``use_phonemes``.
    """

    def __init__(
        self,
        text_cleaners=None,
        use_phonemes=True,
        n_jobs=1,
        with_stress=True,
        language="en-us",
    ):
        """
        Text sequences preprocessor with G2P support.

        :param text_cleaners: list of text cleaner names, defaults to
            ``["basic_cleaners"]`` when not given:
            * `basic_cleaners`: basic pipeline that lowercases and
                collapses whitespace without transliteration.
            * `transliteration_cleaners`: pipeline for non-English text
                that transliterates to ASCII.
            * `english_cleaners`: pipeline for English text, including
                number and abbreviation expansion.
        :param use_phonemes: when true the symbol table is built from the
            phoneme set and `text_to_sequence` runs G2P on its input; when
            false the grapheme set is used and no G2P is performed
        :param n_jobs: number of workers for phonemization
        :param with_stress: set `True` to stress words during phonemization
        :param language: language code given to the espeak backend
        """
        # A fresh list per instance: a list literal used as the default
        # argument would be shared (and mutable) across all instances.
        if text_cleaners is None:
            text_cleaners = ["basic_cleaners"]

        self.text_cleaners = text_cleaners
        self.use_phonemes = use_phonemes
        self.n_jobs = n_jobs
        self.with_stress = with_stress
        self.language = language

        CHARS = _GRAPHEMES if not self.use_phonemes else _PHONEMES

        self.SYMBOLS = ([_PAD, _EOS, _SPACE] + _PUNCTUATIONS + ["¡", "¿"] +
                        _NUMBERS + CHARS)

        # Mappings from symbol to numeric ID and vice versa:
        self._symbol_to_id = {s: i for i, s in enumerate(self.SYMBOLS)}
        self._id_to_symbol = {i: s for i, s in enumerate(self.SYMBOLS)}

        self._separator = Separator(word=_WORD_SEP,
                                    syllable="",
                                    phone=_PHONEME_SEP)
        self.p = EspeakBackend(
            self.language,
            punctuation_marks="".join(_PUNCTUATIONS),
            preserve_punctuation=True,
            with_stress=self.with_stress,
        )

    @property
    def nchars(self):
        """Number of symbols in the symbol table."""
        return len(self.SYMBOLS)

    def _should_keep_token(self, token, token_dict):
        """Tell whether `token` is a known, non-special entry.

        Used for both directions of the mapping: `token` may be a symbol
        (looked up in the symbol-to-id table) or an id (looked up in the
        id-to-symbol table). The pad and end-of-sequence entries are
        rejected in both their symbol and id forms.

        :param token: symbol or id to test
        :param token_dict: mapping the token must belong to
        :return: `True` if the token must be kept
        """
        return (token in token_dict and token != _PAD and token != _EOS
                and token != self._symbol_to_id[_PAD]
                and token != self._symbol_to_id[_EOS])

    def graphemes_to_phonemes(self, text):
        """
        Transforms grapheme text representation to phoneme representation.

        :param text: grapheme string
        :return: phoneme string
        """
        # get phonemes
        phonemes = self.p.phonemize(text,
                                    separator=self._separator,
                                    strip=True,
                                    njobs=self.n_jobs)
        phonemes = phonemes.replace(" ", _WORD_SEP)

        # Isolate punctuation marks: insert a phoneme separator between a
        # punctuation mark and its neighbour unless that neighbour is the
        # word separator. Pieces are collected in a list and joined once
        # instead of growing a string character by character.
        pieces = []
        last_index = len(phonemes) - 1
        for i, c in enumerate(phonemes):
            pieces.append(c)
            if i < last_index:
                following = phonemes[i + 1]
                if ((c in _PUNCTUATIONS and following != _WORD_SEP)
                        or (following in _PUNCTUATIONS and c != _WORD_SEP)):
                    pieces.append(_PHONEME_SEP)
        phonemes = "".join(pieces)

        # words are finally joined by a space surrounded by phoneme
        # separators, so that the space is a token of its own
        words = phonemes.split(_WORD_SEP)
        return f"{_PHONEME_SEP} {_PHONEME_SEP}".join(words)

    def text_to_sequence(self, text, just_map=False):
        """
        Encodes symbolic text into a sequence of character ids, which can
        be fed to TTS. Performs G2P as intermediate step if flag
        `use_phonemes` is set to `True`.

        :param text: string
        :param just_map: in phoneme mode, when `True` the G2P step is
            skipped and the cleaned text is split on the phoneme separator
            directly. Ignored in grapheme mode.
        :return: list of symbol ids ending with the end-of-sequence id;
            tokens that are unknown or special (pad, end-of-sequence) are
            dropped
        """
        text = clean_text(text, cleaner_names=self.text_cleaners)
        if self.use_phonemes:
            if not just_map:
                text = self.graphemes_to_phonemes(text)
            text = text.split(_PHONEME_SEP)

        # in grapheme mode `text` is still a string, iterated per character
        sequence = [
            self._symbol_to_id[s] for s in text
            if self._should_keep_token(s, self._symbol_to_id)
        ]
        sequence.append(self._symbol_to_id[_EOS])
        return sequence

    def sequence_to_text(self, sequence):
        """
        Decodes numeric sequence of character ids back into symbolic text
        (phoneme representation if flag `use_phonemes` is set to `True`).

        :param sequence: iterable of symbol ids
        :return: string of the kept symbols joined by the phoneme separator
            in phoneme mode, by the grapheme separator otherwise
        """
        text = [
            self._id_to_symbol[idx] for idx in sequence
            if self._should_keep_token(idx, self._id_to_symbol)
        ]
        joiner = _PHONEME_SEP if self.use_phonemes else _GRAPHEME_SEP
        return joiner.join(text)

    def __call__(self, text):
        """Shortcut for `text_to_sequence(text)`."""
        return self.text_to_sequence(text)
def test_phone_separator(text, expected):
    """Check phonemization of ``text`` with '_' as the phone separator.

    ``text`` and ``expected`` are supplied from outside this function. The
    output is stripped before being compared to ``expected``.
    """
    underscore = separator.Separator(phone='_')
    backend = EspeakBackend('en-us')

    phonemized = backend.phonemize(text, separator=underscore, strip=True)

    assert phonemized == expected
def phonemize(text, language='en-us', backend='espeak',
              separator=default_separator, strip=False,
              with_stress=False, use_sampa=False,
              language_switch='keep-flags', njobs=1,
              logger=logging.getLogger(__name__)):
    """Multilingual text to phonemes converter

    Validate the options, build a phonemization backend for `language`
    and return the phonemized version of `text`.

    Parameters
    ----------
    text (str or list of str): The text to be phonemized. Any empty
      line will be ignored. If `text` is an str, it can be multiline
      (lines being separated by a newline character). If `text` is a
      list, each element is considered as a separated line. Each line
      is considered as a text utterance.

    language (str): The language code of the input text, must be
      supported by the backend.

    backend (str): The software backend to use for phonemization. The
      accepted names are 'espeak', 'festival' and 'segments'.

    separator (Separator): string separators between phonemes,
      syllables and words, default to `default_separator`.

    strip (bool): If True, don't output the last word and phone
      separators of a token, default to False.

    with_stress (bool): Only accepted with the 'espeak' backend. When
      True the stresses on phonemes are present, when False they are
      removed. Default to False.

    use_sampa (bool): Use the 'sampa' phonetic alphabet instead of
      'ipa'. Only accepted with the 'espeak' backend, and only when
      espeak-ng is the installed flavor. Default to False.

    language_switch (str): Policy to apply when espeak pronounces some
      words in another language than the requested one. Must be
      'keep-flags' (the default, language flags such as (en) are kept
      in the output), 'remove-flags' (flags are removed) or
      'remove-utterance' (the whole line of text including a language
      switch is removed). Values other than 'keep-flags' are only
      accepted with the 'espeak' backend.

    njobs (int): The number of parallel jobs to launch, forwarded to
      the backend.

    logger (logging.Logger): the logging instance where to send
      messages. If not specified, use the logger of this module.

    Returns
    -------
    phonemized text (str or list of str) : The input `text` phonemized
      for the given `language`, as returned by the backend.

    Raises
    ------
    RuntimeError
      If `backend` is not one of the accepted names, or if `use_sampa`,
      `with_stress` or a non default `language_switch` is requested
      with a backend other than 'espeak', or if `use_sampa` is
      requested but espeak-ng is not installed.

    """
    # ensure the backend is either espeak, festival or segments
    supported = ('espeak', 'festival', 'segments')
    if backend not in supported:
        raise RuntimeError(
            '{} is not a supported backend, choose in {}.'.format(
                backend, ', '.join(supported)))

    uses_espeak = backend == 'espeak'

    # ensure the phonetic alphabet is valid
    if use_sampa is True:
        if uses_espeak and not EspeakBackend.is_espeak_ng():
            raise RuntimeError(  # pragma: nocover
                'sampa alphabet is not supported by espeak, '
                'please install espeak-ng')
        if not uses_espeak:
            raise RuntimeError(
                'sampa alphabet is only supported by espeak backend')

    # with_stress option only valid for espeak
    if with_stress and not uses_espeak:
        raise RuntimeError(
            'the "with_stress" option is available for espeak backend only, '
            'but you are using {} backend'.format(backend))

    # language_switch option only valid for espeak
    if language_switch != 'keep-flags' and not uses_espeak:
        raise RuntimeError(
            'the "language_switch" option is available for espeak backend '
            'only, but you are using {} backend'.format(backend))

    # python2 is not supported, only warn about it
    if sys.version_info[0] == 2:  # pragma: nocover
        logger.warning(
            'Your are using python2 but unsupported by the phonemizer, '
            'please update to python3')

    # NOTE(review): an espeak backend is instantiated whatever the value
    # of `backend`, so 'festival' and 'segments' pass the validation
    # above but are phonemized with espeak -- confirm this is intended.
    #
    # The instantiation raises a RuntimeError if the language is not
    # supported.
    espeak = EspeakBackend(
        language,
        with_stress=with_stress,
        use_sampa=use_sampa,
        language_switch=language_switch,
        logger=logger)

    # phonemize the input text with the backend
    return espeak.phonemize(
        text, separator=separator, strip=strip, njobs=njobs)