Example #1
0
def test_language_switch():
    """Exercise every ``language_switch`` policy of the French espeak backend.

    The input mixes French sentences with words espeak pronounces in
    English, so the (en)/(fr) flags show up in the raw output.
    """
    sentences = [
        "j'aime l'anglais",
        "j'aime le football",
        "football",
        "surtout le real madrid",
        "n'utilise pas google",
    ]

    def run(espeak):
        # each call gets its own default separator and strips trailing ones
        return espeak.phonemize(sentences, separator.Separator(), True)

    with_flags = [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə (en)fʊtbɔːl(fr)',
        '(en)fʊtbɔːl(fr)',
        'syʁtu lə (en)ɹiəl(fr) madʁid',
        'nytiliz pa (en)ɡuːɡəl(fr)',
    ]
    without_flags = [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə fʊtbɔːl',
        'fʊtbɔːl',
        'syʁtu lə ɹiəl madʁid',
        'nytiliz pa ɡuːɡəl',
    ]

    # explicit 'keep-flags' policy
    assert run(EspeakBackend('fr-fr', language_switch='keep-flags')) \
        == with_flags

    # omitting the option behaves like 'keep-flags'
    assert run(EspeakBackend('fr-fr')) == with_flags

    # 'remove-flags' only drops the (en)/(fr) markers
    assert run(EspeakBackend('fr-fr', language_switch='remove-flags')) \
        == without_flags

    # 'remove-utterance' drops every line containing a switch
    assert run(EspeakBackend('fr-fr', language_switch='remove-utterance')) \
        == ['ʒɛm lɑ̃ɡlɛ']

    # an unknown policy is rejected when the backend is built
    with pytest.raises(RuntimeError):
        EspeakBackend('fr-fr', language_switch='foo')
Example #2
0
def test_stress():
    """Stress marks are emitted only when ``with_stress`` is enabled."""
    cases = (
        (False, ['həloʊ wɜːld']),
        (True, ['həlˈoʊ wˈɜːld']),
    )
    for stressed, expected in cases:
        espeak = EspeakBackend('en-us', with_stress=stressed)
        result = espeak.phonemize(['hello world'], default_separator, True)
        assert result == expected
Example #3
0
def test_bad_text():
    """A bare ``str`` input is rejected; the same text in a list is accepted."""
    backend = EspeakBackend('en-us')
    text = 'hello world'
    with pytest.raises(RuntimeError) as err:
        backend.phonemize(text, default_separator, True)
    # Inspect the raised exception itself: ``str(err)`` would stringify the
    # pytest ExceptionInfo wrapper, whose format is not the plain message.
    assert 'input text to phonemize() is str' in str(err.value)

    assert backend.phonemize([text], default_separator,
                             True) == ['həloʊ wɜːld']
Example #4
0
def test_stress():
    """Stress marks are emitted only when ``with_stress`` is enabled.

    This variant feeds a plain string and expects a plain string back.
    """
    plain = EspeakBackend('en-us', with_stress=False)
    plain_out = plain.phonemize(
        'hello world', separator.default_separator, True)
    assert u'həloʊ wɜːld' == plain_out

    stressed = EspeakBackend('en-us', with_stress=True)
    stressed_out = stressed.phonemize(
        u'hello world', separator.default_separator, True)
    assert u'həlˈoʊ wˈɜːld' == stressed_out
Example #5
0
def test_phone_separator_simple():
    """Phones are joined with '_' and ``strip`` controls trailing separators."""
    sentence = 'The lion and the tiger ran'
    underscore = separator.Separator(phone='_')
    espeak = EspeakBackend('en-us')

    expectations = (
        (True, 'ð_ə l_aɪə_n æ_n_d ð_ə t_aɪ_ɡ_ɚ ɹ_æ_n'),
        (False, 'ð_ə_ l_aɪə_n_ æ_n_d_ ð_ə_ t_aɪ_ɡ_ɚ_ ɹ_æ_n_ '),
    )
    for strip, wanted in expectations:
        got = espeak.phonemize(sentence, separator=underscore, strip=strip)
        assert wanted == got
Example #6
0
def test_tie_utf8():
    """Check tie characters in the French backend output."""
    # NOTE espeak has a bug that appends ties on the (en) language switch
    # flags. For now phonemizer does not fix it.
    espeak = EspeakBackend('fr-fr', tie=True)

    cases = (
        # used to be 'bɔ̃͡ʒuʁ '
        ('bonjour', 'bɔ̃ʒuʁ '),
        # used to be 'ty ɛm lə (͡e͡n͡)fʊtbɔ͡ːl(͡f͡r͡)'
        ('tu aimes le football', 'ty ɛm lə (͡e͡n)fʊtbɔːl(͡f͡r) '),
        ('bonjour apple', 'bɔ̃ʒuʁ (͡e͡n)apə͡l(͡f͡r) '),
    )
    for utterance, wanted in cases:
        assert espeak.phonemize([utterance]) == [wanted]
Example #7
0
def test_french():
    """Phonemize a French sentence with custom word and phone separators."""
    custom_sep = separator.Separator(
        word=';eword ', syllable=None, phone=' ')
    wanted = u'b ɔ̃ ʒ u ʁ ;eword l ə ;eword m ɔ̃ d ;eword '

    espeak = EspeakBackend('fr-fr')
    # strip=False keeps the trailing separators expected above
    assert espeak.phonemize(u'bonjour le monde', custom_sep, False) == wanted
def test_no_switch(policy, caplog):
    """With no language switch in the text, any policy yields the same output
    and nothing is logged."""
    sentences = ["j'aime l'anglais", "tu parles le français"]
    espeak = EspeakBackend('fr-fr', language_switch=policy)

    phonemized = espeak.phonemize(sentences, separator=Separator(), strip=True)
    assert phonemized == ['ʒɛm lɑ̃ɡlɛ', 'ty paʁl lə fʁɑ̃sɛ']

    # third field of each captured record tuple is the log message
    logged = [record[2] for record in caplog.record_tuples]
    assert not logged
Example #9
0
def test_tie_simple(caplog, tie, expected):
    """Phonemize with a phone separator and the given ``tie`` setting.

    When ties are requested the backend must log that the phone separator
    is ignored.
    """
    espeak = EspeakBackend('en-us', tie=tie)
    phonemized = espeak.phonemize(
        ['Jackie Chan'], separator=Separator(word=' ', phone='_'))
    assert phonemized[0] == expected

    if not tie:
        return

    # third field of each captured record tuple is the log message
    logged = [record[2] for record in caplog.record_tuples]
    warning = 'cannot use ties AND phone separation, ignoring phone separator'
    assert warning in logged
Example #10
0
def test_arabic():
    """Phonemize an Arabic greeting; the expected output depends on the
    installed espeak version."""
    import re

    backend = EspeakBackend('ar')
    text = u'السلام عليكم'
    sep = separator.Separator()

    # Compare version components as integers: comparing the raw string
    # pieces is lexicographic, so e.g. '100' would sort before '49'.
    version = tuple(
        int(number)
        for number in re.findall(r'\d+', EspeakBackend.version())[:3])

    # Arabic seems to have changed starting at espeak-ng-1.49.3
    if version >= (1, 49, 3):
        expected = u'ʔassalaːm ʕliːkm '
    else:
        expected = u'ʔassalaam ʕaliijkum '
    out = backend.phonemize(text, sep, False)
    assert out == expected
Example #11
0
def test_arabic():
    """Phonemize an Arabic greeting; the expected output depends on the
    installed espeak version."""
    espeak = EspeakBackend('ar')
    greeting = ['السلام عليكم']
    default_sep = Separator()

    # Arabic seems to have changed starting at espeak-ng-1.49.3
    recent = EspeakBackend.version() >= (1, 49, 3)
    wanted = ['ʔassalaːm ʕliːkm '] if recent else ['ʔassalaam ʕaliijkum ']

    assert espeak.phonemize(greeting, default_sep, False) == wanted
def test_language_switch_remove_utterance(caplog, langswitch_text, njobs):
    """'remove-utterance' blanks every line containing a language switch and
    logs how many were removed."""
    espeak = EspeakBackend('fr-fr', language_switch='remove-utterance')
    phonemized = espeak.phonemize(
        langswitch_text, separator=Separator(), strip=True, njobs=njobs)
    assert phonemized == ['ʒɛm lɑ̃ɡlɛ', '', '', '', '']

    # third field of each captured record tuple is the log message
    logged = [record[2] for record in caplog.record_tuples]
    removal_notice = ('removed 4 utterances containing language switches '
                      '(applying "remove-utterance" policy)')
    assert removal_notice in logged

    # an unknown policy is rejected when the backend is built
    with pytest.raises(RuntimeError):
        EspeakBackend('fr-fr', language_switch='foo')
def test_language_switch_remove_flags(caplog, langswitch_text, njobs):
    """'remove-flags' strips the language markers and logs what it did."""
    espeak = EspeakBackend('fr-fr', language_switch='remove-flags')
    phonemized = espeak.phonemize(
        langswitch_text, separator=Separator(), strip=True, njobs=njobs)

    wanted = [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə fʊtbɔːl',
        'fʊtbɔːl',
        'syʁtu lə ɹiəl madʁid',
        'nytiliz pa ɡuːɡəl',
    ]
    assert phonemized == wanted

    # third field of each captured record tuple is the log message
    logged = [record[2] for record in caplog.record_tuples]
    expected_messages = (
        '4 utterances containing language switches on lines 2, 3, 4, 5',
        'language switch flags have been removed '
        '(applying "remove-flags" policy)',
    )
    for message in expected_messages:
        assert message in logged
def test_language_switch_default(caplog, langswitch_text, njobs):
    """Without an explicit policy the language flags are kept and logged."""
    # no language_switch argument: the backend falls back to 'keep-flags'
    espeak = EspeakBackend('fr-fr')
    phonemized = espeak.phonemize(
        langswitch_text, separator=Separator(), strip=True, njobs=njobs)

    wanted = [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə (en)fʊtbɔːl(fr)',
        '(en)fʊtbɔːl(fr)',
        'syʁtu lə (en)ɹiəl(fr) madʁid',
        'nytiliz pa (en)ɡuːɡəl(fr)',
    ]
    assert phonemized == wanted

    # third field of each captured record tuple is the log message
    logged = [record[2] for record in caplog.record_tuples]
    expected_messages = (
        '4 utterances containing language switches on lines 2, 3, 4, 5',
        'language switch flags have been kept (applying "keep-flags" policy)',
    )
    for message in expected_messages:
        assert message in logged
Example #15
0
def test_english():
    """Phonemize several English utterances passed as a list."""
    utterances = ['hello world', 'goodbye', 'third line', 'yet another']
    wanted = ['həloʊ wɜːld', 'ɡʊdbaɪ', 'θɜːd laɪn', 'jɛt ɐnʌðɚ']

    espeak = EspeakBackend('en-us')
    assert espeak.phonemize(utterances, default_separator, True) == wanted
Example #16
0
def test_english():
    """Phonemize a multiline English string; output keeps one line per input
    line."""
    multiline = u'hello world\ngoodbye\nthird line\nyet another'
    wanted = u'həloʊ wɜːld\nɡʊdbaɪ\nθɜːd laɪn\njɛt ɐnʌðɚ'

    espeak = EspeakBackend('en-us')
    phonemized = espeak.phonemize(
        multiline, separator.default_separator, True)
    assert phonemized == wanted
class TextFrontend(object):
    """Turns raw text into sequences of symbol ids (and back), optionally
    going through an espeak phonemization step."""

    def __init__(
        self,
        text_cleaners=None,
        use_phonemes=True,
        n_jobs=1,
        with_stress=True,
        language="en-us",
    ):
        """
        Text sequences preprocessor with G2P support.
        :param text_cleaners: list of cleaner names, `["basic_cleaners"]` when omitted:
            * `basic_cleaners`: basic pipeline that lowercases and collapses whitespace without transliteration.
            * `transliteration_cleaners`: pipeline for non-English text that transliterates to ASCII.
            * `english_cleaners`: pipeline for English text, including number and abbreviation expansion.
        :param use_phonemes: set `True` to build the symbol table from phonemes and
            phonemize text before encoding, `False` to encode graphemes directly
        :param n_jobs: number of workers for phonemization
        :param with_stress: set `True` to stress words during phonemization
        :param language: language code passed to the espeak backend
        """
        # A fresh list per instance: a list literal used as the default
        # argument would be shared (and mutable) across every instance.
        if text_cleaners is None:
            text_cleaners = ["basic_cleaners"]
        self.text_cleaners = text_cleaners
        self.use_phonemes = use_phonemes
        self.n_jobs = n_jobs
        self.with_stress = with_stress
        self.language = language

        CHARS = _GRAPHEMES if not self.use_phonemes else _PHONEMES

        self.SYMBOLS = ([_PAD, _EOS, _SPACE] + _PUNCTUATIONS + ["¡", "¿"] +
                        _NUMBERS + CHARS)

        # Mappings from symbol to numeric ID and vice versa:
        self._symbol_to_id = {s: i for i, s in enumerate(self.SYMBOLS)}
        self._id_to_symbol = {i: s for i, s in enumerate(self.SYMBOLS)}

        self._separator = Separator(word=_WORD_SEP,
                                    syllable="",
                                    phone=_PHONEME_SEP)
        # The backend is created even in grapheme mode.
        self.p = EspeakBackend(
            self.language,
            punctuation_marks="".join(_PUNCTUATIONS),
            preserve_punctuation=True,
            with_stress=self.with_stress,
        )

    @property
    def nchars(self):
        """Number of entries in the symbol table."""
        return len(self.SYMBOLS)

    def _should_keep_token(self, token, token_dict):
        """
        Tells whether `token` is a key of `token_dict` and is neither the
        padding/end-of-sequence symbol nor one of their numeric ids.
        """
        return (token in token_dict and token != _PAD and token != _EOS
                and token != self._symbol_to_id[_PAD]
                and token != self._symbol_to_id[_EOS])

    def graphemes_to_phonemes(self, text):
        """
        Transforms grapheme text representation to phoneme representation.
        :param text: grapheme string
        :return: phoneme string
        """
        phonemes = self.p.phonemize(text,
                                    separator=self._separator,
                                    strip=True,
                                    njobs=self.n_jobs)
        phonemes = phonemes.replace(" ", _WORD_SEP)

        # Insert a phoneme separator between a punctuation mark and any
        # neighbouring character that is not the word separator, so that
        # punctuation always ends up as a token of its own.
        pieces = []
        last_index = len(phonemes) - 1
        for i, c in enumerate(phonemes):
            pieces.append(c)
            if i < last_index:
                following = phonemes[i + 1]
                if ((c in _PUNCTUATIONS and following != _WORD_SEP) or
                        (following in _PUNCTUATIONS and c != _WORD_SEP)):
                    pieces.append(_PHONEME_SEP)

        words = "".join(pieces).split(_WORD_SEP)
        # Words are re-joined with a space that is itself delimited by
        # phoneme separators.
        return f"{_PHONEME_SEP} {_PHONEME_SEP}".join(words)

    def text_to_sequence(self, text, just_map=False):
        """
        Encodes symbolic text into a sequence of character ids, which can be fed to TTS.
        Performs G2P as intermediate step if flag `use_phonemes` is set to `True`.
        :param text: string
        :param just_map: in phonemes mode, set `True` to skip the G2P step and only
            split the cleaned text on the phoneme separator before mapping to ids
        :return: list of symbol ids, terminated by the end-of-sequence id
        """
        text = clean_text(text, cleaner_names=self.text_cleaners)

        if self.use_phonemes:
            if not just_map:
                text = self.graphemes_to_phonemes(text)
            text = text.split(_PHONEME_SEP)
        # In grapheme mode `text` is still a string and is walked character
        # by character; unknown symbols are silently dropped.
        sequence = [
            self._symbol_to_id[s] for s in text
            if self._should_keep_token(s, self._symbol_to_id)
        ]
        sequence.append(self._symbol_to_id[_EOS])
        return sequence

    def sequence_to_text(self, sequence):
        """
        Decodes numeric sequence of character ids back into symbolic text
        (phoneme representation if flag `use_phonemes` is set to `True`).
        :param sequence: iterable of symbol ids
        :return: string of the decoded symbols joined by the phoneme or
            grapheme separator; ids rejected by `_should_keep_token` are dropped
        """
        text = [
            self._id_to_symbol[idx] for idx in sequence
            if self._should_keep_token(idx, self._id_to_symbol)
        ]
        return (_PHONEME_SEP
                if self.use_phonemes else _GRAPHEME_SEP).join(text)

    def __call__(self, text):
        """Shortcut for `text_to_sequence(text)`."""
        return self.text_to_sequence(text)
Example #18
0
def test_phone_separator(text, expected):
    """Phonemize ``text`` with '_' between phones and compare to ``expected``."""
    underscore = separator.Separator(phone='_')
    espeak = EspeakBackend('en-us')
    assert espeak.phonemize(text, separator=underscore, strip=True) == expected
Example #19
0
def phonemize(text,
              language='en-us',
              backend='espeak',
              separator=default_separator,
              strip=False,
              with_stress=False,
              use_sampa=False,
              language_switch='keep-flags',
              njobs=1,
              logger=logging.getLogger(__name__)):
    """Multilingual text to phonemes converter

    Validate the options, build a backend for `language` and return the
    phonemized version of `text`.

    Parameters
    ----------
    text (str or list of str): The text to be phonemized. Any empty
       line will be ignored. If `text` is an str, it can be multiline
       (lines being separated by \\n). If `text` is a list, each
       element is considered as a separated line. Each line is
       considered as a text utterance.

    language (str): The language code of the input text, must be
      supported by the backend. If `backend` is 'segments', the
      language can be a file with a grapheme to phoneme mapping.

    backend (str): The software backend to use for phonemization, must
      be 'festival' (US English only is supported, coded 'en-us'),
      'espeak' or 'segments'.

    separator (Separator): string separators between phonemes,
      syllables and words, default to separator.default_separator.

    strip (bool): If True, don't output the last word and phone
      separators of a token, default to False.

    with_stress (bool): Only valid for the espeak backend. When True the
      stresses on phonemes are present (stresses characters are ˈ'ˌ).
      When False stresses are removed. Default to False.

    use_sampa (bool): Use the 'sampa' phonetic alphabet (Speech Assessment
      Methods Phonetic Alphabet) instead of 'ipa' (International Phonetic
      Alphabet). Only valid for the espeak backend when it is espeak-ng.
      Default to False.

    language_switch (str): espeak can pronounce some words in another
      language (typically English) when phonemizing a text. This option
      sets the policy to use when such a language switch occurs. Three
      values are available: 'keep-flags' (the default), 'remove-flags' or
      'remove-utterance'. The 'keep-flags' policy keeps the language
      switching flags, for example (en) or (jp), in the output. The
      'remove-flags' policy removes them and the 'remove-utterance' policy
      removes the whole line of text including a language switch.

    njobs (int): The number of parallel jobs to launch. The input text
      is split in `njobs` parts, phonemized on parallel instances of
      the backend and the outputs are finally collapsed.

    logger (logging.Logger): the logging instance where to send
      messages. If not specified, use the logger named after this module.

    Returns
    -------
    phonemized text (str or list of str) : The input `text` phonemized
      for the given `language` and `backend`. The returned value has
      the same type of the input text (either a list or a string).

    Raises
    ------
    RuntimeError

      If the `backend` is not valid or is valid but not installed, if the
      `language` is not supported by the `backend`, if `use_sampa`,
      `with_stress` or `language_switch` are used but the backend is not
      'espeak-ng'.

    """
    # only these backend names are accepted
    known_backends = ('espeak', 'festival', 'segments')
    if backend not in known_backends:
        raise RuntimeError(
            '{} is not a supported backend, choose in {}.'.format(
                backend, ', '.join(known_backends)))

    is_espeak = backend == 'espeak'

    # the sampa alphabet needs the espeak backend, and espeak-ng at that
    if use_sampa is True:
        if not is_espeak:
            raise RuntimeError(
                'sampa alphabet is only supported by espeak backend')
        if not EspeakBackend.is_espeak_ng():
            raise RuntimeError(  # pragma: nocover
                'sampa alphabet is not supported by espeak, '
                'please install espeak-ng')

    # stress marks are an espeak-only feature
    if with_stress and not is_espeak:
        raise RuntimeError(
            'the "with_stress" option is available for espeak backend only, '
            'but you are using {} backend'.format(backend))

    # a non-default language switch policy is an espeak-only feature
    if not is_espeak and language_switch != 'keep-flags':
        raise RuntimeError(
            'the "language_switch" option is available for espeak backend '
            'only, but you are using {} backend'.format(backend))

    # python2 is not supported: only emit a warning
    if sys.version_info[0] == 2:  # pragma: nocover
        logger.warning(
            'Your are using python2 but unsupported by the phonemizer, '
            'please update to python3')

    # Build the backend for the given language (raises a RuntimeError if
    # the language is not supported).
    # NOTE(review): an EspeakBackend is created whatever `backend` is, so
    # 'festival' and 'segments' pass validation but are not dispatched to a
    # dedicated backend here -- confirm this is intended.
    engine = EspeakBackend(language,
                           with_stress=with_stress,
                           use_sampa=use_sampa,
                           language_switch=language_switch,
                           logger=logger)

    # run the phonemization itself
    return engine.phonemize(text,
                            separator=separator,
                            strip=strip,
                            njobs=njobs)