def version():
    """Return a printable version report for phonemizer and its backends.

    The first line is ``phonemizer-<version>``. It is followed by an
    ``available backends:`` line listing each installed backend with its
    version, and by an ``uninstalled backends:`` line naming the
    backends that are not available. Either extra line is omitted when
    its list is empty.

    Returns
    -------
    report (str) : The multi-line version string.

    """
    # version of the phonemizer front-end itself
    report = (
        'phonemizer-'
        + pkg_resources.get_distribution('phonemizer').version)

    def _espeak_label():
        # espeak-ng is reported with an extra 'ng-' infix
        flavour = 'ng-' if EspeakBackend.is_espeak_ng() else ''
        return 'espeak-' + flavour + EspeakBackend.version()

    # (name, backend class, callable building the "name-version" label);
    # the label is only built once the backend is known to be available
    probes = (
        ('espeak', EspeakBackend, _espeak_label),
        ('festival', FestivalBackend,
         lambda: 'festival-' + FestivalBackend.version()),
        ('segments', SegmentsBackend,
         lambda: 'segments-' + SegmentsBackend.version()),
    )

    installed = []
    missing = []
    for name, backend, make_label in probes:
        if backend.is_available():
            installed.append(make_label())
        else:  # pragma: nocover
            missing.append(name)

    # summarize the backends status in the final version string
    if installed:
        report += '\navailable backends: ' + ', '.join(installed)
    if missing:  # pragma: nocover
        report += '\nuninstalled backends: ' + ', '.join(missing)

    return report
def version():
    """Return a printable version report for phonemizer and its backends.

    The first line is ``phonemizer-<version>``; the second one lists the
    festival, espeak (or espeak-ng) and segments backends, in that
    order, each followed by its own version.

    Returns
    -------
    report (str) : The two-line version string.

    """
    frontend = (
        'phonemizer-'
        + pkg_resources.get_distribution('phonemizer').version)

    # backends are queried in the same order as they are displayed
    festival = 'festival-' + FestivalBackend.version()
    espeak_flavour = 'ng-' if EspeakBackend.is_espeak_ng() else ''
    espeak = 'espeak-' + espeak_flavour + EspeakBackend.version()
    segments = 'segments-' + SegmentsBackend.version()

    backends = ', '.join((festival, espeak, segments))
    return frontend + '\navailable backends: ' + backends
def version():
    """Return version information for the front-end and the backends.

    Returns
    -------
    report (str) : ``phonemizer-<version>`` on a first line, then an
        ``available backends:`` line listing festival, espeak (tagged
        ``ng-`` for espeak-ng) and segments with their versions.

    """
    header = 'phonemizer-' + pkg_resources.get_distribution(
        'phonemizer').version

    # collect one "name-version" label per backend, in display order
    labels = []
    labels.append('festival-' + FestivalBackend.version())

    if EspeakBackend.is_espeak_ng():
        espeak_prefix = 'espeak-ng-'
    else:
        espeak_prefix = 'espeak-'
    labels.append(espeak_prefix + EspeakBackend.version())

    labels.append('segments-' + SegmentsBackend.version())

    return header + '\navailable backends: ' + ', '.join(labels)
def test_espeak(njobs):
    """Check espeak phonemization of US English for list and str inputs.

    The same three utterances are phonemized as a list, as a single
    space-joined string and as a newline-joined string, with and without
    stripping of the trailing separators. The sampa alphabet is checked
    only when espeak-ng is the installed flavour.
    """
    lines = ['one two', 'three', 'four five']

    def run(source, strip, **extra):
        # every call shares the language, backend and number of jobs
        return phonemize(
            source, language='en-us', backend='espeak',
            strip=strip, njobs=njobs, **extra)

    ipa_stripped = [u'wʌn tuː', u'θɹiː', u'foːɹ faɪv']
    ipa_raw = [u'wʌn tuː ', u'θɹiː ', u'foːɹ faɪv ']

    # input given as a list of utterances
    assert run(lines, True) == ipa_stripped

    if EspeakBackend.is_espeak_ng():
        sampa = run(lines, True, use_sampa=True)
        assert sampa == [u'wVn tu:', u'Tri:', u'fo@ faIv']

    assert run(lines, False) == ipa_raw

    # input given as a single line: only the final separator remains
    # when strip is False
    one_line = ' '.join(lines)
    assert run(one_line, True) == ' '.join(ipa_stripped)
    assert run(one_line, False) == ' '.join(
        [u'wʌn tuː', u'θɹiː', u'foːɹ faɪv '])

    # input given as a multiline string
    multiline = '\n'.join(lines)
    assert run(multiline, True) == '\n'.join(ipa_stripped)
    assert run(multiline, False) == '\n'.join(ipa_raw)
def phonemize(text, language='en-us', backend='festival',
              separator=default_separator, strip=False,
              with_stress=False, use_sampa=False,
              language_switch='keep-flags', njobs=1, logger=get_logger()):
    """Multilingual text to phonemes converter

    Phonemize the input `text` for the given `language`, delegating the
    work to the requested `backend`.

    Parameters
    ----------
    text (str or list of str): The text to be phonemized. A str may hold
      several lines separated by newlines; in a list each element is one
      line. Each line is considered as a text utterance and any empty
      line is ignored.

    language (str): The language code of the input text, must be
      supported by the backend. With the 'segments' backend it can also
      be a file with a grapheme to phoneme mapping.

    backend (str): The software backend to use for phonemization, one
      of 'festival' (US English only, coded 'en-us'), 'espeak' or
      'segments'.

    separator (Separator): String separators between phonemes,
      syllables and words, default to separator.default_separator.

    strip (bool): If True, don't output the last word and phone
      separators of a token, default to False.

    with_stress (bool): Only valid for the espeak/espeak-ng backend.
      When True the stresses on phonemes are kept in the output, when
      False they are removed. Default to False.

    use_sampa (bool): Use the 'sampa' phonetic alphabet (Speech
      Assessment Methods Phonetic Alphabet) instead of 'ipa'
      (International Phonetic Alphabet). Only valid for the 'espeak-ng'
      backend. Default to False.

    language_switch (str): espeak can pronounce some words in another
      language (typically English) when phonemizing a text. This option
      sets the policy to apply when such a language switch occurs:
      'keep-flags' (the default) keeps the language switching flags, for
      example (en) or (jp), in the output; 'remove-flags' removes them;
      'remove-utterance' removes the whole line of text including a
      language switch.

    njobs (int): The number of parallel jobs to launch. The input text
      is split in `njobs` parts, phonemized on parallel instances of the
      backend and the outputs are finally collapsed.

    logger (logging.Logger): The logging instance where to send
      messages. If not specified, use the default system logger.

    Returns
    -------
    phonemized text (str or list of str) : The input `text` phonemized
      for the given `language` and `backend`. The returned value has the
      same type as the input text (either a list or a string).

    Raises
    ------
    RuntimeError
      If the `backend` is not valid or is valid but not installed, if
      the `language` is not supported by the `backend`, if `use_sampa`,
      `with_stress` or `language_switch` are used but the backend is not
      'espeak-ng'.

    """
    # ensure the backend is either espeak, festival or segments
    supported = ('espeak', 'festival', 'segments')
    if backend not in supported:
        raise RuntimeError(
            '{} is not a supported backend, choose in {}.'.format(
                backend, ', '.join(supported)))

    uses_espeak = backend == 'espeak'

    # the sampa alphabet requires the espeak backend in its espeak-ng
    # flavour
    if use_sampa is True:
        if not uses_espeak:
            raise RuntimeError(
                'sampa alphabet is only supported by espeak backend')
        if not EspeakBackend.is_espeak_ng():
            raise RuntimeError(  # pragma: nocover
                'sampa alphabet is not supported by espeak, '
                'please install espeak-ng')

    # with_stress and language_switch are espeak-only options
    if not uses_espeak:
        if with_stress:
            raise RuntimeError(
                'the "with_stress" option is available for espeak '
                'backend only, but you are using {} backend'
                .format(backend))
        if language_switch != 'keep-flags':
            raise RuntimeError(
                'the "language_switch" option is available for espeak '
                'backend only, but you are using {} backend'
                .format(backend))

    # instantiate the requested backend for the given language (raises
    # a RuntimeError if the language is not supported)
    registry = dict(
        (cls.name(), cls)
        for cls in (EspeakBackend, FestivalBackend, SegmentsBackend))

    options = {'logger': logger}
    if uses_espeak:
        options.update(
            with_stress=with_stress,
            use_sampa=use_sampa,
            language_switch=language_switch)
    engine = registry[backend](language, **options)

    # phonemize the input text with the backend
    return engine.phonemize(
        text, separator=separator, strip=strip, njobs=njobs)
# NOTE(review): the statements below are the tail of a test whose `def`
# line lies outside this view; `text1`, `text2`, `phn1` and `phn2` are
# presumably bound earlier in that test -- confirm against the full file.
out3 = phonemize(text2, language='en-us', backend='espeak',
                 strip=True, prepend_text=True)
# with prepend_text=True each output item is indexed as a pair: item 0
# is compared to the input text and item 1 to the phonemization below
text3 = [o[0] for o in out3]
phn3 = [o[1] for o in out3]

assert isinstance(phn1, list)
assert isinstance(phn2, str)
assert os.linesep.join(phn1) == phn2
assert os.linesep.join(phn3) == phn2
assert text3 == text1


@pytest.mark.skipif(not EspeakBackend.is_espeak_ng(),
                    reason='language switch only exists for espeak-ng')
def test_lang_switch():
    """Check the 'remove-utterance' language switch policy on French text.

    The first utterance is expected to come back with an empty
    phonemization and the second one to be phonemized normally.
    """
    text = ['bonjour apple', 'bonjour toi']
    out = phonemize(text, language='fr-fr', backend='espeak',
                    prepend_text=True, language_switch='remove-utterance')
    assert out == [('bonjour apple', ''), ('bonjour toi', 'bɔ̃ʒuʁ twa ')]


# NOTE(review): the body of this test continues past the end of this
# view; only its first statement is visible here.
@pytest.mark.parametrize('njobs', [2, 4])
def test_espeak(njobs):
    text = ['one two', 'three', 'four five']
# NOTE(review): the two statements below are the tail of a test whose
# `def` line lies outside this view; they are kept verbatim.
backend = EspeakBackend('en-us', with_stress=True)
assert u'həlˈoʊ wˈɜːld' == backend._phonemize_aux(
    u'hello world', separator.default_separator, True)[0]


def test_french():
    """Check French phonemization with custom word and phone separators."""
    backend = EspeakBackend('fr-fr')
    text = u'bonjour le monde'
    sep = separator.Separator(word=';eword ', syllable=None, phone=' ')
    expected = [u'b ɔ̃ ʒ u ʁ ;eword l ə ;eword m ɔ̃ d ;eword ']
    out = backend._phonemize_aux(text, sep, False)
    assert out == expected


@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='Arabic is only supported by espeak-ng')
def test_arabic():
    """Check Arabic phonemization, whose output depends on the version.

    The expected transcription changed starting at espeak-ng-1.49.3, so
    the installed version selects which output is asserted.
    """
    # local import: the file-level imports are outside this view
    import re

    backend = EspeakBackend('ar')
    text = u'السلام عليكم'
    sep = separator.Separator()

    # Compare the version numerically. As strings the comparison is
    # lexicographic, e.g. ('1', '49', '10') sorts before ('1', '49', '3').
    installed = tuple(
        int(number)
        for number in re.findall(r'[0-9]+', EspeakBackend.version()))

    # Arabic seems to have changed starting at espeak-ng-1.49.3
    if installed >= (1, 49, 3):
        expected = [u'ʔassalaːm ʕliːkm ']
    else:
        expected = [u'ʔassalaam ʕaliijkum ']
    out = backend._phonemize_aux(text, sep, False)
    assert out == expected
def phonemize(text, language='en-us', backend='festival',
              separator=default_separator, strip=False,
              use_sampa=False, njobs=1, logger=logging.getLogger()):
    """Multilingual text to phonemes converter

    Phonemize the input `text` for the given `language`, delegating the
    work to the requested `backend`.

    Parameters
    ----------
    text (str or list of str): The text to be phonemized. A str may hold
      several lines separated by newlines; in a list each element is one
      line. Each line is considered as a text utterance and any empty
      line is ignored.

    language (str): The language code of the input text, must be
      supported by the backend. With the 'segments' backend it can also
      be a file with a grapheme to phoneme mapping.

    backend (str): The software backend to use for phonemization, one
      of 'festival' (US English only, coded 'en-us'), 'espeak' or
      'segments'.

    separator (Separator): String separators between phonemes,
      syllables and words, default to separator.default_separator.

    strip (bool): If True, don't output the last word and phone
      separators of a token, default to False.

    use_sampa (bool): Use the 'sampa' phonetic alphabet (Speech
      Assessment Methods Phonetic Alphabet) instead of 'ipa'
      (International Phonetic Alphabet). Only valid for the 'espeak-ng'
      backend. Default to False.

    njobs (int): The number of parallel jobs to launch. The input text
      is split in `njobs` parts, phonemized on parallel instances of the
      backend and the outputs are finally collapsed.

    logger (logging.Logger): The logging instance where to send
      messages. If not specified, use the default system logger.

    Returns
    -------
    phonemized text (str or list of str) : The input `text` phonemized
      for the given `language` and `backend`. The returned value has the
      same type as the input text (either a list or a string).

    Raises
    ------
    RuntimeError
      If the `backend` is not valid or is valid but not installed, if
      the `language` is not supported by the `backend`, if `use_sampa`
      is set to True but the backend is not 'espeak-ng'.

    """
    # ensure the backend is either espeak, festival or segments
    supported = ('espeak', 'festival', 'segments')
    if backend not in supported:
        raise RuntimeError(
            '{} is not a supported backend, choose in {}.'.format(
                backend, ', '.join(supported)))

    uses_espeak = backend == 'espeak'

    # the sampa alphabet requires the espeak backend in its espeak-ng
    # flavour
    if use_sampa is True:
        if not uses_espeak:
            raise RuntimeError(
                'sampa alphabet is only supported by espeak backend')
        if not EspeakBackend.is_espeak_ng():
            raise RuntimeError(
                'sampa alphabet is not supported by espeak, '
                'please install espeak-ng')

    # instantiate the requested backend for the given language (raises
    # a RuntimeError if the language is not supported)
    registry = dict(
        (cls.name(), cls)
        for cls in (EspeakBackend, FestivalBackend, SegmentsBackend))

    options = {'logger': logger}
    if uses_espeak:
        options['use_sampa'] = use_sampa
    engine = registry[backend](language, **options)

    # phonemize the input text with the backend
    return engine.phonemize(
        text, separator=separator, strip=strip, njobs=njobs)
# NOTE(review): the statements below are the tail of a test whose `def`
# line lies outside this view; they are kept verbatim.
backend = EspeakBackend('en-us')
text = u'hello world\ngoodbye\nthird line\nyet another'
out = '\n'.join(backend._phonemize_aux(
    text, separator.default_separator, True))
assert out == u'həloʊ wɜːld\nɡʊdbaɪ\nθɜːd laɪn\njɛt ɐnʌðɚ'


def test_french():
    """Check French phonemization with custom word and phone separators."""
    backend = EspeakBackend('fr-fr')
    text = u'bonjour le monde'
    sep = separator.Separator(word=';eword ', syllable=None, phone=' ')
    expected = [u'b ɔ̃ ʒ u ʁ ;eword l ə- ;eword m ɔ̃ d ;eword ']
    out = backend._phonemize_aux(text, sep, False)
    assert out == expected


@pytest.mark.skipif(
    not EspeakBackend.is_espeak_ng(),
    reason='Arabic is only supported by espeak-ng')
def test_arabic():
    """Check Arabic phonemization, whose output depends on the version.

    The expected transcription changed starting at espeak-ng-1.49.3, so
    the installed version selects which output is asserted.
    """
    # local import: the file-level imports are outside this view
    import re

    backend = EspeakBackend('ar')
    text = u'السلام عليكم'
    sep = separator.Separator()

    # Compare the version numerically. As strings the comparison is
    # lexicographic, e.g. ('1', '49', '10') sorts before ('1', '49', '3').
    installed = tuple(
        int(number)
        for number in re.findall(r'[0-9]+', EspeakBackend.version()))

    # Arabic seems to have changed starting at espeak-ng-1.49.3
    if installed >= (1, 49, 3):
        expected = [u'ʔassalaːm ʕliːkm ']
    else:
        expected = [u'ʔassalaam ʕaliijkum ']
    out = backend._phonemize_aux(text, sep, False)
    assert out == expected