Python FestivalBackendの例、phonemizer.backend.FestivalBackend Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_punctuation.py プロジェクト: bootphon/phonemizer

def test_festival():
    """Check festival output for each preserve_punctuation/strip pair."""
    sentence = 'hello, world!'

    # (preserve_punctuation, strip, expected phonemization)
    cases = [
        (False, True, 'hhaxlow werld'),
        (True, True, 'hhaxlow, werld!'),
        (False, False, 'hhaxlow werld '),
        (True, False, 'hhaxlow, werld!'),
    ]

    for preserve, strip, expected in cases:
        backend = FestivalBackend('en-us', preserve_punctuation=preserve)
        phonemized = backend.phonemize([sentence], strip=strip)
        assert phonemized[0] == expected

コード例 #2

0

ファイルを表示

def version():
    """Return version information for front and backends

    The message starts with ``phonemizer-<version>``. A second line lists
    the available backends with their versions and a third one the
    uninstalled backends; each of those lines is present only when its
    list is not empty.
    """
    lines = ['phonemizer-' +
             pkg_resources.get_distribution('phonemizer').version]

    found = []
    missing = []

    # espeak is handled apart: its name carries an extra 'ng-' marker when
    # the installed flavour is espeak-ng
    if EspeakBackend.is_available():
        flavour = 'ng-' if EspeakBackend.is_espeak_ng() else ''
        found.append('espeak-' + flavour + EspeakBackend.version())
    else:  # pragma: nocover
        missing.append('espeak')

    # festival and segments share the same 'name-version' pattern
    for name, backend in (('festival', FestivalBackend),
                          ('segments', SegmentsBackend)):
        if backend.is_available():
            found.append(name + '-' + backend.version())
        else:  # pragma: nocover
            missing.append(name)

    # summarize the backends status in the final version string
    if found:
        lines.append('available backends: ' + ', '.join(found))
    if missing:  # pragma: nocover
        lines.append('uninstalled backends: ' + ', '.join(missing))

    return '\n'.join(lines)

コード例 #3

0

ファイルを表示

def test_path_good():
    """Set the festival binary found on PATH and check the backend uses it."""
    try:
        festival_bin = shutil.which('festival')
        FestivalBackend.set_executable(festival_bin)

        backend = FestivalBackend('en-us')
        assert backend.executable() == pathlib.Path(festival_bin)
    finally:
        # whatever happened above, go back to the default festival path
        FestivalBackend.set_executable(None)

コード例 #4

0

ファイルを表示

def test_path_good():
    """Run test_im with the festival path set to the binary found on PATH."""
    try:
        festival_bin = distutils.spawn.find_executable('festival')
        FestivalBackend.set_festival_path(festival_bin)
        test_im()
    finally:
        # whatever happened above, go back to the default festival path
        FestivalBackend.set_festival_path(None)

コード例 #5

0

ファイルを表示

def test_path_venv():
    """Check the backend fails on a wrong PHONEMIZER_FESTIVAL_EXECUTABLE."""
    env_key = 'PHONEMIZER_FESTIVAL_EXECUTABLE'
    try:
        # the python interpreter found on PATH stands in for festival
        os.environ[env_key] = shutil.which('python')
        with pytest.raises(RuntimeError):
            FestivalBackend('en-us').phonemize(['hello'])
        with pytest.raises(RuntimeError):
            FestivalBackend.version()

        # this test file stands in for festival
        os.environ[env_key] = __file__
        with pytest.raises(RuntimeError):
            FestivalBackend.version()
    finally:
        # drop the variable, tolerating the case where it was never set
        os.environ.pop(env_key, None)

コード例 #6

0

ファイルを表示

def test_path_venv():
    """Check the backend fails on a wrong PHONEMIZER_FESTIVAL_PATH."""
    env_key = 'PHONEMIZER_FESTIVAL_PATH'
    try:
        # the python interpreter found on PATH stands in for festival
        os.environ[env_key] = distutils.spawn.find_executable('python')
        with pytest.raises(RuntimeError):
            FestivalBackend('en-us').phonemize('hello')
        with pytest.raises(RuntimeError):
            FestivalBackend.version()

        # this test file stands in for festival
        os.environ[env_key] = __file__
        with pytest.raises(ValueError):
            FestivalBackend.version()
    finally:
        # drop the variable, tolerating the case where it was never set
        os.environ.pop(env_key, None)

コード例 #7

0

ファイルを表示

ファイル: main.py プロジェクト: tronin/phonemizer

def version():
    """Return version information for front and backends

    The returned string has two lines: ``phonemizer-<version>`` then the
    festival, espeak and segments backends with their versions.
    """
    frontend = ('phonemizer-' +
                pkg_resources.get_distribution('phonemizer').version)

    festival = 'festival-' + FestivalBackend.version()

    # espeak-ng is reported as 'espeak-ng-<version>'
    flavour = 'ng-' if EspeakBackend.is_espeak_ng() else ''
    espeak = 'espeak-' + flavour + EspeakBackend.version()

    segments = 'segments-' + SegmentsBackend.version()

    backends = ', '.join((festival, espeak, segments))
    return frontend + '\navailable backends: ' + backends

コード例 #8

0

ファイルを表示

ファイル: main.py プロジェクト: bootphon/phonemizer

def version():
    """Return version information for front and backends

    First line is ``phonemizer-<version>``, second line lists the
    festival, espeak and segments backends with their versions.
    """
    header = 'phonemizer-{}'.format(
        pkg_resources.get_distribution('phonemizer').version)

    # backends are queried in the order they are displayed
    parts = ['festival-' + FestivalBackend.version()]

    espeak_name = 'espeak-'
    if EspeakBackend.is_espeak_ng():
        espeak_name += 'ng-'
    parts.append(espeak_name + EspeakBackend.version())

    parts.append('segments-' + SegmentsBackend.version())

    return header + '\navailable backends: ' + ', '.join(parts)

コード例 #9

0

ファイルを表示

def test_path_bad():
    """Check that a wrong festival executable makes the backend fail."""
    try:
        # corrupt the default festival path: use the python executable instead
        wrong_binary = shutil.which('python')
        FestivalBackend.set_executable(wrong_binary)

        # each of these calls is expected to raise a RuntimeError, the last
        # one because this test file is given as the executable
        failing_calls = (
            lambda: FestivalBackend('en-us').phonemize(['hello']),
            lambda: FestivalBackend.version(),
            lambda: FestivalBackend.set_executable(__file__),
        )
        for call in failing_calls:
            with pytest.raises(RuntimeError):
                call()
    finally:
        # restore the festival path to default
        FestivalBackend.set_executable(None)

コード例 #10

0

ファイルを表示

def test_path_bad():
    """Check that a wrong festival path makes the backend fail."""
    try:
        # corrupt the default festival path: use the python executable instead
        wrong_binary = distutils.spawn.find_executable('python')
        FestivalBackend.set_festival_path(wrong_binary)

        # (expected exception, call raising it); the last call gives this
        # test file as the festival path
        failing_calls = (
            (RuntimeError, lambda: FestivalBackend('en-us').phonemize('hello')),
            (RuntimeError, lambda: FestivalBackend.version()),
            (ValueError, lambda: FestivalBackend.set_festival_path(__file__)),
        )
        for expected_error, call in failing_calls:
            with pytest.raises(expected_error):
                call()
    finally:
        # restore the festival path to default
        FestivalBackend.set_festival_path(None)

コード例 #11

0

ファイルを表示

import shutil

import pytest

from phonemizer.separator import Separator
from phonemizer.backend import FestivalBackend


def _test(text, separator=Separator(word=' ', syllable='|', phone='-')):
    """Phonemize `text` with a fresh en-us festival backend.

    Calls the protected ``_phonemize_aux`` directly with offset 0 and
    ``True`` as last argument, and returns what it returns.
    """
    # pylint: disable=protected-access
    return FestivalBackend('en-us')._phonemize_aux(text, 0, separator, True)


@pytest.mark.skipif(
    FestivalBackend.version() <= (2, 1),
    reason=('festival-2.1 gives different results than further versions '
            'for syllable boundaries'))
def test_hello():
    """Check phones and syllable boundaries on 'hello world'."""
    # a single utterance with two words
    single_utterance = _test(['hello world'])
    assert single_utterance == ['hh-ax|l-ow w-er-l-d']

    # the same words as two utterances
    two_utterances = _test(['hello', 'world'])
    assert two_utterances == ['hh-ax|l-ow', 'w-er-l-d']


@pytest.mark.parametrize(
    'text', ['', ' ', '  ', '(', '()', '"', "'"])
def test_bad_input(text):
    """Empty, blank or punctuation-only input must give an empty list."""
    phonemized = _test(text)
    assert phonemized == []


def test_quote():
    """Both "it's" and "its" are expected to phonemize to ih-t-s."""
    for word in ("it's", "its"):
        assert _test([word]) == ['ih-t-s']

コード例 #12

0

ファイルを表示

def main():
    """Phonemize a text from command-line arguments

    Parses the command line, optionally prints the version message and
    returns, otherwise reads the whole input, phonemizes it and writes the
    result followed by a newline to the output (nothing is written when the
    result is empty).

    When the input or the output is given as a file name (a str), the file
    is opened here and closed before returning, even on error. Streams
    received as objects are used as they are and left open.
    """
    args = parse_args()

    # setup a custom path to espeak and festival if required (this must be done
    # before generating the version message)
    if args.espeak_path:
        EspeakBackend.set_espeak_path(args.espeak_path)
    if args.festival_path:
        FestivalBackend.set_festival_path(args.festival_path)

    if args.version:
        print(version.version())
        return

    # configure logging according to --verbose/--quiet options
    verbosity = 'normal'
    if args.verbose:
        verbosity = 'verbose'
    elif args.quiet:
        verbosity = 'quiet'
    log = logger.get_logger(verbosity=verbosity)

    # files opened by this function, to be closed in the finally clause
    opened = []
    try:
        # configure input as a readable stream
        streamin = args.input
        if isinstance(streamin, str):
            streamin = codecs.open(streamin, 'r', encoding='utf8')
            opened.append(streamin)
        log.debug('reading from %s', streamin.name)

        # configure output as a writable stream
        streamout = args.output
        if isinstance(streamout, str):
            streamout = codecs.open(streamout, 'w', 'utf8')
            opened.append(streamout)
        log.debug('writing to %s', streamout.name)

        # configure the separator for phonemes, syllables and words.
        sep = separator.Separator(phone=args.phone_separator,
                                  syllable=args.syllable_separator,
                                  word=args.word_separator)
        log.debug('separator is %s', sep)

        # load the input text (python2 optionally needs an extra decode, on
        # python3 a str has no decode method and the text is kept as is)
        text = streamin.read()
        try:
            text = text.decode('utf8')
        except (AttributeError, UnicodeEncodeError):
            pass

        # phonemize the input text
        out = phonemize.phonemize(
            text,
            language=args.language,
            backend=args.backend,
            separator=sep,
            strip=args.strip,
            preserve_punctuation=args.preserve_punctuation,
            punctuation_marks=args.punctuation_marks,
            with_stress=args.with_stress,
            use_sampa=args.sampa,
            language_switch=args.language_switch,
            njobs=args.njobs,
            logger=log)

        if out:
            streamout.write(out + '\n')
    finally:
        # closing also flushes what was written to an output file
        for stream in opened:
            stream.close()

コード例 #13

0

ファイルを表示

ファイル: test_punctuation.py プロジェクト: bootphon/phonemizer

# pylint: disable=missing-docstring

import pytest

from phonemizer.backend import EspeakBackend, FestivalBackend, SegmentsBackend
from phonemizer.punctuation import Punctuation
from phonemizer.phonemize import phonemize

# Version flags evaluated once at import time, by comparing the version
# reported by each backend with a tuple.

# True if we are using espeak>=1.50
ESPEAK_150 = (EspeakBackend.version() >= (1, 50))

# True if we are using espeak>=1.49.3
# NOTE(review): the name says 143 but the comparison is against 1.49.3
ESPEAK_143 = (EspeakBackend.version() >= (1, 49, 3))

# True if we are using festival>=2.5
FESTIVAL_25 = (FestivalBackend.version() >= (2, 5))


@pytest.mark.parametrize(
    'inp, out', [
        ('a, b,c.', 'a b c'),
        ('abc de', 'abc de'),
        ('!d.d. dd??  d!', 'd d dd d'),
    ])
def test_remove(inp, out):
    """Check Punctuation().remove() output on each input/expected pair."""
    cleaned = Punctuation().remove(inp)
    assert cleaned == out


@pytest.mark.parametrize(
    'inp', [['.a.b.c.'], ['a, a?', 'b, b'], ['a, a?', 'b, b', '!'],
            ['a, a?', '!?', 'b, b'], ['!?', 'a, a?', 'b, b'], ['a, a, a'],
            ['a, a?', 'aaa bb', '.bb, b', 'c', '!d.d. dd??  d!'],
            ['Truly replied, "Yes".'], ['hi; ho,"'], ["!?"], ["!'"]])
def test_preserve(inp):

コード例 #14

0

ファイルを表示

ファイル: main.py プロジェクト: welgazil/phonemizer

def main():
    """Phonemize a text from command-line arguments

    Parses the command line, handles the two informational options
    (version message, list of supported languages) and returns, otherwise
    reads the input line by line, phonemizes it and writes one phonemized
    line per input line to the output (nothing is written when the result
    is empty).

    When the input or the output is given as a file name (a str), the file
    is opened here and closed before returning, even on error. Streams
    received as objects are used as they are and left open.
    """
    args = parse_args()

    # setup a custom path to espeak and festival if required (this must be done
    # before generating the version message)
    if args.espeak_path:
        EspeakBackend.set_espeak_path(args.espeak_path)
    if args.festival_path:
        FestivalBackend.set_festival_path(args.festival_path)

    # display version information and exit
    if args.version:
        print(version.version())
        return

    # list supported languages and exit
    if args.list_languages:
        backends = (['festival', 'segments', 'espeak', 'espeak-mbrola']
                    if not args.backend else [args.backend])
        for backend in backends:
            print(f'supported languages for {backend} are:\n' +
                  '\n'.join(f'\t{k}\t->\t{v}' for k, v in sorted(
                      BACKENDS_MAP[backend].supported_languages().items())))
        return

    # set default backend as espeak if not specified
    args.backend = args.backend or 'espeak'

    # configure logging according to --verbose/--quiet options
    verbosity = 'normal'
    if args.verbose:
        verbosity = 'verbose'
    elif args.quiet:
        verbosity = 'quiet'
    log = logger.get_logger(verbosity=verbosity)

    # files opened by this function, to be closed in the finally clause
    opened = []
    try:
        # configure input as a readable stream
        streamin = args.input
        if isinstance(streamin, str):
            streamin = codecs.open(streamin, 'r', encoding='utf8')
            opened.append(streamin)
        log.debug('reading from %s', streamin.name)

        # configure output as a writable stream
        streamout = args.output
        if isinstance(streamout, str):
            streamout = codecs.open(streamout, 'w', 'utf8')
            opened.append(streamout)
        log.debug('writing to %s', streamout.name)

        # configure the separator for phonemes, syllables and words.
        if args.backend == 'espeak-mbrola':
            log.debug('using espeak-mbrola backend: ignoring word separator')
            sep = separator.Separator(phone=args.phone_separator,
                                      syllable=None,
                                      word=None)
        else:
            sep = separator.Separator(phone=args.phone_separator,
                                      syllable=args.syllable_separator,
                                      word=args.word_separator)
        log.debug('separator is %s', sep)

        # one utterance per input line, surrounding whitespace removed
        text = [line.strip() for line in streamin]

        # phonemize the input text
        out = phonemize(text,
                        language=args.language,
                        backend=args.backend,
                        separator=sep,
                        strip=args.strip,
                        preserve_punctuation=args.preserve_punctuation,
                        punctuation_marks=args.punctuation_marks,
                        with_stress=args.with_stress,
                        language_switch=args.language_switch,
                        njobs=args.njobs,
                        logger=log)

        if out:
            streamout.write('\n'.join(out) + '\n')
    finally:
        # closing also flushes what was written to an output file
        for stream in opened:
            stream.close()

コード例 #15

0

ファイルを表示

import distutils.spawn
import os
import pytest
from phonemizer import separator
from phonemizer.backend import FestivalBackend


def _test(text,
          separator=separator.Separator(word=' ', syllable='|', phone='-')):
    """Phonemize `text` with a fresh en-us festival backend.

    Calls the protected ``_phonemize_aux`` directly with ``True`` as last
    argument, and returns what it returns.
    """
    return FestivalBackend('en-us')._phonemize_aux(text, separator, True)


@pytest.mark.skipif(
    '2.1' in FestivalBackend.version(),
    reason=('festival-2.1 gives different results than further versions '
            'for syllable boundaries'))
def test_hello():
    """Check phones and syllable boundaries, with and without newlines."""
    # (input text, expected utterances)
    cases = (
        ('hello world', ['hh-ax|l-ow w-er-l-d']),
        ('hello\nworld', ['hh-ax|l-ow', 'w-er-l-d']),
        ('hello\nworld\n', ['hh-ax|l-ow', 'w-er-l-d']),
    )
    for text, expected in cases:
        assert _test(text) == expected


@pytest.mark.parametrize(
    'text', ['', ' ', '  ', '(', '()', '"', "'"])
def test_bad_input(text):
    """Empty, blank or punctuation-only input must give an empty list."""
    phonemized = _test(text)
    assert phonemized == []


def test_quote():
    """Check the expected output for a text with a lone single quote."""
    phonemized = _test("here a 'quote")
    assert phonemized == ['hh-ih-r ax k-w-ow-t']

コード例 #16

0

ファイルを表示

def _test(text,
          separator=separator.Separator(word=' ', syllable='|', phone='-')):
    """Return the raw ``_phonemize_aux`` output of an en-us festival backend.

    `text` and `separator` are forwarded unchanged, followed by ``True``.
    """
    festival = FestivalBackend('en-us')
    phonemized = festival._phonemize_aux(text, separator, True)
    return phonemized

コード例 #17

0

ファイルを表示

def _test(text, separator=Separator(word=' ', syllable='|', phone='-')):
    """Return the raw ``_phonemize_aux`` output of an en-us festival backend.

    `text` is forwarded with offset 0, then `separator` and ``True``.
    """
    festival = FestivalBackend('en-us')
    # pylint: disable=protected-access
    phonemized = festival._phonemize_aux(text, 0, separator, True)
    return phonemized

コード例 #18

0

ファイルを表示

    def recognize(self, wav=None):
        """Phonemize ``self.text`` with the selected engine and print it.

        The engine is chosen by substring match on ``self.eng``, tried in
        this order: "fest" (festival, en-us only), "esp" (espeak), "mbr"
        (espeak-mbrola), "seg" (segments). For every engine but festival
        the language is chosen by substring match on ``self.lan``.

        On an unknown engine or language, or when the espeak-mbrola backend
        is not available, a message is printed and the process exits with
        status 1. The `wav` argument is not used.
        """
        engine = self.eng
        lang = self.lan
        # NOTE(review): languages are matched by substring in a fixed order,
        # so a value such as "french" matches "en" first -- confirm the
        # values callers put in self.lan

        if "fest" in engine:
            # ======= festival english us only ================================
            from phonemizer.backend import FestivalBackend
            out1 = FestivalBackend(
                'en-us', preserve_punctuation=False).phonemize(self.text,
                                                               strip=True)
        elif "esp" in engine:
            # ======= espeak ==================================================
            from phonemizer.backend import EspeakBackend
            if "en" in lang:
                backend = EspeakBackend('en-us')
            elif "fr" in lang:
                backend = EspeakBackend('fr-fr')
            elif "de" in lang or "ger" in lang:
                backend = EspeakBackend('de-de')
            elif "ita" in lang:
                backend = EspeakBackend('it-it')
            elif "esp" in lang or "spa" in lang:
                backend = EspeakBackend('es-es')
            else:
                print("Invalid Language specified")
                exit(1)
            sep = separator.Separator(word=';eword ', syllable=None, phone=' ')
            out1 = backend.phonemize(self.text, sep, False)
        elif "mbr" in engine:
            # ====== mbrola ===================================================
            from phonemizer.backend import EspeakMbrolaBackend
            if not EspeakMbrolaBackend.is_available():
                # without this guard there is no result to print below
                print("espeak-mbrola backend is not available")
                exit(1)
            # the backend must be kept in a variable to be used below;
            # mb-it3 is the Italian voice and mb-es1 the Spanish one
            if "en" in lang:
                backend = EspeakMbrolaBackend('mb-en1')
            elif "fr" in lang:
                backend = EspeakMbrolaBackend('mb-fr2')
            elif "de" in lang or "ger" in lang:
                backend = EspeakMbrolaBackend('mb-de1')
            elif "ita" in lang:
                backend = EspeakMbrolaBackend('mb-it3')
            elif "esp" in lang or "spa" in lang:
                backend = EspeakMbrolaBackend('mb-es1')
            else:
                print("Invalid Language specified")
                exit(1)
            sep = separator.Separator(word=';eword ',
                                      syllable=None,
                                      phone=' ')
            out1 = backend.phonemize(self.text, sep, False)
        elif "seg" in engine:
            # ===== segment ===================================================
            from phonemizer.backend import SegmentsBackend
            if "ja" in lang:
                backend = SegmentsBackend('japanese')
            else:
                print("Invalid Language specified")
                exit(1)
            sep = separator.Separator(word=';eword ', syllable=None, phone=' ')
            out1 = backend.phonemize(self.text, sep, False)
        else:
            print("Invalid Language specified")
            exit(1)
        print(out1)  # return the result string to the speech engine

コード例 #19

0

ファイルを表示

ファイル: main.py プロジェクト: bootphon/phonemizer

def parse_args():
    """Argument parser for the phonemization script

    Declares the command-line options (general, input/output, separators
    and language groups) and returns the result of ``parser.parse_args()``.
    The epilog of the help message lists, for each backend, the languages
    returned by its ``supported_languages()`` method.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''Multilingual text to phonemes converter

The 'phonemize' program allows simple phonemization of words and texts
in many language using three backends: espeak, festival and segments.

- espeak is a text-to-speech software supporting multiple languages
  and IPA (Internatinal Phonetic Alphabet) output. See
  http://espeak.sourceforge.net or
  https://github.com/espeak-ng/espeak-ng

- festival is also a text-to-speech software. Currently only American
  English is supported and festival uses a custom phoneset
  (http://www.festvox.org/bsv/c4711.html), but festival is the only
  backend supporting tokenization at the syllable
  level. See http://www.cstr.ed.ac.uk/projects/festival

- segments is a Unicode tokenizer that build a phonemization from a
  grapheme to phoneme mapping provided as a file by the user. See
  https://github.com/cldf/segments.

See the '--language' option below for details on the languages
supported by each backend.

''',
        epilog='''
   Languages supported by the festival backend are:
   {festival}

   Languages supported by the segments backend are:
   {segments}
   Instead of a language you can also provide a file specifying a
   grapheme to phoneme mapping (see the files above for exemples).

   Languages supported by the espeak backend are:
   {espeak}


Exemples:

* Phonemize a US English text with espeak

   $ echo 'hello world' | phonemize -l en-us -b espeak
   həloʊ wɜːld

* Phonemize a US English text with festival

   $ echo 'hello world' | phonemize -l en-us -b festival
   hhaxlow werld

* Phonemize a Japanese text with segments

  $ echo 'konnichiwa tsekai' | phonemize -l japanese -b segments
  konnitʃiwa t͡sekai

* Add a separator between phones

  $ echo 'hello world' | phonemize -l en-us -b festival -p '-' --strip
  hh-ax-l-ow w-er-l-d

* Phonemize some French text file using espeak

  $ phonemize -l fr-fr -b espeak text.txt -o phones.txt
        '''.format(
            festival='\n'.join(
                '\t{}\t->\t{}'.format(k, v) for k, v in
                sorted(FestivalBackend.supported_languages().items())),
            segments='\n'.join(
                '\t{}\t->\t{}'.format(k, v) for k, v in
                sorted(SegmentsBackend.supported_languages().items())),
            espeak='\n'.join(
                '\t{}\t->\t{}'.format(k, v) for k, v in
                sorted(EspeakBackend.supported_languages().items()))))

    # general arguments
    parser.add_argument(
        '--version', action='store_true',
        help='show version information and exit')

    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='write some log messages to stderr')

    parser.add_argument(
        '-j', '--njobs', type=int, metavar='<int>', default=1,
        help='number of parallel jobs, default is %(default)s')

    # input/output arguments (default to the standard streams)
    group = parser.add_argument_group('input/output')
    group.add_argument(
        'input', default=sys.stdin, nargs='?', metavar='<file>',
        help='input text file to phonemize, if not specified read from stdin')

    group.add_argument(
        '-o', '--output', default=sys.stdout, metavar='<file>',
        help='output text file to write, if not specified write to stdout')

    # separators arguments (defaults come from separator.default_separator)
    group = parser.add_argument_group('separators')
    group.add_argument(
        '-p', '--phone-separator', metavar='<str>',
        default=separator.default_separator.phone,
        help='phone separator, default is "%(default)s"')

    group.add_argument(
        '-w', '--word-separator', metavar='<str>',
        default=separator.default_separator.word,
        help='word separator, default is "%(default)s"')

    group.add_argument(
        '-s', '--syllable-separator', metavar='<str>',
        default=separator.default_separator.syllable,
        help='''syllable separator is available only for the festival backend,
        this option has no effect if espeak or segments is used.
        Default is "%(default)s"''')

    group.add_argument(
        '--strip', action='store_true',
        help='removes the end separators in phonemized tokens')

    # backend and language arguments
    group = parser.add_argument_group('language')

    group.add_argument(
        '-b', '--backend', metavar='<str>', default='espeak',
        choices=['espeak', 'festival', 'segments'],
        help="""the phonemization backend, must be 'espeak', 'festival' or
        'segments'. Default is %(default)s""")

    group.add_argument(
        '-l', '--language', metavar='<str|file>', default='en-us',
        help='''the language code of the input text, see below for a list of
        supported languages. According to the language code you
        specify, the appropriate backend (segments, espeak or
        festival) will be called in background. Default is
        %(default)s''')

    return parser.parse_args()

コード例 #20

0

ファイルを表示

ファイル: main.py プロジェクト: songkey/phonemizer

def parse_args():
    """Argument parser for the phonemization script

    Declares the command-line options (general, input/output, separators,
    backends, espeak specific and language groups) and returns the result
    of ``parser.parse_args()``. The epilog of the help message lists, for
    each backend, the languages returned by its ``supported_languages()``
    method.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='''Multilingual text to phonemes converter

The 'phonemize' program allows simple phonemization of words and texts
in many language using three backends: espeak, festival and segments.

- espeak is a text-to-speech software supporting multiple languages
  and IPA (Internatinal Phonetic Alphabet) output. See
  http://espeak.sourceforge.net or
  https://github.com/espeak-ng/espeak-ng

- festival is also a text-to-speech software. Currently only American
  English is supported and festival uses a custom phoneset
  (http://www.festvox.org/bsv/c4711.html), but festival is the only
  backend supporting tokenization at the syllable
  level. See http://www.cstr.ed.ac.uk/projects/festival

- segments is a Unicode tokenizer that build a phonemization from a
  grapheme to phoneme mapping provided as a file by the user. See
  https://github.com/cldf/segments.

See the '--language' option below for details on the languages
supported by each backend.

''',
        epilog='''
   Languages supported by the festival backend are:
   {festival}

   Languages supported by the segments backend are:
   {segments}
   Instead of a language you can also provide a file specifying a
   grapheme to phoneme mapping (see the files above for exemples).

   Languages supported by the espeak backend are:
   {espeak}


Exemples:

* Phonemize a US English text with espeak

   $ echo 'hello world' | phonemize -l en-us -b espeak
   həloʊ wɜːld

* Phonemize a US English text with festival

   $ echo 'hello world' | phonemize -l en-us -b festival
   hhaxlow werld

* Phonemize a Japanese text with segments

  $ echo 'konnichiwa tsekai' | phonemize -l japanese -b segments
  konnitʃiwa t͡sekai

* Add a separator between phones

  $ echo 'hello world' | phonemize -l en-us -b festival -p '-' --strip
  hh-ax-l-ow w-er-l-d

* Phonemize some French text file using espeak

  $ phonemize -l fr-fr -b espeak text.txt -o phones.txt
        '''.format(festival='\n'.join(
            '\t{}\t->\t{}'.format(k, v)
            for k, v in sorted(FestivalBackend.supported_languages().items())),
                   segments='\n'.join(
                       '\t{}\t->\t{}'.format(k, v) for k, v in sorted(
                           SegmentsBackend.supported_languages().items())),
                   espeak='\n'.join(
                       '\t{}\t->\t{}'.format(k, v) for k, v in sorted(
                           EspeakBackend.supported_languages().items()))))

    # general arguments
    parser.add_argument('--version',
                        action='store_true',
                        help='show version information and exit.')

    # --verbose and --quiet cannot be given together
    group = parser.add_mutually_exclusive_group()
    group.add_argument('-v',
                       '--verbose',
                       action='store_true',
                       help='write all log messages to stderr '
                       '(displays only warnings by default).')
    group.add_argument('-q',
                       '--quiet',
                       action='store_true',
                       help='do not display any log message, even warnings.')

    parser.add_argument(
        '-j',
        '--njobs',
        type=int,
        metavar='<int>',
        default=1,
        help='number of parallel jobs, default is %(default)s.')

    # input/output arguments (default to the standard streams)
    group = parser.add_argument_group('input/output')
    group.add_argument(
        'input',
        default=sys.stdin,
        nargs='?',
        metavar='<file>',
        help='input text file to phonemize, if not specified read from stdin.')

    group.add_argument(
        '-o',
        '--output',
        default=sys.stdout,
        metavar='<file>',
        help='output text file to write, if not specified write to stdout.')

    # separators arguments (defaults come from separator.default_separator)
    group = parser.add_argument_group('separators')
    group.add_argument('-p',
                       '--phone-separator',
                       metavar='<str>',
                       default=separator.default_separator.phone,
                       help='phone separator, default is "%(default)s".')

    group.add_argument('-w',
                       '--word-separator',
                       metavar='<str>',
                       default=separator.default_separator.word,
                       help='word separator, default is "%(default)s".')

    group.add_argument(
        '-s',
        '--syllable-separator',
        metavar='<str>',
        default=separator.default_separator.syllable,
        help='''syllable separator, only valid for festival backend,
        this option has no effect if espeak or segments is used.
        Default is "%(default)s".''')

    group.add_argument('--strip',
                       action='store_true',
                       help='removes the end separators in phonemized tokens.')

    # backend selection
    group = parser.add_argument_group('backends')
    group.add_argument(
        '-b',
        '--backend',
        metavar='<str>',
        default='espeak',
        choices=['espeak', 'festival', 'segments'],
        help="""the phonemization backend, must be 'espeak', 'festival' or
        'segments'. Default is %(default)s.""")

    # options documented as specific to the espeak backend
    group = parser.add_argument_group('specific to espeak backend')
    group.add_argument(
        '--with-stress',
        action='store_true',
        help='''when the option is set, the stresses on phonemes are present
        (stresses characters are ˈ'ˌ). By default stresses are removed.''')
    group.add_argument(
        '--sampa',
        action='store_true',
        help='''only valid for espeak-ng and NOT supported for espeak, use the
        "sampa" (Speech Assessment Methods Phonetic Alphabet) alphabet instead
        of "ipa" (International Phonetic Alphabet).''')
    group.add_argument(
        '--language-switch',
        default='keep-flags',
        choices=['keep-flags', 'remove-flags', 'remove-utterance'],
        help="""espeak can pronounce some words in another language (typically
        English) when phonemizing a text. This option setups the policy to use
        when such a language switch occurs. Three values are available:
        'keep-flags' (the default), 'remove-flags' or 'remove-utterance'. The
        'keep-flags' policy keeps the language switching flags, for example
        (en) or (jp), in the output. The 'remove-flags' policy removes them and
        the 'remove-utterance' policy removes the whole line of text including
        a language switch.""")

    # language of the input text
    group = parser.add_argument_group('language')
    group.add_argument(
        '-l',
        '--language',
        metavar='<str|file>',
        default='en-us',
        help='''the language code of the input text, see below for a list of
        supported languages. According to the language code you
        specify, the appropriate backend (segments, espeak or
        festival) will be called in background. Default is
        %(default)s.''')

    return parser.parse_args()

コード例 #21

0

ファイルを表示

ファイル: test_punctuation.py プロジェクト: welgazil/phonemizer

import pytest

from phonemizer.backend import EspeakBackend, FestivalBackend, SegmentsBackend
from phonemizer.punctuation import Punctuation
from phonemizer.phonemize import phonemize


# Version flags evaluated once at import time, by comparing the version
# tuple reported by each backend with a reference tuple.

# True if we are using espeak>=1.49.3
# NOTE(review): the name says 143 but the comparison is against 1.49.3
ESPEAK_143 = (EspeakBackend.version(as_tuple=True) >= (1, 49, 3))

# True if we are using espeak>=1.50
ESPEAK_150 = (EspeakBackend.version(as_tuple=True) >= (1, 50))

# True if we are using festival>=2.5
FESTIVAL_25 = (FestivalBackend.version(as_tuple=True) >= (2, 5))


@pytest.mark.parametrize('inp, out', [('a, b,c.', 'a b c'),
                                      ('abc de', 'abc de'),
                                      ('!d.d. dd??  d!', 'd d dd d')])
def test_remove(inp, out):
    """Check Punctuation().remove() output on each input/expected pair."""
    cleaned = Punctuation().remove(inp)
    assert cleaned == out


@pytest.mark.parametrize(
    'inp', [
        ['.a.b.c.'],
        ['a, a?', 'b, b'],