Ejemplo n.º 1
0
def test_segments(njobs):
    """Check the 'segments' backend on Yucatec Maya for list and str inputs."""
    # "one two three four five" written in Yucatec Maya
    text = ['untuʼuleʼ kaʼapʼeʼel', 'oʼoxpʼeʼel', 'kantuʼuloʼon chincho']
    # expected transcription of each utterance, without any trailing separator
    stripped = ['untṵːlḛ ka̰ːpʼḛːl', 'o̰ːʃpʼḛːl', 'kantṵːlo̰ːn t̠͡ʃint̠͡ʃo']

    def run(source, strip):
        # every call shares the same language, backend and job count
        return phonemize(source,
                         language='yucatec',
                         backend='segments',
                         strip=strip,
                         njobs=njobs)

    # list input, unstripped: each utterance is expected with a trailing space
    assert run(text, False) == [utt + ' ' for utt in stripped]

    # one space-joined string, unstripped: a single trailing space at the end
    assert run(' '.join(text), False) == ' '.join(stripped) + ' '

    # one line-joined string, stripped: no trailing space on any line
    assert run(os.linesep.join(text), True) == os.linesep.join(stripped)
Ejemplo n.º 2
0
def text2phone(text, language):
    '''Convert graphemes to phonemes.

    For "zh-CN" the work is delegated to ``chinese_text_to_phonemes``. Every
    other language goes through ``phonemize`` with the espeak backend, and
    punctuation found in ``text`` is re-attached to the phoneme string.

        Parameters:
                text (str): text to phonemize
                language (str): language of the text
        Returns:
                ph (str): phonemes as a string separated by "|"
                        ph = "ɪ|g|ˈ|z|æ|m|p|ə|l"
    '''

    # TO REVIEW : How to have a good implementation for this?
    if language == "zh-CN":
        return chinese_text_to_phonemes(text)

    seperator = phonemizer.separator.Separator(' |', '', '|')
    punctuations = re.findall(PHONEME_PUNCTUATION_PATTERN, text)
    if version.parse(phonemizer.__version__) < version.parse('2.1'):
        ph = phonemize(text,
                       separator=seperator,
                       strip=False,
                       njobs=1,
                       backend='espeak',
                       language=language)
        ph = ph[:-1].strip()  # skip the last empty character
        # phonemizer does not tackle punctuations. Here we do.
        # Replace \n with matching punctuations.
        if punctuations:
            if text[-1] == punctuations[-1]:
                # The text ends with a punctuation mark: the inner marks
                # replace the line breaks, the last one is appended once.
                for punct in punctuations[:-1]:
                    ph = ph.replace('| |\n', '|' + punct + '| |', 1)
                # Must stay outside the loop, otherwise the final mark is
                # added once per inner mark (and never when it is alone).
                ph = ph + punctuations[-1]
            else:
                for punct in punctuations:
                    ph = ph.replace('| |\n', '|' + punct + '| |', 1)
    else:
        # Any version that is not < 2.1 is >= 2.1, so no further branch is
        # needed; here phonemize() is asked to keep the punctuation itself.
        ph = phonemize(text,
                       separator=seperator,
                       strip=False,
                       njobs=1,
                       backend='espeak',
                       language=language,
                       preserve_punctuation=True,
                       language_switch='remove-flags')
        # this is a simple fix for phonemizer.
        # https://github.com/bootphon/phonemizer/issues/32
        if punctuations:
            for punctuation in punctuations:
                ph = ph.replace(f"| |{punctuation} ",
                                f"|{punctuation}| |").replace(
                                    f"| |{punctuation}", f"|{punctuation}| |")
            ph = ph[:-3]

    return ph
Ejemplo n.º 3
0
def text2phone(text, language):
    '''
    Convert graphemes to phonemes.

    Runs ``phonemize`` with the espeak backend and re-attaches the
    punctuation marks found in ``text`` to the resulting phoneme string.

        Parameters:
                text (str): text to phonemize
                language (str): language passed on to ``phonemize``
        Returns:
                ph (str): the phoneme string
    '''
    seperator = phonemizer.separator.Separator(' |', '', '|')
    punctuations = re.findall(PHONEME_PUNCTUATION_PATTERN, text)
    if version.parse(phonemizer.__version__) < version.parse('2.1'):
        ph = phonemize(text, separator=seperator, strip=False, njobs=1, backend='espeak', language=language)
        ph = ph[:-1].strip() # skip the last empty character
        # phonemizer does not tackle punctuations. Here we do.
        # Replace \n with matching punctuations.
        if punctuations:
            if text[-1] == punctuations[-1]:
                # The text ends with a punctuation mark: the inner marks
                # replace the line breaks, the last one is appended once.
                for punct in punctuations[:-1]:
                    ph = ph.replace('| |\n', '|'+punct+'| |', 1)
                # Must stay outside the loop, otherwise the final mark is
                # added once per inner mark (and never when it is alone).
                ph = ph + punctuations[-1]
            else:
                for punct in punctuations:
                    ph = ph.replace('| |\n', '|'+punct+'| |', 1)
    else:
        # Any version that is not < 2.1 is >= 2.1, so no further branch is
        # needed; here phonemize() is asked to keep the punctuation itself.
        ph = phonemize(text, separator=seperator, strip=False, njobs=1, backend='espeak', language=language, preserve_punctuation=True)
        # this is a simple fix for phonemizer.
        # https://github.com/bootphon/phonemizer/issues/32
        if punctuations:
            for punctuation in punctuations:
                ph = ph.replace(f"| |{punctuation} ", f"|{punctuation}| |").replace(f"| |{punctuation}", f"|{punctuation}| |")
            ph = ph[:-3]

    return ph
Ejemplo n.º 4
0
def test_issue55(backend, marks, text, expected):
    """Phonemize `text` with punctuation preserved and check the outcome.

    `expected` is either an exception type the call must raise, or the
    value the call must return. `marks` may be the string 'default', in
    which case `Punctuation.default_marks()` is used.
    """
    if marks == 'default':
        marks = Punctuation.default_marks()
    language = 'cree' if backend == 'segments' else 'en-us'

    try:
        # pytest.raises() itself raises TypeError when `expected` is not an
        # exception type; that is what routes plain values to the equality
        # check below.
        with pytest.raises(expected):
            phonemize(text,
                      language=language,
                      backend=backend,
                      preserve_punctuation=True,
                      punctuation_marks=marks)
    except TypeError:
        try:
            assert expected == phonemize(text,
                                         language=language,
                                         backend=backend,
                                         preserve_punctuation=True,
                                         punctuation_marks=marks)
        except RuntimeError:
            # TODO on some installations festival fails to phonemize "?".
            # It ends with a segmentation fault. This seems to only appear
            # with festival-2.5 (but is working on travis and docker image)
            #
            # Only festival gets this tolerance: a RuntimeError from any
            # other backend is a real failure and must not be hidden.
            if backend != 'festival':
                raise
Ejemplo n.º 5
0
def test_text_type():
    """The output type of phonemize follows the input type (list or str)."""
    utterances = ['one two', 'three', 'four five']
    joined = '\n'.join(utterances)

    options = {'language': 'en-us', 'backend': 'espeak', 'strip': True}
    from_list = phonemize(utterances, **options)
    from_str = phonemize(joined, **options)

    # a list comes back as a list, a string as a string
    assert isinstance(from_list, list)
    assert isinstance(from_str, str)
    # both inputs carry the same content once the list output is joined
    assert '\n'.join(from_list) == from_str
Ejemplo n.º 6
0
def test_festival_bad():
    """Options that are valid for espeak only must fail with festival."""
    text = ['one two', 'three', 'four five']

    # each entry is one option the festival backend is expected to reject
    espeak_only_options = (
        {'with_stress': True},
        {'language_switch': 'remove-flags'},
    )

    for option in espeak_only_options:
        with pytest.raises(RuntimeError):
            phonemize(text, language='en-us', backend='festival', **option)
Ejemplo n.º 7
0
def test_espeak_mbrola(njobs):
    """The espeak-mbrola output is expected to be the same with and
    without `strip`."""
    text = ['un deux', 'trois', 'quatre cinq']
    expected = ['9~d2', 'tRwa', 'katRse~k']

    for strip in (True, False):
        out = phonemize(text,
                        language='mb-fr1',
                        backend='espeak-mbrola',
                        strip=strip,
                        njobs=njobs)
        assert out == expected
Ejemplo n.º 8
0
def text2phone(text, language):
    '''
    Convert graphemes to phonemes.

    Runs ``phonemize`` with the espeak backend, then puts the punctuation
    marks found in ``text`` back into the phoneme string in place of the
    line breaks.

        Parameters:
                text (str): text to phonemize
                language (str): language passed on to ``phonemize``
        Returns:
                ph (str): the phoneme string
    '''
    seperator = phonemizer.separator.Separator(' |', '', '|')
    punctuations = re.findall(PHONEME_PUNCTUATION_PATTERN, text)
    ph = phonemize(text,
                   separator=seperator,
                   strip=False,
                   njobs=1,
                   backend='espeak',
                   language=language)
    ph = ph[:-1].strip()  # skip the last empty character
    # Replace \n with matching punctuations.
    if punctuations:
        # if text ends with a punctuation.
        if text[-1] == punctuations[-1]:
            for punct in punctuations[:-1]:
                ph = ph.replace('| |\n', '|' + punct + '| |', 1)
            try:
                ph = ph + punctuations[-1]
            except TypeError:
                # Only a non-str match (e.g. a tuple from a pattern with
                # several groups) can make the concatenation fail; report
                # the offending text and carry on as before, without
                # hiding unrelated errors such as KeyboardInterrupt.
                print(text)
        else:
            for punct in punctuations:
                ph = ph.replace('| |\n', '|' + punct + '| |', 1)
    return ph
Ejemplo n.º 9
0
def main():
    """Phonemize a two-column, tab-separated file.

    Reads ``--in_file`` line by line, splits each line on tabs, phonemizes
    the fields and, when exactly two results come back, writes
    ``field0 <TAB> phonemes0 <TAB> field1 <TAB> phonemes1`` to ``--out_file``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--in_file", type=str)
    parser.add_argument("--out_file", type=str)

    args = parser.parse_args()

    # Both files are managed by the `with` statement so they are closed (and
    # the output flushed) even if an unexpected error occurs. The output is
    # written as UTF-8 explicitly: it contains the UTF-8 input text plus the
    # phonemizer output, which the platform default encoding may not cover.
    with open(args.in_file, 'r', encoding="utf-8") as fin, \
            open(args.out_file, 'w', encoding="utf-8") as fout:
        for line in fin:
            fields = line.split('\t')
            # NOTE(review): the last field still carries the line's trailing
            # newline, which ends up in the middle of the written record -
            # confirm whether this is intended.

            try:
                # NOTE(review): `njobs` is not defined in this function; it
                # is presumably a module-level name - confirm.
                out = phonemize(fields,
                                language='en-us',
                                backend='espeak',
                                strip=True,
                                njobs=njobs)
                if len(out) == 2:
                    fout.write(fields[0] + '\t' + out[0] + '\t' + fields[1] +
                               '\t' + out[1] + '\n')
            except AttributeError:
                # best effort: lines that cannot be processed are skipped
                continue
Ejemplo n.º 10
0
    def __call__(
            self,
            text: Union[str, list],
            stress=True,
            n_jobs=1,
            language='en-us',
            allowed_phonemes=None) -> Union[str, list]:
        """Phonemize `text` with espeak and filter the resulting characters.

        Args:
            text: a string or a list, handed to `phonemize` unchanged.
            stress: forwarded as `with_stress` after `params_or_args`.
            n_jobs: forwarded as `njobs` after `params_or_args`.
            language: forwarded as `language` after `params_or_args`.
            allowed_phonemes: passed to `filter_characters` along with the
                raw phonemes.

        Returns:
            Whatever `self.filter_characters` returns for the raw phonemes.
        """
        # Let the instance resolve the effective settings.
        # NOTE(review): presumably instance-level parameters take precedence
        # over the call arguments - confirm in params_or_args.
        language, n_jobs, stress = self.params_or_args(
            [language, n_jobs, stress])

        espeak_options = dict(
            language=language,
            backend='espeak',
            strip=True,
            preserve_punctuation=True,
            with_stress=stress,
            njobs=n_jobs,
            language_switch='remove-flags',
        )

        # unfiltered phonemes first, then the filtered version is returned
        unfiltered = phonemize(text, **espeak_options)
        return self.filter_characters(unfiltered, allowed_phonemes)
Ejemplo n.º 11
0
def test_lang_switch():
    """With language_switch='remove-utterance' the first utterance is
    expected to come back empty while the second one is phonemized."""
    text = ['bonjour apple', 'bonjour toi']
    options = {
        'language': 'fr-fr',
        'backend': 'espeak',
        'prepend_text': True,
        'language_switch': 'remove-utterance',
    }

    # prepend_text=True: each result is an (input text, phonemes) pair
    expected = [('bonjour apple', ''), ('bonjour toi', 'bɔ̃ʒuʁ twa ')]
    assert phonemize(text, **options) == expected
Ejemplo n.º 12
0
def text2phone(text, char2code):
    """Phonemize `text` and encode the phonemes as a LongTensor.

    Args:
        text: text handed to `phonemize.phonemize`.
        char2code: mapping from a phoneme token to its integer code; every
            token produced must be a key of it.

    Returns:
        A `torch.LongTensor` holding one code per phoneme token.
    """
    seperator = separator.Separator('', '', ' ')
    ph = phonemize.phonemize(text, separator=seperator)
    # Drop every empty token left by the split. list.remove('') only removed
    # the first one and raised ValueError when there was none at all.
    tokens = [token for token in ph.split(' ') if token]

    return torch.LongTensor([char2code[token] for token in tokens])
Ejemplo n.º 13
0
def main():
    """Phonemize a text from command-line arguments"""
    args = parse_args()

    if args.version:
        print(version())
        return

    # configure logging according to --verbose option. If verbose,
    # init a logger to output on stderr. Else init a logger going to
    # the void.
    logger = logging.getLogger()
    logger.handlers = []
    logger.setLevel(logging.DEBUG)
    if args.verbose:
        handler = logging.StreamHandler(sys.stderr)
        handler.setFormatter(logging.Formatter('%(message)s'))
    else:
        handler = logging.NullHandler()
    logger.addHandler(handler)

    # Streams opened by this function (as opposed to already-open streams
    # received from parse_args). They are closed in the `finally` clause so
    # the output is flushed and no file handle leaks, even on error.
    opened = []
    try:
        # configure input as a readable stream
        streamin = args.input
        if isinstance(streamin, str):
            streamin = codecs.open(streamin, 'r', encoding='utf8')
            opened.append(streamin)
        logger.debug('reading from %s', streamin.name)

        # configure output as a writable stream
        streamout = args.output
        if isinstance(streamout, str):
            streamout = codecs.open(streamout, 'w', 'utf8')
            opened.append(streamout)
        logger.debug('writing to %s', streamout.name)

        # configure the separator for phonemes, syllables and words.
        sep = separator.Separator(phone=args.phone_separator,
                                  syllable=args.syllable_separator,
                                  word=args.word_separator)
        logger.debug('separator is %s', sep)

        # load the input text (python2 optionally needs an extra decode)
        text = streamin.read()
        try:
            text = text.decode('utf8')
        except (AttributeError, UnicodeEncodeError):
            pass

        # phonemize the input text
        out = phonemize.phonemize(text,
                                  language=args.language,
                                  backend=args.backend,
                                  separator=sep,
                                  strip=args.strip,
                                  use_sampa=args.sampa,
                                  njobs=args.njobs,
                                  logger=logger)

        # nothing is written for an empty result
        if out:
            streamout.write(out + '\n')
    finally:
        for stream in opened:
            stream.close()
Ejemplo n.º 14
0
def text2phone(text, char2code):
    """Phonemize `text` with festival and encode the phonemes as a LongTensor.

    Args:
        text: text handed to `phonemize`.
        char2code: mapping from a phoneme token to its integer code; every
            token produced must be a key of it.

    Returns:
        A `torch.LongTensor` holding one code per phoneme token.
    """
    seperator = separator.Separator('', '', ' ')
    ph = phonemize(text, backend="festival", separator=seperator)
    # Drop every empty token left by the split. list.remove('') only removed
    # the first one and raised ValueError when there was none at all.
    ph = [token for token in ph.split(' ') if token]

    # trace the text together with its token list
    print('text %s ~ ph %s' % (text, ph))

    return torch.LongTensor([char2code[token] for token in ph])
Ejemplo n.º 15
0
 def __call__(self, text, strip=True, preserve_punctuation=True, with_stress=False, njobs=4):
     """Return `text` phonemized by espeak in the language `self.language`.

     `strip`, `preserve_punctuation`, `with_stress` and `njobs` are passed
     to `phonemize` unchanged; language switches are always handled with
     'remove-flags'.
     """
     options = {
         'language': self.language,
         'backend': 'espeak',
         'strip': strip,
         'preserve_punctuation': preserve_punctuation,
         'with_stress': with_stress,
         'njobs': njobs,
         'language_switch': 'remove-flags',
     }
     return phonemize(text, **options)
Ejemplo n.º 16
0
def test_preserve_2(text, output):
    """Punctuation must survive a preserve/restore round trip and must be
    kept by the espeak backend."""
    marks = ".!;:,?"

    # round trip through Punctuation alone gives back the input text
    punctuation = Punctuation(marks=marks)
    stripped_text, found_marks = punctuation.preserve(text)
    restored = punctuation.restore(stripped_text, found_marks)
    assert text == restored

    # same marks, this time through phonemize
    phonemized = phonemize(text,
                           backend="espeak",
                           preserve_punctuation=True,
                           punctuation_marks=marks)
    assert phonemized == output
Ejemplo n.º 17
0
def test_bad_language():
    """Each (language, backend) pair below must be rejected with a
    RuntimeError."""
    rejected = (
        ('fr-fr', 'festival'),
        ('ffr', 'espeak'),
        ('/path/to/nonexisting/file', 'segments'),
        ('creep', 'segments'),
    )

    for language, backend in rejected:
        with pytest.raises(RuntimeError):
            phonemize('', language=language, backend=backend)
Ejemplo n.º 18
0
def test_preserve_2(text, expected):
    """Punctuation must survive a preserve/restore round trip and must be
    kept by the espeak backend."""
    marks = ".!;:,?"

    # round trip through Punctuation alone gives back the input text
    handler = Punctuation(marks=marks)
    preserved = handler.preserve(text)
    assert text == handler.restore(*preserved)

    # same marks, this time through phonemize
    options = {
        'backend': "espeak",
        'preserve_punctuation': True,
        'punctuation_marks': marks,
    }
    assert phonemize(text, **options) == expected
Ejemplo n.º 19
0
def test_text_type():
    """The output type follows the input type, with or without
    `prepend_text`."""
    utterances = ['one two', 'three', 'four five']
    joined = os.linesep.join(utterances)

    common = {'language': 'en-us', 'backend': 'espeak', 'strip': True}
    from_list = phonemize(utterances, **common)
    from_str = phonemize(joined, **common)
    with_text = phonemize(joined, prepend_text=True, **common)

    # split the prepend_text results into their text and phoneme parts
    texts, phones = [], []
    for item in with_text:
        texts.append(item[0])
        phones.append(item[1])

    assert isinstance(from_list, list)
    assert isinstance(from_str, str)
    assert os.linesep.join(from_list) == from_str
    assert os.linesep.join(phones) == from_str
    assert texts == utterances
Ejemplo n.º 20
0
def main():
    """Phonemize a text from command-line arguments"""
    args = parse_args()

    if args.version:
        print(version.version())
        return

    # configure logging according to --verbose/--quiet options
    verbosity = 'normal'
    if args.verbose:
        verbosity = 'verbose'
    elif args.quiet:
        verbosity = 'quiet'
    log = logger.get_logger(verbosity=verbosity)

    # Streams opened by this function (as opposed to already-open streams
    # received from parse_args). They are closed in the `finally` clause so
    # the output is flushed and no file handle leaks, even on error.
    opened = []
    try:
        # configure input as a readable stream
        streamin = args.input
        if isinstance(streamin, str):
            streamin = codecs.open(streamin, 'r', encoding='utf8')
            opened.append(streamin)
        log.debug('reading from %s', streamin.name)

        # configure output as a writable stream
        streamout = args.output
        if isinstance(streamout, str):
            streamout = codecs.open(streamout, 'w', 'utf8')
            opened.append(streamout)
        log.debug('writing to %s', streamout.name)

        # configure the separator for phonemes, syllables and words.
        sep = separator.Separator(phone=args.phone_separator,
                                  syllable=args.syllable_separator,
                                  word=args.word_separator)
        log.debug('separator is %s', sep)

        # load the input text (python2 optionally needs an extra decode)
        text = streamin.read()
        try:
            text = text.decode('utf8')
        except (AttributeError, UnicodeEncodeError):
            pass

        # phonemize the input text
        out = phonemize.phonemize(text,
                                  language=args.language,
                                  backend=args.backend,
                                  separator=sep,
                                  strip=args.strip,
                                  with_stress=args.with_stress,
                                  use_sampa=args.sampa,
                                  language_switch=args.language_switch,
                                  njobs=args.njobs,
                                  logger=log)

        # nothing is written for an empty result
        if out:
            streamout.write(out + '\n')
    finally:
        for stream in opened:
            stream.close()
Ejemplo n.º 21
0
def test_festival(njobs):
    """Check the festival backend on list, space-joined and line-joined
    input, with and without `strip`."""
    text = ['one two', 'three', 'four five']
    bare = ['wahn tuw', 'thriy', 'faor fayv']
    # with strip=False every utterance is expected with a trailing space
    padded = [utt + ' ' for utt in bare]

    def run(source, **options):
        # language, backend and job count are common to every call
        return phonemize(source,
                         language='en-us',
                         backend='festival',
                         njobs=njobs,
                         **options)

    # use_sampa is rejected by the festival backend
    with pytest.raises(RuntimeError):
        run(text, use_sampa=True, strip=True)

    # list input
    assert run(text, strip=True) == bare
    assert run(text, strip=False) == padded

    # single utterance built with spaces: one trailing space when unstripped
    spaced = ' '.join(text)
    assert run(spaced, strip=True) == ' '.join(bare)
    assert run(spaced, strip=False) == ' '.join(bare) + ' '

    # one utterance per line
    lined = '\n'.join(text)
    assert run(lined, strip=True) == '\n'.join(bare)
    assert run(lined, strip=False) == '\n'.join(padded)
Ejemplo n.º 22
0
def main():
    """Phonemize a text from command-line arguments"""
    args = parse_args()

    if args.version:
        print(version())
        return

    # configure logging according to --verbose option. If verbose,
    # init a logger to output on stderr. Else init a logger going to
    # the void.
    logger = logging.getLogger()
    logger.handlers = []
    logger.setLevel(logging.DEBUG)
    if args.verbose:
        handler = logging.StreamHandler(sys.stderr)
        handler.setFormatter(logging.Formatter('%(message)s'))
    else:
        handler = logging.NullHandler()
    logger.addHandler(handler)

    # Streams opened by this function (as opposed to already-open streams
    # received from parse_args). They are closed in the `finally` clause so
    # the output is flushed and no file handle leaks, even on error.
    opened = []
    try:
        # configure input as a readable stream
        streamin = args.input
        if isinstance(streamin, str):
            streamin = codecs.open(streamin, 'r', encoding='utf8')
            opened.append(streamin)
        logger.debug('reading from %s', streamin.name)

        # configure output as a writable stream
        streamout = args.output
        if isinstance(streamout, str):
            streamout = codecs.open(streamout, 'w', 'utf8')
            opened.append(streamout)
        logger.debug('writing to %s', streamout.name)

        # configure the separator for phonemes, syllables and words.
        sep = separator.Separator(
            phone=args.phone_separator,
            syllable=args.syllable_separator,
            word=args.word_separator)
        logger.debug('separator is %s', sep)

        # load the input text (python2 optionally needs an extra decode)
        text = streamin.read()
        try:
            text = text.decode('utf8')
        except (AttributeError, UnicodeEncodeError):
            pass

        # phonemize the input text
        out = phonemize.phonemize(
            text, language=args.language, backend=args.backend,
            separator=sep, strip=args.strip, njobs=args.njobs, logger=logger)

        # nothing is written for an empty result
        if out:
            streamout.write(out + '\n')
    finally:
        for stream in opened:
            stream.close()
Ejemplo n.º 23
0
def _phonemize(text, language):
    """Return the phonemes of `text`, with the first newline replaced by a
    space.

    espeak (through `phonemize`) is tried first; when it raises a
    RuntimeError the text is transliterated with epitran instead.
    """
    try:
        seperators = Separator(word=' ', phone='')
        phonemes = phonemize(text,
                             separator=seperators,
                             backend='espeak',
                             language=language)
    except RuntimeError:
        # espeak could not process the request: fall back to epitran
        epi = epitran.Epitran(language)
        phonemes = epi.transliterate(text, normpunc=True)
    # str.replace returns a new string, so its result has to be the value
    # returned; calling it as a bare statement left `phonemes` unchanged.
    return phonemes.replace('\n', ' ', 1)
Ejemplo n.º 24
0
def to_phonemes(text: str, lang: str) -> str:
    """Phonemize `text` in language `lang` with espeak and keep only the
    characters found in `phonemes_set`."""
    options = dict(
        language=lang,
        backend='espeak',
        strip=True,
        preserve_punctuation=True,
        with_stress=False,
        njobs=1,
        punctuation_marks=';:,.!?¡¿—…"«»“”()',
        language_switch='remove-flags',
    )
    raw = phonemize(text, **options)
    # any character outside phonemes_set is dropped
    return ''.join(symbol for symbol in raw if symbol in phonemes_set)
Ejemplo n.º 25
0
def test_festival(njobs):
    """Check the festival backend on list, space-joined and line-joined
    input, with and without `strip`."""
    text = ['one two', 'three', 'four five']
    bare = ['wahn tuw', 'thriy', 'faor fayv']
    # with strip=False every utterance is expected with a trailing space
    padded = [utt + ' ' for utt in bare]

    # (input, strip, expected output), checked in this order
    cases = (
        (text, True, bare),
        (text, False, padded),
        (' '.join(text), True, ' '.join(bare)),
        (' '.join(text), False, ' '.join(bare) + ' '),
        ('\n'.join(text), True, '\n'.join(bare)),
        ('\n'.join(text), False, '\n'.join(padded)),
    )

    for source, strip, expected in cases:
        out = phonemize(source,
                        language='en-us',
                        backend='festival',
                        strip=strip,
                        njobs=njobs)
        assert out == expected
Ejemplo n.º 26
0
def test_espeak(njobs):
    """Check the espeak backend on list, space-joined and line-joined
    input, with and without `strip`."""
    text = ['one two', 'three', 'four five']
    bare = ['wʌn tuː', 'θɹiː', 'foːɹ faɪv']
    # with strip=False every utterance is expected with a trailing space
    padded = [utt + ' ' for utt in bare]

    # (input, strip, expected output), checked in this order
    cases = (
        (text, True, bare),
        (text, False, padded),
        (' '.join(text), True, ' '.join(bare)),
        (' '.join(text), False, ' '.join(bare) + ' '),
        ('\n'.join(text), True, '\n'.join(bare)),
        ('\n'.join(text), False, '\n'.join(padded)),
    )

    for source, strip, expected in cases:
        out = phonemize(source,
                        language='en-us',
                        backend='espeak',
                        strip=strip,
                        njobs=njobs)
        assert out == expected
Ejemplo n.º 27
0
def test_espeak_mbrola(caplog, njobs):
    """Asking espeak-mbrola to preserve punctuation still yields the
    phonemes and logs two 'cannot preserve' messages."""
    text = ['un deux', 'trois', 'quatre cinq']

    options = {
        'language': 'mb-fr1',
        'backend': 'espeak-mbrola',
        'njobs': njobs,
        'preserve_punctuation': True,
    }
    assert phonemize(text, **options) == ['9~d2', 'tRwa', 'katRse~k']

    # third field of each captured record tuple is the message text
    logged = [record[2] for record in caplog.record_tuples]
    for unsupported in ('punctuation', 'word separation'):
        expected = 'espeak-mbrola backend cannot preserve ' + unsupported
        assert expected in logged
Ejemplo n.º 28
0
def to_phonemes(text):
    """Phonemize Dutch `text` with espeak while keeping hyphens.

    Hyphens are swapped for em dashes before phonemizing (the em dash is in
    the list of preserved punctuation marks) and swapped back afterwards.
    """
    options = dict(
        language='nl',
        backend='espeak',
        strip=True,
        preserve_punctuation=True,
        with_stress=False,
        njobs=1,
        punctuation_marks=';:,.!?¡¿—…"«»“”()',
        language_switch='remove-flags',
    )
    dashed = text.replace('-', '—')
    return phonemize(dashed, **options).replace('—', '-')
Ejemplo n.º 29
0
def test_espeak(njobs):
    """Check the espeak backend on list, space-joined and line-joined
    input, with and without `strip`, plus SAMPA output on espeak-ng."""
    text = ['one two', 'three', 'four five']
    bare = [u'wʌn tuː', u'θɹiː', u'foːɹ faɪv']
    # with strip=False every utterance is expected with a trailing space
    padded = [utt + ' ' for utt in bare]

    def run(source, **options):
        # language, backend and job count are common to every call
        return phonemize(source,
                         language='en-us',
                         backend='espeak',
                         njobs=njobs,
                         **options)

    assert run(text, strip=True) == bare

    # SAMPA output is only checked when espeak-ng is the installed engine
    if EspeakBackend.is_espeak_ng():
        sampa = run(text, use_sampa=True, strip=True)
        assert sampa == [u'wVn tu:', u'Tri:', u'fo@ faIv']

    assert run(text, strip=False) == padded

    # single utterance built with spaces: one trailing space when unstripped
    spaced = ' '.join(text)
    assert run(spaced, strip=True) == ' '.join(bare)
    assert run(spaced, strip=False) == ' '.join(bare) + ' '

    # one utterance per line
    lined = '\n'.join(text)
    assert run(lined, strip=True) == '\n'.join(bare)
    assert run(lined, strip=False) == '\n'.join(padded)
Ejemplo n.º 30
0
def test_espeak_langswitch(njobs, caplog):
    """French utterances containing an English word keep their language
    flags and the switches are reported in the log."""
    text = ["j'aime le football", "moi aussi", "moi aussi j'aime le football"]
    options = {
        'language': 'fr-fr',
        'backend': 'espeak',
        'njobs': njobs,
        'strip': True,
    }

    # utterances 1 and 3 contain "football", flagged as (en)...(fr)
    expected = [
        'ʒɛm lə (en)fʊtbɔːl(fr)',
        'mwa osi',
        'mwa osi ʒɛm lə (en)fʊtbɔːl(fr)',
    ]
    assert phonemize(text, **options) == expected

    warning = '2 utterances containing language switches on lines 1, 3'
    assert warning in caplog.text
Ejemplo n.º 31
0
def extract_phonemes(filename):
    """Phonemize the content of `filename` with festival and save the result.

    The phones are written next to the input: a trailing ".txt" is replaced
    by ".phones"; for any other name ".phones" is appended.

    Args:
        filename: path of the text file to phonemize.
    """
    from phonemizer.phonemize import phonemize
    from phonemizer.separator import Separator

    # the input file only needs to stay open while it is being read
    with open(filename) as f:
        text = f.read()

    phones = phonemize(text,
                       language='en-us',
                       backend='festival',
                       separator=Separator(phone=' ', syllable='',
                                           word=''))

    # Only a trailing ".txt" is treated as the extension. A plain
    # str.replace would also rewrite ".txt" elsewhere in the path and, when
    # the name has no ".txt" at all, return the input path itself, so the
    # source file would be overwritten.
    suffix = ".txt"
    if filename.endswith(suffix):
        out_path = filename[:-len(suffix)] + ".phones"
    else:
        out_path = filename + ".phones"

    with open(out_path, "w") as outfile:
        print(phones, file=outfile)