Ejemplo n.º 1
0
def test_mismatch(caplog, text, mode):
    """Check phonemize() output and logging for each words_mismatch mode."""
    phonemized = phonemize(text,
                           backend='espeak',
                           language='en-us',
                           words_mismatch=mode)

    # collect only the message part of each captured log record
    logged = [record[2] for record in caplog.record_tuples]
    summary = 'words count mismatch on 67.0% of the lines (2/3)'
    full_output = ['haʊ ɑːɹ juː ', 'aɪ hɐvbɪn bɪzi ', 'aɪ woʊntɐv taɪm ']

    if mode == 'ignore':
        # output untouched, a single summary warning
        assert phonemized == full_output
        assert len(logged) == 1
        assert summary in logged
    elif mode == 'remove':
        # mismatched lines are blanked, an extra removal notice is logged
        assert phonemized == ['haʊ ɑːɹ juː ', '', '']
        assert len(logged) == 2
        assert summary in logged
        assert 'removing the mismatched lines' in logged
    elif mode == 'warn':
        # output untouched, one warning per mismatched line plus the summary
        assert phonemized == full_output
        assert len(logged) == 3
        assert ('words count mismatch on line 2 (expected 4 words but get 3)'
                in logged)
        assert ('words count mismatch on line 3 (expected 4 words but get 3)'
                in logged)
        assert summary in logged
Ejemplo n.º 2
0
def phonemize_labels(file_name, column_name, language):
    """
    Phonemize function:
    This function allow to convert text to phonemes. You need to be sure to have installed the backend beforehand (sudo apt-get install festival espeak-ng mbrola).

    Parameters:
    -----------
        file_name: str
            Name of the tsv file that contains the sentences we need to phonemize

        column_name: str
            Name of the columns that contains the sentences we want to phonemize

        language: str
            Language of the sentences. See https://github.com/espeak-ng/espeak-ng/blob/master/docs/languages.md for reference

    """

    data = pd.read_csv(file_name, sep='\t')
    data['sentence_phonemes'] = phonemize(data[column_name],
                                          language=language,
                                          backend='espeak',
                                          language_switch='remove-flags',
                                          njobs=8)
    # index=False: without it every run prepends an unnamed index column to
    # the tsv, which would be read back as a spurious column next time the
    # same file is processed.
    data.to_csv(file_name, sep='\t', index=False)
Ejemplo n.º 3
0
 def get_phone_string(self, text):
     """Convert a grapheme string into a phone string ending in '#'.

     Cleans the text, phonemizes it with espeak, collapses pause-like
     punctuation into '~' markers and optionally strips prosody marks
     and word boundaries.
     """
     cleaned = clean(text,
                     fix_unicode=True,
                     to_ascii=False,
                     lower=False,
                     lang=self.clean_lang)
     self.expand_abbrevations(cleaned)
     # silence markers must survive the phonemizer
     cleaned = cleaned.replace("_SIL_", "~")
     phones = phonemizer.phonemize(cleaned,
                                   language_switch='remove-flags',
                                   backend="espeak",
                                   language=self.g2p_lang,
                                   preserve_punctuation=True,
                                   strip=True,
                                   punctuation_marks=';:,.!?¡¿—…"«»“”~',
                                   with_stress=self.use_stress)
     # single-pass character mapping, equivalent to the chained replaces:
     # pause-like marks -> '~', whitespace controls -> ' ', inverted
     # punctuation dropped
     phones = phones.translate(str.maketrans({
         ';': '~', ':': '~', '"': '~', '-': '~', ',': '~',
         '\n': ' ', '\t': ' ', '¡': None, '¿': None}))
     phones = re.sub("~+", "~", phones)
     if not self.use_prosody:
         for mark in ("ˌ", "ː", "ˑ", "˘", "|", "‖"):
             phones = phones.replace(mark, "")
     if not self.use_word_boundaries:
         phones = phones.replace(" ", "")
     return phones + "#"
Ejemplo n.º 4
0
def text2phone(text, char2code):
    """Phonemize *text* and encode each phone as an integer id.

    Args:
        text: grapheme string to phonemize.
        char2code: mapping from phone string to integer id.

    Returns:
        torch.LongTensor of phone ids.
    """
    seperator = phonemizer.separator.Separator('', '', ' ')
    ph = phonemizer.phonemize(text, separator=seperator)
    # Drop ALL empty tokens produced by the trailing separator. The
    # original `ph.remove('')` removed only the first one and raised
    # ValueError when none was present.
    phones = [p for p in ph.split(' ') if p]

    result = [char2code[p] for p in phones]
    return torch.LongTensor(result)
Ejemplo n.º 5
0
 def setup(self):
     """Bind self.p to a festival-backed phonemization shortcut."""
     def _phonemize(text):
         return phonemize(text,
                          language='en-us',
                          backend='festival',
                          strip=True,
                          separator=separator.Separator(' ', '|', '-'))

     # just a name shortcut
     self.p = _phonemize
Ejemplo n.º 6
0
def english_cleaners2(text):
  '''Pipeline for English text, including abbreviation expansion. + punctuation + stress'''
  # normalization pipeline applied in order before phonemization
  for transform in (convert_to_ascii, lowercase, expand_abbreviations):
    text = transform(text)
  phonemes = phonemize(text,
                       language='en-us',
                       backend='espeak',
                       strip=True,
                       preserve_punctuation=True,
                       with_stress=True)
  return collapse_whitespace(phonemes)
Ejemplo n.º 7
0
 def string_to_tensor(self, text, view=False):
     """
     Fixes unicode errors, expands some abbreviations,
     turns graphemes into phonemes and then vectorizes
     the sequence as IDs to be fed into an embedding
     layer

     Args:
         text: grapheme string to convert.
         view: when True, print the intermediate phoneme string.

     Returns:
         torch.LongTensor of shape (1, sequence_length) with phoneme IDs.
     """
     # clean unicode errors, expand abbreviations, handle emojis etc.
     utt = clean(text,
                 fix_unicode=True,
                 to_ascii=False,
                 lower=False,
                 lang=self.clean_lang)
     # NOTE(review): the return value is discarded here — if
     # expand_abbreviations returns the expanded string instead of
     # mutating state, this call has no effect; confirm against its
     # definition.
     self.expand_abbreviations(utt)
     # if an aligner has produced silence tokens before, turn
     # them into silence markers now so that they survive the
     # phonemizer:
     utt = utt.replace("_SIL_", "~")
     # phonemize, then collapse pause-like punctuation into '~' markers:
     # ; : " - and , all end up as '~' (',' is mapped last), '/' and
     # whitespace controls become spaces, inverted marks are dropped.
     phones = phonemizer.phonemize(utt,
                                   language_switch='remove-flags',
                                   backend="espeak",
                                   language=self.g2p_lang,
                                   preserve_punctuation=True,
                                   strip=True,
                                   punctuation_marks=';:,.!?¡¿—…"«»“”~/',
                                   with_stress=self.use_stress).replace(";", ",").replace("/", " ") \
         .replace(":", ",").replace('"', ",").replace("-", ",").replace("-", ",").replace("\n", " ") \
         .replace("\t", " ").replace("¡", "").replace("¿", "").replace(",", "~")
     # merge runs of pause markers into a single one
     phones = re.sub("~+", "~", phones)
     if not self.use_prosody:
         # strip stress/length/boundary prosody symbols; '~' and . ? !
         # are kept (pause marker and stop-token indicators)
         phones = phones.replace("ˌ", "").replace("ː", "").replace("ˑ", "") \
             .replace("˘", "").replace("|", "").replace("‖", "")
     if not self.use_word_boundaries:
         phones = phones.replace(" ", "")
     else:
         phones = re.sub(r"\s+", " ", phones)
     if view:
         print("Phonemes: \n{}\n".format(phones))
     phones_vector = list()
     # turn into numeric vectors, one ID per character
     for char in phones:
         if self.allow_unknown:
             # unknown symbols fall back to the default vector
             phones_vector.append(
                 self.ipa_to_vector.get(char, self.default_vector))
         else:
             try:
                 phones_vector.append(self.ipa_to_vector[char])
             except KeyError:
                 # unknown symbols are reported and skipped
                 print("unknown phoneme: {}".format(char))
     if self.use_explicit_eos:
         phones_vector.append(self.ipa_to_vector["end_of_input"])
     return torch.LongTensor(phones_vector).unsqueeze(0)
Ejemplo n.º 8
0
def load_transcripts(path: str, split: str, use_percentage: float) -> dict:
    """Map audio clip paths to phoneme lists for a Common Voice split.

    Only the first ``use_percentage`` fraction of rows is kept; phones are
    folded through the 60->48 phone map, dropping unmapped ones.
    """
    table = pd.read_csv(os.path.join(path, "{}.tsv".format(split)), sep='\t')
    keep = int(use_percentage * len(table))
    m60_48, _ = load_phone_map()

    data = {}
    for file_name, text in tqdm(table[['path', 'sentence']].values[:keep]):
        # espeak/festival chokes on non-ascii, so strip it first
        ascii_text = text.encode('ascii', 'ignore').decode('ascii')
        phones = phonemize(text=ascii_text,
                           language='en-us',
                           backend='festival',
                           separator=phone_separator).split()
        data[os.path.join(path, 'clips', file_name)] = [
            m60_48[p] for p in phones if p in m60_48
        ]
    return data
Ejemplo n.º 9
0
def ipa_phonemize(text, lang="en-us", use_g2p=False):
    """Phonemize *text* either with g2pE (ARPAbet) or phonemizer (IPA).

    Args:
        text: input string.
        lang: language code; must be "en-us" when use_g2p is True.
        use_g2p: use the g2p_en backend instead of phonemizer/espeak.

    Returns:
        Space-joined phone string; word boundaries are rendered as "|".

    Raises:
        ImportError: when the selected backend package is missing.
    """
    if use_g2p:
        assert lang == "en-us", "g2pE phonemizer only works for en-us"
        try:
            from g2p_en import G2p
            g2p = G2p()
            return " ".join("|" if p == " " else p for p in g2p(text))
        except ImportError:
            # fixed: the message previously told users to install the
            # wrong package ("phonemizer") for this branch
            raise ImportError("Please install g2p_en: pip install g2p_en")
    else:
        try:
            from phonemizer import phonemize
            from phonemizer.separator import Separator
            return phonemize(text,
                             backend='espeak',
                             language=lang,
                             separator=Separator(word="| ", phone=" "))
        except ImportError:
            raise ImportError(
                "Please install phonemizer: pip install phonemizer")
Ejemplo n.º 10
0
    def phonemize(self, text: str, phonemizer_lang: Optional[str] = None) -> str:
        """Phonemize *text* with the configured backend and separators.

        Falls back to self.phonemizer_lang when no language is given.
        """
        requires_backends(self, "phonemizer")

        from phonemizer import phonemize
        from phonemizer.separator import Separator

        if phonemizer_lang is None:
            phonemizer_lang = self.phonemizer_lang
        if self.word_delimiter_token is not None:
            word_sep = self.word_delimiter_token + " "
        else:
            word_sep = ""

        result = phonemize(
            text,
            language=phonemizer_lang,
            backend=self.phonemizer_backend,
            separator=Separator(
                phone=self.phone_delimiter_token, word=word_sep, syllable=""),
            language_switch="remove-flags",
        )
        return result.strip()
Ejemplo n.º 11
0
    def phonemize(
        cls,
        text: str,
        lang: Optional[str],
        phonemizer: Optional[str] = None,
        preserve_punct: bool = False,
        to_simplified_zh: bool = False,
    ):
        """Phonemize *text* with the selected backend.

        Supported phonemizers: "g2p" (English ARPAbet), "g2pc" (Chinese
        pinyin), "ipa" (espeak). Any other value returns the text as-is.
        """
        if to_simplified_zh:
            import hanziconv

            text = hanziconv.HanziConv.toSimplified(text)

        if phonemizer == "g2p":
            import g2p_en

            phones = g2p_en.G2p()(text)
            if preserve_punct:
                return " ".join("|" if p == " " else p for p in phones)
            # map pause punctuation to "sp", keep alphanumeric tokens only
            mapped = ({",": "sp", ";": "sp"}.get(p, p) for p in phones)
            return " ".join(p for p in mapped if p.isalnum())
        if phonemizer == "g2pc":
            import g2pc

            return " ".join(w[3] for w in g2pc.G2pC()(text))
        if phonemizer == "ipa":
            assert lang is not None
            import phonemizer as phonemizer_module
            from phonemizer.separator import Separator

            espeak_lang = {"en": "en-us", "fr": "fr-fr"}.get(lang, lang)
            return phonemizer_module.phonemize(
                text,
                backend="espeak",
                language=espeak_lang,
                separator=Separator(word="| ", phone=" "),
            )
        return text
Ejemplo n.º 12
0
def phonemize(lang, files):
    """Phonemize validated Common Voice sentences for *files*.

    Writes one annotation line per file (file id followed by phoneme ids)
    plus the phoneme->id mapping as JSON, and returns the annotations.
    """
    print("Phonemize")
    validated = pandas.read_csv(f"{DB_IN}/{lang}/validated.tsv", sep="\t")
    # file stem (path minus 3-char extension + dot) -> sentence
    sentence_by_file = {
        p[:-4]: s
        for p, s in zip(validated["path"], validated["sentence"])
    }

    phonemes_to_id = {}
    annotations = []
    total = len(files)

    for index, file in enumerate(files):
        phonemes = phonemizer.phonemize(sentence_by_file[file],
                                        language=lang,
                                        backend="espeak")
        # assign ids on first sight, skipping spaces
        ids = [
            str(phonemes_to_id.setdefault(char, len(phonemes_to_id)))
            for char in phonemes if char != " "
        ]
        annotations.append(file + " " + " ".join(ids))
        print(f"\r{index+1}/{total} ....", end="", flush=True)
    print("\r                           \r", end="", flush=True)

    with open(f"{DB_OUT}/{lang}/annotations.txt", "w") as f:  # Annotation file
        f.write("\n".join(annotations))

    with open(f"{DB_OUT}/{lang}/phonemes_to_id.json",
              "w") as f:  # Phonemes to id
        json.dump(phonemes_to_id, f)

    return annotations
Ejemplo n.º 13
0
def collate_fn_common(batch,
                      data_type,
                      max_len_mel=2000,
                      reconstructed_phoneme=False):
    """Collate (parts, flag) samples into padded tensors for training.

    NOTE(review): relies on module-level names (phonemize, separator,
    tokenizer, token_indexer, vocab, train_audio_transforms,
    test_audio_transforms, create_mask_pad, nn_util) presumably defined
    elsewhere in this file — confirm before reuse.

    Returns:
        (sentences_tensor, sentences_mask, spectrograms, mel_mask,
         waveforms, waveform_l, client_ids, example_id)
    """
    # peek at the first sample: its boolean flag selects between the two
    # input layouts handled below
    ggg, truefalse = batch[0]
    batch_size = len(batch)
    parts = 6
    final_list = []

    if truefalse:
        # samples are already pre-split: each holds `parts` lists of
        # length seq_len, flattened into batch_size * seq_len slots
        seq_len = len(ggg[1])
        del ggg
        example_id = []
        for n in range(parts):
            final_list.append([None] * (batch_size * seq_len))
        #
        for i in range(batch_size):
            part, truefalse = batch[i]
            for j in range(parts):
                final_list[j][i * seq_len:(i + 1) * seq_len] = part[j]
            example_id.append(i * seq_len)  # start offset of each example
        #
    else:
        # raw samples: (waveform, sample_rate, client_id, sentence);
        # derive waveform length and sentence length here
        del ggg
        for n in range(parts):
            final_list.append([None] * (batch_size))
        #
        for i in range(batch_size):
            part, truefalse = batch[i]
            waveform, sample_rate, client_id, sentence = part
            part = [
                waveform, waveform.shape[1], sample_rate, client_id, sentence,
                len(sentence)
            ]
            for j in range(parts):
                final_list[j][i] = part[j]
        example_id = None
    # unpack the six parallel per-sample lists
    waveforms, waveform_l, sample_rates, client_ids, sentences, sentences_l = final_list
    #
    if not reconstructed_phoneme:
        # convert raw sentences to phoneme strings with explicit separators
        sentences = phonemize(sentences,
                              backend='espeak',
                              with_stress=False,
                              separator=separator.Separator(phone=' ',
                                                            syllable='',
                                                            word='- '))
    for i in range(len(sentences_l)):
        sentences_l[i] = len(sentences[i])
    # the longest sentence determines the token padding lengths
    biggest_l_index = sentences_l.index(max(sentences_l))

    token = tokenizer.tokenize(sentences[biggest_l_index])
    text_field = TextField(token, token_indexer)
    text_field.index(vocab)
    padding_lengths = text_field.get_padding_lengths()

    list_tokens = []
    mel_list = [None] * len(sample_rates)
    mel_list_l = [None] * len(sample_rates)
    #

    for i in range(len(sentences_l)):
        token = tokenizer.tokenize(sentences[i])
        text_field = TextField(token, token_indexer)
        text_field.index(vocab)
        tensor_dict = text_field.as_tensor(padding_lengths)
        list_tokens.append(tensor_dict)
        # mel spectrogram; augmenting transforms only during training
        if data_type == "train":
            mel_list[i] = train_audio_transforms(
                waveforms[i]).squeeze(0).transpose(0, 1)
        else:
            mel_list[i] = test_audio_transforms(
                waveforms[i]).squeeze(0).transpose(0, 1)
        mel_list_l[i] = mel_list[i].shape[0]
        waveforms[i] = waveforms[i].squeeze(0)
        #
    waveforms = nn.utils.rnn.pad_sequence(waveforms,
                                          batch_first=True).unsqueeze(1)
    # append a max_len_mel dummy so pad_sequence pads everything to
    # max_len_mel, then drop it again below
    mel_list.append(torch.zeros((max_len_mel, mel_list[0].shape[1])))
    spectrograms = nn.utils.rnn.pad_sequence(
        mel_list, batch_first=True).unsqueeze(1).transpose(2, 3)
    spectrograms = spectrograms[1:]
    highest_mel_l = spectrograms[0].shape[2]
    mel_mask = create_mask_pad(highest_mel_l, mel_list_l)
    #
    text_field_tensors = text_field.batch_tensors(list_tokens)
    #
    sentences_tensor = nn_util.get_token_ids_from_text_field_tensors(
        text_field_tensors)
    # True where padded (inverted attention mask)
    sentences_mask = nn_util.get_text_field_mask(text_field_tensors) == False

    return sentences_tensor, sentences_mask, spectrograms, mel_mask, waveforms, waveform_l, client_ids, example_id
Ejemplo n.º 14
0
def get_phonetic(w):
    """Return the espeak phonemization of *w*."""
    return phonemize(w, backend="espeak")
Ejemplo n.º 15
0
def test_relative():
    """phonemize is importable directly from the package root."""
    from phonemizer import phonemize
    result = phonemize('a')
    assert result == 'eɪ '
Ejemplo n.º 16
0
def test_absolute():
    """phonemize is importable via its full module path."""
    from phonemizer.phonemize import phonemize
    result = phonemize('a')
    assert result == 'eɪ '
Ejemplo n.º 17
0
def _test(text):
    """Phonemize *text* with festival using explicit separators."""
    sep = separator.Separator(' ', '|', '-')
    return phonemize(text,
                     language='en-us',
                     backend='festival',
                     strip=True,
                     separator=sep)
Ejemplo n.º 18
0
    def string_to_tensor(self, text, view=False, return_string=False):
        """
        Fixes unicode errors, expands some abbreviations,
        turns graphemes into phonemes and then vectorizes
        the sequence either as IDs to be fed into an embedding
        layer, or as an articulatory matrix.

        Args:
            text: grapheme string to convert.
            view: when True, print the intermediate phoneme string.
            return_string: when True, return the phoneme string
                (suffixed with '#') instead of the ID tensor.

        Returns:
            torch.LongTensor of shape (1, sequence_length), or the
            phoneme string when return_string is True.
        """
        # clean unicode errors, expand abbreviations
        utt = clean(text,
                    fix_unicode=True,
                    to_ascii=False,
                    lower=False,
                    lang=self.clean_lang)
        # NOTE(review): return value discarded — if this helper returns
        # the expanded string rather than mutating state, the call has
        # no effect; confirm against its definition.
        self.expand_abbrevations(utt)

        # phonemize with code switching
        if self.use_codeswitching:
            # token-level language identification, then group adjacent
            # same-language words into chunks
            cs_dicts = self.lid.identify(utt)
            chunks = []
            for i in range(len(cs_dicts)):
                word = cs_dicts[i]['word']
                cs_lang = cs_dicts[i]['entity']
                # print(word, "\t", cs_lang)
                # map LID labels to espeak g2p languages; Spanish is the
                # default fallback
                if cs_lang == 'spa' or cs_lang == 'other':
                    g2p_lang = 'es'
                elif cs_lang == 'en':
                    g2p_lang = 'en-us'
                elif cs_lang == 'ne':
                    # named entities: English only for known city names
                    if word in self.en_cities:
                        g2p_lang = 'en-us'
                    else:
                        g2p_lang = 'es'
                else:
                    g2p_lang = 'es'

                if i == 0:
                    current_lang = g2p_lang
                    current_chunk = word
                    continue

                if word.startswith('##') or word.startswith(
                        "'") or word == "s":
                    g2p_lang = current_lang  # wordpieces of one word should all have the same language

                if g2p_lang == current_lang:
                    current_chunk += " " + word
                else:
                    # language changed: close the current chunk
                    chunks.append({
                        'word': current_chunk,
                        'lang': current_lang
                    })
                    current_chunk = word
                    current_lang = g2p_lang
            chunks.append({'word': current_chunk, 'lang': current_lang})
            chunks = self.postprocess_codeswitch(chunks)

            # phonemize chunks
            for chunk in chunks:
                # chunk = self.postprocess_codeswitch_simple(chunk) # uncomment this line if postprocessing doesn't work
                seq = chunk['word']
                g2p_lang = chunk['lang']
                # print('seq: ', seq, '\t', g2p_lang)
                # pause-like punctuation collapses to '~' (',' mapped
                # last), whitespace controls and '/' become spaces,
                # inverted marks are dropped
                phones_chunk = phonemizer.phonemize(seq,
                                                    language_switch='remove-flags',
                                                    backend="espeak",
                                                    language=g2p_lang,
                                                    preserve_punctuation=True,
                                                    strip=True,
                                                    punctuation_marks=';:,.!?¡¿—…"«»“”~/',
                                                    with_stress=self.use_stress).replace(";", ",") \
                    .replace(":", ",").replace('"', ",").replace("-", ",").replace("-", ",").replace("\n", " ") \
                    .replace("\t", " ").replace("/", " ").replace("¡", "").replace("¿", "").replace(",", "~")

                if g2p_lang == 'en-us':
                    phones_chunk = self.map_phones(phones_chunk)
                # longer chunks get pause markers on both sides
                if len(phones_chunk.split()) > 4:
                    phones_chunks.append("~" + phones_chunk + "~")
                else:
                    phones_chunks.append(phones_chunk)

            phones = ' '.join(phones_chunks)
            phones = phones.replace(" ~", "~").replace(" .", ".").replace(
                " !", "!").replace(" ?", "?").lstrip()
            phones = re.sub("~+", "~", phones)
        else:
            # just phonemize without code switching
            phones = phonemizer.phonemize(utt,
                                          language_switch='remove-flags',
                                          backend="espeak",
                                          language=self.g2p_lang,
                                          preserve_punctuation=True,
                                          strip=True,
                                          punctuation_marks=';:,.!?¡¿—…"«»“”~/',
                                          with_stress=self.use_stress).replace(";", ",") \
                .replace(":", ",").replace('"', ",").replace("-", ",").replace("-", ",").replace("\n", " ") \
                .replace("\t", " ").replace("/", " ").replace("¡", "").replace("¿", "").replace(",", "~")
            phones = re.sub("~+", "~", phones)

        if not self.use_prosody:
            # retain ~ as heuristic pause marker, even though all other symbols are removed with this option.
            # also retain . ? and ! since they can be indicators for the stop token
            phones = phones.replace("ˌ", "").replace("ː", "").replace(
                "ˑ", "").replace("˘", "").replace("|", "").replace("‖", "")

        if not self.use_word_boundaries:
            phones = phones.replace(" ", "")
        else:
            phones = re.sub(r"\s+", " ", phones)

        # '+' marks the start of the sequence
        phones = "+" + phones

        # I have no idea how this happened, but the synthesis just cannot pronounce ɔ.
        # Seems like it did not occur in the training data, maybe aligner removed it? As hacky fix, use o instead.
        phones = phones.replace("ɔ", "o") + "~"
        # phones = self.map_phones(phones)

        if view:
            print("Phonemes: \n{}\n".format(phones))

        phones_vector = list()
        # turn into numeric vectors, one ID per character
        for char in phones:
            if self.allow_unknown:
                phones_vector.append(
                    self.ipa_to_vector.get(char, self.default_vector))
            else:
                # unknown symbols are silently skipped in this mode
                if char in self.ipa_to_vector.keys():
                    phones_vector.append(self.ipa_to_vector[char])
        if self.use_explicit_eos:
            phones_vector.append(self.ipa_to_vector["end_of_input"])

        # combine tensors and return
        if not return_string:
            return torch.LongTensor(phones_vector).unsqueeze(0)
        else:
            return phones + "#"
Ejemplo n.º 19
0
 def text2phoneme(self, batch):
     """Replace the batch's sentence with its espeak phonemization."""
     sentence = batch["sentence"]
     batch["sentence"] = phonemize(
         sentence, language=self.language, backend="espeak")
     return batch
Ejemplo n.º 20
0
def main():
    """Phonemize a text from command-line arguments"""
    args = parse_args()

    # custom espeak/festival paths must be set before the version
    # message is generated
    if args.espeak_path:
        EspeakBackend.set_espeak_path(args.espeak_path)
    if args.festival_path:
        FestivalBackend.set_festival_path(args.festival_path)

    # --version: print and stop
    if args.version:
        print(version.version())
        return

    # --list-languages: print the supported languages and stop
    if args.list_languages:
        requested = ([args.backend] if args.backend else
                     ['festival', 'segments', 'espeak', 'espeak-mbrola'])
        for name in requested:
            pairs = sorted(BACKENDS_MAP[name].supported_languages().items())
            listing = '\n'.join(f'\t{k}\t->\t{v}' for k, v in pairs)
            print(f'supported languages for {name} are:\n' + listing)
        return

    # espeak is the default backend
    args.backend = args.backend or 'espeak'

    # logging verbosity from --verbose/--quiet
    if args.verbose:
        verbosity = 'verbose'
    elif args.quiet:
        verbosity = 'quiet'
    else:
        verbosity = 'normal'
    log = logger.get_logger(verbosity=verbosity)

    # open input/output streams when given as file names
    streamin = args.input
    if isinstance(streamin, str):
        streamin = codecs.open(streamin, 'r', encoding='utf8')
    log.debug('reading from %s', streamin.name)

    streamout = args.output
    if isinstance(streamout, str):
        streamout = codecs.open(streamout, 'w', 'utf8')
    log.debug('writing to %s', streamout.name)

    # phoneme/syllable/word separators (mbrola has no word level)
    if args.backend == 'espeak-mbrola':
        log.debug('using espeak-mbrola backend: ignoring word separator')
        sep = separator.Separator(phone=args.phone_separator,
                                  syllable=None,
                                  word=None)
    else:
        sep = separator.Separator(phone=args.phone_separator,
                                  syllable=args.syllable_separator,
                                  word=args.word_separator)
    log.debug('separator is %s', sep)

    # phonemize the input text, one stripped line at a time
    out = phonemize([line.strip() for line in streamin],
                    language=args.language,
                    backend=args.backend,
                    separator=sep,
                    strip=args.strip,
                    preserve_punctuation=args.preserve_punctuation,
                    punctuation_marks=args.punctuation_marks,
                    with_stress=args.with_stress,
                    language_switch=args.language_switch,
                    njobs=args.njobs,
                    logger=log)

    if out:
        streamout.write('\n'.join(out) + '\n')
Ejemplo n.º 21
0
def test_bad():
    """Invalid words_mismatch modes and backends must raise RuntimeError."""
    for kwargs in ({'words_mismatch': 'foo'},
                   {'backend': 'festival', 'words_mismatch': 'remove'}):
        with pytest.raises(RuntimeError):
            phonemize('', **kwargs)
Ejemplo n.º 22
0
def main():
    """Phonemize a text from command-line arguments"""
    args = parse_args()

    # custom espeak/festival binaries must be configured before the
    # version message is generated
    if args.espeak_library:
        BACKENDS['espeak'].set_library(args.espeak_library)
    if args.festival_executable:
        BACKENDS['festival'].set_executable(args.festival_executable)

    # --version: print and stop
    if args.version:
        print(version.version())
        return

    # --list-languages: print the supported languages and stop
    if args.list_languages:
        print(list_languages(args.backend))
        return

    # espeak is the default backend
    args.backend = args.backend or 'espeak'

    # logging verbosity from --verbose/--quiet
    log = get_logger(args.verbose, args.quiet)

    # open input/output streams
    streamin = setup_stream(args.input, 'r')
    log.debug('reading from %s', streamin.name)
    streamout = setup_stream(args.output, 'w')
    log.debug('writing to %s', streamout.name)

    # phoneme/syllable/word separators (mbrola has no word level)
    if args.backend == 'espeak-mbrola':
        log.debug('using espeak-mbrola backend: ignoring word separator')
        sep = separator.Separator(phone=args.phone_separator,
                                  syllable=None,
                                  word=None)
    else:
        sep = separator.Separator(phone=args.phone_separator,
                                  syllable=args.syllable_separator,
                                  word=args.word_separator)
    log.debug('separator is %s', sep)

    # optional "input ||| output" style prefixing of the original text
    if args.prepend_text:
        input_output_separator = sep.input_output_separator(args.prepend_text)
        log.debug('prepend input text to output, separator is "%s"',
                  input_output_separator)
    else:
        input_output_separator = False

    # phonemize the input text
    out = phonemize(streamin.readlines(),
                    language=args.language,
                    backend=args.backend,
                    separator=sep,
                    strip=args.strip,
                    prepend_text=args.prepend_text,
                    preserve_punctuation=args.preserve_punctuation,
                    punctuation_marks=args.punctuation_marks,
                    with_stress=args.with_stress,
                    tie=args.tie,
                    language_switch=args.language_switch,
                    words_mismatch=args.words_mismatch,
                    njobs=args.njobs,
                    logger=log)

    if out and input_output_separator:
        lines = (f'{line[0]} {input_output_separator} {line[1]}'
                 for line in out)
        streamout.write(os.linesep.join(lines) + os.linesep)
    elif out:
        streamout.write(os.linesep.join(out) + os.linesep)
Ejemplo n.º 23
0
from phonemizer import phonemize

# demo: phonemize a single word with US English espeak
print(phonemize('English', language='en-us'))
Ejemplo n.º 24
0
def text2phoneme(text):
    """Phonemize *text*, separating units with the module-level char_sep.

    Fixed: the keyword argument was misspelled ``seperator`` — phonemize
    has no such parameter, so every call raised a TypeError/RuntimeError.
    """
    text = phonemizer.phonemize(text, separator=char_sep)
    return text