def test_language_switch():
    """Exercise every ``language_switch`` policy of the espeak backend.

    The French input contains English words, which makes espeak emit
    language switch flags such as ``(en)`` and ``(fr)``.

    """
    text = '\n'.join((
        "j'aime l'anglais",
        "j'aime le football",
        "football",
        "surtout le real madrid",
        "n'utilise pas google"))

    def run(**options):
        # build a fresh French backend and phonemize the whole text
        return EspeakBackend('fr-fr', **options)._phonemize_aux(
            text, separator.Separator(), True)

    flagged = [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə- (en)fʊtbɔːl(fr)',
        '(en)fʊtbɔːl(fr)',
        'syʁtu lə- (en)ɹiəl(fr) madʁid',
        'nytiliz pa (en)ɡuːɡəl(fr)']

    assert run(language_switch='keep-flags') == flagged

    # default behavior is to keep the flags
    assert run() == flagged

    assert run(language_switch='remove-flags') == [
        'ʒɛm lɑ̃ɡlɛ',
        'ʒɛm lə- fʊtbɔːl',
        'fʊtbɔːl',
        'syʁtu lə- ɹiəl madʁid',
        'nytiliz pa ɡuːɡəl']

    # only the utterance without any language switch survives
    assert run(language_switch='remove-utterance') == ['ʒɛm lɑ̃ɡlɛ']

    # an unknown policy is rejected when the backend is built
    with pytest.raises(RuntimeError):
        EspeakBackend('fr-fr', language_switch='foo')
def test_french():
    """Phonemize a French utterance with explicit word/phone separators"""
    french = EspeakBackend('fr-fr')
    custom_sep = separator.Separator(
        word=';eword ', syllable=None, phone=' ')

    # strip is False: the trailing word separator is kept
    result = french._phonemize_aux(u'bonjour le monde', custom_sep, False)
    assert result == [u'b ɔ̃ ʒ u ʁ ;eword l ə ;eword m ɔ̃ d ;eword ']
def test_separator_3():
    """Segments backend with a space word separator and '_' between phones"""
    cree = SegmentsBackend('cree')
    utterance = 'achi acho'
    custom_sep = separator.Separator(word=' ', syllable=None, phone='_')

    # without stripping, every token keeps its trailing separator
    unstripped = cree.phonemize(utterance, separator=custom_sep)
    assert unstripped == u'ʌ_tʃ_ɪ_ ʌ_tʃ_ʊ_ '

    stripped = cree.phonemize(utterance, separator=custom_sep, strip=True)
    assert stripped == u'ʌ_tʃ_ɪ ʌ_tʃ_ʊ'
def text2phone(text, char2code):
    """Convert a text into a tensor of phone codes.

    The text is phonemized with a single space after each phone and empty
    word and syllable separators (positional order presumed to be word,
    syllable, phone -- confirm against ``separator.Separator``).

    Parameters
    ----------
    text : the text to phonemize
    char2code : mapping from a phone string to its integer code

    Returns
    -------
    A ``torch.LongTensor`` with one code per phone.

    Raises
    ------
    KeyError if a phone is missing from `char2code`.

    """
    phone_separator = separator.Separator('', '', ' ')
    phonemized = phonemize.phonemize(text, separator=phone_separator)

    # Drop every empty token produced by the split. The previous
    # `list.remove('')` raised ValueError when no empty token was present
    # and left any additional empty token in place.
    phones = [phone for phone in phonemized.split(' ') if phone]

    return torch.LongTensor([char2code[phone] for phone in phones])
def setup(self): # just a name shortcut self.p = lambda text: phonemize(text, language='en-us', backend='festival', strip=True, separator=separator.Separator( ' ', '|', '-'))
def test_separator_5():
    """Segments backend with a space between phones and '_' between words"""
    cree = SegmentsBackend('cree')
    utterance = 'achi acho'
    custom_sep = separator.Separator(phone=' ', word='_')

    unstripped = cree.phonemize(utterance, separator=custom_sep)
    assert unstripped == u'ʌ tʃ ɪ _ʌ tʃ ʊ _'

    stripped = cree.phonemize(utterance, separator=custom_sep, strip=True)
    assert stripped == u'ʌ tʃ ɪ_ʌ tʃ ʊ'
def main():
    """Phonemize a text from command-line arguments

    Reads the whole input stream, phonemizes it and writes the result
    followed by a newline to the output stream. Nothing is written when
    the phonemized output is empty. Streams opened here from a file name
    are closed before returning; streams received already open from the
    argument parser are left untouched.

    """
    args = parse_args()

    if args.version:
        print(version())
        return

    # configure logging according to --verbose option. If verbose,
    # init a logger to output on stderr. Else init a logger going to
    # the void.
    logger = logging.getLogger()
    logger.handlers = []
    logger.setLevel(logging.DEBUG)
    if args.verbose:
        handler = logging.StreamHandler(sys.stderr)
        handler.setFormatter(logging.Formatter('%(message)s'))
    else:
        handler = logging.NullHandler()
    logger.addHandler(handler)

    # only the streams opened by this function are closed on exit
    opened_here = []
    try:
        # configure input as a readable stream
        streamin = args.input
        if isinstance(streamin, str):
            streamin = codecs.open(streamin, 'r', encoding='utf8')
            opened_here.append(streamin)
        logger.debug('reading from %s', streamin.name)

        # configure output as a writable stream
        streamout = args.output
        if isinstance(streamout, str):
            streamout = codecs.open(streamout, 'w', 'utf8')
            opened_here.append(streamout)
        logger.debug('writing to %s', streamout.name)

        # configure the separator for phonemes, syllables and words.
        sep = separator.Separator(
            phone=args.phone_separator,
            syllable=args.syllable_separator,
            word=args.word_separator)
        logger.debug('separator is %s', sep)

        # load the input text (python2 optionally needs an extra decode)
        text = streamin.read()
        try:
            text = text.decode('utf8')
        except (AttributeError, UnicodeEncodeError):
            # deliberate: the text is already decoded
            pass

        # phonemize the input text
        out = phonemize.phonemize(
            text,
            language=args.language,
            backend=args.backend,
            separator=sep,
            strip=args.strip,
            use_sampa=args.sampa,
            njobs=args.njobs,
            logger=logger)

        if out:
            streamout.write(out + '\n')
    finally:
        for stream in opened_here:
            stream.close()
def test_separator_4():
    """Segments backend with a space between phones and no word separator"""
    cree = SegmentsBackend('cree')
    utterance = 'achi acho'

    # TODO bug when sep.phone == ' ' with no sep.word
    custom_sep = separator.Separator(phone=' ', word='')

    unstripped = cree.phonemize(utterance, separator=custom_sep)
    assert unstripped == u'ʌ tʃ ɪ ʌ tʃ ʊ '

    stripped = cree.phonemize(utterance, separator=custom_sep, strip=True)
    assert stripped == u'ʌ tʃ ɪʌ tʃ ʊ'
def test_punctuation(text, strip, sep):
    """Check the en-us espeak output of `text` for the given strip/separator

    The expected string depends only on `strip` and on whether `sep` is
    the default separator.

    """
    uses_default_separator = sep == separator.Separator()

    if uses_default_separator and strip:
        expected = 'ɐ kɑːmə ɐ pɔɪnt'
    elif uses_default_separator:
        expected = 'ɐ kɑːmə ɐ pɔɪnt '
    elif strip:
        expected = 'ɐ_k ɑː m ə_ɐ_p ɔɪ n t'
    else:
        expected = 'ɐ _k ɑː m ə _ɐ _p ɔɪ n t _'

    english = EspeakBackend('en-us')
    output = english.phonemize(text, strip=strip, separator=sep)
    assert expected == output
def text2phone(text, char2code):
    """Convert a text into a tensor of phone codes using festival.

    The text is phonemized with a single space after each phone and empty
    word and syllable separators (positional order presumed to be word,
    syllable, phone -- confirm against ``separator.Separator``). The text
    and its phones are printed on the standard output.

    Parameters
    ----------
    text : the text to phonemize
    char2code : mapping from a phone string to its integer code

    Returns
    -------
    A ``torch.LongTensor`` with one code per phone.

    Raises
    ------
    KeyError if a phone is missing from `char2code`.

    """
    phone_separator = separator.Separator('', '', ' ')
    phonemized = phonemize(
        text, backend="festival", separator=phone_separator)

    # Drop every empty token produced by the split. The previous
    # `list.remove('')` raised ValueError when no empty token was present
    # and left any additional empty token in place.
    phones = [phone for phone in phonemized.split(' ') if phone]
    print('text %s ~ ph %s' % (text, phones))

    return torch.LongTensor([char2code[phone] for phone in phones])
def main():
    """Phonemize a text from command-line arguments

    Reads the whole input stream, phonemizes it and writes the result
    followed by a newline to the output stream. Nothing is written when
    the phonemized output is empty. Streams opened here from a file name
    are closed before returning; streams received already open from the
    argument parser are left untouched.

    """
    args = parse_args()

    if args.version:
        print(version.version())
        return

    # configure logging according to --verbose/--quiet options
    verbosity = 'normal'
    if args.verbose:
        verbosity = 'verbose'
    elif args.quiet:
        verbosity = 'quiet'
    log = logger.get_logger(verbosity=verbosity)

    # only the streams opened by this function are closed on exit
    opened_here = []
    try:
        # configure input as a readable stream
        streamin = args.input
        if isinstance(streamin, str):
            streamin = codecs.open(streamin, 'r', encoding='utf8')
            opened_here.append(streamin)
        log.debug('reading from %s', streamin.name)

        # configure output as a writable stream
        streamout = args.output
        if isinstance(streamout, str):
            streamout = codecs.open(streamout, 'w', 'utf8')
            opened_here.append(streamout)
        log.debug('writing to %s', streamout.name)

        # configure the separator for phonemes, syllables and words.
        sep = separator.Separator(
            phone=args.phone_separator,
            syllable=args.syllable_separator,
            word=args.word_separator)
        log.debug('separator is %s', sep)

        # load the input text (python2 optionally needs an extra decode)
        text = streamin.read()
        try:
            text = text.decode('utf8')
        except (AttributeError, UnicodeEncodeError):
            # deliberate: the text is already decoded
            pass

        # phonemize the input text
        out = phonemize.phonemize(
            text,
            language=args.language,
            backend=args.backend,
            separator=sep,
            strip=args.strip,
            with_stress=args.with_stress,
            use_sampa=args.sampa,
            language_switch=args.language_switch,
            njobs=args.njobs,
            logger=log)

        if out:
            streamout.write(out + '\n')
    finally:
        for stream in opened_here:
            stream.close()
def test_arabic():
    """Phonemize an Arabic utterance, the output depends on espeak version"""
    import re

    backend = EspeakBackend('ar')
    text = u'السلام عليكم'
    sep = separator.Separator()

    # Compare the version numerically: as strings, a component such as
    # '10' sorts before '3' and would select the wrong expectation.
    version = tuple(
        int(number)
        for number in re.findall(r'\d+', EspeakBackend.version())[:3])

    # Arabic seems to have changed starting at espeak-ng-1.49.3
    if version >= (1, 49, 3):
        expected = [u'ʔassalaːm ʕliːkm ']
    else:
        expected = [u'ʔassalaam ʕaliijkum ']

    out = backend._phonemize_aux(text, sep, False)
    assert out == expected
def test_phone_separator_simple():
    """A '_' phone separator on an English sentence, with and without strip"""
    sentence = 'The lion and the tiger ran'
    underscore_sep = separator.Separator(phone='_')
    english = EspeakBackend('en-us')

    stripped = english.phonemize(
        sentence, separator=underscore_sep, strip=True)
    assert 'ð_ə l_aɪə_n æ_n_d ð_ə t_aɪ_ɡ_ɚ ɹ_æ_n' == stripped

    # without strip, every phone and word keeps its trailing separator
    unstripped = english.phonemize(
        sentence, separator=underscore_sep, strip=False)
    assert 'ð_ə_ l_aɪə_n_ æ_n_d_ ð_ə_ t_aɪ_ɡ_ɚ_ ɹ_æ_n_ ' == unstripped
def test_french_sampa():
    """French mbrola voice: phones split by spaces, no word separator"""
    utterance = u'bonjour le monde'
    mbrola = EspeakMbrolaBackend('mb-fr1')
    phone_sep = separator.Separator(word=None, phone=' ')

    unstripped = mbrola.phonemize(utterance, separator=phone_sep, strip=False)
    assert unstripped == 'b o~ Z u R l @ m o~ d '

    stripped = mbrola.phonemize(utterance, separator=phone_sep, strip=True)
    assert stripped == 'b o~ Z u R l @ m o~ d'

    # an empty text or a lone double quote both give an empty output
    assert '' == mbrola.phonemize('', separator=phone_sep, strip=True)
    assert '' == mbrola.phonemize('"', separator=phone_sep, strip=True)
def main():
    """Phonemize a text from command-line arguments

    Handles the --version and --list-languages requests, else reads the
    input stream line by line, phonemizes the stripped lines and writes
    them newline-joined to the output stream. Nothing is written when the
    phonemized output is empty. Streams opened here from a file name are
    closed before returning; streams received already open from the
    argument parser are left untouched.

    """
    args = parse_args()

    # setup a custom path to espeak and festival if required (this must be
    # done before generating the version message)
    if args.espeak_path:
        EspeakBackend.set_espeak_path(args.espeak_path)
    if args.festival_path:
        FestivalBackend.set_festival_path(args.festival_path)

    # display version information and exit
    if args.version:
        print(version.version())
        return

    # list supported languages and exit
    if args.list_languages:
        backends = (
            ['festival', 'segments', 'espeak', 'espeak-mbrola']
            if not args.backend else [args.backend])
        for backend in backends:
            print(
                f'supported languages for {backend} are:\n' +
                '\n'.join(
                    f'\t{k}\t->\t{v}' for k, v in sorted(
                        BACKENDS_MAP[backend].supported_languages().items())))
        return

    # set default backend as espeak if not specified
    args.backend = args.backend or 'espeak'

    # configure logging according to --verbose/--quiet options
    verbosity = 'normal'
    if args.verbose:
        verbosity = 'verbose'
    elif args.quiet:
        verbosity = 'quiet'
    log = logger.get_logger(verbosity=verbosity)

    # only the streams opened by this function are closed on exit
    opened_here = []
    try:
        # configure input as a readable stream
        streamin = args.input
        if isinstance(streamin, str):
            streamin = codecs.open(streamin, 'r', encoding='utf8')
            opened_here.append(streamin)
        log.debug('reading from %s', streamin.name)

        # configure output as a writable stream
        streamout = args.output
        if isinstance(streamout, str):
            streamout = codecs.open(streamout, 'w', 'utf8')
            opened_here.append(streamout)
        log.debug('writing to %s', streamout.name)

        # configure the separator for phonemes, syllables and words.
        if args.backend == 'espeak-mbrola':
            log.debug('using espeak-mbrola backend: ignoring word separator')
            sep = separator.Separator(
                phone=args.phone_separator,
                syllable=None,
                word=None)
        else:
            sep = separator.Separator(
                phone=args.phone_separator,
                syllable=args.syllable_separator,
                word=args.word_separator)
        log.debug('separator is %s', sep)

        text = [line.strip() for line in streamin]

        # phonemize the input text
        out = phonemize(
            text,
            language=args.language,
            backend=args.backend,
            separator=sep,
            strip=args.strip,
            preserve_punctuation=args.preserve_punctuation,
            punctuation_marks=args.punctuation_marks,
            with_stress=args.with_stress,
            language_switch=args.language_switch,
            njobs=args.njobs,
            logger=log)

        if out:
            streamout.write('\n'.join(out) + '\n')
    finally:
        for stream in opened_here:
            stream.close()
def _test(text, separator=separator.Separator(' ', '|', '-')):
    """Return the stripped en-us festival phonemization of `text`

    The default separator is built from ' ', '|' and '-' given
    positionally.

    """
    options = {
        'language': 'en-us',
        'strip': True,
        'separator': separator}
    return festival.phonemize(text, **options)
def test_phone_separator(text, expected):
    """Check the stripped en-us espeak output with '_' between phones"""
    underscore_sep = separator.Separator(phone='_')
    english = EspeakBackend('en-us')
    assert english.phonemize(
        text, separator=underscore_sep, strip=True) == expected
def test_im():
    """"I'm" and "Im" must be phonemized differently"""
    words_only = separator.Separator(word=' ', syllable='', phone='')

    with_apostrophe = _test("I'm looking for an image", words_only)
    assert with_apostrophe == ['aym luhkaxng faor axn ihmaxjh']

    without_apostrophe = _test("Im looking for an image", words_only)
    assert without_apostrophe == ['ihm luhkaxng faor axn ihmaxjh']
def collate_fn_common(batch, data_type, max_len_mel=2000,
                      reconstructed_phoneme=False):
    """Collate a batch of speech samples into padded tensors.

    Each item of `batch` is a ``(part, flag)`` pair and the flag of the
    first item selects the layout used for the whole batch:

    - flag truthy: `part` holds 6 fields, each a sequence whose length is
      ``len(part[1])`` of the first item (presumably pre-segmented
      examples -- confirm against the dataset). The fields of all items
      are laid out one after another.
    - flag falsy: `part` is ``(waveform, sample_rate, client_id,
      sentence)`` and the two length fields are derived here.

    Parameters
    ----------
    batch : sequence of ``(part, flag)`` pairs, as described above
    data_type : "train" selects `train_audio_transforms`, any other value
        selects `test_audio_transforms`
    max_len_mel : length of the all-zero template added before padding,
        so spectrograms are padded to at least this many frames
    reconstructed_phoneme : when False the sentences are phonemized with
        espeak and their lengths recomputed, when True they are used as is

    Returns
    -------
    The tuple ``(sentences_tensor, sentences_mask, spectrograms, mel_mask,
    waveforms, waveform_l, client_ids, example_id)``. `example_id` is None
    with the falsy layout, else the offset of each item's first example.

    """
    #
    first_part, truefalse = batch[0]
    batch_size = len(batch)
    parts = 6
    if truefalse:
        seq_len = len(first_part[1])
        del first_part
        example_id = []
        final_list = [
            [None] * (batch_size * seq_len) for _ in range(parts)]
        #
        for i in range(batch_size):
            part, truefalse = batch[i]
            for j in range(parts):
                final_list[j][i * seq_len:(i + 1) * seq_len] = part[j]
            example_id.append(i * seq_len)
    #
    else:
        del first_part
        final_list = [[None] * batch_size for _ in range(parts)]
        #
        for i in range(batch_size):
            part, truefalse = batch[i]
            waveform, sample_rate, client_id, sentence = part
            part = [
                waveform, waveform.shape[1], sample_rate, client_id,
                sentence, len(sentence)
            ]
            for j in range(parts):
                final_list[j][i] = part[j]
        example_id = None
    #
    (waveforms, waveform_l, sample_rates, client_ids, sentences,
     sentences_l) = final_list
    #
    if not reconstructed_phoneme:
        sentences = phonemize(
            sentences,
            backend='espeak',
            with_stress=False,
            separator=separator.Separator(
                phone=' ', syllable='', word='- '))
        # the phonemized strings replace the texts, so do their lengths
        for i in range(len(sentences_l)):
            sentences_l[i] = len(sentences[i])

    # the longest sentence fixes the padding lengths of all the others
    biggest_l_index = sentences_l.index(max(sentences_l))
    token = tokenizer.tokenize(sentences[biggest_l_index])
    text_field = TextField(token, token_indexer)
    text_field.index(vocab)
    padding_lengths = text_field.get_padding_lengths()

    list_tokens = []
    mel_list = [None] * len(sample_rates)
    mel_list_l = [None] * len(sample_rates)
    #
    for i in range(len(sentences_l)):
        token = tokenizer.tokenize(sentences[i])
        text_field = TextField(token, token_indexer)
        text_field.index(vocab)
        tensor_dict = text_field.as_tensor(padding_lengths)
        list_tokens.append(tensor_dict)
        #
        if data_type == "train":
            mel_list[i] = train_audio_transforms(
                waveforms[i]).squeeze(0).transpose(0, 1)
        else:
            mel_list[i] = test_audio_transforms(
                waveforms[i]).squeeze(0).transpose(0, 1)
        mel_list_l[i] = mel_list[i].shape[0]
        waveforms[i] = waveforms[i].squeeze(0)
    #
    waveforms = nn.utils.rnn.pad_sequence(
        waveforms, batch_first=True).unsqueeze(1)

    # An all-zero template of max_len_mel frames is appended LAST so that
    # padding reaches at least max_len_mel frames; it is then removed. It
    # must be removed from the end: the previous `[1:]` slice dropped the
    # first real sample and kept the template instead.
    mel_list.append(torch.zeros((max_len_mel, mel_list[0].shape[1])))
    spectrograms = nn.utils.rnn.pad_sequence(
        mel_list, batch_first=True).unsqueeze(1).transpose(2, 3)
    spectrograms = spectrograms[:-1]
    highest_mel_l = spectrograms[0].shape[2]
    mel_mask = create_mask_pad(highest_mel_l, mel_list_l)
    #
    text_field_tensors = text_field.batch_tensors(list_tokens)
    #
    sentences_tensor = nn_util.get_token_ids_from_text_field_tensors(
        text_field_tensors)
    # element-wise inversion of the text field mask
    sentences_mask = nn_util.get_text_field_mask(text_field_tensors) == False
    return (sentences_tensor, sentences_mask, spectrograms, mel_mask,
            waveforms, waveform_l, client_ids, example_id)
def test_im():
    """"I'm" and "Im" must be phonemized differently"""
    words_only = separator.Separator(' ', '', '')

    with_apostrophe = _test("I'm looking for an image", words_only)
    assert with_apostrophe == ['aym luhkaxng faor axn ihmaxjh']

    without_apostrophe = _test("Im looking for an image", words_only)
    assert without_apostrophe == ['ihm luhkaxng faor axn ihmaxjh']
def recognize(self, wav=None):
    """Phonemize ``self.text`` and print the result.

    The engine is selected by a substring of ``self.eng``, tested in this
    order: "fest" (festival, en-us only), "esp" (espeak), "mbr"
    (espeak-mbrola), "seg" (segments). For all engines but festival the
    language is selected by a substring of ``self.lan``.

    Parameters
    ----------
    wav : unused

    Raises
    ------
    SystemExit with status 1, after printing a message, when the engine
    or the language is not recognized or when the espeak-mbrola backend
    is not available.

    """
    def select(choices):
        """Return the value of the first entry with a hint found in lan"""
        for hints, value in choices:
            if any(hint in self.lan for hint in hints):
                return value
        print("Invalid Language specified")
        raise SystemExit(1)

    def phonemize_words(backend):
        """Phonemize the text with marked word ends, without stripping"""
        sep = separator.Separator(word=';eword ', syllable=None, phone=' ')
        return backend.phonemize(self.text, sep, False)

    if "fest" in self.eng:
        # ======= festival english us only =================================
        from phonemizer.backend import FestivalBackend
        out1 = FestivalBackend(
            'en-us', preserve_punctuation=False).phonemize(
                self.text, strip=True)
    elif "esp" in self.eng:
        # ======= espeak ===================================================
        from phonemizer.backend import EspeakBackend
        out1 = phonemize_words(EspeakBackend(select((
            (("en",), 'en-us'),
            (("fr",), 'fr-fr'),
            (("de", "ger"), 'de-de'),
            (("ita",), 'it-it'),
            (("esp", "spa"), 'es-es')))))
    elif "mbr" in self.eng:
        # ====== mbrola ====================================================
        from phonemizer.backend import EspeakMbrolaBackend
        if not EspeakMbrolaBackend.is_available():
            # previously fell through and crashed on an unbound result
            print("espeak-mbrola backend is not available")
            raise SystemExit(1)
        # The backend is now kept (it was built but never assigned), and
        # the Italian and Spanish voices are no longer swapped.
        out1 = phonemize_words(EspeakMbrolaBackend(select((
            (("en",), 'mb-en1'),
            (("fr",), 'mb-fr2'),
            (("de", "ger"), 'mb-de1'),
            (("ita",), 'mb-it3'),
            (("esp", "spa"), 'mb-es1')))))
    elif "seg" in self.eng:
        # ===== segment ====================================================
        from phonemizer.backend import SegmentsBackend
        out1 = phonemize_words(SegmentsBackend(select((
            (("ja",), 'japanese'),))))
    else:
        print("Invalid Language specified")
        raise SystemExit(1)

    # the result string goes back to the speech engine by being printed
    print(out1)
def test_french():
    """Phonemize a French utterance with explicit word/phone separators"""
    custom_sep = separator.Separator(
        word=';eword ', syllable=None, phone=' ')

    # strip is False: the trailing word separator is kept
    result = espeak.phonemize(
        u'bonjour le monde',
        language='fr-fr',
        separator=custom_sep,
        strip=False)
    assert result == [u'b ɔ̃ ʒ u ʁ ;eword l ə- ;eword m ɔ̃ d ;eword ']
'ʒɛm lɑ̃ɡlɛ', 'ʒɛm lə fʊtbɔːl', 'fʊtbɔːl', 'syʁtu lə ɹiəl madʁid', 'nytiliz pa ɡuːɡəl' ] backend = EspeakBackend('fr-fr', language_switch='remove-utterance') out = backend.phonemize(text, separator.Separator(), True) assert out == ['ʒɛm lɑ̃ɡlɛ'] with pytest.raises(RuntimeError): backend = EspeakBackend('fr-fr', language_switch='foo') @pytest.mark.parametrize('text, strip, sep', ((t, s, u) for t in [ 'a comma a point', 'a comma. a point.', 'a comma,, a point.', 'a comma, , a point.', 'a comma? a point!' ] for s in (True, False) for u in (separator.Separator(), separator.Separator(word='_', phone=' ')))) def test_punctuation(text, strip, sep): if sep == separator.Separator(): expected = 'ɐ kɑːmə ɐ pɔɪnt' if strip else 'ɐ kɑːmə ɐ pɔɪnt ' else: expected = ('ɐ_k ɑː m ə_ɐ_p ɔɪ n t' if strip else 'ɐ _k ɑː m ə _ɐ _p ɔɪ n t _') output = EspeakBackend('en-us').phonemize(text, strip=strip, separator=sep) assert expected == output # see https://github.com/bootphon/phonemizer/issues/31 def test_phone_separator_simple(): text = 'The lion and the tiger ran'
def main():
    """Phonemize a text from command-line arguments

    Handles the --version and --list-languages requests, else phonemizes
    the lines of the input stream and writes them to the output stream,
    one per line. With --prepend-text each output line is made of the
    input text, the input/output separator and the phonemized text.

    """
    args = parse_args()

    # setup a custom path to espeak and festival if required (this must be
    # done before generating the version message)
    if args.espeak_library:
        BACKENDS['espeak'].set_library(args.espeak_library)
    if args.festival_executable:
        BACKENDS['festival'].set_executable(args.festival_executable)

    # display version information and exit
    if args.version:
        print(version.version())
        return

    # list supported languages and exit
    if args.list_languages:
        print(list_languages(args.backend))
        return

    # set default backend as espeak if not specified
    args.backend = args.backend or 'espeak'

    # configure logging according to --verbose/--quiet options
    log = get_logger(args.verbose, args.quiet)

    # configure input:output as a readable/writable streams
    streamin = setup_stream(args.input, 'r')
    log.debug('reading from %s', streamin.name)
    streamout = setup_stream(args.output, 'w')
    log.debug('writing to %s', streamout.name)

    # configure the separator for phonemes, syllables and words.
    syllable_separator = args.syllable_separator
    word_separator = args.word_separator
    if args.backend == 'espeak-mbrola':
        log.debug('using espeak-mbrola backend: ignoring word separator')
        syllable_separator = None
        word_separator = None
    sep = separator.Separator(
        phone=args.phone_separator,
        syllable=syllable_separator,
        word=word_separator)
    log.debug('separator is %s', sep)

    input_output_separator = False
    if args.prepend_text:
        input_output_separator = sep.input_output_separator(
            args.prepend_text)
        log.debug(
            'prepend input text to output, separator is "%s"',
            input_output_separator)

    # phonemize the input text
    out = phonemize(
        streamin.readlines(),
        language=args.language,
        backend=args.backend,
        separator=sep,
        strip=args.strip,
        prepend_text=args.prepend_text,
        preserve_punctuation=args.preserve_punctuation,
        punctuation_marks=args.punctuation_marks,
        with_stress=args.with_stress,
        tie=args.tie,
        language_switch=args.language_switch,
        words_mismatch=args.words_mismatch,
        njobs=args.njobs,
        logger=log)

    # nothing is written for an empty output
    if not out:
        return

    if input_output_separator:
        # each item is indexed as (input text, phonemized text)
        lines = (
            f'{line[0]} {input_output_separator} {line[1]}' for line in out)
    else:
        lines = out
    streamout.write(os.linesep.join(lines) + os.linesep)
assert out == ['ʒɛm lɑ̃ɡlɛ'] with pytest.raises(RuntimeError): backend = EspeakBackend('fr-fr', language_switch='foo') @pytest.mark.parametrize( 'text, strip, sep', ((t, s, u) for t in [ 'a comma a point', 'a comma. a point.', 'a comma,, a point.', 'a comma, , a point.', 'a comma? a point!'] for s in (True, False) for u in (separator.Separator(), separator.Separator(word='_', phone=' ')) )) def test_punctuation(text, strip, sep): if sep == separator.Separator(): expected = 'ɐ kɑːmə ɐ pɔɪnt' if strip else 'ɐ kɑːmə ɐ pɔɪnt ' else: expected = ( 'ɐ_k ɑː m ə_ɐ_p ɔɪ n t' if strip else 'ɐ _k ɑː m ə _ɐ _p ɔɪ n t _') output = EspeakBackend('en-us').phonemize(text, strip=strip, separator=sep) assert expected == output # see https://github.com/bootphon/phonemizer/issues/31 def test_phone_separator_simple(): text = 'The lion and the tiger ran'
def _test(text):
    """Return the stripped en-us festival phonemization of `text`

    The separator is built from ' ', '|' and '-' given positionally.

    """
    festival_sep = separator.Separator(' ', '|', '-')
    return phonemize(
        text,
        language='en-us',
        backend='festival',
        strip=True,
        separator=festival_sep)
def _test(text, separator=separator.Separator(
        word=' ', syllable='|', phone='-')):
    """Return the stripped en-us festival phonemization of `text`

    By default words are separated by ' ', syllables by '|' and phones
    by '-'.

    """
    english = FestivalBackend('en-us')
    phonemized = english._phonemize_aux(text, separator, True)
    return phonemized