def test_wav(self, dtype, sample_rate, num_channels, channels_first, enc_params, mocked_write):
    """soundfile_backend.save passes the correct subtype to soundfile.write when saving WAV"""
    filepath = "foo.wav"
    input_tensor = get_wav_data(
        dtype,
        num_channels,
        num_frames=3 * sample_rate,
        normalize=dtype == "float32",
        channels_first=channels_first,
    ).t()
    encoding, bits_per_sample = enc_params

    soundfile_backend.save(
        filepath,
        input_tensor,
        sample_rate,
        channels_first=channels_first,
        encoding=encoding,
        bits_per_sample=bits_per_sample,
    )

    # `call_args.kwargs` is more descriptive, but it requires Python 3.8+
    args = mocked_write.call_args[1]
    assert args["file"] == filepath
    assert args["samplerate"] == sample_rate
    assert args["subtype"] == fetch_wav_subtype(dtype, encoding, bits_per_sample)
    assert args["format"] is None
    self.assertEqual(
        args["data"], input_tensor.t() if channels_first else input_tensor
    )
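# Illustrative only (not part of the original suite): one way `mocked_write`
# could be supplied is by patching `soundfile.write`, so `soundfile_backend.save`
# records its keyword arguments instead of writing a real file. The patch target
# and the parameter values below are assumptions for demonstration.
def test_wav_int16_example(self):
    from unittest.mock import patch

    with patch("soundfile.write") as mocked_write:
        self.test_wav(
            dtype="int16",
            sample_rate=8000,
            num_channels=2,
            channels_first=True,
            enc_params=("PCM_S", 16),
            mocked_write=mocked_write,
        )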
def assert_non_wav(
    self,
    fmt,
    dtype,
    sample_rate,
    num_channels,
    channels_first,
    mocked_write,
    encoding=None,
    bits_per_sample=None,
):
    """soundfile_backend.save passes the correct subtype and format to soundfile.write when saving non-WAV formats"""
    filepath = f"foo.{fmt}"
    input_tensor = get_wav_data(
        dtype,
        num_channels,
        num_frames=3 * sample_rate,
        normalize=False,
        channels_first=channels_first,
    ).t()
    expected_data = input_tensor.t() if channels_first else input_tensor

    soundfile_backend.save(
        filepath,
        input_tensor,
        sample_rate,
        channels_first,
        encoding=encoding,
        bits_per_sample=bits_per_sample,
    )

    # `call_args.kwargs` is more descriptive, but it requires Python 3.8+
    args = mocked_write.call_args[1]
    assert args["file"] == filepath
    assert args["samplerate"] == sample_rate
    if fmt in ["sph", "nist", "nis"]:
        assert args["format"] == "NIST"
    else:
        assert args["format"] is None
    self.assertEqual(args["data"], expected_data)
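# Illustrative caller (an assumption; the real suite likely parameterizes the
# format and dtype): exercises `assert_non_wav` for the SPHERE family, again
# with `soundfile.write` patched so no file is produced.
def test_sphere_example(self):
    from unittest.mock import patch

    with patch("soundfile.write") as mocked_write:
        self.assert_non_wav(
            "sph", "int32", 8000, 2, channels_first=True, mocked_write=mocked_write
        )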
def test_channels_first(self, channels_first):
    """channels_first swaps axes"""
    path = self.get_temp_path("data.wav")
    data = get_wav_data("int32", 2, channels_first=channels_first)
    soundfile_backend.save(path, data, 8000, channels_first=channels_first)

    found = load_wav(path)[0]
    expected = data if channels_first else data.transpose(1, 0)
    self.assertEqual(found, expected, atol=1e-4, rtol=1e-8)
def assert_wav(self, dtype, sample_rate, num_channels, num_frames):
    """`soundfile_backend.save` can save wav format."""
    path = self.get_temp_path("data.wav")
    expected = get_wav_data(
        dtype, num_channels, num_frames=num_frames, normalize=False
    )
    soundfile_backend.save(path, expected, sample_rate)
    found, sr = load_wav(path, normalize=False)
    assert sample_rate == sr
    self.assertEqual(found, expected)
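# Illustrative caller (an assumption; the real suite presumably expands this
# over dtypes, sample rates and channel counts with a parameterization
# decorator). The values below are for demonstration only.
def test_save_wav_int16_example(self):
    self.assert_wav("int16", sample_rate=8000, num_channels=2, num_frames=8000)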
def _test_fileobj(self, ext):
    """Saving audio to a file-like object works"""
    sample_rate = 16000
    path = self.get_temp_path(f'test.{ext}')
    subtype = 'FLOAT' if ext == 'wav' else None

    data = get_wav_data('float32', num_channels=2)
    soundfile.write(path, data.numpy().T, sample_rate, subtype=subtype)
    expected = soundfile.read(path, dtype='float32')[0]

    fileobj = io.BytesIO()
    soundfile_backend.save(fileobj, data, sample_rate, format=ext)
    fileobj.seek(0)
    found, sr = soundfile.read(fileobj, dtype='float32')

    assert sr == sample_rate
    self.assertEqual(expected, found, atol=1e-4, rtol=1e-8)
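# Illustrative callers (assumptions): the per-format tests are expected to be
# thin wrappers around `_test_fileobj`.
def test_fileobj_wav_example(self):
    self._test_fileobj('wav')

def test_fileobj_flac_example(self):
    self._test_fileobj('flac')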
def _assert_non_wav(self, fmt, dtype, sample_rate, num_channels):
    """`soundfile_backend.save` can save non-wav formats.

    Due to precision mismatch and the lack of an alternative way to decode the
    resulting files without using soundfile, only metadata is validated.
    """
    num_frames = sample_rate * 3
    path = self.get_temp_path(f"data.{fmt}")
    expected = get_wav_data(
        dtype, num_channels, num_frames=num_frames, normalize=False
    )
    soundfile_backend.save(path, expected, sample_rate)
    sinfo = soundfile.info(path)
    assert sinfo.format == fmt.upper()
    assert sinfo.frames == num_frames
    assert sinfo.channels == num_channels
    assert sinfo.samplerate == sample_rate
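# Illustrative caller (an assumption; the values are for demonstration only).
def test_save_flac_example(self):
    self._assert_non_wav("flac", "int16", sample_rate=8000, num_channels=2)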
def process_voice_message(message):
    if not message.text:
        bot.reply_to(message, 'А где текст??!')  # "Where is the text??!"
        return

    # normalize the input:
    # convert pluses into accent marks (shown back to the user)
    text_with_accents = pluses_to_accents(message.text)
    # convert accent marks back into pluses (fed to the synthesizer)
    text_normalized = replace_accents(text_with_accents)
    if not text_normalized:
        bot.reply_to(message, 'Ошибка: а где текст?')  # "Error: where is the text?"
        return

    # add a trailing dot if the text does not end with punctuation
    last_char = text_normalized[-1]
    if last_char not in ['.', '!', '?']:
        text_normalized = text_normalized + '.'

    text_len = len(text_normalized)
    if text_len > 150:
        # "Error: your text is longer than 150 characters, namely {text_len}"
        bot.reply_to(
            message,
            f'Ошибка: ваш текст больше 150 символов, а именно {text_len}')
        return

    # "Text with accents: ... Normalized text: ..."
    bot.reply_to(
        message,
        f'Текст c ударениями:\n\n{text_with_accents}\n\nНормализованный текст:\n\n{text_normalized}'
    )

    # synthesize the audio
    audios = apply_tts(texts=[text_normalized],
                       model=model,
                       sample_rate=SAMPLE_RATE,
                       symbols=SYMBOLS,
                       device=device)

    s = 0
    for n, audio_tensor in enumerate(audios):
        # form the filename next to this script
        filename = dirname(abspath(__file__)) + f'/files/file_{n}.wav'
        # save to disk
        soundfile_backend.save(filename, audio_tensor, SAMPLE_RATE)
        # send back to the user
        with open(filename, 'rb') as audio:
            bot.send_voice(message.chat.id, audio)
        # remove the WAV file
        remove(filename)
        # increment the counter
        s = s + 1

    if s == 0:
        # "Could not synthesize anything :("
        bot.reply_to(message, "Ничего не смог синтезировать :(")
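# Illustrative wiring (an assumption; the original snippet does not show how the
# handler is registered): with pyTelegramBotAPI, incoming text messages can be
# routed to `process_voice_message` and the bot started with long polling.
@bot.message_handler(content_types=['text'])
def handle_text_message(message):
    process_voice_message(message)


if __name__ == '__main__':
    bot.infinity_polling()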