def text2audio(self, text):
    """Synthesize *text* to speech and return the audio as Ogg/Opus bytes.

    Streams PCM chunks from the Tinkoff VoiceKit gRPC endpoint and encodes
    them into an in-memory Ogg Opus container.

    :param text: plain text to synthesize
    :return: ``bytes`` containing a complete Ogg/Opus stream
    """
    # Use the channel as a context manager so the underlying connection is
    # released even if the streaming call raises (the original leaked it).
    with grpc.secure_channel(self._endpoint,
                             grpc.ssl_channel_credentials()) as channel:
        stub = tts_pb2_grpc.TextToSpeechStub(channel)
        request = self._build_request(text)
        metadata = authorization_metadata(self._api_key, self._secret_key,
                                          "tinkoff.cloud.tts")
        responses = stub.StreamingSynthesize(request, metadata=metadata)
        # The server reports the expected sample count up front; use it to
        # log an estimated duration before any audio arrives.
        for key, value in responses.initial_metadata():
            if key == "x-audio-num-samples":
                print("Estimated audio duration is "
                      + str(int(value) / self._sample_rate) + " seconds")
                break
        f = io.BytesIO()
        ogg_opus_writer = pyogg.OggOpusWriter(f)
        ogg_opus_writer.set_application("audio")
        ogg_opus_writer.set_sampling_frequency(self._sample_rate)
        ogg_opus_writer.set_channels(1)
        ogg_opus_writer.set_frame_size(20)  # milliseconds
        for stream_response in responses:
            ogg_opus_writer.encode(stream_response.audio_chunk)
        # Finalize the Ogg stream so container headers/trailers are written.
        ogg_opus_writer.close()
        # getvalue() returns the whole buffer regardless of the stream
        # position, so the original's seek(0) was redundant.
        return f.getvalue()
def text2speach(text, tmp_dir='./tmp'):
    """Synthesize *text* and return the resulting audio as Ogg (.oga) bytes.

    Streams LINEAR16 PCM from the TTS service into a temporary WAV file,
    converts it to .oga via ffmpeg, reads the result back and removes both
    temporary files.

    NOTE(review): the function name keeps the original "speach" spelling
    because external callers depend on it.

    :param text: plain text to synthesize
    :param tmp_dir: directory for scratch files (created if missing)
    :return: ``bytes`` of the converted .oga file
    """
    # Make sure the scratch directory exists (the original crashed if not).
    os.makedirs(tmp_dir, exist_ok=True)
    ts = int(time.time() * 10**6)  # microsecond timestamp -> unique names
    wav_tmp = os.path.join(tmp_dir, '{}.wav'.format(ts))
    oga_tmp = os.path.join(tmp_dir, '{}.oga'.format(ts))
    try:
        with wave.open(wav_tmp, "wb") as f:
            f.setframerate(sample_rate)
            f.setnchannels(1)
            f.setsampwidth(2)  # LINEAR16 -> 2 bytes per sample
            # Close the gRPC channel deterministically (original leaked it).
            with grpc.secure_channel(endpoint,
                                     grpc.ssl_channel_credentials()) as channel:
                stub = tts_pb2_grpc.TextToSpeechStub(channel)
                request = build_request(text)
                metadata = authorization_metadata(api_key, secret_key,
                                                  "tinkoff.cloud.tts")
                responses = stub.StreamingSynthesize(request, metadata=metadata)
                for stream_response in responses:
                    f.writeframes(stream_response.audio_chunk)
        # Convert WAV -> OGA with ffmpeg.
        stream = ffmpeg.input(wav_tmp)
        stream = ffmpeg.output(stream, oga_tmp)
        ffmpeg.run(stream, overwrite_output=True)
        with open(oga_tmp, 'rb') as f:
            fp = f.read()
        return fp
    finally:
        # Remove temp files even when synthesis or conversion fails
        # (the original left them behind on any error).
        for path in (wav_tmp, oga_tmp):
            if os.path.exists(path):
                os.remove(path)
def __init__(self, phrase):
    """Prepare SSML and plain-text variants of *phrase* and start a
    streaming synthesis request against the TTS service."""
    # Every space becomes ". " — inserts a pause between words
    # (preserves the original behavior).
    prepared = phrase.replace(' ', '. ')
    self._ssml = ''.join(['<speak><p>', prepared, '</p></speak>'])
    # Plain-text variant: the prepared phrase with any markup tags removed.
    self._text = re.sub(r'\<[^>]*\>', '', prepared)
    channel = grpc.secure_channel(ENDPOINT, grpc.ssl_channel_credentials())
    stub = tts_pb2_grpc.TextToSpeechStub(channel)
    synthesis_input = tts_pb2.SynthesisInput(text=self._text, ssml=self._ssml)
    audio_config = tts_pb2.AudioConfig(
        audio_encoding=tts_pb2.LINEAR16,
        speaking_rate=1,
        sample_rate_hertz=SAMPLE_RATE,
    )
    request = tts_pb2.SynthesizeSpeechRequest(
        input=synthesis_input,
        audio_config=audio_config,
    )
    auth = authorization_metadata(API_KEY, SECRET_KEY, "tinkoff.cloud.tts")
    self._responses = stub.StreamingSynthesize(request, metadata=auth)
def main():
    """CLI entry point: synthesize the parsed input text into the
    requested output audio file."""
    args = BaseSynthesisParser().parse_args()
    # LINEAR16 is only served at 48 kHz; fail early with a clear message.
    if args.encoding == tts_pb2.LINEAR16 and args.rate != 48000:
        raise ValueError("LINEAR16 supports only 48kHz for now, use RAW_OPUS")
    with audio_open_write(args.output_file, args.encoding, args.rate) as writer:
        auth = authorization_metadata(args.api_key, args.secret_key,
                                      "tinkoff.cloud.tts")
        stub = tts_pb2_grpc.TextToSpeechStub(make_channel(args))
        request = build_synthesis_request(args, args.input_text)
        for chunk in stub.StreamingSynthesize(request, metadata=auth):
            writer.write(chunk.audio_chunk)
def generate(text, chat_id):
    """Synthesize *text* into ``synthesised<chat_id>.wav`` (16-bit mono PCM).

    :param text: plain text to synthesize
    :param chat_id: used only to build a per-chat output file name
    """
    with wave.open(f"synthesised{chat_id}.wav", "wb") as f:
        f.setframerate(sample_rate)
        f.setnchannels(1)
        f.setsampwidth(2)  # LINEAR16 -> 2 bytes per sample
        # Context manager closes the gRPC channel (the original leaked it).
        with grpc.secure_channel(endpoint,
                                 grpc.ssl_channel_credentials()) as channel:
            stub = tts_pb2_grpc.TextToSpeechStub(channel)
            request = build_request(text)
            metadata = authorization_metadata(api_key, secret_key,
                                              "tinkoff.cloud.tts")
            responses = stub.StreamingSynthesize(request, metadata=metadata)
            # Server-reported sample count lets us log the expected duration
            # before any audio arrives.
            for key, value in responses.initial_metadata():
                if key == "x-audio-num-samples":
                    print("Estimated audio duration is "
                          + str(int(value) / sample_rate) + " seconds")
                    break
            for stream_response in responses:
                f.writeframes(stream_response.audio_chunk)
Библиотеки просто набиты ими. </s> </p> </speak> """), audio_config=tts_pb2.AudioConfig( audio_encoding=tts_pb2.LINEAR16, sample_rate_hertz=sample_rate, ), ) pyaudio_lib = pyaudio.PyAudio() f = pyaudio_lib.open(output=True, channels=1, format=pyaudio.paInt16, rate=sample_rate) stub = tts_pb2_grpc.TextToSpeechStub( grpc.secure_channel(endpoint, grpc.ssl_channel_credentials())) request = build_request() metadata = authorization_metadata(api_key, secret_key, "tinkoff.cloud.tts") responses = stub.StreamingSynthesize(request, metadata=metadata) for key, value in responses.initial_metadata(): if key == "x-audio-num-samples": print("Estimated audio duration is {:.2f} seconds".format( int(value) / sample_rate)) break for stream_response in responses: f.write(stream_response.audio_chunk)
def repeat_all_message(message):
    """Telegram voice-message handler driving a quest-style game bot.

    Pipeline: download the incoming voice note, run speech-to-text via the
    external ``a.sh`` script, advance the per-user game state machine based
    on the recognized text, then synthesize the bot's reply with Yandex TTS
    (via ``b.sh``/curl) and send it back as a voice message.

    NOTE(review): nesting below is reconstructed from a whitespace-mangled
    source; the money-gain/progress-bar section is assumed to belong to the
    returning-user branch — confirm against the original file.
    """
    # Download the user's voice note to a fixed local file.
    file_info = bot.get_file(message.voice.file_id)
    file = requests.get('https://api.telegram.org/file/bot{0}/{1}'.format(token, file_info.file_path))
    with open('voice.ogg','wb') as f:
        f.write(file.content)
    # a.sh is expected to run speech recognition on voice.ogg and leave a
    # JSON payload with a 'result' key in tmp.txt.
    command = ['bash','a.sh']
    res = ((subprocess.run(command,shell=False)))
    with open('tmp.txt','r') as f:
        res = json.loads(f.read())['result']
    print(res)
    user_id = str(message.from_user.id) if message.from_user else '<unknown>'
    text = res  # recognized phrase drives the state transitions below
    # NOTE(review): this WAV file is configured but never written to, and
    # the stub created here is unused dead code; the gRPC channel is also
    # never closed — candidate for removal.
    with wave.open("synthesised123.wav", "wb") as f:
        f.setframerate(sample_rate)
        f.setnchannels(1)
        f.setsampwidth(2)
        stub = tts_pb2_grpc.TextToSpeechStub(grpc.secure_channel(endpoint, grpc.ssl_channel_credentials()))
    if user_id not in user_stages:
        # First contact: initialize stage 1 and [level, money] = [0, 0].
        user_stages[user_id]=1
        user_data[user_id] = [0,0]
        answer = stage_dict[user_stages[user_id]]
        _send(message, response=stage_dict[user_stages[user_id]])
    else:
        # Returning user: show progress, then advance the state machine.
        printbar(user_data[user_id][1], 60000)
        # NOTE(review): photo/voice file handles in this function are opened
        # without close() — they leak; wrap in `with` when refactoring.
        photo = open('fig1.png', 'rb')
        bot.send_photo(message.chat.id, photo)
        _send(message, response='Ваш уровень: '+str(user_data[user_id][0])+'\nВаше количество денег:'+str(user_data[user_id][1])+"\n")
        if 'anything' in stage_shifts[user_stages[user_id]]:
            # 'anything' wildcard: any utterance advances the stage.
            user_stages[user_id]= stage_shifts[user_stages[user_id]]['anything']
            _send(message, response=stage_dict[user_stages[user_id]])
            answer = stage_dict[user_stages[user_id]]
        else:
            if text in stage_shifts[user_stages[user_id]]:
                # Recognized phrase matches a transition for this stage.
                user_stages[user_id]= stage_shifts[user_stages[user_id]][text]
                _send(message, response=stage_dict[user_stages[user_id]])
                answer = stage_dict[user_stages[user_id]]
            else:
                # No matching transition: reply "didn't understand you".
                _send(message, response='Не понял тебя')
                answer = 'Не понял тебя'
        if user_stages[user_id] in level_money_gain:
            # Apply (level, money) rewards attached to the new stage.
            user_data[user_id][0]+=level_money_gain[user_stages[user_id]][0]
            user_data[user_id][1]+=level_money_gain[user_stages[user_id]][1]
            #func_fig(8000, user_data[user_id][1])
        # Re-render the progress bar with the updated totals.
        printbar(user_data[user_id][1], 60000)
        photo = open('fig1.png', 'rb')
        _send(message, response='Ваш уровень:' ' '+str(user_data[user_id][0])+'\nВаше количество денег:'+str(user_data[user_id][1])+"\n")
        bot.send_photo(message.chat.id, photo)
    if user_stages[user_id] in is_next_level:
        # Stage grants a "prize": send the stage picture and its reveal.
        photo = open('pics/'+ str(user_stages[user_id])+'.jpg', 'rb')
        bot.send_photo(message.chat.id, photo)
        _send(message, response='А вот твой классный ПРИЗ!')
        photo = open('pics/' + str(user_stages[user_id]) + 'r.jpg', 'rb')
        bot.send_photo(message.chat.id, photo)
    # SECURITY(review): `answer` is interpolated unescaped into a shell
    # script — a quote or `$( )` in it allows command injection. Replace
    # with a direct requests.post to the TTS endpoint, or at minimum
    # shlex.quote() the value.
    with open('b.sh', 'w') as f:
        f.write('curl -X POST -H "Authorization: Bearer ${IAM_TOKEN}" --data-urlencode "text='+answer+'" -d "lang=ru-RU&folderId=${FOLDER_ID}" "https://tts.api.cloud.yandex.net/speech/v1/tts:synthesize" > speech.ogg')
    command = ['bash','b.sh']
    res = (subprocess.run(command, shell=False))
    voice = open('speech.ogg', 'rb')
    bot.send_voice(message.chat.id, voice)