def text2audio(self, text):
    """Synthesize *text* to speech and return the audio as Ogg/Opus bytes.

    Streams PCM chunks from the Tinkoff VoiceKit gRPC endpoint and encodes
    them into an in-memory Ogg Opus container.

    :param text: plain text to synthesize
    :return: ``bytes`` containing a complete Ogg/Opus stream
    """
    # Use the channel as a context manager so the underlying connection is
    # released even if the streaming call raises (the original leaked it).
    with grpc.secure_channel(self._endpoint,
                             grpc.ssl_channel_credentials()) as channel:
        stub = tts_pb2_grpc.TextToSpeechStub(channel)
        request = self._build_request(text)
        metadata = authorization_metadata(self._api_key, self._secret_key,
                                          "tinkoff.cloud.tts")
        responses = stub.StreamingSynthesize(request, metadata=metadata)
        # The server reports the expected sample count up front; use it to
        # log an estimated duration before any audio arrives.
        for key, value in responses.initial_metadata():
            if key == "x-audio-num-samples":
                print("Estimated audio duration is "
                      + str(int(value) / self._sample_rate) + " seconds")
                break
        f = io.BytesIO()
        ogg_opus_writer = pyogg.OggOpusWriter(f)
        ogg_opus_writer.set_application("audio")
        ogg_opus_writer.set_sampling_frequency(self._sample_rate)
        ogg_opus_writer.set_channels(1)
        ogg_opus_writer.set_frame_size(20)  # milliseconds
        for stream_response in responses:
            ogg_opus_writer.encode(stream_response.audio_chunk)
        # Finalize the Ogg stream so container headers/trailers are written.
        ogg_opus_writer.close()
        # getvalue() returns the whole buffer regardless of the stream
        # position, so the original's seek(0) was redundant.
        return f.getvalue()
def text2speach(text, tmp_dir='./tmp'):
    """Synthesize *text* and return the resulting audio as Ogg (.oga) bytes.

    Streams LINEAR16 PCM from the TTS service into a temporary WAV file,
    converts it to .oga via ffmpeg, reads the result back and removes both
    temporary files.

    NOTE(review): the function name keeps the original "speach" spelling
    because external callers depend on it.

    :param text: plain text to synthesize
    :param tmp_dir: directory for scratch files (created if missing)
    :return: ``bytes`` of the converted .oga file
    """
    # Make sure the scratch directory exists (the original crashed if not).
    os.makedirs(tmp_dir, exist_ok=True)
    ts = int(time.time() * 10**6)  # microsecond timestamp -> unique names
    wav_tmp = os.path.join(tmp_dir, '{}.wav'.format(ts))
    oga_tmp = os.path.join(tmp_dir, '{}.oga'.format(ts))
    try:
        with wave.open(wav_tmp, "wb") as f:
            f.setframerate(sample_rate)
            f.setnchannels(1)
            f.setsampwidth(2)  # LINEAR16 -> 2 bytes per sample
            # Close the gRPC channel deterministically (original leaked it).
            with grpc.secure_channel(endpoint,
                                     grpc.ssl_channel_credentials()) as channel:
                stub = tts_pb2_grpc.TextToSpeechStub(channel)
                request = build_request(text)
                metadata = authorization_metadata(api_key, secret_key,
                                                  "tinkoff.cloud.tts")
                responses = stub.StreamingSynthesize(request, metadata=metadata)
                for stream_response in responses:
                    f.writeframes(stream_response.audio_chunk)
        # Convert WAV -> OGA with ffmpeg.
        stream = ffmpeg.input(wav_tmp)
        stream = ffmpeg.output(stream, oga_tmp)
        ffmpeg.run(stream, overwrite_output=True)
        with open(oga_tmp, 'rb') as f:
            fp = f.read()
        return fp
    finally:
        # Remove temp files even when synthesis or conversion fails
        # (the original left them behind on any error).
        for path in (wav_tmp, oga_tmp):
            if os.path.exists(path):
                os.remove(path)
def __init__(self, phrase):
    """Prepare SSML and plain-text variants of *phrase* and start a
    streaming synthesis request against the TTS service."""
    # Every space becomes ". " — inserts a pause between words
    # (preserves the original behavior).
    prepared = phrase.replace(' ', '. ')
    self._ssml = ''.join(['<speak><p>', prepared, '</p></speak>'])
    # Plain-text variant: the prepared phrase with any markup tags removed.
    self._text = re.sub(r'\<[^>]*\>', '', prepared)
    channel = grpc.secure_channel(ENDPOINT, grpc.ssl_channel_credentials())
    stub = tts_pb2_grpc.TextToSpeechStub(channel)
    synthesis_input = tts_pb2.SynthesisInput(text=self._text, ssml=self._ssml)
    audio_config = tts_pb2.AudioConfig(
        audio_encoding=tts_pb2.LINEAR16,
        speaking_rate=1,
        sample_rate_hertz=SAMPLE_RATE,
    )
    request = tts_pb2.SynthesizeSpeechRequest(
        input=synthesis_input,
        audio_config=audio_config,
    )
    auth = authorization_metadata(API_KEY, SECRET_KEY, "tinkoff.cloud.tts")
    self._responses = stub.StreamingSynthesize(request, metadata=auth)
def main():
    """CLI entry point: synthesize the parsed input text into the
    requested output audio file."""
    args = BaseSynthesisParser().parse_args()
    # LINEAR16 is only served at 48 kHz; fail early with a clear message.
    if args.encoding == tts_pb2.LINEAR16 and args.rate != 48000:
        raise ValueError("LINEAR16 supports only 48kHz for now, use RAW_OPUS")
    with audio_open_write(args.output_file, args.encoding, args.rate) as writer:
        auth = authorization_metadata(args.api_key, args.secret_key,
                                      "tinkoff.cloud.tts")
        stub = tts_pb2_grpc.TextToSpeechStub(make_channel(args))
        request = build_synthesis_request(args, args.input_text)
        for chunk in stub.StreamingSynthesize(request, metadata=auth):
            writer.write(chunk.audio_chunk)
def generate(text, chat_id):
    """Synthesize *text* into ``synthesised<chat_id>.wav`` (16-bit mono PCM).

    :param text: plain text to synthesize
    :param chat_id: used only to build a per-chat output file name
    """
    with wave.open(f"synthesised{chat_id}.wav", "wb") as f:
        f.setframerate(sample_rate)
        f.setnchannels(1)
        f.setsampwidth(2)  # LINEAR16 -> 2 bytes per sample
        # Context manager closes the gRPC channel (the original leaked it).
        with grpc.secure_channel(endpoint,
                                 grpc.ssl_channel_credentials()) as channel:
            stub = tts_pb2_grpc.TextToSpeechStub(channel)
            request = build_request(text)
            metadata = authorization_metadata(api_key, secret_key,
                                              "tinkoff.cloud.tts")
            responses = stub.StreamingSynthesize(request, metadata=metadata)
            # Server-reported sample count lets us log the expected duration
            # before any audio arrives.
            for key, value in responses.initial_metadata():
                if key == "x-audio-num-samples":
                    print("Estimated audio duration is "
                          + str(int(value) / sample_rate) + " seconds")
                    break
            for stream_response in responses:
                f.writeframes(stream_response.audio_chunk)
Библиотеки просто набиты ими. </s> </p> </speak> """), audio_config=tts_pb2.AudioConfig( audio_encoding=tts_pb2.LINEAR16, sample_rate_hertz=sample_rate, ), ) pyaudio_lib = pyaudio.PyAudio() f = pyaudio_lib.open(output=True, channels=1, format=pyaudio.paInt16, rate=sample_rate) stub = tts_pb2_grpc.TextToSpeechStub( grpc.secure_channel(endpoint, grpc.ssl_channel_credentials())) request = build_request() metadata = authorization_metadata(api_key, secret_key, "tinkoff.cloud.tts") responses = stub.StreamingSynthesize(request, metadata=metadata) for key, value in responses.initial_metadata(): if key == "x-audio-num-samples": print("Estimated audio duration is {:.2f} seconds".format( int(value) / sample_rate)) break for stream_response in responses: f.write(stream_response.audio_chunk)
def repeat_all_message(message):
    """Telegram voice-message handler driving a quest-style game bot.

    Pipeline: download the incoming voice note, run speech-to-text via the
    external ``a.sh`` script, advance the per-user game state machine based
    on the recognized text, then synthesize the bot's reply with Yandex TTS
    (via ``b.sh``/curl) and send it back as a voice message.

    NOTE(review): nesting below is reconstructed from a whitespace-mangled
    source; the money-gain/progress-bar section is assumed to belong to the
    returning-user branch — confirm against the original file.
    """
    # Download the user's voice note to a fixed local file.
    file_info = bot.get_file(message.voice.file_id)
    file = requests.get('https://api.telegram.org/file/bot{0}/{1}'.format(token, file_info.file_path))
    with open('voice.ogg','wb') as f:
        f.write(file.content)
    # a.sh is expected to run speech recognition on voice.ogg and leave a
    # JSON payload with a 'result' key in tmp.txt.
    command = ['bash','a.sh']
    res = ((subprocess.run(command,shell=False)))
    with open('tmp.txt','r') as f:
        res = json.loads(f.read())['result']
    print(res)
    user_id = str(message.from_user.id) if message.from_user else '<unknown>'
    text = res  # recognized phrase drives the state transitions below
    # NOTE(review): this WAV file is configured but never written to, and
    # the stub created here is unused dead code; the gRPC channel is also
    # never closed — candidate for removal.
    with wave.open("synthesised123.wav", "wb") as f:
        f.setframerate(sample_rate)
        f.setnchannels(1)
        f.setsampwidth(2)
        stub = tts_pb2_grpc.TextToSpeechStub(grpc.secure_channel(endpoint, grpc.ssl_channel_credentials()))
    if user_id not in user_stages:
        # First contact: initialize stage 1 and [level, money] = [0, 0].
        user_stages[user_id]=1
        user_data[user_id] = [0,0]
        answer = stage_dict[user_stages[user_id]]
        _send(message, response=stage_dict[user_stages[user_id]])
    else:
        # Returning user: show progress, then advance the state machine.
        printbar(user_data[user_id][1], 60000)
        # NOTE(review): photo/voice file handles in this function are opened
        # without close() — they leak; wrap in `with` when refactoring.
        photo = open('fig1.png', 'rb')
        bot.send_photo(message.chat.id, photo)
        _send(message, response='Ваш уровень: '+str(user_data[user_id][0])+'\nВаше количество денег:'+str(user_data[user_id][1])+"\n")
        if 'anything' in stage_shifts[user_stages[user_id]]:
            # 'anything' wildcard: any utterance advances the stage.
            user_stages[user_id]= stage_shifts[user_stages[user_id]]['anything']
            _send(message, response=stage_dict[user_stages[user_id]])
            answer = stage_dict[user_stages[user_id]]
        else:
            if text in stage_shifts[user_stages[user_id]]:
                # Recognized phrase matches a transition for this stage.
                user_stages[user_id]= stage_shifts[user_stages[user_id]][text]
                _send(message, response=stage_dict[user_stages[user_id]])
                answer = stage_dict[user_stages[user_id]]
            else:
                # No matching transition: reply "didn't understand you".
                _send(message, response='Не понял тебя')
                answer = 'Не понял тебя'
        if user_stages[user_id] in level_money_gain:
            # Apply (level, money) rewards attached to the new stage.
            user_data[user_id][0]+=level_money_gain[user_stages[user_id]][0]
            user_data[user_id][1]+=level_money_gain[user_stages[user_id]][1]
            #func_fig(8000, user_data[user_id][1])
        # Re-render the progress bar with the updated totals.
        printbar(user_data[user_id][1], 60000)
        photo = open('fig1.png', 'rb')
        _send(message, response='Ваш уровень:' ' '+str(user_data[user_id][0])+'\nВаше количество денег:'+str(user_data[user_id][1])+"\n")
        bot.send_photo(message.chat.id, photo)
    if user_stages[user_id] in is_next_level:
        # Stage grants a "prize": send the stage picture and its reveal.
        photo = open('pics/'+ str(user_stages[user_id])+'.jpg', 'rb')
        bot.send_photo(message.chat.id, photo)
        _send(message, response='А вот твой классный ПРИЗ!')
        photo = open('pics/' + str(user_stages[user_id]) + 'r.jpg', 'rb')
        bot.send_photo(message.chat.id, photo)
    # SECURITY(review): `answer` is interpolated unescaped into a shell
    # script — a quote or `$( )` in it allows command injection. Replace
    # with a direct requests.post to the TTS endpoint, or at minimum
    # shlex.quote() the value.
    with open('b.sh', 'w') as f:
        f.write('curl -X POST -H "Authorization: Bearer ${IAM_TOKEN}" --data-urlencode "text='+answer+'" -d "lang=ru-RU&folderId=${FOLDER_ID}" "https://tts.api.cloud.yandex.net/speech/v1/tts:synthesize" > speech.ogg')
    command = ['bash','b.sh']
    res = (subprocess.run(command, shell=False))
    voice = open('speech.ogg', 'rb')
    bot.send_voice(message.chat.id, voice)