async def voice(txt: str):
    """Synthesize ``txt`` to speech and stream the audio back as WAV.

    The request uses the ``en-US-Wavenet-F`` female voice with LINEAR16
    encoding. The client call is synchronous (its result is used directly,
    without awaiting), so it is run in the event loop's default executor to
    avoid stalling other coroutines while the request is in flight.

    Args:
        txt: Plain text to synthesize.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` that yields the
        synthesized audio in 1024-byte chunks.
    """
    import asyncio
    import functools

    synthesis_input = texttospeech.SynthesisInput(text=txt)
    voice_params = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Wavenet-F",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
    )

    # Off-load the blocking request so the event loop stays responsive.
    loop = asyncio.get_event_loop()
    response = await loop.run_in_executor(
        None,
        functools.partial(
            tts_client.synthesize_speech,
            input=synthesis_input,
            voice=voice_params,
            audio_config=audio_config,
        ),
    )

    async def generate():
        # The response's audio_content is binary; hand it out chunk by chunk.
        with io.BytesIO(response.audio_content) as buf:
            data = buf.read(1024)
            while data:
                yield data
                data = buf.read(1024)

    return StreamingResponse(generate(), media_type="audio/wav")
def download_tmp_audio_file(self, card_type, foreign_text):
    """Synthesize ``foreign_text`` and return the path of a temporary MP3.

    The voice language is the card type's ``sublanguage_id`` property,
    falling back to ``language_id`` when that is empty. Every " ج "
    separator (the code treats it as the singular/plural divider) and every
    ``<br>`` is replaced by a 0.3 s SSML break, and the result is wrapped in
    a single ``<speak>`` element. Text containing neither marker is sent
    unchanged.

    Args:
        card_type: Card type whose language properties select the voice.
        foreign_text: Text to pronounce.

    Returns:
        Path of ``__GTTS__TMP__.mp3`` inside the database's media directory.
    """
    pause = '<break time="0.3s"/>'
    plural_separator = " ج "

    language_id = self.config().card_type_property(
        "sublanguage_id", card_type)
    if not language_id:
        language_id = self.config().card_type_property(
            "language_id", card_type)

    # Replace markers first and wrap once afterwards: this tolerates more
    # than one separator and never produces nested <speak> elements when
    # both kinds of marker are present.
    has_markers = plural_separator in foreign_text or "<br>" in foreign_text
    if has_markers:
        foreign_text = foreign_text.replace(plural_separator, pause)
        foreign_text = foreign_text.replace("<br>", pause)
        foreign_text = "<speak>" + foreign_text + "</speak>"

    client = texttospeech.TextToSpeechClient()
    synthesis_input = texttospeech.SynthesisInput(ssml=foreign_text)
    voice = texttospeech.VoiceSelectionParams(
        language_code=language_id,
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE)
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    response = client.synthesize_speech(request={
        "input": synthesis_input,
        "voice": voice,
        "audio_config": audio_config,
    })

    filename = expand_path("__GTTS__TMP__.mp3", self.database().media_dir())
    with open(filename, 'wb') as mp3_file:
        mp3_file.write(response.audio_content)
    return filename
def play():
    """Synthesize a randomly chosen text and render the player page.

    All ``.mp3`` files in ``static`` are deleted first, then a random entry
    of ``pastas`` is synthesized with a neutral ``en-US`` voice and written
    to a randomly named ``static/temp<N>.mp3``.

    Returns:
        The rendered ``playpage.html`` template, given the chosen text and
        the path of the new audio file (passed as both ``track_ogg`` and
        ``track_mp3``).
    """
    # Remove audio left over from earlier calls.
    for entry in os.listdir("static"):
        if entry.endswith(".mp3"):
            os.remove("static/" + entry)

    text = random.choice(pastas)
    synthesis_input = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)
    response = client.synthesize_speech(input=synthesis_input,
                                        voice=voice,
                                        audio_config=audio_config)

    name = "static/temp" + str(random.randint(0, 10000000)) + ".mp3"
    with open(name, "wb") as out:
        out.write(response.audio_content)
    # Report the file that was actually written.
    print(f'Audio content written to file "{name}"')

    return render_template("playpage.html",
                           words=text,
                           track_ogg=name,
                           track_mp3=name)
def text_to_speech_pcm(text, language='en-us', gender=texttospeech.SsmlVoiceGender.NEUTRAL) -> bytes:
    """Synthesize ``text`` and return LINEAR16 audio sampled at 48 kHz.

    Args:
        text: Plain text to synthesize.
        language: Either a language code or a full voice name. The first two
            dash-separated parts form the language code; when there are
            exactly four parts the whole string is also sent as the voice
            name.
        gender: SSML voice gender to request.

    Returns:
        The ``audio_content`` bytes of the synthesis response.
    """
    # Raise the gRPC receive limit to 24MB; the original author notes the
    # 4MB default is not enough for some definitions.
    grpc_options = [('grpc.max_receive_message_length', 24 * 1024 * 1024)]
    channel = TextToSpeechGrpcTransport.create_channel(options=grpc_options)
    client = texttospeech.TextToSpeechClient(
        transport=TextToSpeechGrpcTransport(channel=channel))

    parts = language.split('-')
    voice_name = language if len(parts) == 4 else None

    voice = texttospeech.VoiceSelectionParams(
        language_code='-'.join(parts[:2]),
        ssml_gender=gender,
        name=voice_name,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
        sample_rate_hertz=48000,
    )
    request_input = texttospeech.SynthesisInput(text=text)

    result = client.synthesize_speech(input=request_input,
                                      voice=voice,
                                      audio_config=audio_config)
    return result.audio_content
def speak(self, text, rate=0.8, language='en-US', filename=None, voice_name='en-US-Wavenet-D'):
    """Return the path of a WAV file speaking ``text``, creating it if needed.

    When the WAV file already exists it is returned as-is. Otherwise the
    text is synthesized to MP3, written next to it, and converted with
    ``self.mp3_to_wav``.

    Args:
        text: Plain text to synthesize.
        rate: Speaking rate passed to the audio config.
        language: Language code of the voice.
        filename: Base file name (no extension); derived from the text,
            rate and language via ``self.filename_from_text`` when falsy.
        voice_name: Name of the voice to request.

    Returns:
        Path of the ``.wav`` file.
    """
    filename = filename or self.filename_from_text(text, rate, language)
    base_path = path_in_medialib(filename)

    wav_path = f'{base_path}.wav'
    if os.path.exists(wav_path):
        return wav_path
    mp3_path = f'{base_path}.mp3'

    request_input = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(language_code=language,
                                              name=voice_name)
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=rate)
    result = self.client.synthesize_speech(input=request_input,
                                           voice=voice,
                                           audio_config=audio_config)

    # Store the MP3 first, then derive the WAV from it.
    with open(mp3_path, "wb") as mp3_out:
        mp3_out.write(result.audio_content)
    self.mp3_to_wav(mp3_path, wav_path)
    return wav_path
def synthesize_ssml_file(ssml_file):
    """Synthesize the SSML stored in ``ssml_file`` into ``output.mp3``.

    The SSML must be well-formed according to
    https://www.w3.org/TR/speech-synthesis/. The request uses the
    ``ko-KR-Standard-C`` female voice and MP3 encoding.

    Args:
        ssml_file: Path of the file containing the SSML document.
    """
    from google.cloud import texttospeech

    with open(ssml_file, "r") as source:
        document = source.read()

    tts = texttospeech.TextToSpeechClient()
    # Names of voices can be retrieved with client.list_voices().
    result = tts.synthesize_speech(
        input=texttospeech.SynthesisInput(ssml=document),
        voice=texttospeech.VoiceSelectionParams(
            language_code="ko-KR",
            name="ko-KR-Standard-C",
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3),
    )

    # The response's audio_content is binary.
    with open("output.mp3", "wb") as target:
        target.write(result.audio_content)
        print('Audio content written to file "output.mp3"')
def __init__(self, text, filename):
    """Synthesize ``text`` immediately and store it as an MP3.

    The request objects and the response are kept on the instance. The
    audio is written to ``./static/audio/<filename>.mp3``.

    Args:
        text: Plain text to synthesize (neutral ``en-US`` voice).
        filename: Base name of the output file, without extension.
    """
    self.text = text
    self.filename = filename
    self.client = texttospeech.TextToSpeechClient()

    # Request pieces: what to say, which voice, which audio format.
    self.synthesis_input = texttospeech.SynthesisInput(text=self.text)
    self.voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    self.audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )

    self.response = self.client.synthesize_speech(
        input=self.synthesis_input,
        voice=self.voice,
        audio_config=self.audio_config,
    )

    # The response's audio_content is binary.
    target_path = f"./static/audio/{filename}.mp3"
    with open(target_path, "wb") as target:
        target.write(self.response.audio_content)
def playAudioFromText(Text="",):
    """Synthesize ``Text`` and play it through the mixer, blocking until done.

    Input containing ``<speak>`` is sent as SSML, anything else as plain
    text. Language, voice name and speaking rate come from the
    ``language-tts``, ``name-tts`` and ``speaking_rate-tts`` settings.

    Args:
        Text: Plain text or SSML to speak.
    """
    import time

    # Read the settings once instead of once per field.
    settings = Settings.getSetting()

    client = texttospeech.TextToSpeechClient()
    if '<speak>' in Text:
        synthesis_input = texttospeech.SynthesisInput(ssml=Text)
    else:
        synthesis_input = texttospeech.SynthesisInput(text=Text)

    voice = texttospeech.VoiceSelectionParams(
        language_code=settings['language-tts'],
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
        name=settings['name-tts'])
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=settings['speaking_rate-tts'])
    response = client.synthesize_speech(input=synthesis_input,
                                        voice=voice,
                                        audio_config=audio_config)

    # The context manager closes the buffer even if playback raises.
    with io.BytesIO(response.audio_content) as mem_file:
        mixer.init()
        mixer.music.load(mem_file)
        mixer.music.play(0)
        # Poll with a short sleep rather than spinning at full CPU.
        while mixer.music.get_busy():
            time.sleep(0.05)
def translate():
    """Route handler that synthesizes the requested text and returns an MP3.

    The ``messages`` query argument is a JSON object whose ``text``,
    ``language`` and ``gender`` entries select what is spoken and by which
    voice. The audio is written to ``./static/output.mp3`` and that file is
    sent back as ``output.mp3``.
    """
    payload = json.loads(request.args['messages'])

    tts = texttospeech.TextToSpeechClient()
    request_input = texttospeech.SynthesisInput(text=payload['text'])
    voice = texttospeech.VoiceSelectionParams(
        language_code=payload['language'],
        ssml_gender=payload['gender'],
    )
    # NOTE(review): 2 presumably corresponds to AudioEncoding.MP3 (the
    # output is stored as .mp3) - confirm against the enum.
    audio_config = texttospeech.AudioConfig(audio_encoding=2)

    result = tts.synthesize_speech(input=request_input,
                                   voice=voice,
                                   audio_config=audio_config)

    # The response's audio_content is binary.
    with open('./static/output.mp3', 'wb') as target:
        target.write(result.audio_content)
        print('Audio content written to file "output.mp3"')

    return send_file('./static/output.mp3', attachment_filename='output.mp3')
async def tts(ctx, language, *, input):
    """Synthesize ``input`` in ``language`` and send it back as ``output.mp3``.

    The synchronous client call runs in the event loop's default executor so
    other coroutines keep running while the request is in flight. The audio
    is sent from memory rather than via a shared ``output.mp3`` on disk,
    which concurrent invocations could overwrite mid-send.

    Args:
        ctx: Invocation context; the audio file is sent with ``ctx.send``.
        language: Language code of the (neutral-gender) voice.
        input: Plain text to synthesize.
    """
    import asyncio
    import functools
    import io

    synthesis_input = texttospeech.SynthesisInput(text=input)
    voice = texttospeech.VoiceSelectionParams(
        language_code=language,
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    # Off-load the blocking request so the event loop stays responsive.
    loop = asyncio.get_event_loop()
    response = await loop.run_in_executor(
        None,
        functools.partial(
            ttsClient.synthesize_speech,
            input=synthesis_input,
            voice=voice,
            audio_config=audio_config,
        ),
    )

    # The response's audio_content is binary; wrap it as an in-memory file.
    audio = io.BytesIO(response.audio_content)
    await ctx.send(file=discord.File(audio, filename="output.mp3"))
def synthesize_ssml(ssml, langcode, voicename, ssmlgender, output_filename):
    """Synthesize an SSML string into an MP3 file.

    The SSML must be well-formed according to
    https://www.w3.org/TR/speech-synthesis/, for example
    ``<speak>Hello there.</speak>``.

    Args:
        ssml: SSML document to synthesize.
        langcode: Language code of the voice.
        voicename: Name of the voice (see ``client.list_voices()``).
        ssmlgender: SSML voice gender to request.
        output_filename: Path the MP3 audio is written to.
    """
    tts = texttospeech.TextToSpeechClient()

    selected_voice = texttospeech.VoiceSelectionParams(
        language_code=langcode,
        name=voicename,
        ssml_gender=ssmlgender,
    )
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    result = tts.synthesize_speech(
        input=texttospeech.SynthesisInput(ssml=ssml),
        voice=selected_voice,
        audio_config=mp3_config,
    )

    # The response's audio_content is binary.
    with open(output_filename, "wb") as target:
        target.write(result.audio_content)
        print('Audio content written to file', output_filename, '.')
def ssml_to_audio(ssml_text, outfile):
    """Synthesize ``ssml_text`` with a neutral ``en-US`` voice into an MP3.

    Credentials: ``GOOGLE_APPLICATION_CREDENTIALS`` is only set to the
    built-in service-account path when the environment does not already
    provide one, so an externally configured credential is respected.
    Auth setup: https://cloud.google.com/docs/authentication/getting-started

    Args:
        ssml_text: SSML document to synthesize.
        outfile: Path the MP3 audio is written to.
    """
    # Fallback only; do not clobber credentials configured by the caller.
    os.environ.setdefault(
        'GOOGLE_APPLICATION_CREDENTIALS',
        '/Users/christinatuttle/Downloads/text-to-speech-service-account.json',
    )

    client = texttospeech.TextToSpeechClient()
    synthesis_input = texttospeech.SynthesisInput(ssml=ssml_text)
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )
    response = client.synthesize_speech(
        input=synthesis_input,
        voice=voice,
        audio_config=audio_config,
    )

    # The response's audio_content is binary.
    with open(outfile, "wb") as out:
        out.write(response.audio_content)
        print('Audio content written to file "' + outfile + '"')
def tts_control(synthesis_input):
    """Synthesize a prepared input in Korean, play it, then exit the process.

    The audio is written to ``output.mp3``, played with ``omxplayer`` and
    the interpreter is terminated with ``sys.exit()`` afterwards.

    Args:
        synthesis_input: Ready-made synthesis input passed straight to the
            client as ``input``.
    """
    tts = texttospeech.TextToSpeechClient()

    korean_voice = texttospeech.VoiceSelectionParams(
        language_code="ko-KR",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )
    result = tts.synthesize_speech(input=synthesis_input,
                                   voice=korean_voice,
                                   audio_config=mp3_config)

    # The response's audio_content is binary.
    with open("output.mp3", "wb") as target:
        target.write(result.audio_content)

    print('Playing sound...')
    os.system('omxplayer ./output.mp3')
    sys.exit()
def getAudio(inputStr):
    """Synthesize ``inputStr`` and write the MP3 audio to ``SoundFile``.

    Uses the synchronous client: this function is not a coroutine and reads
    ``audio_content`` straight from the result, which the async client's
    un-awaited call would not provide.

    Args:
        inputStr: Plain text to synthesize with the ``en-US-Wavenet-I``
            male voice.
    """
    # Credentials come from the service-account file named by GoogleApiFile.
    tts = texttospeech.TextToSpeechClient.from_service_account_json(
        GoogleApiFile)

    synthesis_input = texttospeech.SynthesisInput(text=inputStr)
    voice = texttospeech.VoiceSelectionParams({
        "language_code": "en-US",
        "ssml_gender": texttospeech.SsmlVoiceGender.MALE,
        "name": "en-US-Wavenet-I",
    })
    audio_config = texttospeech.AudioConfig(
        {"audio_encoding": texttospeech.AudioEncoding.MP3})

    response = tts.synthesize_speech(input=synthesis_input,
                                     voice=voice,
                                     audio_config=audio_config)

    # The response's audio_content is binary.
    with open(SoundFile, "wb") as out:
        out.write(response.audio_content)
def text_to_speech(text_content, output_file_path):
    """Create an audio file from the given string and save it.

    Args:
        text_content: Plain text to synthesize with the ``ja-JP-Wavenet-B``
            voice.
        output_file_path: Path the MP3 audio is written to.
    """
    tts = texttospeech.TextToSpeechClient()

    result = tts.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text_content),
        voice=texttospeech.VoiceSelectionParams(
            language_code="ja-JP",
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
            name="ja-JP-Wavenet-B",
        ),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
        ),
    )

    # The response's audio_content is binary.
    with open(output_file_path, "wb") as target:
        target.write(result.audio_content)
def synthesize_text_file(text_file):
    """Synthesize the plain text in ``text_file`` into ``<text_file>.mp3``.

    The text is converted to SSML: XML-special characters are escaped so
    that ``&``, ``<`` or ``>`` in the text cannot produce malformed SSML,
    and every blank-line break gets a full stop plus a one-second pause.
    The request uses the ``en-US-Wavenet-D`` male voice.

    Args:
        text_file: Path of the text file; the audio is written next to it
            with ``.mp3`` appended to the name.
    """
    import html

    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()

    with open(text_file, "r") as f:
        text = f.read()

    # Escape before inserting <break/> so the inserted tags stay intact.
    escaped = html.escape(text, quote=False)
    ssml = "<speak>{}</speak>".format(
        escaped.replace("\n\n", '.<break time="1s"/>\n\n'))
    input_text = texttospeech.SynthesisInput(ssml=ssml)

    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.VoiceSelectionParams(
        name='en-US-Wavenet-D',
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE)
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    response = client.synthesize_speech(request={
        "input": input_text,
        "voice": voice,
        "audio_config": audio_config,
    })

    # The response's audio_content is binary.
    output_file = text_file + '.mp3'
    with open(output_file, "wb") as out:
        out.write(response.audio_content)
        print('Audio content written to file "{}"'.format(output_file))
def synthesize_text_file(text_file):
    """Synthesize the plain text in ``text_file`` into ``output.mp3``.

    The request uses the ``ko-KR-Standard-C`` female voice and MP3 encoding.

    Args:
        text_file: Path of the file containing the text to speak.
    """
    from google.cloud import texttospeech

    with open(text_file, "r") as source:
        contents = source.read()

    tts = texttospeech.TextToSpeechClient()
    # Names of voices can be retrieved with client.list_voices().
    synthesis_request = {
        "input": texttospeech.SynthesisInput(text=contents),
        "voice": texttospeech.VoiceSelectionParams(
            language_code="ko-KR",
            name="ko-KR-Standard-C",
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE),
        "audio_config": texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3),
    }
    result = tts.synthesize_speech(request=synthesis_request)

    # The response's audio_content is binary.
    with open("output.mp3", "wb") as target:
        target.write(result.audio_content)
        print('Audio content written to file "output.mp3"')
def synthesize_text(text):
    """Synthesize ``text`` in Ukrainian, save it as ``output.mp3`` and play it.

    Args:
        text: Plain text to speak with the ``uk-UA-Standard-A`` female voice.
    """
    tts = texttospeech.TextToSpeechClient()

    synthesis_request = {
        "input": texttospeech.SynthesisInput(text=text),
        "voice": texttospeech.VoiceSelectionParams(
            language_code="uk-UA",
            name="uk-UA-Standard-A",
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
        ),
        "audio_config": texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3),
    }
    result = tts.synthesize_speech(request=synthesis_request)

    # Store the binary audio, then hand the file to the player.
    with open("output.mp3", "wb") as target:
        target.write(result.audio_content)
    playsound("output.mp3")
def speech_result_return(create_text):
    """Synthesize ``create_text`` to ``text.mp3`` and trigger playback.

    Args:
        create_text: Plain text to speak with a female ``en-GB`` voice.
    """
    tts = texttospeech.TextToSpeechClient()

    british_voice = texttospeech.VoiceSelectionParams(
        language_code="en-GB",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )
    result = tts.synthesize_speech(
        input=texttospeech.SynthesisInput(text=create_text),
        voice=british_voice,
        audio_config=mp3_config,
    )

    # The response's audio_content is binary.
    with open("text.mp3", "wb") as target:
        target.write(result.audio_content)
        print('Audio content written to file "text.mp3"')

    play_mp3_content()
def synthesize_text_with_audio_profile(text, output, effects_profile_id):
    """Synthesize ``text`` to MP3 with an audio effects profile applied.

    Args:
        text: Plain text to speak (``en-US`` voice).
        output: Path the MP3 audio is written to.
        effects_profile_id: Identifier of the single effects profile to
            apply; it is sent as a one-element list.
    """
    from google.cloud import texttospeech

    tts = texttospeech.TextToSpeechClient()

    # Several profile ids may be passed; they are applied in the order given.
    profiled_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        effects_profile_id=[effects_profile_id],
    )
    result = tts.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=texttospeech.VoiceSelectionParams(language_code="en-US"),
        audio_config=profiled_config,
    )

    # The response's audio_content is binary.
    with open(output, "wb") as target:
        target.write(result.audio_content)
        print('Audio content written to file "%s"' % output)
def write_speech(text, file_name, config):
    """Synthesize ``text`` to MP3 and return the audio bytes.

    Args:
        text: Plain text to speak.
        file_name: Only included in the debug output; no file is written
            by this function.
        config: Mapping whose optional ``language`` entry selects the
            language code (default ``en-US``).

    Returns:
        The ``audio_content`` bytes of the synthesis response.
    """
    print('WRITE SPEECH')
    print([text, file_name, config])

    client = texttospeech.TextToSpeechClient()

    # Build the synthesis input once.
    input_text = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(
        language_code=config.get('language', 'en-US'),
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    response = client.synthesize_speech(request={
        "input": input_text,
        "voice": voice,
        "audio_config": audio_config,
    })
    print('GOT GOO SPEECH REQ')
    return response.audio_content
def convert_to_audio(name, text, save_to):
    """Synthesize ``text`` in chunks and stitch them into one MP3.

    The text is cut into 5000-character pieces, each synthesized with the
    ``en-US-Wavenet-B`` voice into ``<name>_<index>.mp3`` in the current
    directory. The pieces are concatenated in the order they were written,
    exported to ``<save_to>/<name>.mp3`` and the intermediate files removed.

    The segment paths are recorded as they are written instead of being
    rediscovered by a sorted glob, which would order ``_10`` before ``_2``
    and could pick up unrelated files matching the pattern.

    Args:
        name: Base name for the intermediate and final files.
        text: Plain text to synthesize.
        save_to: Directory the final MP3 is exported to.

    Raises:
        TypeError: If ``text`` is empty, since there are no segments to
            stitch together.
    """
    client = texttospeech.TextToSpeechClient()

    # we can send up to 5000 characters per request, so split up the text
    step = 5000

    # The voice and audio format are the same for every chunk.
    voice = texttospeech.VoiceSelectionParams(language_code='en-US',
                                              name='en-US-Wavenet-B')
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    segment_paths = []
    for j, start in enumerate(range(0, len(text), step)):
        segment_path = f'{name}_{j}.mp3'
        synthesis_input = texttospeech.SynthesisInput(
            text=text[start:start + step])

        logging.info(f'Synthesizing speech for {name}_{j}')
        response = client.synthesize_speech(input=synthesis_input,
                                            voice=voice,
                                            audio_config=audio_config)

        with open(segment_path, 'wb') as out:
            out.write(response.audio_content)
        segment_paths.append(segment_path)
        logging.info(f'Audio content written to file "{segment_path}"')

    segments = [AudioSegment.from_mp3(path) for path in segment_paths]

    logging.info(f'Stitching together {len(segments)} mp3 files for {name}')
    audio = functools.reduce(lambda a, b: a + b, segments)

    logging.info(f'Exporting {name}.mp3')
    audio.export(f'{save_to}/{name}.mp3', format='mp3')

    logging.info('Removing intermediate files')
    for path in segment_paths:
        os.remove(path)
def synthesize_text(text):
    """Synthesize ``text`` into a time-stamped MP3 in the current directory.

    The file is named ``output<HHMMSS>.mp3`` from the current time, and the
    confirmation message reports that actual name.

    Args:
        text: Plain text to speak with the ``en-US-Wavenet-D`` male voice.
    """
    from google.cloud import texttospeech

    client = texttospeech.TextToSpeechClient()
    input_text = texttospeech.SynthesisInput(text=text)

    # Names of voices can be retrieved with client.list_voices().
    voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        name="en-US-Wavenet-D",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    response = client.synthesize_speech(request={
        "input": input_text,
        "voice": voice,
        "audio_config": audio_config,
    })

    # Compute the name once so the message matches the file written.
    output_file = "output" + datetime.now().strftime("%H%M%S") + ".mp3"
    with open(output_file, "wb") as out:
        out.write(response.audio_content)
        print('Audio content written to file "{}"'.format(output_file))
def text_to_speech_converter(text, lang, voice_name=None, speaking_rate=1):
    """Synthesize ``text`` to MP3 and return the audio bytes.

    Uses the synchronous client: this function is not a coroutine and reads
    ``audio_content`` straight from the result, which the async client's
    un-awaited call would not provide.

    Args:
        text: Plain text to speak.
        lang: Language code of the voice.
        voice_name: Name of a specific voice; when falsy a neutral-gender
            voice for ``lang`` is requested instead.
        speaking_rate: Speaking rate passed to the audio config.

    Returns:
        The ``audio_content`` bytes of the synthesis response.
    """
    client = texttospeech.TextToSpeechClient()
    synthesis_input = texttospeech.SynthesisInput(text=text)

    if not voice_name:
        voice = texttospeech.VoiceSelectionParams(
            language_code=lang,
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL)
    else:
        voice = texttospeech.VoiceSelectionParams(language_code=lang,
                                                  name=voice_name)

    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
        speaking_rate=speaking_rate)

    response = client.synthesize_speech(input=synthesis_input,
                                        voice=voice,
                                        audio_config=audio_config)
    return response.audio_content
def main():
    """Command-line entry point: list voices or synthesize the input files.

    With the ``list_voices`` argument set, the available voices are printed
    and nothing else happens. Otherwise the requested voice is validated
    against the available ones and the files under the input path are
    processed into ``data/output``.
    """
    client = texttospeech.TextToSpeechClient()
    available_voices = list_voices(client=client)
    args = parse_arguments()

    if args.list_voices:
        print(f'Available voices\n{available_voices}\n')
        return

    voice_name = args.voice
    if voice_name not in available_voices:
        exit('Invalid voice! Use -list-voices to see all available options.')

    # The language code is the first two dash-separated parts of the name.
    language_code = '-'.join(voice_name.split('-')[:2])
    logger.info(f'Using voice `{voice_name}` in language `{language_code}`')

    # Fall back to the default input location when none was given.
    input_path = Path(args.input or '/data/input')

    process_input_files(
        input_path=input_path,
        output_directory_path=Path('data/output'),
        client=client,
        voice=texttospeech.VoiceSelectionParams(language_code=language_code,
                                                name=voice_name),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3),
    )

    logger.info('Done!')
    logger.info('Failures have been saved next to their respective output files.')
def text_to_speech(word):
    """Synthesize ``word`` with a female ``zh-CN`` voice as LINEAR16 audio.

    :param word: plain text to synthesize
    :return: the whole synthesis response; the binary audio is in its
        ``audio_content`` attribute
    """
    tts = texttospeech.TextToSpeechClient()

    chinese_voice = texttospeech.VoiceSelectionParams(
        language_code="zh-CN",
        ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
    )
    pcm_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.LINEAR16,
    )

    result = tts.synthesize_speech(
        input=texttospeech.SynthesisInput(text=word),
        voice=chinese_voice,
        audio_config=pcm_config,
    )
    return result
def write_transcription_audioFile(text, filename):
    """Synthesize ``text`` and store it as both ``.wav`` and ``.sln`` files.

    The audio is LINEAR16 at 8000 Hz with the ``en-GB-Wavenet-B`` voice,
    speaking rate 0.89 and pitch 2.4. The ``.sln`` file holds the same
    bytes minus the first 44, which the code treats as the WAV header.

    Args:
        text: Plain text to speak.
        filename: Base name (no extension), appended to
            ``my_audio_out_directory``.
    """
    base_path = my_audio_out_directory + filename

    tts = texttospeech.TextToSpeechClient.from_service_account_file(
        my_credentials_file_path)

    result = tts.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=texttospeech.VoiceSelectionParams(
            language_code="en-GB",
            name="en-GB-Wavenet-B",
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.LINEAR16,
            speaking_rate=0.89,
            pitch=2.4,
            sample_rate_hertz=8000),
    )
    wav_bytes = result.audio_content

    with open(base_path + ".wav", "wb") as wav_out:
        wav_out.write(wav_bytes)

    # Headerless copy: drop the leading 44 bytes.
    with open(base_path + ".sln", "wb") as sln_out:
        sln_out.write(bytearray(wav_bytes)[44:])
def synthesize_text(text, file_name):
    """Synthesize ``text`` into ``<file_name>_audiobook.mp3``.

    Args:
        text: Plain text to speak with the ``en-US-Standard-C`` female
            voice.
        file_name: Prefix of the output file name.
    """
    tts = texttospeech.TextToSpeechClient()

    # Names of voices can be retrieved with client.list_voices().
    synthesis_request = {
        "input": texttospeech.SynthesisInput(text=text),
        "voice": texttospeech.VoiceSelectionParams(
            language_code="en-US",
            name="en-US-Standard-C",
            ssml_gender=texttospeech.SsmlVoiceGender.FEMALE,
        ),
        "audio_config": texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3),
    }
    result = tts.synthesize_speech(request=synthesis_request)

    # The response's audio_content is binary.
    with open(f"{file_name}_audiobook.mp3", "wb") as target:
        target.write(result.audio_content)
        print(f'Audio content written to file "{file_name}_audiobook.mp3"')
def run(text):
    """Synthesize ``text`` into ``output.mp3`` and return a status message.

    Args:
        text: Plain text to speak with a neutral ``en-US`` voice.

    Returns:
        The confirmation string that is also printed.
    """
    tts = texttospeech.TextToSpeechClient()

    neutral_voice = texttospeech.VoiceSelectionParams(
        language_code="en-US",
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )
    result = tts.synthesize_speech(
        input=texttospeech.SynthesisInput(text=text),
        voice=neutral_voice,
        audio_config=mp3_config,
    )

    # The response's audio_content is binary.
    with open("output.mp3", "wb") as target:
        target.write(result.audio_content)
        print('Audio content written to file "output.mp3"')

    return 'Audio content written to file "output.mp3"'
def TTS_ssml(ssml_sentence, output):
    """Synthesize an SSML sentence to MP3 and return the file's absolute path.

    Args:
        ssml_sentence: SSML document to speak with the ``ko-KR-Wavenet-D``
            male voice.
        output: Path the MP3 audio is written to.

    Returns:
        ``os.path.abspath(output)``.
    """
    from google.cloud import texttospeech

    tts = texttospeech.TextToSpeechClient()

    # Names of voices can be retrieved with client.list_voices().
    korean_voice = texttospeech.VoiceSelectionParams(
        language_code="ko-KR",
        name="ko-KR-Wavenet-D",
        ssml_gender=texttospeech.SsmlVoiceGender.MALE,
    )
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3)

    result = tts.synthesize_speech(
        input=texttospeech.SynthesisInput(ssml=ssml_sentence),
        voice=korean_voice,
        audio_config=mp3_config,
    )

    # The response's audio_content is binary.
    with open(output, "wb") as target:
        target.write(result.audio_content)
        print('Audio content written to file {}'.format(output))

    return os.path.abspath(output)