def generate_voice():
    """Build an SSML document for Azure TTS from a Korean script file.

    Reads ./tts_storage/text/tts_script.txt, wraps it in an SSML template
    with the configured prosody (volume/rate/pitch), and writes the result
    to ssml.xml.  A synthesizer is configured but not invoked here —
    presumably a later step speaks the generated SSML (TODO confirm).
    """
    ############# AZURE #######################
    # Prosody settings -> volume default = 50; negative rate slows speech.
    rate = "-12%"
    pitch = "3%"
    vol_ = 10

    # NOTE(review): "APIKEY" is a placeholder — an Azure key is required here;
    # load it from config/env rather than hard-coding.
    speech_config = SpeechConfig(subscription="APIKEY", region="eastus")
    speech_config.set_speech_synthesis_output_format(
        SpeechSynthesisOutputFormat["Riff24Khz16BitMonoPcm"])
    # audio_config=None: audio is returned in memory instead of being played.
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=None)

    # Read the whole script at once instead of concatenating line by line.
    with open('./tts_storage/text/tts_script.txt', encoding='utf-8') as file_in:
        text = file_in.read()
    print("## TTS script:", text)

    root = ElementTree.fromstring(
        '<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="ko-KR"><voice name="ko-KR-SunHiNeural"><prosody volume="{}" rate="{}" pitch="{}">{}</prosody></voice></speak>'
        .format(vol_, rate, pitch, text))
    # Construct the tree from the root directly instead of calling the
    # private ElementTree._setroot().
    xml_script = ElementTree.ElementTree(root)
    ElementTree.dump(root)
    xml_script.write('ssml.xml')
def result():
    """Flask handler: speak a French message, translate it to English,
    speak the translation, and persist the caller's number.

    Reads 'message' and 'number' from the posted form.  Synthesizes the
    French text to message_fr.wav, translates it with the Azure Translator
    REST API, synthesizes the English result to message_en.wav, and stores
    {"number": number} in limit.json.  Returns the original message.
    """
    message = request.form['message']
    number = request.form['number']

    # NOTE(review): hard-coded subscription keys/regions should live in
    # configuration or environment variables, not in source.
    speech_config = SpeechConfig(
        subscription="0a6a0817af9f46aea9054beaa3d30290", region="westeurope")
    audio_config = AudioOutputConfig(filename="message_fr.wav")
    speech_config.speech_synthesis_voice_name = "fr-FR-DeniseNeural"
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    synthesizer.speak_text_async(message)

    # Azure Translator resource settings.
    subscription_key = "e134037165514c648a57bf6ccc95e541"
    endpoint = "https://api.cognitive.microsofttranslator.com"
    # Location (region) is required when using a Cognitive Services resource.
    location = "francecentral"
    path = '/translate'
    # (was computed twice in the original; once is enough)
    constructed_url = endpoint + path

    params = {'api-version': '3.0', 'from': 'fr', 'to': ['en']}
    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Ocp-Apim-Subscription-Region': location,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }
    # You can pass more than one object in body.
    body = [{'text': message}]
    quest = requests.post(constructed_url, params=params, headers=headers,
                          json=body)
    response = quest.json()
    translator = response[0]["translations"][0]["text"]

    # Speak the English translation with a different voice/output file.
    audio_config = AudioOutputConfig(filename="message_en.wav")
    speech_config.speech_synthesis_voice_name = "en-US-AriaNeural"
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    synthesizer.speak_text_async(translator)

    data = {"number": number}
    with open("limit.json", "w") as file:
        json.dump(data, file)
    return (message)
def show_action_from_speech(intent, entities):
    """Acknowledge a recognized intent out loud and display a matching image.

    For a switch_on/switch_off intent with a leading 'device' entity, the
    action becomes '<intent>_<device>'; otherwise it stays 'unknown'.  The
    confirmation is spoken via Azure Speech and data/luis/<action>.jpg is
    rendered with matplotlib.
    """
    import matplotlib.pyplot as plt
    from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, AudioConfig
    from PIL import Image
    from dotenv import load_dotenv
    import json
    import os

    action = 'unknown'
    device = 'none'

    # Only the switch intents carry a device entity we can act on.
    if intent in ('switch_on', 'switch_off') and len(entities) > 0:
        first_entity = entities[0]
        if first_entity["type"] == 'device':
            device = first_entity["entity"]
            action = intent + '_' + device

    # Speech credentials come from the environment (.env file).
    load_dotenv()
    cog_key = os.getenv('SPEECH_KEY')
    cog_location = os.getenv('SPEECH_REGION')

    response_text = "OK, I'll {} the {}!".format(intent, device).replace("_", " ")
    speech_config = SpeechConfig(cog_key, cog_location)
    speech_synthesizer = SpeechSynthesizer(speech_config)
    result = speech_synthesizer.speak_text(response_text)

    # Show the illustration that corresponds to the resolved action.
    img_name = action + '.jpg'
    img = Image.open(os.path.join("data", "luis", img_name))
    plt.axis('off')
    plt.imshow(img)
def try_setup_apis(self):
    """Initialize the Azure speech service config if an API key is configured.

    Reads the key from self.settings; on any setup failure the error is
    logged and self.aservice is left unset.
    """
    api_key = self.settings.get_azure_key()
    if api_key:
        try:
            self.aservice = SpeechConfig(subscription=api_key,
                                         region="eastus")
        # Narrowed from a bare `except:` which would also swallow
        # KeyboardInterrupt/SystemExit.
        except Exception:
            logger.warning(
                'Azure speech setup failed. Check your API key.')
def tts(language, text):
    """Synthesize `text` in `language` via Azure TTS and return raw audio bytes.

    The SSML template is read from spellout/common/language.xml and filled
    with the language code and text.  Uses module-level `tts_key`/`region`.
    """
    speech_config = SpeechConfig(subscription=tts_key, region=region)
    # audio_config=None: keep audio in memory instead of playing it.
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=None)
    # Use a context manager so the template file handle is closed promptly
    # (the original open(...).read() leaked the handle).
    with open("spellout/common/language.xml", "r") as template_file:
        ssml_string = template_file.read()
    ssml_string = ssml_string.format(lang=language, text=text)
    result = synthesizer.speak_ssml_async(ssml_string).get()
    result = result.audio_data
    return result
async def setup_azure(filename):
    """ Returns an Azure Speech Synthesizer pointing to the given filename """
    auto_detect_source_language_config = None
    speech_config = SpeechConfig(subscription=setup['azure']['key'],
                                 region=setup['azure']['region'])
    # Empty/'default' voice -> let Azure auto-detect the source language;
    # otherwise pin the configured voice name.
    if setup['azure']['voice'] in ('', 'default'):
        auto_detect_source_language_config = AutoDetectSourceLanguageConfig(
            None, None)
    else:
        speech_config.speech_synthesis_voice_name = setup['azure']['voice']
    # `is None` instead of `== None` (PEP 8 identity comparison).
    if filename is None:
        audio_config = AudioOutputConfig(use_default_speaker=True)
    else:
        audio_config = AudioOutputConfig(filename=filename)
    synthesizer = SpeechSynthesizer(
        speech_config=speech_config,
        audio_config=audio_config,
        auto_detect_source_language_config=auto_detect_source_language_config)
    return synthesizer
def _do_tts(self, use_speaker: bool, ssml_config: str, output_file: str):
    """Synthesize the given SSML and save the audio to `output_file`.

    When `use_speaker` is True the audio is also played on the default
    speaker.  Uses the instance's Azure subscription/region credentials.
    """
    print("Start: ", output_file)
    config = SpeechConfig(subscription=self._subscription,
                          region=self._region)
    out_cfg = AudioOutputConfig(use_default_speaker=use_speaker)
    tts_engine = SpeechSynthesizer(speech_config=config,
                                   audio_config=out_cfg)
    # Block until synthesis completes, then persist the audio stream.
    synthesis_result = tts_engine.speak_ssml_async(ssml_config).get()
    AudioDataStream(synthesis_result).save_to_wav_file(output_file)
    print("Finished", output_file)
def tts(item):
    """Speak `item` via Azure TTS and save the audio to disk.

    Synthesis is written to ./result.wav by the audio config, and the
    result stream is additionally saved via AudioDataStream.
    """
    speech_config = SpeechConfig(
        subscription="bc0912f626b44d5a8bb00e4497644fa4", region="westus")
    audio_config = AudioOutputConfig(filename="./result.wav")
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    appendString = ""
    # if res == "OK":
    #     appendString = "is in direction you're looking"
    # else:
    #     appendString = "is not in direction you're looking"
    #
    # BUG FIX: the line defining `result` was commented out, so the
    # AudioDataStream call below raised NameError.  Restored it.
    result = synthesizer.speak_text_async(item + appendString).get()
    stream = AudioDataStream(result)
    # NOTE(review): this writes WAV-format data to a .mp3 filename —
    # confirm the intended output format/extension.
    stream.save_to_wav_file("./result.mp3")
from azure.cognitiveservices.speech.audio import AudioOutputConfig

# Text Analytics resource credentials.
key = "45af936cd5f54c8790ba15d2950766bc"
endpoint = "https://analisistextohack.cognitiveservices.azure.com/"

from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
import pydot
import os

# Module-level state: documents[0] holds the uploaded file's text;
# listapalabras presumably accumulates extracted words — TODO confirm.
documents = [' ']
listapalabras = [' ']

# Azure Speech config; synthesized audio goes to file.wav.
speech_config = SpeechConfig(subscription="546da9a0b95d4b29a806c1c7d8d147bc",
                             region="southcentralus")
audio_config = AudioOutputConfig(filename="file.wav")

# Flask app: only .txt uploads up to 2 MB, stored under uploads/.
app = Flask(__name__, template_folder="templates")
app.config['MAX_CONTENT_LENGTH'] = 2 * 1024 * 1024
app.config['UPLOAD_EXTENSIONS'] = ['.txt']
app.config['UPLOAD_PATH'] = 'uploads'


def leerfichero():
    # Load uploads/archivo.txt into the shared `documents` buffer,
    # stripping the trailing newline.
    global documents
    archivo = open("uploads/archivo.txt", "r")
    documents[0] = archivo.read()
    documents[0] = documents[0].rstrip('\n')


# NOTE(review): definition continues beyond this chunk.
def authenticate_client():
from reachy import parts, Reachy

# Robot with both arms on the WebSocket IO backend and force-gripper hands.
robot = Reachy(
    right_arm=parts.RightArm(io='ws', hand='force_gripper'),
    left_arm=parts.LeftArm(io='ws', hand='force_gripper'),
)

# Local offline TTS engine (pyttsx3 imported elsewhere in this file).
engine = pyttsx3.init()

# Move the left arm to its neutral (zeroed) pose.
robot.left_arm.shoulder_roll.goal_position = 0
robot.left_arm.arm_yaw.goal_position = 0
robot.left_arm.elbow_pitch.goal_position = 0
robot.left_arm.hand.forearm_yaw.goal_position = 0

# Azure TTS playing through the default speaker.
speech_config = SpeechConfig(subscription="subscriptionkey",
                             region="westeurope")
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)

# Google Assistant gRPC constants (embedded_assistant_pb2 imported elsewhere).
ASSISTANT_API_ENDPOINT = 'embeddedassistant.googleapis.com'
END_OF_UTTERANCE = embedded_assistant_pb2.AssistResponse.END_OF_UTTERANCE
DIALOG_FOLLOW_ON = embedded_assistant_pb2.DialogStateOut.DIALOG_FOLLOW_ON
CLOSE_MICROPHONE = embedded_assistant_pb2.DialogStateOut.CLOSE_MICROPHONE
PLAYING = embedded_assistant_pb2.ScreenOutConfig.PLAYING
DEFAULT_GRPC_DEADLINE = 60 * 3 + 5

# NOTE(review): `global` at module level is a no-op; these presumably
# pre-declare names assigned inside functions elsewhere — confirm.
global spokenAnswer
global followUp
global followUpSentence
#!/usr/bin/python3
"""Synthesize a fixed Chinese phrase to file.wav with Azure TTS and
report the outcome (completed / canceled, with error details)."""
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
import azure.cognitiveservices.speech as speechsdk
from azure.cognitiveservices.speech.audio import AudioOutputConfig

voice = "zh-CN-XiaoxiaoNeural"
text = '你好'

# Configure the service, voice, and the WAV output target.
speech_config = SpeechConfig(subscription="3cb77646eea84168b348969306ff2a3c",
                             region="eastus")
speech_config.speech_synthesis_voice_name = voice
audio_config = AudioOutputConfig(filename="file.wav")
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)

# Block until synthesis finishes so the result can be inspected.
result = synthesizer.speak_text_async(text).get()

# Check result
reason = result.reason
if reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized to speaker for text [{}] with voice [{}]".format(
        text, voice))
elif reason == speechsdk.ResultReason.Canceled:
    cancellation_details = result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        print("Error details: {}".format(cancellation_details.error_details))
"""Send an image to a local captioning API, strip sequence markers from the
predicted caption, and speak it via Azure TTS (SSML, default speaker)."""
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig

addr = 'http://127.0.0.1:5000'
test_url = addr + '/predict_api'

content_type = 'image/jpeg'
headers = {'content-type': content_type}

print("read img")
img = cv2.imread('images/t21.jpg')
_, img_encoded = cv2.imencode('.jpg', img)

print("send img")
# FIX: ndarray.tostring() is deprecated/removed in modern NumPy —
# tobytes() returns the same raw buffer.
response = requests.post(test_url, data=img_encoded.tobytes(),
                         headers=headers)

print("recv img")
pred = json.loads(response.text)
query = pred["pred"]

# Drop the caption model's sequence delimiter tokens.
stopwords = ['startseq', 'endseq']
querywords = query.split()
resultwords = [word for word in querywords if word.lower() not in stopwords]
result = ' '.join(resultwords)
print(result)

res = '<speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="en-US"><voice name="en-US-Guy24kRUS">' + result + '</voice></speak>'

subscription_key = '639cbe821c074e68ba19be3d46a9cbda'
speech_config = SpeechConfig(subscription=subscription_key,
                             region="centralindia")
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
synthesizer.speak_ssml_async(res)
from random import randrange
from nltk.corpus import wordnet as wn
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat

# NOTE(review): this shadows the stdlib `uuid` module with the CLI argument.
uuid = sys.argv[1]

# Subscription keys are kept in local text files.
with open("bing.txt") as f:
    bing_subs = f.read()
with open("speech.txt") as f:
    speech_subs = f.read()

# Bing Image Search endpoint + auth header.
search_url = "https://api.bing.microsoft.com/v7.0/images/search"
headers = {"Ocp-Apim-Subscription-Key": bing_subs}

speech_config = SpeechConfig(subscription=speech_subs,
                             region="southcentralus")

# Prefer the summary if one was generated for this job, else the raw text.
if (os.path.isfile("results/{}/summary.txt".format(uuid))):
    f2work = "results/{}/summary.txt".format(uuid)
else:
    f2work = "results/{}/text.txt".format(uuid)
with open(f2work) as f:
    text = f.read()

# Split into sentences and drop fragments of 5 characters or fewer.
sentences = text.split(".")
sentences = [x for x in sentences if len(x) > 5]

# NOTE(review): loop body continues beyond this chunk.
for snumb, sentence in enumerate(sentences):
    if (snumb > 3):
"""Concatenate an SSML header (Top.txt), the user's text (InputText.txt),
and an SSML footer (Bottom.txt) into Cache\\InputText.xml, then synthesize
it to Cache\\ConvertCache.mp3 via Azure TTS."""
from azure.cognitiveservices.speech import AudioDataStream
from azure.cognitiveservices.speech import SpeechConfig
from azure.cognitiveservices.speech import SpeechSynthesizer
from azure.cognitiveservices.speech import SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig
import os
import time

time_now = time.strftime("%Y.%m.%d.%H%M%S", time.localtime())

KEY = 'dece00af114f42a8b6c7324dca4d4125'
REGION = "southeastasia"
speech_config = SpeechConfig(subscription=KEY, region=REGION)

mp3_format = 'Audio16Khz32KBitRateMonoMp3'
speech_config.set_speech_synthesis_output_format(
    SpeechSynthesisOutputFormat[mp3_format])
audio_config = AudioOutputConfig(filename=r'Cache\ConvertCache.mp3')
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)

# Assemble header + body + footer.  Use context managers so every file
# handle is closed (the original open(...).read()/write() leaked them),
# and raw strings so the backslash paths can't be misread as escapes.
with open(r"Cache\Top.txt", "r") as top_file:
    data1 = top_file.read()
with open("InputText.txt", "r") as body_file:
    data2 = body_file.read()
with open(r"Cache\Bottom.txt", "r") as bottom_file:
    data3 = bottom_file.read()
data1 += "\n"
data1 += data2
data1 += "\n"
data1 += data3
with open(r"Cache\InputText.xml", "w") as xml_out:
    xml_out.write(data1)

with open(r"Cache\InputText.xml", "r") as xml_in:
    ssml_string = xml_in.read()
# FIX: wait for the async synthesis to finish so ConvertCache.mp3 is
# fully written before any later step reads it.
synthesizer.speak_ssml_async(ssml_string).get()

filename1 = r'Cache\ConvertCache.mp3'
from azure.cognitiveservices.speech import AudioDataStream, SpeechRecognizer, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from azure.cognitiveservices.language.luis.authoring import LUISAuthoringClient
from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient
from msrest.authentication import CognitiveServicesCredentials
import json

# LUIS application credentials.
luis_app_id = '20263b4d-b405-4c9b-8de8-e51663797c41'
luis_key = 'b45490c8a83243f9a6320ec7e8e85a43'
luis_endpoint = 'https://koinonos-language-understanding.cognitiveservices.azure.com/'

# Configure speech recognizer
speech_key, service_region = "40a03ef9d3d44916bdcd1c4457b82c13", "eastus"
speech_config = SpeechConfig(subscription=speech_key, region=service_region)
speech_recognizer = SpeechRecognizer(speech_config=speech_config)

# Configure speech synthesizer
# NOTE(review): audio_config is built but not passed to the synthesizer,
# so the default output is used — confirm whether this is intentional.
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config)

# LUIS runtime client used to predict intents from recognized speech.
runtimeCredentials = CognitiveServicesCredentials(luis_key)
clientRuntime = LUISRuntimeClient(endpoint=luis_endpoint,
                                  credentials=runtimeCredentials)

print("Start listening...")
speech = speech_recognizer.recognize_once()

# Loop until the user says "Stop".
# NOTE(review): loop/try body continues beyond this chunk.
try:
    while speech.text != "Stop":
        # Production == slot name
        print("Your query is: ", speech.text)
        predictionRequest = { "query" : speech.text}
"""Read a named summary text file and prepare Azure TTS output for it.

Notebook-style export: takes the audio file's base name on the command
line, points the synthesizer output at the matching *_summary.wav path,
and loads the summary text with newlines stripped.
"""
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig

# In[ ]:
ap = argparse.ArgumentParser()
ap.add_argument("-a", "--audio_name", required=True,
                help="Please Enter Audio File Name(Without extension)")
args = vars(ap.parse_args())

# In[2]:
speech_config = SpeechConfig(subscription="__KEY", region="Region")

# In[3]:
audio_name = args['audio_name']
audio_config = AudioOutputConfig(
    filename="public/python/output_audio_files/" + audio_name + "_summary.wav")

# In[4]:
with open("summary/" + audio_name + "_summary.txt", 'r') as file:
    data = file.read().replace('\n', '')

# In[5]:
# Bare expression left over from the notebook cell echo; has no effect here.
data
"""Set up Azure Computer Vision + Speech clients and pick a remote image
URL to analyze (the image-analysis calls follow elsewhere)."""
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from array import array
import os
from PIL import Image
import sys
import time

# Computer Vision subscription (values to be filled in).
subscription_key = ""
endpoint = ""

# Speech subscription (values to be filled in).
speech_config = SpeechConfig(subscription="", region="")

# Authenticate the Computer Vision client.
computervision_client = ComputerVisionClient(
    endpoint, CognitiveServicesCredentials(subscription_key))

# Remote images available for analysis; the last assignment wins.
# Image 1:
#remote_image_url = "https://image.slidesharecdn.com/greenfinance-150427100253-conversion-gate01/95/green-finance-8-638.jpg?cb=1430129290"
# Image 2:
#remote_image_url = "https://image.slidesharecdn.com/subculturerepresentation-170607074544/95/subculture-representation-30-638.jpg?cb=1496821576"
# Image 3:
remote_image_url = "https://image1.slideserve.com/2042659/fault-tolerance-l.jpg"