from xml.etree import ElementTree

from azure.cognitiveservices.speech import (SpeechConfig, SpeechSynthesizer,
                                            SpeechSynthesisOutputFormat)


def generate_voice():
    ############# AZURE #######################
    # Set volume/rate/pitch (volume default = 50).
    rate = "-12%"
    pitch = "3%"
    vol_ = 10
    # An Azure Speech key is required here.
    speech_config = SpeechConfig(subscription="APIKEY", region="eastus")
    speech_config.set_speech_synthesis_output_format(
        SpeechSynthesisOutputFormat["Riff24Khz16BitMonoPcm"])
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=None)
    # Read the TTS script from a text file.
    with open('./tts_storage/text/tts_script.txt',
              encoding='utf-8') as file_in:
        text = file_in.read()
        print("## TTS script:", text)

    # Wrap the script in SSML for the ko-KR-SunHiNeural voice.
    root = ElementTree.fromstring(
        '<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="ko-KR"><voice name="ko-KR-SunHiNeural"><prosody volume="{}" rate="{}" pitch="{}">{}</prosody></voice></speak>'
        .format(vol_, rate, pitch, text))
    ElementTree.dump(root)  # echo the generated SSML for debugging
    xml_script = ElementTree.ElementTree(root)  # public API, not _setroot()
    xml_script.write('ssml.xml')
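
# Hedged follow-up sketch: generate_voice() writes ssml.xml but never calls the
# synthesizer it creates. Assuming the same key/region, the saved SSML could be
# rendered like this (the output path 'tts_output.wav' is hypothetical):
from azure.cognitiveservices.speech import (AudioDataStream, SpeechConfig,
                                            SpeechSynthesizer)

speech_config = SpeechConfig(subscription="APIKEY", region="eastus")
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
with open('ssml.xml', encoding='utf-8') as f:
    ssml = f.read()
result = synthesizer.speak_ssml_async(ssml).get()  # block until synthesis ends
AudioDataStream(result).save_to_wav_file('tts_output.wav')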
Example #2
import json
import uuid

import requests
from flask import request

from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig


def result():
    message = request.form['message']
    number = request.form['number']

    # Synthesize the original French message to a wav file.
    speech_config = SpeechConfig(
        subscription="YOUR_SPEECH_KEY", region="westeurope")
    audio_config = AudioOutputConfig(filename="message_fr.wav")
    speech_config.speech_synthesis_voice_name = "fr-FR-DeniseNeural"
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    synthesizer.speak_text_async(message).get()  # wait so the file is complete

    # Add your Translator subscription key and endpoint.
    subscription_key = "YOUR_TRANSLATOR_KEY"
    endpoint = "https://api.cognitive.microsofttranslator.com"

    # Add your location, also known as region. The default is global.
    # This is required if using a Cognitive Services resource.
    location = "francecentral"

    path = '/translate'
    constructed_url = endpoint + path

    params = {'api-version': '3.0', 'from': 'fr', 'to': ['en']}

    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Ocp-Apim-Subscription-Region': location,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }

    # You can pass more than one object in body.
    body = [{'text': message}]

    # Translate the message from French to English.
    response = requests.post(constructed_url,
                             params=params,
                             headers=headers,
                             json=body).json()

    translation = response[0]["translations"][0]["text"]

    # Synthesize the English translation to a second wav file.
    audio_config = AudioOutputConfig(filename="message_en.wav")
    speech_config.speech_synthesis_voice_name = "en-US-AriaNeural"
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    synthesizer.speak_text_async(translation).get()  # wait before returning

    data = {"number": number}
    with open("limit.json", "w") as file:
        json.dump(data, file)

    return message
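
# Hedged hardening sketch for the Translator call above: surface HTTP errors
# and fall back to the original text when no translation comes back.
resp = requests.post(constructed_url, params=params, headers=headers, json=body)
resp.raise_for_status()  # e.g. 401 for a bad key, 403 for a wrong region
payload = resp.json()    # shape: [{"translations": [{"text": ..., "to": "en"}]}]
translations = payload[0].get("translations", [])
translation = translations[0]["text"] if translations else message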
Example #3
def show_action_from_speech(intent, entities):
    import os

    import matplotlib.pyplot as plt
    from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer
    from dotenv import load_dotenv
    from PIL import Image

    action = 'unknown'
    device = 'none'
    if intent in ['switch_on', 'switch_off']:
        # Check for entities
        if len(entities) > 0:
            # Check for a device entity
            # Get the first entity (if any)
            if entities[0]["type"] == 'device':
                device = entities[0]["entity"]
                action = intent + '_' + device
        load_dotenv()
        cog_key = os.getenv('SPEECH_KEY')
        cog_location = os.getenv('SPEECH_REGION')
        response_text = "OK, I'll {} the {}!".format(intent,
                                                     device).replace("_", " ")
        speech_config = SpeechConfig(cog_key, cog_location)
        speech_synthesizer = SpeechSynthesizer(speech_config)
        result = speech_synthesizer.speak_text(response_text)

    # Show an image that illustrates the resulting action.
    img_name = action + '.jpg'
    img = Image.open(os.path.join("data", "luis", img_name))
    plt.axis('off')
    plt.imshow(img)
    plt.show()
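
# Hedged usage sketch; the entity shape is inferred from the lookups above.
show_action_from_speech('switch_on', [{"type": "device", "entity": "light"}])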
Example #4
    def try_setup_apis(self):
        api_key = self.settings.get_azure_key()
        if api_key:
            try:
                self.aservice = SpeechConfig(subscription=api_key,
                                             region="eastus")
            except Exception:
                logger.warning(
                    'Azure speech setup failed. Check your API key.')
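
# Hedged note: constructing SpeechConfig performs no network call, so the
# try/except above cannot actually catch a bad key. A standalone probe sketch
# (the function name and "ping" text are assumptions, not from the original):
from azure.cognitiveservices.speech import (ResultReason, SpeechConfig,
                                            SpeechSynthesizer)

def azure_key_works(api_key, region="eastus"):
    config = SpeechConfig(subscription=api_key, region=region)
    probe = SpeechSynthesizer(speech_config=config, audio_config=None)
    # A bad key or region only surfaces on first use, e.g. a short synthesis.
    return (probe.speak_text_async("ping").get().reason ==
            ResultReason.SynthesizingAudioCompleted)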
Example #5
def tts(language, text):
    speech_config = SpeechConfig(subscription=tts_key, region=region)
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=None)
    # Fill the SSML template with the requested language and text.
    with open("spellout/common/language.xml", "r") as f:
        ssml_string = f.read().format(lang=language, text=text)
    result = synthesizer.speak_ssml_async(ssml_string).get()
    return result.audio_data  # raw audio bytes of the synthesized speech
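
# Hedged usage sketch for tts(); the output filename is an assumption. With
# the default output format the returned bytes are RIFF/WAV data.
audio_bytes = tts("en-US", "Hello")
with open("out.wav", "wb") as f:
    f.write(audio_bytes)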
Example #6
async def setup_azure(filename):
    """
    Returns an Azure Speech synthesizer writing to the given filename
    (or to the default speaker when filename is None).
    """
    auto_detect_source_language_config = None
    speech_config = SpeechConfig(subscription=setup['azure']['key'],
                                 region=setup['azure']['region'])
    if setup['azure']['voice'] in ('', 'default'):
        # No explicit voice: let the service auto-detect the source language.
        auto_detect_source_language_config = AutoDetectSourceLanguageConfig()
    else:
        speech_config.speech_synthesis_voice_name = setup['azure']['voice']
    if filename is None:
        audio_config = AudioOutputConfig(use_default_speaker=True)
    else:
        audio_config = AudioOutputConfig(filename=filename)
    synthesizer = SpeechSynthesizer(
        speech_config=speech_config,
        audio_config=audio_config,
        auto_detect_source_language_config=auto_detect_source_language_config)
    return synthesizer
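
# Hedged usage sketch: setup_azure() never awaits anything, so it can be
# driven directly with asyncio.run (text and output path are assumptions).
import asyncio

synthesizer = asyncio.run(setup_azure("out.wav"))
synthesizer.speak_text_async("Hello there").get()  # wait for the file to flush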
Example #7
    def _do_tts(self, use_speaker: bool, ssml_config: str, output_file: str):
        print("Start:", output_file)
        speech_config = SpeechConfig(subscription=self._subscription,
                                     region=self._region)
        audio_config = AudioOutputConfig(use_default_speaker=use_speaker)
        synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                        audio_config=audio_config)

        # Synthesize the SSML, optionally playing it on the default speaker.
        result = synthesizer.speak_ssml_async(ssml_config).get()

        # Also persist the synthesized audio to a wav file.
        stream = AudioDataStream(result)
        stream.save_to_wav_file(output_file)
        print("Finished:", output_file)
Example #8
def tts(item):
    speech_config = SpeechConfig(
        subscription="YOUR_SPEECH_KEY", region="westus")
    audio_config = AudioOutputConfig(filename="./result.wav")

    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    appendString = ""

    # if res == "OK":
    #     appendString = "is in direction you're looking"
    # else:
    #     appendString = "is not in direction you're looking"

    result = synthesizer.speak_text_async(item + appendString).get()
    stream = AudioDataStream(result)
    # Note: without an MP3 output format on speech_config this writes RIFF/WAV
    # data, despite the .mp3 extension.
    stream.save_to_wav_file("./result.mp3")
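
# Hedged sketch: to get real MP3 bytes, set an MP3 output format up front and
# let AudioDataStream write it (save_to_wav_file writes whatever format the
# stream holds, despite its name). Key/region placeholders are assumptions.
from azure.cognitiveservices.speech import (AudioDataStream, SpeechConfig,
                                            SpeechSynthesizer,
                                            SpeechSynthesisOutputFormat)

speech_config = SpeechConfig(subscription="YOUR_SPEECH_KEY", region="westus")
speech_config.set_speech_synthesis_output_format(
    SpeechSynthesisOutputFormat["Audio16Khz32KBitRateMonoMp3"])
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=None)
result = synthesizer.speak_text_async("hello").get()
AudioDataStream(result).save_to_wav_file("./result.mp3")  # actual MP3 now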
Example #9
import os

from flask import Flask
import pydot
from azure.ai.textanalytics import TextAnalyticsClient
from azure.core.credentials import AzureKeyCredential
from azure.cognitiveservices.speech import SpeechConfig
from azure.cognitiveservices.speech.audio import AudioOutputConfig

key = "YOUR_TEXT_ANALYTICS_KEY"
endpoint = "https://analisistextohack.cognitiveservices.azure.com/"

documents = [' ']

listapalabras = [' ']


speech_config = SpeechConfig(subscription="YOUR_SPEECH_KEY",
                             region="southcentralus")
audio_config = AudioOutputConfig(filename="file.wav")

app = Flask(__name__, template_folder="templates")
app.config['MAX_CONTENT_LENGTH'] = 2 * 1024 * 1024
app.config['UPLOAD_EXTENSIONS'] = ['.txt']
app.config['UPLOAD_PATH'] = 'uploads'


# Read the uploaded text file into the shared documents list.
def leerfichero():
    global documents
    with open("uploads/archivo.txt", "r") as archivo:
        documents[0] = archivo.read().rstrip('\n')


def authenticate_client():
Example #10
import pyttsx3
from reachy import parts, Reachy

robot = Reachy(
    right_arm=parts.RightArm(io='ws', hand='force_gripper'),
    left_arm=parts.LeftArm(io='ws', hand='force_gripper'),
)

engine = pyttsx3.init()

robot.left_arm.shoulder_roll.goal_position = 0
robot.left_arm.arm_yaw.goal_position = 0
robot.left_arm.elbow_pitch.goal_position = 0
robot.left_arm.hand.forearm_yaw.goal_position = 0

speech_config = SpeechConfig(subscription="subscriptionkey",
                             region="westeurope")
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)

# Google Assistant gRPC constants (requires google-assistant-grpc).
from google.assistant.embedded.v1alpha2 import embedded_assistant_pb2

ASSISTANT_API_ENDPOINT = 'embeddedassistant.googleapis.com'
END_OF_UTTERANCE = embedded_assistant_pb2.AssistResponse.END_OF_UTTERANCE
DIALOG_FOLLOW_ON = embedded_assistant_pb2.DialogStateOut.DIALOG_FOLLOW_ON
CLOSE_MICROPHONE = embedded_assistant_pb2.DialogStateOut.CLOSE_MICROPHONE
PLAYING = embedded_assistant_pb2.ScreenOutConfig.PLAYING
DEFAULT_GRPC_DEADLINE = 60 * 3 + 5

# Note: 'global' at module level is a no-op; these names are presumably
# assigned inside functions elsewhere in the original script.
global spokenAnswer
global followUp
global followUpSentence
Example #11
#!/usr/bin/python3

from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
import azure.cognitiveservices.speech as speechsdk
from azure.cognitiveservices.speech.audio import AudioOutputConfig

voice = "zh-CN-XiaoxiaoNeural"
text = '你好'
speech_config = SpeechConfig(subscription="3cb77646eea84168b348969306ff2a3c",
                             region="eastus")
speech_config.speech_synthesis_voice_name = voice
audio_config = AudioOutputConfig(filename="file.wav")
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
result = synthesizer.speak_text_async(text).get()

# Check result
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized to file.wav for text [{}] with voice [{}]".format(
        text, voice))
elif result.reason == speechsdk.ResultReason.Canceled:
    cancellation_details = result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        print("Error details: {}".format(cancellation_details.error_details))
Example #12
import json

import cv2
import requests
from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig

addr = 'http://127.0.0.1:5000'
test_url = addr + '/predict_api'
content_type = 'image/jpeg'
headers = {'content-type': content_type}
print("read img")
img = cv2.imread('images/t21.jpg')
_, img_encoded = cv2.imencode('.jpg', img)
print("send img")
response = requests.post(test_url,
                         data=img_encoded.tobytes(),
                         headers=headers)
print("recv img")
pred = json.loads(response.text)
query = pred["pred"]
stopwords = ['startseq', 'endseq']
querywords = query.split()
resultwords = [word for word in querywords if word.lower() not in stopwords]
result = ' '.join(resultwords)
print(result)
res = '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US"><voice name="en-US-Guy24kRUS">' + result + '</voice></speak>'
subscription_key = 'YOUR_SPEECH_KEY'
speech_config = SpeechConfig(subscription=subscription_key,
                             region="centralindia")
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
synthesizer.speak_ssml_async(res).get()  # block so playback can finish
Example #13
import os
import sys
from random import randrange

import requests
from nltk.corpus import wordnet as wn
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat

uuid = sys.argv[1]
# Strip trailing newlines so the keys are usable in HTTP headers.
with open("bing.txt") as f:
    bing_subs = f.read().strip()

with open("speech.txt") as f:
    speech_subs = f.read().strip()

search_url = "https://api.bing.microsoft.com/v7.0/images/search"

headers = {"Ocp-Apim-Subscription-Key": bing_subs}
speech_config = SpeechConfig(subscription=speech_subs, region="southcentralus")

if (os.path.isfile("results/{}/summary.txt".format(uuid))):
    f2work = "results/{}/summary.txt".format(uuid)
else:
    f2work = "results/{}/text.txt".format(uuid)

with open(f2work) as f:
    text = f.read()

sentences = text.split(".")

sentences = [x for x in sentences if len(x) > 5]

for snumb, sentence in enumerate(sentences):
    if (snumb > 3):
Example #14
from azure.cognitiveservices.speech import AudioDataStream
from azure.cognitiveservices.speech import SpeechConfig
from azure.cognitiveservices.speech import SpeechSynthesizer
from azure.cognitiveservices.speech import SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig
import os
import time
time_now = time.strftime("%Y.%m.%d.%H%M%S", time.localtime())
KEY = 'YOUR_SPEECH_KEY'
REGION = "southeastasia"
speech_config = SpeechConfig(subscription=KEY, region=REGION)
mp3_format = 'Audio16Khz32KBitRateMonoMp3'
speech_config.set_speech_synthesis_output_format(
    SpeechSynthesisOutputFormat[mp3_format])
audio_config = AudioOutputConfig(filename=r'Cache\ConvertCache.mp3')
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)

# Build the SSML document: Top.txt + InputText.txt + Bottom.txt.
with open(r"Cache\Top.txt", "r") as f:
    data1 = f.read()
with open("InputText.txt", "r") as f:
    data2 = f.read()
with open(r"Cache\Bottom.txt", "r") as f:
    data3 = f.read()
ssml_string = data1 + "\n" + data2 + "\n" + data3
with open(r"Cache\InputText.xml", "w") as f:
    f.write(ssml_string)

synthesizer.speak_ssml_async(ssml_string).get()  # wait for the mp3 to be written
filename1 = r'Cache\ConvertCache.mp3'
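
# Hypothetical contents of Cache\Top.txt and Cache\Bottom.txt that would make
# the concatenation above valid SSML (the real files are not shown here):
# Top.txt:    <speak version="1.0"
#             xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="zh-CN">
#             <voice name="zh-CN-XiaoxiaoNeural">
# Bottom.txt: </voice></speak>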
Example #15
from azure.cognitiveservices.speech import AudioDataStream, SpeechRecognizer, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from azure.cognitiveservices.language.luis.authoring import LUISAuthoringClient
from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient
from msrest.authentication import CognitiveServicesCredentials
import json

luis_app_id = '20263b4d-b405-4c9b-8de8-e51663797c41'
luis_key = 'YOUR_LUIS_KEY'
luis_endpoint = 'https://koinonos-language-understanding.cognitiveservices.azure.com/'

# Configure speech recognizer
speech_key, service_region = "YOUR_SPEECH_KEY", "eastus"
speech_config = SpeechConfig(subscription=speech_key, region=service_region)
speech_recognizer = SpeechRecognizer(speech_config=speech_config)

# Configure speech synthesizer; audio_config must be passed through, otherwise
# the default-speaker configuration above is silently unused.
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)

runtimeCredentials = CognitiveServicesCredentials(luis_key)
clientRuntime = LUISRuntimeClient(endpoint=luis_endpoint,
                                  credentials=runtimeCredentials)

print("Start listening...")
speech = speech_recognizer.recognize_once()
try:   
    while speech.text != "Stop":
        # Production == slot name
        print("Your query is: ", speech.text)
        predictionRequest = { "query" : speech.text}
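        # Hedged sketch of how the truncated loop likely continues; the
        # "Production == slot name" comment above suggests the slot-prediction
        # API, and the response handling here is an assumption.
        predictionResponse = clientRuntime.prediction.get_slot_prediction(
            luis_app_id, "Production", predictionRequest)
        print("Top intent: ", predictionResponse.prediction.top_intent)
        speech = speech_recognizer.recognize_once()  # listen for the next query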
Example #16
import argparse

from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig

# In[ ]:

ap = argparse.ArgumentParser()
ap.add_argument("-a",
                "--audio_name",
                required=True,
                help="Please Enter Audio File Name(Without extension)")
args = vars(ap.parse_args())

# In[2]:

speech_config = SpeechConfig(subscription="__KEY", region="Region")

# In[3]:

audio_config = AudioOutputConfig(filename="public/python/output_audio_files/" +
                                 args['audio_name'] + "_summary.wav")

# In[4]:

with open("summary/" + args['audio_name'] + "_summary.txt", 'r') as file:
    data = file.read().replace('\n', '')

# In[5]:

data
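
# In[6]:

# Hedged sketch of the synthesis step this notebook appears to be heading
# toward: speak the summary text into the wav file configured above.
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
synthesizer.speak_text_async(data).get()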
Example #17
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from msrest.authentication import CognitiveServicesCredentials

from array import array
import os
from PIL import Image
import sys
import time

# Computer Vision subscription
subscription_key = ""
endpoint = ""

# Speech subscription
speech_config = SpeechConfig(subscription="", region="")

# Authenticate the client
computervision_client = ComputerVisionClient(
    endpoint, CognitiveServicesCredentials(subscription_key))

# Consume the service to analyze the image

# Image 1
#remote_image_url = "https://image.slidesharecdn.com/greenfinance-150427100253-conversion-gate01/95/green-finance-8-638.jpg?cb=1430129290"

# Image 2
#remote_image_url = "https://image.slidesharecdn.com/subculturerepresentation-170607074544/95/subculture-representation-30-638.jpg?cb=1496821576"

# Image 3
remote_image_url = "https://image1.slideserve.com/2042659/fault-tolerance-l.jpg"