Ejemplo n.º 1
0
def voice_from_text(text, path):
    """Synthesize *text* to a uniquely-named WAV file under *path*.

    Uses the Azure neural voice en-US-JennyNeural in "chat" style and
    returns the path of the generated WAV file.
    """
    from xml.sax.saxutils import escape

    filepath = "{}/speech_{}.wav".format(path, str(uuid.uuid1().hex))
    if os.path.isfile(filepath):
        os.remove(filepath)

    # Escape the text so user input cannot break out of (or inject into)
    # the SSML document.
    ssml_string = """
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
    xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">
  <voice name="en-US-JennyNeural">
    <mstts:express-as style="chat">
      {}
    </mstts:express-as>
  </voice>
</speak>
    """.format(escape(text))

    speech_config = speechsdk.SpeechConfig(
        subscription=api_keys["microsoft-speech"]["key"],
        region=api_keys["microsoft-speech"]["region"])
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=None)
    result = synthesizer.speak_ssml_async(ssml_string).get()
    stream = AudioDataStream(result)
    stream.save_to_wav_file(filepath)
    # Bug fix: the original called speak_text_async(text) again here,
    # synthesizing (and playing) the same text a second time after the
    # file was already saved; that redundant call has been removed.

    return filepath
Ejemplo n.º 2
0
def show_action_from_speech(intent, entities):
    """Speak a confirmation for a smart-home intent and show its image.

    intent: LUIS intent name (e.g. 'switch_on' / 'switch_off').
    entities: list of entity dicts with 'type' and 'entity' keys.
    Displays data/luis/<action>.jpg via matplotlib.
    """
    import matplotlib.pyplot as plt
    from azure.cognitiveservices.speech import SpeechConfig, SpeechSynthesizer, AudioConfig
    from PIL import Image
    from dotenv import load_dotenv
    import json
    import os

    action = 'unknown'
    device = 'none'
    if intent in ['switch_on', 'switch_off']:
        # Use the first entity only if it identifies a device
        # (truthiness test replaces the non-idiomatic len(...) > 0).
        if entities and entities[0]["type"] == 'device':
            device = entities[0]["entity"]
            action = intent + '_' + device
        load_dotenv()
        cog_key = os.getenv('SPEECH_KEY')
        cog_location = os.getenv('SPEECH_REGION')
        response_text = "OK, I'll {} the {}!".format(intent,
                                                     device).replace("_", " ")
        speech_config = SpeechConfig(cog_key, cog_location)
        speech_synthesizer = SpeechSynthesizer(speech_config)
        # Return value unused: the call itself plays the confirmation.
        speech_synthesizer.speak_text(response_text)

    # Show the image associated with the resolved action.
    img_name = action + '.jpg'
    img = Image.open(os.path.join("data", "luis", img_name))
    plt.axis('off')
    plt.imshow(img)
Ejemplo n.º 3
0
def generaraudio():
    """Read the uploaded text file and speak its contents via Azure TTS.

    Relies on the module-level ``speech_config`` and ``audio_config``.
    """
    # ``with`` guarantees the file handle is closed (the original leaked it).
    with open("uploads/archivo.txt", "r") as archivo:
        documentos = archivo.read().rstrip('\n')
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    synthesizer.speak_text_async(documentos)
Ejemplo n.º 4
0
    def audio_tts(self, text):
        """Render ssml.xml to RIFF/WAV audio and save the stream to disk.

        NOTE(review): *text* is currently unused — the SSML is read
        verbatim from ssml.xml; confirm whether text should be injected.
        """
        self.speech_config.set_speech_synthesis_output_format(SpeechSynthesisOutputFormat["Riff24Khz16BitMonoPcm"])
        synthesizer = SpeechSynthesizer(speech_config=self.speech_config, audio_config=None)
        # ``with`` closes the SSML file (the original leaked the handle).
        with open("ssml.xml", "r") as ssml_file:
            ssml_string = ssml_file.read()
        result = synthesizer.speak_ssml_async(ssml_string).get()
        stream = AudioDataStream(result)
        # NOTE(review): the target looks like a directory, not a file name —
        # save_to_wav_file will likely fail; confirm the intended path.
        stream.save_to_wav_file("/root/alfonso/ext/")
Ejemplo n.º 5
0
def tts(language, text):
    """Synthesize *text* in *language* and return the raw audio bytes.

    Fills the SSML template at spellout/common/language.xml with the
    given language and text, using module-level ``tts_key``/``region``.
    """
    speech_config = SpeechConfig(subscription=tts_key, region=region)
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=None)
    # ``with`` closes the template file (the original leaked the handle).
    with open("spellout/common/language.xml", "r") as template:
        ssml_string = template.read()
    ssml_string = ssml_string.format(lang=language, text=text)
    result = synthesizer.speak_ssml_async(ssml_string).get()
    return result.audio_data
Ejemplo n.º 6
0
def voice_from_text(text, path):
    """Synthesize *text* into <path>/speech.wav and return *path*."""
    filepath = "{}/speech.wav".format(path)
    if os.path.isfile(filepath):
        os.remove(filepath)

    speech_config = speechsdk.SpeechConfig(subscription=api_keys["microsoft-speech"]["key"], region=api_keys["microsoft-speech"]["region"])
    audio_config = AudioOutputConfig(filename=filepath)
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
    # Bug fixes: speak the caller's *text* (the original spoke a hard-coded
    # test sentence and ignored the parameter) and block with .get() so the
    # WAV file is fully written before returning.
    synthesizer.speak_text_async(text).get()

    return path
Ejemplo n.º 7
0
def welcome_message(name):
    """Synthesize a personalized welcome message to welcome.wav.

    NOTE(review): the subscription key and output path are hard-coded;
    move them to configuration.
    """
    speech_config = speechsdk.SpeechConfig(
        subscription="b58d19e457574aa39bc0f8b9b763cd55",
        region="australiaeast")
    audio_config = AudioOutputConfig(
        filename=
        "C:/Users/Pranav Patel/Documents/schabu/back_end/python/welcome.wav")
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    text = "Hello " + name + "! Welcome to Schubu Recrutiment Process. Please Click on the Start button to begin the interview process."
    # .get() blocks until the WAV file is fully written (the original
    # returned while synthesis was still in flight).
    synthesizer.speak_text_async(text).get()
    print(text)
Ejemplo n.º 8
0
 def azure_text_to_speech(self, text):
     """Synthesize *text* with Azure TTS and return the raw audio bytes.

     Returns ``None`` when synthesis fails; the failure is logged.
     """
     try:
         synthesizer = SpeechSynthesizer(speech_config=self.aservice,
                                         audio_config=None)
         ssml = TSUBAKI_SSML.format(text=text)
         result = synthesizer.speak_ssml_async(ssml).get()
         data = result.audio_data
         if not data:
             # An empty payload means the service cancelled the request.
             logger.error(str(result.cancellation_details))
         return data
     except Exception:
         # Typo fixed in the log message ("Failiure" -> "Failure");
         # failure path now returns None explicitly.
         logger.exception("Azure Text to Speech Failure:")
         return None
Ejemplo n.º 9
0
    def _do_tts(self, use_speaker: bool, ssml_config: str, output_file: str):
        """Synthesize the given SSML and save the audio to *output_file*.

        When *use_speaker* is true the audio is also routed to the
        default speaker during synthesis.
        """
        print("Start: ", output_file)

        cfg = SpeechConfig(subscription=self._subscription,
                           region=self._region)
        out_cfg = AudioOutputConfig(use_default_speaker=use_speaker)
        tts_engine = SpeechSynthesizer(speech_config=cfg,
                                       audio_config=out_cfg)

        # Block until synthesis completes, then persist the audio stream.
        synthesis_result = tts_engine.speak_ssml_async(ssml_config).get()
        AudioDataStream(synthesis_result).save_to_wav_file(output_file)

        print("Finished", output_file)
Ejemplo n.º 10
0
def generate_voice():
    """Build the SSML file (ssml.xml) for the Korean TTS script.

    Reads ./tts_storage/text/tts_script.txt and wraps it in SSML with
    the configured volume/rate/pitch for the ko-KR-SunHiNeural voice.
    """
    ############# AZURE #######################
    # Prosody settings (volume default = 50).
    rate = "-12%"
    pitch = "3%"
    vol_ = 10
    # An Azure subscription key is required here.
    speech_config = SpeechConfig(subscription="APIKEY", region="eastus")
    speech_config.set_speech_synthesis_output_format(
        SpeechSynthesisOutputFormat["Riff24Khz16BitMonoPcm"])
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=None)

    # Read the whole TTS script at once; the original's manual
    # line-concatenation loop and the unused ``num_`` counter are gone.
    with open('./tts_storage/text/tts_script.txt',
              encoding='utf-8') as file_in:
        text = file_in.read()
    print("## TTS script:", text)

    root = ElementTree.fromstring(
        '<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="ko-KR"><voice name="ko-KR-SunHiNeural"><prosody  volume="{}" rate="{}" pitch="{}">{}</prosody></voice></speak>'
        .format(vol_, rate, pitch, text))
    # dump() echoes the SSML to stdout for debugging.
    ElementTree.dump(root)
    # Construct the tree from the root directly instead of using the
    # private _setroot() API.
    xml_script = ElementTree.ElementTree(root)
    xml_script.write('ssml.xml')
Ejemplo n.º 11
0
def result():
    """Flask endpoint: speak a French message, translate it to English,
    speak the translation, and persist the request counter.

    NOTE(review): subscription keys are hard-coded; move them to
    configuration.
    """
    message = request.form['message']
    number = request.form['number']

    # Synthesize the original French message to message_fr.wav.
    speech_config = SpeechConfig(
        subscription="0a6a0817af9f46aea9054beaa3d30290", region="westeurope")
    audio_config = AudioOutputConfig(filename="message_fr.wav")
    speech_config.speech_synthesis_voice_name = "fr-FR-DeniseNeural"
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    # .get() ensures the WAV file is complete before the response is sent.
    synthesizer.speak_text_async(message).get()

    # Translator resource credentials and location.
    subscription_key = "e134037165514c648a57bf6ccc95e541"
    endpoint = "https://api.cognitive.microsofttranslator.com"
    location = "francecentral"

    path = '/translate'
    # Duplicate constructed_url assignment removed.
    constructed_url = endpoint + path

    params = {'api-version': '3.0', 'from': 'fr', 'to': ['en']}

    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Ocp-Apim-Subscription-Region': location,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }

    # You can pass more than one object in body.
    body = [{'text': message}]

    quest = requests.post(constructed_url,
                          params=params,
                          headers=headers,
                          json=body)
    response = quest.json()

    translator = response[0]["translations"][0]["text"]

    # Synthesize the English translation to message_en.wav.
    audio_config = AudioOutputConfig(filename="message_en.wav")
    speech_config.speech_synthesis_voice_name = "en-US-AriaNeural"
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    synthesizer.speak_text_async(translator).get()

    # Persist the caller-supplied counter.
    data = {"number": number}
    with open("limit.json", "w") as file:
        json.dump(data, file)

    return (message)
Ejemplo n.º 12
0
def tts(item):
    """Speak *item* and save the synthesized audio to disk.

    NOTE(review): the synthesizer already writes ./result.wav via
    audio_config; the stream is additionally saved as ./result.mp3,
    which therefore contains WAV (RIFF) data despite its extension —
    confirm which output consumers actually need.
    """
    speech_config = SpeechConfig(
        subscription="bc0912f626b44d5a8bb00e4497644fa4", region="westus")
    audio_config = AudioOutputConfig(filename="./result.wav")

    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)

    # Dead code removed: ``appendString`` was always the empty string and
    # the direction-check branches were commented out.
    result = synthesizer.speak_text_async(item).get()
    stream = AudioDataStream(result)
    stream.save_to_wav_file("./result.mp3")
Ejemplo n.º 13
0
async def setup_azure(filename):
    """
    Returns an Azure Speech Synthesizer pointing to the given filename.

    When *filename* is None the synthesizer plays through the default
    speaker. When the configured voice is empty or 'default', source
    language auto-detection is enabled instead of a fixed voice.
    """
    auto_detect_source_language_config = None
    speech_config = SpeechConfig(subscription=setup['azure']['key'],
                                 region=setup['azure']['region'])
    # Membership test replaces the original or-chain of == comparisons.
    if setup['azure']['voice'] in ('', 'default'):
        auto_detect_source_language_config = AutoDetectSourceLanguageConfig(
            None, None)
    else:
        speech_config.speech_synthesis_voice_name = setup['azure']['voice']
    # ``is None`` replaces the non-idiomatic ``== None``.
    if filename is None:
        audio_config = AudioOutputConfig(use_default_speaker=True)
    else:
        audio_config = AudioOutputConfig(filename=filename)
    synthesizer = SpeechSynthesizer(
        speech_config=speech_config,
        audio_config=audio_config,
        auto_detect_source_language_config=auto_detect_source_language_config)
    return synthesizer
speech_key, service_region = os.getenv('SPEECH_RESOURCE_KEY'), "westus"

# A speech synthesizer is created with the given settings.
speech_config = SpeechConfig(subscription=speech_key, region=service_region)

print("Enter your choice :")
print("1. Output from speaker")
print("2. Save output to a file\n")
choice = int(input())

if choice == 1:
    # Output is received via the device speaker.
    audio_config = AudioOutputConfig(use_default_speaker=True)
elif choice == 2:
    # Output is saved to a file whose name is provided as an input.
    audio_config = AudioOutputConfig(
        filename=("tts_output/" +
                  input("Enter the name of the output file : ") + ".wav"))
else:
    # Bug fix: any other value left audio_config undefined and the
    # script crashed later with a NameError; fail clearly instead.
    raise SystemExit("Invalid choice: expected 1 or 2, got {}".format(choice))

# A speech synthesizer is initialized with the chosen output target.
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)

# An asynchronous call to the API is made with the input text.
synthesizer.speak_text_async(input("Enter a string : "))
# 
speech_recognizer = SpeechRecognizer(speech_config=speech_config)

print("Say something...")

result = speech_recognizer.recognize_once()

# Check the recognition result.
if result.reason == ResultReason.RecognizedSpeech:
    print("Recognized: {}".format(result.text))
elif result.reason == ResultReason.NoMatch:
    print("No speech could be recognized: {}".format(result.no_match_details))
elif result.reason == ResultReason.Canceled:
    # Bug fix: the original self-assigned an undefined name here
    # (cancellation_details = cancellation_details), raising NameError.
    cancellation_details = result.cancellation_details
    print("Speech Recognition canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == CancellationReason.Error:
        print("Error details: {}".format(cancellation_details.error_details))

# Speak the recognized text back through the default speaker.
audio_config = AudioOutputConfig(use_default_speaker=True)
language_config = SourceLanguageConfig("ko-KR")
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
# Bug fix: the original spoke the literal string "result.text" instead
# of the recognized text.
synthesizer.speak_text_async(result.text)



Ejemplo n.º 16
0
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig

addr = 'http://127.0.0.1:5000'
test_url = addr + '/predict_api'
content_type = 'image/jpeg'
headers = {'content-type': content_type}

print("read img")
img = cv2.imread('images/t21.jpg')
_, img_encoded = cv2.imencode('.jpg', img)

print("send img")
# tobytes() replaces ndarray.tostring(), which is deprecated and removed
# in current NumPy releases.
response = requests.post(test_url,
                         data=img_encoded.tobytes(),
                         headers=headers)
print("recv img")

pred = json.loads(response.text)
query = pred["pred"]
# Strip the caption model's sequence markers before speaking the result.
stopwords = ['startseq', 'endseq']
querywords = query.split()
resultwords = [word for word in querywords if word.lower() not in stopwords]
result = ' '.join(resultwords)
print(result)

res = '<speak version="1.0" xmlns="https://www.w3.org/2001/10/synthesis" xml:lang="en-US"><voice name="en-US-Guy24kRUS">' + result + '</voice></speak>'
# NOTE(review): subscription key is hard-coded; move it to configuration.
subscription_key = '639cbe821c074e68ba19be3d46a9cbda'
speech_config = SpeechConfig(subscription=subscription_key,
                             region="centralindia")
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
# .get() waits for synthesis so the script does not exit mid-playback.
synthesizer.speak_ssml_async(res).get()
Ejemplo n.º 17
0
    get_handw_text_results = computervision_client.get_read_result(
        operation_id)
    if get_handw_text_results.status not in ['notStarted', 'running']:
        break
    time.sleep(1)

# Create the text file where the text extracted from the image is stored.
# ``with`` guarantees the handle is closed even if a write fails.
with open("archivo.txt", "w") as archivo:
    # Print (and persist) the recognized text line by line.
    if get_handw_text_results.status == OperationStatusCodes.succeeded:
        for text_result in get_handw_text_results.analyze_result.read_results:
            for line in text_result.lines:
                print(line.text)
                archivo.write(line.text)
                #print(line.bounding_box)
print()

###### Convert the text extracted from the image into audio ######

# Audio output goes to the default speaker.
audio_config = AudioOutputConfig(use_default_speaker=True)

# Synthesize the stored text (the original leaked the read handle).
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
with open("archivo.txt", "r", encoding="utf-8-sig") as texto:
    archivo_string = texto.read()
synthesizer.speak_text_async(archivo_string).get()
Ejemplo n.º 18
0
        search_results = response.json()
        i2download = [img["contentUrl"]
                      for img in search_results["value"]][randrange(20)]
        print(i2download)
        response = requests.get(i2download)
        ext = i2download[-3:]
        imgfile = open(
            "results/{}/{:04d}/{:04d}.{}".format(uuid, snumb, wi, ext), "wb")
        imgfile.write(response.content)
        imgfile.close()
        print("Got {}".format(wi))
        time.sleep(2)

    audio_config = AudioOutputConfig(
        filename="results/{}/{:04d}/wav.wav".format(uuid, snumb))
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    synthesizer.speak_text_async(sentence)

    os.system(
        "python collage_maker.py -o results/{0}/slide-{1:04d}.png -f results/{0}/{1:04d} -w 800 -i 600"
        .format(uuid, snumb))

    print(
        'ffmpeg -loop 1 -i results/{0}/slide-{1:04d}.png -i results/{0}/{1:04d}/wav.wav -c:v libx264 -tune stillimage -c:a aac -b:a 192k -pix_fmt yuv420p -shortest results/{0}/{1:04d}.mp4 -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2"'
        .format(uuid, snumb))

    pngs = [
        x for x in os.listdir("results/{}".format(uuid)) if x[-3:] == "png"
    ]

    for png in pngs:
Ejemplo n.º 19
0
                "--audio_name",
                required=True,
                help="Please Enter Audio File Name(Without extension)")
args = vars(ap.parse_args())

# Azure Speech configuration.
# NOTE(review): placeholder credentials — supply a real key and region.
speech_config = SpeechConfig(subscription="__KEY", region="Region")

# Write the synthesized summary next to the other generated audio files.
audio_config = AudioOutputConfig(filename="public/python/output_audio_files/" +
                                 args['audio_name'] + "_summary.wav")

# Load the summary text, flattening it onto a single line.
with open("summary/" + args['audio_name'] + "_summary.txt", 'r') as file:
    data = file.read().replace('\n', '')

# (A bare ``data`` expression — a notebook-cell remnant with no effect in
# a script — was removed here.)

# Synthesize the summary text; .get() blocks until the WAV file is fully
# written (the original fired and forgot the async call).
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
synthesizer.speak_text_async(data).get()

# In[ ]:
Ejemplo n.º 20
0
#!/usr/bin/python3

from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
import azure.cognitiveservices.speech as speechsdk
from azure.cognitiveservices.speech.audio import AudioOutputConfig

voice = "zh-CN-XiaoxiaoNeural"
text = '你好'

# Configure the service and the requested neural voice.
speech_config = SpeechConfig(subscription="3cb77646eea84168b348969306ff2a3c",
                             region="eastus")
speech_config.speech_synthesis_voice_name = voice

# Synthesize straight into a WAV file and wait for completion.
audio_config = AudioOutputConfig(filename="file.wav")
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
result = synthesizer.speak_text_async(text).get()

# Report the outcome of the synthesis request.
outcome = result.reason
if outcome == speechsdk.ResultReason.SynthesizingAudioCompleted:
    print("Speech synthesized to speaker for text [{}] with voice [{}]".format(
        text, voice))
elif outcome == speechsdk.ResultReason.Canceled:
    details = result.cancellation_details
    print("Speech synthesis canceled: {}".format(details.reason))
    if details.reason == speechsdk.CancellationReason.Error:
        print("Error details: {}".format(details.error_details))
Ejemplo n.º 21
0
import azure.cognitiveservices.speech as speechsdk
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig

speech_key, service_region = "UseYourSpeechAPI", "eastus"

speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                       region=service_region)

# Select the voice and a RIFF/WAV output format.
voice = "Microsoft Server Speech Text to Speech Voice (en-US, GuyNeural)"  #en-US-GuyRUS
speech_config.speech_synthesis_voice_name = voice
speech_config.set_speech_synthesis_output_format(
    SpeechSynthesisOutputFormat["Riff24Khz16BitMonoPcm"])

# NOTE(review): the selected format is RIFF (WAV) data even though the
# file is named .mp3 — players keying off the extension may reject it.
audio_config = AudioOutputConfig(filename="c:/OutputVoiceFile.mp3")

synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
# .get() blocks until the file is completely written; the original fired
# the async call and let the script exit immediately.
synthesizer.speak_text_async(
    "Hello World, This is a test of creating a playable mp3 file").get()
Ejemplo n.º 22
0
# Reachy robot with two force-gripper arms over websocket IO.
robot = Reachy(
    right_arm=parts.RightArm(io='ws', hand='force_gripper'),
    left_arm=parts.LeftArm(io='ws', hand='force_gripper'),
)

# Local offline TTS engine (secondary output).
engine = pyttsx3.init()

# Move the left arm to its neutral pose.
robot.left_arm.shoulder_roll.goal_position = 0
robot.left_arm.arm_yaw.goal_position = 0
robot.left_arm.elbow_pitch.goal_position = 0
robot.left_arm.hand.forearm_yaw.goal_position = 0

# Azure TTS speaking through the default audio device.
# NOTE(review): replace the placeholder subscription key with a real one.
speech_config = SpeechConfig(subscription="subscriptionkey",
                             region="westeurope")
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)

# Google Assistant gRPC constants.
ASSISTANT_API_ENDPOINT = 'embeddedassistant.googleapis.com'
END_OF_UTTERANCE = embedded_assistant_pb2.AssistResponse.END_OF_UTTERANCE
DIALOG_FOLLOW_ON = embedded_assistant_pb2.DialogStateOut.DIALOG_FOLLOW_ON
CLOSE_MICROPHONE = embedded_assistant_pb2.DialogStateOut.CLOSE_MICROPHONE
PLAYING = embedded_assistant_pb2.ScreenOutConfig.PLAYING
DEFAULT_GRPC_DEADLINE = 60 * 3 + 5

# Shared conversation state. The original's ``global`` statements at
# module scope were no-ops and have been removed; plain assignment at
# module level already creates globals.
spokenAnswer = ""
followUpSentence = ""
followUp = False
Ejemplo n.º 23
0
from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient
from msrest.authentication import CognitiveServicesCredentials
import json

# LUIS application credentials and endpoint.
# NOTE(review): keys are hard-coded; move them to configuration/env vars.
luis_app_id = '20263b4d-b405-4c9b-8de8-e51663797c41' 
luis_key = 'b45490c8a83243f9a6320ec7e8e85a43'
luis_endpoint = 'https://koinonos-language-understanding.cognitiveservices.azure.com/'

# Configure speech recognizer
speech_key, service_region = "40a03ef9d3d44916bdcd1c4457b82c13", "eastus" 
speech_config = SpeechConfig(subscription=speech_key, region=service_region)
speech_recognizer = SpeechRecognizer(speech_config=speech_config)

# Configure speech synthesizer
# NOTE(review): audio_config is built but never passed to the
# synthesizer below — presumably the default output device is intended;
# confirm whether audio_config should be wired in.
audio_config = AudioOutputConfig(use_default_speaker=True)
synthesizer = SpeechSynthesizer(speech_config=speech_config)

# LUIS runtime client used by the recognition loop that follows.
runtimeCredentials = CognitiveServicesCredentials(luis_key)
clientRuntime = LUISRuntimeClient(endpoint=luis_endpoint, credentials=runtimeCredentials)

print("Start listening...")
# First utterance; the loop below keeps listening until the user says "Stop".
speech = speech_recognizer.recognize_once()
try:   
    while speech.text != "Stop":
        # Production == slot name
        print("Your query is: ", speech.text)
        predictionRequest = { "query" : speech.text}

        predictionResponse = clientRuntime.prediction.get_slot_prediction(luis_app_id, "Production", predictionRequest)
        
        print("Top intent: {}".format(predictionResponse.prediction.top_intent))