Ejemplo n.º 1
0
def voice_from_text(text, path):
    """Synthesize *text* to a uniquely named WAV file under *path*.

    Uses SSML with the en-US Jenny neural voice in "chat" style.
    Returns the path of the generated WAV file.
    """
    # uuid1().hex is already a str; collisions are effectively impossible,
    # but keep the defensive remove in case the file somehow exists.
    filepath = "{}/speech_{}.wav".format(path, uuid.uuid1().hex)
    if os.path.isfile(filepath):
        os.remove(filepath)

    ssml_string = """
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
    xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="en-US">
  <voice name="en-US-JennyNeural">
    <mstts:express-as style="chat">
      {}
    </mstts:express-as>
  </voice>
</speak>
    """.format(text)

    speech_config = speechsdk.SpeechConfig(
        subscription=api_keys["microsoft-speech"]["key"],
        region=api_keys["microsoft-speech"]["region"])
    # audio_config=None: we capture the audio from the result stream
    # ourselves instead of letting the SDK write it directly.
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=None)
    # .get() blocks until synthesis finishes so the stream is complete.
    result = synthesizer.speak_ssml_async(ssml_string).get()
    AudioDataStream(result).save_to_wav_file(filepath)
    # BUG FIX: the original additionally fired speak_text_async(text) here,
    # re-synthesizing the same text with the result discarded; removed as
    # redundant work (the WAV above already contains the speech).

    return filepath
Ejemplo n.º 2
0
def generaraudio():
    """Read the uploaded text file and speak its contents asynchronously.

    Relies on module-level `speech_config` and `audio_config` being set.
    """
    # `with` guarantees the file handle is closed (the original leaked it).
    with open("uploads/archivo.txt", "r") as archivo:
        documentos = archivo.read().rstrip('\n')
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    # Fire-and-forget, as in the original; the synthesis runs in the
    # background after this function returns.
    synthesizer.speak_text_async(documentos)
Ejemplo n.º 3
0
def voice_from_text(text, path):
    """Synthesize *text* into <path>/speech.wav and return *path*."""
    filepath = "{}/speech.wav".format(path)
    # Remove a stale file so the SDK writes a fresh one.
    if os.path.isfile(filepath):
        os.remove(filepath)

    speech_config = speechsdk.SpeechConfig(
        subscription=api_keys["microsoft-speech"]["key"],
        region=api_keys["microsoft-speech"]["region"])
    audio_config = AudioOutputConfig(filename=filepath)
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    # BUG FIX: the original ignored the `text` parameter and spoke a
    # hard-coded Harvard test sentence; speak the caller's text instead.
    # .get() blocks until speech.wav is fully written.
    synthesizer.speak_text_async(text).get()

    # NOTE(review): returning the directory rather than `filepath` looks
    # suspicious (the sibling implementation returns the file path), but
    # the original return contract is preserved — confirm with callers.
    return path
Ejemplo n.º 4
0
def result():
    """Flask endpoint: speak a French message, translate it to English,
    speak the translation, and persist the rate-limit counter.

    Returns the original (French) message text.
    """
    message = request.form['message']
    number = request.form['number']

    # --- Synthesize the French message to message_fr.wav ---
    # SECURITY NOTE: subscription keys and regions are hard-coded below;
    # move them to configuration/environment variables.
    speech_config = SpeechConfig(
        subscription="0a6a0817af9f46aea9054beaa3d30290", region="westeurope")
    audio_config = AudioOutputConfig(filename="message_fr.wav")
    speech_config.speech_synthesis_voice_name = "fr-FR-DeniseNeural"
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    # .get() blocks until synthesis completes so the WAV is fully written
    # before the request returns (the original fired and forgot).
    synthesizer.speak_text_async(message).get()

    # --- Translate FR -> EN via the Translator REST API ---
    subscription_key = "e134037165514c648a57bf6ccc95e541"
    endpoint = "https://api.cognitive.microsofttranslator.com"

    # Resource location (region); required when using a Cognitive
    # Services resource key.
    location = "francecentral"

    # BUG FIX: `constructed_url` was assigned twice in the original.
    constructed_url = endpoint + '/translate'

    params = {'api-version': '3.0', 'from': 'fr', 'to': ['en']}

    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Ocp-Apim-Subscription-Region': location,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }

    # You can pass more than one object in body.
    body = [{'text': message}]

    quest = requests.post(constructed_url,
                          params=params,
                          headers=headers,
                          json=body)
    response = quest.json()

    translator = response[0]["translations"][0]["text"]

    # --- Synthesize the English translation to message_en.wav ---
    audio_config = AudioOutputConfig(filename="message_en.wav")
    speech_config.speech_synthesis_voice_name = "en-US-AriaNeural"
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    synthesizer.speak_text_async(translator).get()

    # Persist the caller-supplied counter for rate limiting.
    data = {"number": number}
    with open("limit.json", "w") as file:
        json.dump(data, file)

    return (message)
Ejemplo n.º 5
0
def welcome_message(name):
    """Synthesize a personalized welcome prompt to welcome.wav and print it."""
    # SECURITY NOTE: the subscription key is hard-coded; move to config/env.
    speech_config = speechsdk.SpeechConfig(
        subscription="b58d19e457574aa39bc0f8b9b763cd55",
        region="australiaeast")
    # NOTE(review): absolute machine-specific path — breaks on any other box.
    audio_config = AudioOutputConfig(
        filename=
        "C:/Users/Pranav Patel/Documents/schabu/back_end/python/welcome.wav")
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    # NOTE(review): "Schubu"/"Recrutiment" look misspelled, but this is the
    # spoken runtime text — confirm intended wording before changing it.
    text = "Hello " + name + "! Welcome to Schubu Recrutiment Process. Please Click on the Start button to begin the interview process."
    # .get() blocks until synthesis finishes so welcome.wav is fully
    # written before callers try to play it (the original fired and forgot).
    synthesizer.speak_text_async(text).get()
    print(text)
Ejemplo n.º 6
0
def tts(item):
    """Synthesize *item* and save the audio to ./result.wav."""
    # SECURITY NOTE: the subscription key is hard-coded; move to config/env.
    speech_config = SpeechConfig(
        subscription="bc0912f626b44d5a8bb00e4497644fa4", region="westus")
    audio_config = AudioOutputConfig(filename="./result.wav")

    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)

    # .get() blocks until synthesis completes so result.wav is fully written.
    synthesizer.speak_text_async(item).get()
    # BUG FIX / cleanup: the original appended a dead, always-empty
    # `appendString` and re-saved the result stream to "./result.mp3" —
    # but that data is RIFF/WAV, not MP3, and the same audio is already
    # written to result.wav by audio_config. Removed the duplicate,
    # mislabeled file; restore deliberately if a consumer reads result.mp3.
Ejemplo n.º 7
0
#!/usr/bin/python3
# Synthesize a short Chinese phrase to file.wav with the Xiaoxiao neural
# voice and report the outcome.

from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
import azure.cognitiveservices.speech as speechsdk
from azure.cognitiveservices.speech.audio import AudioOutputConfig

voice = "zh-CN-XiaoxiaoNeural"
text = '你好'
# SECURITY NOTE: hard-coded subscription key; move to config/env.
speech_config = SpeechConfig(subscription="3cb77646eea84168b348969306ff2a3c",
                             region="eastus")
speech_config.speech_synthesis_voice_name = voice
audio_config = AudioOutputConfig(filename="file.wav")
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
# .get() blocks until synthesis completes and file.wav is written.
result = synthesizer.speak_text_async(text).get()

# Check result
if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
    # BUG FIX: the audio goes to file.wav, not the speaker — the original
    # message wrongly claimed "synthesized to speaker".
    print("Speech synthesized to file [file.wav] for text [{}] with voice [{}]".format(
        text, voice))
elif result.reason == speechsdk.ResultReason.Canceled:
    cancellation_details = result.cancellation_details
    print("Speech synthesis canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == speechsdk.CancellationReason.Error:
        print("Error details: {}".format(cancellation_details.error_details))
Ejemplo n.º 8
0
                      for img in search_results["value"]][randrange(20)]
        print(i2download)
        response = requests.get(i2download)
        ext = i2download[-3:]
        imgfile = open(
            "results/{}/{:04d}/{:04d}.{}".format(uuid, snumb, wi, ext), "wb")
        imgfile.write(response.content)
        imgfile.close()
        print("Got {}".format(wi))
        time.sleep(2)

    audio_config = AudioOutputConfig(
        filename="results/{}/{:04d}/wav.wav".format(uuid, snumb))
    synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                    audio_config=audio_config)
    synthesizer.speak_text_async(sentence)

    os.system(
        "python collage_maker.py -o results/{0}/slide-{1:04d}.png -f results/{0}/{1:04d} -w 800 -i 600"
        .format(uuid, snumb))

    print(
        'ffmpeg -loop 1 -i results/{0}/slide-{1:04d}.png -i results/{0}/{1:04d}/wav.wav -c:v libx264 -tune stillimage -c:a aac -b:a 192k -pix_fmt yuv420p -shortest results/{0}/{1:04d}.mp4 -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2"'
        .format(uuid, snumb))

    pngs = [
        x for x in os.listdir("results/{}".format(uuid)) if x[-3:] == "png"
    ]

    for png in pngs:
        im = Image.open("results/{}/{}".format(uuid, png))
Ejemplo n.º 9
0
# Play synthesized responses through the default speaker.
audio_config = AudioOutputConfig(use_default_speaker=True)
# BUG FIX: the original built `audio_config` and never passed it to the
# synthesizer (dead variable). Wiring it in keeps the audible behavior
# the same — the SDK default output is also the speaker.
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)

runtimeCredentials = CognitiveServicesCredentials(luis_key)
clientRuntime = LUISRuntimeClient(endpoint=luis_endpoint,
                                  credentials=runtimeCredentials)

print("Start listening...")
speech = speech_recognizer.recognize_once()
try:
    # NOTE(review): recognized text often carries punctuation ("Stop."),
    # which would not match this exact comparison — confirm the exit phrase.
    while speech.text != "Stop":
        # "Production" == the published LUIS slot name
        print("Your query is: ", speech.text)
        predictionRequest = {"query": speech.text}

        predictionResponse = clientRuntime.prediction.get_slot_prediction(
            luis_app_id, "Production", predictionRequest)

        print("Top intent: {}".format(predictionResponse.prediction.top_intent))
        print("Sentiment: {}".format(predictionResponse.prediction.sentiment))
        print("Intents: ")

        for intent in predictionResponse.prediction.intents:
            print("\t{}".format(json.dumps(intent)))
        print("Entities: {}".format(predictionResponse.prediction.entities))
        synthesizer.speak_text_async("A simple test to write to a file.")
        # Use a one-time, synchronous call to transcribe the speech
        print("Start listening...")
        speech = speech_recognizer.recognize_once()
except Exception as ex:
    # Broad catch keeps any SDK/LUIS failure from crashing the voice loop;
    # at least surface the error on stdout.
    print(ex)
        
        
Ejemplo n.º 10
0
# Synthesize a test sentence to an MP3 file with the en-US Guy neural voice.
import azure.cognitiveservices.speech as speechsdk
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig

# Placeholder key — supply a real Speech resource key before running.
speech_key, service_region = "UseYourSpeechAPI", "eastus"

speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                       region=service_region)

voice = "Microsoft Server Speech Text to Speech Voice (en-US, GuyNeural)"  #en-US-GuyRUS
speech_config.speech_synthesis_voice_name = voice
# BUG FIX: the original requested Riff24Khz16BitMonoPcm (WAV/RIFF) while
# writing to a .mp3 file, producing a WAV file with an .mp3 extension.
# Use an actual MP3 output format so the file matches its name and the
# stated intent ("playable mp3 file").
speech_config.set_speech_synthesis_output_format(
    SpeechSynthesisOutputFormat["Audio24Khz96KBitRateMonoMp3"])

audio_config = AudioOutputConfig(filename="c:/OutputVoiceFile.mp3")

synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
# .get() blocks so the script cannot exit before the file is written
# (the original fired and forgot).
synthesizer.speak_text_async(
    "Hello World, This is a test of creating a playable mp3 file").get()
Ejemplo n.º 11
0
                "--audio_name",
                required=True,
                help="Please Enter Audio File Name(Without extension)")
# Parse the CLI arguments declared above (requires --audio_name; `ap` is
# the argparse parser defined earlier in this script).
args = vars(ap.parse_args())

# In[2]:

# NOTE(review): placeholder credentials — replace "__KEY"/"Region" with a
# real Speech resource key and region before running.
speech_config = SpeechConfig(subscription="__KEY", region="Region")

# In[3]:

# Synthesized audio is written next to the other generated files, named
# after the input audio ("<audio_name>_summary.wav").
audio_config = AudioOutputConfig(filename="public/python/output_audio_files/" +
                                 args['audio_name'] + "_summary.wav")

# In[4]:

# Load the previously generated summary text, stripping newlines so it is
# spoken as one continuous passage.
with open("summary/" + args['audio_name'] + "_summary.txt", 'r') as file:
    data = file.read().replace('\n', '')

# In[5]:

# Bare expression: a notebook-export artifact (displayed `data` in the
# notebook); it is a no-op in a plain script.
data

# In[11]:

# Synthesize the summary to the WAV file configured above (fire-and-forget).
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
synthesizer.speak_text_async(data)

# In[ ]:
Ejemplo n.º 12
0
# Read archivo.txt and synthesize its contents to file.wav.
from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig

# SECURITY NOTE: hard-coded subscription key; move to config/env.
speech_config = SpeechConfig(subscription="546da9a0b95d4b29a806c1c7d8d147bc", region="southcentralus")
audio_config = AudioOutputConfig(filename="file.wav")

# `with` guarantees the file handle is closed (the original leaked it).
with open("archivo.txt", "r") as archivo:
    documents = archivo.read().rstrip('\n')

synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
# Fire-and-forget, as in the original.
synthesizer.speak_text_async(documents)
# Interactive TTS: speak to the default speaker or save to a named file.
speech_key, service_region = os.getenv('SPEECH_RESOURCE_KEY'), "westus"

# A speech Synthesizer is created with the given settings.
speech_config = SpeechConfig(subscription=speech_key, region=service_region)

print("Enter your choice :")
print("1. Output from speaker")
print("2. Save output to a file\n")
choice = int(input())

# Output is received via the device speaker

if choice == 1:
    audio_config = AudioOutputConfig(use_default_speaker=True)

# Output is saved in the file whose name is provided as an input

elif choice == 2:
    audio_config = AudioOutputConfig(
        filename=("tts_output/" +
                  input("Enter the name of the output file : ") + ".wav"))

# BUG FIX: any other choice left `audio_config` undefined and crashed
# below with NameError; fall back to the default speaker instead.

else:
    audio_config = AudioOutputConfig(use_default_speaker=True)

# A speech Synthesizer is initialized with given settings

synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)

# An asynchronous call to the api is made with the input waiting for the output

synthesizer.speak_text_async(input("Enter a string : "))
Ejemplo n.º 14
0
    get_handw_text_results = computervision_client.get_read_result(
        operation_id)
    if get_handw_text_results.status not in ['notStarted', 'running']:
        break
    time.sleep(1)

# Create the text file that stores the text extracted from the image
# (`with` ensures the handle is closed; the original used open/close).
with open("archivo.txt", "w") as archivo:
    # Print and store the recognized text line by line
    if get_handw_text_results.status == OperationStatusCodes.succeeded:
        for text_result in get_handw_text_results.analyze_result.read_results:
            for line in text_result.lines:
                print(line.text)
                archivo.write(line.text)
                #print(line.bounding_box)
print()

###### Convert the text recovered from the image into audio ######

# Play the synthesized audio through the default speaker
audio_config = AudioOutputConfig(use_default_speaker=True)

# Speak the recovered text; .get() blocks until playback completes
synthesizer = SpeechSynthesizer(speech_config=speech_config,
                                audio_config=audio_config)
# utf-8-sig tolerates a BOM if the file above was rewritten by other tools;
# `with` closes the handle (the original leaked it).
with open("archivo.txt", "r", encoding="utf-8-sig") as archivo_leido:
    archivo_string = archivo_leido.read()
synthesizer.speak_text_async(archivo_string).get()
#
# Recognize one utterance from the default microphone
speech_recognizer = SpeechRecognizer(speech_config=speech_config)

print("Say something...")

result = speech_recognizer.recognize_once()

# Checks result.
if result.reason == ResultReason.RecognizedSpeech:
    print("Recognized: {}".format(result.text))
elif result.reason == ResultReason.NoMatch:
    print("No speech could be recognized: {}".format(result.no_match_details))
elif result.reason == ResultReason.Canceled:
    # BUG FIX: the original read `cancellation_details = cancellation_details`
    # (a NameError self-assignment); take the details from the result.
    cancellation_details = result.cancellation_details
    print("Speech Recognition canceled: {}".format(cancellation_details.reason))
    if cancellation_details.reason == CancellationReason.Error:
        print("Error details: {}".format(cancellation_details.error_details))

#output

audio_config = AudioOutputConfig(use_default_speaker=True)
# NOTE(review): `language_config` is built but never used, and
# SourceLanguageConfig configures recognition, not synthesis — confirm intent.
language_config = SourceLanguageConfig("ko-KR")
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
# BUG FIX: the original spoke the literal string "result.text"; speak the
# recognized text itself.
synthesizer.speak_text_async(result.text)