Example #1
def transcribe_audio(audio_file, extension, model):

    username = os.environ.get("BLUEMIX-STT-USERNAME")
    password = os.environ.get("BLUEMIX-STT-PASSWORD")

    speech_to_text = SpeechToText(
        username=username,
        password=password,
        x_watson_learning_opt_out=False,
    )
    with open(audio_file, "rb") as audio:
        try:
            result = speech_to_text.recognize(
                audio,
                content_type="audio/" + extension,
                model=model
            )
        except Exception as ex:
            print(ex)
            raise
    try:
        transcribed_text = result["results"][0]["alternatives"][0]["transcript"]
    except (KeyError, IndexError):
        print("I'm sorry, the audio is blank! If you're sure that there was "
              "an audio, it probably was below the microphone sensitivity. "
              "Try speaking louder.")
        raise
    return transcribed_text.rstrip()
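For context, a minimal usage sketch for the helper above (a sketch only: it assumes the watson_developer_cloud package shown in the last example on this page, exported BLUEMIX-STT-* environment variables, and a local speech.wav; the model ID is one of the service's documented English models):

# Hypothetical invocation of transcribe_audio() from Example #1.
import os
from watson_developer_cloud import SpeechToTextV1 as SpeechToText

print(transcribe_audio("speech.wav", "wav", "en-US_BroadbandModel"))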
Example #2
def transcribe_audio(path_to_audio_file):
    username = os.environ.get("BLUEMIX_USERNAME")
    password = os.environ.get("BLUEMIX_PASSWORD")
    speech_to_text = SpeechToText(username=username, password=password)

    with open(join(dirname(__file__), path_to_audio_file), 'rb') as audio_file:
        return speech_to_text.recognize(audio_file, content_type='audio/wav')
Example #3
def transcribe_audio(path_to_audio_file):
    username = config.SPEECH_TO_TEXT_USERNAME
    password = config.SPEECH_TO_TEXT_PASSWORD
    speech_to_text = SpeechToText(username=username, password=password)

    with open(join(dirname(__file__), path_to_audio_file), 'rb') as audio_file:
        return speech_to_text.recognize(audio_file, content_type='audio/wav')
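This variant reads the same credentials from a config module instead of the environment. A minimal config.py matching the attribute names used above might look like this (placeholder values, not part of the original example):

# config.py -- placeholder credentials for the variant above
SPEECH_TO_TEXT_USERNAME = '******'
SPEECH_TO_TEXT_PASSWORD = '******'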
Example #4
def transcribe_audio(path_to_audio_file):
    username = "******"
    password = "******"
    speech_to_text = SpeechToText(username=username, password=password)

    with open(path_to_audio_file, 'rb') as audio_file:
        return speech_to_text.recognize(audio_file, content_type='audio/wav')
Example #5
def transcribe_audio(path_to_audio_file):
    # enter your info here
    username = ""
    password = ""
    speech_to_text = SpeechToText(username=username, password=password)

    with open(path_to_audio_file, 'rb') as audio_file:
        return speech_to_text.recognize(audio_file, content_type='audio/wav')
Example #6
def transcribe_audio(path_to_audio_file):
    username = os.environ.get("BLUEMIX_USERNAME")
    password = os.environ.get("BLUEMIX_PASSWORD")
    speech_to_text = SpeechToText(username=username,
                                  password=password)

    with open(join(dirname(__file__), path_to_audio_file), 'rb') as audio_file:
        return speech_to_text.recognize(audio_file,
                                        content_type='audio/wav')
Example #7
    def transcribe_audio(self, path_to_audio_file):
        #username = os.environ.get("BLUEMIX_USERNAME")
        #password = os.environ.get("BLUEMIX_PASSWORD")
        username = "******"
        password = "******"
        speech_to_text = SpeechToText(username=username, password=password)

        with open(path_to_audio_file, 'rb') as audio_file:
            return speech_to_text.recognize(audio_file,
                                            content_type='audio/wav')
Example #8
    def get_watson_stt_object(self):
        """create Watson speech-to-text object"""
        stt = None

        if self.api_key is not None:
            stt = SpeechToTextV1(iam_apikey=self.api_key)

        elif self.username is not None and self.password is not None:
            stt = SpeechToTextV1(username=self.username,
                                 password=self.password)

        return stt
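The factory above accepts either an IAM API key or legacy username/password credentials. In newer releases of the SDK (the ibm-watson package, 4.x and later), the iam_apikey keyword was replaced by an authenticator object; a rough sketch of the equivalent construction, assuming that newer package (the service URL is a placeholder):

# Sketch only: construction with the newer ibm-watson SDK, where an
# IAMAuthenticator replaces the iam_apikey keyword used above.
from ibm_watson import SpeechToTextV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

authenticator = IAMAuthenticator('your-api-key')  # placeholder key
stt = SpeechToTextV1(authenticator=authenticator)
stt.set_service_url('https://api.us-south.speech-to-text.watson.cloud.ibm.com')  # placeholder URL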
Example #9
def initiate_watson():
    """Establishes connection with API
    """
    watson_dict = {
        'ibm_api': '******',
        'ibm_url': 'https://stream.watsonplatform.net/speech-to-text/api',
        'ibm_version': '2018-08-01',
        'ibm_api_ta': '******',
        'ibm_url_ta': 'https://gateway.watsonplatform.net/tone-analyzer/api',
        'ibm_version_ta': '2016-05-19',
        'ibm_api_per': '******',
        'ibm_url_per': 'https://gateway.watsonplatform.net/personality-insights/api',
        'ibm_version_per': '2017-10-13'}

    speech_to_text = SpeechToTextV1(iam_apikey=watson_dict['ibm_api'])

    tone_analyzer = ToneAnalyzerV3(
        url=watson_dict['ibm_url_ta'],
        version=watson_dict['ibm_version_ta'],
        iam_apikey=watson_dict['ibm_api_ta'])

    personality_analyzer = PersonalityInsightsV3(
        version=watson_dict['ibm_version_per'],
        url=watson_dict['ibm_url_per'],
        iam_apikey=watson_dict['ibm_api_per'])

    tone_analyzer.set_detailed_response(True)

    return speech_to_text, tone_analyzer, personality_analyzer
Example #10
def stt(speech_file):
    stt_api_key = '******'
    stt_url = 'https://stream.watsonplatform.net/speech-to-text/api'

    speech_to_text = SpeechToTextV1(iam_apikey=stt_api_key, url=stt_url)

    class MyRecognizeCallback(RecognizeCallback):
        def __init__(self):
            RecognizeCallback.__init__(self)

        def on_data(self, data):
            print(json.dumps(data, indent=2))

        def on_error(self, error):
            print('Error received: {}'.format(error))

        def on_inactivity_timeout(self, error):
            print('Inactivity timeout: {}'.format(error))

    myRecognizeCallback = MyRecognizeCallback()

    with open(join(dirname(__file__), speech_file), 'rb') as audio_file:
        audio_source = AudioSource(audio_file)
        speech_to_text.recognize_using_websocket(
            audio=audio_source,
            interim_results=True,
            content_type='audio/wav',
            recognize_callback=myRecognizeCallback,
            keywords=[
                'ticket', 'speeding', 'limit', 'cell phone', 'cellphone',
                'seatbelt', 'tailgating'
            ],
            keywords_threshold=0.8)
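The callback above only prints whatever the service sends back. A variant that accumulates final transcripts instead is sketched below (against the same RecognizeCallback interface; the payload passed to on_transcription is assumed to be the list of final alternatives):

# Sketch: collect final transcripts instead of printing raw JSON.
class CollectingCallback(RecognizeCallback):
    def __init__(self):
        RecognizeCallback.__init__(self)
        self.transcripts = []

    def on_transcription(self, transcript):
        # transcript is assumed to be a list of alternative dicts
        # for one final result
        self.transcripts.append(transcript[0]['transcript'])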
Example #11
    def convertSpeechToText(self, audioFile):
        # Read IBM watson SpeechToText service username and password from config file
        username = self.configObject.get('ibm_speech_to_text_service_username')
        password = self.configObject.get('ibm_speech_to_text_service_password')
        # Create SpeechToText service object
        stt = SpeechToTextV1(username=username, password=password)
        with open(audioFile, "rb") as audio_file:
            audio_data = json.dumps(stt.recognize(audio_file,
                                                  content_type="audio/wav",
                                                  model='en-US_NarrowbandModel',
                                                  continuous=True),
                                    indent=2)
        print(audio_data)
        audio_text = ""
        try:
            audio_json_data = json.loads(audio_data)
            print(" Here is the audio text :::")
            if len(audio_json_data["results"]) != 0:
                audio_text = audio_json_data["results"][0]["alternatives"][0][
                    "transcript"]
                return audio_text
            else:
                return None

        except Exception as e:
            print("Error was: ", e)
Example #12
def getTextFromSpeech():

    print('at speech to text')
    stt_kwargs = {
        'username': speechToTextUser,
        'password': speechToTextPassword,
        'iam_apikey': speechToTextIAMKey,
        'url': speechToTextUrl
    }

    sttService = SpeechToTextV1(**stt_kwargs)

    response = sttService.recognize(audio=request.get_data(cache=False),
                                    content_type='audio/wav',
                                    timestamps=True,
                                    word_confidence=True,
                                    smart_formatting=True).get_result()

    # Ask the user to repeat if STT can't transcribe the speech
    if len(response['results']) < 1:
        return Response(mimetype='text/plain',
                        response="Sorry, didn't get that. Please try again!")

    text_output = response['results'][0]['alternatives'][0]['transcript']
    text_output = text_output.strip()
    return Response(response=text_output, mimetype='text/plain')
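For reference, a client-side sketch for exercising a handler like this one (the route path and host are assumptions, since the Flask route decorator is not shown; the handler reads the raw request body, so the audio bytes are posted directly):

# Hypothetical client for the endpoint above; the URL is an assumption.
import requests

with open('question.wav', 'rb') as f:
    r = requests.post('http://localhost:5000/api/speech-to-text',
                      data=f.read(),
                      headers={'Content-Type': 'audio/wav'})
print(r.text)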
Example #13
    def __init__(self):
        self.STT = SpeechToText(
            username='******',
            password='******')
        self.TTS = TextToSpeech(
            username='******',
            password='******')
Example #14
def main(app):
    if request.method == 'POST':
        status = {}
        if 'file' not in request.files:
            return jsonify({'valid': False, 'text': 'No file part'})
        file = request.files['file']
        if file.filename == '':
            return jsonify({'valid': False, 'text': 'No selected file'})
        if file and allowed_file(file.filename,
                                 allowed_extensions=set(['wav', 'mp3'])):
            file.save(
                '/home/sripravan/Projects/sadhana-mega/app/static/audio/test.wav'
            )
            with io.open(
                    '/home/sripravan/Projects/sadhana-mega/app/static/audio/test.wav',
                    'rb') as audio_file:
                speech_to_text = SpeechToTextV1(
                    username='******',
                    password='******')
                speech_recognition_results = speech_to_text.recognize(
                    audio=audio_file,
                    content_type="audio/wav",
                    timestamps=True,
                    smart_formatting=True).get_result()
                status = {
                    'valid': True,
                    'transcript': speech_recognition_results
                }
    return jsonify(status)
Example #15
    def __init__(self, uname, pword):
        self.user_name = uname
        self.url = 'https://stream.watsonplatform.net/speech-to-text/api'
        self.password = pword
        self.speech_to_text = SpeechToTextV1(username=self.user_name,
                                             password=self.password,
                                             url=self.url)
Example #16
def call_speech2text(audioFileLocation):
    config = configparser.ConfigParser()
    config.read("watson.ini")
    userid = ConfigSectionMap(config, "Speech to Text-RAI")['username']
    pwd = ConfigSectionMap(config, "Speech to Text-RAI")['password']

    speech_to_text = SpeechToTextV1(username=userid, password=pwd,
                                    x_watson_learning_opt_out=False)
    #status = downloadFileandConvert(audioFileLocation)
    status=ffmpegconvert(audioFileLocation,'wav') # Convert to WAV file
    if status == "Not Ok":
        print("Error in File Conversion - In Watson")
        return "Error in File Conversion - In Watson"

    #audioFileLocation=join(dirname(__file__), audioFile)
    #audioFileLocation = audioFile
    speech_to_text.get_model('en-US_NarrowbandModel')
    #speech_to_text.get_custom_model('9c1d00a0-330c-11e7-94ad-3b2269260fbc')

    with open(status, 'rb') as audio_file:
        returnedJSON = json.dumps(speech_to_text.recognize(audio_file,
                                                           content_type='audio/wav',
                                                           timestamps=True,
                                                           word_confidence=True,
                                                           model='en-US_NarrowbandModel',
                                                           continuous=True),
                                  indent=2)
        # print(returnedJSON)
        # Deserialize the JSON string back into a dict
        returnedJSONStr = json.loads(returnedJSON)
        print(returnedJSONStr)
        try:
            returnMsg = returnedJSONStr['results'][0]['alternatives'][0]['transcript']
            print(returnMsg)
            return returnMsg
        except (KeyError, IndexError):
            return "Can't Convert Speech2Text"
Example #17
    def speech_to_text(usr, password):
        try:
            return SpeechToTextV1(username=usr,
                                  password=password,
                                  x_watson_learning_opt_out=False)
        except Exception:
            return []
Example #18
def diagraph(path):
    speech_to_text = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False)

    with open(join(dirname(__file__), path), 'rb') as audio_file:
        x = speech_to_text.recognize(
            audio_file, content_type='audio/wav', timestamps=True,
            speaker_labels=True, word_confidence=True)

    speakers_stamp = x['speaker_labels']
    checks = x['results']

    speakers = {0: [],
                1: []}

    c = 0
    for r in range(len(checks)):
        each_check = checks[r]["alternatives"]
        for i in range(len(each_check)):
            ex1 = checks[r]["alternatives"][i]["timestamps"]
            for w in ex1:
                speakers[int(speakers_stamp[c]["speaker"])].append(w[0])
                c+=1
    return speakers
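A short usage sketch for diagraph() (the file name is a placeholder; a two-speaker recording is assumed, since the speakers dict above only allocates labels 0 and 1):

# Print each speaker's words in the order they were spoken.
speakers = diagraph('meeting.wav')  # 'meeting.wav' is a placeholder
for label, words in speakers.items():
    print('Speaker {}: {}'.format(label, ' '.join(words)))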
Example #19
def recognize_speech(username, password, audio_file_path,
                     forced_mime_type,
                     buffer_size=_4K,
                     audio_model=None,
                     inactivity_timeout=None,
                     extra_options=None,
                     progress_callback=None):
    stt = SpeechToTextV1(username=username, password=password)
    content_type = guess_mime_type(audio_file_path, forced_mime_type)
    kwargs = {
        'content_type': content_type,
        'continuous': True,
        'timestamps': False,
        'max_alternatives': 1
    }

    default_options = build_default_options(
        audio_model=audio_model,
        inactivity_timeout=inactivity_timeout)
    kwargs.update(default_options)
    kwargs.update(extra_options or {})

    return stt.recognize(
        chunked_upload(audio_file_path, buffer_size, progress_callback),
        **kwargs)
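A usage sketch for recognize_speech() (all concrete values are placeholders; chunked_upload is a helper not shown in this example, so the progress callback's exact signature is assumed to take a byte count):

# Hypothetical invocation; credentials and file name are placeholders.
def report_progress(bytes_sent):
    print('uploaded {} bytes so far'.format(bytes_sent))

result = recognize_speech('******', '******', 'speech.wav',
                          forced_mime_type='audio/wav',
                          audio_model='en-US_BroadbandModel',
                          progress_callback=report_progress)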
Example #20
def getTextFromSpeech():
    stt_kwargs = {
        'username': speechToTextUser,
        'password': speechToTextPassword,
        'iam_apikey': speechToTextIAMKey,
        'url': speechToTextUrl
    }

    sttService = SpeechToTextV1(**stt_kwargs)

    response = sttService.recognize(audio=request.get_data(cache=False),
                                    content_type='audio/wav',
                                    model='ja-JP_BroadbandModel',
                                    timestamps=True,
                                    word_confidence=True,
                                    smart_formatting=True).get_result()

    # Ask the user to repeat if STT can't transcribe the speech
    if len(response['results']) < 1:
        # "I couldn't hear you, so please say it again!"
        return Response(mimetype='text/plain',
                        response="聞こえなかったので、もう一度お願いします!")

    text_output = response['results'][0]['alternatives'][0]['transcript']
    text_output = text_output.strip()
    return Response(response=text_output, mimetype='text/plain')
Example #21
    def save_audio():
        # Access the speech recognition API
        speechtotext = SpeechToTextV1(
            username='******',
            password='******')

        # Save the audio
        Audio.rec.stop()

        url_voz = MEDIA_ROOT + "/usuario" + str(
            datetime.datetime.now()) + ".wav"

        Audio.rec.save(url_voz)

        with open(url_voz, 'rb') as f:
            vetor_audio = f.read()

        try:
            recognized_audio = speechtotext.recognize(
                audio=vetor_audio,
                content_type='audio/wav',
                model='pt-BR_BroadbandModel',
                interim_results=False,
                keywords=['conta', 'cooperativa', 'valor', 'transferir'],
                keywords_threshold=0.3,
                max_alternatives=3)

            print(recognized_audio)

            VozUsuario.objects.cria_voz(vozusuario_padrao=url_voz)

            return {'url_voz': url_voz, 'recognized_audio': recognized_audio}

        except WatsonApiException as ex:
            Audio.rec.delete(url_voz)
            print("Código de erro " + str(ex.code) + ": " + ex.message)
Example #22
def speech_to_text(file_name, model_id):
    """Use Watson Speech to Text to convert audio file to text."""
    # create Watson Speech to Text client
    stt = SpeechToTextV1(iam_apikey=keys.speech_to_text_key)

    # open the audio file
    with open(file_name, 'rb') as audio_file:
        # pass the file to Watson for transcription
        result = stt.recognize(audio=audio_file,
                               content_type='audio/wav',
                               model=model_id).get_result()

    # Get the 'results' list. This may contain intermediate and final
    # results, depending on method recognize's arguments. We asked
    # for only final results, so this list contains one element.
    results_list = result['results']

    # Get the final speech recognition result--the list's only element.
    speech_recognition_result = results_list[0]

    # Get the 'alternatives' list. This may contain multiple alternative
    # transcriptions, depending on method recognize's arguments. We did
    # not ask for alternatives, so this list contains one element.
    alternatives_list = speech_recognition_result['alternatives']

    # Get the only alternative transcription from alternatives_list.
    first_alternative = alternatives_list[0]

    # Get the 'transcript' key's value, which contains the audio's
    # text transcription.
    transcript = first_alternative['transcript']

    return transcript  # return the audio's text transcription
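A minimal invocation sketch for the fully commented helper above (it assumes a keys module defining speech_to_text_key, as the function itself does, plus a local sample.wav; the model ID is one of the service's documented broadband models):

# Hypothetical usage; sample.wav and the model choice are assumptions.
if __name__ == '__main__':
    print(speech_to_text('sample.wav', 'en-US_BroadbandModel'))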
Example #23
def data_input(request):
    if request.method == "POST":
        image_file_name = request.FILES["image"].name
        audio_file_name = request.FILES["audio"].name
        image_file_binary = request.FILES["image"].read()
        audio_file_binary = request.FILES["audio"].read()
        speed = request.POST.get("speed")

        # Image Analysis
        dbx = dropbox.Dropbox(DROPBOX_ACCESS_TOKEN)
        image_path = "/images/" + str(uuid.uuid1()) + "__" + image_file_name
        dbx.files_upload(image_file_binary, image_path, mute=True)
        client = Algorithmia.client(ALGO_ACCESS_KEY)
        algo = client.algo(ALGO_EMOTION_API)
        params = {}
        params["image"] = "dropbox://" + image_path
        params["numResults"] = 7
        image_analysis = algo.pipe(params).result

        # speech Analysis
        speech_to_text = SpeechToTextV1(username=WATSON_SPT_SERVICE_USERNAME,
                                        password=WATSON_SPT_SERVICE_PASSWORD,
                                        x_watson_learning_opt_out=False)
        models = speech_to_text.models()
        us_model = speech_to_text.get_model('en-US_BroadbandModel')
        results = speech_to_text.recognize(audio_file_binary,
                                           content_type='audio/wav',
                                           timestamps=True,
                                           word_confidence=True,
                                           speaker_labels=True)

        transcripts = get_transcripts(json.dumps(results))
        transcripts_str = ". ".join(transcripts)
        tone_analysis = call_to_watson_tone_analysis_api(transcripts_str)

        # format() is presumably a project-local helper that merges the two
        # analyses (the builtin format() would not accept these arguments).
        response = format(image_analysis, tone_analysis)

        response.update({"speed": float(speed), "weather": 0})

        (score, (msg, aloc)) = decision_engine.decide(response)

        # a = decision_engine.decide(response)
        #
        # if a:
        #     score = a[0]
        #     msg = a[1][0]
        #     aloc = a[1][1]

        #import ipdb; ipdb.set_trace()

        return render(request, 'results.html', {
            "score": score,
            "msg": msg,
            "score_breakup": response
        })
        #return render(request, 'results.html')
        #return HttpResponseRedirect("/emoDrive/analyze/" + upload_path)
    else:
        return render(request, 'upload.html')
Example #24
def post_audio():
    content = request.get_json(silent=True)
    print(content)
    state = load_obj()

    objectList = ['refrigerator', 'printer', 'coffee']
    commandList = ['status', 'buy', 'purchase', 'order', 'add', 'cart']

    wordsList = []
    jsonWords = []
    finalList = []
    sentence = ""
    speech_to_text = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False)

    print(json.dumps(speech_to_text.models(), indent=2))

    print(
        json.dumps(speech_to_text.get_model('en-US_BroadbandModel'), indent=2))

    path = '/Users/andreaskarinam/Developer/React/Hermes/samples/response2.wav'
    if state["firstCommand"] == True:
        path = '/Users/andreaskarinam/Developer/React/Hermes/samples/response3.wav'

    with open(join(dirname(__file__), path), 'rb') as audio_file:
        text_dict = speech_to_text.recognize(audio_file,
                                             content_type='audio/wav',
                                             timestamps=True,
                                             word_confidence=True)
        wordsList = text_dict['results'][0]['alternatives'][0][
            'word_confidence']
        for lists in wordsList:
            jsonWords.append(lists[0])

    print(jsonWords)
    for word in jsonWords:
        if word in commandList:
            print(word)

    for word in jsonWords:
        if word in objectList:
            print(word)

    print(state["firstCommand"])
    if state["firstCommand"] == True:
        state["firstCommand"] = False
        state["shoppingcart"].append({
            "name": "Maui Coffee",
            "quantity": 1,
            "price": "$12.99"
        })
    else:
        data = state["devices"][0]["data"]
        state["shoppingcart"] = []
    save_obj(state)
    return json.dumps({"response": 200})
Example #25
def transcribe_audio(path_to_audio_file):
    speech_to_text = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False)
    with open(join(dirname(__file__), path_to_audio_file), "rb") as audio_file:
        return speech_to_text.recognize(audio_file,
                                        content_type="audio/wav",
                                        word_confidence=True)
Example #26
def connect_speechtext():
    speech_to_text = SpeechToTextV1(
        username="******",
        password='******',
        x_watson_learning_opt_out=False)

    # print(json.dumps(speech_to_text.models(), indent=2))
    # print(json.dumps(speech_to_text.get_model('en-US_BroadbandModel'), indent=2))
    return speech_to_text
Example #27
def transcribe_audio():
    username = "******"
    path_to_audio_file = "file.wav"
    password = "******"
    speech_to_text = SpeechToTextV1(
        username='******',
        password="******")
    with open(path_to_audio_file, "rb") as audio_file:
        return speech_to_text.recognize(audio_file, content_type="audio/wav")
Example #28
    def __init__(self):
        self.url = "https://stream.watsonplatform.net/speech-to-text/api"
        self.username = "******"
        self.password = "******"
        self.speech_to_text = SpeechToTextV1(
            username=self.username,
            password=self.password,
            url=self.url
        )
Example #29
    def __init__(self, debug_mode=False):
        self.debug_mode = debug_mode
        f = open("key.txt", "r")
        f1 = f.read().splitlines()
        f.close()
        # key.txt lines 15 and 16 (f1[14], f1[15]) are expected to hold the
        # IAM API key and the service URL.
        self.speech_to_text = SpeechToTextV1(
            iam_apikey=f1[14],
            url=f1[15]
        )
Example #30
def convert_speech_to_text(audio):
    speech_to_text = SpeechToTextV1(
        username=os.environ["WATSON_TRANSCRIPTION_USERNAME"],
        password=os.environ["WATSON_TRANSCRIPTION_PASSWORD"],
    )

    return speech_to_text.recognize(
        audio, content_type="audio/mp3", timestamps=False, word_confidence=False
    )
Example #31
    def __init__(self):
        self.config = SpeechToTextV1(
            iam_apikey='******',
            url='https://stream.watsonplatform.net/speech-to-text/api')
        self.threshold = 500
        self.chunk_size = 1024
        self.format = pyaudio.paInt16
        self.rate = 16000
Example #32
import json
from os.path import join, dirname
from watson_developer_cloud import SpeechToTextV1 as SpeechToText


speech_to_text = SpeechToText(username='******',
                              password='******')

print(json.dumps(speech_to_text.models(), indent=2))

with open(join(dirname(__file__), '../resources/speech.wav'), 'rb') as audio_file:
    print(json.dumps(speech_to_text.recognize(audio_file,
                                              content_type='audio/wav'), indent=2))