def transcribe_audio(audio_file, extension, model):
    """Transcribe an audio file with IBM Watson Speech to Text.

    Args:
        audio_file: path to the audio file on disk.
        extension: audio container extension (e.g. "wav"); used to build
            the "audio/<extension>" content type.
        model: Watson STT model identifier.

    Returns:
        The first transcript alternative, right-stripped.

    Raises:
        Any API error from recognize(); KeyError/IndexError when the
        response contains no transcription (e.g. blank audio).
    """
    # NOTE(review): hyphens in environment-variable names are unusual and
    # cannot be exported from most shells -- confirm these keys match the
    # deployment environment.
    username = os.environ.get("BLUEMIX-STT-USERNAME")
    password = os.environ.get("BLUEMIX-STT-PASSWORD")
    speech_to_text = SpeechToText(
        username=username,
        password=password,
        x_watson_learning_opt_out=False,
    )
    with open(audio_file, "rb") as audio:
        try:
            result = speech_to_text.recognize(
                audio, content_type="audio/" + extension, model=model
            )
        except Exception as ex:
            print(ex)
            raise
    try:
        transcripted_text = result["results"][0]["alternatives"][0]["transcript"]
    except (KeyError, IndexError):  # fix: narrowed from a bare except
        print("I'm sorry, the audio is blank! If youre sure that there was"
              "an audio, it probably was below the microphone sensibility. Try "
              "speaking louder.")
        raise
    return transcripted_text.rstrip()
def transcribe_audio(path_to_audio_file):
    """Return the raw Watson STT response for a WAV file next to this module.

    Credentials are taken from the BLUEMIX_USERNAME / BLUEMIX_PASSWORD
    environment variables.
    """
    stt = SpeechToText(
        username=os.environ.get("BLUEMIX_USERNAME"),
        password=os.environ.get("BLUEMIX_PASSWORD"))
    audio_path = join(dirname(__file__), path_to_audio_file)
    with open(audio_path, 'rb') as audio_file:
        return stt.recognize(audio_file, content_type='audio/wav')
def transcribe_audio(path_to_audio_file):
    """Transcribe a WAV file (relative to this module) with Watson STT.

    Credentials are read from the project config module.
    """
    client = SpeechToText(
        username=config.SPEECH_TO_TEXT_USERNAME,
        password=config.SPEECH_TO_TEXT_PASSWORD)
    wav_path = join(dirname(__file__), path_to_audio_file)
    with open(wav_path, 'rb') as audio_file:
        return client.recognize(audio_file, content_type='audio/wav')
def transcribe_audio(path_to_audio_file):
    """Recognize a WAV file via Watson STT (credentials are placeholders)."""
    credentials = {"username": "******", "password": "******"}
    client = SpeechToText(**credentials)
    with open(path_to_audio_file, 'rb') as audio_file:
        return client.recognize(audio_file, content_type='audio/wav')
def transcribe_audio(path_to_audio_file):
    """Recognize a WAV file via Watson STT.

    Fill in your Bluemix credentials below before using this function.
    """
    username = ""
    password = ""
    service = SpeechToText(username=username, password=password)
    with open(path_to_audio_file, 'rb') as audio_file:
        return service.recognize(audio_file, content_type='audio/wav')
def transcribe_audio(path_to_audio_file):
    """Transcribe a module-relative WAV file via Watson Speech to Text.

    Credentials come from the BLUEMIX_USERNAME / BLUEMIX_PASSWORD
    environment variables.
    """
    credentials = {
        "username": os.environ.get("BLUEMIX_USERNAME"),
        "password": os.environ.get("BLUEMIX_PASSWORD"),
    }
    service = SpeechToText(**credentials)
    with open(join(dirname(__file__), path_to_audio_file), 'rb') as audio:
        return service.recognize(audio, content_type='audio/wav')
def transcribe_audio(self, path_to_audio_file):
    """Transcribe a WAV file through Watson STT.

    NOTE(review): environment-variable credential lookup was disabled in
    favour of the placeholder literals below -- restore it before release.
    """
    service = SpeechToText(username="******", password="******")
    with open(path_to_audio_file, 'rb') as audio_file:
        return service.recognize(audio_file, content_type='audio/wav')
def get_watson_stt_object(self):
    """Create a Watson speech-to-text client.

    Prefers an IAM API key, falls back to username/password, and returns
    None when no credentials are configured.
    """
    if self.api_key is not None:
        return SpeechToTextV1(iam_apikey=self.api_key)
    if self.username is not None and self.password is not None:
        return SpeechToTextV1(username=self.username, password=self.password)
    return None
def initiate_watson():
    """Establishes connection with API.

    Builds the Watson Speech-to-Text, Tone Analyzer and Personality
    Insights clients and returns them as a tuple.

    NOTE(review): API keys are hard-coded below; they belong in a secrets
    store or environment variables, not in source control.
    """
    watson_dict = {
        'ibm_api': 'kzL_ZWnn4T0xxQ_A6bUTBdqdh7yvljTJsWO2qraw-nDa',
        'ibm_url': 'https://stream.watsonplatform.net/speech-to-text/api',
        'ibm_version': '2018-08-01',
        'ibm_api_ta': 'I5yNi3pszexgabF6cCc4BIM1_QS5Fm63jAwnnDGl7j-j',
        'ibm_url_ta': 'https://gateway.watsonplatform.net/tone-analyzer/api',
        'ibm_version_ta': '2016-05-19',
        'ibm_api_per': 'VwVF3pMl1j_OWcKbKTB_teau5OA4hSj3KSizWYTXIrRQ',
        'ibm_url_per': 'https://gateway.watsonplatform.net/personality-insights/api',
        'ibm_version_per': '2017-10-13',
    }
    speech_to_text = SpeechToTextV1(iam_apikey=watson_dict['ibm_api'])
    tone_analyzer = ToneAnalyzerV3(
        url=watson_dict['ibm_url_ta'],
        version=watson_dict['ibm_version_ta'],
        iam_apikey=watson_dict['ibm_api_ta'])
    personality_analyzer = PersonalityInsightsV3(
        version=watson_dict['ibm_version_per'],
        url=watson_dict['ibm_url_per'],
        iam_apikey=watson_dict['ibm_api_per'])
    # Detailed responses include per-sentence tone information.
    tone_analyzer.set_detailed_response(True)
    return speech_to_text, tone_analyzer, personality_analyzer
def stt(speech_file):
    """Stream a WAV file to Watson STT over a websocket, spotting traffic keywords.

    Results, errors and timeouts are printed by the callback; nothing is
    returned.

    NOTE(review): the API key is hard-coded -- move it to configuration.
    """
    sst_api_key = 'e_jevakkc6QsQcCmA41mMtKE2sJl1Ug0OEoKEi8oLIb1'
    sst_url = 'https://stream.watsonplatform.net/speech-to-text/api'
    speech_to_text = SpeechToTextV1(iam_apikey=sst_api_key, url=sst_url)

    class MyRecognizeCallback(RecognizeCallback):
        """Print-only callback for websocket recognition events."""

        def __init__(self):
            RecognizeCallback.__init__(self)

        def on_data(self, data):
            print(json.dumps(data, indent=2))

        def on_error(self, error):
            print('Error received: {}'.format(error))

        def on_inactivity_timeout(self, error):
            print('Inactivity timeout: {}'.format(error))

    myRecognizeCallback = MyRecognizeCallback()
    # Fix: dropped the redundant './.' segment from the path join; it added
    # noise to the path without changing which file is opened.
    with open(join(dirname(__file__), speech_file), 'rb') as audio_file:
        audio_source = AudioSource(audio_file)
        speech_to_text.recognize_using_websocket(
            audio=audio_source,
            interim_results=True,
            content_type='audio/wav',
            recognize_callback=myRecognizeCallback,
            keywords=[
                'ticket', 'speeding', 'limit', 'cell phone', 'cellphone',
                'seatbelt', 'tailgating'
            ],
            keywords_threshold=0.8)
def convertSpeechToText(self, audioFile):
    """Transcribe an audio file with Watson STT using configured credentials.

    Args:
        audioFile: path to a WAV file on disk.

    Returns:
        The first transcript string, None when Watson returned no results,
        or implicit None after printing the error when parsing fails.
    """
    # Read IBM Watson SpeechToText service credentials from the config file.
    username = self.configObject.get('ibm_speech_to_text_service_username')
    password = self.configObject.get('ibm_speech_to_text_service_password')
    stt = SpeechToTextV1(username=username, password=password)
    # Fix: use a context manager so the audio file handle is always closed
    # (the original opened the file and never closed it).
    with open(audioFile, "rb") as audio_file:
        audio_data = json.dumps(
            stt.recognize(audio_file,
                          content_type="audio/wav",
                          model='en-US_NarrowbandModel',
                          continuous=True),
            indent=2)
    print(audio_data)
    audio_text = ""
    try:
        audio_json_data = json.loads(audio_data)
        print(" Here is the audio text :::")
        if len(audio_json_data["results"]) != 0:
            audio_text = audio_json_data["results"][0]["alternatives"][0][
                "transcript"]
            return audio_text
        else:
            return None
    except Exception as e:
        print("Error was: ", e)
def getTextFromSpeech():
    """Transcribe the posted WAV audio and return the transcript as plain text."""
    print('at speech to text')
    sttService = SpeechToTextV1(
        username=speechToTextUser,
        password=speechToTextPassword,
        iam_apikey=speechToTextIAMKey,
        url=speechToTextUrl)
    result = sttService.recognize(
        audio=request.get_data(cache=False),
        content_type='audio/wav',
        timestamps=True,
        word_confidence=True,
        smart_formatting=True).get_result()
    # Nothing was transcribed -- ask the caller to repeat.
    if not result['results']:
        return Response(mimetype='plain/text',
                        response="Sorry, didn't get that. please try again!")
    transcript = result['results'][0]['alternatives'][0]['transcript'].strip()
    return Response(response=transcript, mimetype='plain/text')
def __init__(self):
    """Initialise Watson speech clients (credentials are placeholders)."""
    credentials = {'username': '******', 'password': '******'}
    self.STT = SpeechToText(**credentials)
    self.TTS = TextToSpeech(**credentials)
def main(app):
    """Handle an audio-upload POST: save the file and transcribe it with Watson STT.

    Returns a JSON response with either an error status or the transcript.
    Non-POST requests fall through and return None, as before.
    """
    if request.method == 'POST':
        # Fix: validate before touching request.files['file'] and return
        # immediately on failure -- the original fell through and raised
        # KeyError on a missing file part.
        if 'file' not in request.files:
            return jsonify({'valid': False, 'text': 'No file part'})
        file = request.files['file']
        if file.filename == '':
            return jsonify({'valid': False, 'text': 'No selected file'})
        status = {}
        if file and allowed_file(file.filename,
                                 allowed_extensions=set(['wav', 'mp3'])):
            # NOTE(review): destination path is hard-coded to a developer
            # machine; should come from app config.
            dest = '/home/sripravan/Projects/sadhana-mega/app/static/audio/test.wav'
            file.save(dest)
            with io.open(dest, 'rb') as audio_file:
                speech_to_text = SpeechToTextV1(
                    username='******', password='******')
                speech_recognition_results = speech_to_text.recognize(
                    audio=audio_file,
                    content_type="audio/wav",
                    timestamps=True,
                    smart_formatting=True).get_result()
            status = {
                'valid': True,
                'transcript': speech_recognition_results
            }
        return jsonify(status)
def __init__(self, uname, pword):
    """Store the supplied credentials and build the Watson STT client."""
    self.user_name = uname
    self.password = pword
    self.url = 'https://stream.watsonplatform.net/speech-to-text/api'
    self.speech_to_text = SpeechToTextV1(
        username=self.user_name,
        password=self.password,
        url=self.url)
def call_speech2text(audioFileLocation):
    """Convert an audio file to WAV, send it to Watson STT, return the transcript.

    Returns an error string when conversion fails or no transcript is found.
    """
    Config = configparser.ConfigParser()
    Config.read("watson.ini")
    userid = ConfigSectionMap(Config, "Speech to Text-RAI")['username']
    pwd = ConfigSectionMap(Config, "Speech to Text-RAI")['password']
    speech_to_text = SpeechToTextV1(username=userid, password=pwd,
                                    x_watson_learning_opt_out=False)
    # Convert to WAV; ffmpegconvert returns the output path on success or
    # "Not Ok" on failure.
    status = ffmpegconvert(audioFileLocation, 'wav')
    if status == "Not Ok":
        print("Error in File Conversion - In Watson")
        return "Error in File Conversion - In Watson"
    speech_to_text.get_model('en-US_NarrowbandModel')
    with open(status, 'rb') as audio_file:
        # Fix: use the response dict directly instead of serialising it to a
        # JSON string and immediately parsing it back.
        returnedJSONStr = speech_to_text.recognize(
            audio_file, content_type='audio/wav', timestamps=True,
            word_confidence=True, model='en-US_NarrowbandModel',
            continuous=True)
    print(returnedJSONStr)
    try:
        returnMsg = returnedJSONStr['results'][0]['alternatives'][0]['transcript']
        print(returnMsg)
        return returnMsg
    except (KeyError, IndexError, TypeError):  # fix: narrowed from bare except
        return "Can't Convert Speech2Text"
def speech_to_text(usr, password):
    """Build a Watson SpeechToTextV1 client; return [] when construction fails.

    NOTE(review): an empty list is an odd failure sentinel (None or a raised
    exception would be clearer) but it is kept for backward compatibility.
    """
    try:
        return SpeechToTextV1(username=usr, password=password,
                              x_watson_learning_opt_out=False)
    # Fix: narrowed from a bare except, which also swallowed
    # KeyboardInterrupt and SystemExit.
    except Exception:
        return []
def diagraph(path):
    """Transcribe a two-speaker WAV file and group word timestamps by speaker.

    Returns a dict {0: [...], 1: [...]} mapping each speaker label to the
    list of words attributed to that speaker, in order.
    """
    speech_to_text = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False)
    with open(join(dirname(__file__), path), 'rb') as audio_file:
        recognition = speech_to_text.recognize(
            audio_file,
            content_type='audio/wav',
            timestamps=True, speaker_labels=True,
            word_confidence=True)
    speakers_stamp = recognition['speaker_labels']
    speakers = {0: [], 1: []}
    # Walk every timestamped word in order; the flat speaker_labels list is
    # consumed in lockstep via the running index.
    label_index = 0
    for result in recognition['results']:
        for alternative in result["alternatives"]:
            for stamp in alternative["timestamps"]:
                speaker_id = int(speakers_stamp[label_index]["speaker"])
                speakers[speaker_id].append(stamp[0])
                label_index += 1
    return speakers
def recognize_speech(username, password, audio_file_path, forced_mime_type,
                     buffer_size=_4K, audio_model=None, inactivity_timeout=None,
                     extra_options=None, progress_callback=None):
    """Stream an audio file to Watson STT and return the recognition result.

    Recognize options are layered lowest-to-highest priority: built-in
    defaults, then defaults derived from audio_model/inactivity_timeout,
    then extra_options.
    """
    client = SpeechToTextV1(username=username, password=password)
    options = dict(
        content_type=guess_mime_type(audio_file_path, forced_mime_type),
        continuous=True,
        timestamps=False,
        max_alternatives=1,
    )
    options.update(build_default_options(
        audio_model=audio_model,
        inactivity_timeout=inactivity_timeout))
    if extra_options:
        options.update(extra_options)
    audio_stream = chunked_upload(audio_file_path, buffer_size,
                                  progress_callback)
    return client.recognize(audio_stream, **options)
def getTextFromSpeech():
    """Transcribe posted WAV audio with the Japanese broadband model.

    Returns the transcript, or a retry prompt when nothing was recognised.
    """
    sttService = SpeechToTextV1(
        username=speechToTextUser,
        password=speechToTextPassword,
        iam_apikey=speechToTextIAMKey,
        url=speechToTextUrl)
    result = sttService.recognize(
        audio=request.get_data(cache=False),
        content_type='audio/wav',
        model='ja-JP_BroadbandModel',
        timestamps=True,
        word_confidence=True,
        smart_formatting=True).get_result()
    # Nothing was transcribed -- ask the caller to repeat (message in Japanese).
    if not result['results']:
        return Response(mimetype='plain/text',
                        response="聞こえなかったので、もう一度お願いします!")
    transcript = result['results'][0]['alternatives'][0]['transcript'].strip()
    return Response(response=transcript, mimetype='plain/text')
def save_audio():
    """Stop the current recording, persist it, and transcribe it with Watson.

    Returns a dict with the saved file path and the recognition result, or
    None after cleaning up when the Watson API call fails.
    """
    # Connect to the speech-recognition API.
    speechtotext = SpeechToTextV1(
        username='******',
        password='******')
    # Stop and persist the in-progress recording.
    Audio.rec.stop()
    url_voz = MEDIA_ROOT + "/usuario" + str(
        datetime.datetime.now()) + ".wav"
    Audio.rec.save(url_voz)
    # Fix: read the audio through a context manager -- the original called
    # open(...).read() and leaked the file handle.
    with open(url_voz, 'rb') as audio_handle:
        vetor_audio = audio_handle.read()
    try:
        recognized_audio = speechtotext.recognize(
            audio=vetor_audio,
            content_type='audio/wav',
            model='pt-BR_BroadbandModel',
            interim_results=False,
            keywords=['conta', 'cooperativa', 'valor', 'transferir'],
            keywords_threshold=0.3,
            max_alternatives=3)
        print(recognized_audio)
        VozUsuario.objects.cria_voz(vozusuario_padrao=url_voz)
        return {'url_voz': url_voz, 'recognized_audio': recognized_audio}
    except WatsonApiException as ex:
        # Transcription failed: discard the saved audio file.
        Audio.rec.delete(url_voz)
        print("Código de erro " + str(ex.code) + ": " + ex.message)
def speech_to_text(file_name, model_id):
    """Use Watson Speech to Text to convert audio file to text."""
    client = SpeechToTextV1(iam_apikey=keys.speech_to_text_key)
    with open(file_name, 'rb') as audio_file:
        watson_response = client.recognize(
            audio=audio_file,
            content_type='audio/wav',
            model=model_id).get_result()
    # recognize() was called without interim/alternative options, so the
    # response holds exactly one final result with a single alternative;
    # drill straight down to its transcript.
    return watson_response['results'][0]['alternatives'][0]['transcript']
def data_input(request):
    """Django view: score an uploaded image + audio pair for the decision engine.

    POST: uploads the image to Dropbox, runs emotion analysis via
    Algorithmia, transcribes the audio with Watson STT, runs tone analysis
    on the transcript, merges both with the posted speed, and renders
    results.html with the decision-engine verdict.
    GET (or any other method): renders the upload form.
    """
    if request.method == "POST":
        # Pull both uploaded files and the reported speed out of the request.
        image_file_name = request.FILES["image"].name
        audio_file_name = request.FILES["audio"].name
        image_file_binary = request.FILES["image"].read()
        audio_file_binary = request.FILES["audio"].read()
        speed = request.POST.get("speed")
        # Image Analysis: stage the image in Dropbox (uuid-prefixed to avoid
        # collisions), then run the Algorithmia emotion model against it.
        dbx = dropbox.Dropbox(DROPBOX_ACCESS_TOKEN)
        image_path = "/images/" + str(uuid.uuid1()) + "__" + image_file_name
        dbx.files_upload(image_file_binary, image_path, mute=True)
        client = Algorithmia.client(ALGO_ACCESS_KEY)
        algo = client.algo(ALGO_EMOTION_API)
        params = {}
        params["image"] = "dropbox://" + image_path
        params["numResults"] = 7
        image_analysis = algo.pipe(params).result
        # speech Analysis: transcribe with Watson STT, then tone-analyse the
        # joined transcripts.
        speech_to_text = SpeechToTextV1(username=WATSON_SPT_SERVICE_USERNAME,
                                        password=WATSON_SPT_SERVICE_PASSWORD,
                                        x_watson_learning_opt_out=False)
        # NOTE(review): models/us_model are fetched but unused below --
        # possibly left over from debugging.
        models = speech_to_text.models()
        us_model = speech_to_text.get_model('en-US_BroadbandModel')
        results = speech_to_text.recognize(audio_file_binary,
                                           content_type='audio/wav',
                                           timestamps=True,
                                           word_confidence=True,
                                           speaker_labels=True)
        transcripts = get_transcripts(json.dumps(results))
        transcripts_str = ". ".join(transcripts)
        tone_analysis = call_to_watson_tone_analysis_api(transcripts_str)
        # Merge both analyses plus speed/weather into the decision input.
        # NOTE(review): `format` here is a project helper, shadowing the
        # builtin -- confirm intended.
        response = format(image_analysis, tone_analysis)
        response.update({"speed": float(speed), "weather": 0})
        (score, (msg, aloc)) = decision_engine.decide(response)
        # a = decision_engine.decide(response)
        # # if a:
        # score = a[0]
        # msg = a[1][0]
        # aloc = a[1][1]
        #import ipdb; ipdb.set_trace()
        return render(request, 'results.html', {
            "score": score,
            "msg": msg,
            "score_breakup": response
        })
        #return render(request, 'results.html')
        #return HttpResponseRedirect("/emoDrive/analyze/" + upload_path)
    else:
        return render(request, 'upload.html')
def post_audio():
    """Transcribe a canned audio response and update the shopping-cart state.

    On the first command the Maui Coffee item is added to the cart;
    otherwise the cart is cleared. Always returns a 200 JSON payload.

    NOTE(review): credentials and audio sample paths are hard-coded.
    """
    content = request.get_json(silent=True)
    print(content)
    state = load_obj()
    objectList = ['refrigerator', 'printer', 'coffee']
    commandList = ['status', 'buy', 'purchase', 'order', 'add', 'cart']
    speech_to_text = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False)
    print(json.dumps(speech_to_text.models(), indent=2))
    print(
        json.dumps(speech_to_text.get_model('en-US_BroadbandModel'), indent=2))
    # Pick the canned audio sample depending on whether this is the first
    # command in the session.
    path = '/Users/andreaskarinam/Developer/React/Hermes/samples/response2.wav'
    if state["firstCommand"] == True:
        path = '/Users/andreaskarinam/Developer/React/Hermes/samples/response3.wav'
    with open(join(dirname(__file__), path), 'rb') as audio_file:
        text_dict = speech_to_text.recognize(audio_file,
                                             content_type='audio/wav',
                                             timestamps=True,
                                             word_confidence=True)
    wordsList = text_dict['results'][0]['alternatives'][0][
        'word_confidence']
    # Fix: build the word list with a comprehension; removed the unused
    # locals (finalList, sentence) from the original.
    jsonWords = [pair[0] for pair in wordsList]
    print(jsonWords)
    for word in jsonWords:
        if (word in commandList):
            print(word)
    for word in jsonWords:
        if (word in objectList):
            print(word)
    print(state["firstCommand"])
    if state["firstCommand"] == True:
        state["firstCommand"] = False
        state["shoppingcart"].append({
            "name": "Maui Coffee",
            "quantity": 1,
            "price": "$12.99"
        })
    else:
        # Fix: removed the dead read of state["devices"][0]["data"] -- the
        # value was never used and could raise KeyError.
        state["shoppingcart"] = []
    save_obj(state)
    return json.dumps({"response": 200})
def transcribe_audio(path_to_audio_file):
    """Recognize a module-relative WAV file, requesting per-word confidences."""
    watson = SpeechToTextV1(
        username='******',
        password='******',
        x_watson_learning_opt_out=False)
    audio_path = join(dirname(__file__), path_to_audio_file)
    with open(audio_path, "rb") as audio_file:
        return watson.recognize(audio_file,
                                content_type="audio/wav",
                                word_confidence=True)
def connect_speechtext():
    """Return an authenticated Watson SpeechToTextV1 client.

    Credentials are placeholders; learning opt-out is disabled.
    """
    client = SpeechToTextV1(
        username="******",
        password='******',
        x_watson_learning_opt_out=False)
    return client
def transcribe_audio():
    """Transcribe the fixed file.wav with Watson STT.

    Fix: the username/password locals were assigned but never used -- the
    client was constructed from separate literals. They are now passed
    through; behavior is unchanged since the values are identical.
    """
    username = "******"
    password = "******"
    path_to_audio_file = "file.wav"
    speech_to_text = SpeechToTextV1(username=username, password=password)
    with open(path_to_audio_file, "rb") as audio_file:
        return speech_to_text.recognize(audio_file, content_type="audio/wav")
def __init__(self):
    """Configure a Watson STT client against the public stream endpoint."""
    self.url = "https://stream.watsonplatform.net/speech-to-text/api"
    self.username = "******"
    self.password = "******"
    self.speech_to_text = SpeechToTextV1(username=self.username,
                                         password=self.password,
                                         url=self.url)
def __init__(self, debug_mode=False):
    """Read Watson credentials from key.txt and build the STT client.

    key.txt line 15 (index 14) is expected to hold the IAM API key and
    line 16 (index 15) the service URL.
    """
    self.debug_mode = debug_mode
    # Fix: the original indexed the (closed) file object -- f[14]/f[15] --
    # instead of the list of lines read from it, which raises TypeError.
    # Also read via a context manager so the handle is always closed.
    with open("key.txt", "r") as key_file:
        lines = key_file.read().splitlines()
    self.speech_to_text = SpeechToTextV1(
        iam_apikey=lines[14],
        url=lines[15]
    )
def convert_speech_to_text(audio):
    """Send MP3 audio to Watson STT and return the raw recognition result.

    Credentials are read from the WATSON_TRANSCRIPTION_* environment
    variables (KeyError if unset).
    """
    credentials = {
        "username": os.environ["WATSON_TRANSCRIPTION_USERNAME"],
        "password": os.environ["WATSON_TRANSCRIPTION_PASSWORD"],
    }
    client = SpeechToTextV1(**credentials)
    return client.recognize(
        audio,
        content_type="audio/mp3",
        timestamps=False,
        word_confidence=False
    )
def __init__(self):
    """Set up the Watson STT client and microphone capture parameters.

    NOTE(review): the IAM API key is hard-coded; move it to configuration.
    """
    self.config = SpeechToTextV1(
        iam_apikey='z2T7SNluJPGEGStI2etKxLgBpj56kR_M6mOmYbyCilH4',
        url='https://stream.watsonplatform.net/speech-to-text/api')
    # Capture settings: 16-bit samples at 16 kHz, 1024-frame chunks,
    # silence threshold 500.
    self.threshold = 500
    self.chunk_size = 1024
    self.format = pyaudio.paInt16
    self.rate = 16000
import json
from os.path import join, dirname

from watson_developer_cloud import SpeechToTextV1 as SpeechToText

# Authenticate with placeholder credentials and list the available models.
speech_to_text = SpeechToText(username='******', password='******')
print(json.dumps(speech_to_text.models(), indent=2))

# Transcribe the bundled sample WAV and pretty-print the raw response.
audio_path = join(dirname(__file__), '../resources/speech.wav')
with open(audio_path, 'rb') as audio_file:
    result = speech_to_text.recognize(audio_file, content_type='audio/wav')
    print(json.dumps(result, indent=2))