# NOTE(review): live API keys are committed below — rotate them and load from
# environment variables or a secrets store; these keys should be treated as
# compromised.
from __future__ import print_function
from os.path import join, dirname
from ibm_watson import TextToSpeechV1
from ibm_watson import SpeechToTextV1
from ibm_watson import LanguageTranslatorV3
from playsound import playsound

# Watson Text to Speech client (Frankfurt endpoint).
t2s = TextToSpeechV1(
    url='https://stream-fra.watsonplatform.net/text-to-speech/api',
    iam_apikey='Ia5Hm2hmHyofPhZw0iY6ZIehbE7miqUm1abf6SlTJ-7N')
# Watson Speech to Text client.
s2t = SpeechToTextV1(
    url='https://stream-fra.watsonplatform.net/speech-to-text/api',
    iam_apikey='KChSwZGxpjGPTQr2uXOPjEEj2G0IbaOK2FZe78eAuaG5')
# Watson Language Translator client.
translator = LanguageTranslatorV3(
    version='2018-05-01',
    url='https://gateway-fra.watsonplatform.net/language-translator/api',
    iam_apikey='XWy87z-yWfCgk1fqRtpnntitN4N016pP0hox2g1BerCL')


def loose_in_translation(text, voice):
    # Synthesize `text` with the given Watson voice, save the audio as
    # resources/<voice>.wav, play it back, then run speech-to-text.
    # NOTE(review): the function continues beyond this chunk.
    print('text to speech')
    with open(join(dirname(__file__), 'resources', voice + '.wav'),
              'wb') as audio_file:
        response = t2s.synthesize(text, accept='audio/wav',
                                  voice=voice).get_result()
        audio_file.write(response.content)
    print('playing result...')
    playsound(join(dirname(__file__), 'resources', voice + '.wav'), )
    print('speech to text')
import os
import sys
import pyaudio
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from threading import Thread
from queue import Queue, Full

# Flag toggled elsewhere to stop the streaming recognition loop.
stop_now = False

# retrieve key from bash environment variable
iam_apikey = os.environ['IAM_APIKEY']

# set up an instance of the STT service
service = SpeechToTextV1(
    url='https://gateway-lon.watsonplatform.net/speech-to-text/api',
    iam_apikey=iam_apikey)

# Audio frames per queue entry.
CHUNK = 1024
# Note: It will discard if the websocket client can't consumme fast enough
# So, increase the max size as per your choice
BUF_MAX_SIZE = CHUNK * 10

# Buffer to store audio
q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK)))

# Create an instance of AudioSource
audio_source = AudioSource(q, True, True)


class MyRecognizeCallback(RecognizeCallback):
    # Websocket recognition callback handler.
    # NOTE(review): the class body continues beyond this chunk.
    def __init__(self):
import json
from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
import threading
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from utils import Utils

# Service credentials come from the project YAML config.
config = Utils.readYaml("config.yaml")

# Initialize the speech-to-text client.
authenticator = IAMAuthenticator(config['watson']['API_KEY'])
service = SpeechToTextV1(authenticator=authenticator)
service.set_service_url(config['watson']['URL'])

# Dump the catalogue of available base models, then one model in detail.
models = service.list_models().get_result()
print(json.dumps(models, indent=2))
model = service.get_model('en-US_BroadbandModel').get_result()
print(json.dumps(model, indent=2))

# Transcribe the local WAV sample with word timing and confidence data.
with open(join(dirname(__file__), 'speech.wav'), 'rb') as audio_file:
    recognition = service.recognize(audio=audio_file,
                                    content_type='audio/wav',
                                    timestamps=True,
                                    word_confidence=True).get_result()
    print(json.dumps(recognition, indent=2))

# Example using websockets
from ibm_watson import SpeechToTextV1
from os.path import join, dirname
import json

# Speech to Text client (London endpoint).
# NOTE(review): the API key is hard-coded — rotate it and load it from the
# environment instead.
speech_to_text = SpeechToTextV1(
    iam_apikey='PUfqdSXO-sKGCShdkJ_SX55gY35bm42s-aZqbbksSQUP',
    url='https://gateway-lon.watsonplatform.net/speech-to-text/api')

# Transcribe a FLAC sample, spotting a few keywords and collecting word
# alternatives above a 0.9 confidence threshold.
audio_path = join(dirname(__file__), './.', 'audio-file2.flac')
with open(audio_path, 'rb') as audio_file:
    speech_recognition_results = speech_to_text.recognize(
        audio=audio_file,
        content_type='audio/flac',
        word_alternatives_threshold=0.9,
        keywords=['colorado', 'tornado', 'tornadoes'],
        keywords_threshold=0.5).get_result()
    print(json.dumps(speech_recognition_results, indent=2))
    #print(speech_recognition_results['results'][0]['alternatives'][0]['transcript'])
from ibm_watson import SpeechToTextV1
import time
import json
import io

# Watson STT client (placeholder API key).
speech_to_text = SpeechToTextV1(
    iam_apikey="api_key",
    url="https://stream.watsonplatform.net/speech-to-text/api")

# Wipe every existing custom language model from the account.
language_models = speech_to_text.list_language_models().get_result()
customizations = language_models["customizations"]
for entry in customizations:
    model_id = entry["customization_id"]
    print("deleting customization:", model_id)
    speech_to_text.delete_language_model(model_id)

# Create a fresh pt-BR custom model for the education domain.
language_model = speech_to_text.create_language_model(
    'Education language model',
    'pt-BR_BroadbandModel',
    description='An education language model to use on distance learning'
).get_result()
customization_id = language_model["customization_id"]
print("creating customization:", customization_id)

# Re-fetch the model to read its current training status.
language_model = speech_to_text.get_language_model(
    customization_id).get_result()
status = language_model["status"]
from ibm_watson import SpeechToTextV1
from os.path import join, dirname
import json

# Watson STT client (Sydney endpoint).
# NOTE(review): hard-coded API key — rotate it and read it from the
# environment instead.
speech_to_text = SpeechToTextV1(
    iam_apikey='mRku2G3EtmnHGccaOihFJHvUI0VkhW07EEXij4HQtk5g',
    url='https://gateway-syd.watsonplatform.net/speech-to-text/api')


def speechToText(file_name):
    """Transcribe an MP3 located next to this module; return the raw result dict."""
    audio_path = join(dirname(__file__), './.', file_name)
    with open(audio_path, 'rb') as audio_file:
        recognition = speech_to_text.recognize(
            audio=audio_file,
            content_type='audio/mp3',
        ).get_result()
    return recognition
# Credentials and endpoints come from the project config module.
T2S_TOKEN = config.T2S_TOKEN
T2S_URL = config.T2S_URL
S2T_TOKEN = config.S2T_TOKEN
S2T_URL = config.S2T_URL

logger = logging.getLogger('TelegramBot')

# Authenticate and wire up both Watson services.
t2sauth = IAMAuthenticator(T2S_TOKEN)
s2tauth = IAMAuthenticator(S2T_TOKEN)
text2speech = TextToSpeechV1(authenticator=t2sauth)
text2speech.set_service_url(T2S_URL)
speech2text = SpeechToTextV1(authenticator=s2tauth)
speech2text.set_service_url(S2T_URL)


def convert_voice(audio_file):
    # Transcribe an OGG voice note with the Brazilian Portuguese narrowband
    # model, log the detected phrase, and return the top transcript.
    # NOTE(review): assumes at least one result/alternative is present — an
    # empty recognition result would raise IndexError here.
    response = speech2text.recognize(audio=audio_file,
                                     content_type='audio/ogg',
                                     model='pt-BR_NarrowbandModel')
    result = response.get_result()
    logger.info('Detectada frase: ' +
                result['results'][0]['alternatives'][0]['transcript'])
    return result['results'][0]['alternatives'][0]['transcript']


def convert_text(message):
def transcription(self):
    '''
    Returns transcription of the inputed audio files.

    Each file in self.audio is sent to Watson STT; the per-word timestamps
    are then walked to insert sentence breaks (a period whenever the gap
    between consecutive words is >= 0.45 s).  The result is a list of
    [speaker, text, start_time, end_time] phrases sorted by start time,
    where `speaker` is the index of the source audio file.
    '''
    authenticator = IAMAuthenticator(WATSON_API_KEY)
    speech_to_text = SpeechToTextV1(authenticator=authenticator)
    speech_to_text.set_service_url(SERVICE_URL)
    results = []
    #Iterating through all inputted files
    for item in self.audio:
        # Pick the MIME type from the file extension (default: flac).
        file_type = "audio/flac"
        if ".wav" in item:
            file_type = "audio/wav"
        elif ".mpeg" in item:
            file_type = "audio/mpeg"
        elif ".mp3" in item:
            file_type = "audio/mp3"
        # NOTE(review): this handle is never closed — consider `with open(...)`.
        file = open(item, "rb")
        #API CAlL
        response = speech_to_text.recognize(file,
                                            content_type=file_type,
                                            smart_formatting=True,
                                            timestamps=True,
                                            inactivity_timeout=90)
        results.append(response.get_result())
    phrase = []
    t_string = ""
    t_start = 0
    temp = []
    # Obtain the timestamps of each word to include periods.
    # Each `word` entry from Watson is [token, start_sec, end_sec].
    for speaker, item in enumerate(results):
        temp = []
        for r_index, i in enumerate(item['results']):
            for j in i['alternatives']:
                for index, word in enumerate(j['timestamps']):
                    #Take into account seperation in transcript from Watson IBM
                    if index == 0:
                        if temp:
                            # Compare against the word carried over from the
                            # previous result chunk.
                            if (word[1]) - (temp[1][2]) < 0.45:
                                t_string += " " + str(word[0])
                            else:
                                t_string += ". "
                                phrase.append([
                                    speaker, t_string, t_start, temp[1][2]
                                ])
                                t_string = ""
                                t_start = word[1]
                                t_string += str(word[0])
                            temp = []
                            continue
                        if t_string:
                            t_string += ". "
                            phrase.append([
                                speaker, t_string, t_start,
                                j['timestamps'][index - 1][2]
                            ])
                            t_string = ""
                            t_start = word[1]
                            t_string += str(word[0])
                            continue
                        t_string += word[0]
                        t_start = word[1]
                    # The amount of time to determine when a period is placed is decided here.
                    elif (word[1]) - (j['timestamps'][index - 1][2]) < 0.45:
                        t_string += " " + str(word[0])
                        if index == len(j['timestamps']) - 1:
                            # Last word of this chunk: flush now if it is the
                            # final chunk, otherwise carry it over in `temp`.
                            if (len(results) == 1) or (r_index == len(item['results']) - 1):
                                t_string += ". "
                                phrase.append(
                                    [speaker, t_string, t_start, word[2]])
                                t_string = ""
                            else:
                                temp = [index, word, speaker]
                    else:
                        t_string += ". "
                        phrase.append([
                            speaker, t_string, t_start,
                            j['timestamps'][index - 1][2]
                        ])
                        t_string = ""
                        t_start = word[1]
                        t_string += str(word[0])
    # Flush any word still carried over after all loops finish.
    if temp:
        t_string += ". "
        phrase.append([temp[2], t_string, t_start, temp[1][2]])
    #Sort the phrases from all the audio transcriptions in chronological order
    phrase.sort(key=lambda x: x[2])
    return phrase
from ibm_watson import TextToSpeechV1, SpeechToTextV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
import json

# --- Synthesis: write "Hello world" to a WAV file. ---
tts_authenticator = IAMAuthenticator(
    '16brHHpIvclEZodkhLMGcoq4FKN7_dTh601Gh9HauYRH')
tts = TextToSpeechV1(authenticator=tts_authenticator)
tts.set_service_url('https://stream.watsonplatform.net/text-to-speech/api')

with open('hello_world.wav', 'wb') as audio_file:
    synthesis = tts.synthesize('Hello world',
                               voice='en-US_AllisonVoice',
                               accept='audio/wav').get_result()
    audio_file.write(synthesis.content)

# --- Recognition: transcribe the file we just produced. ---
stt_authenticator = IAMAuthenticator(
    'bXmuzE7OGUPlDTgQfqt_0RlmlfwcZSmVbpsGi8_7KEQ0')
stt = SpeechToTextV1(authenticator=stt_authenticator)
stt.set_service_url('https://stream.watsonplatform.net/speech-to-text/api')

with open('hello_world.wav', 'rb') as audio_file:
    transcript = stt.recognize(audio=audio_file,
                               content_type='audio/wav',
                               timestamps=True,
                               word_confidence=True).get_result()
    print(json.dumps(transcript, indent=2))
from __future__ import print_function
import json
from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
import threading

# If service instance provides API key authentication
service = SpeechToTextV1(
    ## url is optional, and defaults to the URL below. Use the correct URL for your region.
    url='https://stream.watsonplatform.net/speech-to-text/api',
    iam_apikey='YOUR APIKEY')

# service = SpeechToTextV1(
#     username='******',
#     password='******',
#     url='https://stream.watsonplatform.net/speech-to-text/api')

# List every available base model, then inspect one in detail.
models = service.list_models().get_result()
print(json.dumps(models, indent=2))

model = service.get_model('en-US_BroadbandModel').get_result()
print(json.dumps(model, indent=2))

# Transcribe the bundled sample with timing detail.
# NOTE(review): the recognize call continues beyond this chunk.
with open(join(dirname(__file__), '../resources/speech.wav'),
          'rb') as audio_file:
    print(json.dumps(
        service.recognize(
            audio=audio_file,
            content_type='audio/wav',
            timestamps=True,
# Extract the audio track from `video` (defined earlier in the file).
audio = video.audio
audio.write_audiofile('./test.wav')

from pydub import AudioSegment
import os

# Convert the extracted WAV to FLAC for Watson.
song = AudioSegment.from_wav(
    "C:/Users/Nimish/Documents/Flasktut/hirebot/test.wav")
song.export("test.flac", format="flac")

from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
from os.path import join, dirname
import json

# Watson STT client (placeholder API key, London endpoint).
speech_to_text = SpeechToTextV1(
    iam_apikey='API_KEY',
    url='https://gateway-lon.watsonplatform.net/speech-to-text/api')


class MyRecognizeCallback(RecognizeCallback):
    # Websocket callbacks: print whatever the service sends back.
    # NOTE(review): the class may continue beyond this chunk.
    def __init__(self):
        RecognizeCallback.__init__(self)

    def on_data(self, data):
        print(json.dumps(data, indent=2))

    def on_error(self, error):
        print('Error received: {}'.format(error))

    def on_inactivity_timeout(self, error):
        print('Inactivity timeout: {}'.format(error))
import speech_recognition as sr
from ibm_watson import SpeechToTextV1
import json

r = sr.Recognizer()
speech = sr.Microphone()

# Watson STT client (placeholder credentials).
speech_to_text = SpeechToTextV1(iam_apikey="YOUR_API_KEY", url="YOUR_URL")
print('speech_to_text: ' + str(speech_to_text))

# Capture one utterance from the default microphone and transcribe it.
with speech as source:
    print("say something!!...")
    # Calibrate for ambient noise, then record until silence.
    audio_file = r.adjust_for_ambient_noise(source)
    audio_file = r.listen(source)
    print('audio_file: ' + str(audio_file))
    wav_bytes = audio_file.get_wav_data()
    response = speech_to_text.recognize(audio=wav_bytes,
                                        content_type='audio/wav')
    speech_recognition_results = response.get_result()
    print(json.dumps(speech_recognition_results, indent=2))
def upload_audio():
    # Flask route: accept an uploaded audio clip plus a language code,
    # transcribe it with Watson STT, and render songs whose lyrics match
    # the best transcript.
    audio = request.files["audio"]
    language = request.form["language"]
    allowed_mimes = ["audio/mp3", "audio/mpeg", "audio/ogg", "audio/wav"]
    allowed_language_models = ["en-US_BroadbandModel"]
    songs = []  # JSON data to render in the result page

    # Check the MIME type
    #return audio.content_type.split("/",1)[1]
    if audio.content_type not in allowed_mimes:
        return "File Harus {0}".format(",".join(allowed_mimes)), 400

    # Check the language
    if language not in allowed_language_models:
        return "Maaf Bahasa Tidak Disupport", 400

    # Save audio file
    filename = secure_filename(audio.filename)
    save_path = os.path.join(settings.UPLOAD_FOLDER, filename)
    # Store the upload in the uploads folder
    audio.save(save_path)

    # Manipulate audio: load it with the format taken from the MIME subtype.
    song = AudioSegment.from_file(save_path,
                                  format=audio.content_type.split("/", 1)[1])
    song_duration_in_seconds = len(song) / 1000

    # Reject clips longer than 15 seconds
    if int(song_duration_in_seconds) > 15:
        os.remove(save_path)  # delete the rejected upload
        return "Durasi maksimal audio adalah 15 detik", 400

    # Normalize audio volume
    save_path = os.path.join(settings.UPLOAD_FOLDER, "normalize-" + filename)
    normalizing_song = song.normalize()
    normalizing_song.export(save_path,
                            format=audio.content_type.split("/", 1)[1])

    # Send the audio file to the IBM Speech to Text API
    # Authentication
    authenticator = IAMAuthenticator(settings.WATSON_API_KEY)
    speech_to_text = SpeechToTextV1(
        authenticator=authenticator
    )
    speech_to_text.set_service_url(settings.WATSON_SERVICE_URL)

    # Sending/Recognizing
    with open(save_path, "rb") as audio_file:
        speech_recognition_results = speech_to_text.recognize(
            audio=audio_file,
            content_type=audio.content_type,
            model=language,
            speech_detector_sensitivity=0.5,
            background_audio_suppression=0.5,
            smart_formatting=True,
            profanity_filter=False
        ).get_result()
    logging.error(json.dumps(speech_recognition_results, indent=2))

    # Check whether any transcript came back
    if len(speech_recognition_results["results"]) > 0:
        # Collect the top alternative of every result block
        transcripts = {"data": []}
        for transcript in speech_recognition_results["results"]:
            transcripts["data"].append(transcript["alternatives"][0])
        # Pick the transcript with the highest confidence value
        max_confidence_transcript = max(transcripts["data"],
                                        key=lambda x: x["confidence"])
        # Look up songs whose lyrics match the best transcript
        for song in Song.query.filter(
                Song.lyrics.match(
                    max_confidence_transcript["transcript"])).limit(10).all():
            logging.error(song)
            # Append the song data for rendering
            songs.append({"artist": song.artist,
                          "releases": song.releases,
                          "title": song.title})
    # Render the result page
    return render_template("ResultPage.html", songs=songs), 200
    return dialog_counter


# Initialize Watson Speech to Text Service
# NOTE(review): live credentials are committed below — rotate this key and
# load it from the environment instead.
speech_to_text_credentials = {
    "apikey": "UF2I5P9NXt1HXnmye2fkDeP2kxp_tb9VVSWF3i5qjuZ3",
    "iam_apikey_description": "Auto-generated for key 16171d30-82c7-4b51-8f0e-fe656da5cdcd",
    "iam_apikey_name": "wdp-writer",
    "iam_role_crn": "crn:v1:bluemix:public:iam::::serviceRole:Writer",
    "iam_serviceid_crn": "crn:v1:bluemix:public:iam-identity::a/eed7635af9de4ec1a02ed80b7edae9dc::serviceid:ServiceId-445f4b12-5e0d-45d4-a284-7f2a7138cf52",
    "url": "https://api.us-east.speech-to-text.watson.cloud.ibm.com/instances/687866d5-4515-46ca-8789-9191fb6e41e3",
}
speech_to_text_authenticator = IAMAuthenticator(speech_to_text_credentials['apikey'])
speech_to_text_service = SpeechToTextV1(authenticator=speech_to_text_authenticator)
speech_to_text_service.set_service_url(speech_to_text_credentials['url'])


def speech_to_text(file_name):
    # Transcribe a WAV file and return the top transcript, or None when the
    # response has no usable result.
    with open(file_name, "rb") as audio_file:
        result = speech_to_text_service.recognize(
            audio_file,
            content_type="audio/wav",
            continuous=True,
            timestamps=False,
            max_alternatives=1
        )
    try:
        return result.result['results'][0]['alternatives'][0]['transcript']
    except:
        # NOTE(review): the bare except maps ANY failure to None — consider
        # narrowing to (KeyError, IndexError).
        return None
import sys
import json
import time
import os.path
from os import path
from beeply import notes
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtWidgets import QApplication, QWidget, QInputDialog, QLineEdit, QFileDialog, QMainWindow, QListWidget, QListWidgetItem
from PyQt5.QtGui import QIcon
from PyQt5.QtCore import Qt
from ibm_watson import TextToSpeechV1
from ibm_watson import SpeechToTextV1
from ibm_watson import ApiException
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Speech-to-text service (credentials redacted with placeholders).
api = IAMAuthenticator("xxxxxxxxxxxxxxxxxxxxxxxxxxx")  #your authenticator id
speech2 = SpeechToTextV1(authenticator=api)
speech2.set_service_url("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")  # your service url

# Text-to-speech service.
api1 = IAMAuthenticator("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")  #your authenticator id
text2 = TextToSpeechV1(authenticator=api1)
text2.set_service_url("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")  # your service url

# beeply beep helper (parameter semantics per beeply docs).
mybeep = notes.beeps(200)


class MainWindow(QMainWindow):
    # Fixed-size main window of the application.
    # NOTE(review): the class body continues beyond this chunk.
    fileName = ""  # class-level attribute default

    def __init__(self):
        super(MainWindow, self).__init__()
        self.setObjectName("MainWindow")
        self.setWindowModality(QtCore.Qt.ApplicationModal)
        self.setFixedSize(590, 597)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed,
                                           QtWidgets.QSizePolicy.Fixed)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
import messager
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import zone

# Use the application default credentials
cred = credentials.Certificate('./cred1.json')
firebase_admin.initialize_app(cred)
db = firestore.client()

# Keyword list used to flag incident-related transcripts.
hot = ["intoxicated", "intoxicate", "toxic", "shooting","shoot","shot","blood","bleeding","virus","killed","kill","murder","murdering","murdered","injured","injury","harm","harmed","harming","attacker","offender","armed","arms","gun","steal","stole","robbery","punching","fired","fire","infected","STI","sexually","rape","raping","raped","explosion","explode","exploded","food-borne","illness","salmonella","ebola","coli","gunned","loose","disaster","tornado","hurricane","storm","sex","harrassment","harrasser","offender","killer","serial","bomb","bombing","threat","threatened","threatening","closed","close","fled","flee","escaped","flood","flooding","contaminated","contamination","contaminate","exposing","danger","lose","fire","naked","broke","substance","fight", "stolen"]

# NOTE(review): SpeechToTextV1 and listdir are used in this chunk but not
# imported here — presumably imported elsewhere in the original file.
# NOTE(review): live API key committed below — rotate it.
speech_to_text = SpeechToTextV1(
    iam_apikey='V_g8OgIsLNpHPQ9PBTF1i_0LnflSXmsiJiMQOZ6HOjTH',
    url='https://stream.watsonplatform.net/speech-to-text/api'
)


def getTextAndKeywords(json):
    # Flatten a Watson STT response into (full_text, matched_keywords).
    # NOTE(review): the parameter name `json` shadows the stdlib module name.
    text = ''
    keywords = []
    for result in json['results']:
        text += result['alternatives'][0]['transcript'].strip() + ' '
        if ('keywords_result' in result.keys()):
            keywords = keywords + (list(result['keywords_result'].keys()))
    return (text.strip(), keywords)


# Poll the audio directory forever, processing files in numeric filename order.
# NOTE(review): the loop body continues beyond this chunk.
while(True):
    filenames = sorted([f for f in listdir('audio') if not f.startswith('.')],
                       key=lambda x: int(x.split('.')[0]))
    if (len(filenames) > 1):
        # if there is only one file, it is probably being currently written to
import assistant_setup # One time initialization engine = pyttsx3.init() # Set properties _before_ you add things to say engine.setProperty('rate', 150) # Speed percent (can go over 100) engine.setProperty('volume', 0.5) # Volume 0-1 load_dotenv() authenticator = (get_authenticator_from_environment('assistant') or get_authenticator_from_environment('conversation')) assistant = AssistantV1(version="2019-11-06", authenticator=authenticator) workspace_id = assistant_setup.init_skill(assistant) speech_to_text = SpeechToTextV1() language = 'en' def play_mp3(path): subprocess.Popen(['mpg123', '-q', path]).wait() def record_audio(): CHUNK = 1024 FORMAT = pyaudio.paInt16 CHANNELS = 2 RATE = 44100 RECORD_SECONDS = 5 WAVE_OUTPUT_FILENAME = "record.wav"
from __future__ import print_function
import json
from os.path import join, dirname
from ibm_watson import SpeechToTextV1
from ibm_watson.websocket import RecognizeCallback, AudioSource
import threading

# If service instance provides API key authentication
# NOTE(review): live API key committed below — rotate it and read it from the
# environment instead.
service = SpeechToTextV1(
    ## url is optional, and defaults to the URL below. Use the correct URL for your region.
    url='https://stream.watsonplatform.net/speech-to-text/api',
    iam_apikey='DBxOesEcwYTQK9-dvcaxTwBICWk0s3RwwEW6m-2eppDn')

# service = SpeechToTextV1(
#     username='******',
#     password='******',
#     url='https://stream.watsonplatform.net/speech-to-text/api')

# List every available base model, then inspect one in detail.
models = service.list_models().get_result()
print(json.dumps(models, indent=2))

model = service.get_model('en-US_BroadbandModel').get_result()
print(json.dumps(model, indent=2))

# Transcribe the bundled sample with timing/confidence detail.
# NOTE(review): the print/recognize call continues beyond this chunk.
with open(join(dirname(__file__), '../resources/speech.wav'),
          'rb') as audio_file:
    print(
        json.dumps(service.recognize(audio=audio_file,
                                     content_type='audio/wav',
                                     timestamps=True,
                                     word_confidence=True).get_result(),
def main(args):
    # Cloud-function entry point: accepts a multipart request carrying either
    # an audio part or a text field (plus a `car` field), runs Watson STT
    # and/or NLU entity-sentiment analysis, and recommends a car based on the
    # most negative entity found.

    # Parse incoming request headers
    _c_type, p_dict = parse_header(
        args['__ow_headers']['content-type']
    )
    # Decode body (base64)
    decoded_string = b64decode(args['__ow_body'])
    # Set Headers for multipart_data parsing
    p_dict['boundary'] = bytes(p_dict['boundary'], "utf-8")
    p_dict['CONTENT-LENGTH'] = len(decoded_string)
    # Parse incoming request data
    multipart_data = parse_multipart(
        BytesIO(decoded_string), p_dict
    )
    try:
        # Build flac file from stream of bytes
        fo = open("audio_sample.flac", 'wb')
        fo.write(multipart_data.get('audio')[0])
        fo.close()
        teste = False  # audio part present
    except:
        teste = True  # no audio part; fall back to the text field
    #teste = multipart_data.items
    # Grab the car field
    carro = multipart_data.get('car')[0]
    if teste == False:
        # Basic Authentication with Watson STT API
        stt_authenticator = BasicAuthenticator(
            'apikey',
            'apikey'
        )
        # Construct a Watson STT client with the authentication object
        stt = SpeechToTextV1(authenticator=stt_authenticator)
        # Set the URL endpoint for your Watson STT client
        stt.set_service_url('https://api.us-south.speech-to-text.watson.cloud.ibm.com')
        # Read audio file and call Watson STT API:
        with open(
                os.path.join(
                    os.path.dirname(__file__), './.', 'audio_sample.flac'
                ), 'rb'
        ) as audio_file:
            # Transcribe the audio.flac with Watson STT
            # Recognize method API reference:
            # https://cloud.ibm.com/apidocs/speech-to-text?code=python#recognize
            stt_result = stt.recognize(
                audio=audio_file,
                content_type='audio/flac',
                model='pt-BR_BroadbandModel'
            ).get_result()
        authenticator_nlu = BasicAuthenticator(
            'apikey',
            'apikey'
        )
        natural_language_understanding = NaturalLanguageUnderstandingV1(version='2020-09-16', authenticator=authenticator_nlu)
        natural_language_understanding.set_service_url('https://api.us-south.natural-language-understanding.watson.cloud.ibm.com')
        texto_stt = stt_result['results'][0]['alternatives'][0]['transcript']
        try:
            nlu_resp = natural_language_understanding.analyze(text=texto_stt, features=Features(entities=EntitiesOptions(sentiment=True, model='54f2d12a-54fb-4683-b89f-c76c8b93de3f'))).get_result()
        except ApiException as ex:
            print("Method failed with status code " + str(ex.code) + ": " + ex.message)
    elif teste == True:
        # Grab the text and car fields
        texto = multipart_data.get('text')[0]
        carro = multipart_data.get('car')[0]
        authenticator_nlu = BasicAuthenticator(
            'apikey',
            'apikey'
        )
        natural_language_understanding = NaturalLanguageUnderstandingV1(version='2020-09-16', authenticator=authenticator_nlu)
        natural_language_understanding.set_service_url('https://api.us-south.natural-language-understanding.watson.cloud.ibm.com')
        # Run entity extraction with sentiment
        try:
            nlu_resp = natural_language_understanding.analyze(text=texto, features=Features(entities=EntitiesOptions(sentiment=True, model='54f2d12a-54fb-4683-b89f-c76c8b93de3f'))).get_result()
        except ApiException as ex:
            print("Method failed with status code " + str(ex.code) + ": " + ex.message)
    sent_rec = []
    sent_json = []
    score_rec = []
    score_json = []
    ent_rec = []
    ent_json = []
    ment_json = []
    # Collect the list of negative sentiments (bounded at 50 entities; the
    # bare except exits the loop once the index runs past the entity list).
    try:
        for x in range(50):
            aux = nlu_resp['entities'][x]['sentiment']['label']
            sent_json.append(nlu_resp['entities'][x]['sentiment']['label'])
            score_json.append(nlu_resp['entities'][x]['sentiment']['score'])
            ent_json.append(nlu_resp['entities'][x]['type'])
            ment_json.append(nlu_resp['entities'][x]['text'])
            #print(aux)
            if aux != 'neutral':
                if aux != 'positive':
                    sent_rec.append(nlu_resp['entities'][x]['sentiment']['label'])
                    score_rec.append(nlu_resp['entities'][x]['sentiment']['score'])
                    ent_rec.append(nlu_resp['entities'][x]['type'])
                    #print("entrou")
    except:
        saiu = 1
    # Candidate cars, their fallback options, and the priority order of
    # vehicle attributes used for tie-breaking.
    lista = ["FIAT 500", "DUCATO", "ARGO", "FIORINO", "MAREA", "RENEGADE", "CRONOS"]
    lista_seg_op = ["TORO", "ARGO", "DUCATO", "FIAT 500", "CRONOS", "CRONOS", "ARGO"]
    lista_prioridade = ["SEGURANCA", "CONSUMO", "DESEMPENHO", "MANUTENCAO", "CONFORTO", "DESIGN", "ACESSORIOS"]
    # Never recommend the car the user already has: swap it for its
    # second-option counterpart.
    for x in range(len(lista)):
        if carro == lista[x]:
            lista[x] = lista_seg_op[x]
    # Decide which car to suggest
    if sent_rec != []:
        #entidade.append("MANUTENCAO")
        #Sentimento.append(-1)
        #cont=0
        entidade_aux = 0
        sent_aux = 0
        # Track the entity with the most negative score; near-ties (< 0.1)
        # are resolved through the attribute priority list.
        for x in range(len(score_rec)):
            dif = abs(sent_aux - score_rec[x])
            if dif > 0.1:
                if score_rec[x] < sent_aux:
                    sent_aux = score_rec[x]
                    entidade_aux = ent_rec[x]
                    print(sent_aux, entidade_aux)
            elif dif < 0.1:
                # Tie-break via the priority list
                #print("aqui")
                for y in range(len(lista)):
                    if entidade_aux == lista_prioridade[y]:
                        sent_aux = sent_aux
                        entidade_aux = entidade_aux
                    elif ent_rec[x] == lista_prioridade[y]:
                        sent_aux = score_rec[x]
                        entidade_aux = ent_rec[x]
        # NOTE(review): if no priority entry matches `entidade_aux`, `sugest`
        # is never bound and the return below raises NameError.
        for x in range(len(lista)):
            if lista_prioridade[x] == entidade_aux:
                sugest = lista[x]
    else:
        sugest = ""
    # Build the response payload with every extracted entity.
    list_json = []
    for x in range(len(sent_json)):
        list_json.append({"entity": ent_json[x], "sentiment": score_json[x], "mention": ment_json[x]})
    return {
        "recommendation": sugest,
        "entities": list_json
    }
def instantiate_stt(api_key, url_service):
    """Build a SpeechToTextV1 client bound to an IBM STT service instance.

    api_key: IAM API key for the service instance.
    url_service: endpoint URL of the service instance.
    Returns the configured SpeechToTextV1 client.
    """
    stt_client = SpeechToTextV1(authenticator=IAMAuthenticator(api_key))
    stt_client.set_service_url(url_service)
    return stt_client
def main(args):
    """Cloud-function entry point.

    Parses a base64-encoded multipart request carrying an optional `audio`
    FLAC part plus `car`/`text` fields, transcribes the audio with Watson
    STT when present, and returns the Watson NLU entity/sentiment analysis
    (Brazilian Portuguese) of the resulting text as a dict.
    """
    # Parse incoming request headers
    _c_type, p_dict = parse_header(args['__ow_headers']['content-type'])
    # Decode body (base64)
    decoded_string = b64decode(args['__ow_body'])
    # Set headers for multipart_data parsing
    p_dict['boundary'] = bytes(p_dict['boundary'], "utf-8")
    p_dict['CONTENT-LENGTH'] = len(decoded_string)
    # Parse incoming request data
    multipart_data = parse_multipart(BytesIO(decoded_string), p_dict)

    # BUGFIX: `audio` was tested below (`if audio:`) but never defined.
    audio = multipart_data.get('audio')

    # Build a flac file from the uploaded stream of bytes.
    # BUGFIX: the write is now guarded — the original dereferenced
    # multipart_data.get('audio')[0] unconditionally and crashed on
    # text-only requests.
    if audio:
        with open("audio_sample.flac", 'wb') as fo:
            fo.write(audio[0])

    car = multipart_data.get('car')[0]
    text = multipart_data.get('text')[0]

    # --- NLU service ---
    # NOTE(review): credentials are hard-coded — move them to environment
    # variables; committed keys should be treated as compromised.
    nlu_apikey = "R5Kq3Z4sJbPaepfWCC1d3iYch2kIEHJkF1sqnHZTC-C3"
    nlu_service_url = "https://api.us-south.natural-language-understanding.watson.cloud.ibm.com/instances/d26c8f6f-666f-44eb-a631-cb8b161f0c48"
    nlu_entity_model = "a52546bf-6061-4fd0-a3ec-f2e6aa6d19b9"

    nlu_authenticator = IAMAuthenticator(apikey=nlu_apikey)
    nlu_service = NaturalLanguageUnderstandingV1(
        version='2018-03-16', authenticator=nlu_authenticator)
    nlu_service.set_service_url(nlu_service_url)

    # --- STT service ---
    stt_apikey = "-pCzIHgC12ljTpVXELSfx71BAP2yUmAlacQaD1YXdZqM"
    stt_service_url = "https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/2dda5ef8-4933-4096-8fb6-ad817e0e105c"

    stt_authenticator = IAMAuthenticator(apikey=stt_apikey)
    stt_service = SpeechToTextV1(authenticator=stt_authenticator)
    stt_service.set_service_url(stt_service_url)
    stt_model = 'pt-BR_BroadbandModel'

    if audio:
        # Read the audio file and call the Watson STT API:
        # https://cloud.ibm.com/apidocs/speech-to-text?code=python#recognize
        with open(os.path.join(os.path.dirname(__file__), './.',
                               'audio_sample.flac'), 'rb') as audio_file:
            # BUGFIX: the client variable is `stt_service` — the original
            # called the undefined name `stt`.
            stt_result = stt_service.recognize(
                audio=audio_file,
                content_type='audio/flac',
                model=stt_model).get_result()
        # BUGFIX: the original serialized the undefined name `stt_results`;
        # use the `stt_result` just produced.
        results_stt = json.loads(
            json.dumps(stt_result, indent=2, ensure_ascii=False))
        text = results_stt['results'][0]['alternatives'][0]['transcript']

    # Run entity extraction (with sentiment) on the final text.
    nlu_response = nlu_service.analyze(
        text=text,
        features=Features(
            entities=EntitiesOptions(model=nlu_entity_model, sentiment=True)),
        language='pt').get_result()
    results_nlu = json.loads(
        json.dumps(nlu_response, indent=2, ensure_ascii=False))
    return results_nlu
# In[11]: pip install ibm-cloud-sdk-core # In[21]: from ibm_watson import SpeechToTextV1 from ibm_watson import LanguageTranslatorV3 from ibm_cloud_sdk_core.authenticators import IAMAuthenticator iam_apikey_s2t = IAMAuthenticator('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') s2t=SpeechToTextV1(authenticator=iam_apikey_s2t) s2t.set_service_url("https://api.us-south.speech-to-text.watson.cloud.ibm.com/instances/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx") filename='whatstheweatherlike.wav' with open(filename,mode="rb") as wav: response = s2t.recognize(audio=wav,content_type="audio/wav") response.result # In[22]: recognized_text=response.result['results'][0]["alternatives"][0]["transcript"] recognized_text # In[33]:
from ibm_watson import SpeechToTextV1

url_s2t = "https://stream.watsonplatform.net/speech-to-text/api"
iam_apikey_s2t = "..."

# create a speech-to-text adapter object
s2t = SpeechToTextV1(iam_apikey=iam_apikey_s2t, url=url_s2t)

filename = "hello_this_is_python.wav"

# read the file in binary format and submit it for recognition
with open(filename, mode="rb") as wav:
    response = s2t.recognize(audio=wav, content_type='audio/wav')

# response.result looks like:
# {'results': [{'alternatives': [{'confidence': 0.91, 'transcript': 'hello this is python'}],
#   'final': True}], 'result_index': 0}
first_alternative = response.result['results'][0]["alternatives"][0]
recognized_text = first_alternative["transcript"]
# recognized_text : 'hello this is python'
import speech_recognition as sr
from ibm_watson import SpeechToTextV1
import json

r = sr.Recognizer()
speech = sr.Microphone()

# Watson STT client (Sydney endpoint).
# NOTE(review): the API key is hard-coded — rotate it and load it from the
# environment instead.
speech_to_text = SpeechToTextV1(
    iam_apikey="0uxeZa-MaLmGWOXhOSZOB5SuNHQlxXqW-f60vqb34h62",
    url="https://gateway-syd.watsonplatform.net/speech-to-text/api")
print('speech_to_text: ' + str(speech_to_text))

# Capture one utterance from the default microphone and transcribe it.
with speech as source:
    print("say something!!...")
    # Fetch the pt-BR narrowband model description.
    speech_model = speech_to_text.get_model(
        'pt-BR_NarrowbandModel').get_result()
    # Calibrate for ambient noise, then record until silence.
    audio_file = r.adjust_for_ambient_noise(source)
    audio_file = r.listen(source)
    print('audio_file: ' + str(audio_file))
    wav_bytes = audio_file.get_wav_data()
    response = speech_to_text.recognize(audio=wav_bytes,
                                        content_type='audio/wav')
    speech_recognition_results = response.get_result()
    print(json.dumps(speech_recognition_results, indent=2))
def main(args):
    # Cloud-function entry point: extract entities/sentiment from either an
    # uploaded audio file (via Watson STT) or a text field, then recommend a
    # car for the most negative entity.
    model_id = '956978fa-b5e6-4108-96ad-3367bde3478b';
    #NLU
    authenticatorNLU = IAMAuthenticator('bbTi93KoBLq60M_Lj5fMpXInVoYI_CJFp66VBBTtsmhE')
    natural_language_understanding = NaturalLanguageUnderstandingV1(version='2020-08-01', authenticator=authenticatorNLU)
    natural_language_understanding.set_service_url('https://api.us-south.natural-language-understanding.watson.cloud.ibm.com/instances/340adba1-4277-46f0-aca3-412077e9b53d')
    # Parse the base64-encoded multipart request body.
    _c_type, p_dict = parse_header(
        args['__ow_headers']['content-type']
    )
    decoded_string = b64decode(args['__ow_body'])
    p_dict['boundary'] = bytes(p_dict['boundary'], "utf-8")
    p_dict['CONTENT-LENGTH'] = len(decoded_string)
    multipart_data = parse_multipart(BytesIO(decoded_string), p_dict)
    # Random file name for the temporary FLAC dump.
    name_audio = uuid.uuid4().hex.upper()[0:50] + '.flac'
    try:
        fo = open(name_audio, 'wb')
        fo.write(multipart_data.get('audio')[0])
        fo.close()
    except:
        # No audio part present — fall back to the text field below.
        fo = False
    if fo:
        # file audio
        stt_authenticator = BasicAuthenticator(
            'apikey',
            'MaKHsSDKPKgfvQRPDfbFhXSMfvY-JtogeRyQIZn6WPem'
        )
        stt = SpeechToTextV1(authenticator=stt_authenticator)
        stt.set_service_url('https://api.us-south.speech-to-text.watson.cloud.ibm.com')
        with open(
                os.path.join(
                    os.path.dirname(__file__), './.', name_audio
                ), 'rb'
        ) as audio_file:
            stt_result = stt.recognize(
                audio=audio_file,
                content_type='audio/flac',
                model='pt-BR_BroadbandModel'
            ).get_result()
        # print(json.dumps(stt_result, indent=2))
        transcript_audio = stt_result['results'][0]['alternatives'][0]['transcript']
        entities = getEntities(model_id, natural_language_understanding, transcript_audio)
    else:
        text = multipart_data.get('text')[0]
        entities = getEntities(model_id, natural_language_understanding, text)
    #entities[1]['sentiment'] = -0.92
    #entities[2]['sentiment'] = -0.98
    #entities[3]['sentiment'] = -0.92
    #entities[4]['sentiment'] = -0.96
    # NOTE(review): `general_sentiment`, `recommendations`, and `getEntities`
    # are not defined in this chunk — presumably defined elsewhere in the
    # original file; confirm before relying on this branch logic.
    if general_sentiment > 0:
        return {
            "recommendation": "",
            "entities": entities
        }
    elif general_sentiment < 0:
        nums = []
        repetidos = []
        definidos = []
        for i, item in enumerate(entities):
            nums.append(item['sentiment'])
        min_sentiment = min(nums)
        if len(nums) == len(set(nums)):
            # NOTE(review): this appends the raw score (a number), while the
            # duplicate branch appends an entity dict; `definidos[0]['entity']`
            # below would fail on a number — likely a latent bug.
            definidos.append(min_sentiment)
        else:
            # Collect indices of every entity tied at the minimum score.
            for idx, sentiment in enumerate(nums):
                if sentiment == min_sentiment:
                    repetidos.append(idx)
            if len(repetidos) > 1:
                definidos.append(entities[min(repetidos)])
            elif len(repetidos) == 1:
                definidos.append(entities[repetidos[0]])
        #min_sentiment
        #vector_new = removeElement(nums, min_sentiment)
        #second_min_sentiment = min(vector_new)
        #difference = vector_new - second_min_sentiment
        #if difference < 0.1:
        #    definidos.append(min_sentiment)
        #else:
        #    definidos.append(min_sentiment)
        #definidos.append(min_sentiment)
        #definidos.append(min(vector_new))
        # Map the chosen entity to a recommended car.
        recommendation = ""
        for item in recommendations:
            if item['entity'] == definidos[0]['entity']:
                recommendation = item['car']
        return {
            'recommendation': recommendation,  #entities[3]['sentiment']
            'entities': entities
        }
    #return entities
    return {
        "recommendation": "",
        #"general_sentiment": general_sentiment,
        "entities": entities
    }
def get_service():
    """Build and return a configured IBM Speech to Text client.

    Relies on the module-level ``API_KEY`` and ``URL`` constants.
    """
    stt_client = SpeechToTextV1(authenticator=IAMAuthenticator(API_KEY))
    stt_client.set_service_url(URL)
    return stt_client
import os
from os.path import join, dirname
from dotenv import load_dotenv
from ibm_watson import SpeechToTextV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Read credentials from the .env file next to this script.
dotenv_path = join(dirname(__file__), '.env')
load_dotenv(dotenv_path)

# IBM Watson Speech to Text client, configured from environment variables.
# NOTE(review): os.environ.get returns None when the variable is missing,
# which makes IAMAuthenticator fail with an opaque error — confirm the
# deployment always sets API_KEY_IBM and URL_IBM.
ibm_token = os.environ.get("API_KEY_IBM")
api = IAMAuthenticator(ibm_token)
speech_to_text = SpeechToTextV1(authenticator=api)
url_service_ibm = os.environ.get("URL_IBM")
speech_to_text.set_service_url(url_service_ibm)


def Audio_To_Text(fileName):
    """Transcribe an MP3 file with Watson Speech to Text.

    Prints the raw recognition result (original behavior) and — fix — now
    also returns it, so callers can use the transcript programmatically
    instead of getting None.

    :param fileName: path to an MP3 audio file
    :return: dict — the service's recognition result
    """
    with open(fileName, 'rb') as audio_file:
        result = speech_to_text.recognize(
            audio=audio_file,
            content_type="audio/mp3"
        ).get_result()
    print(result)
    return result
from ibm_watson import SpeechToTextV1
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions, KeywordsOptions
from os.path import join, dirname
import json

# Watson Speech to Text client (pre-v4 SDK style: iam_apikey in constructor).
speech_to_text = SpeechToTextV1(
    iam_apikey='****',
    url='https://stream.watsonplatform.net/speech-to-text/api')


def get_per_user_transcript(fileLocation):
    """Transcribe a WAV file (with speaker labels) and build a map from each
    word's start time to the word itself.

    :param fileLocation: path of the WAV file, relative to this script's dir
    """
    with open(join(dirname(__file__), './.', fileLocation),
              'rb') as audio_file:
        result = speech_to_text.recognize(
            audio=audio_file,
            content_type='audio/wav',
            speaker_labels='true'
        ).get_result()
    results = result['results']
    timestamps = []
    # Flatten per-chunk timestamps ([word, start, end] triples) into one list,
    # appending '.' to the last word of each result chunk.
    for res in results:
        current_timestamps = res['alternatives'][0]['timestamps']
        current_timestamps[-1][0] += '.'
        for t in current_timestamps:
            timestamps.append(t)
    # Key each word by its start time.
    # NOTE(review): words sharing a start time overwrite each other — confirm
    # start times are unique across speakers.
    start_to_wordMap = {}
    for stamp in timestamps:
        start_to_wordMap[stamp[1]] = stamp[0]
from dotenv import load_dotenv
from pathlib import Path  # python3 only
import os

# Fix: SpeechToTextV1 and IAMAuthenticator were used below without being
# imported anywhere in this file, raising NameError at runtime.
from ibm_watson import SpeechToTextV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Load Speech to Text credentials from the env file.
env_path = Path('./resources/ibm-credentials-s2t.env')
print(env_path.absolute())
load_dotenv(dotenv_path=env_path)

url_s2t = os.getenv("SPEECH_TO_TEXT_URL")
iam_apikey_s2t = os.getenv("SPEECH_TO_TEXT_IAM_APIKEY")
print(url_s2t, iam_apikey_s2t)

# You create a Speech To Text Adapter object; the parameters are the endpoint and API key.
# http://watson-developer-cloud.github.io/python-sdk/v0.25.0/apis/watson_developer_cloud.speech_to_text_v1.html
authenticator = IAMAuthenticator(iam_apikey_s2t)
s2t = SpeechToTextV1(authenticator=authenticator)
s2t.set_service_url(url_s2t)

# the audio file that we will use to convert into text.
audioFilePath = './resources/PolynomialRegressionandPipelines.mp3'

# "rb": similar to read mode, but ensures the file is in binary mode.
# We use the method recognize to return the recognized text.
with open(audioFilePath, mode="rb") as wav:
    response = s2t.recognize(audio=wav, content_type='audio/mp3')

# The attribute result contains a dictionary that includes the translation:
print(response.result)

# NOTE(review): unused here but kept — the file presumably continues.
from pandas import json_normalize
from ibm_watson import SpeechToTextV1
from os.path import join, dirname
import json
from ibm_watson.websocket import RecognizeCallback, AudioSource

# Watson Speech to Text client (pre-v4 SDK style: iam_apikey in constructor).
# NOTE(review): API key is hard-coded — move it to an environment variable
# and rotate the exposed key.
speech_to_text = SpeechToTextV1(
    iam_apikey='GFT9-N0g7zSU9FIM1YLNL7ZyzLdIJ1s_EkluUjYK8B1s',
    url='https://gateway-lon.watsonplatform.net/speech-to-text/api')

#with open(join(dirname(__file__), './.', 'audio1.wav'),
#          'rb') as audio_file:
#    speech_to_text.add_audio(
#        '{customization_id}',
#        'audio1',
#        audio_file,
#        content_type='audio/wav'
#    )

# Poll for audio status.


class MyRecognizeCallback(RecognizeCallback):
    """Websocket recognize callback: prints transcripts and errors as the
    service delivers them."""

    def __init__(self):
        RecognizeCallback.__init__(self)

    def on_data(self, data):
        # Print the top alternative's transcript of the first result chunk.
        print(
            json.dumps(data['results'][0]['alternatives'][0]['transcript'],
                       indent=2))

    def on_error(self, error):
        print('Error received: {}'.format(error))