def record(filename, duration):
    """Capture `duration` seconds from the microphone and save them as WAV data to `filename`."""
    recognizer = Recognizer()
    microphone = MutableMicrophone()
    with microphone as source:
        captured = recognizer.record(source, duration=duration)
    # Persist the raw capture as a WAV file.
    with open(filename, 'wb') as wav_out:
        wav_out.write(captured.get_wav_data())
def wav_to_text(wav_file_path, language="es-ES", show_all=False):
    """Transcribe a WAV file with Google's recognizer.

    Raises GolemException when the audio cannot be understood.
    """
    recognizer = Recognizer()
    with WavFile(wav_file_path) as source:
        captured = recognizer.record(source)
    try:
        return recognizer.recognize_google(
            audio_data=captured, language=language, show_all=show_all)
    except UnknownValueError:
        raise GolemException("Could not understand audio")
def recognize(source):
    """Listen on an already-opened audio source and print what was heard."""
    recognizer = Recognizer()
    # The caller owns the source; we only toggle its stream around the capture.
    source.stream.start_stream()
    captured = recognizer.listen(source)
    source.stream.stop_stream()
    vprint(4, "Finished recording.")
    try:
        # NOTE(review): `recognize()` / LookupError is the legacy
        # speech_recognition API — confirm the pinned library version.
        vprint(0, "You said " + recognizer.recognize(captured))
    except LookupError:
        vprint(0, "Could not understand audio")
import speech_recognition as sr
from speech_recognition import Microphone, RequestError, Recognizer, UnknownValueError
import pyaudio
import webbrowser as wb

# One recognizer per capture step of the script.
r1 = Recognizer()
r2 = Recognizer()
r3 = Recognizer()

# BUG FIX: the original used `with sr.Microphone as source:` — the class
# object, not an instance — which raises at runtime. Instantiate it.
with sr.Microphone() as source:
    print('[search Google: search YouTube]')
    print('Speak Now!!')
    audio = r3.listen(source)

if 'video' in r1.recognize_google(audio):
    r1 = Recognizer()
    url = 'https://www.youtube.com/results?search_query='
    with sr.Microphone() as source:
        print('search your query')
        audio = r1.listen(source)
    try:
        get = r1.recognize_google(audio)
        print(get)
        # Open a YouTube search for the recognized phrase.
        wb.get().open_new(url + get)
    except UnknownValueError:
        print('error ')
    except RequestError as e:
        # BUG FIX: 'failed'.format(e) had no placeholder, discarding the error.
        print('failed {}'.format(e))
def recognize_wav(filename, language="en-US", show_all=True):
    """Transcribe a WAV file and return the recognition result.

    NOTE(review): `Recognizer(language=...)` and `.recognize(...)` belong to
    the legacy speech_recognition API — confirm the pinned library version.
    """
    recognizer = Recognizer(language=language)
    with WavFile(filename) as source:
        audio_data = recognizer.record(source)
    return recognizer.recognize(audio_data, show_all)
def __init__(self):
    """Set up the speech engine, a fixed-configuration microphone, and start listening."""
    self.recognizer = Recognizer()
    # 16 kHz mono input on the first device, small chunks for low latency.
    self.microphone = Microphone(device_index=0,
                                 sample_rate=16000,
                                 chunk_size=256)
    # Unset until start_listener() initialises the trigger state.
    self.is_trigger_listener_active = None
    self.start_listener()
from speech_recognition import Microphone, Recognizer

# Capture one utterance from the default microphone and print Google's transcription.
recognizer = Recognizer()
microphone = Microphone()
with microphone:
    print("talk")
    audio = recognizer.listen(microphone)
transcription = recognizer.recognize_google(audio)
print(transcription)
async def speech_to_text(event):
    """Transcribe the voice message the command replies to and edit the result into the chat.

    Note: telethon may borrow a different DC id to download audio
    """
    # The command must be a reply to a message holding the voice note.
    if event.reply_to_msg_id:
        msg = await event.get_reply_message()
    else:
        await event.edit(msgRep.REPLY_TO_VM)
        return
    filename, file_format = (None, ) * 2
    voice_note = False
    # Only audio documents are accepted; scan attributes for voice flag and extension.
    if msg.media and hasattr(msg.media, "document") and \
       isinstance(msg.media.document, Document) and \
       msg.media.document.mime_type.startswith("audio"):
        for attribute in msg.media.document.attributes:
            if isinstance(attribute, DocumentAttributeAudio):
                if not voice_note:  # set only if not True already
                    voice_note = attribute.voice
            if isinstance(attribute, DocumentAttributeFilename):
                if not file_format:  # set only if none
                    string = attribute.file_name.split(".")
                    file_format = string[-1]
        if not voice_note:
            await event.edit(msgRep.WORKS_WITH_VM_ONLY)
            return
        if not file_format:  # alternative way: derive extension from the MIME type
            file_format = msg.media.document.mime_type.split("/")[1]
        filename = join(TEMP_DL_DIR, "audio." + file_format)
        await event.edit(msgRep.CONVERT_STT)
        try:
            await msg.download_media(file=filename)
        except Exception as e:
            log.warning(e)
            await event.edit(msgRep.FAILED_LOAD_AUDIO)
            return
    else:
        await event.edit(msgRep.REPLY_TO_VM)
        return
    try:
        # Convert the download to WAV so speech_recognition can read it.
        audio_file = AudioSegment.from_file(filename, file_format)
        audio_wav = join(TEMP_DL_DIR, "audio.wav")
        audio_file.export(audio_wav, "wav")
        r = Recognizer()
        with AudioFile(audio_wav) as source:
            audio = r.record(source)
        result = r.recognize_google(audio)
        text = f"**{msgRep.STT}**\n\n"
        text += f"{msgRep.STT_TEXT}:\n"
        text += f"__{result}__"
        await event.edit(text)
    except UnknownValueError:
        await event.edit(msgRep.STT_NOT_RECOGNIZED)
    except RequestError:
        await event.edit(msgRep.STT_REQ_FAILED)
    except MessageTooLongError:
        await event.edit(msgRep.STT_OUTPUT_TOO_LONG)
    except Exception as e:
        log.warning(e)
        await event.edit(msgRep.UNABLE_TO_STT)
    # Best-effort cleanup of both temporary files.
    # NOTE(review): if conversion failed before `audio_wav` was assigned, the
    # remove() raises NameError, caught by this broad except — confirm intended.
    try:
        remove(filename)
        remove(audio_wav)
    except Exception as e:
        log.warning(f"Unable to delete audio(s): {e}")
    return
def get_audio(): """ Função que resolve a fala do usuário. """ # Função que reconhece a entrada da fala. mic_in = Recognizer() # Pega a entrada do microfone. # Ele vai ouvir o comando e traduzir em escrita para que as demais funções possam iterar com o comando. with Microphone() as source: # Ajusta a entrada eliminando ruidos. mic_in.adjust_for_ambient_noise(source) # Limpa informações de inicialização do terminal. system('clear') # Mostra na tela os comandos padrões. print('Comandos do sistema:') for system_commands in COMMAND_LIST: print(system_commands) user_commands = [info for info in get_command_list()] if user_commands: print('Comandos do usuário:') for comand in user_commands: print(comand) # Armazena a informação de audio na variável. audio = mic_in.listen(source) try: system('clear') print('Processando ...') # Passa o aúdio para o reconhecedor de padrões do google trasformar em frase. phrase = mic_in.recognize_google(audio, language='pt-BR') print(f'Entrada de aúdio: {phrase}') # Condição para o caso de o usuário use o comando ADD. if str(phrase).upper() == COMMAND_LIST[0]: set_command() return get_audio() if str(phrase).upper() == COMMAND_LIST[1]: return system('exit') if str(phrase).upper() not in get_command_list(): insult = get_insult(phrase) record_audio(insult) return get_audio() record_audio(get_response(phrase)) return get_audio() except UnknownValueError: insult = get_insult('Vox') record_audio(insult) return get_audio()
from speech_recognition import Recognizer, Microphone, UnknownValueError from Techmodule.MusicPlayer import * from Techmodule.FunModule import * R = Recognizer() while True: try: with Microphone() as source: print("Adjust-noise...") R.adjust_for_ambient_noise(source) print('ready to listen!') S1 = R.listen(source) print('Done!') Lis = R.recognize_google(S1, language="th-TH") T = DialogFlow(Lis) if T == "Ok": playSound("InYourCommand.wav") print("Adjust-noise...") R.adjust_for_ambient_noise(source) print("In your command!") S2 = R.listen(source) print('Done!') T2 = R.recognize_google(S2, language="th-TH") CommandLine(DialogFlow(T2)) elif T == "M-ON": print("Adjust-noise...") R.adjust_for_ambient_noise(source) print("Listen Musicname") S3 = R.listen(source) print('Done!')
def __init__(self):
    """Read the recognition language from config and create the recognizer."""
    # Language comes from the [Base] section of the configuration.
    self.lang = config.get("Base", "lang")
    self.recognizer = Recognizer()
def listen(recog: sr.Recognizer):
    """Record one phrase from the default microphone and return the raw audio."""
    with sr.Microphone() as source:
        print('Speak: ')
        # phrase_time_limit is in seconds — 5000 s is effectively unbounded.
        captured = recog.listen(source=source, phrase_time_limit=5000)
        print('Time is up')
    return captured
class Assistant(object):
    """Voice assistant: listens for commands and dispatches them to AssistantCommands."""

    # Mapping from English command phrase to internal command id.
    __defaultCommands = {
        "open": 1,
        "search": 2,
        "go to": 3,
        "calculate": 4,
        "add path": 5,
        "add directory": 5,
        "remove path": 6,
        "remove directory": 6,
        "command list": 7,
        "minimize": 8,
        "maximize": 9,
        "close": 10,
        "add command": 11,
        "edit command": 12,
    }
    # Brazilian-Portuguese phrases mapped onto the same command ids.
    __defaultCommands_PT_BR = {
        "abrir": __defaultCommands["open"],
        "pesquisar": __defaultCommands["search"],
        "procurar": __defaultCommands["search"],
        "ir para": __defaultCommands["go to"],
        "calcular": __defaultCommands["calculate"],
        "adicionar caminho": __defaultCommands["add path"],
        "adicionar diretório": __defaultCommands["add directory"],
        "remover caminho": __defaultCommands["remove path"],
        "remover diretório": __defaultCommands["remove directory"],
        "lista de comandos": __defaultCommands["command list"],
        "minimizar": __defaultCommands["minimize"],
        "maximizar": __defaultCommands["maximize"],
        "fechar": __defaultCommands["close"],
        "adicionar comando": __defaultCommands["add command"],
        "editar comando": __defaultCommands["edit command"]
    }
    __defaultLanguage = "en-us"
    # Help text ("Title:Description") shown by the command list, English.
    __info = [
        "Open:Run a program or open a file or folder.",
        "Search:Opens the browser with a search.",
        "Go to:Opens the browser directly to a site.",
        "Calculate:Calculates a simple arithmetic expression.",
        "Add path:Adds a directory to locate programs, files, and folders.",
        "Remove path:Removes a directory added by the user.",
        "Command list:Opens a list of all voice commands.",
        "Minimize:This minimizes an active window or a window with a title you specify.",
        "Maximize:This maximizes a window with a title you specify.",
        "Close:This closes an active window or a window with a title you specify.",
        "Add command:Opens a window for defining a command and associating a program with it.",
        "Edit command:Opens a window for editing the information of a command."
    ]
    # Same help text, Brazilian Portuguese.
    __info_PT_BR = [
        "Abrir:Executa um programa ou abre um arquivo ou pasta.",
        "Pesquisar:Abre o navegador com uma busca.",
        "Vá para:Abre o navegador diretamente em um site.",
        "Calcular:Calcula uma expressão aritmética simples.",
        "Adicionar caminho:Adiciona um diretório para o assistente localizar arquivos e pastas.",
        "Remover caminho:Remove um diretório adicionado pelo usuário.",
        "Lista de comandos:Abre uma lista com todos os comandos de voz.",
        "Minimizar:Minimiza uma janela ativa ou uma janela com um título específico.",
        "Maximizar:Maximiza uma janela com um título específico.",
        "Fechar:Fecha uma janela ativa ou uma janela com um título específico.",
        "Adicionar comando:Abre uma janela para definir um comando e um programa associado a ele.",
        "Editar comando:Abre uma janela para editar as informações de um comando."
    ]
    # Default settings, overridden by whatever is stored on disk.
    __settings = {
        "Assistant": {
            "Language": "EN-US",
            "Name": "Jarvis",
        },
        "System": {
            "Paths": [],
            "Press to speak": "left windows + s",
            "Sounds": True,
            "ShowInput": True
        }
    }
    __userCommands = {}
    __settingsFile = "settings.json"
    __commandsFile = "commands.json"
    __help = False

    def __init__(self, messageBox, settingsPath, icon=None):
        """
        The "messageBox" parameter must be a MessageBox object or an
        instance of one of its subclasses.

        The "settingsPath" parameter must be a directory used to load
        and save the assistant's settings.
        """
        # Check that messageBox is a MessageBox (or subclass) instance.
        if not issubclass(type(messageBox), MessageBox):
            raise TypeError("a MessageBox object is required (got {})".format(
                type(messageBox)))
        self.__messageBox = messageBox
        self.__icon = icon
        # Load the assistant settings (instance paths shadow the class defaults).
        self.__settingsFile = os.path.join(settingsPath, self.__settingsFile)
        self.__commandsFile = os.path.join(settingsPath, self.__commandsFile)
        self.loadSettings(self.__settingsFile)
        self.loadUserCommands(self.__commandsFile)
        # Save the assistant settings back.
        # This keeps the file up to date if any key was missing.
        self.saveSettings(self.__settingsFile)
        # Objects for speech recognition and text translation.
        self.__recognizer = Recognizer()
        self.__translator = Translator()
        # Object used to play notification sounds.
        self.__sounds = Sounds()
        # AssistantCommands performs the assistant's actions.
        self.__assistantCommands = AssistantCommands(self, self.__settings,
                                                     self.__userCommands)
        # Apply the configured language.
        language = self.__settings["Assistant"]["Language"]
        self.changeLanguage(language)

    def changeLanguage(self, language):
        """Switch the assistant's language and its active command table."""
        if language.lower() == "pt-br":
            self.__settings["Assistant"]["Language"] = "PT-BR"
            self.__commands = self.__defaultCommands_PT_BR
        else:
            self.__settings["Assistant"]["Language"] = "EN-US"
            self.__commands = self.__defaultCommands

    def getAssistantCommands(self):
        """Return the AssistantCommands instance."""
        return self.__assistantCommands

    @staticmethod
    def getCommandsFile():
        """Return the filename holding user-added commands.

        NOTE(review): returns the class-level default ("commands.json"),
        not the instance path joined with settingsPath in __init__ — confirm intended.
        """
        return Assistant.__commandsFile

    @staticmethod
    def getDefaultSettings():
        """Return the assistant's default settings."""
        return Assistant.__settings

    def getInfo(self):
        """Return the help descriptions for all assistant commands."""
        # Current assistant language.
        language = self.getLanguage().lower()
        # Pick the help text matching the language.
        if language == "pt-br":
            default_info = self.__info_PT_BR.copy()
        else:
            default_info = self.__info.copy()
        info = []
        # Prefix each entry with a space (display padding).
        for command in default_info:
            info.append(" " + command)
        # Append the commands created by the user.
        for command in self.__userCommands.keys():
            info.append(
                " " + self.__userCommands[command]["Command"].capitalize() +
                ":" + self.__userCommands[command]["Description"].capitalize())
        return info

    def getLanguage(self):
        """Return the assistant's current language."""
        return self.__settings["Assistant"]["Language"]

    @staticmethod
    def getSettingsFile():
        """Return the assistant's settings filename (class-level default)."""
        return Assistant.__settingsFile

    def getTranslator(self):
        """Return the Translator instance."""
        return self.__translator

    def getStatus(self):
        """Report whether run() has been asked to stop."""
        return self.__stop

    def help(self):
        """Request that the command list window be shown by the run() loop."""
        self.__help = True

    def __helpList(self, info, sep=":"):
        """Open the help list window from "Title:Description" entries."""
        # Split the titles from the descriptions.
        commands = []
        for item in info:
            commands.append(item.split(sep, maxsplit=1))
        commands.sort(reverse=True)
        # Clamp the list height at 10 rows.
        height = len(commands) if len(commands) < 10 else 10
        itemList = ItemList("Assistant Commands", height=height,
                            icon=self.__icon)
        itemList.run(commands, self.getStatus)
        self.__help = False

    def isRunning(self):
        """Report whether run() is currently executing."""
        return self.__running

    def loadSettings(self, settingsFile):
        """Load the assistant's settings from disk (creating defaults if absent)."""
        # Create a file with the default settings if it does not exist yet.
        if not os.path.exists(settingsFile):
            self.saveSettings(settingsFile)
            return self.__settings
        # Load the settings from the file, merging over the defaults.
        with open(settingsFile) as file:
            settings = json.loads(file.read())
        if "Assistant" in settings:
            self.__settings["Assistant"].update(settings["Assistant"])
        if "System" in settings:
            self.__settings["System"].update(settings["System"])
        return self.__settings

    def loadUserCommands(self, commandsFile):
        """Load the commands added by the user (creating the file if absent)."""
        # Create a file so user-created commands can be saved.
        if not os.path.exists(commandsFile):
            self.saveUserCommands(commandsFile)
            return self.__userCommands
        # Load the commands from the file.
        with open(commandsFile) as file:
            self.__userCommands = json.loads(file.read())
        return self.__userCommands

    def __press_to_speak(self):
        """Block until the user presses the configured key, then flag run() to listen."""
        # Key the user must press to talk to the assistant.
        press_to_speak = self.__settings["System"]["Press to speak"]
        while True:
            keyboard.wait(press_to_speak)
            if self.__stop:
                break
            if not self.__listening:
                self.__speak = True

    def run(self):
        """Main loop: listen to the user and dispatch recognized commands."""
        self.__stop = False
        self.__running = True
        self.__speak = False
        self.__listening = False
        # Key the user must press to talk to the assistant.
        press_to_speak = self.__settings["System"]["Press to speak"]
        # Whether sounds are enabled.
        sounds = self.__settings["System"]["Sounds"]
        # Assistant language and name.
        language = self.getLanguage()
        name = self.__settings["Assistant"]["Name"]
        # Tell the user how to invoke the assistant.
        if press_to_speak:
            # Background thread waiting for the configured key press.
            Thread(target=self.__press_to_speak).start()
            text, substring = 'Press "{}" to speak.', press_to_speak.replace(
                "+", " + ")
        else:
            text, substring = "Speak my name to talk to me.", ""
        self.speak(name, text, language.split("-")[0], substrings=[
            substring,
        ], wait=True, sound=sounds)
        # Keep listening until run() is asked to stop.
        while not self.__stop:
            # Flag that the assistant is not listening right now.
            self.__listening = False
            if self.__stop:
                break
            if self.__help:
                self.__helpList(self.getInfo())
                self.__speak = False
            # Sleep a few milliseconds so the loop does not burn CPU.
            time.sleep(0.1)
            # Check whether the user pressed the push-to-talk key.
            if press_to_speak and not self.__speak:
                continue
            elif press_to_speak:
                self.__speak = False
            self.__listening = True
            # Open the microphone to capture audio.
            with Microphone() as microphone:
                # Assistant settings.
                language = self.getLanguage()
                name = self.__settings["Assistant"]["Name"]
                # System settings.
                paths = self.__settings["System"]["Paths"]
                showInput = self.__settings["System"]["ShowInput"]
                sounds = self.__settings["System"]["Sounds"]
                showInput = self.__settings["System"]["ShowInput"]
                # Calibrate the recognizer against ambient noise.
                self.__recognizer.adjust_for_ambient_noise(microphone)
                # Play a sound signalling that the assistant is listening.
                if sounds:
                    self.__sounds.play_sound(self.__sounds.listening_fn)
                # Tell the user the assistant is listening.
                self.speak(name, "I'm listening...", language.split("-")[0],
                           duration=2, wait=False, sound=None)
                # Try to capture and recognize the user's speech.
                try:
                    # Capture audio from the microphone.
                    audio = self.__recognizer.listen(microphone)
                    if self.__stop:
                        break
                    # Perform speech recognition.
                    text = self.__recognizer.recognize_google(
                        audio, language=language)
                except Exception as e:
                    if press_to_speak:
                        # Tell the user the recognition failed.
                        self.speak(
                            name,
                            "I'm sorry, but I can't understand what you said.",
                            language.split("-")[0], wait=True, sound=sounds)
                    continue
                # Language code used for assistant-to-user translation.
                language = language.split("-")[0]
                # Echo what the recognizer understood.
                if showInput:
                    title = self.__translator.translate(
                        "You said:", language,
                        self.__defaultLanguage.split("-")[0]).text
                    self.speak(title, '"%s"' % text, duration=2, wait=True)
                # With push-to-talk disabled, the assistant only reacts
                # when the user speaks its name.
                if not press_to_speak:
                    # Check whether the user called the assistant.
                    if text.lower().find(name.lower()) == -1:
                        continue
                    # Strip the assistant's name from the utterance.
                    if text.lower().find(name.lower()) != -1:
                        text = text[text.lower().find(name.lower()) +
                                    len(name) + 1:]
                # Check whether this is a user-defined command.
                userCommand = False
                for key in self.__userCommands.keys():
                    # If so, fetch that command's information.
                    if text.lower() == key.lower():
                        command = self.__userCommands[key]
                        userCommand = True
                        # Check that the program's path still exists.
                        if os.path.exists(command["Path"]):
                            # Launch the program.
                            file = os.path.split(command["Path"])[-1]
                            self.speak(name, 'Starting "{}"...', language, [
                                file,
                            ], sound=sounds)
                            self.__assistantCommands.start(
                                command["Path"], command["Arguments"])
                        else:
                            # Tell the user the program could not be run.
                            self.speak(
                                name,
                                "I'm sorry, but I can't run the program associated with this command.",
                                language, duration=3, wait=True, sound=sounds)
                        break
                # Loop back if a user-defined command was executed.
                if userCommand:
                    continue
                # Command 0 means no built-in command matches the request.
                command = 0
                # Try to turn the first word into an infinitive verb.
                word = text.split(maxsplit=1)[0]
                verb = self.__translator.getVerbs(word, language, 0)
                if verb:
                    text = text.replace(word, verb[0].lower())
                # Find the built-in command id matching the utterance prefix.
                for key in self.__commands.keys():
                    if text.lower().find(key) == 0:
                        command = self.__commands[key]
                        # Strip the command phrase from the text.
                        text = text[len(key) + 1:]
                        break
                # "open": try to open a program, file or folder by name.
                if command == self.__defaultCommands["open"]:
                    try:
                        # Look up the target on the configured paths.
                        path = self.__assistantCommands.find(
                            text, paths.copy())
                        # Tell the user the program is being opened.
                        self.speak(name, 'Starting "{}"...', language, [
                            text,
                        ], sound=sounds)
                        # Open it.
                        AssistantCommands.start(path, "")
                    except:
                        # Tell the user the target was not found.
                        self.speak(
                            name,
                            """I'm sorry, but I can't find "{}" on your computer.""",
                            language, [
                                text,
                            ], wait=True, sound=sounds)
                # "go to": open the site in the browser (falls back to a search).
                elif command == self.__defaultCommands["go to"]:
                    # Tell the user the site is being looked up.
                    self.speak(name, 'Searching for "{}"...', language, [
                        text,
                    ], sound=sounds)
                    self.__assistantCommands.searchOnTheInternet(text)
                # "search": open the browser with a web search.
                elif command == self.__defaultCommands["search"]:
                    # Tell the user the search is running.
                    self.speak(name, 'Searching for "{}"...', language, [
                        text,
                    ], sound=sounds)
                    self.__assistantCommands.searchOnTheInternet(text, False)
                # "calculate": evaluate an expression and speak the result.
                elif command == self.__defaultCommands["calculate"]:
                    try:
                        # Compute the result.
                        result = self.__assistantCommands.calculate(text)
                        # Speak the result (integer results without decimals).
                        if int(result) != result:
                            self.speak(name, "The result is {}.", language, [
                                str(result),
                            ], sound=sounds)
                        else:
                            self.speak(name, "The result is {}.", language, [
                                str(int(result)),
                            ], sound=sounds)
                    except Exception:
                        # Tell the user the calculation failed.
                        self.speak(
                            name,
                            "Oops, I can't seem to do that kind of calculation.",
                            language, wait=True, sound=sounds)
                # "add path": open a window for the user to pick a directory.
                elif command == self.__defaultCommands["add path"]:
                    # Persist the directory if one was selected.
                    if self.__assistantCommands.addPath():
                        self.saveSettings(self.__settingsFile)
                        self.speak(
                            name,
                            "This directory has been successfully added to the system.",
                            language, sound=sounds)
                # "remove path": open a window for the user to remove a directory.
                elif command == self.__defaultCommands["remove path"]:
                    # Check whether a directory was selected for deletion.
                    if self.__assistantCommands.deletePath():
                        self.saveSettings(self.__settingsFile)
                        self.speak(
                            name,
                            "This directory has been successfully deleted from the system.",
                            language, sound=sounds)
                # "command list": show the window listing every command.
                elif command == self.__defaultCommands["command list"]:
                    self.help()
                # "minimize": minimize a window on the user's machine.
                elif command == self.__defaultCommands["minimize"]:
                    # Without a name, the active window is minimized.
                    if not text:
                        text = None
                    # With a name, check that such a window exists.
                    else:
                        if not self.__assistantCommands.isWindow(text):
                            self.speak(
                                name,
                                """I'm sorry,' but I can't find a window named "{}".""",
                                [text], sound=sounds)
                            # NOTE(review): this `return` exits run() without
                            # resetting __running — confirm intended.
                            return
                    self.__assistantCommands.minimizeProgram(title=text)
                # "maximize": maximize a window on the user's machine.
                elif command == self.__defaultCommands["maximize"]:
                    if not text:
                        return
                    # Check that a window with the given name exists.
                    if not self.__assistantCommands.isWindow(text):
                        self.speak(
                            name,
                            """I'm sorry,' but I can't find a window named "{}".""",
                            [text], sound=sounds)
                    self.__assistantCommands.maximizeProgram(title=text)
                # "close": close a program on the user's machine.
                elif command == self.__defaultCommands["close"]:
                    # Without a name, the active window is closed.
                    if not text:
                        text = None
                    # With a name, check that such a window exists.
                    else:
                        if not self.__assistantCommands.isWindow(text):
                            self.speak(
                                name,
                                """I'm sorry,' but I can't find a window named "{}".""",
                                [text], sound=sounds)
                            return
                    self.__assistantCommands.closeProgram(text)
                # "add command": open a window for the user to define a command.
                elif command == self.__defaultCommands["add command"]:
                    self.__assistantCommands.setCommand(icon=self.__icon)
                # "edit command": open a window for the user to edit a command.
                elif command == self.__defaultCommands["edit command"]:
                    # Title of the command-selection window.
                    title = self.__translator.translate(
                        "Select a command", language, "en").text
                    # List from which the user picks the command to edit.
                    itemList = ItemList(title, width=40, height=10,
                                        icon=self.__icon, selection=True)
                    command = itemList.run([
                        " " + key.capitalize()
                        for key in self.__userCommands.keys()
                    ], self.getStatus)
                    # If a command was selected, open its edit window.
                    if command:
                        self.__assistantCommands.setCommand(
                            self.__userCommands[command.lower()],
                            icon=self.__icon)
                else:
                    # No command matched the utterance.
                    self.speak(
                        name,
                        "I'm sorry, but I didn't understand what you said.",
                        language, wait=True, sound=sounds)
        # Flag that run() has finished.
        self.__running = False

    def save(self):
        """Persist all assistant data (settings and user commands)."""
        self.saveSettings()
        self.saveUserCommands()

    def saveSettings(self, settingsFile=None):
        """Save the assistant's current settings to disk."""
        if not settingsFile:
            settingsFile = self.__settingsFile
        # Create the directory if it does not exist.
        path = os.path.split(settingsFile)[0]
        if not os.path.exists(path):
            os.mkdir(path)
        # Write the file.
        with open(settingsFile, 'w') as file:
            file.write(json.dumps(self.__settings, indent=2))

    def saveUserCommands(self, commandsFile=None):
        """Save the user-created commands to disk."""
        if not commandsFile:
            commandsFile = self.__commandsFile
        # Create the directory if it does not exist.
        path = os.path.split(commandsFile)[0]
        if not os.path.exists(path):
            os.mkdir(path)
        # Write the file.
        with open(commandsFile, 'w') as file:
            file.write(json.dumps(self.__userCommands, indent=2))

    def speak(self, title, text, language=None, substrings=None, duration=5,
              wait=False, sound=False):
        """Show a message to the user, optionally translated, with sound and delay."""
        # Translate the message when a target language is given.
        if language:
            try:
                msg = self.__translator.translate(
                    text, language, self.__defaultLanguage.split("-")[0]).text
            except:
                msg = text
        else:
            msg = text
        # Insert the substrings that were not translated.
        if substrings:
            msg = msg.format(*substrings)
        self.__messageBox.send(title=title, message=msg, duration=duration,
                               threaded=True)
        # Play the message sound.
        if sound:
            self.__sounds.play_sound(self.__sounds.message_fn)
        # Wait for the message to close before continuing.
        if wait:
            time.sleep(duration)

    def stop(self):
        """Stop the run() loop and persist all data."""
        self.__stop = True
        self.save()
def __init__(self, name, mic):
    """Store the injected identity and input device; start not awake."""
    self.name = name
    self.mic = mic
    # The recognizer is owned by this instance.
    self.recognizer = Recognizer()
    # Awake flag starts cleared; presumably set elsewhere when activated.
    self.awake = False
def __init__(self):
    """Initialise the endpoint: API key, recognizer, and per-request account state."""
    super(GoogleSTTEndpoint, self).__init__()
    # Google STT API key comes from the endpoint configuration.
    self.google_stt_key = self.config['GOOGLE_STT_KEY']
    self.recognizer = Recognizer()
    # Populated later, per request.
    self.account = None
    self.account_shares_data = False
class GoogleSTTEndpoint(PublicEndpoint):
    """Endpoint to send a flac audio file with voice and get back a utterance"""

    def __init__(self):
        super(GoogleSTTEndpoint, self).__init__()
        # Google STT API key from the endpoint configuration.
        self.google_stt_key = self.config['GOOGLE_STT_KEY']
        self.recognizer = Recognizer()
        # Per-request account state, filled in by _get_account().
        self.account = None
        self.account_shares_data = False

    def post(self):
        """Handle a POST: authenticate, transcribe the flac payload, archive, respond."""
        self._authenticate()
        self._get_account()
        self._check_for_open_dataset_agreement()
        self._write_flac_audio_file()
        stt_response = self._call_google_stt()
        response = self._build_response(stt_response)
        self._write_stt_result_file(response)
        return response, HTTPStatus.OK

    def _get_account(self):
        """Look up the account owning the requesting device, if any."""
        if self.device_id is not None:
            account_repo = AccountRepository(self.db)
            self.account = account_repo.get_account_by_device_id(
                self.device_id)

    def _check_for_open_dataset_agreement(self):
        """Flag whether the account agreed to share data with the open dataset."""
        for agreement in self.account.agreements:
            if agreement.type == OPEN_DATASET:
                self.account_shares_data = True

    def _write_flac_audio_file(self):
        """Save the audio file for STT tagging"""
        self._write_open_dataset_file(self.request.data, file_type='flac')

    def _write_stt_result_file(self, stt_result):
        """Save the STT results for tagging."""
        file_contents = '\n'.join(stt_result)
        self._write_open_dataset_file(file_contents.encode(), file_type='stt')

    def _write_open_dataset_file(self, content, file_type):
        """Write `content` under the data dir, named by account id and timestamp."""
        if self.account is not None:
            file_name = '{account_id}_{time}.{file_type}'.format(
                account_id=self.account.id, file_type=file_type, time=time())
            file_path = os.path.join(SELENE_DATA_DIR, file_name)
            with open(file_path, 'wb') as flac_file:
                flac_file.write(content)

    def _call_google_stt(self):
        """Use the audio data from the request to call the Google STT API

        We need to replicate the first 16 bytes in the audio due a bug with
        the Google speech recognition library that removes the first 16 bytes
        from the flac file we are sending.
        """
        lang = self.request.args['lang']
        audio = self.request.data
        with AudioFile(BytesIO(audio[:16] + audio)) as source:
            recording = self.recognizer.record(source)
        response = self.recognizer.recognize_google(
            recording, key=self.google_stt_key, language=lang, show_all=True)
        return response

    def _build_response(self, stt_response):
        """Build the response to return to the device.

        Return up to `limit` transcripts, ordered by descending confidence
        when Google supplies confidence values. That is useful when an
        ambiguous voice file is sent and the correct utterance is not the one
        with the highest confidence.
        """
        limit = int(self.request.args['limit'])
        if isinstance(stt_response, dict):
            # Google returns "alternative" as a list of transcript dicts.
            alternative = stt_response.get("alternative") or []
            # BUG FIX: the original tested `'confidence' in alternative`, i.e.
            # membership of a string in a list of dicts, which is always False
            # and made the sorting/limit branch unreachable. Inspect the first
            # alternative's keys instead.
            if alternative and 'confidence' in alternative[0]:
                # Sorting by confidence:
                alternative = sorted(alternative,
                                     key=lambda alt: alt['confidence'],
                                     reverse=True)
                # The client wants to test the utterances found, capped at `limit`.
                response = [alt['transcript'] for alt in alternative][:limit]
            elif alternative:
                response = [alternative[0]['transcript']]
            else:
                response = []
        else:
            response = []
        return response
def __init__(self) -> None:
    """Create the speech recogniser owned by this wrapper."""
    self._recogniser = Recognizer()
def __init__(self):
    """Read language/module selection from the global STT configuration."""
    self.lang = LANG
    self.module = STT_CONFIG.get("module")
    # Per-module settings; fall back to an empty mapping when absent.
    self.config = STT_CONFIG.get(self.module) or {}
    self.credential = self.config.get("credential", {})
    self.recognizer = Recognizer()
def gettingWordsFromAudio():
    """Demonstrate record()/offset/duration and noise calibration on two WAV files."""
    print(version)
    rec = Recognizer()

    print("captures any speech")
    harvard = AudioFile('harvard.wav')
    with harvard as source:
        clip = rec.record(source)
    print(type(clip))
    print(rec.recognize_google(clip))
    print("")
    print("")

    print("captures any speech in the first four seconds of the file")
    with harvard as source:
        clip = rec.record(source, duration=4)
    print(rec.recognize_google(clip))
    print("")
    print("")

    print(
        "The record() method, when used inside a with block, always moves ahead in the file stream."
    )
    with harvard as source:
        first_half = rec.record(source, duration=4)
        second_half = rec.record(source, duration=4)
    print(rec.recognize_google(first_half))
    print(rec.recognize_google(second_half))
    print("")
    print("")

    print(
        "To capture only the second phrase in the file, you could start with an offset of four seconds and record for, say, three seconds."
    )
    with harvard as source:
        clip = rec.record(source, offset=4, duration=3)
    print(rec.recognize_google(clip))
    print("")
    print("")

    print("****************")
    print("noisy audio")
    jackhammer = AudioFile('jackhammer.wav')
    with jackhammer as source:
        clip = rec.record(source)
    print(rec.recognize_google(clip))
    print("")
    print("")

    print(
        "The adjust_for_ambient_noise() method reads the first second of the file stream and calibrates the recognizer to the noise level of the audio."
    )
    with jackhammer as source:
        rec.adjust_for_ambient_noise(source, duration=1)
        clip = rec.record(source)
    print(rec.recognize_google(clip))
    print("")
    print("")

    print("Prints all json alternatives")
    print(rec.recognize_google(clip, show_all=True))
def do():
    """Russian-language voice-assistant main loop.

    Speaks prompts via pyttsx3, listens on the default microphone, and
    recognizes phrases with Google's web STT. Before login, only the
    phrase "войти" (log in) is accepted: it authenticates against
    SERVER_AUTH using credentials from auth.json. After login, every
    non-empty phrase is handed to process_command(). The phrase
    "завершить" (finish) exits the loop.
    """
    username = ""
    password = ""
    user_id = ""  # empty string means "not logged in yet"
    recognizer = Recognizer()
    engine = pyttsx3.init()
    # rate = engine.getProperty('rate')
    # engine.setProperty('rate', rate)
    voices = engine.getProperty('voices')
    # NOTE(review): hard-coded voice index 2 — presumably the Russian
    # voice on the author's machine; verify availability elsewhere.
    engine.setProperty('voice', voices[2].id)
    data = {}  # shared state passed to (and mutated by) process_command
    engine.say("Слушаю")
    engine.runAndWait()
    # phrases = ["войти", "напиши . работает"]
    i = -1
    with Microphone() as source:
        while True:
            i += 1
            print("Слушаю Вас")
            try:
                audio = recognizer.listen(source)
                engine.say("услышала")
                engine.runAndWait()
                phrase = recognizer.recognize_google(audio, language=LANG)
                # phrase = phrases[i]
                print("Ваша фраза:", phrase)
                phrase = phrase.lower().strip()
                # "завершить" = stop command: say goodbye and leave the loop.
                if phrase == "завершить":
                    engine.say("Приятно было пообщаться")
                    engine.runAndWait()
                    break
                if user_id:
                    # Authenticated: forward any non-empty phrase as a command.
                    if phrase != "":
                        print("Обрабатываю")
                        engine.say("Слушаю и повинуюсь")
                        engine.runAndWait()
                        process_command(phrase, data, engine, user_id)
                        engine.say("Готова к новой задаче")
                        engine.runAndWait()
                else:
                    # Not authenticated: only "войти" (log in) is honoured.
                    if phrase != "войти":
                        engine.say("Не знаю, кто вы")
                        engine.runAndWait()
                    else:
                        # Credentials live in auth.json next to the script.
                        with open("auth.json", 'r') as auth:
                            creds = json.load(auth)
                        resp = requests.post(SERVER_AUTH + "login", json={
                            'username': creds['username'],
                            'password': creds['password']
                        })
                        if resp.status_code == 200:
                            user_id = resp.json()['id']
                            engine.say(f"Здравствуйте, {creds['username']}")
                            engine.runAndWait()
                        else:
                            engine.say("Ошибка входа")
                            engine.runAndWait()
                            print(resp.text)
            except UnknownValueError:
                # Speech could not be transcribed — ask the user to repeat.
                print("Говорите членораздельней")
                engine.say("Не понимаю вас")
                engine.runAndWait()
            except Exception as e:
                # Best-effort: report the failure aloud and keep listening.
                engine.say("Возникла ошибка с командой")
                engine.runAndWait()
                print(e)
def __init__(self, renderer=None):
    """Initialise the Susi assistant: GPIO LEDs, recognizer, location,
    optional sign-in, hotword engine, and wake button.

    renderer: optional UI renderer; stored for later use.
    """
    # GPIO pins 17/27/22 drive status LEDs; silently degrade on hosts
    # without the RPi.GPIO module or without GPIO hardware.
    try:
        import RPi.GPIO as GPIO
        GPIO.setmode(GPIO.BCM)
        GPIO.setup(17, GPIO.OUT)
        GPIO.setup(27, GPIO.OUT)
        GPIO.setup(22, GPIO.OUT)
    except ImportError:
        logger.warning("This device doesn't have GPIO port")
    except RuntimeError as e:
        logger.error(e)
        pass
    # Fixed energy threshold (dynamic adjustment disabled).
    recognizer = Recognizer()
    recognizer.dynamic_energy_threshold = False
    recognizer.energy_threshold = 1000
    self.recognizer = recognizer
    self.microphone = Microphone()
    self.susi = susi
    self.renderer = renderer
    # Geo-IP lookup to seed location; best-effort, offline is tolerated.
    try:
        res = requests.get('http://ip-api.com/json').json()
        self.susi.update_location(
            longitude=res['lon'], latitude=res['lat'],
            country_name=res['country'], country_code=res['countryCode'])
    except ConnectionError as e:
        logger.error(e)
    self.config = json_config.connect('config.json')
    # Optional authenticated mode: sign in with credentials from config.json.
    if self.config['usage_mode'] == 'authenticated':
        try:
            susi.sign_in(
                email=self.config['login_credentials']['email'],
                password=self.config['login_credentials']['password'])
        except Exception as e:
            logger.error(
                'Some error occurred in login. Check you login details in config.json.\n%s', e)
    # Hotword engine selection: Snowboy or PocketSphinx (imported lazily).
    if self.config['hotword_engine'] == 'Snowboy':
        from main.hotword_engine.snowboy_detector import SnowboyDetector
        self.hotword_detector = SnowboyDetector()
    else:
        from main.hotword_engine.sphinx_detector import PocketSphinxDetector
        self.hotword_detector = PocketSphinxDetector()
    # Physical wake button is only available on a RaspberryPi.
    if self.config['WakeButton'] == 'enabled':
        logger.info("Susi has the wake button enabled")
        if self.config['Device'] == 'RaspberryPi':
            logger.info("Susi runs on a RaspberryPi")
            from ..hardware_components import RaspberryPiWakeButton
            self.wake_button = RaspberryPiWakeButton()
        else:
            logger.warning("Susi is not running on a RaspberryPi")
            self.wake_button = None
    else:
        logger.warning("Susi has the wake button disabled")
        self.wake_button = None
""" import sys from typing import Union import requests.exceptions from playsound import playsound from speech_recognition import (Microphone, Recognizer, RequestError, UnknownValueError, WaitTimeoutError) from executors.logger import logger from modules.models import models from modules.utils import support indicators = models.Indicators() recognizer = Recognizer() # initiates recognizer that uses google's translation def listen(timeout: Union[int, float], phrase_limit: Union[int, float], sound: bool = True, stdout: bool = True) -> Union[str, None]: """Function to activate listener, this function will be called by most upcoming functions to listen to user input. Args: timeout: Time in seconds for the overall listener to be active. phrase_limit: Time in seconds for the listener to actively listen to a sound. sound: Flag whether to play the listener indicator sound. Defaults to True unless set to False. stdout: Flag whether to print the listener status on screen. Returns: str: - Returns recognized statement from the microphone.
class Listener:
    """LED-backed microphone listener that transcribes speech via Google.

    Drives a PWM LED for visual feedback and records from either a
    user-supplied microphone index or the first USB microphone found.
    """

    def __init__(self, pin: int = 17, language: str = "en", api: str = "", mic=None) -> None:
        """
        Initialize LED lights, button, language, microphone, and recognizer.

        pin: tell Pi what pin to use for LED and set it's default value to 0 (off)
        language: set language to the language you are translating from
        microphone: initialize microphone so it can be used in this program
        recognizer: take input from microphone and decipher it into text

        Microphone.list_microphone_names() to list all microphones
        Pass the needed device as device_index=n if program can't pick up device automatically
        """
        self.microphone = Microphone(device_index=mic) if mic else Listener.get_mic()
        self.recognizer = Recognizer()
        self.led = PWMLED(pin)
        self.src = language
        self.target = "es"
        self.api = api

    @staticmethod
    def get_mic():
        """Return the first microphone whose name contains 'usb'; raise if none."""
        for index, mic in enumerate(Microphone.list_microphone_names()):
            if "usb" in mic.lower():
                return Microphone(device_index=index)
        raise ValueError("USB microphone required to run.")

    def breathe(self, step: float = 0.0001, iterations: int = 2, rest: float = 0.5) -> None:
        """
        Pulsating effect for LED button.

        step: amount to increase/decrease LED brightness by
        iterations: number of pulses
        rest: time between each pulse
        """
        if step < 0 or step > 1:
            raise ValueError("Step needs to be a value between zero and one.")
        if rest <= 0:
            raise ValueError(
                "Rest time needs to be a positive value greater than zero.")
        if iterations < 0:
            raise ValueError("Iterations needs to be zero or greater.")
        # First set LED to zero (off)
        self.led.value = 0
        for _ in range(iterations):
            # Ramp up to (near) full brightness.
            while self.led.value <= 1 - step:
                self.led.value = round(self.led.value + step, 5)
                print(self.led.value)
            self.led.value = 0.90
            sleep(rest)
            # Ramp back down to a dim glow.
            while self.led.value - step >= 0.15:
                self.led.value = round(self.led.value - step, 5)
                print(self.led.value)
            sleep(rest)
            self.led.value = 0.15
        self.led.value = 0
        sleep(1)

    def confirmation(self) -> None:
        """
        Blinks twice and beeps when ready.
        """
        def _confirmation():
            # One fade-in/fade-out cycle.
            while self.led.value < 1:
                self.led.value = round(self.led.value + 0.005, 5)
            self.led.value = 1
            sleep(0.15)
            while self.led.value > 0:
                self.led.value = round(self.led.value - 0.005, 5)
            self.led.value = 0
            sleep(0.15)
            self.led.value = 0
        _confirmation()
        _confirmation()

    def listen(self) -> str:
        """Read in voice and send it to Google. Once Google transcribes it, sanitize the result.

        Returns the transcribed text. (FIX: return annotation was `None`
        although the method returns a string.)
        """
        print("Listening to user voice now...")
        with self.microphone as source:
            self.recognizer.adjust_for_ambient_noise(source)
            audio = self.recognizer.listen(source)
        print("Now trying to interpret user voice")
        # If Google cloud API is explicitly given, use that. (Gives users
        # control over logging or not)
        if self.api:
            text = self.recognizer.recognize_google_cloud(audio, self.api)
        # Otherwise use Google Web API
        else:
            text = self.recognizer.recognize_google(audio)
        print("Sanitizing transciption")
        # This may be unnecessary
        # text = "".join([char for char in text if char.isalnum() or char.isspace()])
        print("Returning transcription back to main.py")
        return text

    def set_brightness(self, target: float, increase: bool, step: float = 0.001):
        """
        Set the brightness to LED. Brightness can go either way: up (starting
        at zero) or down (starting at one) and 'work' it's way into the
        desired target brightness.
        """
        if target < 0 or target > 1:
            raise ValueError("Please enter valid target value.")
        if step < -1 or step > 1:
            raise ValueError("Please enter valid step value.")
        # Setting step to absolute makes the if/else statement easier
        # to understand
        step = abs(step)
        if increase:
            self.led.value = 0
            while self.led.value < target:
                self.led.value = round(self.led.value + step, 5)
        else:
            self.led.value = 1
            while self.led.value > target:
                self.led.value = round(self.led.value - step, 5)

    def set_src(self, new_lang: str) -> None:
        """Set the source language, keeping the old one on unknown codes."""
        try:
            self.src = googletrans.LANGUAGES[new_lang]
        # FIX: dict lookup raises KeyError, not IndexError — the previous
        # handler never fired and unknown codes crashed the caller.
        except KeyError as e:
            print(
                f"Error: Unable to find language.Keeping current source language to {self.src}.\n{e}"
            )

    def set_target(self, new_lang: str) -> None:
        """Set the target language, keeping the old one on unknown codes."""
        try:
            self.target = googletrans.LANGUAGES[new_lang]
        # FIX: same KeyError-vs-IndexError correction as set_src.
        except KeyError as e:
            print(
                f"Error: Unable to find language. Keeping current target language to {self.target}.\n{e}"
            )
def __init__(self, rt):
    """Initialise the base class, then attach a lazily-imported Recognizer."""
    super().__init__(rt)
    from speech_recognition import Recognizer
    recognizer = Recognizer()
    self.recognizer = recognizer
def __init__(self):
    """Create the name-mangled private recognizer for this instance."""
    self.__recognizer = Recognizer()
if "dance party" in str(recognized_audio).lower(): dance_party_audio.play() except sr.UnknownValueError: print("Google Speech Recognition could not understand audio") except sr.RequestError as e: print( "Could not request results from Google Speech Recognition service; {0}" .format(e)) # Listen for voice audio in a loop and wait for the word 'Dance party' if __name__ == '__main__': try: dance_party_audio.load_audio() recognizer = Recognizer() recognizer.energy_threshold = 200 microphone = Microphone() with microphone as source: recognizer.adjust_for_ambient_noise(source) # start listening on the mic stop_listening = recognizer.listen_in_background( microphone, onAudioReceived) while True: # "Block" the main thread ... will run additional logic to handle # Hardware here - TODO time.sleep(0.1) # Kill the background listener stop listening stop_listening()
def __init__(self):
    """Store the configured language and build a speech recognizer."""
    self.lang = Config.LANG
    self.recognizer = Recognizer()
class CommandsRecognition(ApiState):
    """Background voice-command recognizer with a pluggable STT backend.

    Listens on the default microphone in a background thread and invokes
    `callback` with the command id whenever a known command is heard.
    """

    def __init__(self, callback, language="pl-PL", api_option=ApiOption.GOOGLE):
        """
        callback: called with the detected command id.
        language: BCP-47 language tag passed to the STT backend.
        api_option: which recognition backend _api_recognition dispatches to.
        """
        super().__init__()
        self.callback_command_detected = callback
        self.api_option = api_option
        self.language = language
        self.listen_thread = None
        self.phrase_time_limit = 3
        try:
            self.recognizer = Recognizer()
            self.microphone = Microphone()
            # Adjust the recognizer sensitivity to ambient noise and
            # record audio from the microphone.
            with self.microphone as source:
                self.recognizer.adjust_for_ambient_noise(source)
        except OSError as ose_err:
            Logger.critical("OSError: {0}".format(ose_err))
            self.api_runs = False
            self.api_info = GLO_MSG['MICROPHONE_FAILURE']
            return
        except Exception as err:
            Logger.critical("Exception: {0}".format(err))
            self.api_runs = False
            self.api_info = GLO_MSG['MICROPHONE_FAILURE']
            return
        Logger.debug("Initialization of CommandsRecognition class")
        self.api_info = GLO_MSG['MICROPHONE_INITIALIZED']

    def validate_command(self, command):
        """Dispatch `command` to the callback when it is a known command."""
        if command.lower() in GLO_CMD.values():
            detected_command_id = GET_COMMAND(command.lower())
            Logger.info("GLO_CMD command available -> {0}".format(command.lower()))
            self.callback_command_detected(detected_command_id)
        else:
            Logger.info("Detected command: {0}".format(command.lower()))

    def callback_recognition(self, recognizer, audio):
        """Background-listener callback: recognize `audio` and validate it."""
        try:
            command = self._api_recognition(audio)
            self.validate_command(command)
        except UnboundLocalError as err:
            Logger.warning("UnboundLocalError : {0} ".format(err))
        except RequestError as err:
            Logger.warning("RequestError : {0} ".format(err))
        except UnknownValueError as err:
            Logger.debug("UnknownValueError : {0} ".format(err))

    def background_listen(self):
        """Start the non-blocking background listening thread."""
        self.listen_thread = self.recognizer.listen_in_background(
            source=self.microphone,
            callback=self.callback_recognition,
            phrase_time_limit=self.phrase_time_limit)

    def _api_recognition(self, audio):
        """Transcribe `audio` with the backend selected by self.api_option."""
        if self.api_option is ApiOption.GOOGLE:
            return self.recognizer.recognize_google(audio, language=self.language)
        elif self.api_option is ApiOption.GOOGLE_CLOUD:
            # Support languages: https://cloud.google.com/speech-to-text/docs/languages
            return self.recognizer.recognize_google_cloud(
                audio, credentials_json='', language=self.language)
        elif self.api_option is ApiOption.SPHINX:
            # Support languages : https://sourceforge.net/projects/cmusphinx/files/Acoustic%20and%20Language%20Models/
            return self.recognizer.recognize_sphinx(audio, language=self.language)
        elif self.api_option is ApiOption.WIT:
            # Support languages : https://wit.ai/faq, login required
            return self.recognizer.recognize_wit(audio, key='',)
        elif self.api_option is ApiOption.AZURE_BING:
            # Support languages : https://docs.microsoft.com/en-us/azure/cognitive-services/bing-web-search/language-support, login required
            # FIX: `return` was missing, so this branch always yielded None
            # and callback_recognition validated a None command.
            return self.recognizer.recognize_bing(audio, key='', language=self.language)
        elif self.api_option is ApiOption.LEX:
            # Support languages: ONLY ENG -> https://docs.aws.amazon.com/lex/latest/dg/gl-limits.html
            return self.recognizer.recognize_lex(audio)
        elif self.api_option is ApiOption.HOUNDIFY:
            # Support languages: ONLY ENG, login required
            return self.recognizer.recognize_houndify(audio, client_id='', client_key='')
        elif self.api_option is ApiOption.IBM:
            # Support languages : https://www.ibm.com/watson/services/language-translator/, login required
            return self.recognizer.recognize_ibm(
                audio, username='', password='', language=self.language)
        else:
            Logger.error("Api recognition option is not defined")
from nltk.tokenize import sent_tokenize # from cam_recorder2 import main tts_engine = pyttsx3.init('sapi5') rate = tts_engine.getProperty('rate') tts_engine.setProperty('rate', 140) def speak_devis(text): var.set(text) interface.update() tts_engine.say(text) tts_engine.runAndWait() sr_engine = Recognizer() def record_audio(ask=False): if ask: speak_devis(ask) with Microphone() as source: var.set("Speak-up! I am listening...") interface.update() audio = sr_engine.listen(source) audio_text = '' try: var.set("Now hold-on! I am recognizing...") interface.update() audio_text = sr_engine.recognize_google(audio, language='en-in') var1.set(audio_text)
def get_audio_transcript(path_to_file, recognizer: sr.Recognizer):
    """Record the whole audio file and return Google's transcription of it."""
    with sr.AudioFile(path_to_file) as source:
        recorded = recognizer.record(source)
        return recognizer.recognize_google(recorded)
def __init__(self, renderer=None):
    """Initialise the Susi state machine: GPIO, server checker thread,
    recognizer, location, sounds, optional sign-in, hotword engine,
    wake button, microphone, and event subscriptions.

    renderer: optional UI renderer whose events also trigger the hotword
    callback.
    """
    # Status LEDs on GPIO 27/22; tolerate non-Pi hosts (GPIO falsy) and
    # hardware setup failures.
    if GPIO:
        try:
            GPIO.setmode(GPIO.BCM)
            GPIO.setup(27, GPIO.OUT)
            GPIO.setup(22, GPIO.OUT)
        except RuntimeError as e:
            logger.error(e)
    # Daemon thread polls the server so it never blocks interpreter exit.
    thread1 = Thread(target=self.server_checker, name="ServerCheckerThread")
    thread1.daemon = True
    thread1.start()
    recognizer = Recognizer()
    # this was False in the old state machine, but reading the API docs
    # https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst
    # it seems that True is actually better!
    recognizer.dynamic_energy_threshold = True
    recognizer.energy_threshold = 2000
    self.recognizer = recognizer
    self.susi = susi
    self.renderer = renderer
    self.server_url = "https://127.0.0.1:4000"
    self.action_schduler = ActionScheduler()
    self.action_schduler.start()
    self.event_queue = queue.Queue()
    self.idle = True
    # Geo-IP lookup to seed location; best-effort, offline is tolerated.
    try:
        res = requests.get('http://ip-api.com/json').json()
        self.susi.update_location(
            longitude=res['lon'], latitude=res['lat'],
            country_name=res['country'], country_code=res['countryCode'])
    except ConnectionError as e:
        logger.error(e)
    self.susi_config = SusiConfig()
    self.path_base = self.susi_config.get('path.base')
    # Resolve notification sound files relative to the configured base path.
    self.sound_detection = os.path.abspath(
        os.path.join(self.path_base, self.susi_config.get('path.sound.detection')))
    self.sound_problem = os.path.abspath(
        os.path.join(self.path_base, self.susi_config.get('path.sound.problem')))
    self.sound_error_recognition = os.path.abspath(
        os.path.join(self.path_base, self.susi_config.get('path.sound.error.recognition')))
    self.sound_error_timeout = os.path.abspath(
        os.path.join(self.path_base, self.susi_config.get('path.sound.error.timeout')))
    if self.susi_config.get('susi.mode') == 'authenticated':
        try:
            susi.sign_in(email=self.susi_config.get('susi.user'),
                         password=self.susi_config.get('susi.pass'))
        except Exception as e:
            logger.error('Some error occurred in login. Check you login details with susi-config.\n%s', e)
    if self.susi_config.get('hotword.engine') == 'Snowboy':
        from .hotword_engine.snowboy_detector import SnowboyDetector
        hotword_model = "susi.pmdl"
        if self.susi_config.get('hotword.model'):
            logger.debug("Using configured hotword model: " +
                         self.susi_config.get('hotword.model'))
            # FIX: previously read key 'hotword_model' (underscore) here while
            # the guard above checked 'hotword.model' (dot), so a configured
            # model was logged but never actually used.
            hotword_model = self.susi_config.get('hotword.model')
        self.hotword_detector = SnowboyDetector(model=hotword_model)
    else:
        from .hotword_engine.sphinx_detector import PocketSphinxDetector
        self.hotword_detector = PocketSphinxDetector()
    if self.susi_config.get('wakebutton') == 'enabled':
        logger.info("Susi has the wake button enabled")
        if self.susi_config.get('device') == 'RaspberryPi':
            logger.info("Susi runs on a RaspberryPi")
            from .hardware_components.rpi_wake_button import RaspberryPiWakeButton
            self.wake_button = RaspberryPiWakeButton()
        else:
            logger.warning("Susi is not running on a RaspberryPi")
            self.wake_button = None
    else:
        logger.warning("Susi has the wake button disabled")
        self.wake_button = None
    # DeepSpeech expects 16 kHz input; otherwise use the device default rate.
    if self.susi_config.get('stt') == 'deepspeech-local':
        self.microphone = Microphone(sample_rate=16000)
    else:
        self.microphone = Microphone()
    # Wire every event source to the hotword/event callbacks.
    if self.hotword_detector is not None:
        self.hotword_detector.subject.subscribe(
            on_next=lambda x: self.hotword_detected_callback())
    if self.wake_button is not None:
        self.wake_button.subject.subscribe(
            on_next=lambda x: self.hotword_detected_callback())
    if self.renderer is not None:
        self.renderer.subject.subscribe(
            on_next=lambda x: self.hotword_detected_callback())
    if self.action_schduler is not None:
        self.action_schduler.subject.subscribe(
            on_next=lambda x: self.queue_event(x))
obj = gTTS(text=greetings, lang=lang) obj.save('greetings.mp3') os.system('greetings.mp3') time.sleep(1.6) pyautogui.moveTo(1910, 10) pyautogui.click() #speech recog print('-----------------------------------') print() print('1. SHOW OPERATIONS') print('2. START OPERATIONS') print('3. THANKS JARVIS') #to end the process time.sleep(2) r = Recognizer() mic = Microphone() while True: try: print() print('-----------------------------------') print('Speak now') print('-----------------------------------') print() with mic as source: audio = r.listen(source) voice_text = r.recognize_google(audio) try: if voice_text == 'start operations': print(voice_text) r1 = Recognizer()
print(resp.text) else: js = resp.json() # print(js) if js['args'] is not None: data.update(js['args']) for code in js['codes']: exec(code, {'data': data, 'temp': temp}) if 'error' in data: print("Ошибка:", data['error']) break def temp(data): exec(data['code']) if __name__ == "__main__": r = Recognizer() audio_queue = JoinableQueue() listen_thread = Process(target=listen, args=(audio_queue, r)) listen_thread.start() recognize_thread = Process(target=recognize, args=(audio_queue, r, listen_thread.pid)) recognize_thread.start() print("Listen pid", listen_thread.pid) print("Recognize pid", recognize_thread.pid) recognize_thread.join() listen_thread.join()
redirect_uri=redirect_uri, scope=scope, username=username) spotify = sp.Spotify(auth_manager=auth_manager) # Selecting device to play from devices = spotify.devices() deviceID = None for d in devices['devices']: d['name'] = d['name'].replace('’', '\'') if d['name'] == device_name: deviceID = d['id'] break # Setup microphone and speech recognizer r = Recognizer() m = None input_mic = 'Scarlett 2i4 USB' # Use whatever is your desired input for i, microphone_name in enumerate(Microphone.list_microphone_names()): if microphone_name == input_mic: m = Microphone(device_index=i) while True: """ Commands will be entered in the specific format explained here: - the first word will be one of: 'album', 'artist', 'play' - then the name of whatever item is wanted """ with m as source: r.adjust_for_ambient_noise(source=source) audio = r.listen(source=source)