def __setup(rate, size):
    """Create a Spanish-model LiveSpeech recognizer.

    When the module-level `core_mode` is 0 the default audio device is
    used; otherwise the module-level `device` is passed explicitly.

    Args:
        rate: sampling rate in Hz for the recognizer.
        size: audio buffer size in samples.

    Returns:
        A configured LiveSpeech instance.

    Exits the process with status 1 when the audio device or the model
    files cannot be loaded.
    """
    model_path = get_model_path()
    # Shared keyword arguments, defined once instead of duplicated per mode.
    common = dict(
        verbose=False,
        sampling_rate=rate,
        buffer_size=size,
        no_search=False,
        full_utt=False,
        hmm=os.path.join(model_path, 'es-es'),
        lm=os.path.join(model_path, 'es-20k.lm.bin'),
        dic=os.path.join(model_path, 'es.dict'),
    )
    try:
        if core_mode == 0:
            speech = LiveSpeech(**common)
        else:
            speech = LiveSpeech(audio_device=device, **common)
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
        # are not swallowed.
        print('Error al importar el disposivo de audio o modelos: ',
              device, ' ', model_path)
        # Fixed: exit with a non-zero status — this is a failure path,
        # the original exited with 0 (success).
        sys.exit(1)
    return speech
class GreenHat(object):
    """Class to add keyword spotting functionality"""

    def __init__(self):
        """Set up the ROS publisher/node and the recognizer, then listen."""
        self.pub_ = rospy.Publisher("/chatter", String, queue_size=10)
        rospy.init_node("psa_control")
        self.model_path = get_model_path()
        # Call custom function on node shutdown
        # Created by pyae soan aung
        # Fixed: the original first assigned a throwaway default
        # `LiveSpeech()` (opening the audio device) and immediately
        # overwrote it; only the configured recognizer is created now.
        self.speech = LiveSpeech(
            verbose=False,
            sampling_rate=16000,
            buffer_size=2048,
            no_search=False,
            full_utt=False,
            hmm=os.path.join(self.model_path, 'en-us'),
            lm=os.path.join(self.model_path, 'en-us.lm.bin'),
            dic=os.path.join(self.model_path, 'cmudict-en-us.dict'))
        # All params satisfied. Starting recognizer
        self.start_recognizer()

    def start_recognizer(self):
        """Publish every recognized phrase on /chatter (blocks forever)."""
        for phrase in self.speech:
            print(phrase)
            self.pub_.publish(str(phrase))
            self.speech.end_utt()

    def stop_recognizer(self):
        """Placeholder — recognition cannot currently be stopped."""
        pass
def setup_live_speech(lm, dict_path, jsgf_path, kws_threshold):
    """Rebuild the module-level recognizer from the given resources.

    Uses the module-level `model_path` for the en-us acoustic model.
    """
    global live_speech
    acoustic_model = os.path.join(model_path, 'en-us')
    live_speech = LiveSpeech(
        lm=lm,
        hmm=acoustic_model,
        dic=dict_path,
        jsgf=jsgf_path,
        kws_threshold=kws_threshold,
    )
def __init__(self):
    """Start TTS and the recognizer, greet the user, then handle phrases."""
    # Bot is listening
    self.SLEEPING = False
    # initializes the ibm tts service
    self.tts = start_speaker(configs['TTS_API_KEY'], configs['TTS_URL'])
    # starts the speech recognizer
    self.speech = LiveSpeech(verbose=False,
                             sampling_rate=44100,
                             buffer_size=2048,
                             no_search=False,
                             full_utt=False,
                             hmm=os.path.join(model_path, 'en-us'),
                             lm="TAR8856/8856.lm",
                             dic="TAR8856/8856.dic")
    self.speak("Hello! My name is Sbotify!")
    try:
        self.deviceId = SpotifyAPI().get_deviceID()
    except Exception:
        # Narrowed from a bare `except:`; a bare clause would also catch
        # SystemExit/KeyboardInterrupt.
        self.speak(
            "You need to have Spotify opened for me to help you! Bye")
        sys.exit()
    # listening for input
    for sentence in self.speech:
        print(sentence)
        if self.speech.buffer_size != 0:
            self.handle(str(sentence))
def voice_detection(self):
    """Return one recognized phrase as a string.

    Uses PocketSphinx when no Google API key is set (or PocketSphinx is
    forced); otherwise records from the microphone and uses Google
    Speech Recognition.  Returns None when Google recognition fails.
    """
    if self.google_key is None or self.force_pocketsphinx:
        for phrase in LiveSpeech():
            # just one phrase!!
            print(phrase)
            return str(phrase)
        # (unreachable `pass` statements after the returns were removed)
    else:
        with sr.Microphone() as source:
            audio = self.r.listen(source)
            try:
                phrase = self.r.recognize_google(audio, key=self.google_key)
                print(phrase)
                return str(phrase)
            except sr.UnknownValueError:
                print(
                    "Google Speech Recognition could not understand audio")
            except sr.RequestError as e:
                print(self.google_key)
                print(
                    "Could not request results from Google Speech Recognition service; {0}"
                    .format(e))
def wating_keyword(self):
    """Start listening and return True if is called"""
    import os
    from pocketsphinx import LiveSpeech

    # Spanish acoustic model, language model and dictionary.
    model_dir = './models/'
    recognizer = LiveSpeech(
        verbose=False,
        sampling_rate=16000,
        buffer_size=2048,
        no_search=False,
        full_utt=False,
        hmm=os.path.join(
            model_dir,
            'cmusphinx-es-5.2/model_parameters/voxforge_es_sphinx.cd_ptm_4000'
        ),
        lm=os.path.join(model_dir, 'es-20k.lm.gz'),
        dic=os.path.join(model_dir, 'es.dict'))
    # Block until a phrase containing the activation word is heard.
    for utterance in recognizer:
        heard = str(utterance)
        if self.activation_word in heard:
            return True
def run(self):
    """Play the intro, optionally wait for the trigger word, then loop
    taking and executing commands."""
    if os.getenv("SKIP_INTRO", "False") == "False":
        self.tts.speak("oh, its you")
        time.sleep(0.25)
        self.tts.speak("it's been a long time")
        time.sleep(1.5)
        self.tts.speak("how have you been")
    if os.getenv("SKIP_TRIGGER", "False") == "False":
        recognizer = LiveSpeech(
            keyphrase=os.getenv('TRIGGERWORD', "hey alexa"),
            kws_threshold=1e-20,
            lm=False
        )
        # One command cycle per detected keyphrase.
        for _phrase in recognizer:
            try:
                command = self.take_command()  # Listen for command
                self.process_command(command)  # Execute command
            except Exception as e:
                # Something failed
                print(e)
    else:
        while True:
            try:
                command = self.take_command()  # Listen for command
                self.process_command(command)  # Execute command
                time.sleep(2)
            except Exception as e:
                print(e)
    print("finished")
def shyna_offline():
    """Listen offline; forward any phrase containing "SHYNA" to the matcher."""
    for phrase in LiveSpeech():
        print("offline")
        res = str(phrase)
        print(res)
        # Idiomatic membership test instead of calling res.__contains__().
        if "SHYNA" in res:
            Shyna_convodb.check_key(res)
            shyna_match()
def listen_pocketsphinx():
    """Listen until the phrase "book meeting" is heard, then return."""
    for phrase in LiveSpeech():
        print("Listening!")
        res = str(phrase)
        print(res)
        # Idiomatic membership test instead of calling res.__contains__().
        if "book meeting" in res:
            print("meeting booked")
            break
def liveSpeech():
    """Forward each recognized phrase to the assistant.

    Removed the commented-out threading experiment that shadowed the
    call below; dead code should not linger in the loop body.
    """
    for phrase in LiveSpeech():
        print("Users message: '" + str(phrase) + "'")
        # answer as text message
        getAssistantResponse(phrase)
def recongitionInstruction():
    """Launch desktop applications in response to recognized phrases."""
    for utterance in LiveSpeech():
        heard = str(utterance)
        print(heard)
        # The phrases are mutually exclusive, so elif is equivalent to
        # the original pair of independent ifs.
        if heard == 'open terminal':
            os.system('gnome-terminal')
        elif heard == 'google':
            print('open google !!')
            os.system('google-chrome-stable')
def resume(self):
    """Recreate the yes/no JSGF recognizer and resume recognition."""
    print('== START RECOGNITION ==')
    acoustic = os.path.join(self.model_path, 'en-us')
    dictionary = os.path.join(self.dictionary_path, 'yes_no_sphinx.dict')
    grammar = os.path.join(self.dictionary_path, "yes_no_sphinx.gram")
    self.speech = LiveSpeech(verbose=False,
                             sampling_rate=8000,
                             buffer_size=2048,
                             no_search=False,
                             full_utt=False,
                             hmm=acoustic,
                             lm=False,
                             dic=dictionary,
                             jsgf=grammar)
def resume(self):
    """Recreate the recognizer from the instance's dict/grammar and resume."""
    print("== START RECOGNITION ==")
    acoustic = os.path.join(self.model_path, "en-us")
    dictionary = os.path.join(self.dictionary_path, self.dict)
    grammar = os.path.join(self.dictionary_path, self.gram)
    self.speech = LiveSpeech(verbose=False,
                             sampling_rate=8000,
                             buffer_size=2048,
                             no_search=False,
                             full_utt=False,
                             hmm=acoustic,
                             lm=False,
                             dic=dictionary,
                             jsgf=grammar)
def worker(queue):
    """Recognize speech continuously and push each hypothesis onto `queue`."""
    speech = LiveSpeech(sampling_rate=16000,
                        lm='assets/dictionary/3306.lm',
                        dic='assets/dictionary/3306.dic',
                        audio_device='0')
    for phrase in speech:
        # Hoisted: the original called phrase.hypothesis() twice per phrase.
        hypothesis = phrase.hypothesis()
        # Lazy %-style args: formatting is skipped unless DEBUG is enabled
        # (the original eagerly formatted with str.format on every phrase).
        logging.debug('Voice input: %s, probability: %s',
                      hypothesis, phrase.probability())
        queue.put(hypothesis)
def talker():
    """ROS node publishing each recognized phrase on the 'chatter' topic."""
    pub = rospy.Publisher('chatter', String, queue_size=10)
    rospy.init_node('talker', anonymous=True)
    rate = rospy.Rate(10)  # 10hz
    dictionary = os.path.join(model_path, 'zisyo.dict')
    while not rospy.is_shutdown():
        # A fresh recognizer is built on each outer iteration, as before.
        for phrase in LiveSpeech(dic=dictionary):
            rospy.loginfo(phrase)
            pub.publish(str(phrase))
            rate.sleep()
def createRUS(self):
    """Return a LiveSpeech recognizer configured with Russian models."""
    options = {
        'verbose': False,
        'sampling_rate': 16000,
        'buffer_size': 2048,
        'no_search': False,
        'full_utt': False,
        'hmm': os.path.join(model_path, 'zero_ru.cd_cont_4000'),
        'lm': os.path.join(model_path, 'ru.lm'),
        'dic': os.path.join(model_path, 'ru.dic'),
    }
    return LiveSpeech(**options)
def QandA(number):
    """Play `number` rounds of a Q&A riddle game.

    Loads a question->answer mapping from a CSV file, listens with a
    JSGF-constrained recognizer, speaks the answer for each recognized
    question, and repeats until `number` rounds are completed.

    Args:
        number: how many question/answer cycles to run.

    Returns:
        1 on completion.
    """
    # Define paths relative to this module's absolute location.
    file_path = os.path.abspath(__file__)
    dic_path = file_path.replace('module/module_sub_QandA.py', 'dictionary/robocup_2019_sphinx.dict')
    gram_path = file_path.replace('module/module_sub_QandA.py', 'dictionary/robocup_2019_sphinx.gram')
    csv_path = file_path.replace('module/module_sub_QandA.py', 'dictionary/QandA/robocup_2019.csv')
    model_path = get_model_path()
    # setting
    qa_dictionary = {}
    counter = 0
    # setting pocketsphinx
    Live_speech = LiveSpeech(verbose=False,
                             sampling_rate=16000,
                             buffer_size=2048,
                             no_search=False,
                             full_utt=False,
                             hmm=os.path.join(model_path, 'en-us'),
                             lm=False,
                             dic=dic_path,
                             jsgf=gram_path)
    # Make qa_dictionary from csv file
    with open(csv_path, 'r') as f:
        for line in csv.reader(f):
            qa_dictionary.setdefault(str(line[0]), str(line[1]))
    # start riddle game
    while counter < number:
        module_beep.beep("start")  # cue: start speaking
        print("- " + str(counter + 1) + " cycle -")
        print("\n[*] LISTENING ...")
        for question in Live_speech:
            question = str(question)
            print(question)
            # Idiomatic membership test (was `if not question in ...keys()`).
            if question not in qa_dictionary:
                continue
            module_beep.beep("stop")  # cue: stop speaking
            print("\n----------------------------\n", question, "\n----------------------------\n")
            module_pico.speak(qa_dictionary[question])
            counter += 1
            break
    return 1
def __init__(self):
    """Set up the ROS publisher/node and the recognizer, then listen."""
    self.pub_ = rospy.Publisher("/chatter", String, queue_size=10)
    rospy.init_node("psa_control")
    self.model_path = get_model_path()
    # Call custom function on node shutdown
    # Created by pyae soan aung
    # Fixed: the original first assigned a throwaway default `LiveSpeech()`
    # (opening the audio device) and immediately overwrote it; only the
    # configured recognizer is created now.
    self.speech = LiveSpeech(verbose=False,
                             sampling_rate=16000,
                             buffer_size=2048,
                             no_search=False,
                             full_utt=False,
                             hmm=os.path.join(self.model_path, 'en-us'),
                             lm=os.path.join(self.model_path, 'en-us.lm.bin'),
                             dic=os.path.join(self.model_path, 'cmudict-en-us.dict'))
    # All params satisfied. Starting recognizer
    self.start_recognizer()
def loader(self, lang_code):
    """Load language files for recognition."""
    self.log.info('load language files for recognition...')
    cfg = self.config
    code_path = os.path.join(cfg.recognition_dir, lang_code)
    jsgf_dir = os.path.join(code_path, cfg.jsgf_dir)
    variants_dir = os.path.join(code_path, cfg.variants_dir)
    self.variants = {}
    self.keywords = configs.language_obj(code_path, cfg.keywords_name)
    self.voice = LiveSpeech(
        hmm=os.path.join(code_path, cfg.hmm_name),
        lm=False,
        dic=os.path.join(code_path, cfg.dic_name + '.dic'),
    )
    # Register every grammar and load its pickled variants alongside it.
    for grammar_name in cfg.jsgf_names:
        self.voice.set_jsgf_file(
            grammar_name, os.path.join(jsgf_dir, grammar_name + '.jsgf'))
        variant_file = os.path.join(variants_dir, grammar_name + '.dat')
        # NOTE(review): pickle.load on a data file — trusted input assumed.
        with open(variant_file, 'rb') as fh:
            self.variants[grammar_name] = pickle.load(fh).split('\n')
def voice_detection(self):
    """Block until one phrase is recognized and return it as a string.

    Returns an empty string if the recognizer yields nothing.
    """
    recognizer = LiveSpeech(
        verbose=False,
        sampling_rate=RATE,
        buffer_size=2048,
        no_search=False,
        full_utt=False,
        hmm=os.path.join(model_path, 'en-us'),
        lm=os.path.join(model_path, 'en-us.lm.bin'),
        dic=os.path.join(model_path, 'cmudict-en-us.dict'),
        # audio_device='0'
    )
    # NOTE(review): it is unclear whether LiveSpeech honours these
    # attributes — confirm against the library before relying on them.
    recognizer.silence_limit = 1.5
    recognizer.prev_audio = 0.5
    recognizer.threshold = 4500
    result = ''
    for phrase in recognizer:
        result = phrase
        break
    return str(result)
def actuar(self):
    """Listen continuously with the Spanish models and print each phrase."""
    ruta = get_model_path()
    frases = LiveSpeech(verbose=False,
                        sampling_rate=16000,
                        buffer_size=2048,
                        no_search=False,
                        full_utt=False,
                        hmm=os.path.join(ruta, 'es-es'),
                        lm=os.path.join(ruta, 'es-20k.lm.bin'),
                        # Fixed: the LiveSpeech keyword for the
                        # pronunciation dictionary is `dic`, not `dict`
                        # (consistent with every other call in this codebase).
                        dic=os.path.join(ruta, 'es.dict'))
    for frase in frases:
        print(frase)
def pause():
    """Stop recognition.

    Replaces the module-level recognizer with a no-search LiveSpeech
    instance.  Takes no parameters and returns nothing.
    """
    global live_speech
    live_speech = LiveSpeech(no_search=True)
def __init__(self):
    """Initialize the filler-word lists and the PocketSphinx recognizer."""
    # Fixed: the original wrote `self.self.preceed_list` and
    # `self.self.single_list`, which raised AttributeError on construction.
    self.preceed_list = ['and', 'and(2)', 'you', 'or',
                         'an(2)', 'ok', 'okay']
    self.single_list = ['just', 'just(2)', 'normally']
    self.speech = LiveSpeech(
        verbose=False,
        sampling_rate=16000,
        buffer_size=512,  # 2048,
        no_search=False,
        full_utt=False,
        hmm=os.path.join(model_path, 'en-us'),
        lm=os.path.join(model_path, 'en-us.lm.bin'),
        dic=os.path.join(model_path, 'cmudict-en-us.dict')
    )
def AskQuestion(self, question, isSilent=False):
    """Speak `question` (unless isSilent) and listen for an anticipated answer.

    `question` must be a Question object; the code reads these attributes:
      - Message: the prompt spoken to the user
      - Responses: per-word-position lists of acceptable words
      - WordsInResponse: number of words a complete answer contains

    Returns the matched response string (words joined with trailing
    spaces), or re-prompts with "What was that again?" on a mismatch and
    keeps listening.  Raises TypeError when `question` is not a Question.
    """
    if not isinstance(question, Question):
        raise TypeError("AskQuestion takes a Question object")
    if not isSilent:
        self.SpeakText(question.Message)
    model_path = get_model_path()
    speech = LiveSpeech(verbose=False,
                        sampling_rate=16000,
                        buffer_size=2048,
                        no_search=False,
                        full_utt=False,
                        hmm=os.path.join(model_path, 'en-us'),
                        lm=os.path.join(model_path, 'en-us.lm.bin'),
                        dic=os.path.join(model_path, 'johnsell-en-us.dict'))
    quit = False
    tries = 1
    response = ""
    for phrase in speech:
        response = ""
        if quit:
            # Shut the recognizer down once a full match was accepted.
            speech.end_utt()
            speech.stop()
            break
        words = str(phrase).split(" ")
        print(words)
        numWords = 0
        # Walk the expected word positions, comparing the spoken word at
        # each position against that position's acceptable alternatives.
        # NOTE(review): if the spoken phrase has fewer words than
        # question.Responses expects, words[numWords] can raise
        # IndexError — confirm inputs upstream.
        for wordPosition in question.Responses:
            isAMatch = False
            for word in wordPosition:
                if words[numWords] == word:
                    isAMatch = True
                    response += words[numWords] + " "
            # Accept once the final expected position also matched.
            if (numWords >= question.WordsInResponse - 1) and isAMatch:
                quit = True
                break
            if not isAMatch:
                break
            numWords = numWords + 1
        tries = tries + 1
        if quit:
            break
        else:
            # If not quit here, the spoken phrase does not match the
            # response anticipated. Tell the user!
            self.SpeakText("What was that again?")
    print("QUIT: " + str(quit))
    return response
def sphinx_start():
    """Print every phrase recognized with the default US-English models."""
    models = get_model_path()
    recognizer = LiveSpeech(
        verbose=False,
        sampling_rate=16000,
        buffer_size=2048,
        no_search=False,
        full_utt=False,
        hmm=os.path.join(models, 'en-us'),
        lm=os.path.join(models, 'en-us.lm.bin'),
        dic=os.path.join(models, 'cmudict-en-us.dict'),
    )
    for utterance in recognizer:
        print(utterance)
def sphinx():
    """Create a Russian JSGF-constrained recognizer with a phrase accessor.

    Fixed: the original bound the LiveSpeech object to the name `speech`
    and then defined an inner function also named `speech`, rebinding the
    name — iterating `speech` inside the inner function would have
    iterated the function object itself and raised TypeError.  The
    recognizer now has a distinct name.
    """
    recognizer = LiveSpeech(verbose=False,
                            sampling_rate=16000,
                            buffer_size=2048,
                            no_search=False,
                            full_utt=False,
                            hmm=os.path.join(model_path, 'zero_ru.cd_cont_4000'),
                            lm=False,
                            jsgf=os.path.join(model_path, 'grammar.jsgf'),
                            dic=os.path.join(model_path, 'russian.dict'))

    def speech():
        # Return the first recognized phrase (blocks until one arrives).
        for phrase in recognizer:
            return phrase
def listenWakeWord(self):
    """Wait for the configured wake word, acknowledge it, then handle commands."""
    print('Waiting Wake Word...')
    recognizer = LiveSpeech(hmm=self.hmm,
                            lm=self.models['min']['lm'],
                            dic=self.models['min']['dic'])
    for utterance in recognizer:
        if self.treatCommand(utterance) == self.config['wake_word']:
            os.system("espeak 'Yes?'")
            self.listenCommands()
            break
def sendcommand():
    """Interactive terminal loop for driving the connected client socket.

    Reads a command per iteration; the special input "talk" first captures
    one spoken phrase.  The command is sent to the client and its reply
    code is handled: 0 = command not found, 9 = close connection and stop.
    """
    # -- terminal REGION
    while True:
        logging.info("Ready")
        print("Ready")
        data = input(">")
        if data == "talk":
            speech = LiveSpeech(
                verbose=False,
                sampling_rate=16000,
                buffer_size=2048,
                no_search=False,
                full_utt=False,
                hmm=os.path.join(model_path, 'en-us'),
                lm=os.path.join(model_path, 'en-us.lm.bin'),
                dic=os.path.join(model_path, 'cmudict-en-us.dict')
            )
            for phrase in speech:
                logging.info(phrase)
                print("toi la van")
                break
        logging.info("Sending command..")
        print("Sending command..")
        try:
            c.send(data.encode())  # encode here -- remember to decode on the client
        except Exception:  # narrowed from a bare `except:`
            # Fixed: logging uses lazy %-formatting; the original passed the
            # address as a bare extra argument with no %s placeholder, which
            # made logging raise an internal formatting error.
            logging.error("Cannot sent command to client: %s", c.getsockname())
            print("Cannot sent command to client:", c.getsockname())
        else:
            datacode = c.recv(1024)
            datacode = datacode.decode()
            logging.info("Client code: %s", datacode)
            print("Client code: ", datacode)
            try:
                datacode = int(datacode)
            except ValueError:  # narrowed: int() failure raises ValueError
                logging.info("Unexpected error")
                print("Unexpected error")
            else:
                if datacode == 0:
                    logging.info("Command not found")
                    print("Command not found")
                if datacode == 9:
                    c.close()
                    break
def listen(self):
    """Block until the trigger keyphrase is heard; play a chime and invoke
    the registered callback for each detection.
    """
    self.cosmo.logger.debug("Preparing Trigger Handler")
    # Imports hoisted out of the detection loop — the original re-executed
    # them on every trigger (cached, but needless per-iteration work).
    import os
    from pocketsphinx import LiveSpeech
    from pydub import AudioSegment
    from pydub.playback import play
    speech = LiveSpeech(lm=False, keyphrase=self.trigger, kws_threshold=1e-20)
    self.cosmo.logger.debug("Listening for Trigger Word")
    for phrase in speech:
        print(phrase.segments(detailed=True))
        play(AudioSegment.from_file(os.path.abspath("cosmo/assets/sounds/activate.wav"), format="wav"))
        self.cosmo.logger.debug("Trigger Word Found")
        self.callback_func()
def entrada():
    """ROS node publishing each recognized phrase on 'entrada' and 'controle'."""
    entrada = rospy.Publisher("entrada", String, queue_size=1)  # queue size reduced to 1
    controle = rospy.Publisher("controle", String, queue_size=1)  # queue size reduced to 1
    rospy.init_node('comandos', anonymous=True)
    rate = rospy.Rate(1)  # 1 Hz (one publish cycle per second)
    while not rospy.is_shutdown():
        for phrase in LiveSpeech():
            texto = str(phrase)
            rospy.loginfo(texto)
            entrada.publish(texto)
            controle.publish(texto)
            rate.sleep()