def process_file(self, path):
    sys.path.append(_path('rhino/binding/python'))

    from rhino import Rhino

    rhino = Rhino(
        library_path=self._library_path,
        model_path=self._model_path,
        context_path=self._context_path)

    pcm, sample_rate = soundfile.read(path, dtype='int16')
    assert pcm.ndim == 1
    assert sample_rate == rhino.sample_rate

    is_finalized = False
    start_index = 0
    while start_index < (len(pcm) - rhino.frame_length) and not is_finalized:
        end_index = start_index + rhino.frame_length
        is_finalized = rhino.process(pcm[start_index:end_index])
        start_index = end_index

    if not is_finalized:
        result = None
    else:
        if rhino.is_understood():
            intent, slot_values = rhino.get_intent()
            result = dict(intent=intent, slots=slot_values)
        else:
            result = None

    return result
def process_file(path):
    rhino = Rhino(
        library_path=LIB_PATH,
        model_file_path=MODEL_PATH,
        context_file_path=CONTEXT_PATH)

    pcm, sample_rate = soundfile.read(path, dtype='int16')
    assert pcm.ndim == 1
    assert sample_rate == rhino.sample_rate

    is_finalized = False
    start_index = 0
    while start_index < (len(pcm) - rhino.frame_length) and not is_finalized:
        end_index = start_index + rhino.frame_length
        is_finalized = rhino.process(pcm[start_index:end_index])
        start_index = end_index

    if not is_finalized:
        intent = None
    else:
        if rhino.is_understood():
            intent, slot_values = rhino.get_intent()
            intent = dict(intent=intent, slots=slot_values)
        else:
            intent = None

    return intent
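# A minimal usage sketch for process_file above. 'test.wav' is a hypothetical
# single-channel 16-bit recording of a command covered by the context;
# LIB_PATH, MODEL_PATH, and CONTEXT_PATH are assumed to be module-level
# constants pointing at a local Rhino install.
if __name__ == '__main__':
    result = process_file('test.wav')
    if result is not None:
        print('intent: %s' % result['intent'])
        for slot, value in result['slots'].items():
            print('%s: %s' % (slot, value))
    else:
        print("didn't understand the command")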
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--input_audio_file_path',
        help='absolute path to input audio file',
        required=True)

    parser.add_argument(
        '--context_file_path',
        help="absolute path to Rhino's context file",
        required=True)

    parser.add_argument(
        '--library_path',
        help='absolute path to dynamic library',
        default=RHINO_LIBRARY_PATH)

    parser.add_argument(
        '--model_file_path',
        help='absolute path to model parameter file',
        default=RHINO_MODEL_FILE_PATH)

    args = parser.parse_args()

    rhino = Rhino(
        library_path=args.library_path,
        model_path=args.model_file_path,
        context_path=args.context_file_path)

    def _frame_index_to_sec(frame_index):
        return float(frame_index * rhino.frame_length) / float(rhino.sample_rate)

    audio, sample_rate = soundfile.read(args.input_audio_file_path, dtype='int16')
    assert sample_rate == rhino.sample_rate

    num_frames = len(audio) // rhino.frame_length
    for i in range(num_frames):
        frame = audio[i * rhino.frame_length:(i + 1) * rhino.frame_length]
        is_finalized = rhino.process(frame)
        if is_finalized:
            if rhino.is_understood():
                intent, slot_values = rhino.get_intent()
                print()
                print('intent : %s at time: %f' % (intent, _frame_index_to_sec(i)))
                for slot, value in slot_values.items():
                    print('%s: %s' % (slot, value))
            else:
                print("didn't understand the command")
            break

    rhino.delete()
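# Example invocation of main() above (file names are hypothetical; the context
# file must match the platform the dynamic library was built for):
#
#   python rhino_demo.py \
#       --input_audio_file_path /path/to/command.wav \
#       --context_file_path /path/to/coffee_maker.rhn
#
# --library_path and --model_file_path fall back to RHINO_LIBRARY_PATH and
# RHINO_MODEL_FILE_PATH when omitted.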
def setUpClass(cls):
    _language_to_contexts = {
        'en': ['coffee_maker'],
        'es': ['iluminación_inteligente'],
        'de': ['beleuchtung']
    }

    cls.rhinos = dict()
    for language in _language_to_contexts:
        cls.rhinos[language] = dict()
        for context in _language_to_contexts[language]:
            cls.rhinos[language][context] = Rhino(
                access_key=sys.argv[1],
                library_path=pv_library_path('../..'),
                model_path=cls.__pv_model_path_by_language('../..', language),
                context_path=cls.__context_path(context, language))
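# A matching teardown sketch for the fixture above (an assumption, not part of
# the original tests): every Rhino handle allocated in setUpClass is released
# so the native resources don't outlive the test class.
@classmethod
def tearDownClass(cls):
    for language_contexts in cls.rhinos.values():
        for rhino in language_contexts.values():
            rhino.delete()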
def run(self):
    """
    Creates an input audio stream, initializes wake word detection (Porcupine)
    and speech-to-intent (Rhino) engines, and monitors the audio stream for
    occurrences of the wake word, then infers the intent from the speech
    command that follows.
    """
    porcupine = None
    rhino = None
    pa = None
    audio_stream = None

    wake_phrase_detected = False
    intent_extraction_is_finalized = False

    try:
        porcupine = Porcupine(
            library_path=self._porcupine_library_path,
            model_file_path=self._porcupine_model_file_path,
            keyword_file_paths=[self._porcupine_keyword_file_path],
            sensitivities=[self._porcupine_sensitivity])

        rhino = Rhino(
            library_path=self._rhino_library_path,
            model_file_path=self._rhino_model_file_path,
            context_file_path=self._rhino_context_file_path)

        print(rhino.context_expressions)

        pa = pyaudio.PyAudio()
        audio_stream = pa.open(
            rate=porcupine.sample_rate,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=porcupine.frame_length,
            input_device_index=self._input_device_index)

        # NOTE: This is true now and will possibly be correct forever. If it
        # changes, the logic below needs to change.
        assert porcupine.frame_length == rhino.frame_length

        while True:
            pcm = audio_stream.read(porcupine.frame_length)
            pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)

            if self._output_path is not None:
                self._recorded_frames.append(pcm)

            if not wake_phrase_detected:
                wake_phrase_detected = porcupine.process(pcm)
                if wake_phrase_detected:
                    print('detected wake phrase')
            elif not intent_extraction_is_finalized:
                intent_extraction_is_finalized = rhino.process(pcm)
            else:
                if rhino.is_understood():
                    intent, slot_values = rhino.get_intent()
                    print('intent: %s' % intent)
                    print('---')
                    for slot, value in slot_values.items():
                        print('%s: %s' % (slot, value))
                    print()
                else:
                    print("didn't understand the command")

                rhino.reset()
                wake_phrase_detected = False
                intent_extraction_is_finalized = False
    except KeyboardInterrupt:
        print('stopping ...')
    finally:
        if porcupine is not None:
            porcupine.delete()

        if rhino is not None:
            rhino.delete()

        if audio_stream is not None:
            audio_stream.close()

        if pa is not None:
            pa.terminate()

        if self._output_path is not None and len(self._recorded_frames) > 0:
            recorded_audio = np.concatenate(self._recorded_frames, axis=0).astype(np.int16)
            soundfile.write(
                self._output_path,
                recorded_audio,
                samplerate=porcupine.sample_rate,
                subtype='PCM_16')
def run(self):
    """
    Extracts the audio track from the input video, runs speech-to-intent
    (Rhino) over it frame by frame, and writes the inferred class labels and
    their start/end times to a text file and a CSV file.
    """

    def _frame_index_to_sec(frame_index):
        return (float(frame_index * rhino.frame_length) /
                float(rhino.sample_rate)) - float(1)

    porcupine = None
    rhino = None
    pa = None
    audio_stream = None
    wake_phrase_detected = True
    intent_extraction_is_finalized = False

    # Extract the audio track from the input video and open it for reading.
    Apath = Video_to_Audio(self._video_path)
    wf = wave.open(Apath, 'rb')
    ww, sr = soundfile.read(Apath)
    print(len(ww))

    try:
        porcupine = Porcupine(
            library_path=self._porcupine_library_path,
            model_file_path=self._porcupine_model_file_path,
            keyword_file_paths=[self._porcupine_keyword_file_path],
            sensitivities=[0.5])

        rhino = Rhino(
            library_path=self._rhino_library_path,
            model_path=self._rhino_model_file_path,
            context_path=self._rhino_context_file_path,
            sensitivity=0.6)

        print()
        print('****************************** context ******************************')
        print(rhino.context_info)
        print('*********************************************************************')
        print()

        pa = pyaudio.PyAudio()
        audio_stream = pa.open(
            rate=porcupine.sample_rate,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=porcupine.frame_length,
            input_device_index=self._input_device_index)

        test = 0  # frame index into the audio file

        # The text transcript sits next to the extracted audio file.
        Tpath = Apath.replace('wav', 'txt')
        f = open(Tpath, 'w')

        output = ''
        classstr = ''
        startcount = 0
        endcount = 0
        cango = 1
        checkfirst = 0
        data_csv = [['Class_num', 'Start_time', 'End_time']]
        ClassNum = None
        Start_time = None
        Start_time2 = None
        ClassNum2 = None
        rm = None

        # NOTE: This is true now and will possibly be correct forever. If it
        # changes, the logic below needs to change.
        assert porcupine.frame_length == rhino.frame_length

        try:
            while True:
                # Frames come from the extracted audio file; the microphone
                # read only paces the loop in real time and is discarded.
                data = wf.readframes(porcupine.frame_length)
                audio_stream.read(porcupine.frame_length, exception_on_overflow=False)
                pcm = struct.unpack_from('h' * porcupine.frame_length, data)

                if self._output_path is not None:
                    self._recorded_frames.append(pcm)

                if not wake_phrase_detected:
                    wake_phrase_detected = porcupine.process(pcm)
                    if wake_phrase_detected:
                        print('detected wake phrase')
                elif not intent_extraction_is_finalized:
                    intent_extraction_is_finalized = rhino.process(pcm)
                else:
                    if rhino.is_understood():
                        cango = 1
                        intent, slot_values = rhino.get_intent()
                        print()
                        if intent == 'EndWork':
                            endcount += 1
                            classstr = ' - %s' % _frame_index_to_sec(test)
                        else:
                            checkfirst += 1
                            startcount += 1
                            endcount = 0
                            for slot, value in slot_values.items():
                                print('%s: %s' % (slot, value))
                                classstr = ('%s: %s' % (slot, value)) + (
                                    ' start time is %s' % _frame_index_to_sec(test))
                                if startcount == 2:
                                    Start_time2 = Start_time
                                    ClassNum2 = ClassNum
                                Start_time = _frame_index_to_sec(test)
                                ClassNum = value
                        print()
                        print('intent : %s at time: %f' % (intent, _frame_index_to_sec(test)))
                        print()
                    else:
                        print("didn't understand the command")
                        cango = 0

                    rhino.reset()
                    wake_phrase_detected = True
                    intent_extraction_is_finalized = False
                    print(startcount, endcount)
                    print(output)

                    if cango:
                        if endcount == 1 and startcount == 0:
                            output = classstr
                            f.write('-1 class end at' + output + '\n')
                            endcount = 0
                            output = ''
                            data_csv.append(['-1', '-1', _frame_index_to_sec(test)])
                        elif output == '' and endcount == 0 and startcount == 1:
                            output = classstr
                        elif output != '' and endcount == 1:
                            try:
                                data_csv.remove(rm)
                            except ValueError:
                                pass
                            data_csv.append([ClassNum, Start_time, _frame_index_to_sec(test)])
                            output += classstr
                            endcount = 0
                            startcount = 0
                            f.write(output + '\n')
                            output = ''
                        elif endcount == 0 and startcount == 2:
                            if checkfirst == 2:
                                data_csv.append([ClassNum2, Start_time2, '-1'])
                                f.write(output + '\n')
                            data_csv.append([ClassNum, Start_time, '-1'])
                            rm = [ClassNum, Start_time, '-1']
                            output = classstr
                            f.write(output + '\n')
                            startcount = 1

                test += 1
        except struct.error:
            # readframes() returns a short buffer at end of file, which makes
            # unpack_from() raise struct.error.
            print('EOF')
            print(_frame_index_to_sec(test))
            data_csv.append(['Maybe miss', classstr, classstr])
            f.write('A label may have been missed: %s' % classstr)
            with open('sw_data_new.csv', 'w') as csv_file:
                writer = csv.writer(csv_file)
                for row in data_csv:
                    writer.writerow(row)
    except KeyboardInterrupt:
        print('stopping ...')
    finally:
        if porcupine is not None:
            porcupine.delete()

        if rhino is not None:
            rhino.delete()

        if audio_stream is not None:
            audio_stream.close()

        if pa is not None:
            pa.terminate()

        if self._output_path is not None and len(self._recorded_frames) > 0:
            recorded_audio = np.concatenate(self._recorded_frames, axis=0).astype(np.int16)
            soundfile.write(
                os.path.expanduser(self._output_path),
                recorded_audio,
                samplerate=porcupine.sample_rate,
                subtype='PCM_16')
def setUpClass(cls):
    cls.rhino = Rhino(
        library_path=cls._library_path(),
        model_file_path=cls._abs_path('lib/common/rhino_params.pv'),
        context_file_path=cls._context_file_path())
def setUpClass(cls):
    cls.rhino = Rhino(
        library_path=pv_library_path('../..'),
        model_path=pv_model_path('../..'),
        context_path=cls._context_path())
target_device_desc = None
for idx in range(0, host_device_count):
    desc = pa.get_device_info_by_index(idx)
    print(idx, desc['name'])
    if target_device_name in desc['name']:
        print('Found target device', idx, desc['name'], desc)
        target_device_desc = desc
        break

if target_device_desc is None:
    print('Could not find target device', target_device_name)
    exit(1)

porcupine_handle = pvporcupine.create(keywords=['computer'], sensitivities=[0.9])
rhino_handle = Rhino(
    context_file_path="/host_disk/src/rhino.rhn",
    model_file_path="/host_disk/src/rhino.pv",
    library_path="/host_disk/src/libpv_rhino.so")

frame_len = porcupine_handle.frame_length

# Shared state between the audio callback and the main loop.
g = {}
g['woke'] = False
g['timer'] = time.time()
g['intent'] = False


def _audio_callback(in_data, frame_count, time_info, status):
    if frame_count >= frame_len:
        pcm = struct.unpack_from("h" * frame_len, in_data)
        should_wake = porcupine_handle.process(pcm)
        if should_wake and not g['intent']:
            g['woke'] = True
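# A sketch of how _audio_callback above could be wired into an input stream.
# This assumes the truncated callback eventually returns
# (None, pyaudio.paContinue), which PyAudio requires from a stream callback;
# everything else comes from the Porcupine handle and the device scan above.
audio_stream = pa.open(
    rate=porcupine_handle.sample_rate,
    channels=1,
    format=pyaudio.paInt16,
    input=True,
    frames_per_buffer=frame_len,
    input_device_index=target_device_desc['index'],
    stream_callback=_audio_callback)

audio_stream.start_stream()
while audio_stream.is_active():
    time.sleep(0.1)  # the callback does the work; just keep the process alive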
def setUpClass(cls):
    cls.rhino = Rhino(
        library_path=RHINO_LIBRARY_PATH,
        model_path=RHINO_MODEL_FILE_PATH,
        context_path=CONTEXT_FILE_PATHS['coffee_maker'])
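# A minimal test-method sketch to go with the fixture above (an assumption,
# not part of the original suite). The WAV file name and the expected intent
# are hypothetical; a real test would use a recording that matches the
# 'coffee_maker' context, read with soundfile as in the other snippets.
def test_within_context(self):
    pcm, sample_rate = soundfile.read('test_within_context.wav', dtype='int16')
    self.assertEqual(sample_rate, self.rhino.sample_rate)

    is_finalized = False
    for i in range(len(pcm) // self.rhino.frame_length):
        frame = pcm[i * self.rhino.frame_length:(i + 1) * self.rhino.frame_length]
        is_finalized = self.rhino.process(frame)
        if is_finalized:
            break

    self.assertTrue(is_finalized, "couldn't finalize the inference")
    self.assertTrue(self.rhino.is_understood(), "couldn't understand the command")

    intent, slot_values = self.rhino.get_intent()
    self.assertEqual(intent, 'orderDrink')  # hypothetical expected intent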