def process_file(self, path):
    """Run Rhino speech-to-intent inference over a single audio file.

    Args:
        path: path to a mono, 16-bit PCM audio file readable by soundfile,
            sampled at the engine's required rate.

    Returns:
        dict(intent=..., slots=...) when inference finalized and the command
        was understood, otherwise None.
    """
    sys.path.append(_path('rhino/binding/python'))
    from rhino import Rhino

    rhino = Rhino(library_path=self._library_path,
                  model_path=self._model_path,
                  context_path=self._context_path)

    try:
        pcm, sample_rate = soundfile.read(path, dtype='int16')
        assert pcm.ndim == 1
        assert sample_rate == rhino.sample_rate

        is_finalized = False
        start_index = 0
        # '<=' (not '<') so the final full frame is also processed when
        # len(pcm) is an exact multiple of the frame length; the original
        # condition dropped it.
        while (start_index <= (len(pcm) - rhino.frame_length)
               and not is_finalized):
            end_index = start_index + rhino.frame_length
            is_finalized = rhino.process(pcm[start_index:end_index])
            start_index = end_index

        if is_finalized and rhino.is_understood():
            intent, slot_values = rhino.get_intent()
            return dict(intent=intent, slots=slot_values)
        return None
    finally:
        # Release the native engine handle; other examples in this file call
        # delete() after use, the original here leaked it.
        rhino.delete()
Example #2
0
def process_file(path):
    """Infer an intent from the audio file at *path* using Rhino.

    Returns dict(intent=..., slots=...) when the spoken command was finalized
    and understood, otherwise None.
    """
    rhino = Rhino(
        library_path=LIB_PATH,
        model_file_path=MODEL_PATH,
        context_file_path=CONTEXT_PATH)

    pcm, sample_rate = soundfile.read(path, dtype='int16')
    assert pcm.ndim == 1
    assert sample_rate == rhino.sample_rate

    frame_length = rhino.frame_length
    finalized = False
    offset = 0
    # Feed fixed-size frames until the engine reports finalization or the
    # audio is exhausted.
    while offset < (len(pcm) - frame_length) and not finalized:
        finalized = rhino.process(pcm[offset:offset + frame_length])
        offset += frame_length

    outcome = None
    if finalized and rhino.is_understood():
        name, slots = rhino.get_intent()
        outcome = dict(intent=name, slots=slots)
    return outcome
Example #3
0
def main():
    """CLI entry point: run Rhino over one audio file and print the first
    finalized inference (intent, slots, timestamp) or a not-understood
    message, then release the engine."""
    parser = argparse.ArgumentParser()

    parser.add_argument('--input_audio_file_path',
                        help='absolute path to input audio file',
                        required=True)

    parser.add_argument('--context_file_path',
                        help="absolute path to Rhino's context file",
                        required=True)

    parser.add_argument('--library_path',
                        help="absolute path to dynamic library",
                        default=RHINO_LIBRARY_PATH)

    parser.add_argument('--model_file_path',
                        help='absolute path to model parameter file',
                        default=RHINO_MODEL_FILE_PATH)

    args = parser.parse_args()

    rhino = Rhino(library_path=args.library_path,
                  model_path=args.model_file_path,
                  context_path=args.context_file_path)

    def _frame_index_to_sec(frame_index):
        # Convert a frame index into elapsed seconds of audio.
        return float(frame_index * rhino.frame_length) / float(
            rhino.sample_rate)

    audio, sample_rate = soundfile.read(args.input_audio_file_path,
                                        dtype='int16')
    assert sample_rate == rhino.sample_rate

    frame_length = rhino.frame_length
    for frame_index in range(len(audio) // frame_length):
        start = frame_index * frame_length
        if not rhino.process(audio[start:start + frame_length]):
            continue
        # Inference finalized: report and stop consuming audio.
        if rhino.is_understood():
            intent, slot_values = rhino.get_intent()
            print()
            print('intent : %s at time: %f' %
                  (intent, _frame_index_to_sec(frame_index)))
            for slot, value in slot_values.items():
                print('%s: %s' % (slot, value))
        else:
            print("didn't understand the command")
        break

    rhino.delete()
Example #4
0
    def setUpClass(cls):
        # One Rhino engine per (language, context) pair, keyed as
        # cls.rhinos[language][context].
        language_contexts = {
            'en': ['coffee_maker'],
            'es': ['iluminación_inteligente'],
            'de': ['beleuchtung'],
        }

        cls.rhinos = {
            language: {
                context: Rhino(
                    access_key=sys.argv[1],
                    library_path=pv_library_path('../..'),
                    model_path=cls.__pv_model_path_by_language(
                        '../..', language),
                    context_path=cls.__context_path(context, language))
                for context in contexts
            }
            for language, contexts in language_contexts.items()
        }
Example #5
0
    def run(self):
        """
         Creates an input audio stream, initializes wake word detection (Porcupine) and speech to intent (Rhino)
         engines, and monitors the audio stream for occurrences of the wake word and then infers the intent from speech
         command that follows.
         """

        # Engine and audio handles; kept as None so the finally-block can
        # release only what was actually created.
        porcupine = None
        rhino = None
        pa = None
        audio_stream = None

        # Two-stage state machine: first detect the wake word with Porcupine,
        # then stream frames into Rhino until inference is finalized.
        wake_phrase_detected = False
        intent_extraction_is_finalized = False

        try:
            porcupine = Porcupine(
                library_path=self._porcupine_library_path,
                model_file_path=self._porcupine_model_file_path,
                keyword_file_paths=[self._porcupine_keyword_file_path],
                sensitivities=[self._porcupine_sensitivity])

            rhino = Rhino(library_path=self._rhino_library_path,
                          model_file_path=self._rhino_model_file_path,
                          context_file_path=self._rhino_context_file_path)
            print(rhino.context_expressions)

            pa = pyaudio.PyAudio()

            # Stream is opened with Porcupine's parameters; the assert below
            # guarantees Rhino consumes the same frame size.
            audio_stream = pa.open(rate=porcupine.sample_rate,
                                   channels=1,
                                   format=pyaudio.paInt16,
                                   input=True,
                                   frames_per_buffer=porcupine.frame_length,
                                   input_device_index=self._input_device_index)

            # NOTE: This is true now and will be correct possibly forever. If it changes the logic below need to change.
            assert porcupine.frame_length == rhino.frame_length

            while True:
                # Read one frame of 16-bit mono samples and unpack to ints.
                pcm = audio_stream.read(porcupine.frame_length)
                pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)

                if self._output_path is not None:
                    self._recorded_frames.append(pcm)

                if not wake_phrase_detected:
                    wake_phrase_detected = porcupine.process(pcm)
                    if wake_phrase_detected:
                        print('detected wake phrase')
                elif not intent_extraction_is_finalized:
                    intent_extraction_is_finalized = rhino.process(pcm)
                else:
                    # Inference finalized: report the result, then reset and
                    # go back to listening for the wake word.
                    if rhino.is_understood():
                        intent, slot_values = rhino.get_intent()
                        print('intent: %s' % intent)
                        print('---')
                        for slot, value in slot_values.items():
                            print('%s: %s' % (slot, value))
                        print()
                    else:
                        print("didn't understand the command")

                    rhino.reset()
                    wake_phrase_detected = False
                    intent_extraction_is_finalized = False

        except KeyboardInterrupt:
            print('stopping ...')

        finally:
            # Release native engine handles and audio resources.
            if porcupine is not None:
                porcupine.delete()

            if rhino is not None:
                rhino.delete()

            if audio_stream is not None:
                audio_stream.close()

            if pa is not None:
                pa.terminate()

            # Optionally persist everything captured as a 16-bit PCM file.
            if self._output_path is not None and len(
                    self._recorded_frames) > 0:
                recorded_audio = np.concatenate(self._recorded_frames,
                                                axis=0).astype(np.int16)
                soundfile.write(self._output_path,
                                recorded_audio,
                                samplerate=porcupine.sample_rate,
                                subtype='PCM_16')
Example #6
0
    def run(self):
        # Convert a frame index into a timestamp in seconds within the audio
        # track. The fixed -1.0 s offset is empirical — presumably to
        # compensate for detection latency (NOTE(review): confirm).
        def _frame_index_to_sec(frame_index):
            return (float(frame_index * rhino.frame_length) /
                    float(rhino.sample_rate)) - float(1)

        # NOTE(review): because the nested def above precedes it, this string
        # is a no-op expression statement, not the method's docstring.
        """
         Creates an input audio stream, initializes wake word detection (Porcupine) and speech to intent (Rhino)
         engines, and monitors the audio stream for occurrences of the wake word and then infers the intent from speech
         command that follows.
         """

        # Engine/audio handles; None until created so the finally-block can
        # release only what exists.
        porcupine = None
        rhino = None
        pa = None
        audio_stream = None

        # Wake detection starts as True, so frames go straight to Rhino
        # without waiting for a wake phrase.
        wake_phrase_detected = True
        intent_extraction_is_finalized = False
        # Extract the audio track from the video; `wf` supplies the frames
        # that are processed, `ww`/`sr` are read only to print the length.
        Apath = Video_to_Audio(self._video_path)
        wf = wave.Wave_read(Apath)
        ww, sr = soundfile.read(Video_to_Audio(self._video_path))
        print(len(ww))
        try:
            porcupine = Porcupine(
                library_path=self._porcupine_library_path,
                model_file_path=self._porcupine_model_file_path,
                keyword_file_paths=[self._porcupine_keyword_file_path],
                sensitivities=[0.5],
            )

            rhino = Rhino(
                library_path=self._rhino_library_path,
                model_path=self._rhino_model_file_path,
                context_path=self._rhino_context_file_path,
                sensitivity=0.6,
            )

            print()
            print(
                "****************************** context ******************************"
            )
            print(rhino.context_info)
            print(
                "*********************************************************************"
            )
            print()

            pa = pyaudio.PyAudio()

            # The microphone stream is opened, but its data is discarded
            # below; frames fed to the engines come from the wave file.
            audio_stream = pa.open(
                rate=porcupine.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=porcupine.frame_length,
                input_device_index=self._input_device_index,
            )

            # `test` counts processed frames; `Tpath` is the sidecar text log.
            test = 0
            Tpath = Apath.replace("wav", "txt")
            f = open(Tpath, "w")

            # NOTE(review): `ouput` and `classtr` look like typos for
            # "output"/"classstr"; `classtr` is never read again, and
            # `classstr` is first assigned only inside the loop, so it may be
            # unbound when the except-handler at the bottom runs.
            ouput = ""
            classtr = ""
            startcount = 0
            endcount = 0
            cango = 1
            checkfirst = 0
            data_csv = [["Class_num", "Start_time", "End_time"]]
            ClassNum = None
            Start_time = None
            Start_time2 = None
            ClassNum2 = None
            rm = None
            # NOTE: This is true now and will be correct possibly forever. If it changes the logic below need to change.
            assert porcupine.frame_length == rhino.frame_length
            try:
                while True:

                    # `date` (sic) holds the wave-file frame actually used;
                    # the mic read keeps the stream drained, but its result is
                    # overwritten by the unpack just below.
                    date = wf.readframes(porcupine.frame_length)
                    pcm = audio_stream.read(porcupine.frame_length,
                                            exception_on_overflow=False)

                    pcm = struct.unpack_from("h" * porcupine.frame_length,
                                             date)

                    if self._output_path is not None:
                        self._recorded_frames.append(pcm)

                    if not wake_phrase_detected:
                        wake_phrase_detected = porcupine.process(pcm)

                        if wake_phrase_detected:
                            print("detected wake phrase")
                    elif not intent_extraction_is_finalized:
                        intent_extraction_is_finalized = rhino.process(pcm)

                    else:

                        if rhino.is_understood():
                            cango = 1
                            intent, slot_values = rhino.get_intent()
                            print()
                            if intent == "EndWork":

                                # "EndWork" closes the currently open span.
                                endcount += 1
                                classstr = " - %s" % _frame_index_to_sec(test)

                            else:
                                # Any other intent opens a new span.
                                checkfirst += 1
                                startcount += 1
                                endcount = 0
                                for slot, value in slot_values.items():
                                    print("%s: %s" % (slot, value))
                                    classstr = ("%s: %s" % (slot, value)) + (
                                        " start time is %s" %
                                        _frame_index_to_sec(test))
                                    if startcount == 2:
                                        # Remember the previous span before
                                        # overwriting it with the new one.
                                        Start_time2 = Start_time
                                        ClassNum2 = ClassNum
                                    Start_time = _frame_index_to_sec(test)
                                    ClassNum = value
                            print()

                            print("intent : %s at time: %f" %
                                  (intent, _frame_index_to_sec(test)))
                            print()
                        else:
                            print("didn't understand the command")
                            cango = 0

                        # Rearm both detectors for the next utterance.
                        rhino.reset()
                        wake_phrase_detected = True
                        intent_extraction_is_finalized = False
                        print(startcount, endcount)
                        print(ouput)

                        if cango:
                            # State machine over (startcount, endcount):
                            # writes completed spans to the text log and CSV.
                            if endcount == 1 and startcount == 0:
                                # An end with no matching start.
                                ouput = classstr
                                f.write("-1 class end at" + ouput + "\n")
                                endcount = 0
                                ouput = ""
                                data_csv.append(
                                    ["-1", "-1",
                                     _frame_index_to_sec(test)])
                            elif ouput == "" and endcount == 0 and startcount == 1:
                                # First start: remember it, wait for the end.
                                ouput = classstr

                            elif ouput != "" and endcount == 1:
                                # Start followed by an end: emit the full
                                # span, replacing the provisional "-1" row if
                                # one was recorded (`rm`).
                                try:
                                    data_csv.remove(rm)
                                except:
                                    pass
                                data_csv.append([
                                    ClassNum, Start_time,
                                    _frame_index_to_sec(test)
                                ])
                                ouput += classstr
                                endcount = 0
                                startcount = 0
                                f.write(ouput + "\n")
                                ouput = ""
                            elif endcount == 0 and startcount == 2:
                                # Second start without an end: close the first
                                # span open-ended ("-1") and begin a new one.
                                if checkfirst == 2:
                                    data_csv.append(
                                        [ClassNum2, Start_time2, "-1"])

                                    f.write(ouput + "\n")
                                data_csv.append([ClassNum, Start_time, "-1"])
                                rm = [ClassNum, Start_time, "-1"]
                                ouput = classstr
                                f.write(ouput + "\n")
                                startcount = 1

                    test += 1
            except:
                # NOTE(review): bare except — treats ANY error (including
                # real bugs, not just the wave file running out) as EOF.
                print("EOF")
                print(_frame_index_to_sec(test))
                data_csv.append(["Maybe miss", classstr, classstr])
                # (Russian: "A label may have been missed: %s")
                f.write("Могла быть упущенная метка : %s" % classstr)
                # NOTE(review): rebinding `f` here shadows the text-log
                # handle, which is never explicitly closed.
                with open("sw_data_new.csv", "w") as f:
                    writer = csv.writer(f)
                    for row in data_csv:
                        writer.writerow(row)

        except KeyboardInterrupt:
            print("stopping ...")

        finally:
            # Release native engine handles and audio resources.
            if porcupine is not None:
                porcupine.delete()

            if rhino is not None:
                rhino.delete()

            if audio_stream is not None:
                audio_stream.close()

            if pa is not None:
                pa.terminate()

            # Optionally persist everything captured as 16-bit PCM.
            if self._output_path is not None and len(
                    self._recorded_frames) > 0:
                recorded_audio = np.concatenate(self._recorded_frames,
                                                axis=0).astype(np.int16)
                soundfile.write(
                    os.path.expanduser(self._output_path),
                    recorded_audio,
                    samplerate=porcupine.sample_rate,
                    subtype="PCM_16",
                )
Example #7
0
 def setUpClass(cls):
     # Build a single shared Rhino engine for the whole test class.
     engine_kwargs = dict(
         library_path=cls._library_path(),
         model_file_path=cls._abs_path('lib/common/rhino_params.pv'),
         context_file_path=cls._context_file_path())
     cls.rhino = Rhino(**engine_kwargs)
Example #8
0
 def setUpClass(cls):
     # One shared engine; library/model paths resolved relative to the
     # repository root, context path supplied by the test class.
     engine_config = dict(
         library_path=pv_library_path('../..'),
         model_path=pv_model_path('../..'),
         context_path=cls._context_path())
     cls.rhino = Rhino(**engine_config)
Example #9
0
# Scan the host's audio devices for the configured target; abort when the
# target device is not present.
target_device_desc = None
for idx in range(host_device_count):
    desc = pa.get_device_info_by_index(idx)
    print(idx, desc['name'])
    if target_device_name in desc['name']:
        print('Found target device', idx, desc['name'], desc)
        target_device_desc = desc
        break
# 'is None' rather than '== None' (PEP 8): identity test for the sentinel.
if target_device_desc is None:
    print('Could not find target device', target_device_name)
    exit(1)

porcupine_handle = pvporcupine.create(keywords=['computer'],
                                      sensitivities=[0.9])
rhino_handle = Rhino(context_file_path="/host_disk/src/rhino.rhn",
                     model_file_path="/host_disk/src/rhino.pv",
                     library_path="/host_disk/src/libpv_rhino.so")
frame_len = porcupine_handle.frame_length

# Mutable shared state read and written by the audio callback below.
g = {}
g['woke'] = False
g['timer'] = time.time()
g['intent'] = False

def _audio_callback(in_data, frame_count, time_info, status):
    # PyAudio stream callback: run wake-word detection on each full frame and
    # flag g['woke'] unless intent extraction is already in progress.
    if frame_count < frame_len:
        return
    pcm = struct.unpack_from("h" * frame_len, in_data)
    if porcupine_handle.process(pcm) and not g['intent']:
        g['woke'] = True
Example #10
0
 def setUpClass(cls):
     # Shared engine configured from module-level path constants, using the
     # 'coffee_maker' context.
     config = dict(
         library_path=RHINO_LIBRARY_PATH,
         model_path=RHINO_MODEL_FILE_PATH,
         context_path=CONTEXT_FILE_PATHS['coffee_maker'])
     cls.rhino = Rhino(**config)