Example #1
0
    def __init__(self,
                 keyword_path,
                 context_path,
                 porcupine_library_path=None,
                 porcupine_model_path=None,
                 porcupine_sensitivity=0.5,
                 rhino_library_path=None,
                 rhino_model_path=None,
                 rhino_sensitivity=0.5,
                 output_path=None):
        """Build the underlying Picovoice engine and, when an output path is
        given, prepare a buffer for recording the captured audio frames."""
        super(PicovoiceDemo, self).__init__()

        # Collect the engine configuration in one place, then construct.
        engine_kwargs = dict(
            keyword_path=keyword_path,
            wake_word_callback=self._wake_word_callback,
            context_path=context_path,
            inference_callback=self._inference_callback,
            porcupine_library_path=porcupine_library_path,
            porcupine_model_path=porcupine_model_path,
            porcupine_sensitivity=porcupine_sensitivity,
            rhino_library_path=rhino_library_path,
            rhino_model_path=rhino_model_path,
            rhino_sensitivity=rhino_sensitivity,
        )
        self._picovoice = Picovoice(**engine_kwargs)

        self.output_path = output_path
        if self.output_path is not None:
            self._recorded_frames = []
    def setUp(self):
        """Create a fresh Picovoice engine using the bundled 'picovoice'
        keyword and the platform-specific context, then reset the flags the
        callbacks will populate."""
        engine_kwargs = dict(
            keyword_path=pvporcupine.KEYWORD_PATHS['picovoice'],
            wake_word_callback=self._wake_word_callback,
            context_path=self._context_path(),
            inference_callback=self._inference_callback,
        )
        self._pv = Picovoice(**engine_kwargs)

        self._is_wake_word_detected = False
        self._inference = None
Example #3
0
    def __init__(self,
                 keyword_path,
                 context_path,
                 porcupine_sensitivity=0.75,
                 rhino_sensitivity=0.25):
        """Create the Picovoice engine for the lights demo.

        :param keyword_path: Absolute path to a Porcupine keyword file.
        :param context_path: Absolute path to a Rhino context file.
        :param porcupine_sensitivity: Wake-word sensitivity within [0, 1].
        :param rhino_sensitivity: Inference sensitivity within [0, 1].
        """
        super(PicovoiceDemo, self).__init__()

        # The original wrapped self._inference_callback in a one-line closure
        # that only forwarded its argument; a bound method is already a
        # callable, so pass it directly (as the wake-word callback does).
        self._picovoice = Picovoice(
            keyword_path=keyword_path,
            wake_word_callback=self._wake_word_callback,
            context_path=context_path,
            inference_callback=self._inference_callback,
            porcupine_sensitivity=porcupine_sensitivity,
            rhino_sensitivity=rhino_sensitivity)

        self._context = self._picovoice.context_info

        # Color used when the lights are switched on without naming a color.
        self._color = 'blue'
Example #4
0
    def run(self):
        """Create a Picovoice engine, stream microphone audio into it until
        asked to stop, and release every resource on the way out."""
        engine = None
        audio_backend = None
        stream = None

        try:
            engine = Picovoice(keyword_path=self._keyword_path(),
                               porcupine_sensitivity=0.75,
                               wake_word_callback=self._wake_word_callback,
                               context_path=self._context_path(),
                               inference_callback=self._inference_callback)

            print(engine.context_info)

            audio_backend = pyaudio.PyAudio()
            stream = audio_backend.open(rate=engine.sample_rate,
                                        channels=1,
                                        format=pyaudio.paInt16,
                                        input=True,
                                        frames_per_buffer=engine.frame_length)

            self._is_ready = True

            # Pre-compile the unpack format; it is invariant across frames.
            unpacker = struct.Struct("h" * engine.frame_length)
            while not self._stop:
                raw = stream.read(engine.frame_length)
                engine.process(unpacker.unpack_from(raw))
        finally:
            # Tear down in reverse order of acquisition, skipping anything
            # that never got created.
            if stream is not None:
                stream.close()
            if audio_backend is not None:
                audio_backend.terminate()

            if engine is not None:
                engine.delete()

        self._is_stopped = True
Example #5
0
class PicovoiceDemo(Thread):
    """Voice-controlled LED ring: a Porcupine wake word arms Rhino, whose
    inference turns the lights on/off or changes their color."""

    def __init__(self,
                 keyword_path,
                 context_path,
                 porcupine_sensitivity=0.75,
                 rhino_sensitivity=0.25):
        """Create the Picovoice engine.

        :param keyword_path: Absolute path to a Porcupine keyword file.
        :param context_path: Absolute path to a Rhino context file.
        :param porcupine_sensitivity: Wake-word sensitivity within [0, 1].
        :param rhino_sensitivity: Inference sensitivity within [0, 1].
        """
        super(PicovoiceDemo, self).__init__()

        # The bound method is callable as-is; the closure that merely
        # forwarded to self._inference_callback was redundant.
        self._picovoice = Picovoice(
            keyword_path=keyword_path,
            wake_word_callback=self._wake_word_callback,
            context_path=context_path,
            inference_callback=self._inference_callback,
            porcupine_sensitivity=porcupine_sensitivity,
            rhino_sensitivity=rhino_sensitivity)

        self._context = self._picovoice.context_info

        # Color used when the lights are switched on without naming a color.
        self._color = 'blue'

    @staticmethod
    def _set_color(color):
        # Paint all 12 pixels of the LED ring with the given (r, g, b) tuple.
        for i in range(12):
            driver.set_pixel(i, color[0], color[1], color[2])
        driver.show()

    @staticmethod
    def _wake_word_callback():
        print('[wake word]\n')

    def _inference_callback(self, inference):
        """Print the inference and drive the LEDs for understood commands.

        :raises NotImplementedError: for an understood but unknown intent.
        """
        print('{')
        # BUG FIX: '%' binds tighter than the conditional expression, so the
        # original evaluated as ("..." % 'true') if understood else 'false'
        # and printed a bare "false" (no label) when the command was not
        # understood. Parenthesizing the conditional restores the intent.
        print("  is_understood : '%s'," %
              ('true' if inference.is_understood else 'false'))
        if inference.is_understood:
            print("  intent : '%s'," % inference.intent)
            if len(inference.slots) > 0:
                print('  slots : {')
                for slot, value in inference.slots.items():
                    print("    '%s' : '%s'," % (slot, value))
                print('  }')
        print('}\n')

        if inference.is_understood:
            if inference.intent == 'turnLights':
                if inference.slots['state'] == 'off':
                    self._set_color((0, 0, 0))
                else:
                    self._set_color(COLORS_RGB[self._color])
            elif inference.intent == 'changeColor':
                self._color = inference.slots['color']
                self._set_color(COLORS_RGB[self._color])
            else:
                raise NotImplementedError()

    def run(self):
        """Stream microphone audio into the engine until Ctrl-C."""
        pa = None
        audio_stream = None

        try:
            pa = pyaudio.PyAudio()

            audio_stream = pa.open(
                rate=self._picovoice.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=self._picovoice.frame_length)

            print(self._context)

            print('[Listening ...]')

            while True:
                pcm = audio_stream.read(self._picovoice.frame_length)
                pcm = struct.unpack_from("h" * self._picovoice.frame_length,
                                         pcm)

                self._picovoice.process(pcm)
        except KeyboardInterrupt:
            sys.stdout.write('\b' * 2)
            print('Stopping ...')
        finally:
            # Release audio resources and the engine even if the loop raised.
            if audio_stream is not None:
                audio_stream.close()

            if pa is not None:
                pa.terminate()

            self._picovoice.delete()
Example #6
0

# Absolute paths to the French "Lights" Rhino context file and the matching
# French Rhino model parameters. NOTE(review): `abs_path` is presumably the
# script's base directory, defined earlier in the file -- not visible here.
context_path = abs_path + "/context/Lights_fr_windows_2021-05-07-utc_v1_6_0.rhn"
context_model_path = abs_path + "/context/rhino_params_fr.pv"


def inference_callback(inference):
    """Print a Rhino inference; for understood 'power' commands, list slots."""
    print("GOT inference")
    print(inference.is_understood)
    if not inference.is_understood:
        return
    control = inference.intent
    print(control)
    if control != "power":
        return
    for slot_name, slot_value in inference.slots.items():
        print("    %s : '%s'" % (slot_name, slot_value))


# Build the end-to-end Picovoice engine: Porcupine (wake word) plus Rhino
# (intent inference), both pointed at explicit French model files with 0.5
# sensitivities. NOTE(review): keyword_path, model_path and
# wake_word_callback are defined elsewhere in the file -- not visible here.
handle = Picovoice(keyword_path=keyword_path,
                   porcupine_model_path=model_path,
                   porcupine_sensitivity=0.5,
                   wake_word_callback=wake_word_callback,
                   context_path=context_path,
                   rhino_sensitivity=0.5,
                   rhino_model_path=context_model_path,
                   inference_callback=inference_callback)


def get_next_audio_frame():
    """Stub: intended to return the next frame of captured audio.

    NOTE(review): not implemented -- always returns None.
    """
    pass
def main():
    """Run Picovoice over a WAV file supplied on the command line.

    Parses CLI arguments, builds the engine, feeds the file to it frame by
    frame (printing callback output), and deletes the engine.

    :raises ValueError: if the file's sample rate does not match the engine's.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('--input_audio_path', help='Absolute path to input audio file.', required=True)

    parser.add_argument('--keyword_path', help="Absolute path to a Porcupine keyword file.", required=True)

    parser.add_argument('--context_path', help="Absolute path to a Rhino context file.", required=True)

    parser.add_argument('--porcupine_library_path', help="Absolute path to Porcupine's dynamic library.", default=None)

    parser.add_argument('--porcupine_model_path', help="Absolute path to Porcupine's model file.", default=None)

    parser.add_argument(
        '--porcupine_sensitivity',
        help="Sensitivity for detecting wake word. Each value should be a number within [0, 1]. A higher sensitivity " +
             "results in fewer misses at the cost of increasing the false alarm rate.",
        # BUG FIX: without type=float a user-supplied value reached Picovoice
        # as a string, inconsistent with the float default.
        type=float,
        default=0.5)

    parser.add_argument('--rhino_library_path', help="Absolute path to Rhino's dynamic library.", default=None)

    parser.add_argument('--rhino_model_path', help="Absolute path to Rhino's model file.", default=None)

    parser.add_argument(
        '--rhino_sensitivity',
        # BUG FIX: the concatenated help fragments were missing the space
        # between "fewer" and "misses".
        help="Inference sensitivity. It should be a number within [0, 1]. A higher sensitivity value results in fewer " +
             "misses at the cost of (potentially) increasing the erroneous inference rate.",
        type=float,
        default=0.5)

    args = parser.parse_args()

    def wake_word_callback():
        print('[wake word]\n')

    def inference_callback(inference):
        # Pretty-print the inference: intent and slots when understood,
        # otherwise a short failure note.
        if inference.is_understood:
            print('{')
            print("  intent : '%s'" % inference.intent)
            print('  slots : {')
            for slot, value in inference.slots.items():
                print("    %s : '%s'" % (slot, value))
            print('  }')
            print('}\n')
        else:
            print("Didn't understand the command.\n")

    pv = Picovoice(
        keyword_path=args.keyword_path,
        wake_word_callback=wake_word_callback,
        context_path=args.context_path,
        inference_callback=inference_callback,
        porcupine_library_path=args.porcupine_library_path,
        porcupine_model_path=args.porcupine_model_path,
        porcupine_sensitivity=args.porcupine_sensitivity,
        rhino_library_path=args.rhino_library_path,
        rhino_model_path=args.rhino_model_path,
        rhino_sensitivity=args.rhino_sensitivity)

    audio, sample_rate = soundfile.read(args.input_audio_path, dtype='int16')
    if audio.ndim == 2:
        print("Picovoice processes single-channel audio but stereo file is provided. Processing left channel only.")
        # BUG FIX: soundfile.read returns an array shaped (frames, channels);
        # audio[0, :] selected the first *frame* across channels. The left
        # channel is the first column.
        audio = audio[:, 0]

    if sample_rate != pv.sample_rate:
        raise ValueError("Input audio file should have a sample rate of %d. got %d" % (pv.sample_rate, sample_rate))

    # Feed one frame at a time; any trailing partial frame is dropped.
    for i in range(len(audio) // pv.frame_length):
        frame = audio[i * pv.frame_length:(i + 1) * pv.frame_length]
        pv.process(frame)

    pv.delete()
Example #8
0
    # NOTE(review): fragment -- the enclosing `def` (a text-to-speech helper,
    # judging by the body) is not visible in this chunk; `text` is its input.
    # Cache key: the md5 of the text names a wav file under speech-cache/.
    hash = hashlib.md5(text.encode()).hexdigest()
    file = 'speech-cache/{}.wav'.format(hash)
    cmd = 'play {}'.format(file)
    if not os.path.isfile(file):
        # Not cached yet: synthesize with pico2wave first, then play.
        cmd = 'pico2wave -w {} "{}" && {}'.format(file, text, cmd)
    # SECURITY: `text` is interpolated into a shell command unescaped -- a
    # double quote in `text` breaks out of the quoting (command injection).
    # Prefer subprocess.run([...], shell=False) with an argument list.
    os.system(cmd)


def joke():
    """Fetch a random joke from JokeAPI, print it, and speak it aloud."""
    text = requests.get('https://v2.jokeapi.dev/joke/Any?format=txt').text
    print(text)
    say(text)


# Wire Porcupine + Rhino into one engine with default models/sensitivities.
# NOTE(review): keyword_path, context_path and the two callbacks are defined
# elsewhere in the file -- not visible here.
handle = Picovoice(keyword_path=keyword_path,
                   wake_word_callback=wake_word_callback,
                   context_path=context_path,
                   inference_callback=inference_callback)

pa = pyaudio.PyAudio()

# 16 kHz mono 16-bit capture in 512-sample buffers from input device #6.
# NOTE(review): the device index is machine-specific -- confirm on deploy.
audio_stream = pa.open(rate=16000,
                       channels=1,
                       format=pyaudio.paInt16,
                       input=True,
                       frames_per_buffer=512,
                       input_device_index=6)

while True:
    # NOTE(review): frames are read and unpacked but never passed to
    # handle.process() -- this chunk appears truncated.
    pcm = audio_stream.read(512, exception_on_overflow=False)
    pcm = struct.unpack_from("h" * 512, pcm)
class PicovoiceTestCase(unittest.TestCase):
    @staticmethod
    def _context_path():
        if platform.system() == 'Darwin':
            return os.path.join(
                os.path.dirname(__file__),
                '../../resources/rhino/resources/contexts/mac/coffee_maker_mac.rhn'
            )
        elif platform.system() == 'Linux':
            if platform.machine() == 'x86_64':
                return os.path.join(
                    os.path.dirname(__file__),
                    '../../resources/rhino/resources/contexts/linux/coffee_maker_linux.rhn'
                )
            else:
                cpu_info = subprocess.check_output(['cat',
                                                    '/proc/cpuinfo']).decode()
                hardware_info = [
                    x for x in cpu_info.split('\n') if 'Hardware' in x
                ][0]

                if 'BCM' in hardware_info:
                    return os.path.join(
                        os.path.dirname(__file__),
                        '../../resources/rhino/resources/contexts/raspberry-pi/coffee_maker_raspberry-pi.rhn'
                    )
                elif 'AM33' in hardware_info:
                    return os.path.join(
                        os.path.dirname(__file__),
                        '../../resources/rhino/resources/contexts/beaglebone/coffee_maker_beaglebone.rhn'
                    )
                else:
                    raise NotImplementedError('Unsupported CPU:\n%s' %
                                              cpu_info)
        elif platform.system() == 'Windows':
            return os.path.join(
                os.path.dirname(__file__),
                '../../resources/rhino/resources/contexts/windows/coffee_maker_windows.rhn'
            )
        else:
            raise NotImplementedError('Unsupported platform')

    def _wake_word_callback(self):
        self._is_wake_word_detected = True

    def _inference_callback(self, inference):
        self._inference = inference

    def setUp(self):
        self._pv = Picovoice(
            keyword_path=pvporcupine.KEYWORD_PATHS['picovoice'],
            wake_word_callback=self._wake_word_callback,
            context_path=self._context_path(),
            inference_callback=self._inference_callback)

        self._is_wake_word_detected = False
        self._inference = None

    def tearDown(self):
        self._pv.delete()

    def test_process(self):
        audio, sample_rate = \
            soundfile.read(
                os.path.join(os.path.dirname(__file__), '../../resources/audio_samples/picovoice-coffee.wav'),
                dtype='int16')

        for i in range(len(audio) // self._pv.frame_length):
            frame = audio[i * self._pv.frame_length:(i + 1) *
                          self._pv.frame_length]
            self._pv.process(frame)

        self.assertTrue(self._is_wake_word_detected)
        self.assertEqual(self._inference.intent, 'orderBeverage')
        self.assertEqual(self._inference.slots,
                         dict(size='large', beverage='coffee'))

    def test_process_again(self):
        self.test_process()
Example #10
0
class PicovoiceDemo(Thread):
    """Microphone demo thread: streams audio into a Picovoice engine,
    prints wake-word / inference events, and optionally records everything
    it heard to a WAV file on exit."""

    def __init__(self,
                 keyword_path,
                 context_path,
                 porcupine_library_path=None,
                 porcupine_model_path=None,
                 porcupine_sensitivity=0.5,
                 rhino_library_path=None,
                 rhino_model_path=None,
                 rhino_sensitivity=0.5,
                 output_path=None):
        """Create the Picovoice engine.

        :param keyword_path: Absolute path to a Porcupine keyword file.
        :param context_path: Absolute path to a Rhino context file.
        :param porcupine_library_path: Optional path to Porcupine's dynamic library.
        :param porcupine_model_path: Optional path to Porcupine's model file.
        :param porcupine_sensitivity: Wake-word sensitivity within [0, 1].
        :param rhino_library_path: Optional path to Rhino's dynamic library.
        :param rhino_model_path: Optional path to Rhino's model file.
        :param rhino_sensitivity: Inference sensitivity within [0, 1].
        :param output_path: If given, captured audio is written there on exit.
        """
        super(PicovoiceDemo, self).__init__()

        self._picovoice = Picovoice(
            keyword_path=keyword_path,
            wake_word_callback=self._wake_word_callback,
            context_path=context_path,
            inference_callback=self._inference_callback,
            porcupine_library_path=porcupine_library_path,
            porcupine_model_path=porcupine_model_path,
            porcupine_sensitivity=porcupine_sensitivity,
            rhino_library_path=rhino_library_path,
            rhino_model_path=rhino_model_path,
            rhino_sensitivity=rhino_sensitivity)

        self.output_path = output_path
        if self.output_path is not None:
            # Frames are buffered here and written out in run()'s cleanup.
            self._recorded_frames = list()

    @staticmethod
    def _wake_word_callback():
        print('[wake word]\n')

    @staticmethod
    def _inference_callback(inference):
        # Pretty-print the inference: intent and slots when understood,
        # otherwise a short failure note.
        if inference.is_understood:
            print('{')
            print("  intent : '%s'" % inference.intent)
            print('  slots : {')
            for slot, value in inference.slots.items():
                print("    %s : '%s'" % (slot, value))
            print('  }')
            print('}\n')
        else:
            print("Didn't understand the command.\n")

    def run(self):
        """Stream microphone audio into the engine until Ctrl-C; on exit,
        optionally flush the recorded frames to ``self.output_path``."""
        pa = None
        audio_stream = None

        try:
            pa = pyaudio.PyAudio()

            audio_stream = pa.open(
                rate=self._picovoice.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=self._picovoice.frame_length)

            print('[Listening ...]')

            while True:
                pcm = audio_stream.read(self._picovoice.frame_length)
                pcm = struct.unpack_from("h" * self._picovoice.frame_length,
                                         pcm)

                # Record before processing so the saved file contains
                # everything the engine saw.
                if self.output_path is not None:
                    self._recorded_frames.append(pcm)

                self._picovoice.process(pcm)
        except KeyboardInterrupt:
            sys.stdout.write('\b' * 2)
            print('Stopping ...')
        finally:
            # Release audio resources first, then persist the recording,
            # then delete the engine.
            if audio_stream is not None:
                audio_stream.close()

            if pa is not None:
                pa.terminate()

            if self.output_path is not None and len(self._recorded_frames) > 0:
                recorded_audio = np.concatenate(self._recorded_frames,
                                                axis=0).astype(np.int16)
                soundfile.write(self.output_path,
                                recorded_audio,
                                samplerate=self._picovoice.sample_rate,
                                subtype='PCM_16')

            self._picovoice.delete()

    @classmethod
    def show_audio_devices(cls):
        """Print index/name/rate/input-channel info for every audio device."""
        fields = ('index', 'name', 'defaultSampleRate', 'maxInputChannels')

        pa = pyaudio.PyAudio()

        for i in range(pa.get_device_count()):
            info = pa.get_device_info_by_index(i)
            print(', '.join("'%s': '%s'" % (k, str(info[k])) for k in fields))

        pa.terminate()