Example 1
0
def do_recognition(args, recorder, recognizer, player, status_ui):
    """Set up the voice trigger and run the recognizer until Ctrl+C."""
    speak = aiy.audio.say
    command_actor = action.make_actor(speak)

    # Cloud Speech gets a few extra voice commands of its own.
    if args.cloud_speech:
        action.add_commands_just_for_cloud_speech_api(command_actor, speak)

    recognizer.add_phrases(command_actor)
    recognizer.set_audio_logging_enabled(args.audio_logging)

    # Build the trigger that starts each recognition round; imports are kept
    # local so only the selected trigger's dependencies are loaded.
    if args.trigger == 'gpio':
        import triggers.gpio
        trigger = triggers.gpio.GpioTrigger(channel=23)
        prompt = 'Press the button on GPIO 23'
    elif args.trigger == 'clap':
        import triggers.clap
        trigger = triggers.clap.ClapTrigger(recorder)
        prompt = 'Clap your hands'
    else:
        logger.error("Unknown trigger '%s'", args.trigger)
        return

    session = SyncMicRecognizer(command_actor, recognizer, recorder, player,
                                speak, trigger, status_ui,
                                args.assistant_always_responds)

    with session:
        if sys.stdout.isatty():
            print(prompt + ' then speak, or press Ctrl+C to quit...')

        # Idle forever; a KeyboardInterrupt from the user ends the process.
        while True:
            time.sleep(1)
Example 2
0
def do_recognition(args, recorder, recognizer, player, status_ui):
    """Wire a trigger-driven recognizer together and sleep until interrupted."""
    say = aiy.audio.say
    actor = action.make_actor(say)

    if args.cloud_speech:
        # Extra commands only make sense with the Cloud Speech backend.
        action.add_commands_just_for_cloud_speech_api(actor, say)

    recognizer.add_phrases(actor)
    recognizer.set_audio_logging_enabled(args.audio_logging)

    chosen = args.trigger
    if chosen == 'gpio':
        import triggers.gpio
        triggerer = triggers.gpio.GpioTrigger(channel=23)
        msg = 'Press the button on GPIO 23'
    elif chosen == 'clap':
        import triggers.clap
        triggerer = triggers.clap.ClapTrigger(recorder)
        msg = 'Clap your hands'
    else:
        # Unrecognized trigger name: log and bail out without starting.
        logger.error("Unknown trigger '%s'", chosen)
        return

    mic_recognizer = SyncMicRecognizer(
        actor, recognizer, recorder, player, say, triggerer, status_ui,
        args.assistant_always_responds)

    with mic_recognizer:
        if sys.stdout.isatty():
            print(msg + ' then speak, or press Ctrl+C to quit...')

        # Nothing else to do on this thread; wait for KeyboardInterrupt.
        while True:
            time.sleep(1)
Example 3
0
def do_recognition(args, recorder, recognizer, player):
    """Configure the recognizer, then loop servicing MQTT (or just sleep)."""

    global Dplayer, Dsay, Drecorder, Drecognizer, Dtriggerer

    say = tts.create_say(player)
    actor = action.make_actor(say)

    # Stash the key objects in module globals — presumably for debugging or
    # interactive inspection; TODO confirm against the rest of the module.
    Dplayer = player
    Dsay = say
    Drecorder = recorder
    Drecognizer = recognizer

    if args.cloud_speech:
        action.add_commands_just_for_cloud_speech_api(actor, say)

    recognizer.add_phrases(actor)
    recognizer.set_audio_logging_enabled(args.audio_logging)

    # Pick the trigger implementation; import lazily per branch.
    if args.trigger == 'gpio':
        import triggers.gpio
        triggerer = triggers.gpio.GpioTrigger(channel=23)
        msg = 'Press the button on GPIO 23'
    elif args.trigger == 'clap':
        import triggers.clap
        triggerer = triggers.clap.ClapTrigger(recorder)
        msg = 'Clap your hands'
    else:
        logger.error("Unknown trigger '%s'", args.trigger)
        return
    Dtriggerer = triggerer

    mic_recognizer = SyncMicRecognizer(
        actor, recognizer, recorder, player, say, triggerer,
        led_fifo=args.led_fifo)

    with mic_recognizer:
        if sys.stdout.isatty():
            print(msg + ' then speak, or press Ctrl+C to quit...')

        # Main idle loop: pump the MQTT client if one exists (it needs
        # regular loop() calls since it has no thread of its own),
        # otherwise nap.  Emit a heartbeat log line every 100 iterations.
        ticks = 0
        while True:
            ticks += 1
            if mqttclient:
                mqttclient.loop(timeout=0.2)
            else:
                time.sleep(0.2)
            if (ticks % 100) == 0:
                logger.info("tick")
Example 4
0
def do_assistant_library(args, recognizer, credentials, player, status_ui):
    """Drive the recognizer through the Google Assistant Library.

    The library talks to the audio hardware itself, so no recording is
    done from this Python code.
    """

    try:
        from google.assistant.library import Assistant
        from google.assistant.library.event import EventType
    except ImportError:
        print('''
ERROR: failed to import the Google Assistant Library. This is required for
"OK Google" hotwording, but is only available for Raspberry Pi 2/3. It can be
installed with:
    env/bin/pip install google-assistant-library==0.0.2''')
        sys.exit(1)

    say = tts.create_say(player)
    actor = action.make_actor(say)
    action.add_commands_just_for_cloud_speech_api(actor, say)

    recognizer.add_phrases(actor)

    def on_event(event):
        # Log every event, then map the interesting ones to UI state or
        # local command handling.  ``assistant`` is bound by the time the
        # event stream starts, so the closure reference below is safe.
        logging.info(event)
        kind = event.type

        if kind == EventType.ON_START_FINISHED:
            status_ui.status('ready')
            if sys.stdout.isatty():
                print(
                    'Say "OK, Google" then speak, or press Ctrl+C to quit...')
        elif kind == EventType.ON_CONVERSATION_TURN_STARTED:
            status_ui.status('listening')
        elif kind == EventType.ON_END_OF_UTTERANCE:
            status_ui.status('thinking')
        elif (kind == EventType.ON_RECOGNIZING_SPEECH_FINISHED
              and event.args and actor.can_handle(event.args['text'])):
            # A locally-handled command: optionally cut the Assistant off,
            # then run our own handler.
            if not args.assistant_always_responds:
                assistant.stop_conversation()
            actor.handle(event.args['text'])
        elif kind == EventType.ON_CONVERSATION_TURN_FINISHED:
            status_ui.status('ready')
        elif (kind == EventType.ON_ASSISTANT_ERROR
              and event.args and event.args['is_fatal']):
            sys.exit(1)

    with Assistant(credentials) as assistant:
        for event in assistant.start():
            on_event(event)
Example 5
0
def do_assistant_library(args, credentials, player, status_ui):
    """Run the Google Assistant Library event loop.

    The library owns the audio pipeline, so this function only reacts to
    the events it emits.
    """

    try:
        from google.assistant.library import Assistant
        from google.assistant.library.event import EventType
    except ImportError:
        print('''
ERROR: failed to import the Google Assistant Library. This is required for
"OK Google" hotwording, but is only available for Raspberry Pi 2/3. It can be
installed with:
    env/bin/pip install google-assistant-library==0.0.2''')
        sys.exit(1)

    say = aiy.audio.say
    actor = action.make_actor(say)

    def process_event(event):
        logging.info(event)
        etype = event.type

        if etype == EventType.ON_START_FINISHED:
            # Library is up; show readiness and prompt interactive users.
            status_ui.status('ready')
            if sys.stdout.isatty():
                print('Say "OK, Google" then speak, or press Ctrl+C to quit...')

        elif etype == EventType.ON_CONVERSATION_TURN_STARTED:
            status_ui.status('listening')

        elif etype == EventType.ON_END_OF_UTTERANCE:
            status_ui.status('thinking')

        elif (etype == EventType.ON_RECOGNIZING_SPEECH_FINISHED
                and event.args and actor.can_handle(event.args['text'])):
            # Recognized text matches one of our local commands.
            if not args.assistant_always_responds:
                assistant.stop_conversation()
            actor.handle(event.args['text'])

        elif etype == EventType.ON_CONVERSATION_TURN_FINISHED:
            status_ui.status('ready')

        elif (etype == EventType.ON_ASSISTANT_ERROR
                and event.args and event.args['is_fatal']):
            sys.exit(1)

    with Assistant(credentials) as assistant:
        for event in assistant.start():
            process_event(event)
Example 6
0
def do_recognition(args, recorder, recognizer, player, credentials):
    """Build the configured trigger and run the recognizer until Ctrl+C."""
    speak = tts.create_say(player)

    command_actor = action.make_actor(speak)

    if args.cloud_speech:
        action.add_commands_just_for_cloud_speech_api(command_actor, speak)

    recognizer.add_phrases(command_actor)
    recognizer.set_audio_logging_enabled(args.audio_logging)

    # Select and lazily import the requested trigger implementation.
    name = args.trigger
    if name == 'gpio':
        import triggers.gpio
        trigger = triggers.gpio.GpioTrigger(channel=23)
        prompt = 'Press the button on GPIO 23'
    elif name == 'clap':
        import triggers.clap
        trigger = triggers.clap.ClapTrigger(recorder)
        prompt = 'Clap your hands'
    elif name == 'hotword':
        import triggers.hotword
        trigger = triggers.hotword.HotwordTrigger(credentials)
        prompt = 'Say "Ok Google"'
    else:
        logger.error("Unknown trigger '%s'", name)
        return

    session = SyncMicRecognizer(
        command_actor, recognizer, recorder, player, speak, trigger,
        led_fifo=args.led_fifo)

    with session:
        if sys.stdout.isatty():
            print(prompt + ' then speak, or press Ctrl+C to quit...')

        # Park this thread; the user quits with KeyboardInterrupt.
        while True:
            time.sleep(1)
Example 7
0
def main():
    """Parse configuration, build the audio stack, and start recognition."""
    # Argument order is preserved so --help output stays identical.
    parser = configargparse.ArgParser(
        default_config_files=CONFIG_FILES,
        description="Act on voice commands using Google's speech recognition")
    parser.add_argument(
        '-I', '--input-device', default='default',
        help='Name of the audio input device')
    parser.add_argument(
        '-O', '--output-device', default='default',
        help='Name of the audio output device')
    parser.add_argument(
        '-T', '--trigger', default='gpio',
        help='Trigger to use {\'clap\', \'gpio\'}')
    parser.add_argument(
        '--cloud-speech', action='store_true',
        help='Use the Cloud Speech API instead of the Assistant API')
    parser.add_argument(
        '-L', '--language', default='en-US',
        help='Language code to use for speech (default: en-US)')
    parser.add_argument(
        '-l', '--led-fifo', default='/tmp/status-led',
        help='Status led control fifo')
    parser.add_argument(
        '-p', '--pid-file', default=PID_FILE,
        help='File containing our process id for monitoring')
    parser.add_argument(
        '--audio-logging', action='store_true',
        help='Log all requests and responses to WAV files in /tmp')
    parser.add_argument(
        '--assistant-secrets',
        help='Path to client secrets for the Assistant API')
    parser.add_argument(
        '--cloud-speech-secrets',
        help='Path to service account credentials for the '
        'Cloud Speech API')

    opts = parser.parse_args()

    create_pid_file(opts.pid_file)
    i18n.set_language_code(opts.language, gettext_install=True)

    out_player = audio.Player(opts.output_device)
    speak = tts.create_say(out_player)

    command_actor = action.make_actor(speak)
    # NOTE(review): ``listener`` is never used afterwards — the constructor
    # may register callbacks as a side effect; confirm before removing.
    listener = tj.Client(command_actor, speak, out_player)

    # Choose the speech backend from the flags; fall back to the legacy
    # credentials path if the configured one is absent.
    if opts.cloud_speech:
        creds_path = os.path.expanduser(opts.cloud_speech_secrets)
        if not os.path.exists(creds_path) and os.path.exists(
                OLD_SERVICE_CREDENTIALS):
            creds_path = OLD_SERVICE_CREDENTIALS
        recognizer = speech.CloudSpeechRequest(creds_path)
    else:
        credentials = try_to_get_credentials(
            os.path.expanduser(opts.assistant_secrets))
        recognizer = speech.AssistantSpeechRequest(credentials)

    mic = audio.Recorder(
        input_device=opts.input_device,
        channels=1,
        bytes_per_sample=speech.AUDIO_SAMPLE_SIZE,
        sample_rate_hz=speech.AUDIO_SAMPLE_RATE_HZ)
    with mic:
        do_recognition(opts, mic, recognizer, out_player)