Example #1
    def capture(self, vad_throwaway_frames=VAD_THROWAWAY_FRAMES):
        audio = ""

        pcm = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL,
                            self.config['device'])
        pcm.setchannels(1)
        pcm.setrate(VAD_SAMPLERATE)
        pcm.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        pcm.setperiodsize(VAD_PERIOD)

        # aggressiveness 2 on webrtcvad's 0-3 scale: a middle ground between
        # missing quiet speech and triggering on background noise
        vad = webrtcvad.Vad(2)

        silenceRun = 0
        numSilenceRuns = 0
        thresholdSilenceMet = False

        frames = 0
        start = time.time()

        events.fire('capture_started')

        # record the first few frames without running VAD on them
        while frames < vad_throwaway_frames:
            length, data = pcm.read()
            frames += 1
            if length:
                audio += data

        # now do VAD
        while ((thresholdSilenceMet is False)
               and ((time.time() - start) < self.config['timeout'])):
            length, data = pcm.read()
            if length:
                audio += data

                if length == VAD_PERIOD:
                    isSpeech = vad.is_speech(data, VAD_SAMPLERATE)

                    if not isSpeech:
                        silenceRun += 1
                    else:
                        silenceRun = 0
                        # count speech frames so that the silence timeout
                        # below only applies once the user has started talking
                        numSilenceRuns += 1

            # only count silence runs after the first one
            # (allow user to speak for total of max recording length if they haven't said anything yet)
            if (numSilenceRuns != 0) and (
                (silenceRun * VAD_FRAME_MS) > VAD_SILENCE_TIMEOUT):
                thresholdSilenceMet = True

        # note: this writes raw 16-bit PCM without a WAV header; the .wav
        # extension is cosmetic (the AVS upload is raw audio/L16 anyway)
        path = TMP_PATH + str(uuid.uuid4()) + '.wav'

        with open(path, 'wb') as rf:
            rf.write(audio)

        pcm.close()

        events.fire('capture_fullfilled', audio=path)
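
The method relies on several module-level constants that the snippet does not define. A plausible set of definitions, with values chosen as assumptions to satisfy webrtcvad's requirements (16-bit mono audio at 8/16/32/48 kHz in 10/20/30 ms frames):

    # Assumed module-level constants; values are illustrative, not from the source.
    VAD_SAMPLERATE = 16000       # webrtcvad supports 8, 16, 32 or 48 kHz
    VAD_FRAME_MS = 30            # webrtcvad accepts 10, 20 or 30 ms frames
    VAD_PERIOD = (VAD_SAMPLERATE / 1000) * VAD_FRAME_MS  # samples per frame (480)
    VAD_THROWAWAY_FRAMES = 10    # initial frames skipped while the mic settles
    VAD_SILENCE_TIMEOUT = 1000   # trailing silence (ms) that ends the capture
    TMP_PATH = '/tmp/'           # where captured audio files are written

With these values each pcm.read() returns one 30 ms frame of 480 samples (960 bytes), which is exactly what vad.is_speech() accepts at 16 kHz.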
Example #2
    def capture(self, type, *args, **kwargs):
        # ignore wake words other than the one this handler is configured for
        if kwargs['id'] != self.config['trigger']:
            return

        if not self.config['silent']:
            events.fire('speech_requested', audio=RES_PATH + 'alexayes.mp3')

        events.fire('capture_requested')
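
How this handler gets invoked is not shown. Presumably it is subscribed to the detector's completion event, whose id keyword matches the kwargs['id'] check above; a sketch of the assumed wiring (the component name is invented):

    # Assumed wiring (component name invented): route wake-word detection
    # results to this handler so a matching trigger starts a capture.
    events.register('detection_fullfilled', trigger.capture)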
Example #3
    def boot(self, type, *args, **kwargs):
        # optional connectivity check, currently disabled:
        # sys.stdout.write("Trying to reach Amazon...")
        # while not check_network():
        #     sys.stdout.write(".")
        # print("")
        # print("Connection OK")

        if not self._get_token():
            sys.exit('could not obtain an AVS access token')

        events.fire('greetings_requested')
        events.fire('detection_requested')
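
_get_token() is not part of the example. One plausible implementation uses Amazon's standard Login with Amazon refresh-token exchange; the endpoint and form fields below are the public LWA ones, but the config keys and the idea that this project works this way are assumptions:

    def _get_token(self):
        # Hypothetical sketch: trade a stored refresh token for an access
        # token via Login with Amazon. The config keys are assumptions.
        payload = {
            'grant_type': 'refresh_token',
            'refresh_token': self.config['refresh_token'],
            'client_id': self.config['client_id'],
            'client_secret': self.config['client_secret'],
        }
        response = requests.post('https://api.amazon.com/auth/o2/token',
                                 data=payload)
        if response.status_code != 200:
            return None
        return response.json()['access_token']

A real implementation would also cache the token until its expires_in window runs out instead of requesting a fresh one on every call.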
Example #4
			action="store_true",
			default=False,
			help="start without saying hello")
	parser.add_option('-d', '--debug',
			dest="debug",
			action="store_true",
			default=False,
			help="display debug messages")

	cmdopts, cmdargs = parser.parse_args()
	config['debug']  = cmdopts.debug
	config['silent'] = cmdopts.silent

	# debug logger
	def log(type, *args, **kwargs):
		print('> {} {}'.format(type, kwargs))

	# wiring logger to pubsub
	events.register('*', log)

	# base components
	avs = AVS(config['avs'])
	microphone = Microphone(config['microphone'])
	synthetizer = Synthetizer(config['synthetizer'])

	# third party components


	# Ignition
	events.fire('boot_requested')
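
The config dict handed to each component is loaded elsewhere. A sketch of the layout these snippets collectively expect, with keys inferred from usage and values as placeholders (the 'avs' sub-keys match the hypothetical _get_token sketch above):

    # Sketch of the expected config layout; values are placeholders.
    config = {
        'debug': False,
        'silent': False,
        'avs': {
            'refresh_token': '...',      # assumption, see _get_token sketch
            'client_id': '...',
            'client_secret': '...',
        },
        'microphone': {
            'device': 'default',         # ALSA capture device (Example #1)
            'timeout': 10,               # max recording length in seconds
        },
        'synthetizer': {},               # spelling kept from the source identifiers
    }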
Example #5
    def detect(self):
        # create decoders on the fly
        if not self.decoders:
            self.decoders = []

            for id, phrase in self.config['triggers'].iteritems():
                config = Decoder.default_config()

                # use the bundled US English acoustic model and
                # pronunciation dictionary
                config.set_string('-hmm',
                                  os.path.join(get_model_path(), 'en-us'))
                config.set_string('-dict',
                                  os.path.join(get_model_path(),
                                               'cmudict-en-us.dict'))

                # specify recognition key phrase
                config.set_string('-keyphrase', phrase)
                config.set_float('-kws_threshold', 1e-5)

                # silence pocketsphinx's very verbose logging
                # (could be made conditional on self.config['debug'])
                config.set_string('-logfn', '/dev/null')

                decoder = Decoder(config)
                decoder.id = id

                self.decoders.append(decoder)

        events.fire('detection_started')

        # start decoding
        for decoder in self.decoders:
            decoder.start_utt()

        pcm = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL,
                            self.config['device'])
        pcm.setchannels(1)
        pcm.setrate(16000)
        pcm.setformat(alsaaudio.PCM_FORMAT_S16_LE)
        pcm.setperiodsize(1024)

        phrase = None
        triggered = False
        while not triggered:
            _, buffer = pcm.read()

            for decoder in self.decoders:
                decoder.process_raw(buffer, False, False)
                triggered = decoder.hyp() is not None

                if triggered:
                    phrase = decoder.id
                    break

        pcm.close()
        pcm = None

        for decoder in self.decoders:
            decoder.end_utt()

        events.fire('detection_fullfilled', id=phrase)
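
The decoders are built from a triggers mapping of ids to key phrases. A minimal illustration of that part of the detector's config (the ids and phrases are invented):

    # Illustrative detector config: each trigger id maps to a key phrase.
    config = {
        'device': 'default',             # ALSA capture device
        'triggers': {
            'alexa': 'alexa',
            'computer': 'hey computer',
        },
    }

Each phrase gets its own pocketsphinx decoder, and the id of whichever decoder produces a hypothesis first is reported in the detection_fullfilled event. The -kws_threshold value trades false alarms against missed detections and usually needs per-phrase tuning.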
Example #6
    def process(self, type, *args, **kwargs):
        # create request
        url = 'https://access-alexa-na.amazon.com/v1/avs/speechrecognizer/recognize'
        headers = {'Authorization': 'Bearer %s' % self._get_token()}
        payload = {
            "messageHeader": {
                "deviceContext": [{
                    "name": "playbackState",
                    "namespace": "AudioPlayer",
                    "payload": {
                        "streamId": "",
                        "offsetInMilliseconds": "0",
                        "playerActivity": "IDLE"
                    }
                }]
            },
            "messageBody": {
                "profile": "alexa-close-talk",
                "locale": "en-us",
                "format": "audio/L16; rate=16000; channels=1"
            }
        }

        with open(kwargs['audio'], 'rb') as audio:
            files = [('file', ('request', json.dumps(payload),
                               'application/json; charset=UTF-8')),
                     ('file', ('audio', audio,
                               'audio/L16; rate=16000; channels=1'))]

            # send request
            response = requests.post(url, headers=headers, files=files)

        # cleanup right after use
        os.remove(kwargs['audio'])

        if self.config['debug']:
            print(response)

        if response.status_code == 204:
            events.fire('detection_requested')
            return

        if response.status_code != 200:
            events.fire('error_report_requested', response=response)
            return

        wrapper = "Content-Type: " + response.headers[
            'content-type'] + '\r\n\r\n' + response.content
        message = email.message_from_string(wrapper)

        for payload in message.get_payload():
            content_type = payload.get_content_type()
            response_body = payload.get_payload()

            # save synthesized speech to disk, keyed by its Content-ID
            if content_type == "audio/mpeg":
                filename = (TMP_PATH
                            + payload.get('Content-ID').strip("<>") + ".mp3")
                with open(filename, 'wb') as audio:
                    audio.write(payload.get_payload())

            elif content_type == "application/json":
                data = json.loads(response_body)

            elif self.config['debug']:
                print('-- Unknown data returned:')
                print(response_body)

        # process audio items first (playlist support not implemented):
        # if 'audioItem' in data['messageBody']:
        #     self.player.play_playlist(data['messageBody'])

        # for readability
        directives = data['messageBody']['directives']

        if not directives:
            events.fire('detection_requested')
            return

        wishes = []

        for directive in directives:
            # speaker control such as volume or mute
            if directive['namespace'] == "Speaker":
                if directive['name'] == 'SetVolume':
                    wishes.append({
                        'type': 'volume',
                        'value': int(directive['payload']['volume']),
                        'relative':
                            directive['payload']['adjustmentType'] == 'relative',
                    })

                elif directive['name'] == 'SetMute':
                    pass

            # the service asks for another capture phase
            elif (directive['namespace'] == 'SpeechRecognizer'
                  and directive['name'] == 'listen'):
                events.fire('capture_requested',
                            vad_throwaway_frames=directive['payload']
                            ['timeoutIntervalInMillis'] / 116)

            # play speech
            elif directive['namespace'] == 'SpeechSynthesizer':
                if directive['name'] == 'speak':
                    wishes.append({
                        'type': 'speech',
                        # str.lstrip("cid:") strips a *set of characters*, not
                        # a prefix, so slice off the literal "cid:" instead
                        'value': mrl_fix(
                            'file://' + TMP_PATH
                            + directive['payload']['audioContent'][len('cid:'):]
                            + '.mp3'),
                    })

            # play music
            elif directive['namespace'] == 'AudioPlayer':
                if directive['name'] == 'play':
                    pass

        for wish in wishes:
            if wish['type'] == 'speech':
                events.fire('speech_requested', audio=wish['value'])

        time.sleep(.1)
        events.fire('detection_requested')
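
For reference, the JSON part that the directive loop consumes has roughly this shape, reconstructed purely from the fields the code reads (values are illustrative):

    # Illustrative messageBody, reconstructed from the fields read above.
    data = {
        'messageBody': {
            'directives': [
                {'namespace': 'SpeechSynthesizer', 'name': 'speak',
                 'payload': {'audioContent': 'cid:SomeContentId'}},
                {'namespace': 'Speaker', 'name': 'SetVolume',
                 'payload': {'volume': 50, 'adjustmentType': 'absolute'}},
                {'namespace': 'SpeechRecognizer', 'name': 'listen',
                 'payload': {'timeoutIntervalInMillis': 8000}},
            ],
        },
    }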
Example #7
    def greet(self, type, *args, **kwargs):
        events.fire('speech_requested', audio=RES_PATH + 'hello.mp3')
Example #8
    def _callback(self):
        events.fire('speech_fullfilled')
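
All of the examples lean on a small events pub/sub module that is never shown. A minimal sketch consistent with how it is used above, where handlers receive the topic as their first argument and '*' subscribes to everything (as the debug logger in Example #4 does); the implementation itself is an assumption:

    # Minimal sketch of the assumed events module (not from the source).
    _handlers = {}

    def register(topic, handler):
        # '*' acts as a wildcard subscription (used by the debug logger)
        _handlers.setdefault(topic, []).append(handler)

    def fire(topic, *args, **kwargs):
        for handler in _handlers.get(topic, []) + _handlers.get('*', []):
            handler(topic, *args, **kwargs)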