def capture(self, vad_throwaway_frames=VAD_THROWAWAY_FRAMES):
    """Record microphone audio until the speaker goes silent.

    Opens the configured ALSA capture device, discards the first
    *vad_throwaway_frames* frames (device warm-up noise), then keeps
    reading until webrtcvad reports a silence run longer than
    VAD_SILENCE_TIMEOUT or the configured 'timeout' elapses.  The raw
    PCM is written to a temp file and 'capture_fullfilled' is fired
    with its path.

    :param vad_throwaway_frames: number of initial frames to record but
        exclude from voice-activity detection.
    """
    # raw S16_LE PCM accumulator -- bytes, not text (was "" / text mode,
    # which corrupts binary audio on Python 3 and on Windows)
    audio = b""
    pcm = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL,
                        self.config['device'])
    pcm.setchannels(1)
    pcm.setrate(VAD_SAMPLERATE)
    pcm.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    pcm.setperiodsize(VAD_PERIOD)
    vad = webrtcvad.Vad(2)  # aggressiveness 2 (0 = permissive, 3 = strict)
    silenceRun = 0
    numSilenceRuns = 0
    thresholdSilenceMet = False
    frames = 0
    start = time.time()
    events.fire('capture_started')
    try:
        # do not count the first frames when doing VAD
        while frames < vad_throwaway_frames:
            length, data = pcm.read()
            frames = frames + 1
            if length:
                audio += data
        # now do VAD until the silence threshold or the overall timeout
        while ((thresholdSilenceMet is False)
               and ((time.time() - start) < self.config['timeout'])):
            length, data = pcm.read()
            if length:
                audio += data
                if length == VAD_PERIOD:
                    isSpeech = vad.is_speech(data, VAD_SAMPLERATE)
                    if not isSpeech:
                        silenceRun = silenceRun + 1
                    else:
                        silenceRun = 0
                        numSilenceRuns = numSilenceRuns + 1
            # only count silence runs after the first one
            # (allow user to speak for total of max recording length
            # if they haven't said anything yet)
            if (numSilenceRuns != 0) and (
                    (silenceRun * VAD_FRAME_MS) > VAD_SILENCE_TIMEOUT):
                thresholdSilenceMet = True
    finally:
        # always release the ALSA device, even if a read raises
        pcm.close()
    path = TMP_PATH + str(uuid.uuid4()) + '.wav'
    # binary mode: the buffer is raw PCM
    with open(path, 'wb') as rf:
        rf.write(audio)
    events.fire('capture_fullfilled', audio=path)
def capture(self, type, *args, **kwargs):
    """React to a detection event: acknowledge (unless silent) and start capture.

    Ignores events whose ``id`` is not this instance's configured trigger.
    """
    matched = kwargs['id'] == self.config['trigger']
    if matched:
        # audible acknowledgement, suppressed in silent mode
        if not self.config['silent']:
            events.fire('speech_requested', audio=RES_PATH + 'alexayes.mp3')
        events.fire('capture_requested')
def boot(self, type, *args, **kwargs):
    """Start the assistant: bail out without a token, else greet and listen."""
    if not self._get_token():
        # no AVS token means nothing downstream can work
        sys.exit()
    for event in ('greetings_requested', 'detection_requested'):
        events.fire(event)
action="store_true", default=False, help="start without saying hello") parser.add_option('-d', '--debug', dest="debug", action="store_true", default=False, help="display debug messages") cmdopts, cmdargs = parser.parse_args() config['debug'] = True # cmdopts.debug config['silent'] = cmdopts.silent # debug logger def log(type, *args, **kwargs): print('> {} {}'.format(type, kwargs)) # wiring logger to pubsub events.register('*', log) # base components avs = AVS(config['avs']) microphone = Microphone(config['microphone']) synthetizer = Synthetizer(config['synthetizer']) # third party components # Ignition events.fire('boot_requested')
def detect(self):
    """Block until one of the configured trigger phrases is spoken.

    Lazily builds one pocketsphinx keyword-spotting Decoder per trigger,
    then streams 16 kHz mono audio from the ALSA device through every
    decoder until one reports a hypothesis.  Fires 'detection_fullfilled'
    with the id of the trigger that matched.
    """
    # create decoders on the fly
    if not self.decoders:
        self.decoders = []
        # items() instead of py2-only iteritems(): the file already uses
        # py3-style print() calls, and items() works on both
        for id, phrase in self.config['triggers'].items():
            config = Decoder.default_config()
            # set recognition model to US
            config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
            config.set_string(
                '-dict',
                os.path.join(get_model_path(), 'cmudict-en-us.dict'))
            # specify recognition key phrase
            config.set_string('-keyphrase', phrase)
            config.set_float('-kws_threshold', 1e-5)
            # hide the VERY verbose logging information
            # if not self.config['debug']:
            config.set_string('-logfn', '/dev/null')
            decoder = Decoder(config)
            decoder.id = id
            self.decoders.append(decoder)
    events.fire('detection_started')
    # start decoding
    for decoder in self.decoders:
        decoder.start_utt()
    pcm = alsaaudio.PCM(alsaaudio.PCM_CAPTURE, alsaaudio.PCM_NORMAL,
                        self.config['device'])
    pcm.setchannels(1)
    pcm.setrate(16000)
    pcm.setformat(alsaaudio.PCM_FORMAT_S16_LE)
    pcm.setperiodsize(1024)
    phrase = None
    triggered = False
    try:
        while not triggered:
            # 'chunk' instead of 'buffer' (shadowed a builtin)
            _, chunk = pcm.read()
            for decoder in self.decoders:
                decoder.process_raw(chunk, False, False)
                triggered = decoder.hyp() is not None
                if triggered:
                    phrase = decoder.id
                    break
    finally:
        # release the ALSA device even if decoding raises
        pcm.close()
    pcm = None
    for decoder in self.decoders:
        decoder.end_utt()
    events.fire('detection_fullfilled', id=phrase)
def process(self, type, *args, **kwargs):
    """Send the captured audio to AVS and act on the returned directives.

    Posts the recording at kwargs['audio'] to the AVS speechrecognizer
    endpoint, unpacks the multipart response (JSON directives + mp3
    speech parts), stores the mp3 parts in TMP_PATH, then executes the
    directives: volume changes, re-capture requests, and speech playback.
    Always ends by firing 'detection_requested' to resume listening.
    """
    # create request
    url = 'https://access-alexa-na.amazon.com/v1/avs/speechrecognizer/recognize'
    headers = {'Authorization': 'Bearer %s' % self._get_token()}
    payload = {
        "messageHeader": {
            "deviceContext": [{
                "name": "playbackState",
                "namespace": "AudioPlayer",
                "payload": {
                    "streamId": "",
                    "offsetInMilliseconds": "0",
                    "playerActivity": "IDLE"
                }
            }]
        },
        "messageBody": {
            "profile": "alexa-close-talk",
            "locale": "en-us",
            "format": "audio/L16; rate=16000; channels=1"
        }
    }
    # binary mode: the capture file is raw PCM, not text
    with open(kwargs['audio'], 'rb') as audio:
        files = [('file', ('request', json.dumps(payload),
                           'application/json; charset=UTF-8')),
                 ('file', ('audio', audio,
                           'audio/L16; rate=16000; channels=1'))]
        # send request
        response = requests.post(url, headers=headers, files=files)
    # cleanup right after use
    os.remove(kwargs['audio'])
    print(response)
    if response.status_code == 204:
        # no content: nothing to do, go back to listening
        events.fire('detection_requested')
        return
    if response.status_code != 200:
        events.fire('error_report_requested', response=response)
        return
    # rebuild a MIME envelope so the stdlib email parser can split the parts
    wrapper = ("Content-Type: " + response.headers['content-type'] +
               '\r\n\r\n' + response.content)
    message = email.message_from_string(wrapper)
    data = None  # directives JSON; stays None if no JSON part is returned
    for payload in message.get_payload():
        content_type = payload.get_content_type()
        response_body = payload.get_payload()
        if content_type == "audio/mpeg":
            filename = (TMP_PATH + payload.get('Content-ID').strip("<>") +
                        ".mp3")
            with open(filename, 'wb') as audio:
                audio.write(response_body)
        elif content_type == "application/json":
            data = json.loads(response_body)
        elif self.config['debug']:
            print('-- Unknown data returned:')
            # print the unknown part itself ('data' was unrelated/unbound here)
            print(response_body)
    # process audio items first
    # if 'audioItem' in data['messageBody']:
    #     self.player.play_playlist(data['messageBody'])
    if data is None:
        # no JSON part: nothing actionable, resume listening
        events.fire('detection_requested')
        return
    # for lisibility
    directives = data['messageBody']['directives']
    if not directives or len(directives) == 0:
        events.fire('detection_requested')
        return
    wishes = []
    for directive in directives:
        # speaker control such as volume or mute
        if directive['namespace'] == "Speaker":
            if directive['name'] == 'SetVolume':
                wishes.append({
                    'type': 'volume',
                    'value': int(directive['payload']['volume']),
                    'relative':
                        directive['payload']['adjustmentType'] == 'relative'
                })
            elif directive['name'] == 'SetMute':
                pass
        # if need of a new capture phase
        elif (directive['namespace'] == 'SpeechRecognizer'
                and directive['name'] == 'listen'):
            events.fire('capture_requested',
                        vad_throwaway_frames=directive['payload']
                        ['timeoutIntervalInMillis'] / 116)
        # play speech
        elif directive['namespace'] == 'SpeechSynthesizer':
            if directive['name'] == 'speak':
                content_id = directive['payload']['audioContent']
                # lstrip("cid:") strips *characters*, not the prefix, and
                # mangles ids starting with c/i/d -- remove the prefix properly
                if content_id.startswith('cid:'):
                    content_id = content_id[4:]
                wishes.append({
                    'type': 'speech',
                    'value': mrl_fix("file://" + TMP_PATH + content_id +
                                     ".mp3")
                })
        # play music
        elif directive['namespace'] == 'AudioPlayer':
            if directive['name'] == 'play':
                pass
    for wish in wishes:
        if wish['type'] == 'speech':
            events.fire('speech_requested', audio=wish['value'])
            time.sleep(.1)
    events.fire('detection_requested')
def greet(self, type, *args, **kwargs):
    """Play the canned hello clip in response to a greeting request."""
    greeting_clip = RES_PATH + 'hello.mp3'
    events.fire('speech_requested', audio=greeting_clip)
def _callback(self):
    """Notify subscribers that speech playback has completed."""
    events.fire('speech_fullfilled')