def _compile_vocabulary(self, phrases):
    """
    Compiles the Julius grammar and dictionary files for the given phrases.

    A grammar file and a voca file are written into a temporary directory,
    `mkdfa.pl` is run inside that directory, and the resulting `.dfa` and
    `.dict` files are moved to self.dfa_file and self.dict_file. The
    temporary directory is always removed, even when a step fails.

    The lexicon archive and archive member default to the bundled VoxForge
    data and can be overridden by the `julius` section of profile.yml
    (keys `lexicon` and `lexicon_archive_member`).

    Arguments:
        phrases -- passed through to self._get_grammar and
                   self._get_word_defs
    """
    prefix = 'dingdang'

    lexicon_file = dingdangpath.data('julius-stt', 'VoxForge.tgz')
    lexicon_archive_member = 'VoxForge/VoxForgeDict'
    profile_path = dingdangpath.config('profile.yml')
    if os.path.exists(profile_path):
        with open(profile_path, 'r') as f:
            profile = yaml.safe_load(f)
        # safe_load returns None for an empty file, and the 'julius' key
        # may hold something other than a mapping; tolerate both.
        julius_conf = None
        if isinstance(profile, dict):
            julius_conf = profile.get('julius')
        if isinstance(julius_conf, dict):
            if 'lexicon' in julius_conf:
                lexicon_file = julius_conf['lexicon']
            if 'lexicon_archive_member' in julius_conf:
                lexicon_archive_member = \
                    julius_conf['lexicon_archive_member']

    lexicon = JuliusVocabulary.VoxForgeLexicon(lexicon_file,
                                               lexicon_archive_member)

    tmpdir = tempfile.mkdtemp()
    try:
        # Create grammar file
        tmp_grammar_file = os.path.join(tmpdir,
                                        os.extsep.join([prefix, 'grammar']))
        with open(tmp_grammar_file, 'w') as f:
            grammar = self._get_grammar(phrases)
            # The 'S' rules are written first, then every other rule.
            for definition in grammar.pop('S'):
                f.write("%s: %s\n" % ('S', ' '.join(definition)))
            for name, definitions in grammar.items():
                for definition in definitions:
                    f.write("%s: %s\n" % (name, ' '.join(definition)))

        # Create voca file
        tmp_voca_file = os.path.join(tmpdir,
                                     os.extsep.join([prefix, 'voca']))
        with open(tmp_voca_file, 'w') as f:
            word_defs = self._get_word_defs(lexicon, phrases)
            for category, words in word_defs.items():
                f.write("%% %s\n" % category)
                for word, phoneme in words:
                    f.write("%s\t\t\t%s\n" % (word, phoneme))

        # mkdfa.pl -- run inside tmpdir via cwd= instead of os.chdir() so
        # the working directory of this process is never changed.
        cmd = ['mkdfa.pl', prefix]
        with tempfile.SpooledTemporaryFile() as out_f:
            subprocess.call(cmd, stdout=out_f, stderr=out_f, cwd=tmpdir)
            out_f.seek(0)
            for line in out_f.read().splitlines():
                line = line.strip()
                if line:
                    self._logger.debug(line)

        tmp_dfa_file = os.path.join(tmpdir,
                                    os.extsep.join([prefix, 'dfa']))
        tmp_dict_file = os.path.join(tmpdir,
                                     os.extsep.join([prefix, 'dict']))
        shutil.move(tmp_dfa_file, self.dfa_file)
        shutil.move(tmp_dict_file, self.dict_file)
    finally:
        # Always clean up, including when mkdfa.pl produced no output files.
        shutil.rmtree(tmpdir)
def handle(text, mic, profile):
    """
    Takes a photo with the attached camera and passes it to uploadImage.

    The capture settings (quality, count-down, destination directory,
    flips, sound, camera type) are hard-coded below. With usb_camera set
    the photo is taken with `fswebcam`, otherwise with `raspistill`. The
    image file is deleted after uploadImage has been called.

    Arguments:
        text -- user-input, typically transcribed speech (not used here)
        mic -- used to interact with the user (for both input and output)
        profile -- contains information related to the user (not used here)

    Returns:
        True after the photo was captured and handed to uploadImage;
        None when the capture command could not be run or exited with a
        non-zero status.
    """
    quality = 100
    count_down = 3
    dest_path = os.path.expanduser('~/Pictures')
    vertical_flip = False
    horizontal_flip = False
    sound = True
    usb_camera = True

    dest_file = os.path.join(dest_path, "%s.jpg" % time.time())

    # Both commands are argv lists run without a shell, so a destination
    # path containing spaces or shell metacharacters is passed verbatim.
    if usb_camera:
        command = ['fswebcam', '--no-banner', '-r', '1024x765', '-q']
        # NOTE(review): the '-s v' / '-s h' flip arguments are kept as in
        # the original command string -- confirm against the fswebcam
        # manual that they really flip the image.
        if vertical_flip:
            command.extend(['-s', 'v'])
        if horizontal_flip:
            command.extend(['-s', 'h'])
        command.append(dest_file)
    else:
        command = ['raspistill', '-o', dest_file, '-q', str(quality)]
        if count_down > 0 and sound:
            command.extend(['-t', str(count_down * 1000)])
        if vertical_flip:
            command.append('-vf')
        if horizontal_flip:
            command.append('-hf')

    if sound and count_down > 0:
        mic.say(u"收到,%d秒后启动拍照" % (count_down))
        if usb_camera:
            # Wait out the announced count-down before starting fswebcam.
            time.sleep(count_down)

    try:
        res = subprocess.call(command)
    except OSError:
        # The camera binary is missing or not executable; report it the
        # same way as a failed capture.
        res = -1

    if res != 0:
        if sound:
            mic.say(u"拍照失败,请检查相机是否连接正确")
        return

    if sound:
        mic.say(dingdangpath.data('audio', 'camera.wav'), True)

    uploadImage(dest_file)

    try:
        os.remove(dest_file)
    except OSError:
        # Best effort: the file may already be gone.
        pass
    return True
def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True,
                             MUSIC=False):
    """
    Plays the high beep and records microphone audio until it goes quiet
    or the time limit is reached.

    Audio is read in CHUNK-sized pieces. Recording stops as soon as the
    average score of the last 30 chunks falls below 0.9 * THRESHOLD, or
    after RATE // CHUNK * LISTEN_TIME chunks have been read.

    NOTE(review): the body visible here ends after the capture loop -- it
    neither closes the stream nor returns the transcription that the
    original docstring ("Returns a list of the matching options or None")
    promises. Confirm against the complete function.

    Arguments:
        THRESHOLD -- score threshold; self.fetchThreshold() is used when
                     None
        LISTEN -- not used in the visible body
        MUSIC -- not used in the visible body
    """
    RATE = 16000
    CHUNK = 1024
    LISTEN_TIME = 12
    # Recording stops once the rolling average drops below this fraction
    # of THRESHOLD.
    SILENCE_RATIO = 0.9

    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()

    self.speaker.play(dingdangpath.data('audio', 'beep_hi.wav'))

    # prepare recording stream
    stream = self._audio.open(format=pyaudio.paInt16,
                              channels=1,
                              rate=RATE,
                              input=True,
                              frames_per_buffer=CHUNK)

    frames = []
    # increasing the range results in a longer pause after the command;
    # the window starts above THRESHOLD so recording cannot stop at once
    lastN = [THRESHOLD * 1.2 for i in range(30)]

    # Floor division keeps the chunk count an integer on Python 2 and 3.
    for i in range(0, RATE // CHUNK * LISTEN_TIME):
        try:
            data = stream.read(CHUNK)
            frames.append(data)
            score = self.getScore(data)

            lastN.pop(0)
            lastN.append(score)

            average = sum(lastN) / float(len(lastN))

            if average < THRESHOLD * SILENCE_RATIO:
                break
        except Exception as e:
            # A failed read must not abort the recording; log and go on.
            self._logger.warning(e)
            continue
def get_keyword_phrases():
    """
    Reads the keyword phrases file from the dingdang data directory.

    Returns:
        A list with every non-blank line of the file, stripped of
        surrounding whitespace, in file order.
    """
    keyword_path = dingdangpath.data('keyword_phrases')
    with open(keyword_path, mode="r") as handle:
        stripped = (raw.strip() for raw in handle)
        # Lines that are empty after stripping are dropped.
        return [entry for entry in stripped if entry]
def say(self, phrase, cache=False):
    """
    Speaks a phrase, optionally keeping the synthesized audio as a cache.

    When cache is true and an mp3 for this engine and phrase already
    exists in the data 'audio' directory, that file is played. Otherwise
    the phrase is synthesized with self.get_speech and played; the
    resulting file is then either moved into the cache (cache true) or
    deleted (cache false). Nothing is played when get_speech returns
    None.

    Arguments:
        phrase -- the text to speak
        cache -- whether to reuse and store the audio under the data
                 'audio' directory
    """
    # Local import: only this method needs shutil.
    import shutil

    self._logger.debug(u"Saying '%s' with '%s'", phrase, self.SLUG)
    cache_file_path = dingdangpath.data(
        'audio',
        self.SLUG + phrase + '.mp3'
    )

    if cache and os.path.exists(cache_file_path):
        self._logger.info(
            "found speech in cache, playing...[%s]", cache_file_path)
        self.play_mp3(cache_file_path)
        return

    tmpfile = self.get_speech(phrase)
    if tmpfile is None:
        return

    self.play_mp3(tmpfile)
    if cache:
        self._logger.info(
            "not found speech in cache, caching...[%s]", cache_file_path)
        # shutil.move also works when the temporary file and the cache
        # directory are on different filesystems, where os.rename fails.
        shutil.move(tmpfile, cache_file_path)
    else:
        os.remove(tmpfile)
frames.append(data) score = self.getScore(data) lastN.pop(0) lastN.append(score) average = sum(lastN) / float(len(lastN)) # TODO: 0.8 should not be a MAGIC NUMBER! if average < THRESHOLD * 0.8: break except Exception, e: self._logger.error(e) continue self.speaker.play(dingdangpath.data('audio', 'beep_lo.wav')) # save the audio data try: stream.stop_stream() stream.close() except Exception, e: self._logger.debug(e) pass with tempfile.SpooledTemporaryFile(mode='w+b') as f: wav_fp = wave.open(f, 'wb') wav_fp.setnchannels(1) wav_fp.setsampwidth(pyaudio.get_sample_size(pyaudio.paInt16)) wav_fp.setframerate(RATE) wav_fp.writeframes(''.join(frames))
def activeListenToAllOptions(self, THRESHOLD=None, LISTEN=True,
                             MUSIC=False):
    """
    Records microphone audio until it goes quiet or the time limit is
    reached, saves it as a WAV file and transcribes it.

    Unless self.trans_mode is set, a high beep is played before and a low
    beep after recording. Audio is read in CHUNK-sized pieces; recording
    stops as soon as the average score of the last 40 chunks falls below
    0.8 * THRESHOLD, or after RATE // CHUNK * LISTEN_TIME chunks. The
    recording is written to audio/listen_content.wav under
    dingdangpath.DATA_PATH.

    Arguments:
        THRESHOLD -- score threshold; self.fetchThreshold() is used when
                     None
        LISTEN -- not used by this method
        MUSIC -- not used by this method

    Returns:
        The result of self.active_stt_engine.transcribe(frames), where
        frames is the list of recorded chunks.
    """
    RATE = 16000
    CHUNK = 1024
    LISTEN_TIME = 12
    CHANNELS = 1
    # Recording stops once the rolling average drops below this fraction
    # of THRESHOLD.
    SILENCE_RATIO = 0.8

    if not self.trans_mode:
        self.say(dingdangpath.data('audio', 'beep_hi.wav'), True)

    file_path = os.path.join(dingdangpath.DATA_PATH,
                             'audio/listen_content.wav')

    # check if no threshold provided
    if THRESHOLD is None:
        THRESHOLD = self.fetchThreshold()

    # prepare recording stream
    stream = self._audio.open(format=pyaudio.paInt16,
                              channels=CHANNELS,
                              rate=RATE,
                              input=True,
                              frames_per_buffer=CHUNK)

    frames = []
    # increasing the range results in a longer pause after the command;
    # the window starts above THRESHOLD so recording cannot stop at once
    lastN = [THRESHOLD * 1.2 for i in range(40)]

    # Floor division keeps the chunk count an integer on Python 2 and 3.
    for i in range(0, RATE // CHUNK * LISTEN_TIME):
        try:
            data = stream.read(CHUNK, exception_on_overflow=False)
            frames.append(data)
            score = self.getScore(data)

            lastN.pop(0)
            lastN.append(score)

            average = sum(lastN) / float(len(lastN))

            if average < THRESHOLD * SILENCE_RATIO:
                break
        except Exception as e:
            # str(e) instead of e.message: the attribute does not exist
            # on Python 3 exceptions.
            print("异常:" + str(e))
            continue

    # Release the input stream; a failure here must not prevent the
    # recording from being saved.
    try:
        stream.stop_stream()
        stream.close()
    except Exception as e:
        print("异常:" + str(e))

    # save the audio data
    try:
        wf = wave.open(file_path, 'wb')
        try:
            wf.setframerate(RATE)
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self._audio.get_sample_size(pyaudio.paInt16))
            wf.writeframes(b''.join(frames))
        finally:
            # Close the file even when writing fails.
            wf.close()
    except Exception as e:
        print("异常:" + str(e))

    if not self.trans_mode:
        self.say(dingdangpath.data('audio', 'beep_lo.wav'), True)

    return self.active_stt_engine.transcribe(frames)