def __init__(self, source=None, volume=None, aggressiveness=None,
             model_dir=None, lang=None, config=CONFIG):
    EventEmitter.__init__(self)
    self.config = config

    # ensure default values
    for k in CONFIG["listener"]:
        if k not in self.config["listener"]:
            self.config["listener"][k] = CONFIG["listener"][k]

    volume = volume or self.config["listener"]["default_volume"]
    aggressiveness = aggressiveness or self.config["listener"][
        "default_aggressiveness"]
    model_dir = model_dir or self.config["listener"]["default_model_dir"]

    self.lang = lang or self.config["lang"]
    if "-" in self.lang:
        self.lang = self.lang.split("-")[0]
    if "{lang}" in model_dir:
        model_dir = model_dir.format(lang=self.lang)

    if not isdir(model_dir):
        if model_dir in self._default_models:
            logging.error(
                "you need to install the package: "
                "kaldi-chain-zamia-speech-{lang}".format(lang=self.lang))
        raise ModelNotFound

    self.rec = PulseRecorder(source_name=source, volume=volume)
    self.vad = VAD(aggressiveness=aggressiveness)
    logging.info("Loading model from %s ..." % model_dir)

    self.asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=model_dir,
                   kaldi_beam=self.config["listener"]["default_beam"],
                   kaldi_acoustic_scale=self.config["listener"][
                       "default_acoustic_scale"],
                   kaldi_frame_subsampling_factor=self.config["listener"][
                       "default_frame_subsampling_factor"])

    self._hotwords = dict(self.config["hotwords"])

def server_start(decoder):
    tcpServer = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # TCP
    tcpServer.bind(ADDRESS)
    # automatically resume listening after a failed connection
    while True:
        try:
            tcpServer.listen(1)  # backlog size
            # tcpServer.accept() returns a tuple: the client's socket object
            # and the client's address (ip, port)
            client_socket, client_address = tcpServer.accept()
            logging.info("connection from %s:%s"
                         % (client_address[0], client_address[1]))
        except (BlockingIOError, ConnectionResetError):
            continue  # client_socket is not valid here, retry accept()
        vad = VAD(aggressiveness=vad_level, sample_rate=SAMPLE_RATE,
                  max_utt_length=MAX_UTT_LENGTH)
        oneConnection(client_socket, decoder, vad)
        # Thread(target=oneThread, args=(client_socket, client_address, decoder)).start()
    tcpServer.close()

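# For reference: a minimal client sketch for the server above. This is an
# assumption-laden illustration, not part of the original code: HOST/PORT
# must match the server's ADDRESS, and the server is assumed to accept raw
# PCM bytes streamed over the socket; the chunk size is arbitrary.
import socket

HOST, PORT = "127.0.0.1", 8888  # assumed; must match ADDRESS
CHUNK = 4096                    # assumed chunk size

def stream_raw_audio(path):
    # Stream a raw PCM file to the decoding server in small chunks.
    with socket.create_connection((HOST, PORT)) as sock, open(path, "rb") as f:
        while True:
            chunk = f.read(CHUNK)
            if not chunk:
                break
            sock.sendall(chunk)
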
class KaldiWWSpotter(EventEmitter):
    _default_models = ["/opt/kaldi/model/kaldi-generic-en-tdnn_250",
                       "/opt/kaldi/model/kaldi-generic-de-tdnn_250"]

    def __init__(self, source=None, volume=None, aggressiveness=None,
                 model_dir=None, lang=None, config=CONFIG):
        EventEmitter.__init__(self)
        self.config = config

        # ensure default values
        for k in CONFIG["listener"]:
            if k not in self.config["listener"]:
                self.config["listener"][k] = CONFIG["listener"][k]

        volume = volume or self.config["listener"]["default_volume"]
        aggressiveness = aggressiveness or self.config["listener"][
            "default_aggressiveness"]
        model_dir = model_dir or self.config["listener"]["default_model_dir"]

        self.lang = lang or self.config["lang"]
        if "-" in self.lang:
            self.lang = self.lang.split("-")[0]
        if "{lang}" in model_dir:
            model_dir = model_dir.format(lang=self.lang)

        # model_dir points at a directory, so check with isdir
        if not isdir(model_dir):
            if model_dir in self._default_models:
                logging.error("you need to install the package: "
                              "kaldi-chain-zamia-speech-{lang}".format(
                                  lang=self.lang))
            raise ModelNotFound

        self.rec = PulseRecorder(source_name=source, volume=volume)
        self.vad = VAD(aggressiveness=aggressiveness)
        logging.info("Loading model from %s ..." % model_dir)

        self.asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=model_dir,
                       kaldi_beam=self.config["listener"]["default_beam"],
                       kaldi_acoustic_scale=self.config["listener"][
                           "default_acoustic_scale"],
                       kaldi_frame_subsampling_factor=self.config["listener"][
                           "default_frame_subsampling_factor"])

        self._hotwords = dict(self.config["hotwords"])

    def add_hotword(self, name, config=None):
        config = config or {"transcriptions": [name], "intent": name}
        self._hotwords[name] = config

    def remove_hotword(self, name):
        if name in self._hotwords:
            self._hotwords.pop(name)

    @property
    def hotwords(self):
        return self._hotwords

    def _detection_event(self, message_type, message_data):
        serialized_message = json.dumps(
            {"type": message_type, "data": message_data})
        logging.debug(serialized_message)
        self.emit(message_type, serialized_message)

    def _process_transcription(self, user_utt, confidence=0.99):
        for hotw in self.hotwords:
            if not self.hotwords[hotw].get("active"):
                continue
            rule = self.hotwords[hotw].get("rule", "sensitivity")
            s = 1 - self.hotwords[hotw].get("sensitivity", 0.2)
            confidence = (confidence + s) / 2
            for w in self.hotwords[hotw]["transcriptions"]:
                if (w in user_utt and rule == "in") or \
                        (user_utt.startswith(w) and rule == "start") or \
                        (user_utt.endswith(w) and rule == "end") or \
                        (fuzzy_match(w, user_utt) >= s and
                         rule == "sensitivity") or \
                        (w == user_utt and rule == "equal"):
                    yield {"hotword": hotw,
                           "utterance": user_utt,
                           "confidence": confidence,
                           "intent": self.hotwords[hotw]["intent"]}

    def _detect_ww(self, user_utt, confidence=0.99):
        for hw_data in self._process_transcription(user_utt, confidence):
            sound = self.hotwords[hw_data["hotword"]].get("sound")
            if sound and isfile(sound):
                play_sound(sound)
            self._detection_event("hotword", hw_data)

    def decode_wav_file(self, wav_file):
        user_utt, confidence = self.asr.decode_wav_file(wav_file)
        confidence = 1 - exp(-1 * confidence)
        return user_utt, confidence

    def wav_file_hotwords(self, wav_file):
        user_utt, confidence = self.decode_wav_file(wav_file)
        return list(self._process_transcription(user_utt, confidence))

    def run(self):
        self.rec.start_recording()
        logging.info("Listening")

        while True:
            samples = self.rec.get_samples()
            audio, finalize = self.vad.process_audio(samples)
            if not audio:
                continue
            logging.debug('decoding audio len=%d finalize=%s audio=%s' % (
                len(audio), repr(finalize), audio[0].__class__))

            user_utt, confidence = self.asr.decode(audio, finalize,
                                                   stream_id="mic")
            confidence = 1 - exp(-1 * confidence)
            if finalize and user_utt:
                self._detection_event("transcription",
                                      {"utterance": user_utt,
                                       "confidence": confidence})
                self._detect_ww(user_utt, confidence)

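# A minimal usage sketch for KaldiWWSpotter, assuming pyee-style EventEmitter
# semantics (handlers registered via .on(), payloads are the JSON strings
# produced by _detection_event). The hotword name and the handler are
# illustrative, not taken from the class above.
import json

spotter = KaldiWWSpotter()
spotter.add_hotword("hey computer")                # illustrative hotword
spotter.hotwords["hey computer"]["active"] = True  # inactive hotwords are skipped

def on_hotword(message):
    data = json.loads(message)["data"]
    print("hotword:", data["hotword"], "->", data["intent"])

spotter.on("hotword", on_hotword)  # EventEmitter subscription
spotter.run()                      # blocks, decoding the mic stream forever
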
source = options.source
volume = options.volume
aggressiveness = options.aggressiveness
model_dir = options.model_dir

#
# pulseaudio recorder
#

rec = PulseRecorder(source_name=source, volume=volume)

#
# VAD
#

vad = VAD(aggressiveness=aggressiveness)

#
# ASR
#

print("Loading model from %s ..." % model_dir)

asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=model_dir,
          kaldi_beam=DEFAULT_BEAM,
          kaldi_acoustic_scale=DEFAULT_ACOUSTIC_SCALE,
          kaldi_frame_subsampling_factor=DEFAULT_FRAME_SUBSAMPLING_FACTOR)

#
# main
#
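# The main loop itself is elided in this snippet. As a sketch, the recorder,
# VAD and ASR are typically wired together like the run() loop of
# KaldiWWSpotter above (illustrative, not part of this snippet):
rec.start_recording()
print("Please speak.")

while True:
    samples = rec.get_samples()                   # raw audio from PulseAudio
    audio, finalize = vad.process_audio(samples)  # VAD segments the stream
    if not audio:
        continue
    # finalize turns True once the VAD decides the utterance is complete
    user_utt, confidence = asr.decode(audio, finalize)
    if finalize:
        print(user_utt)
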
VOLUME = 150

class Intent(Enum):
    HELLO = 1
    LIGHT = 2
    RADIO = 3

print("Initializing...")

radio_on = False
lights_on = False

asr = ASR(model_dir=MODELDIR)
rec = PulseRecorder(volume=VOLUME)
vad = VAD()
tts = TTS(engine="espeak", voice="en")

utt_map = {}

def add_utt(utterance, intent):
    utt_map[utterance] = intent

add_utt("hello computer", Intent.HELLO)
add_utt("switch on the lights", Intent.LIGHT)
add_utt("switch off the lights", Intent.LIGHT)
add_utt("switch on the radio", Intent.RADIO)
add_utt("switch off the radio", Intent.RADIO)
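# One way utt_map might be consumed once the ASR produces a transcript; the
# fuzzy matching via difflib and the 0.7 threshold are illustrative
# assumptions, not part of the snippet above.
from difflib import SequenceMatcher

def best_intent(user_utt, min_score=0.7):
    # Pick the known utterance closest to the transcript.
    best, score = None, 0.0
    for utt, intent in utt_map.items():
        r = SequenceMatcher(None, utt, user_utt).ratio()
        if r > score:
            best, score = intent, r
    return best if score >= min_score else None

# e.g. best_intent("switch on the light") -> Intent.LIGHT
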
#
# pulseaudio player
#

misc.message_popup(stdscr, 'Initializing...', 'Init Pulseaudio Player...')
player = PulsePlayer('Zamia AI Debugger')
paint_main()
logging.debug('PulsePlayer initialized.')

#
# VAD
#

misc.message_popup(stdscr, 'Initializing...', 'Init VAD...')
vad = VAD(aggressiveness=aggressiveness, sample_rate=SAMPLE_RATE)
paint_main()
logging.debug('VAD initialized.')

#
# setup AI Kernal
#

misc.message_popup(stdscr, 'Initializing...', 'Init AI Kernal...')
kernal = AIKernal(load_all_modules=True)
# kernal.setup_tf_model (mode='decode', load_model=True, ini_fn=ai_model)
# kernal.setup_align_utterances(lang=lang)
paint_main()
logging.debug('AI kernal initialized.')

#
rec = PulseRecorder(source, SAMPLE_RATE, volume)
logging.debug('PulseRecorder initialized.')

#
# pulseaudio player
#

player = PulsePlayer('Zamia AI Debugger')
logging.debug('PulsePlayer initialized.')

#
# VAD
#

vad = VAD(aggressiveness=aggressiveness, sample_rate=SAMPLE_RATE)
logging.debug('VAD initialized.')

#
# setup AI DB, Kernal and Context
#

kernal = AIKernal(db_url, xsb_root, toplevel)
for mn2 in kernal.all_modules:
    kernal.consult_module(mn2)
kernal.setup_tf_model('decode', True, ai_model)
lang = kernal.nlp_model.lang

ctx = AIContext(USER_URI, kernal.session, lang, DEMO_REALM,
source = options.source
volume = options.volume
aggressiveness = options.aggressiveness
model_dir = options.model_dir

#
# pulseaudio recorder
#

rec = PulseRecorder(source_name=source, volume=volume)

#
# VAD
#

vad = VAD(aggressiveness=aggressiveness, max_utt_length=MAX_UTT_LENGTH)

#
# ASR
#

print("Loading model from %s ..." % model_dir)

asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=model_dir,
          kaldi_beam=DEFAULT_BEAM,
          kaldi_acoustic_scale=DEFAULT_ACOUSTIC_SCALE,
          kaldi_frame_subsampling_factor=DEFAULT_FRAME_SUBSAMPLING_FACTOR)

#
# main
#
(options, args) = parser.parse_args()

url = 'http://%s:%d/decode' % (options.host, options.port)

if options.verbose:
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.INFO)
    logging.getLogger("requests").setLevel(logging.WARNING)

source = options.source
volume = options.volume
aggressiveness = options.aggressiveness

rec = PulseRecorder(source_name=source, volume=volume, rate=sampleRate)
vad = VAD(aggressiveness=aggressiveness, sample_rate=sampleRate,
          max_utt_length=MAX_UTT_LENGTH)

# main

frames = int(sampleRate * BUFFER_DURATION / 1000)  # BUFFER_DURATION is 30 ms
rec.start_recording(frames_per_buffer=frames)
time_start = time()
print("Please speak.")

total, buff_size, finalize = 0, 0, 0

while True:
    samples = rec.get_samples()
    audio, finalize = vad.process_audio(samples)
    # print(len(samples), total, audio)
    if not audio:
        continue
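# A sketch of how the finalized audio might be shipped to the /decode
# endpoint named by url. The payload field names ('audio', 'do_finalize')
# are assumptions about the server's API; only url and the use of the
# requests library come from the snippet above.
import requests

def decode_remote(audio, finalize):
    # audio is the list of samples returned by vad.process_audio()
    payload = {'audio': audio, 'do_finalize': bool(finalize)}
    resp = requests.post(url, json=payload)
    resp.raise_for_status()
    return resp.json()
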
source = options.source
volume = options.volume
aggressiveness = options.aggressiveness
model_dir = options.model_dir

#
# pulseaudio recorder
#

rec = PulseRecorder(source, SAMPLE_RATE, volume)

#
# VAD
#

vad = VAD(aggressiveness=AGGRESSIVENESS, sample_rate=SAMPLE_RATE)

#
# ASR
#

print("Loading model from %s ..." % MODEL_DIR)
asr = KaldiNNet3OnlineModel(MODEL_DIR, MODEL)  # , acoustic_scale=ACOUSTIC_SCALE, beam=BEAM, frame_subsampling_factor=FRAME_SUBSAMPLING_FACTOR)
print("Loading model from %s, done ..." % MODEL_DIR)

#
# main
#

print("Start recording")
rec.start_recording(FRAMES_PER_BUFFER)
rec = PulseRecorder(volume=options.mic_volume)
logging.debug('PulseRecorder initialized.')

#
# pulseaudio player
#

player = PulsePlayer('Zamia AI Voice Assistant')
logging.debug('PulsePlayer initialized.')

#
# VAD
#

vad = VAD()
logging.debug('VAD initialized.')

#
# setup AI DB, Kernal and Context
#

kernal = AIKernal.from_ini_file()
for skill in kernal.all_skills:
    kernal.consult_skill(skill)
kernal.setup_nlp_model()
ctx = kernal.create_context()
logging.debug('AI kernal initialized.')

#
# ASR
source = options.source
volume = options.volume
aggressiveness = options.aggressiveness
model_dir = options.model_dir

#
# pulseaudio recorder
#

# rec = PulseRecorder(source_name=source, volume=volume)

#
# VAD
#

vad = VAD(aggressiveness=aggressiveness)

#
# ASR
#

print("Loading model from %s ..." % model_dir)

asr = ASR(engine=ASR_ENGINE_NNET3, model_dir=model_dir,
          kaldi_beam=DEFAULT_BEAM,
          kaldi_acoustic_scale=DEFAULT_ACOUSTIC_SCALE,
          kaldi_frame_subsampling_factor=DEFAULT_FRAME_SUBSAMPLING_FACTOR)

#
# main
#