Example #1
0
    def __init__(self,
                 source=None,
                 volume=None,
                 aggressiveness=None,
                 model_dir=None,
                 lang=None,
                 config=CONFIG):
        """Create the recorder, VAD and Kaldi ASR used for hotword spotting.

        Args:
            source: passed to ``PulseRecorder`` as ``source_name``.
            volume: recorder volume; a falsy value selects
                ``config["listener"]["default_volume"]``.
            aggressiveness: VAD aggressiveness; only ``None`` selects
                ``config["listener"]["default_aggressiveness"]``, so an
                explicit ``0`` is passed through to the VAD unchanged.
            model_dir: Kaldi model directory; may contain a ``{lang}``
                placeholder. A falsy value selects
                ``config["listener"]["default_model_dir"]``.
            lang: language code; only the part before the first ``-`` is
                kept. A falsy value selects ``config["lang"]``.
            config: configuration mapping with ``"listener"``, ``"lang"``
                and ``"hotwords"`` entries.

        Raises:
            ModelNotFound: if the resolved model directory does not exist.
        """
        EventEmitter.__init__(self)
        self.config = config

        # Ensure default values: copy over every listener setting the given
        # config is missing. NOTE: this writes into the caller's mapping.
        listener_cfg = self.config["listener"]
        for key, default in CONFIG["listener"].items():
            listener_cfg.setdefault(key, default)

        volume = volume or listener_cfg["default_volume"]
        # `or` would discard an explicit aggressiveness of 0, so test for
        # None instead.
        if aggressiveness is None:
            aggressiveness = listener_cfg["default_aggressiveness"]
        model_dir = model_dir or listener_cfg["default_model_dir"]

        # Reduce e.g. "en-us" to "en"; a code without "-" is kept as-is.
        self.lang = (lang or self.config["lang"]).split("-")[0]

        if "{lang}" in model_dir:
            model_dir = model_dir.format(lang=self.lang)

        if not isdir(model_dir):
            # For the known default locations, tell the user which package
            # is missing before failing.
            if model_dir in self._default_models:
                logging.error(
                    "you need to install the package: "
                    "kaldi-chain-zamia-speech-%s", self.lang)
            raise ModelNotFound

        self.rec = PulseRecorder(source_name=source, volume=volume)
        self.vad = VAD(aggressiveness=aggressiveness)
        logging.info("Loading model from %s ...", model_dir)

        self.asr = ASR(
            engine=ASR_ENGINE_NNET3,
            model_dir=model_dir,
            kaldi_beam=listener_cfg["default_beam"],
            kaldi_acoustic_scale=listener_cfg["default_acoustic_scale"],
            kaldi_frame_subsampling_factor=listener_cfg[
                "default_frame_subsampling_factor"])
        # Private copy so add/remove of hotwords does not touch the config.
        self._hotwords = dict(self.config["hotwords"])
def server_start(decoder):
    """Accept TCP clients on ``ADDRESS`` one at a time and serve each one.

    Every accepted connection gets a fresh ``VAD`` and is handed to
    ``oneConnection`` together with ``decoder``. The accept loop runs
    forever; the listening socket is closed if the loop is left through an
    exception.

    Args:
        decoder: object passed through unchanged to ``oneConnection``.
    """
    tcpServer = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # TCP
    try:
        tcpServer.bind(ADDRESS)
        # Resume listening automatically after each connection.
        while True:
            try:
                tcpServer.listen(1)  # backlog (queue length)
                # accept() returns a tuple: the client's socket object and
                # the client's address (ip address, port number).
                client_socket, client_address = tcpServer.accept()
            except (BlockingIOError, ConnectionResetError):
                # No client was obtained on this pass. Retry instead of
                # falling through with an unbound or stale client_socket.
                continue
            logging.info("listen from %s : %s" %
                         (client_address[0], client_address[1]))
            vad = VAD(aggressiveness=vad_level,
                      sample_rate=SAMPLE_RATE,
                      max_utt_length=MAX_UTT_LENGTH)
            oneConnection(client_socket, decoder, vad)
            #Thread(target=oneThread, args=(client_socket, client_address, decoder)).start()
    finally:
        # Previously placed after the infinite loop and therefore unreachable.
        tcpServer.close()
Example #3
0
class KaldiWWSpotter(EventEmitter):
    """Wake-word spotter that transcribes microphone audio with Kaldi.

    ``run`` emits a ``"transcription"`` event for every finalized, non-empty
    utterance and a ``"hotword"`` event for each active hotword that matches
    it. Event payloads are JSON strings of the form
    ``{"type": <event name>, "data": <dict>}``.
    """

    # Default model locations; when one of these is missing the user is told
    # to install the matching kaldi-chain-zamia-speech-<lang> package.
    _default_models = ["/opt/kaldi/model/kaldi-generic-en-tdnn_250",
                       "/opt/kaldi/model/kaldi-generic-de-tdnn_250"]

    def __init__(self, source=None, volume=None, aggressiveness=None,
                 model_dir=None, lang=None, config=CONFIG):
        """Create the recorder, VAD and Kaldi ASR used for spotting.

        Args:
            source: passed to ``PulseRecorder`` as ``source_name``.
            volume: recorder volume; a falsy value selects
                ``config["listener"]["default_volume"]``.
            aggressiveness: VAD aggressiveness; only ``None`` selects
                ``config["listener"]["default_aggressiveness"]``, so an
                explicit ``0`` is passed through to the VAD unchanged.
            model_dir: Kaldi model directory; may contain a ``{lang}``
                placeholder. A falsy value selects
                ``config["listener"]["default_model_dir"]``.
            lang: language code; only the part before the first ``-`` is
                kept. A falsy value selects ``config["lang"]``.
            config: configuration mapping with ``"listener"``, ``"lang"``
                and ``"hotwords"`` entries.

        Raises:
            ModelNotFound: if the resolved model directory does not exist.
        """
        # Local import: the check below needs isdir, which this module may
        # not import at file level.
        from os.path import isdir

        EventEmitter.__init__(self)
        self.config = config

        # Ensure default values: copy over every listener setting the given
        # config is missing. NOTE: this writes into the caller's mapping.
        listener_cfg = self.config["listener"]
        for key, default in CONFIG["listener"].items():
            listener_cfg.setdefault(key, default)

        volume = volume or listener_cfg["default_volume"]
        # `or` would discard an explicit aggressiveness of 0, so test for
        # None instead.
        if aggressiveness is None:
            aggressiveness = listener_cfg["default_aggressiveness"]
        model_dir = model_dir or listener_cfg["default_model_dir"]

        # Reduce e.g. "en-us" to "en"; a code without "-" is kept as-is.
        self.lang = (lang or self.config["lang"]).split("-")[0]

        if "{lang}" in model_dir:
            model_dir = model_dir.format(lang=self.lang)

        # model_dir names a directory, so it must be tested with isdir;
        # isfile() is False for directories and rejected every valid model.
        if not isdir(model_dir):
            if model_dir in self._default_models:
                logging.error("you need to install the package: "
                              "kaldi-chain-zamia-speech-%s", self.lang)
            raise ModelNotFound

        self.rec = PulseRecorder(source_name=source, volume=volume)
        self.vad = VAD(aggressiveness=aggressiveness)
        logging.info("Loading model from %s ...", model_dir)

        self.asr = ASR(
            engine=ASR_ENGINE_NNET3, model_dir=model_dir,
            kaldi_beam=listener_cfg["default_beam"],
            kaldi_acoustic_scale=listener_cfg["default_acoustic_scale"],
            kaldi_frame_subsampling_factor=listener_cfg[
                "default_frame_subsampling_factor"])
        # Private copy so add/remove of hotwords does not touch the config.
        self._hotwords = dict(self.config["hotwords"])

    def add_hotword(self, name, config=None):
        """Register (or replace) the hotword *name*.

        Without *config* the hotword gets its own name as the only
        transcription and as its intent.
        """
        # NOTE(review): the default config has no "active" key, and
        # _process_transcription skips hotwords whose "active" is falsy, so
        # a hotword added with the default config never matches - confirm
        # whether that is intended.
        config = config or {"transcriptions": [name], "intent": name}
        self._hotwords[name] = config

    def remove_hotword(self, name):
        """Forget the hotword *name*; unknown names are ignored."""
        self._hotwords.pop(name, None)

    @property
    def hotwords(self):
        """Mapping of hotword name to its configuration dict."""
        return self._hotwords

    def _detection_event(self, message_type, message_data):
        """Emit *message_type* with a JSON-serialized type/data payload."""
        serialized_message = json.dumps(
            {"type": message_type, "data": message_data})
        logging.debug(serialized_message)
        self.emit(message_type, serialized_message)

    def _process_transcription(self, user_utt, confidence=0.99):
        """Yield one match dict per transcription of an active hotword.

        Each hotword config may define ``rule`` (``"in"``, ``"start"``,
        ``"end"``, ``"equal"`` or the default ``"sensitivity"``, which
        compares ``fuzzy_match`` against ``1 - sensitivity``) and
        ``sensitivity`` (default 0.2).

        Args:
            user_utt: transcribed utterance text.
            confidence: confidence of the transcription.

        Yields:
            dict with ``hotword``, ``utterance``, ``confidence`` and
            ``intent`` keys.
        """
        for name, hotword in self.hotwords.items():
            if not hotword.get("active"):
                continue
            rule = hotword.get("rule", "sensitivity")
            threshold = 1 - hotword.get("sensitivity", 0.2)
            # Average the transcription confidence with this hotword's
            # threshold. Kept in a separate variable so one hotword's
            # sensitivity does not leak into the score of the next one.
            hw_confidence = (confidence + threshold) / 2
            for w in hotword["transcriptions"]:
                # The rule is tested first so fuzzy_match only runs for the
                # "sensitivity" rule.
                if (rule == "in" and w in user_utt) or \
                        (rule == "start" and user_utt.startswith(w)) or \
                        (rule == "end" and user_utt.endswith(w)) or \
                        (rule == "sensitivity" and
                         fuzzy_match(w, user_utt) >= threshold) or \
                        (rule == "equal" and w == user_utt):
                    yield {"hotword": name,
                           "utterance": user_utt,
                           "confidence": hw_confidence,
                           "intent": hotword["intent"]}

    def _detect_ww(self, user_utt, confidence=0.99):
        """Emit a "hotword" event (and play its sound) for every match."""
        for hw_data in self._process_transcription(user_utt, confidence):
            sound = self.hotwords[hw_data["hotword"]].get("sound")
            # The sound is optional and only played if the file exists.
            if sound and isfile(sound):
                play_sound(sound)
            self._detection_event("hotword", hw_data)

    def decode_wav_file(self, wav_file):
        """Transcribe *wav_file*.

        Returns:
            ``(utterance, confidence)`` where the confidence reported by the
            ASR has been mapped through ``1 - exp(-confidence)``.
        """
        user_utt, confidence = self.asr.decode_wav_file(wav_file)
        confidence = 1 - exp(-1 * confidence)
        return user_utt, confidence

    def wav_file_hotwords(self, wav_file):
        """Return the list of hotword matches found in *wav_file*."""
        user_utt, confidence = self.decode_wav_file(wav_file)
        return list(self._process_transcription(user_utt, confidence))

    def run(self):
        """Record from the microphone forever and emit detection events."""
        self.rec.start_recording()
        logging.info("Listening")

        while True:
            samples = self.rec.get_samples()
            audio, finalize = self.vad.process_audio(samples)

            # Nothing to decode for this chunk of samples.
            if not audio:
                continue

            logging.debug('decoding audio len=%d finalize=%s audio=%s',
                          len(audio), repr(finalize), audio[0].__class__)

            user_utt, confidence = self.asr.decode(audio, finalize,
                                                   stream_id="mic")
            confidence = 1 - exp(-1 * confidence)
            # Only finalized, non-empty utterances are reported.
            if finalize and user_utt:
                self._detection_event("transcription",
                                      {"utterance": user_utt,
                                       "confidence": confidence})
                self._detect_ww(user_utt, confidence)
Example #4
0
# Settings taken from the parsed command-line options.
source         = options.source
volume         = options.volume
aggressiveness = options.aggressiveness
model_dir      = options.model_dir

#
# pulseaudio recorder
#

rec = PulseRecorder (source_name=source, volume=volume)

#
# VAD
#

vad = VAD(aggressiveness=aggressiveness)

#
# ASR
#

# Parenthesised single-argument form: prints the same text on Python 2 and
# is valid syntax on Python 3 (the bare print statement is not).
print("Loading model from %s ..." % model_dir)

asr = ASR(engine = ASR_ENGINE_NNET3, model_dir = model_dir,
          kaldi_beam = DEFAULT_BEAM, kaldi_acoustic_scale = DEFAULT_ACOUSTIC_SCALE,
          kaldi_frame_subsampling_factor = DEFAULT_FRAME_SUBSAMPLING_FACTOR)


#
# main
#
Example #5
0
# Volume value passed to PulseRecorder(volume=VOLUME) in this script.
VOLUME = 150


class Intent(Enum):
    """Intents that the utterances registered via add_utt are mapped to."""
    HELLO = 1
    LIGHT = 2
    RADIO = 3


print("Initializing...")

# State flags, both starting out False.
# NOTE(review): presumably they track whether the radio / lights are
# currently switched on - confirm against the code that updates them.
radio_on = False
lights_on = False
# Speech pipeline objects: recognizer, recorder, voice activity detector
# and text-to-speech.
asr = ASR(model_dir=MODELDIR)
rec = PulseRecorder(volume=VOLUME)
vad = VAD()
tts = TTS(engine="espeak", voice="en")

# Lookup table from utterance text to its intent, filled through add_utt().
utt_map = {}


def add_utt(utterance, intent):
    """Record that *utterance* maps to *intent*, replacing any old entry."""
    utt_map.update({utterance: intent})


# Register the known utterances. The "switch on" and "switch off" phrasings
# of a device share one intent, so the intent alone does not say which of
# the two was spoken.
add_utt("hello computer", Intent.HELLO)
add_utt("switch on the lights", Intent.LIGHT)
add_utt("switch off the lights", Intent.LIGHT)
add_utt("switch on the radio", Intent.RADIO)
add_utt("switch off the radio", Intent.RADIO)
Example #6
0
    #
    # pulseaudio player
    #

    misc.message_popup(stdscr, 'Initializing...', 'Init Pulseaudio Player...')
    player = PulsePlayer('Zamia AI Debugger')
    paint_main()
    logging.debug ('PulsePlayer initialized.')

    #
    # VAD
    #

    misc.message_popup(stdscr, 'Initializing...', 'Init VAD...')
    vad = VAD(aggressiveness=aggressiveness, sample_rate=SAMPLE_RATE)
    paint_main()
    logging.debug ('VAD initialized.')

    #
    # setup AI Kernal
    #

    misc.message_popup(stdscr, 'Initializing...', 'Init AI Kernal...')
    kernal = AIKernal(load_all_modules=True)
    # kernal.setup_tf_model (mode='decode', load_model=True, ini_fn=ai_model)
    # kernal.setup_align_utterances(lang=lang)
    paint_main()
    logging.debug ('AI kernal initialized.')

    #
Example #7
0
#
# pulseaudio recorder
#

rec = PulseRecorder(source, SAMPLE_RATE, volume)
logging.debug('PulseRecorder initialized.')

#
# pulseaudio player
#

player = PulsePlayer('Zamia AI Debugger')
logging.debug('PulsePlayer initialized.')

#
# VAD (created with the same SAMPLE_RATE as the recorder)
#

vad = VAD(aggressiveness=aggressiveness, sample_rate=SAMPLE_RATE)
logging.debug('VAD initialized.')

#
# setup AI DB, Kernal and Context
#

kernal = AIKernal(db_url, xsb_root, toplevel)
# Consult every module the kernal lists before the model is set up.
for mn2 in kernal.all_modules:
    kernal.consult_module(mn2)
kernal.setup_tf_model('decode', True, ai_model)
# The language is read from the kernal's NLP model.
lang = kernal.nlp_model.lang
ctx = AIContext(USER_URI,
                kernal.session,
                lang,
                DEMO_REALM,
Example #8
0
# Settings taken from the parsed command-line options.
source         = options.source
volume         = options.volume
aggressiveness = options.aggressiveness
model_dir      = options.model_dir

#
# pulseaudio recorder
#

rec = PulseRecorder(source_name=source, volume=volume)

#
# VAD
#

vad = VAD(aggressiveness=aggressiveness,max_utt_length=MAX_UTT_LENGTH)

#
# ASR
#

# The path must be interpolated with %; passing it as a second print()
# argument printed the literal "%s" followed by the path.
print("Loading model from %s ..." % model_dir)

asr = ASR(engine = ASR_ENGINE_NNET3, model_dir = model_dir,
          kaldi_beam = DEFAULT_BEAM, kaldi_acoustic_scale = DEFAULT_ACOUSTIC_SCALE,
          kaldi_frame_subsampling_factor = DEFAULT_FRAME_SUBSAMPLING_FACTOR)


#
# main
#
Example #9
0
# Parse the command line and build the URL of the decode endpoint from the
# host and port options.
(options, args) = parser.parse_args()
url = 'http://%s:%d/decode' % (options.host, options.port)

# --verbose selects DEBUG logging, otherwise INFO; the "requests" logger is
# limited to WARNING in both cases.
if options.verbose:
    logging.basicConfig(level=logging.DEBUG)
else:
    logging.basicConfig(level=logging.INFO)
logging.getLogger("requests").setLevel(logging.WARNING)

source = options.source
volume = options.volume
aggressiveness = options.aggressiveness

# Recorder and VAD are created with the same sample rate.
rec = PulseRecorder(source_name=source, volume=volume, rate=sampleRate)
vad = VAD(aggressiveness=aggressiveness,
          sample_rate=sampleRate,
          max_utt_length=MAX_UTT_LENGTH)

# main
# Buffer size in frames; the division by 1000 suggests BUFFER_DURATION is
# in milliseconds - TODO confirm.
frames = int(sampleRate * BUFFER_DURATION / 1000)  # BUFFER_DURATION 30
rec.start_recording(frames_per_buffer=frames)

time_start = time()
print("Please speak.")
total, buff_size, finalize = 0, 0, 0
while True:

    samples = rec.get_samples()
    audio, finalize = vad.process_audio(samples)
    #print(len(samples),total,audio)
    if not audio:
Example #10
0
# Settings taken from the parsed command-line options.
# NOTE(review): aggressiveness and model_dir are read here, but the VAD and
# the model below are created from the AGGRESSIVENESS and MODEL_DIR
# constants instead - confirm which of the two is intended.
source = options.source
volume = options.volume
aggressiveness = options.aggressiveness
model_dir = options.model_dir

#
# pulseaudio recorder
#

rec = PulseRecorder(source, SAMPLE_RATE, volume)

#
# VAD
#

vad = VAD(aggressiveness=AGGRESSIVENESS, sample_rate=SAMPLE_RATE)

#
# ASR
#

print("Loading model from %s ..." % MODEL_DIR)
asr = KaldiNNet3OnlineModel(MODEL_DIR, MODEL)
#, acoustic_scale=ACOUSTIC_SCALE, beam=BEAM, frame_subsampling_factor=FRAME_SUBSAMPLING_FACTOR)
print("Loading model from %s, done ..." % MODEL_DIR)
#
# main
#

print("Start recording")
rec.start_recording(FRAMES_PER_BUFFER)
Example #11
0
#
# pulseaudio recorder
#

rec = PulseRecorder (volume=options.mic_volume)
logging.debug ('PulseRecorder initialized.')

#
# pulseaudio player
#

player = PulsePlayer('Zamia AI Voie Assistant')
logging.debug ('PulsePlayer initialized.')

#
# VAD
#

# NOTE(review): this VAD is replaced further down by a second one built
# from options.aggressiveness - confirm whether both are needed.
vad = VAD()
logging.debug ('VAD initialized.')

#
# setup AI DB, Kernal and Context
#

kernal = AIKernal.from_ini_file()
# Consult every skill the kernal lists before the NLP model is set up.
for skill in kernal.all_skills:
    kernal.consult_skill (skill)
kernal.setup_nlp_model()
ctx  = kernal.create_context()
logging.debug ('AI kernal initialized.')

#
# settings taken from the parsed command-line options
#

# NOTE(review): source and volume are only used by the disabled recorder
# line below.
source         = options.source
volume         = options.volume
aggressiveness = options.aggressiveness
model_dir      = options.model_dir

#
# pulseaudio recorder
#

#rec = PulseRecorder (source_name=source, volume=volume)

#
# VAD
#

vad = VAD(aggressiveness=aggressiveness)

#
# ASR
#

# Parenthesised single-argument form: prints the same text on Python 2 and
# is valid syntax on Python 3 (the bare print statement is not).
print("Loading model from %s ..." % model_dir)

asr = ASR(engine = ASR_ENGINE_NNET3, model_dir = model_dir,
          kaldi_beam = DEFAULT_BEAM, kaldi_acoustic_scale = DEFAULT_ACOUSTIC_SCALE,
          kaldi_frame_subsampling_factor = DEFAULT_FRAME_SUBSAMPLING_FACTOR)


#
# main
#