def detectKeywords(libpath):
    """Run continuous voice-activity detection on the live audio stream.

    Audio frames are read from an ``AudiostreamSource``, converted to mel
    features by a ``FeatureExtractor`` and fed to an ``AudioRecognition``
    detector that has the ``vad_16.premium`` continuous model loaded.
    "Speech detected" is printed every time the second element of the
    continuous result exceeds the VAD threshold (0.2).

    The function loops until it is interrupted with Ctrl+C; it then prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.

    Args:
        libpath: Value forwarded to the ``FeatureExtractor`` and
            ``AudioRecognition`` constructors (presumably the path of the
            native library -- confirm against libnyumaya).
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detector = AudioRecognition(libpath)

    extractor_gain = 1.0
    vad_threshold = 0.2

    keywordVAD = detector.addContinousModel('../../models/Hotword/vad_16.premium')
    bufsize = detector.getInputDataSize()

    print("Audio Recognition Version: " + detector.getVersionString())

    audio_stream.start()
    try:
        while True:
            # NOTE(review): the factor 2 presumably converts a sample count
            # into bytes for 16-bit audio -- confirm against AudiostreamSource.
            frame = audio_stream.read(bufsize * 2, bufsize * 2)
            if not frame:
                # Not enough audio buffered yet; back off briefly.
                time.sleep(0.01)
                continue

            features = extractor.signalToMel(frame, extractor_gain)
            # The return value is not needed here: the continuous model's
            # result is queried explicitly right below.
            detector.runDetection(features)

            vadResult = detector.getContinousResult(keywordVAD)
            if vadResult[1] > vad_threshold:
                print("Speech detected")

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl+C (before SystemExit propagates) and for any other
        # exception, so the started stream is never left running.
        audio_stream.stop()
# Example no. 2
# 0
def detectKeywords(libpath):
    """Listen on the audio stream for four hotwords and report detections.

    Loads the Firefox, Sheila, Marvin and Alexa hotword models (each with
    0.6 as the second ``addModel`` argument, presumably the sensitivity),
    then repeatedly reads audio frames, converts them to mel features and
    runs detection. For every non-zero prediction the matching keyword name
    and a timestamp are printed, and ``../resources/ding.wav`` is played
    through the module-level ``play_command`` via ``os.system``.

    The function loops until it is interrupted with Ctrl+C; it then prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.

    Args:
        libpath: Value forwarded to the ``FeatureExtractor`` and
            ``AudioRecognition`` constructors (presumably the path of the
            native library -- confirm against libnyumaya).
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detector = AudioRecognition(libpath)

    extractor_gain = 1.0

    # Add one or more keyword models. The table keeps registration order so
    # that lookups below resolve to the first matching id.
    keywords = (
        (detector.addModel(
            '../../models/Hotword/firefox_v1.4.5.premium', 0.6), "Firefox"),
        (detector.addModel(
            '../../models/Hotword/sheila_v1.4.5.premium', 0.6), "Sheila"),
        (detector.addModel(
            '../../models/Hotword/marvin_v1.4.5.premium', 0.6), "Marvin"),
        (detector.addModel(
            '../../models/Hotword/alexa_v1.4.5.premium', 0.6), "Alexa"),
    )

    bufsize = detector.getInputDataSize()

    print("Audio Recognition Version: " + detector.getVersionString())

    audio_stream.start()
    try:
        while True:
            # NOTE(review): the factor 2 presumably converts a sample count
            # into bytes for 16-bit audio -- confirm against AudiostreamSource.
            frame = audio_stream.read(bufsize * 2, bufsize * 2)
            if not frame:
                # Not enough audio buffered yet; back off briefly.
                time.sleep(0.01)
                continue

            features = extractor.signalToMel(frame, extractor_gain)

            prediction = detector.runDetection(features)
            if prediction == 0:
                continue

            now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
            label = next(
                (name for keyword_id, name in keywords
                 if keyword_id == prediction),
                None)
            if label is not None:
                print(label + " detected:" + now)

            # The acknowledgement sound is played for every non-zero
            # prediction, even one that matches none of the known ids.
            os.system(play_command + " ../resources/ding.wav")

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl+C (before SystemExit propagates) and for any other
        # exception, so the started stream is never left running.
        audio_stream.stop()
#!/usr/bin/env python

import socket
import sys
import os
sys.path.append('../../python/src')

from libnyumaya import AudioRecognition
from auto_platform import default_libpath

# Minimal TCP server: accept one client on port 9999 and feed everything it
# sends straight into the keyword detector.
serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
serversocket.bind(('', 9999))
serversocket.listen(5)  # become a server socket, maximum 5 connections

detector = AudioRecognition(default_libpath)
keywordIdFirefox = detector.addModel(
    '../../models/Hotword/firefox_v2.0.23.premium', 0.8)

# Blocks until the first (and only) client connects.
connection, address = serversocket.accept()

try:
    while True:
        # NOTE(review): recv() may return fewer than 640 bytes; if
        # runDetection needs fixed-size input the data should be
        # re-assembled first -- confirm against libnyumaya.
        buf = connection.recv(640)
        if not buf:
            # An empty result means the peer closed the connection. Without
            # this exit the loop would spin forever on b''.
            break
        prediction = detector.runDetection(buf)
        if prediction != 0:
            print("Keyword detected")
finally:
    # Release both sockets on normal shutdown, Ctrl+C or any error.
    connection.close()
    serversocket.close()
def recordActivations(libpath):
    """Save the audio preceding each keyword activation to a WAV file.

    One ``AudioRecognition`` detector is created per entry of the
    module-level ``models`` iterable (``(path, sensitivity, name)`` tuples).
    While streaming, the most recent ``recordBefore`` seconds of raw audio
    are kept in a rolling buffer; whenever a detector reports a non-zero
    prediction that buffer is written with ``save_wav`` to
    ``<saveDirectory>/activation_<name>_<count>_<time_ns>.wav``.

    The function loops until it is interrupted with Ctrl+C; it then prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.

    Args:
        libpath: Value forwarded to the ``FeatureExtractor`` and
            ``AudioRecognition`` constructors (presumably the path of the
            native library -- confirm against libnyumaya).

    Raises:
        ValueError: If ``models`` yields no entries, since the input size
            and version are read from a detector instance.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)

    extractor_gain = 1.0
    recordBefore = 2.5  # Seconds before the activation
    # TODO: audio *after* the activation is currently not recorded; only the
    # pre-roll buffer is saved.

    activationCount = 0
    ensure_dir(saveDirectory)

    rbFrontSize = int(recordBefore * bytesPerSample * framesPerSecond)

    detectors = {}
    for mpath, msens, mname in models:
        detector = AudioRecognition(libpath)
        detector.addModel(mpath, msens)
        detectors[mname] = detector

    if not detectors:
        raise ValueError("recordActivations requires at least one model")

    # The last detector created is used to query size and version.
    bufsize = detector.getInputDataSize()

    print("Audio Recognition Version: " + detector.getVersionString())

    # Every model sees exactly the same audio, so a single rolling pre-roll
    # buffer is enough for all of them.
    front_buffer = bytearray()

    audio_stream.start()
    try:
        while True:
            # NOTE(review): the factor 2 presumably converts a sample count
            # into bytes for 16-bit audio -- confirm against AudiostreamSource.
            frame = audio_stream.read(bufsize * 2, bufsize * 2)

            if not frame:
                # Not enough audio buffered yet; back off briefly.
                time.sleep(0.01)
                continue

            # Keep only the newest rbFrontSize bytes. A new object is built
            # each time so a buffer already handed to save_wav is never
            # mutated afterwards.
            front_buffer = front_buffer + frame
            if len(front_buffer) > rbFrontSize:
                front_buffer = front_buffer[-rbFrontSize:]

            features = extractor.signalToMel(frame, extractor_gain)

            for mname, model_detector in detectors.items():
                if model_detector.runDetection(features) == 0:
                    continue

                savePath = saveDirectory + "/activation_{}_{}_{}.wav".format(
                    mname, activationCount, time.time_ns())
                save_wav(front_buffer, savePath)
                print("Saving Activation to {}".format(savePath))
                activationCount += 1

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl+C (before SystemExit propagates) and for any other
        # exception (e.g. a failing save_wav), so the stream is never left
        # running.
        audio_stream.stop()
# Example no. 5
# 0
def detectKeywords(libpath):
    """Wait for the "Alexa" wake word, then transcribe one spoken command.

    The loop alternates between two modes:

    * Wake-word mode: audio frames are converted to mel features and passed
      to the ``AudioRecognition`` detector. On a non-zero prediction a
      timestamped message is printed (when the id is the Alexa model),
      ``../resources/ding.wav`` is played through the module-level
      ``play_command`` and the loop switches to command mode.
    * Command mode: audio is read with ``read(4000, 4000)`` and fed to a
      ``KaldiRecognizer`` restricted to a small digit vocabulary. As soon as
      ``AcceptWaveform`` returns a truthy value the result and the final
      result are printed and the loop returns to wake-word mode.

    The function loops until it is interrupted with Ctrl+C; it then prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.

    Args:
        libpath: Value forwarded to the ``FeatureExtractor`` and
            ``AudioRecognition`` constructors (presumably the path of the
            native library -- confirm against libnyumaya).
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detector = AudioRecognition(libpath)

    framerate = 16000
    model = Model("model")

    # Let's define a custom dictionary
    rec = KaldiRecognizer(
        model, framerate,
        '["oh one two three four five six seven eight nine zero", "[unk]"]')

    extractor_gain = 1.0

    # Add one or more keyword models
    keywordIdAlexa = detector.addModel(
        '../../models/Hotword/alexa_v3.0.35.premium', 0.85)

    bufsize = detector.getInputDataSize()

    print("Audio Recognition Version: " + detector.getVersionString())

    command_started = False

    audio_stream.start()
    try:
        while True:
            if not command_started:
                # Wake-word mode.
                # NOTE(review): the factor 2 presumably converts a sample
                # count into bytes for 16-bit audio -- confirm.
                frame = audio_stream.read(bufsize * 2, bufsize * 2)
                if not frame:
                    time.sleep(0.01)
                    continue

                features = extractor.signalToMel(frame, extractor_gain)
                prediction = detector.runDetection(features)
                if prediction != 0:
                    now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
                    if prediction == keywordIdAlexa:
                        print("Alexa detected:" + now)

                    os.system(play_command + " ../resources/ding.wav")
                    command_started = True
            else:
                # Command (vosk) mode.
                frame = audio_stream.read(4000, 4000)
                if not frame:
                    time.sleep(0.01)
                    continue

                if rec.AcceptWaveform(bytes(frame)):
                    print(rec.Result())
                    command_started = False
                    print(rec.FinalResult())

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl+C (before SystemExit propagates) and for any other
        # exception, so the started stream is never left running.
        audio_stream.stop()
class MultiDetector():
    """Recognise multi-word voice commands by chaining keyword detections.

    A command is a comma-separated sequence of keyword names together with
    a callback. Detected keywords are appended to ``history``; when the
    history equals a command's word list its callback is invoked. Only
    keywords that can continue the current history are kept active in the
    underlying ``AudioRecognition`` detector. The history is cleared after
    ``timeout`` calls to :meth:`run_frame` without a further detection.
    """

    def __init__(self, libpath, timeout=40):
        """Create the detector.

        Args:
            libpath: Value forwarded to the ``AudioRecognition`` constructor.
            timeout: Number of ``run_frame`` calls after the last accepted
                keyword before the history is reset.
        """
        self.current_index = 0
        self.number_detectors = 0
        self.countdown = 0  # Remaining frames until the history is reset
        self.timeout = timeout
        self.commands = []  # Dicts with 'command' (word list) and 'function'
        self.libpath = libpath
        self.history = []  # Keyword names detected so far
        self.last_frames = []  # Most recent frames, replayed after a hit
        self.max_last_frames = 5
        self.detector = AudioRecognition(self.libpath)
        self.keyword_map = {}  # Keyword id (from addModel) -> keyword name
        # Initialised here so run_frame/check_timeout work even when no
        # command or callback has been registered yet.
        self.possible_words = []
        self.history_callback = None
        self.detected_callback = None

    # Given the current history which words are we checking for?
    def get_possible_words(self, history):
        """Return the distinct words that may follow ``history``.

        For each command, ``command_starts_with_history`` supplies an index
        into the command's word list (presumably the position of the next
        expected word, negative when the history does not match -- confirm
        against that helper). An index at or beyond the end of a command is
        reported as an error and yields an empty list.
        """
        words = []
        for cmd in self.commands:
            sequence = cmd['command']
            index = command_starts_with_history(sequence, history)

            if index >= len(sequence):
                print("Error index out of range:")
                print("Command: " + str(cmd))
                print("Index: " + str(index))
                print("History: " + str(history))
                return []

            if index >= 0:
                next_word = sequence[index]
                if next_word not in words:
                    words.append(next_word)
        return words

    def UpdateLastFrames(self, frame):
        """Remember ``frame``, keeping at most ``max_last_frames`` entries."""
        self.last_frames.append(frame)
        if len(self.last_frames) > self.max_last_frames:
            self.last_frames.pop(0)

    def add_command(self, command, callback_function):
        """Register a command.

        Args:
            command: Comma-separated keyword names, e.g. ``"alexa,stop"``.
                The names are used verbatim (no whitespace stripping).
            callback_function: Zero-argument callable invoked when the
                command has been fully detected.
        """
        words = command.split(",")
        # str.split never returns an empty list, so a length check cannot
        # detect an empty command; look for at least one non-empty word.
        if not any(words):
            print("No valid command")
            return

        self.commands.append({
            'command': words,
            'function': callback_function
        })
        self.update_word_and_detector()

    def add_word(self, graph, name, sensitivity):
        """Load a keyword model and map the id it is given to ``name``."""
        keywordId = self.detector.addModel(graph, sensitivity)
        self.keyword_map[keywordId] = name

    def add_reset_history_callback(self, callback_function):
        """Set the callable invoked when the history times out."""
        self.history_callback = callback_function

    def add_detected_callback(self, callback_function):
        """Set the callable invoked after every accepted keyword."""
        self.detected_callback = callback_function

    def GetInputDataSize(self):
        """Return the input data size reported by the underlying detector."""
        return self.detector.getInputDataSize()

    def maby_execute(self):
        """Run the callback of any command equal to the current history.

        Executing a command clears the history, the countdown and the
        stored frames.

        Returns:
            True if a command was executed, otherwise False.
        """
        executed_cmd = False
        for cmd in self.commands:
            if cmd['command'] == self.history:
                cmd['function']()
                self.history = []
                self.countdown = 0
                # Rebind instead of clearing in place: run_last_frames may
                # still be iterating over the previous list.
                self.last_frames = []
                executed_cmd = True

        return executed_cmd

    def check_timeout(self):
        """Advance the countdown and reset the history when it expires."""
        if self.countdown > 0:
            self.countdown -= 1
            if self.countdown == 0:
                self.history = []
                self.update_word_and_detector()
                if self.history_callback:
                    self.history_callback()

    def update_word_and_detector(self):
        """Recompute the possible next words and (de)activate the models."""
        self.possible_words = self.get_possible_words(self.history)
        # Set possible words active, impossible words inactive.
        print(self.possible_words)
        for keyword_id, name in self.keyword_map.items():
            self.detector.setActive(keyword_id, name in self.possible_words)

    def run_frame(self, frame, update_frames=True):
        """Feed one frame to the detector and advance the command state.

        Args:
            frame: Data passed unchanged to ``runDetection`` (whether this
                is raw audio or features is decided by the caller).
            update_frames: When True the frame is also stored for replay;
                replayed frames pass False so they are not stored twice.
        """
        if update_frames:
            self.UpdateLastFrames(frame)

        self.check_timeout()

        prediction = self.detector.runDetection(frame)
        if not prediction:
            return

        label = self.keyword_map[prediction]
        if label not in self.possible_words:
            return

        print("Got prediction: " + label)
        self.countdown = self.timeout
        self.history.append(label)
        result = self.maby_execute()
        self.update_word_and_detector()

        if self.detected_callback:
            self.detected_callback()

        # Command hasn't finished so run last frames in next detectors
        if not result:
            self.run_last_frames()

    def run_last_frames(self):
        """Replay the stored frames through :meth:`run_frame`."""
        for frame in self.last_frames:
            self.run_frame(frame, update_frames=False)

    def print_commands(self):
        """Print every registered command dictionary."""
        for cmd in self.commands:
            print(cmd)