Example #1
0
def label_stream(libpath):
    """Read audio from the stream source, extract features and forward them.

    Each non-empty chunk read from ``AudiostreamSource`` is passed through
    ``FeatureExtractor.signalToMel`` (with a gain of 16.0) and the result is
    handed to ``send_features``. The loop runs until Ctrl-C, at which point
    the stream is stopped and the process exits with status 0.

    Args:
        libpath: Library path handed to ``FeatureExtractor``.
    """
    mel_gain = 16.0

    source = AudiostreamSource()
    mel_extractor = FeatureExtractor(libpath)

    # FIXME: the recognizer is only created to query the input data size.
    # NOTE(review): it is built from ``default_libpath`` rather than the
    # ``libpath`` argument -- confirm that this is intended.
    recognizer = AudioRecognition(default_libpath)

    # Every read requests twice the size reported by the recognizer.
    chunk_len = recognizer.getInputDataSize() * 2

    source.start()
    try:
        while True:
            chunk = source.read(chunk_len, chunk_len)
            if chunk:
                send_features(mel_extractor.signalToMel(chunk, mel_gain))
            else:
                # Nothing buffered yet: back off briefly before polling again.
                time.sleep(0.01)
    except KeyboardInterrupt:
        print("Terminating")
        source.stop()
        sys.exit(0)
Example #2
0
def label_stream(labels, libpath, graph, sensitivity):
    """Run detection on raw audio chunks and play a sound on every hit.

    Chunks read from ``AudiostreamSource`` are passed directly to
    ``AudioRecognition.RunDetection``. For every truthy prediction the label
    and a timestamp are printed and ``./ding.wav`` is played through the
    platform's command-line player. Ctrl-C stops the stream and exits with
    status 0.

    Args:
        labels: Labels argument forwarded to ``AudioRecognition``.
        libpath: Library path forwarded to ``AudioRecognition``.
        graph: Graph argument forwarded to ``AudioRecognition``.
        sensitivity: Value passed to ``SetSensitivity``.
    """
    source = AudiostreamSource()

    recognizer = AudioRecognition(libpath, graph, labels)
    recognizer.SetSensitivity(sensitivity)
    recognizer.SetGain(1)
    recognizer.RemoveDC(False)

    chunk_len = recognizer.GetInputDataSize()

    # macOS uses "play -q"; every other platform uses "aplay".
    if platform.system() == "Darwin":
        play_command = "play -q"
    else:
        play_command = "aplay"

    print("Audio Recognition Version: " + recognizer.GetVersionString())

    source.start()
    try:
        while True:
            chunk = source.read(chunk_len, chunk_len)
            if not chunk:
                # No data available yet; poll again shortly.
                time.sleep(0.01)
                continue

            prediction = recognizer.RunDetection(chunk)
            if not prediction:
                continue

            timestamp = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
            print(recognizer.GetPredictionLabel(prediction) + " " + timestamp)
            os.system(play_command + " ./ding.wav")
    except KeyboardInterrupt:
        print("Terminating")
        source.stop()
        sys.exit(0)
 def __init__(self, libpath, timeout=40):
     """Set up empty command/history state and create the recognizer.

     Args:
         libpath: Library path stored on the instance and passed to
             ``AudioRecognition``.
         timeout: Value stored as ``self.timeout`` (default 40).
     """
     # Configuration
     self.libpath = libpath
     self.timeout = timeout
     self.max_last_frames = 5

     # Counters
     self.current_index = 0
     self.number_detectors = 0
     self.countdown = 0

     # Containers, all empty to begin with
     self.commands = []
     self.history = []
     self.last_frames = []
     self.keyword_map = {}

     # Recognizer instance
     self.detector = AudioRecognition(self.libpath)
def detectKeywords(libpath):
    """Print "Speech detected" whenever the continuous model's score is high.

    A continuous model is registered via ``addContinousModel``. After every
    detection pass the model's result is fetched with ``getContinousResult``
    and element ``[1]`` is compared against a threshold of 0.2. Ctrl-C stops
    the stream and exits with status 0.

    Args:
        libpath: Library path used for both ``FeatureExtractor`` and
            ``AudioRecognition``.
    """
    source = AudiostreamSource()
    mel_extractor = FeatureExtractor(libpath)
    recognizer = AudioRecognition(libpath)

    mel_gain = 1.0
    speech_threshold = 0.2

    vad_model = recognizer.addContinousModel('../../models/Hotword/vad_16.premium')
    # Every read requests twice the size reported by the recognizer.
    chunk_len = recognizer.getInputDataSize() * 2

    print("Audio Recognition Version: " + recognizer.getVersionString())

    source.start()
    try:
        while True:
            chunk = source.read(chunk_len, chunk_len)
            if not chunk:
                time.sleep(0.01)
                continue

            # The return value of runDetection is not needed here; only the
            # continuous result queried afterwards is evaluated.
            recognizer.runDetection(mel_extractor.signalToMel(chunk, mel_gain))

            vad_scores = recognizer.getContinousResult(vad_model)
            if vad_scores[1] > speech_threshold:
                print("Speech detected")
    except KeyboardInterrupt:
        print("Terminating")
        source.stop()
        sys.exit(0)
def label_stream(labels, libpath, graph, sensitivity):
    """Extract features from streamed audio, detect labels and play a sound.

    Every non-empty chunk goes through ``FeatureExtractor.signal_to_mel``
    (gain 1.0) and then ``AudioRecognition.RunDetection``. A truthy
    prediction prints its label with a timestamp and plays
    ``./resources/ding.wav`` using the module-level ``play_command``.
    Ctrl-C stops the stream and exits with status 0.

    Args:
        labels: Labels argument forwarded to ``AudioRecognition``.
        libpath: Library path for the extractor and the recognizer.
        graph: Graph argument forwarded to ``AudioRecognition``.
        sensitivity: Value passed to ``SetSensitivity``.
    """
    mel_gain = 1.0

    source = AudiostreamSource()
    mel_extractor = FeatureExtractor(libpath)

    recognizer = AudioRecognition(libpath, graph, labels)
    recognizer.SetSensitivity(sensitivity)

    # Every read requests twice the size reported by the recognizer.
    chunk_len = recognizer.GetInputDataSize() * 2

    print("Audio Recognition Version: " + recognizer.GetVersionString())

    source.start()
    try:
        while True:
            chunk = source.read(chunk_len, chunk_len)
            if not chunk:
                time.sleep(0.01)
                continue

            mel = mel_extractor.signal_to_mel(chunk, mel_gain)
            prediction = recognizer.RunDetection(mel)
            if not prediction:
                continue

            timestamp = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
            print(recognizer.GetPredictionLabel(prediction) + " " + timestamp)
            os.system(play_command + " ./resources/ding.wav")
    except KeyboardInterrupt:
        print("Terminating")
        source.stop()
        sys.exit(0)
Example #6
0
def detectKeywords(libpath):
    """Listen for four hotwords and report which one was detected.

    Four models are registered with ``addModel`` (each with the value 0.6).
    Whenever ``runDetection`` returns something other than 0, the message for
    the first registered keyword whose id equals the prediction is printed
    (nothing is printed if none matches) and ``../resources/ding.wav`` is
    played using the module-level ``play_command``. Ctrl-C stops the stream
    and exits with status 0.

    Args:
        libpath: Library path used for both ``FeatureExtractor`` and
            ``AudioRecognition``.
    """
    source = AudiostreamSource()
    mel_extractor = FeatureExtractor(libpath)
    recognizer = AudioRecognition(libpath)

    mel_gain = 1.0

    # (keyword id, message prefix) pairs, registered and checked in this order.
    keyword_table = (
        (recognizer.addModel(
            '../../models/Hotword/firefox_v1.4.5.premium', 0.6),
         "Firefox detected:"),
        (recognizer.addModel(
            '../../models/Hotword/sheila_v1.4.5.premium', 0.6),
         "Sheila detected:"),
        (recognizer.addModel(
            '../../models/Hotword/marvin_v1.4.5.premium', 0.6),
         "Marvin detected:"),
        (recognizer.addModel(
            '../../models/Hotword/alexa_v1.4.5.premium', 0.6),
         "Alexa detected:"),
    )

    # Every read requests twice the size reported by the recognizer.
    chunk_len = recognizer.getInputDataSize() * 2

    print("Audio Recognition Version: " + recognizer.getVersionString())

    source.start()
    try:
        while True:
            chunk = source.read(chunk_len, chunk_len)
            if not chunk:
                time.sleep(0.01)
                continue

            mel = mel_extractor.signalToMel(chunk, mel_gain)
            prediction = recognizer.runDetection(mel)
            if prediction == 0:
                continue

            timestamp = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
            for keyword_id, message in keyword_table:
                if prediction == keyword_id:
                    print(message + timestamp)
                    break

            # The sound is played for any non-zero prediction, matched or not.
            os.system(play_command + " ../resources/ding.wav")
    except KeyboardInterrupt:
        print("Terminating")
        source.stop()
        sys.exit(0)
#!/usr/bin/env python

import socket
import sys
import os
sys.path.append('../../python/src')

from libnyumaya import AudioRecognition
from auto_platform import default_libpath

# Minimal TCP server: accept a single client on port 9999 and run keyword
# detection on every buffer the client sends.
serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
serversocket.bind(('', 9999))
serversocket.listen(5)  # listen with a backlog of 5 pending connections

detector = AudioRecognition(default_libpath)
keywordIdFirefox = detector.addModel(
    '../../models/Hotword/firefox_v2.0.23.premium', 0.8)

# Only one client is ever accepted; this call blocks until it connects.
connection, address = serversocket.accept()

try:
    while True:
        buf = connection.recv(640)
        if not buf:
            # recv() returns b'' once the peer has closed the connection.
            # Without this exit the loop would spin forever on empty reads.
            break

        # NOTE(review): recv() may return fewer than 640 bytes; confirm that
        # runDetection accepts short buffers.
        prediction = detector.runDetection(buf)
        if prediction != 0:
            print("Keyword detected")
finally:
    # Release both sockets however the loop ends.
    connection.close()
    serversocket.close()
def recordActivations(libpath):
    """Save the audio preceding every keyword activation to a WAV file.

    One ``AudioRecognition`` instance is created per entry of the module-level
    ``models`` sequence (``(path, sensitivity, name)`` tuples). For each model
    name a rolling buffer of the most recent audio is kept, limited to
    ``2.5 * bytesPerSample * framesPerSecond`` bytes. When a detector returns
    a non-zero prediction, that model's buffer is written into
    ``saveDirectory`` via ``save_wav``. Ctrl-C stops the stream and exits
    with status 0.

    Args:
        libpath: Library path used for the extractor and every recognizer.
    """
    source = AudiostreamSource()
    mel_extractor = FeatureExtractor(libpath)

    mel_gain = 1.0
    seconds_before = 2.5  # Seconds before the activation
    seconds_after = 0.5  # Seconds after the activation

    saved_count = 0
    ensure_dir(saveDirectory)

    front_limit = int(seconds_before * bytesPerSample * framesPerSecond)
    # Computed for the (currently disabled) "record after" feature.
    back_limit = int(seconds_after * bytesPerSample * framesPerSecond)

    recognizers = {}
    pre_roll = {}
    post_roll = {}  # unused until "record after" is implemented
    for model_path, model_sensitivity, model_name in models:
        recognizer = AudioRecognition(libpath)
        recognizer.addModel(model_path, model_sensitivity)
        recognizers[model_name] = recognizer
        pre_roll[model_name] = bytearray()
        post_roll[model_name] = bytearray()

    # NOTE(review): uses the recognizer left over from the last loop
    # iteration, so an empty ``models`` sequence fails here.
    chunk_len = recognizer.getInputDataSize() * 2

    print("Audio Recognition Version: " + recognizer.getVersionString())

    source.start()
    try:
        while True:
            chunk = source.read(chunk_len, chunk_len)
            if not chunk:
                time.sleep(0.01)
                continue

            # Append the new audio to every pre-activation buffer and trim
            # each one to its most recent ``front_limit`` bytes.
            for model_name in recognizers:
                rolling = pre_roll[model_name] + chunk
                if len(rolling) > front_limit:
                    rolling = rolling[-front_limit:]
                pre_roll[model_name] = rolling

            mel = mel_extractor.signalToMel(chunk, mel_gain)

            for model_name, recognizer in recognizers.items():
                if recognizer.runDetection(mel) == 0:
                    continue

                # FIXME: Record after is currently ignored -- only the audio
                # captured before the activation is saved.
                out_path = saveDirectory + "/activation_{}_{}_{}.wav".format(
                    model_name, saved_count, time.time_ns())
                save_wav(pre_roll[model_name], out_path)
                print("Saving Activation to {}".format(out_path))
                saved_count += 1
    except KeyboardInterrupt:
        print("Terminating")
        source.stop()
        sys.exit(0)
Example #9
0
	# Command-line options: input folders and the location of the native library.
	# NOTE(review): the enclosing function header and the creation of
	# ``parser`` are outside this excerpt.
	parser.add_argument(
		'--good_folder', type=str, default='./good_files/', help='Path to good files.')
	parser.add_argument(
		'--noise_folders', type=str, default='./demand/', help='Path to noise files.')
	parser.add_argument(
		'--bad_folders', type=str, default='', help='Path to additional bad folders seperated by comma.')
	parser.add_argument(
		'--libpath', type=str, default='../lib/linux/libnyumaya.so', help='Path to nyumaya_library')


	# Unknown arguments are collected in ``unparsed`` instead of raising an error.
	FLAGS, unparsed = parser.parse_known_args()


	# Sensitivity values to evaluate, one run per value.
	sensitivities = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,0.95,0.99]

	# FLAGS.graph and FLAGS.labels are presumably defined by add_argument calls
	# above this excerpt -- TODO confirm.
	detector = AudioRecognition(FLAGS.libpath,FLAGS.graph,FLAGS.labels)
	# Each sensitivity is evaluated twice: first with noise=False, then noise=True.
	addnoise = [False,True]
	results_clean = []
	results_noisy = []
	results_false = []
	print(FLAGS.graph + "\n")
	for noise in addnoise:

		for sensitivity in sensitivities:
			wrong_predictions, good_predictions,missed_predictions,samples = run_good_predictions(detector,FLAGS.good_folder,FLAGS.noise_folders,noise,sensitivity)

			# Accuracy is 1 minus the share of samples that were missed or wrong.
			result = {}
			result["sensitivity"] = sensitivity
			result["accuracy"] = 1-(missed_predictions+wrong_predictions)/samples
			# Only the noisy branch is visible in this excerpt.
			if(noise):
				results_noisy.append(result)
	def add_detector(self, graph, labels, sensitivity):
		"""Create a recognizer for ``graph``/``labels`` and register it.

		The new ``AudioRecognition`` instance is built with ``self.libpath``,
		has ``sensitivity`` applied through ``SetSensitivity`` and is then
		appended to ``self.detectors``.
		"""
		recognizer = AudioRecognition(self.libpath, graph, labels)
		recognizer.SetSensitivity(sensitivity)
		self.detectors.append(recognizer)
def label_stream():
    """Two-stage voice control: wait for the hotword, then for an action.

    While idle, every chunk is run through the hotword recognizer and the
    label of its prediction is printed. When the label is ``'light'`` the
    function switches to listening mode for up to 20 chunks, during which the
    action recognizer is used instead. Any action prediction ends listening
    mode; the labels ``"on"`` and ``"off"`` additionally print a message.
    Ctrl-C stops the stream and exits with status 0.

    Recognizer configuration (``libpath``, graphs and labels) is read from
    module-level names.
    """
    listening = False
    chunks_left = 0

    source = AudiostreamSource()

    action_recognizer = AudioRecognition(libpath, action_graph, action_labels)
    hotword_recognizer = AudioRecognition(libpath, hotword_graph, hotword_labels)

    hotword_recognizer.SetSensitivity(0.5)
    action_recognizer.SetSensitivity(0.55)
    chunk_len = hotword_recognizer.GetInputDataSize()
    source.start()

    print("Audio Recognition Version: " + hotword_recognizer.GetVersionString())
    try:
        while True:
            chunk = source.read(chunk_len, chunk_len)
            if not chunk:
                time.sleep(0.01)
                continue

            # Count down the listening window; leave listening mode at zero.
            if chunks_left > 0:
                chunks_left -= 1
                if chunks_left == 0:
                    listening = False
                    print("Stopped Listening")

            if listening:
                prediction = action_recognizer.RunDetection(chunk)
                if prediction:
                    action = action_recognizer.GetPredictionLabel(prediction)

                    if action == "on":
                        print("Turning light on")

                    if action == "off":
                        print("Turning light off")

                    # Any recognised action closes the listening window.
                    chunks_left = 0
                    listening = False
            else:
                prediction = hotword_recognizer.RunDetection(chunk)
                # The label is printed for every chunk, detection or not.
                print(hotword_recognizer.GetPredictionLabel(prediction))
                is_hotword = (
                    prediction
                    and hotword_recognizer.GetPredictionLabel(prediction) == 'light'
                )
                if is_hotword:
                    listening = True
                    chunks_left = 20
                    # NOTE(review): this timestamp is computed but never used.
                    now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
                    print("Listening")
    except KeyboardInterrupt:
        print("Terminating")
        source.stop()
        sys.exit(0)
Example #12
0
def detectKeywords(libpath):
    """Wait for a wakeword, then feed audio to a Kaldi recognizer.

    The loop alternates between two modes:

    * Wakeword mode: chunks are converted with ``signalToMel`` and passed to
      ``runDetection``. Any non-zero prediction plays
      ``../resources/ding.wav`` and switches to command mode.
    * Command mode: chunks of 4000 are passed to
      ``KaldiRecognizer.AcceptWaveform``. When it returns a truthy value,
      ``Result()`` and ``FinalResult()`` are printed and the loop returns to
      wakeword mode.

    Ctrl-C stops the stream and exits with status 0.

    Args:
        libpath: Library path used for both ``FeatureExtractor`` and
            ``AudioRecognition``.
    """
    source = AudiostreamSource()
    mel_extractor = FeatureExtractor(libpath)
    recognizer = AudioRecognition(libpath)

    sample_rate = 16000
    vosk_model = Model("model")

    # Restrict the Kaldi recognizer to a custom dictionary.
    speech_rec = KaldiRecognizer(
        vosk_model, sample_rate,
        '["oh one two three four five six seven eight nine zero", "[unk]"]')

    mel_gain = 1.0

    # Add one or more keyword models
    alexa_id = recognizer.addModel(
        '../../models/Hotword/alexa_v3.0.35.premium', 0.85)

    wake_chunk_len = recognizer.getInputDataSize() * 2

    print("Audio Recognition Version: " + recognizer.getVersionString())

    awaiting_command = False

    source.start()
    try:
        while True:
            if awaiting_command:
                # Command mode: stream audio into the Kaldi recognizer.
                chunk = source.read(4000, 4000)
                if not chunk:
                    time.sleep(0.01)
                    continue

                if speech_rec.AcceptWaveform(bytes(chunk)):
                    print(speech_rec.Result())
                    awaiting_command = False
                    print(speech_rec.FinalResult())
            else:
                # Wakeword mode.
                chunk = source.read(wake_chunk_len, wake_chunk_len)
                if not chunk:
                    time.sleep(0.01)
                    continue

                mel = mel_extractor.signalToMel(chunk, mel_gain)
                prediction = recognizer.runDetection(mel)
                if prediction == 0:
                    continue

                timestamp = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
                if prediction == alexa_id:
                    print("Alexa detected:" + timestamp)

                os.system(play_command + " ../resources/ding.wav")
                awaiting_command = True
    except KeyboardInterrupt:
        print("Terminating")
        source.stop()
        sys.exit(0)
class MultiDetector():
    """Recognise comma-separated word sequences with one keyword detector.

    Words are registered with ``add_word`` and commands (sequences of word
    names plus a callback) with ``add_command``. ``run_frame`` feeds one
    frame to the detector, appends accepted words to ``history`` and fires a
    command's callback once ``history`` equals its word sequence. If no
    further word arrives within ``timeout`` calls of ``run_frame``, the
    history is cleared.
    """

    # Optional callbacks. They default to None at class level so that
    # check_timeout() and run_frame() work even when no callback was
    # registered through add_reset_history_callback / add_detected_callback.
    history_callback = None
    detected_callback = None

    def __init__(self, libpath, timeout=40):
        """Create the detector and initialise empty state.

        Args:
            libpath: Library path passed to ``AudioRecognition``.
            timeout: Number of ``run_frame`` calls after the last accepted
                word before the history is reset.
        """
        self.current_index = 0
        self.number_detectors = 0
        self.countdown = 0
        self.timeout = timeout
        self.commands = []
        self.libpath = libpath
        self.history = []
        # Words accepted next; recomputed by update_word_and_detector().
        self.possible_words = []
        self.last_frames = []
        self.max_last_frames = 5
        self.detector = AudioRecognition(self.libpath)
        self.keyword_map = {}

    #Given the current history which words are we checking for?
    def get_possible_words(self, history):
        """Return the distinct next words of all commands matching ``history``.

        For each command, ``command_starts_with_history`` yields an index into
        its word list; a non-negative index selects the next expected word.
        An index at or past the end of a command is reported as an error and
        an empty list is returned.
        """
        words = []
        for cmd in self.commands:
            sequence = cmd['command']
            index = command_starts_with_history(sequence, history)

            if (index >= len(sequence)):
                print("Error index out of range:")
                print("Command: " + str(cmd))
                print("Index: " + str(index))
                print("History: " + str(history))
                return []

            if (index >= 0):
                word = sequence[index]
                if word not in words:
                    words.append(word)
        return words

    def UpdateLastFrames(self, frame):
        """Append ``frame``, keeping at most ``max_last_frames`` entries."""
        self.last_frames.append(frame)
        if len(self.last_frames) > self.max_last_frames:
            self.last_frames.pop(0)

    def add_command(self, command, callback_function):
        """Register a command.

        Args:
            command: Comma-separated word names, e.g. ``"light,on"``.
            callback_function: Called without arguments when the command's
                word sequence has been recognised.

        Empty or blank commands are rejected with a message.
        """
        # str.split(",") never returns an empty list, so the emptiness check
        # has to be done on the string itself.
        if not command or not command.strip():
            print("No valid command")
            return

        self.commands.append({
            'command': command.split(","),
            'function': callback_function
        })
        self.update_word_and_detector()

    def add_word(self, graph, name, sensitivity):
        """Add a keyword model and remember the name for its returned id."""
        keywordId = self.detector.addModel(graph, sensitivity)
        self.keyword_map[keywordId] = name

    def add_reset_history_callback(self, callback_function):
        """Set the callback invoked when the history times out."""
        self.history_callback = callback_function

    def add_detected_callback(self, callback_function):
        """Set the callback invoked after every accepted word."""
        self.detected_callback = callback_function

    def GetInputDataSize(self):
        """Return the detector's input data size."""
        return self.detector.getInputDataSize()

    def maby_execute(self):
        """Run the callback of any command equal to the current history.

        On a match the history, countdown and stored frames are reset.

        Returns:
            True if a command was executed, otherwise False.
        """
        executed_cmd = False
        for cmd in self.commands:
            if (cmd['command'] == self.history):
                cmd['function']()
                self.history = []
                self.countdown = 0
                self.last_frames = []
                executed_cmd = True

        return executed_cmd

    def check_timeout(self):
        """Advance the timeout countdown; reset the history when it expires."""
        if (self.countdown > 0):
            self.countdown -= 1
            if (self.countdown == 0):
                self.history = []
                self.update_word_and_detector()
                if (self.history_callback):
                    self.history_callback()

    def update_word_and_detector(self):
        """Recompute ``possible_words`` and (de)activate keyword models."""
        self.possible_words = self.get_possible_words(self.history)
        print(self.possible_words)
        # Only the words that can come next stay active in the detector.
        for keyword_id, name in self.keyword_map.items():
            self.detector.setActive(keyword_id, name in self.possible_words)

    def run_frame(self, frame, update_frames=True):
        """Process one frame.

        Args:
            frame: Input passed to the detector's ``runDetection``.
            update_frames: When True, ``frame`` is also stored in
                ``last_frames``; False is used while replaying stored frames.
        """
        if (update_frames):
            self.UpdateLastFrames(frame)

        self.check_timeout()

        prediction = self.detector.runDetection(frame)
        if (prediction):
            label = self.keyword_map[prediction]
            if (label in self.possible_words):
                print("Got prediction: " + label)
                self.countdown = self.timeout
                self.history.append(label)
                result = self.maby_execute()
                self.update_word_and_detector()

                if (self.detected_callback):
                    self.detected_callback()

                #Command hasn't finished so run last frames in next detectors
                if (not result):
                    self.run_last_frames()

    def run_last_frames(self):
        """Replay the stored frames without storing them again."""
        for frame in self.last_frames:
            self.run_frame(frame, update_frames=False)

    def print_commands(self):
        """Print every registered command dictionary."""
        for cmd in self.commands:
            print(cmd)
Example #14
0
def label_stream(labels, libpath, verification_path, graph, sensitivity):
    """Detect keywords and run speaker verification on each detection.

    The last ten feature blocks are retained. On every truthy prediction they
    are concatenated and passed to ``SpeakerVerification.VerifySpeaker``. The
    result is appended to the module-level ``fingerprints`` list while it
    holds fewer than ``enrolling`` entries, and is compared against the
    averaged fingerprint (when one is available) with ``cosine_similarity``.
    Ctrl-C stops the stream and exits with status 0.

    Args:
        labels: Labels argument forwarded to ``AudioRecognition``.
        libpath: Library path for the extractor, recognizer and verifier.
        verification_path: Second argument of ``SpeakerVerification``.
        graph: Graph argument forwarded to ``AudioRecognition``.
        sensitivity: Value passed to ``SetSensitivity``.
    """
    recent_features = []

    #Keyword spotting has 200ms frames, Verifiyer takes 2 seconds of audio
    history_limit = 10

    source = AudiostreamSource()
    mel_extractor = FeatureExtractor(libpath)

    recognizer = AudioRecognition(libpath, graph, labels)
    recognizer.SetSensitivity(sensitivity)

    speaker_verifier = SpeakerVerification(libpath, verification_path)

    bufsize = recognizer.GetInputDataSize()

    print("Bufsize: " + str(bufsize))
    print("Audio Recognition Version: " + recognizer.GetVersionString())

    # Usage notes, printed one line at a time.
    banner = (
        "WARNING EXPERIMENTAL: The voice verification module can be use to verify if",
        "A command is issued by a certian speaker. It processes speech signals with a",
        "two second length. This experimental version isn't very good yet.",
        "\n\n During enrolling a fingerprint of your voice is caputred. By default 5 samples",
        "Will be captured and averaged. The progam will output a similarity score between 0 and 1",
        "A value of 1 means totally similar, 0 means different.",
        "Currently a threshold of 0.95 seems good",
        "This module should not be run on a Pi Zero, as it uses excessive CPU",
        "Verification can also be helpful to reduce false positives of non speech signals",
    )
    for line in banner:
        print(line)

    # Every read requests twice the size reported by the recognizer.
    chunk_len = bufsize * 2

    source.start()
    try:
        while True:
            chunk = source.read(chunk_len, chunk_len)
            if not chunk:
                time.sleep(0.01)
                continue

            mel = mel_extractor.signal_to_mel(chunk)

            # Keep only the newest ``history_limit`` feature blocks.
            recent_features.append(mel)
            if len(recent_features) > history_limit:
                del recent_features[0]

            prediction = recognizer.RunDetection(mel)
            if not prediction:
                continue

            timestamp = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
            print(recognizer.GetPredictionLabel(prediction) + " " + timestamp)
            os.system(play_command + " ./resources/ding.wav")

            # Concatenate the retained feature blocks for the verifier.
            verification_input = bytearray()
            for block in recent_features:
                verification_input.extend(block)

            print("Running Verification")

            fingerprint = speaker_verifier.VerifySpeaker(verification_input)

            if len(fingerprints) < enrolling:
                print("Enrolling")
                fingerprints.append(fingerprint)
            else:
                print("Completed")

            print(fingerprint)

            reference = get_averaged_fingerprint()
            if reference:
                score = cosine_similarity(fingerprint, reference)
                print("Similarity: " + str(score))

            print("Verification Done")
    except KeyboardInterrupt:
        print("Terminating")
        source.stop()
        sys.exit(0)