def detectKeywords(libpath):
    """Continuously run voice-activity detection (VAD) on the mic stream.

    Reads raw audio frames from an AudiostreamSource, converts each frame
    to mel features, feeds them through the recognizer, and prints a
    message whenever the continuous VAD model's score exceeds the
    threshold. Runs until interrupted with Ctrl+C.

    Args:
        libpath: Path to the native audio-recognition shared library.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detector = AudioRecognition(libpath)

    extractor_gain = 1.0  # gain applied during mel-feature extraction
    vad_threshold = 0.2   # report speech when the VAD score exceeds this

    keyword_vad = detector.addContinousModel(
        '../../models/Hotword/vad_16.premium')
    bufsize = detector.getInputDataSize()

    print("Audio Recognition Version: " + detector.getVersionString())

    audio_stream.start()
    try:
        while True:
            # bufsize is in samples; the stream API works in bytes
            # (16-bit audio, hence the *2).
            frame = audio_stream.read(bufsize * 2, bufsize * 2)
            if not frame:
                # No audio available yet; back off briefly instead of
                # busy-waiting.
                time.sleep(0.01)
                continue

            features = extractor.signalToMel(frame, extractor_gain)
            _ = detector.runDetection(features)

            # Index 1 appears to carry the VAD score for the current
            # frame — TODO confirm against the AudioRecognition API.
            vad_result = detector.getContinousResult(keyword_vad)

            if vad_result[1] > vad_threshold:
                print("Speech detected")

    except KeyboardInterrupt:
        print("Terminating")
        audio_stream.stop()
        sys.exit(0)
# Esempio n. 2
def detectKeywords(libpath):
    """Listen for several hotwords and play a chime on every detection.

    Registers four premium hotword models, then loops forever reading
    audio frames, extracting mel features, and running detection. When a
    known keyword fires, its name and a timestamp are printed; for any
    nonzero prediction a confirmation sound is played.

    Args:
        libpath: Path to the native audio-recognition shared library.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detector = AudioRecognition(libpath)

    extractor_gain = 1.0  # gain applied during mel-feature extraction

    # Add one or more keyword models, mapping each returned model id to
    # its display name so the detection branch is a single dict lookup.
    keyword_names = {
        detector.addModel(
            '../../models/Hotword/firefox_v1.4.5.premium', 0.6): "Firefox",
        detector.addModel(
            '../../models/Hotword/sheila_v1.4.5.premium', 0.6): "Sheila",
        detector.addModel(
            '../../models/Hotword/marvin_v1.4.5.premium', 0.6): "Marvin",
        detector.addModel(
            '../../models/Hotword/alexa_v1.4.5.premium', 0.6): "Alexa",
    }

    bufsize = detector.getInputDataSize()

    print("Audio Recognition Version: " + detector.getVersionString())

    audio_stream.start()
    try:
        while True:
            # bufsize is in samples; the stream API works in bytes
            # (16-bit audio, hence the *2).
            frame = audio_stream.read(bufsize * 2, bufsize * 2)
            if not frame:
                # No audio available yet; avoid busy-waiting.
                time.sleep(0.01)
                continue

            features = extractor.signalToMel(frame, extractor_gain)

            prediction = detector.runDetection(features)
            if prediction != 0:
                now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
                name = keyword_names.get(prediction)
                if name is not None:
                    print(name + " detected:" + now)

                # NOTE(review): os.system with a string-built command;
                # play_command comes from module scope. Prefer
                # subprocess.run([...]) if play_command can ever be
                # attacker-influenced.
                os.system(play_command + " ../resources/ding.wav")

    except KeyboardInterrupt:
        print("Terminating")
        audio_stream.stop()
        sys.exit(0)
def recordActivations(libpath):
    """Run one detector per configured model and save a WAV clip of the
    audio preceding each activation.

    Relies on module-level names not defined in this block: `models`
    (iterable of (path, sensitivity, name) tuples), `saveDirectory`,
    `bytesPerSample`, `framesPerSecond`, `ensure_dir`, and `save_wav`.
    Runs until interrupted with Ctrl+C.
    """

    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)

    # Per-model-name state: a detector instance plus rolling byte
    # buffers for audio before/after an activation.
    detectors = {}
    framebuffersFront = {}
    framebuffersBack = {}

    extactor_gain = 1.0
    recordBefore = 2.5  # Seconds before the activation
    recordAfter = 0.5  # Seconds after the activation

    activationCount = 0
    ensure_dir(saveDirectory)

    # Buffer sizes in bytes for the pre-/post-activation windows.
    rbFrontSize = int(recordBefore * bytesPerSample * framesPerSecond)
    rbBackSize = int(recordAfter * bytesPerSample * framesPerSecond)

    for mpath, msens, mname in models:
        detector = AudioRecognition(libpath)
        detector.addModel(mpath, msens)
        detectors[mname] = detector
        framebuffersFront[mname] = bytearray()
        framebuffersBack[mname] = bytearray()

    # NOTE(review): `detector` here is the loop variable left over from
    # the loop above (the last model's detector); if `models` is empty
    # this raises NameError. Assumes all detectors share one input size.
    bufsize = detector.getInputDataSize()

    print("Audio Recognition Version: " + detector.getVersionString())

    audio_stream.start()
    try:
        while (True):
            # bufsize is in samples; the stream API works in bytes
            # (16-bit audio, hence the *2).
            frame = audio_stream.read(bufsize * 2, bufsize * 2)

            if (not frame):
                time.sleep(0.01)
                continue

            for mname in detectors:
                #Fill audio before the activation
                # Append the new frame, then trim to keep only the most
                # recent rbFrontSize bytes (rolling pre-roll buffer).
                framebuffersFront[mname] = framebuffersFront[mname] + frame
                if (len(framebuffersFront[mname]) > rbFrontSize):
                    framebuffersFront[mname] = framebuffersFront[mname][
                        -rbFrontSize:]

            features = extractor.signalToMel(frame, extactor_gain)

            for mname in detectors:
                detector = detectors[mname]
                prediction = detector.runDetection(features)
                if (prediction != 0):
                    #FIXME: Record after is currently ignored
                    #Fill audio after the activation
                    #while(len(framebuffersBack[mname]) < rbBackSize):
                    #	frame = audio_stream.read(bufsize*2,bufsize*2)
                    #	if(not frame):
                    #		time.sleep(0.01)
                    #		continue
                    #	framebuffersBack[mname] = framebuffersBack[mname] + frame

                    # Save only the pre-activation audio (post-activation
                    # capture is disabled per the FIXME above, so
                    # framebuffersBack is currently never filled).
                    savePath = saveDirectory + "/activation_{}_{}_{}.wav".format(
                        mname, activationCount, time.time_ns())
                    save_wav(framebuffersFront[mname], savePath)
                    #save_wav(framebufferFront+framebufferBack,savePath)
                    print("Saving Activation to {}".format(savePath))
                    activationCount += 1

    except KeyboardInterrupt:
        print("Terminating")
        audio_stream.stop()
        sys.exit(0)
# Esempio n. 4
def detectKeywords(libpath):
    """Two-phase pipeline: wait for the 'Alexa' hotword, then stream the
    following audio into a Vosk recognizer restricted to spoken digits.

    Phase one runs hotword detection on fixed-size frames; on a hit it
    plays a chime and switches modes. Phase two feeds 4000-byte chunks
    to Vosk until a full utterance is accepted, prints the result, and
    falls back to hotword listening. Runs until interrupted with Ctrl+C.

    Args:
        libpath: Path to the native audio-recognition shared library.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detector = AudioRecognition(libpath)

    sample_rate = 16000
    vosk_model = Model("model")

    # Restrict Vosk to a digit vocabulary plus an unknown-word token.
    recognizer = KaldiRecognizer(
        vosk_model, sample_rate,
        '["oh one two three four five six seven eight nine zero", "[unk]"]')

    gain = 1.0

    # Register the single hotword model.
    alexa_id = detector.addModel(
        '../../models/Hotword/alexa_v3.0.35.premium', 0.85)

    chunk = detector.getInputDataSize()

    print("Audio Recognition Version: " + detector.getVersionString())

    in_command_mode = False

    audio_stream.start()
    try:
        while True:
            if in_command_mode:
                # Command phase: hand raw audio to Vosk.
                frame = audio_stream.read(4000, 4000)
                if not frame:
                    time.sleep(0.01)
                    continue

                if recognizer.AcceptWaveform(bytes(frame)):
                    print(recognizer.Result())
                    in_command_mode = False
                    print(recognizer.FinalResult())
            else:
                # Hotword phase: chunk size is in samples, the stream
                # API is in bytes (16-bit audio, hence the *2).
                frame = audio_stream.read(chunk * 2, chunk * 2)
                if not frame:
                    time.sleep(0.01)
                    continue

                mel = extractor.signalToMel(frame, gain)
                prediction = detector.runDetection(mel)
                if prediction:
                    stamp = datetime.datetime.now().strftime(
                        "%d.%b %Y %H:%M:%S")
                    if prediction == alexa_id:
                        print("Alexa detected:" + stamp)

                    os.system(play_command + " ../resources/ding.wav")
                    in_command_mode = True

    except KeyboardInterrupt:
        print("Terminating")
        audio_stream.stop()
        sys.exit(0)