def label_stream(labels, libpath, graph, sensitivity):
    """Run keyword detection on the live audio stream until interrupted.

    Every audio chunk is converted to mel features and handed to the
    detector. For each truthy prediction the predicted label is printed
    together with a timestamp and a notification sound is played.

    Args:
        labels: Labels argument forwarded to ``AudioRecognition``.
        libpath: Library path forwarded to ``FeatureExtractor`` and
            ``AudioRecognition``.
        graph: Graph argument forwarded to ``AudioRecognition``.
        sensitivity: Value passed to ``detector.SetSensitivity``.

    The loop has no normal exit: a ``KeyboardInterrupt`` prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.
    """
    audio_stream = AudiostreamSource()

    extractor = FeatureExtractor(libpath)
    extractor_gain = 1.0

    detector = AudioRecognition(libpath, graph, labels)
    detector.SetSensitivity(sensitivity)

    # Chunks are requested at twice the detector's input size
    # (presumably 2 bytes per sample -- confirm against AudiostreamSource).
    chunk_size = detector.GetInputDataSize() * 2

    print("Audio Recognition Version: " + detector.GetVersionString())

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(chunk_size, chunk_size)
            if not frame:
                # Nothing buffered yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            features = extractor.signal_to_mel(frame, extractor_gain)
            prediction = detector.RunDetection(features)
            if not prediction:
                continue

            now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
            print(detector.GetPredictionLabel(prediction) + " " + now)
            os.system(play_command + " ./resources/ding.wav")

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl-C (before SystemExit propagates) and for any other
        # error, so the stream is never left running.
        audio_stream.stop()
def detectKeywords(libpath):
    """Print "Speech detected" whenever the continuous VAD model fires.

    A continuous model is loaded from
    ``../../models/Hotword/vad_16.premium``. After each detection pass its
    result is queried and element 1 of that result is compared against a
    fixed threshold of 0.2.

    Args:
        libpath: Library path forwarded to ``FeatureExtractor`` and
            ``AudioRecognition``.

    The loop has no normal exit: a ``KeyboardInterrupt`` prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detector = AudioRecognition(libpath)

    extractor_gain = 1.0
    vad_threshold = 0.2

    vad_model_id = detector.addContinousModel(
        '../../models/Hotword/vad_16.premium')

    # Chunks are requested at twice the detector's input size
    # (presumably 2 bytes per sample -- confirm against AudiostreamSource).
    chunk_size = detector.getInputDataSize() * 2

    print("Audio Recognition Version: " + detector.getVersionString())

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(chunk_size, chunk_size)
            if not frame:
                # Nothing buffered yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            features = extractor.signalToMel(frame, extractor_gain)
            # The keyword prediction is not needed here; the call is made so
            # the continuous result queried below reflects this frame.
            detector.runDetection(features)

            vad_result = detector.getContinousResult(vad_model_id)
            # NOTE(review): index 1 is presumably the speech score -- confirm
            # against the library documentation.
            if vad_result[1] > vad_threshold:
                print("Speech detected")

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl-C (before SystemExit propagates) and for any other
        # error, so the stream is never left running.
        audio_stream.stop()
Ejemplo n.º 3
0
def label_stream(libpath):
    """Extract mel features from the live audio stream and send them on.

    Every audio chunk is converted to mel features (gain 16.0) and passed to
    ``send_features``; no detection is performed in this function.

    Args:
        libpath: Library path forwarded to ``FeatureExtractor``.

    The loop has no normal exit: a ``KeyboardInterrupt`` prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.
    """
    audio_stream = AudiostreamSource()

    extractor = FeatureExtractor(libpath)
    extractor_gain = 16.0

    # FIXME: This is just used for bufsize
    # NOTE(review): the detector is built from the module-level
    # ``default_libpath`` rather than the ``libpath`` argument -- confirm
    # this is intended.
    detector = AudioRecognition(default_libpath)
    chunk_size = detector.getInputDataSize() * 2

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(chunk_size, chunk_size)
            if not frame:
                # Nothing buffered yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            send_features(extractor.signalToMel(frame, extractor_gain))

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl-C (before SystemExit propagates) and for any other
        # error (e.g. a failing send), so the stream is never left running.
        audio_stream.stop()
Ejemplo n.º 4
0
def label_stream(libpath):
    """Feed the live audio stream into a ``MultiDetector`` with commands.

    Two detectors are registered (the action graph with 0.8 and the hotword
    graph with 0.5 as third argument) and three commands are bound:
    "marvin,on" -> ``light_on``, "marvin,off" -> ``light_off`` and
    "stop" -> ``stop``. Every audio chunk is converted to mel features and
    passed to ``mDetector.run_frame``.

    Args:
        libpath: Library path forwarded to ``FeatureExtractor`` and
            ``MultiDetector``.

    The loop has no normal exit: a ``KeyboardInterrupt`` prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.
    """
    extractor = FeatureExtractor(libpath)
    extractor_gain = 1.0

    mDetector = MultiDetector(libpath, timeout=20)

    # NOTE(review): the numeric argument is presumably the per-detector
    # sensitivity -- confirm against MultiDetector.add_detector.
    mDetector.add_detector(action_graph, action_labels, 0.8)
    mDetector.add_detector(hotword_graph, hotword_labels, 0.5)

    for phrase, handler in (
        ("marvin,on", light_on),
        ("marvin,off", light_off),
        ("stop", stop),
    ):
        mDetector.add_command(phrase, handler)

    # Chunks are requested at twice the detector's input size
    # (presumably 2 bytes per sample -- confirm against AudiostreamSource).
    chunk_size = mDetector.GetInputDataSize() * 2

    audio_stream = AudiostreamSource()
    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(chunk_size, chunk_size)
            if not frame:
                # Nothing buffered yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            mDetector.run_frame(
                extractor.signal_to_mel(frame, extractor_gain))

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl-C (before SystemExit propagates) and for any other
        # error (e.g. a failing command callback), so the stream is never
        # left running.
        audio_stream.stop()
Ejemplo n.º 5
0
def detectKeywords(libpath):
    """Detect the Firefox/Sheila/Marvin/Alexa keywords on the live stream.

    Four keyword models are added to one detector. For every non-zero
    prediction a notification sound is played; if the prediction matches one
    of the ids returned by ``addModel``, "<Name> detected:<timestamp>" is
    printed first.

    Args:
        libpath: Library path forwarded to ``FeatureExtractor`` and
            ``AudioRecognition``.

    The loop has no normal exit: a ``KeyboardInterrupt`` prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detector = AudioRecognition(libpath)

    extractor_gain = 1.0

    # Add one or more keyword models
    # NOTE(review): 0.6 is presumably the model sensitivity -- confirm
    # against AudioRecognition.addModel.
    keyword_models = (
        ("Firefox", '../../models/Hotword/firefox_v1.4.5.premium'),
        ("Sheila", '../../models/Hotword/sheila_v1.4.5.premium'),
        ("Marvin", '../../models/Hotword/marvin_v1.4.5.premium'),
        ("Alexa", '../../models/Hotword/alexa_v1.4.5.premium'),
    )
    keyword_names = {}
    for display_name, model_path in keyword_models:
        keyword_id = detector.addModel(model_path, 0.6)
        # setdefault keeps the first model registered for an id, matching
        # the precedence of a first-match if/elif chain.
        keyword_names.setdefault(keyword_id, display_name)

    # Chunks are requested at twice the detector's input size
    # (presumably 2 bytes per sample -- confirm against AudiostreamSource).
    chunk_size = detector.getInputDataSize() * 2

    print("Audio Recognition Version: " + detector.getVersionString())

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(chunk_size, chunk_size)
            if not frame:
                # Nothing buffered yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            features = extractor.signalToMel(frame, extractor_gain)
            prediction = detector.runDetection(features)
            if prediction == 0:
                continue

            now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
            display_name = keyword_names.get(prediction)
            if display_name is not None:
                print(display_name + " detected:" + now)

            # The sound is played for any non-zero prediction, even one that
            # matches none of the registered keyword ids.
            os.system(play_command + " ../resources/ding.wav")

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl-C (before SystemExit propagates) and for any other
        # error, so the stream is never left running.
        audio_stream.stop()
Ejemplo n.º 6
0
def label_stream(libpath):
    """Feed the live audio stream into a word-based ``MultiDetector``.

    Four word models ("firefox", "light", "off", "stop") are registered,
    three commands are bound ("firefox,light" -> ``light_on``,
    "firefox,off" -> ``light_off``, "stop" -> ``stop``), and ``feedback`` /
    ``history_reset`` are installed as detected / reset-history callbacks.
    Every audio chunk is converted to mel features and passed to
    ``mDetector.run_frame``.

    Args:
        libpath: Library path forwarded to ``FeatureExtractor`` and
            ``MultiDetector``.

    The loop has no normal exit: a ``KeyboardInterrupt`` prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.
    """
    extractor = FeatureExtractor(libpath)
    extractor_gain = 1.0

    mDetector = MultiDetector(libpath, timeout=20)

    # (model path, word, third add_word argument)
    # NOTE(review): the numeric value is presumably the sensitivity --
    # confirm against MultiDetector.add_word.
    words = (
        ("../../models/Hotword/firefox_v1.2.0.premium", "firefox", 0.5),
        ("../../models/Command/light_v1.2.0.premium", "light", 0.8),
        ("../../models/Command/off_v1.2.0.premium", "off", 0.8),
        ("../../models/Command/stop_v1.2.0.premium", "stop", 0.8),
    )
    for model_path, word, value in words:
        mDetector.add_word(model_path, word, value)

    for phrase, handler in (
        ("firefox,light", light_on),
        ("firefox,off", light_off),
        ("stop", stop),
    ):
        mDetector.add_command(phrase, handler)

    mDetector.add_detected_callback(feedback)
    mDetector.add_reset_history_callback(history_reset)

    # Chunks are requested at twice the detector's input size
    # (presumably 2 bytes per sample -- confirm against AudiostreamSource).
    chunk_size = mDetector.GetInputDataSize() * 2

    audio_stream = AudiostreamSource()
    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(chunk_size, chunk_size)
            if not frame:
                # Nothing buffered yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            mDetector.run_frame(
                extractor.signalToMel(frame, extractor_gain))

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl-C (before SystemExit propagates) and for any other
        # error (e.g. a failing callback), so the stream is never left
        # running.
        audio_stream.stop()
Ejemplo n.º 7
0
def label_stream(libpath):
    """Extract mel features from the live audio stream and send them on.

    Audio is read with a fixed buffer size of 3200 (requested as
    ``3200 * 2``), converted to mel features with gain 16.0 and passed to
    ``send_features``; no detection is performed in this function.

    Args:
        libpath: Library path forwarded to ``FeatureExtractor``.

    The loop has no normal exit: a ``KeyboardInterrupt`` prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.
    """
    audio_stream = AudiostreamSource()

    extractor = FeatureExtractor(libpath)
    extractor_gain = 16.0

    bufsize = 3200
    # presumably 2 bytes per sample -- confirm against AudiostreamSource
    chunk_size = bufsize * 2

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(chunk_size, chunk_size)
            if not frame:
                # Nothing buffered yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            send_features(extractor.signal_to_mel(frame, extractor_gain))

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl-C (before SystemExit propagates) and for any other
        # error (e.g. a failing send), so the stream is never left running.
        audio_stream.stop()
def recordActivations(libpath):
    """Save the audio preceding each keyword activation as a WAV file.

    One ``AudioRecognition`` detector is created for every
    ``(path, sensitivity, name)`` entry of the module-level ``models``.
    The most recent ``recordBefore`` seconds of raw audio are kept in a
    rolling buffer; whenever a detector returns a non-zero prediction that
    buffer is written to
    ``<saveDirectory>/activation_<name>_<count>_<time_ns>.wav``.

    Args:
        libpath: Library path forwarded to ``FeatureExtractor`` and
            ``AudioRecognition``.

    Raises:
        ValueError: If ``models`` is empty, since the input size is queried
            from a detector.

    The loop has no normal exit: a ``KeyboardInterrupt`` prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)

    extractor_gain = 1.0
    recordBefore = 2.5  # Seconds before the activation
    # FIXME: audio *after* the activation is not recorded yet; only the
    # pre-activation buffer is saved.

    activationCount = 0
    ensure_dir(saveDirectory)

    rbFrontSize = int(recordBefore * bytesPerSample * framesPerSecond)

    detectors = {}
    for mpath, msens, mname in models:
        detector = AudioRecognition(libpath)
        detector.addModel(mpath, msens)
        detectors[mname] = detector

    if not detectors:
        # Without this guard the next statement would fail with an
        # UnboundLocalError on ``detector``.
        raise ValueError("recordActivations: no models configured")

    # The input size and version are queried from the last detector created.
    # presumably 2 bytes per sample -- confirm against AudiostreamSource
    chunk_size = detector.getInputDataSize() * 2

    print("Audio Recognition Version: " + detector.getVersionString())

    # Every detector sees the same audio, so a single rolling pre-activation
    # buffer is sufficient.
    pre_roll = bytearray()

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(chunk_size, chunk_size)
            if not frame:
                # Nothing buffered yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            # Fill audio before the activation, keeping only the newest
            # rbFrontSize bytes. A new object is bound each time, so a
            # buffer already handed to save_wav is never mutated.
            pre_roll = pre_roll + frame
            if len(pre_roll) > rbFrontSize:
                pre_roll = pre_roll[-rbFrontSize:]

            features = extractor.signalToMel(frame, extractor_gain)

            for mname, model_detector in detectors.items():
                if model_detector.runDetection(features) == 0:
                    continue

                savePath = saveDirectory + "/activation_{}_{}_{}.wav".format(
                    mname, activationCount, time.time_ns())
                save_wav(pre_roll, savePath)
                print("Saving Activation to {}".format(savePath))
                activationCount += 1

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl-C (before SystemExit propagates) and for any other
        # error (e.g. a failing save), so the stream is never left running.
        audio_stream.stop()
Ejemplo n.º 9
0
def detectKeywords(libpath):
    """Wait for the Alexa wake word, then recognise a spoken digit command.

    The function alternates between two modes:

    * Wake-word mode: audio chunks are converted to mel features and passed
      to the keyword detector. Any non-zero prediction plays a notification
      sound and switches to command mode ("Alexa detected:<timestamp>" is
      printed when the prediction equals the Alexa model id).
    * Command mode: raw audio is fed to a ``KaldiRecognizer`` restricted to
      a digit vocabulary. Once ``AcceptWaveform`` returns true, the result
      and final result are printed and wake-word mode resumes.

    Args:
        libpath: Library path forwarded to ``FeatureExtractor`` and
            ``AudioRecognition``.

    The loop has no normal exit: a ``KeyboardInterrupt`` prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    detector = AudioRecognition(libpath)

    framerate = 16000
    model = Model("model")

    # Let's define a custom dictionary
    rec = KaldiRecognizer(
        model, framerate,
        '["oh one two three four five six seven eight nine zero", "[unk]"]')

    extractor_gain = 1.0

    # Add one or more keyword models
    keywordIdAlexa = detector.addModel(
        '../../models/Hotword/alexa_v3.0.35.premium', 0.85)

    # Chunks are requested at twice the detector's input size
    # (presumably 2 bytes per sample -- confirm against AudiostreamSource).
    wakeword_chunk = detector.getInputDataSize() * 2
    command_chunk = 4000

    print("Audio Recognition Version: " + detector.getVersionString())

    command_started = False

    audio_stream.start()
    try:
        while True:
            if command_started:
                # Command mode: hand raw audio to the Vosk recogniser.
                frame = audio_stream.read(command_chunk, command_chunk)
                if not frame:
                    time.sleep(0.01)
                    continue

                if rec.AcceptWaveform(bytes(frame)):
                    print(rec.Result())
                    command_started = False
                    print(rec.FinalResult())
            else:
                # Wake-word mode.
                frame = audio_stream.read(wakeword_chunk, wakeword_chunk)
                if not frame:
                    time.sleep(0.01)
                    continue

                features = extractor.signalToMel(frame, extractor_gain)
                prediction = detector.runDetection(features)
                if prediction != 0:
                    now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
                    if prediction == keywordIdAlexa:
                        print("Alexa detected:" + now)

                    os.system(play_command + " ../resources/ding.wav")
                    command_started = True

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl-C (before SystemExit propagates) and for any other
        # error (e.g. from the recogniser), so the stream is never left
        # running.
        audio_stream.stop()
Ejemplo n.º 10
0
def label_stream(labels, libpath, verification_path, graph, sensitivity):
    """Detect keywords and run experimental speaker verification on each hit.

    The mel features of the most recent 10 frames are kept. On every truthy
    prediction the label and a timestamp are printed, a notification sound
    is played, and the buffered features are concatenated and passed to
    ``SpeakerVerification.VerifySpeaker``. While the module-level
    ``fingerprints`` list holds fewer than ``enrolling`` entries the
    returned fingerprint is appended to it ("Enrolling"); afterwards
    "Completed" is printed. The fingerprint is then compared against
    ``get_averaged_fingerprint()`` with ``cosine_similarity`` and the score
    is printed when an averaged fingerprint is available.

    Args:
        labels: Labels argument forwarded to ``AudioRecognition``.
        libpath: Library path forwarded to ``FeatureExtractor``,
            ``AudioRecognition`` and ``SpeakerVerification``.
        verification_path: Second argument of ``SpeakerVerification``.
        graph: Graph argument forwarded to ``AudioRecognition``.
        sensitivity: Value passed to ``detector.SetSensitivity``.

    The loop has no normal exit: a ``KeyboardInterrupt`` prints
    "Terminating" and exits the process with status 0. The audio stream is
    stopped on every exit path, including unexpected exceptions.
    """
    from collections import deque

    # Keyword spotting has 200ms frames, Verifier takes 2 seconds of audio
    max_last_frames = 10
    # A bounded deque discards the oldest entry automatically on append.
    last_frames = deque(maxlen=max_last_frames)

    audio_stream = AudiostreamSource()

    extractor = FeatureExtractor(libpath)

    detector = AudioRecognition(libpath, graph, labels)
    detector.SetSensitivity(sensitivity)

    verifier = SpeakerVerification(libpath, verification_path)

    bufsize = detector.GetInputDataSize()
    # presumably 2 bytes per sample -- confirm against AudiostreamSource
    chunk_size = bufsize * 2

    print("Bufsize: " + str(bufsize))

    print("Audio Recognition Version: " + detector.GetVersionString())

    banner = (
        "WARNING EXPERIMENTAL: The voice verification module can be use to verify if",
        "A command is issued by a certian speaker. It processes speech signals with a",
        "two second length. This experimental version isn't very good yet.",
        "\n\n During enrolling a fingerprint of your voice is caputred. By default 5 samples",
        "Will be captured and averaged. The progam will output a similarity score between 0 and 1",
        "A value of 1 means totally similar, 0 means different.",
        "Currently a threshold of 0.95 seems good",
        "This module should not be run on a Pi Zero, as it uses excessive CPU",
        "Verification can also be helpful to reduce false positives of non speech signals",
    )
    for line in banner:
        print(line)

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(chunk_size, chunk_size)
            if not frame:
                # Nothing buffered yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            features = extractor.signal_to_mel(frame)
            last_frames.append(features)

            prediction = detector.RunDetection(features)
            if not prediction:
                continue

            now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
            print(detector.GetPredictionLabel(prediction) + " " + now)
            os.system(play_command + " ./resources/ding.wav")

            # Concatenate the buffered features into one verifier input.
            detect_frame = bytearray()
            for element in last_frames:
                detect_frame.extend(element)

            print("Running Verification")

            fingerprint = verifier.VerifySpeaker(detect_frame)

            if len(fingerprints) < enrolling:
                print("Enrolling")
                fingerprints.append(fingerprint)
            else:
                print("Completed")

            print(fingerprint)

            avg_fingerprint = get_averaged_fingerprint()

            if avg_fingerprint:
                similarity_score = cosine_similarity(
                    fingerprint, avg_fingerprint)
                print("Similarity: " + str(similarity_score))

            print("Verification Done")

    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs for Ctrl-C (before SystemExit propagates) and for any other
        # error (e.g. from the verifier), so the stream is never left
        # running.
        audio_stream.stop()
	parser.add_argument(
		'--bad_folders', type=str, default='', help='Path to additional bad folders seperated by comma.')
	parser.add_argument(
		'--libpath', type=str, default='../lib/linux/libnyumaya.so', help='Path to nyumaya_library')

	parser.add_argument(
		'--use_all_files',  action='store_true', help='Wether to use all files or only files which have a hash matching test files')

	FLAGS, unparsed = parser.parse_known_args()

	print("include_only_test_files: " + str(FLAGS.use_all_files))

	sensitivities = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]

	detector = AudioRecognition(FLAGS.libpath,FLAGS.graph,FLAGS.labels)
	extractor = FeatureExtractor(FLAGS.libpath)
		
	addnoise = [False,True]
	results_clean = []
	results_noisy = []
	results_false = []
	print(FLAGS.graph + "\n")
	for noise in addnoise:

		for sensitivity in sensitivities:
			wrong_predictions, good_predictions,missed_predictions,samples = run_good_predictions(detector,extractor,FLAGS.good_folder,FLAGS.noise_folders,noise,sensitivity,FLAGS.use_all_files)

			if(wrong_predictions is not None):
				result = {}
				result["sensitivity"] = sensitivity
				result["accuracy"] = 1-(missed_predictions+wrong_predictions)/samples