def label_stream():
    """Run a two-stage detection loop: hotword first, then an action command.

    Frames are read from an ``AudiostreamSource`` and fed to one of two
    ``AudioRecognition`` detectors:

    * While idle, every frame goes to the hotword detector. When it reports
      the label ``'light'`` the loop switches to the listening state.
    * While listening, frames go to the action detector. Any detection ends
      the listening state; the labels ``"on"`` and ``"off"`` additionally
      print a message.

    The listening state also ends on its own after ``listen_frames``
    non-empty frames without an action detection.

    The names ``libpath``, ``action_graph``, ``action_labels``,
    ``hotword_graph`` and ``hotword_labels`` are not defined in this
    function and must be available at module level.

    The function does not return: it loops until ``KeyboardInterrupt`` and
    then exits the process with status 0. The audio stream is stopped on
    every exit path, including unexpected exceptions.
    """
    # Number of non-empty frames to keep listening for an action command
    # after the hotword was detected.
    listen_frames = 20

    hotword_detected = False
    countdown = 0

    audio_stream = AudiostreamSource()
    action_detector = AudioRecognition(libpath, action_graph, action_labels)
    hotword_detector = AudioRecognition(libpath, hotword_graph, hotword_labels)

    hotword_detector.SetSensitivity(0.5)
    action_detector.SetSensitivity(0.55)

    bufsize = hotword_detector.GetInputDataSize()

    print("Audio Recognition Version: " + hotword_detector.GetVersionString())

    # Start the stream immediately before the try block so that the
    # ``finally`` clause covers everything that happens while it is running.
    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(bufsize, bufsize)
            if not frame:
                # No data available yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            # The countdown only advances on frames that were actually read.
            if countdown > 0:
                countdown -= 1
                if countdown == 0:
                    hotword_detected = False
                    print("Stopped Listening")

            if not hotword_detected:
                prediction = hotword_detector.RunDetection(frame)
                # The label is printed for every frame, detection or not.
                label = hotword_detector.GetPredictionLabel(prediction)
                print(label)
                if prediction and label == 'light':
                    hotword_detected = True
                    countdown = listen_frames
                    print("Listening")
            else:
                prediction = action_detector.RunDetection(frame)
                if prediction:
                    label = action_detector.GetPredictionLabel(prediction)
                    if label == "on":
                        print("Turning light on")
                    elif label == "off":
                        print("Turning light off")

                    # Any action detection ends the listening state; resetting
                    # the countdown here skips the "Stopped Listening" message.
                    countdown = 0
                    hotword_detected = False
    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs before SystemExit propagates and on any unexpected error.
        audio_stream.stop()
def label_stream(labels, libpath, graph, sensitivity):
    """Detect keywords on the live audio stream and play a sound on each hit.

    Each frame read from the stream is converted to features with
    ``FeatureExtractor.signal_to_mel`` (gain 1.0) and passed to the detector.
    On a detection the predicted label and a timestamp are printed and
    ``./resources/ding.wav`` is played through ``play_command``.

    ``play_command`` is not defined in this function and must be available
    at module level.

    Args:
        labels: Passed through to ``AudioRecognition``.
        libpath: Passed to both ``FeatureExtractor`` and ``AudioRecognition``.
        graph: Passed through to ``AudioRecognition``.
        sensitivity: Value handed to ``detector.SetSensitivity``.

    The function does not return: it loops until ``KeyboardInterrupt`` and
    then exits the process with status 0. The audio stream is stopped on
    every exit path, including unexpected exceptions.
    """
    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)
    extractor_gain = 1.0

    detector = AudioRecognition(libpath, graph, labels)
    detector.SetSensitivity(sensitivity)

    bufsize = detector.GetInputDataSize()
    # NOTE(review): twice the detector input size is requested per read;
    # presumably the stream counts bytes of 16-bit samples - confirm.
    read_size = bufsize * 2

    print("Audio Recognition Version: " + detector.GetVersionString())

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(read_size, read_size)
            if not frame:
                # No data available yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            features = extractor.signal_to_mel(frame, extractor_gain)
            prediction = detector.RunDetection(features)

            if prediction:
                now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
                print(detector.GetPredictionLabel(prediction) + " " + now)
                os.system(play_command + " ./resources/ding.wav")
    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs before SystemExit propagates and on any unexpected error.
        audio_stream.stop()
def label_stream(labels, libpath, graph, sensitivity):
    """Detect keywords on the live audio stream and play a sound on each hit.

    Raw frames read from the stream are passed directly to the detector.
    On a detection the predicted label and a timestamp are printed and
    ``./ding.wav`` is played with ``play -q`` on macOS or ``aplay`` on any
    other platform.

    Args:
        labels: Passed through to ``AudioRecognition``.
        libpath: Passed through to ``AudioRecognition``.
        graph: Passed through to ``AudioRecognition``.
        sensitivity: Value handed to ``detector.SetSensitivity``.

    The function does not return: it loops until ``KeyboardInterrupt`` and
    then exits the process with status 0. The audio stream is stopped on
    every exit path, including unexpected exceptions.
    """
    audio_stream = AudiostreamSource()

    detector = AudioRecognition(libpath, graph, labels)
    detector.SetSensitivity(sensitivity)

    bufsize = detector.GetInputDataSize()

    # platform.system() reports "Darwin" on macOS.
    play_command = "play -q" if platform.system() == "Darwin" else "aplay"

    print("Audio Recognition Version: " + detector.GetVersionString())

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(bufsize, bufsize)
            if not frame:
                # No data available yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            prediction = detector.RunDetection(frame)

            if prediction:
                now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
                print(detector.GetPredictionLabel(prediction) + " " + now)
                os.system(play_command + " ./ding.wav")
    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs before SystemExit propagates and on any unexpected error.
        audio_stream.stop()
def label_stream(labels, libpath, verification_path, graph, sensitivity):
    """Detect keywords and run speaker verification on each detection.

    Each frame read from the stream is converted to features with
    ``FeatureExtractor.signal_to_mel``; the most recent ``max_last_frames``
    feature frames are kept in a sliding window. On a keyword detection:

    1. the label and a timestamp are printed and ``./resources/ding.wav`` is
       played through ``play_command``;
    2. the window contents are concatenated into one ``bytearray`` and passed
       to ``SpeakerVerification.VerifySpeaker``;
    3. while fewer than ``enrolling`` results are stored, the result is
       appended to ``fingerprints``;
    4. the result is printed and, if ``get_averaged_fingerprint()`` returns
       a truthy value, its ``cosine_similarity`` to the result is printed.

    ``play_command``, ``fingerprints``, ``enrolling``,
    ``get_averaged_fingerprint`` and ``cosine_similarity`` are not defined in
    this function and must be available at module level.

    Args:
        labels: Passed through to ``AudioRecognition``.
        libpath: Passed to ``FeatureExtractor``, ``AudioRecognition`` and
            ``SpeakerVerification``.
        verification_path: Passed through to ``SpeakerVerification``.
        graph: Passed through to ``AudioRecognition``.
        sensitivity: Value handed to ``detector.SetSensitivity``.

    The function does not return: it loops until ``KeyboardInterrupt`` and
    then exits the process with status 0. The audio stream is stopped on
    every exit path, including unexpected exceptions.
    """
    # Imported locally so this function does not depend on a module-level
    # import that may not exist in the file.
    from collections import deque

    # Keyword spotting has 200ms frames, the verifier takes 2 seconds of audio.
    max_last_frames = 10
    # A bounded deque drops the oldest frame automatically on append, which
    # replaces the manual O(n) ``list.pop(0)``.
    last_frames = deque(maxlen=max_last_frames)

    audio_stream = AudiostreamSource()
    extractor = FeatureExtractor(libpath)

    detector = AudioRecognition(libpath, graph, labels)
    detector.SetSensitivity(sensitivity)

    verifiyer = SpeakerVerification(libpath, verification_path)

    bufsize = detector.GetInputDataSize()
    # NOTE(review): twice the detector input size is requested per read;
    # presumably the stream counts bytes of 16-bit samples - confirm.
    read_size = bufsize * 2

    print("Bufsize: " + str(bufsize))
    print("Audio Recognition Version: " + detector.GetVersionString())

    print(
        "WARNING EXPERIMENTAL: The voice verification module can be use to verify if"
    )
    print(
        "A command is issued by a certian speaker. It processes speech signals with a"
    )
    print("two second length. This experimental version isn't very good yet.")
    print(
        "\n\n During enrolling a fingerprint of your voice is caputred. By default 5 samples"
    )
    print(
        "Will be captured and averaged. The progam will output a similarity score between 0 and 1"
    )
    print("A value of 1 means totally similar, 0 means different.")
    print("Currently a threshold of 0.95 seems good")
    print(
        "This module should not be run on a Pi Zero, as it uses excessive CPU")
    print(
        "Verification can also be helpful to reduce false positives of non speech signals"
    )

    audio_stream.start()
    try:
        while True:
            frame = audio_stream.read(read_size, read_size)
            if not frame:
                # No data available yet; back off briefly instead of spinning.
                time.sleep(0.01)
                continue

            features = extractor.signal_to_mel(frame)
            last_frames.append(features)

            prediction = detector.RunDetection(features)
            if not prediction:
                continue

            now = datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S")
            print(detector.GetPredictionLabel(prediction) + " " + now)
            os.system(play_command + " ./resources/ding.wav")

            # Concatenate the buffered feature frames into one contiguous
            # buffer for the verifier.
            detect_frame = bytearray()
            for element in last_frames:
                detect_frame.extend(element)

            print("Running Verification")
            fingerprint = verifiyer.VerifySpeaker(detect_frame)

            if len(fingerprints) < enrolling:
                print("Enrolling")
                fingerprints.append(fingerprint)
            else:
                print("Completed")

            print(fingerprint)
            avg_fingerprint = get_averaged_fingerprint()

            if avg_fingerprint:
                similarity_score = cosine_similarity(
                    fingerprint, avg_fingerprint)
                print("Similarity: " + str(similarity_score))

            print("Verification Done")
    except KeyboardInterrupt:
        print("Terminating")
        sys.exit(0)
    finally:
        # Runs before SystemExit propagates and on any unexpected error.
        audio_stream.stop()