def load_featurizer_model(self, featurizer_path, filter_bank_size=40):
    """ load the given compiled ELL featurizer for use in processing subsequent audio input

    featurizer_path - path to the compiled ELL featurizer; if falsy, no featurizer is
                      loaded and only the data buffers are (re)initialized
    filter_bank_size - number of mel filter bank features the featurizer produces
                       (default 40, matching the previously hard-coded value)
    """
    if featurizer_path:
        self.featurizer = featurizer.AudioTransform(featurizer_path, filter_bank_size)
        self.setup_spectrogram_image()
        # NOTE(review): VoiceActivityDetector is constructed here from
        # (sample_rate, output_size) but elsewhere in this file from a model
        # path/object — confirm the constructor supports both forms.
        self.vad = vad.VoiceActivityDetector(self.sample_rate, self.featurizer.output_size)
        self.show_output("Feature input size: {}, output size: {}".format(
            self.featurizer.input_size, self.featurizer.output_size))
    # reset buffered feature/spectrogram data to match the current featurizer dimensions
    self.init_data()
def create_vad(self):
    """Rebuild the voice-activity-detector model from the current UI entry
    widgets and install it as self.vad."""
    option_widgets = [
        ("tau_up", self.tau_up),
        ("tau_down", self.tau_down),
        ("threshold_up", self.threshold_up),
        ("threshold_down", self.threshold_down),
        ("large_input", self.large_input),
        ("gain_att", self.gain_att),
        ("level_threshold", self.level_threshold),
    ]
    # read each tuning parameter out of its entry widget
    vad_options = {name: self.get_entry(widget) for name, widget in option_widgets}
    vad_model = make_vad.make_vad("vad.ell", self.sample_rate,
                                  self.featurizer.input_size,
                                  self.featurizer.output_size,
                                  vad_options)
    self.vad = vad.VoiceActivityDetector(vad_model)
def __init__(self, featurizer_model=None, classifier_model=None, auto_scale=True,
             sample_rate=None, channels=None, input_device=None, categories=None,
             image_width=80, threshold=None, wav_file=None, clear=5, serial=None,
             vad_model=None, smoothing=None, ignore_list=None):
    """ Initialize AudioDemo object
    featurizer_model - the path to the ELL featurizer
    classifier_model - the path to the ELL classifier
    auto_scale - auto scale audio input to range [-1, 1]
    sample_rate - sample rate to featurizer is expecting
    channels - number of channels featurizer is expecting
    input_device - optional id of microphone to use
    categories - path to file containing category labels
    image_width - width of the spectrogram image
    threshold - ignore predictions that have confidence below this number (e.g. 0.5)
    wav_file - optional wav_file to use when you click Play
    serial - optional serial input, reading numbers from the given serial port.
    vad_model - optional ELL model containing VoiceActivityDetector
    smoothing - controls the size of the smoothing window (defaults to 0).
    ignore_list - list of category labels to ignore (like 'background' or 'silence')
    """
    super().__init__()
    # keys under which persisted settings are stored/looked up
    self.CLASSIFIER_MODEL_KEY = "classifier_model"
    self.FEATURIZER_MODEL_KEY = "featurizer_model"
    self.WAV_FILE_KEY = "wav_file"
    self.CATEGORY_FILE_KEY = "categories"
    # load persisted settings first so explicit arguments below can override them
    self.get_settings_file_name()
    self.load_settings()
    self.reading_input = False
    self.featurizer_model = None
    self.serial_port = serial
    self.smoothing = smoothing
    self.ignore_list = ignore_list
    # explicit featurizer argument wins and is persisted; otherwise fall back to settings
    if featurizer_model:
        self.featurizer_model = featurizer_model
        self.settings[self.FEATURIZER_MODEL_KEY] = featurizer_model
    elif self.FEATURIZER_MODEL_KEY in self.settings:
        self.featurizer_model = self.settings[self.FEATURIZER_MODEL_KEY]
    self.classifier_model = None
    # same precedence for the classifier: argument > persisted setting
    if classifier_model:
        self.classifier_model = classifier_model
        self.settings[self.CLASSIFIER_MODEL_KEY] = classifier_model
    elif self.CLASSIFIER_MODEL_KEY in self.settings:
        self.classifier_model = self.settings[self.CLASSIFIER_MODEL_KEY]
    self.wav_filename = wav_file
    if self.wav_filename is None and self.WAV_FILE_KEY in self.settings:
        self.wav_filename = self.settings[self.WAV_FILE_KEY]
    self.wav_file_list = None
    self.auto_scale = auto_scale
    # defaults: 16 kHz mono, the common configuration for ELL audio featurizers
    self.sample_rate = sample_rate if sample_rate is not None else 16000
    self.channels = channels if channels is not None else 1
    self.input_device = input_device
    self.num_classifier_features = None
    self.vad = None
    # whether classifier state is reset on voice-activity boundaries
    self.vad_reset = (vad_model is not None)
    self.previous_vad = 0
    self.vad_latch = 3  # only reset after 3 vad=0 signals to smooth vad signal a bit.
    if not categories and self.CATEGORY_FILE_KEY in self.settings:
        categories = self.settings[self.CATEGORY_FILE_KEY]
    self.categories = categories
    if categories:
        self.settings[self.CATEGORY_FILE_KEY] = categories
    self.save_settings()  # in case we just changed it.
    # audio level tracking for the UI meter
    self.audio_level = 0
    self.min_level = 0
    self.max_level = 0
    self.threshold = threshold
    # `clear` is seconds; stored in milliseconds for the UI timer
    self.output_clear_time = int(clear * 1000) if clear else 5000
    self.featurizer = None
    self.classifier = None
    self.wav_file = None
    self.speaker = None
    self.microphone = None
    self.animation = None
    self.show_classifier_output = True
    self.last_prediction = None
    self.probability = 0
    # Threads
    self.read_input_thread = None
    self.lock = Lock()
    self.main_thread = get_ident()
    self.message_queue = []
    # UI components
    self.max_spectrogram_width = image_width
    self.features_entry = None
    self.classifier_feature_data = None
    self.spectrogram_image_data = None
    # UI must exist before models are loaded (loading reports status via show_output)
    self.init_ui()
    if self.featurizer_model:
        self.load_featurizer_model(os.path.abspath(self.featurizer_model))
    else:
        self.show_output("Please specify and load a feature model")
    if smoothing == "vad":
        # smooth up to 1 second worth of predictions
        # NOTE(review): this dereferences self.featurizer, which is None when no
        # featurizer model was given above — confirm smoothing=="vad" is only
        # used together with a featurizer model.
        self.smoothing = int(self.sample_rate / self.featurizer.input_size)
        if vad_model is None:
            vad_model = make_vad.make_vad("vad.ell", self.sample_rate, self.featurizer.input_size,
                                          self.featurizer.output_size, None)
    if self.classifier_model:
        self.load_classifier(self.classifier_model)
        self.setup_spectrogram_image()
    else:
        self.show_output("Please specify and load a classifier model")
    if vad_model:
        self.vad = vad.VoiceActivityDetector(vad_model)