def load_classifier(self, classifier_path):
    """Load the given compiled ELL classifier for use in processing subsequent audio input."""
    if not classifier_path:
        return
    self.classifier = classifier.AudioClassifier(classifier_path, self.categories, self.threshold)
    self.show_output("Classifier input size: {}, output size: {}".format(
        self.classifier.input_size, self.classifier.output_size))
    # keep the path entry widget in sync with the classifier actually loaded
    if self.classifier_entry.get() != classifier_path:
        self.classifier_entry.delete(0, END)
        self.classifier_entry.insert(0, classifier_path)
    self.init_data()
def test_keyword_spotter(featurizer_model, classifier_model, categories, wav_files, threshold, sample_rate,
                         output_speaker=False, auto_scale=False, reset=False):
    """Run the keyword spotter over every .wav file in the given directory, or over live
    microphone input when no directory is given.

    Returns a list containing one prediction result per processed input.
    """
    predictor = classifier.AudioClassifier(classifier_model, categories, threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model, predictor.input_size)
    # the featurizer and classifier must both be .ell models or both be compiled
    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")
    the_speaker = speaker.Speaker() if output_speaker else None
    results = []
    if wav_files:
        if not os.path.isdir(wav_files):
            raise Exception("--wav_files {} dir not found".format(wav_files))
        for filename in sorted(os.listdir(wav_files)):
            if os.path.splitext(filename)[1] != ".wav":
                print("Skipping non-wav file: ", filename)
                continue
            reader = wav_reader.WavReader(sample_rate, CHANNELS, auto_scale)
            path = os.path.join(wav_files, filename)
            print("opening ", path)
            reader.open(path, transform.input_size, the_speaker)
            results.append(get_prediction(reader, transform, predictor, categories))
            # optionally clear the classifier state between files
            if reset:
                predictor.reset()
    else:
        reader = microphone.Microphone(True, True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")
        results.append(get_prediction(reader, transform, predictor, categories))
    return results
def test_keyword_spotter(featurizer_model, classifier_model, categories, wav_file, threshold, sample_rate,
                         output_speaker=False):
    """Test the featurizer/classifier pair against a wav file or live microphone input.

    Returns a tuple (prediction, probability, label, average_time) describing the most
    confident detection seen.  Raises an Exception if nothing was ever detected.
    """
    predictor = classifier.AudioClassifier(classifier_model, categories, threshold, SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model, predictor.input_size)
    # the featurizer and classifier must both be .ell models or both be compiled
    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")

    # set up inputs and outputs
    if wav_file:
        the_speaker = speaker.Speaker() if output_speaker else None
        reader = wav_reader.WavReader(sample_rate, CHANNELS)
        reader.open(wav_file, transform.input_size, the_speaker)
    else:
        reader = microphone.Microphone(True)
        reader.open(transform.input_size, sample_rate, CHANNELS)
        print("Please type 'x' and enter to terminate this app...")

    transform.open(reader)
    best = None
    try:
        feature_data = transform.read()
        while feature_data is not None:
            prediction, probability, label = predictor.predict(feature_data)
            if probability is not None:
                # remember the most confident detection seen so far
                if best is None or best[1] < probability:
                    best = (prediction, probability, label)
                print("<<< DETECTED ({}) {}% '{}' >>>".format(prediction, int(100 * probability), label))
            feature_data = transform.read()
    except KeyboardInterrupt:
        pass
    transform.close()

    average_time = predictor.avg_time() + transform.avg_time()
    print("Average processing time: {}".format(average_time))
    if best is None:
        raise Exception("test_keyword_spotter failed to find any predictions!")
    return tuple(list(best) + [average_time])
def RunTest(self, featurizer_model, classifier_model, list_file, dataset, categories, sample_rate,
            ignore_label):
    """Evaluate the classifier on a list of wav files or on a pre-featurized dataset.

    Either list_file (a text file listing wav files relative to its own directory, under an
    "audio" subfolder, where the first path component is the expected label) or dataset (an
    .npz path or an object with .features/.label_names) must be provided.  Updates
    self.passed/self.failed via process_prediction and returns the resulting pass rate.
    """
    predictor = classifier.AudioClassifier(classifier_model, categories, [ignore_label], THRESHOLD,
                                           SMOOTHING)
    transform = featurizer.AudioTransform(featurizer_model, predictor.input_size)
    print("Evaluation with transform input size {}, output size {}".format(
        transform.input_size, transform.output_size))
    print(
        "Evaluation with classifier input size {}, output size {}".format(
            predictor.input_size, predictor.output_size))
    # the featurizer and classifier must both be .ell models or both be compiled
    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")
    if list_file:
        with open(list_file, "r") as fp:
            testlist = [e.strip() for e in fp.readlines()]
        wav_dir = os.path.dirname(list_file)
        start = time.time()
        for name in testlist:
            # e.g. bed/28497c5b_nohash_0.wav -- the folder name is the expected label
            expected = name.split('/')[0]
            wav_file = os.path.join(wav_dir, "audio", name)
            # open the wav file.
            reader = wav_reader.WavReader(sample_rate)
            reader.open(wav_file, transform.input_size, None)
            transform.open(reader)
            prediction = self.get_prediction(transform, predictor)
            self.process_prediction(prediction, expected)
    elif dataset:
        if type(dataset) is str:
            # path to an .npz file created by make_dataset.py
            ds = np.load(dataset)
            features = ds['features']
            labels = ds['labels']
        else:
            features = dataset.features
            labels = dataset.label_names
        start = time.time()
        for index, f in enumerate(features):
            expected = labels[index]
            reader = FeatureReader(f, predictor.input_size)
            prediction = self.get_prediction(reader, predictor)
            self.process_prediction(prediction, expected)
    else:
        # BUGFIX: previously fell through with 'start' unassigned, crashing below with a
        # NameError; fail fast instead (consistent with run_test).
        raise Exception("Missing list_file and dataset arguments")
    end = time.time()
    seconds = end - start
    print("Test completed in {:.2f} seconds".format(seconds))
    print("{} passed, {} failed, pass rate of {:.2f} %".format(
        self.passed, self.failed, self.rate * 100))
    return self.rate
# Command-line driver: parse arguments, build the featurizer/classifier pair, and open the
# chosen audio source (a wav file, or the microphone when --wav_file is omitted).  The
# 'reader', 'transform', and 'predictor' names are consumed by the code that follows this chunk.
parser = argparse.ArgumentParser("test the classifier and featurizer against mic or wav file input")
parser.add_argument("--wav_file", help="optional path to wav file to test", default=None)
parser.add_argument("--featurizer", "-f", required=True,
                    help="specify path to featurizer model (*.ell or compiled_folder/model_name)")
parser.add_argument("--classifier", "-c", required=True,
                    help="specify path to classifier model (*.ell or compiled_folder/model_name)")
parser.add_argument("--categories", "-cat", help="specify path to categories file", required=True)
parser.add_argument("--sample_rate", "-s", default=SAMPLE_RATE, type=int,
                    help="Audio sample rate expected by classifier")
parser.add_argument("--threshold", "-t", help="Classifier threshold (default 0.6)",
                    default=THRESHOLD, type=float)
parser.add_argument("--speaker", help="Output audio to the speaker.", action='store_true')
args = parser.parse_args()

predictor = classifier.AudioClassifier(args.classifier, args.categories, args.threshold, SMOOTHING)
transform = featurizer.AudioTransform(args.featurizer, predictor.input_size)
# the featurizer and classifier must both be .ell models or both be compiled
if transform.using_map != predictor.using_map:
    raise Exception("cannot mix .ell and compiled models")

# set up inputs and outputs
if args.wav_file:
    output_speaker = None
    if args.speaker:
        output_speaker = speaker.Speaker()
    reader = wav_reader.WavReader(args.sample_rate, CHANNELS)
    reader.open(args.wav_file, transform.input_size, output_speaker)
else:
    reader = microphone.Microphone(True)
    reader.open(transform.input_size, args.sample_rate, CHANNELS)
def run_test(self, featurizer_model, classifier_model, list_file, max_tests, dataset, categories,
             sample_rate, auto_scale, output_file, algorithm="max", window_size=0):
    """
    Run the test using the given input models (featurizer and classifier) which may or may not
    be compiled.

    The test set is defined by a list_file or a dataset: the list file lists .wav files which
    we will featurize using the given featurizer; the dataset contains pre-featurized data as
    created by make_dataset.py.  The categories define the names of the keywords detected by
    the classifier and the sample_rate defines the audio sample rate in Hertz -- all input
    audio is resampled at this rate before featurization.

    Saves the predictions to output_file as JSON and returns (pass_rate, best_time).
    """
    predictor = classifier.AudioClassifier(classifier_model, categories, THRESHOLD, SMOOTHING)
    # window_size == 0 means "use the classifier's natural input size"
    if window_size == 0:
        window_size = predictor.input_size
    transform = featurizer.AudioTransform(featurizer_model, window_size)
    if not self.silent:
        self.logger.info(
            "Evaluation with transform input size {}, output size {}".
            format(transform.input_size, transform.output_size))
        self.logger.info(
            "Evaluation with classifier input size {}, output size {}".
            format(predictor.input_size, predictor.output_size))
    # the featurizer and classifier must both be .ell models or both be compiled
    if transform.using_map != predictor.using_map:
        raise Exception("cannot mix .ell and compiled models")
    results = []
    if list_file:
        with open(list_file, "r") as fp:
            testlist = [e.strip() for e in fp.readlines()]
        wav_dir = os.path.dirname(list_file)
        if max_tests:
            # sample a random subset of the list (raises if max_tests > len(testlist))
            testlist = np.random.choice(testlist, max_tests, replace=False)
        start = time.time()
        for name in testlist:
            # e.g. bed/28497c5b_nohash_0.wav -- the folder name is the expected label
            expected = name.split('/')[0]
            wav_file = os.path.join(wav_dir, name)
            # open the wav file.
            reader = wav_reader.WavReader(sample_rate, 1, auto_scale)
            reader.open(wav_file, transform.input_size, None)
            transform.open(reader)
            prediction, confidence, _, elapsed = self.get_prediction(
                name, transform, predictor, algorithm)
            self.process_prediction(name, prediction, expected, confidence)
            results += [prediction]
            # track the fastest single prediction seen across the whole run
            if self.best_time is None or elapsed < self.best_time:
                self.best_time = elapsed
    elif dataset:
        if type(dataset) is str:
            # path to an .npz file created by make_dataset.py
            ds = np.load(dataset)
            features = ds['features']
            labels = ds['labels']
        else:
            features = dataset.features
            labels = dataset.label_names
        index = 0
        start = time.time()
        for f in features:
            expected = labels[index]
            reader = FeatureReader(f, predictor.input_size)
            name = "row " + str(index)
            prediction, confidence, _, elapsed = self.get_prediction(
                name, reader, predictor)
            self.process_prediction(name, prediction, expected, confidence)
            # NOTE(review): unlike the list_file branch, predictions are not appended to
            # 'results' here, so the JSON written below is empty for dataset runs -- confirm
            # whether that is intended.
            if self.best_time is None or elapsed < self.best_time:
                self.best_time = elapsed
            index += 1
    else:
        raise Exception("Missing list_file and dataset arguments")
    end = time.time()
    seconds = end - start
    self.logger.info("Saving '{}'".format(output_file))
    with open(output_file, "w") as f:
        json.dump(results, f)
    self.logger.info("Test completed in {:.2f} seconds".format(seconds))
    self.logger.info("{} passed, {} failed, pass rate of {:.2f} %".format(
        self.passed, self.failed, self.rate * 100))
    self.logger.info("Best prediction time was {} seconds".format(
        self.best_time))
    return self.rate, self.best_time