def __init__(self, model_name: str, chunk_size: int = -1, runner_cls: type = None):
    """Set up the audio buffer, model parameters, runner and threshold decoder.

    Args:
        model_name: Model identifier handed to ``inject_params`` and to the
            runner class constructor. When ``runner_cls`` is not supplied it
            is also passed to ``self.find_runner`` to choose a runner class.
        chunk_size: Stored unchanged as ``self.chunk_size``.
        runner_cls: Class used to build ``self.runner``. Any falsy value
            (including the default ``None``) selects
            ``self.find_runner(model_name)`` instead.
    """
    # No raw audio has been buffered yet.
    self.window_audio = np.array([])
    self.pr = inject_params(model_name)
    # Feature matrix starts zero-filled with shape (n_features, n_mfcc).
    self.mfccs = np.zeros((self.pr.n_features, self.pr.n_mfcc))
    self.chunk_size = chunk_size

    runner_cls = runner_cls or self.find_runner(model_name)
    self.runner = runner_cls(model_name)

    # Read both decoder settings from the params object loaded for this
    # model; previously threshold_center came from a bare module-level `pr`
    # while threshold_config came from self.pr.
    self.threshold_decoder = ThresholdDecoder(
        self.pr.threshold_config, self.pr.threshold_center)
def run(self):
    """Compute or load per-model statistics, print them, then save or plot them.

    Reads its configuration from ``self.args``:

    * If ``args.models`` is non-empty, statistics are computed with
      ``calc_stats`` over the train or test files (chosen by
      ``args.use_train``).
    * Otherwise statistics are loaded from the ``data`` entry of the NumPy
      archive at ``args.input_file``.

    Each model's counts and summary are printed. If ``args.output_file`` is
    set the statistics are written with ``np.savez``; otherwise false
    negatives are plotted against false positives for each model.
    """
    args = self.args

    if args.models:
        data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
        print('Data:', data)
        file_groups = data.train_files if args.use_train else data.test_files
        # Flatten the groups of file names into one list (linear time,
        # unlike repeated list concatenation via sum(..., [])).
        filenames = [name for group in file_groups for name in group]
        loader = CachedDataLoader(
            partial(data.load, args.use_train, not args.use_train, shuffle=False))
        model_data = calc_stats(args.models, loader, args.use_train, filenames)
    else:
        # The archive stores a dict (see np.savez below), which NumPy keeps
        # as a pickled object array; NumPy >= 1.16.3 refuses to load it
        # unless allow_pickle=True.
        # SECURITY: unpickling can execute arbitrary code -- only load stats
        # files from a trusted source.
        with np.load(args.input_file, allow_pickle=True) as archive:
            saved = archive['data'].item()
        model_data = {
            name: Stats.from_np_dict(entry) for name, entry in saved.items()
        }

    for name, stats in model_data.items():
        print('=== {} ===\n{}\n\n{}\n'.format(name, stats.counts_str(),
                                              stats.summary_str()))

    if args.output_file:
        np.savez(args.output_file, data={
            name: stats.to_np_dict() for name, stats in model_data.items()
        })
    else:
        plt = load_plt()
        decoder = ThresholdDecoder(pr.threshold_config, pr.threshold_center)
        # Evenly spaced values in (0, 1): the [1:-1] slice drops the 0.0 and
        # 1.0 endpoints before each value is passed through decoder.encode.
        thresholds = [
            decoder.encode(i)
            for i in np.linspace(0.0, 1.0, args.resolution)[1:-1]
        ]

        for model_name, stats in model_data.items():
            x = [stats.false_positives(i) for i in thresholds]
            y = [stats.false_negatives(i) for i in thresholds]
            plt.plot(x, y, marker='x', linestyle='-', label=model_name)
            if args.labels:
                # Separate names for the point coordinates so the x/y lists
                # are not rebound while they are being iterated.
                for point_x, point_y, threshold in zip(x, y, thresholds):
                    plt.annotate('{:.4f}'.format(threshold), (point_x, point_y))

        plt.legend()
        plt.xlabel('False Positives')
        plt.ylabel('False Negatives')
        plt.show()
class Listener:
    """Listener that preprocesses audio into MFCC vectors and executes neural networks"""

    def __init__(self, model_name: str, chunk_size: int = -1, runner_cls: type = None):
        """Set up the audio buffer, model parameters, runner and threshold decoder.

        Args:
            model_name: Model identifier handed to ``inject_params`` and to
                the runner class constructor. When ``runner_cls`` is not
                supplied, its file extension selects the runner class.
            chunk_size: Size passed to ``stream.read`` in ``update_vectors``.
            runner_cls: Class used to build ``self.runner``. Any falsy value
                (including the default ``None``) selects
                ``find_runner(model_name)`` instead.

        Raises:
            ValueError: If no ``runner_cls`` is given and the extension of
                ``model_name`` is not a known one.
        """
        self.window_audio = np.array([])
        self.pr = inject_params(model_name)
        self.mfccs = np.zeros((self.pr.n_features, self.pr.n_mfcc))
        self.chunk_size = chunk_size

        runner_cls = runner_cls or self.find_runner(model_name)
        self.runner = runner_cls(model_name)

        # Read both decoder settings from the params object loaded for this
        # model; previously threshold_center came from a bare module-level
        # `pr` while threshold_config came from self.pr.
        self.threshold_decoder = ThresholdDecoder(
            self.pr.threshold_config, self.pr.threshold_center)

    @staticmethod
    def find_runner(model_name: str) -> Type[Runner]:
        """Return the runner class matching the file extension of ``model_name``.

        ``.net`` maps to ``KerasRunner`` and ``.pb`` to ``TensorFlowRunner``.

        Raises:
            ValueError: If the extension is neither of the above.
        """
        runners = {'.net': KerasRunner, '.pb': TensorFlowRunner}
        ext = splitext(model_name)[-1]
        if ext not in runners:
            raise ValueError('File extension of ' + model_name +
                             ' must be: ' + str(list(runners)))
        return runners[ext]

    def clear(self):
        """Reset the buffered audio to empty and the feature matrix to zeros."""
        self.window_audio = np.array([])
        self.mfccs = np.zeros((self.pr.n_features, self.pr.n_mfcc))

    def update_vectors(
            self, stream: Union[BinaryIO, np.ndarray, bytes]) -> np.ndarray:
        """Feed new audio into the buffer and return the current feature matrix.

        Args:
            stream: One of
                * an ``np.ndarray``, appended to the buffer as-is;
                * ``bytes`` / ``bytearray``, converted with ``buffer_to_audio``;
                * any other object, from which ``read(self.chunk_size)`` is
                  called and the result converted with ``buffer_to_audio``.

        Returns:
            ``self.mfccs`` after any newly computed feature rows have been
            shifted in.

        Raises:
            EOFError: If the byte chunk (given directly or read from the
                stream) is empty.
        """
        if isinstance(stream, np.ndarray):
            buffer_audio = stream
        else:
            if isinstance(stream, (bytes, bytearray)):
                chunk = stream
            else:
                chunk = stream.read(self.chunk_size)
            if len(chunk) == 0:
                raise EOFError
            buffer_audio = buffer_to_audio(chunk)

        self.window_audio = np.concatenate((self.window_audio, buffer_audio))

        # Only vectorize once at least window_samples samples are buffered.
        if len(self.window_audio) >= self.pr.window_samples:
            new_features = vectorize_raw(self.window_audio)
            # Drop hop_samples samples from the front of the buffer for every
            # feature row just produced.
            consumed = len(new_features) * self.pr.hop_samples
            self.window_audio = self.window_audio[consumed:]
            # Never keep more new rows than the feature matrix can hold.
            if len(new_features) > len(self.mfccs):
                new_features = new_features[-len(self.mfccs):]
            # Shift the oldest rows out and append the new ones at the end.
            self.mfccs = np.concatenate(
                (self.mfccs[len(new_features):], new_features))

        return self.mfccs

    def update(self, stream: Union[BinaryIO, np.ndarray, bytes]) -> float:
        """Process new audio and return the decoded network output.

        The features from ``update_vectors`` are passed through
        ``add_deltas`` when ``self.pr.use_delta`` is truthy, run through
        ``self.runner``, and the raw result is decoded with
        ``self.threshold_decoder``.
        """
        mfccs = self.update_vectors(stream)
        if self.pr.use_delta:
            mfccs = add_deltas(mfccs)
        raw_output = self.runner.run(mfccs)
        return self.threshold_decoder.decode(raw_output)
def main():
    """Parse arguments, gather per-model statistics, print them, then save or plot.

    Exactly one source of statistics must be given: a list of model files to
    evaluate, or an input file holding previously saved statistics. When
    neither is given but ``args.folder`` is, the folder argument is treated
    as the input file. If ``args.output_file`` is set the statistics are
    written with ``np.savez``; otherwise false negatives are plotted against
    false positives for each model.
    """
    parser = create_parser(usage)
    parser.add_argument(
        'models', nargs='*',
        help='Either Keras (.net) or TensorFlow (.pb) models to test')
    args = TrainData.parse_args(parser)

    # With no models and no explicit input file, reuse the folder argument
    # as the input file.
    if not args.models and not args.input_file and args.folder:
        args.input_file = args.folder
    # Reject both "neither given" and "both given".
    if bool(args.models) == bool(args.input_file):
        parser.error('Please specify either a list of models or an input file')

    if not args.output_file:
        load_plt()  # Error early if matplotlib not installed
    import numpy as np

    if args.models:
        data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
        print('Data:', data)
        file_groups = data.train_files if args.use_train else data.test_files
        # Flatten the groups of file names into one list (linear time,
        # unlike repeated list concatenation via sum(..., [])).
        filenames = [name for group in file_groups for name in group]
        loader = CachedDataLoader(
            partial(data.load, args.use_train, not args.use_train, shuffle=False))
        model_data = calc_stats(args.models, loader, args.use_train, filenames)
    else:
        # The archive stores a dict (see np.savez below), which NumPy keeps
        # as a pickled object array; NumPy >= 1.16.3 refuses to load it
        # unless allow_pickle=True.
        # SECURITY: unpickling can execute arbitrary code -- only load stats
        # files from a trusted source.
        with np.load(args.input_file, allow_pickle=True) as archive:
            saved = archive['data'].item()
        model_data = {
            name: Stats.from_np_dict(entry) for name, entry in saved.items()
        }

    for name, stats in model_data.items():
        print('=== {} ===\n{}\n\n{}\n'.format(name, stats.counts_str(),
                                              stats.summary_str()))

    if args.output_file:
        np.savez(args.output_file, data={
            name: stats.to_np_dict() for name, stats in model_data.items()
        })
    else:
        plt = load_plt()
        decoder = ThresholdDecoder(pr.threshold_config, pr.threshold_center)
        # Evenly spaced values in (0, 1): the [1:-1] slice drops the 0.0 and
        # 1.0 endpoints before each value is passed through decoder.encode.
        thresholds = [
            decoder.encode(i)
            for i in np.linspace(0.0, 1.0, args.resolution)[1:-1]
        ]

        for model_name, stats in model_data.items():
            x = [stats.false_positives(i) for i in thresholds]
            y = [stats.false_negatives(i) for i in thresholds]
            plt.plot(x, y, marker='x', linestyle='-', label=model_name)
            if args.labels:
                # Separate names for the point coordinates so the x/y lists
                # are not rebound while they are being iterated.
                for point_x, point_y, threshold in zip(x, y, thresholds):
                    plt.annotate('{:.4f}'.format(threshold), (point_x, point_y))

        plt.legend()
        plt.xlabel('False Positives')
        plt.ylabel('False Negatives')
        plt.show()