def main():
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    inputs, targets = train if args.use_train else test

    filenames = sum(data.train_files if args.use_train else data.test_files, [])
    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = Stats(predictions, targets, filenames)

    print('Data:', data)

    if not args.no_filenames:
        fp_files = stats.calc_filenames(False, True, args.threshold)
        fn_files = stats.calc_filenames(False, False, args.threshold)
        print('=== False Positives ===')
        print('\n'.join(fp_files))
        print()
        print('=== False Negatives ===')
        print('\n'.join(fn_files))
        print()
    print(stats.counts_str(args.threshold))
    print()
    print(stats.summary_str(args.threshold))
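
# The Stats object above drives the FP/FN reporting. A minimal sketch of the
# threshold logic it is assumed to apply (hypothetical stand-in, not the
# library's implementation): a clip counts as a false positive when the
# network crosses the threshold on a negative sample, and as a false
# negative when it stays below the threshold on a positive one.
def count_errors(predictions, targets, threshold=0.5):
    """Return (false_positives, false_negatives) at the given threshold."""
    import numpy as np
    decisions = np.asarray(predictions) > threshold  # network says "wake word"
    actual = np.asarray(targets) > 0.5               # ground truth is 1.0 / 0.0
    false_positives = int(np.sum(decisions & ~actual))
    false_negatives = int(np.sum(~decisions & actual))
    return false_positives, false_negatives

# Example: count_errors([0.9, 0.2, 0.7], [1.0, 0.0, 0.0]) == (1, 0)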
def main():
    args = create_parser(usage).parse_args()
    args.tags_file = abspath(args.tags_file) if args.tags_file else None
    args.folder = abspath(args.folder)
    args.output_folder = abspath(args.output_folder)
    noise_min, noise_max = args.noise_ratio_low, args.noise_ratio_high

    data = TrainData.from_both(args.tags_file, args.folder, args.folder)
    noise_data = NoiseData(args.noise_folder)
    print('Data:', data)

    def translate_filename(source: str, n=0) -> str:
        assert source.startswith(args.folder)
        relative_file = source[len(args.folder):].strip(os.path.sep)
        if n > 0:
            base, ext = splitext(relative_file)
            relative_file = base + '.' + str(n) + ext
        return join(args.output_folder, relative_file)

    all_filenames = sum(data.train_files + data.test_files, [])
    for i, filename in enumerate(all_filenames):
        print('{0:.2%} \r'.format(i / (len(all_filenames) - 1)),
              end='', flush=True)

        audio = load_audio(filename)
        for n in range(args.inflation_factor):
            altered = noise_data.noised_audio(
                audio, noise_min + (noise_max - noise_min) * random())
            output_filename = translate_filename(filename, n)

            makedirs(dirname(output_filename), exist_ok=True)
            save_audio(output_filename, altered)

    print('Done!')

    if args.tags_file and args.tags_file.startswith(args.folder):
        shutil.copy2(args.tags_file, translate_filename(args.tags_file))
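
# NoiseData.noised_audio is called above with a ratio drawn uniformly from
# [noise_min, noise_max]. A rough sketch of what such a mixer plausibly does
# (illustrative assumption, not the library's implementation):
def mix_noise(audio, noise, ratio):
    """Blend noise into audio: ratio=0.0 leaves it clean, 1.0 is pure noise."""
    import numpy as np
    audio = np.asarray(audio, dtype=float)
    noise = np.asarray(noise, dtype=float)
    if len(noise) < len(audio):  # tile short noise clips to cover the audio
        noise = np.tile(noise, 1 + len(audio) // len(noise))
    return (1.0 - ratio) * audio + ratio * noise[:len(audio)]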
def main():
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)
    save_params(args.model)
    data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
    print('Data:', data)
    (inputs, outputs), test_data = data.load(True, not args.no_validation)

    print('Inputs shape:', inputs.shape)
    print('Outputs shape:', outputs.shape)

    if test_data:
        print('Test inputs shape:', test_data[0].shape)
        print('Test outputs shape:', test_data[1].shape)

    if 0 in inputs.shape or 0 in outputs.shape:
        print('Not enough data to train')
        exit(1)

    model = create_model(args.model, args.no_validation, args.extra_metrics)
    model.summary()

    from keras.callbacks import ModelCheckpoint
    checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                 save_best_only=args.save_best)

    try:
        model.fit(inputs, outputs, batch_size=5000, epochs=args.epochs,
                  validation_data=test_data, callbacks=[checkpoint])
    except KeyboardInterrupt:
        print()
    finally:
        model.save(args.model)
def main():
    args = TrainData.parse_args(create_parser(usage))
    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    data_files = data.train_files if args.use_train else data.test_files
    listener = PocketsphinxListener(args.key_phrase, args.dict_file,
                                    args.hmm_folder, args.threshold)

    print('Data:', data)
    stats = test_pocketsphinx(listener, data_files)
    show_stats(stats, not args.no_filenames)
def __init__(self):
    super().__init__(create_parser(usage))
    self.bb = BlackBoxOptimizer(file=self.args.trials_name)
    if not self.test:
        data = TrainData.from_both(self.args.tags_file, self.args.tags_folder,
                                   self.args.folder)
        _, self.test = data.load(False, True)

    from keras.callbacks import ModelCheckpoint
    # Strip any ModelCheckpoint callbacks inherited from the base class so
    # the optimizer trials don't overwrite the model file on each run
    for i in list(self.callbacks):
        if isinstance(i, ModelCheckpoint):
            self.callbacks.remove(i)
def run(self):
    args = self.args
    if args.models:
        data = TrainData.from_both(args.tags_file, args.tags_folder,
                                   args.folder)
        print('Data:', data)
        filenames = sum(
            data.train_files if args.use_train else data.test_files, [])
        loader = CachedDataLoader(partial(
            data.load, args.use_train, not args.use_train, shuffle=False))
        model_data = calc_stats(args.models, loader, args.use_train, filenames)
    else:
        model_data = {
            name: Stats.from_np_dict(data)
            for name, data in np.load(args.input_file)['data'].item().items()
        }

    for name, stats in model_data.items():
        print('=== {} ===\n{}\n\n{}\n'.format(
            name, stats.counts_str(), stats.summary_str()))

    if args.output_file:
        np.savez(args.output_file, data={
            name: stats.to_np_dict() for name, stats in model_data.items()
        })
    else:
        plt = load_plt()
        decoder = ThresholdDecoder(pr.threshold_config, pr.threshold_center)
        thresholds = [decoder.encode(i) for i in
                      np.linspace(0.0, 1.0, args.resolution)[1:-1]]
        for model_name, stats in model_data.items():
            xs = [stats.false_positives(i) for i in thresholds]
            ys = [stats.false_negatives(i) for i in thresholds]
            plt.plot(xs, ys, marker='x', linestyle='-', label=model_name)
            if args.labels:
                for x, y, threshold in zip(xs, ys, thresholds):
                    plt.annotate('{:.4f}'.format(threshold), (x, y))
        plt.legend()
        plt.xlabel('False Positives')
        plt.ylabel('False Negatives')
        plt.show()
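
# Round trip of the .npz container used above, assuming Stats.to_np_dict and
# Stats.from_np_dict invert each other as the save/load branches imply. Note
# that np.savez stores a dict as a pickled 0-d object array, so newer NumPy
# releases require allow_pickle=True at load time, which the bare np.load
# call above does not pass. Standalone demonstration with a stand-in payload:
def _npz_round_trip_demo(path='stats.npz'):
    import numpy as np
    payload = {'model-a': {'fp': np.array([1, 2, 3])}}  # stand-in for to_np_dict()
    np.savez(path, data=payload)
    loaded = np.load(path, allow_pickle=True)['data'].item()
    assert loaded['model-a']['fp'].tolist() == [1, 2, 3]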
def main():
    parser = create_parser(usage)
    parser.add_argument(
        'models', nargs='*',
        help='List of model filenames in format: wake-word.yy-mm-dd.net')
    args = TrainData.parse_args(parser)
    if not (bool(args.pocketsphinx_dict) == bool(args.pocketsphinx_folder) ==
            bool(args.pocketsphinx_wake_word)):
        parser.error('Must pass all or no Pocketsphinx arguments')

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    data_files = data.train_files if args.use_train else data.test_files
    print('Data:', data)

    metrics = {}

    if args.pocketsphinx_dict and args.pocketsphinx_folder and \
            args.pocketsphinx_wake_word:
        if not isfile(args.pocketsphinx_dict):
            parser.error('No such file: ' + args.pocketsphinx_dict)
        if not isdir(args.pocketsphinx_folder):
            parser.error('No such folder: ' + args.pocketsphinx_folder)
        listener = PocketsphinxListener(
            args.pocketsphinx_wake_word, args.pocketsphinx_dict,
            args.pocketsphinx_folder, args.pocketsphinx_threshold)
        stats = test_pocketsphinx(listener, data_files)
        metrics[args.pocketsphinx_dict] = stats_to_dict(stats)

    for model_name in args.models:
        print('Calculating', model_name + '...')
        inject_params(model_name)

        train, test = data.load(args.use_train, not args.use_train)
        inputs, targets = train if args.use_train else test
        predictions = Listener.find_runner(model_name)(model_name).predict(inputs)

        stats = Stats(predictions, targets, sum(data_files, []))

        print('----', model_name, '----')
        print(stats.counts_str())
        print()
        print(stats.summary_str())
        print()
        metrics[model_name] = stats.to_dict(args.threshold)

    print('Writing to:', args.output)
    with open(args.output, 'w') as f:
        json.dump(metrics, f)
def main():
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)

    data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
    train, test = data.load(args.use_train, not args.use_train)
    inputs, targets = train if args.use_train else test

    filenames = sum(data.train_files if args.use_train else data.test_files, [])
    predictions = load_precise_model(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
def main():
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    inputs, targets = train if args.use_train else test

    filenames = sum(data.train_files if args.use_train else data.test_files, [])
    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
def load_data(args: Any) -> Tuple[tuple, tuple]:
    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    print('Data:', data)

    train, test = data.load(True, not args.no_validation)
    print('Inputs shape:', train[0].shape)
    print('Outputs shape:', train[1].shape)

    if test:
        print('Test inputs shape:', test[0].shape)
        print('Test outputs shape:', test[1].shape)

    if 0 in train[0].shape or 0 in train[1].shape:
        print('Not enough data to train')
        exit(1)

    return train, test
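
# Hypothetical caller for load_data, assuming an argparse namespace with the
# tags_file/tags_folder/folder/no_validation attributes referenced above (the
# real scripts build this via create_parser and TrainData.parse_args):
#
#     train, test = load_data(args)
#     model.fit(*train, validation_data=test)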
def run(self):
    args = self.args
    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    data_files = data.train_files if args.use_train else data.test_files
    print('Data:', data)

    metrics = {}

    if self.is_pocketsphinx:
        script = PocketsphinxTestScript.create(
            key_phrase=args.pocketsphinx_wake_word,
            dict_file=args.pocketsphinx_dict,
            hmm_folder=args.pocketsphinx_folder,
            threshold=args.pocketsphinx_threshold)
        ww_files, nww_files = data_files
        script.run_test(ww_files, 'Wake Word', 1.0)
        script.run_test(nww_files, 'Not Wake Word', 0.0)
        stats = script.get_stats()
        metrics[args.pocketsphinx_dict] = stats.to_dict(args.threshold)

    for model_name in args.models:
        print('Calculating', model_name + '...')
        inject_params(model_name)

        train, test = data.load(args.use_train, not args.use_train)
        inputs, targets = train if args.use_train else test
        predictions = Listener.find_runner(model_name)(model_name).predict(inputs)

        stats = Stats(predictions, targets, sum(data_files, []))

        print('----', model_name, '----')
        print(stats.counts_str())
        print()
        print(stats.summary_str())
        print()
        metrics[model_name] = stats.to_dict(args.threshold)

    print('Writing to:', args.output)
    with open(args.output, 'w') as f:
        json.dump(metrics, f)
def run(self):
    args = self.args
    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    print('Data:', data)

    ww_files, nww_files = \
        data.train_files if args.use_train else data.test_files
    self.run_test(ww_files, 'Wake Word', 1.0)
    self.run_test(nww_files, 'Not Wake Word', 0.0)
    stats = self.get_stats()

    if not self.args.no_filenames:
        fp_files = stats.calc_filenames(False, True, 0.5)
        fn_files = stats.calc_filenames(False, False, 0.5)
        print('=== False Positives ===')
        print('\n'.join(fp_files))
        print()
        print('=== False Negatives ===')
        print('\n'.join(fn_files))
        print()
    print(stats.counts_str(0.5))
    print()
    print(stats.summary_str(0.5))
def __init__(self):
    parser = create_parser(usage)
    self.args = args = TrainData.parse_args(parser)
    self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
    self.vals_buffer = np.zeros(pr.buffer_samples, dtype=float)

    params = ModelParams(skip_acc=args.no_validation,
                         extra_metrics=args.extra_metrics,
                         loss_bias=1.0 - args.sensitivity)
    self.model = create_model(args.model, params)
    self.listener = Listener('', args.chunk_size, runner_cls=lambda x: None)

    from keras.callbacks import ModelCheckpoint, TensorBoard, LambdaCallback
    checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                 save_best_only=args.save_best)
    epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
    self.epoch = epoch_fiti.read().read(0, int)

    def on_epoch_end(_a, _b):
        self.epoch += 1
        epoch_fiti.write().write(self.epoch, str)

    self.model_base = splitext(self.args.model)[0]
    self.callbacks = [
        checkpoint,
        TensorBoard(log_dir=self.model_base + '.logs'),
        LambdaCallback(on_epoch_end=on_epoch_end)
    ]

    self.data = TrainData.from_both(args.tags_file, args.tags_folder,
                                    args.folder)
    pos_files, neg_files = self.data.train_files
    self.neg_files_it = iter(cycle(neg_files))
    self.pos_files_it = iter(cycle(pos_files))
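
# Minimal illustration of the iterator pattern set up above: itertools.cycle
# repeats a finite file list forever, so incremental training can keep
# drawing samples without any index bookkeeping (file names are hypothetical).
def _cycle_demo():
    from itertools import cycle
    pos_files_it = iter(cycle(['ww-1.wav', 'ww-2.wav']))
    drawn = [next(pos_files_it) for _ in range(3)]
    assert drawn == ['ww-1.wav', 'ww-2.wav', 'ww-1.wav']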
def main():
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train)
    inputs, targets = train if args.use_train else test
    filenames = sum(data.train_files if args.use_train else data.test_files, [])

    print('Model:', args.model)
    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
def main():
    parser = create_parser(usage)
    parser.add_argument(
        'models', nargs='*',
        help='Either Keras (.net) or TensorFlow (.pb) models to test')
    args = TrainData.parse_args(parser)
    if not args.models and not args.input_file and args.folder:
        args.input_file = args.folder
    if bool(args.models) == bool(args.input_file):
        parser.error('Please specify either a list of models or an input file')

    if not args.output_file:
        load_plt()  # Error early if matplotlib not installed
    import numpy as np

    if args.models:
        data = TrainData.from_both(args.tags_file, args.tags_folder,
                                   args.folder)
        print('Data:', data)
        filenames = sum(
            data.train_files if args.use_train else data.test_files, [])
        loader = CachedDataLoader(partial(
            data.load, args.use_train, not args.use_train, shuffle=False))
        model_data = calc_stats(args.models, loader, args.use_train, filenames)
    else:
        model_data = {
            name: Stats.from_np_dict(data)
            for name, data in np.load(args.input_file)['data'].item().items()
        }

    for name, stats in model_data.items():
        print('=== {} ===\n{}\n\n{}\n'.format(
            name, stats.counts_str(), stats.summary_str()))

    if args.output_file:
        np.savez(args.output_file, data={
            name: stats.to_np_dict() for name, stats in model_data.items()
        })
    else:
        plt = load_plt()
        decoder = ThresholdDecoder(pr.threshold_config, pr.threshold_center)
        thresholds = [decoder.encode(i) for i in
                      np.linspace(0.0, 1.0, args.resolution)[1:-1]]
        for model_name, stats in model_data.items():
            xs = [stats.false_positives(i) for i in thresholds]
            ys = [stats.false_negatives(i) for i in thresholds]
            plt.plot(xs, ys, marker='x', linestyle='-', label=model_name)
            if args.labels:
                for x, y, threshold in zip(xs, ys, thresholds):
                    plt.annotate('{:.4f}'.format(threshold), (x, y))
        plt.legend()
        plt.xlabel('False Positives')
        plt.ylabel('False Negatives')
        plt.show()
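
# The plot above traces an error-tradeoff curve: each threshold yields one
# (false positives, false negatives) point per model, and sweeping the
# thresholds connects them. A self-contained sketch of that sweep with
# made-up predictions (stand-ins for the real Stats methods):
def _tradeoff_demo():
    import numpy as np
    predictions = np.array([0.1, 0.4, 0.6, 0.9])
    targets = np.array([0.0, 0.0, 1.0, 1.0])
    for threshold in (0.25, 0.5, 0.75):
        fp = int(np.sum((predictions > threshold) & (targets < 0.5)))
        fn = int(np.sum((predictions <= threshold) & (targets > 0.5)))
        print('threshold={:.2f} fp={} fn={}'.format(threshold, fp, fn))
    # Raising the threshold trades false positives for false negatives:
    # prints (1, 0) at 0.25, (0, 0) at 0.50, and (0, 1) at 0.75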