def main():
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)
    save_params(args.model)

    data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
    print('Data:', data)
    (inputs, outputs), test_data = data.load(True, not args.no_validation)

    print('Inputs shape:', inputs.shape)
    print('Outputs shape:', outputs.shape)
    if test_data:
        print('Test inputs shape:', test_data[0].shape)
        print('Test outputs shape:', test_data[1].shape)

    if 0 in inputs.shape or 0 in outputs.shape:
        print('Not enough data to train')
        exit(1)

    model = create_model(args.model, args.no_validation, args.extra_metrics)
    model.summary()

    from keras.callbacks import ModelCheckpoint
    checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                 save_best_only=args.save_best)

    try:
        # Keyword arguments make the magic number explicit: 5000 is the batch size
        model.fit(inputs, outputs, batch_size=5000, epochs=args.epochs,
                  validation_data=test_data, callbacks=[checkpoint])
    except KeyboardInterrupt:
        print()
    finally:
        model.save(args.model)

def main():
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    inputs, targets = train if args.use_train else test
    filenames = sum(data.train_files if args.use_train else data.test_files, [])

    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = Stats(predictions, targets, filenames)

    print('Data:', data)

    if not args.no_filenames:
        fp_files = stats.calc_filenames(False, True, args.threshold)
        fn_files = stats.calc_filenames(False, False, args.threshold)
        print('=== False Positives ===')
        print('\n'.join(fp_files))
        print()
        print('=== False Negatives ===')
        print('\n'.join(fn_files))
        print()
    print(stats.counts_str(args.threshold))
    print()
    print(stats.summary_str(args.threshold))

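# For reference, a minimal sketch of how false-positive/false-negative file
# lists like those from Stats.calc_filenames can be derived. The Stats
# internals are not shown here, so split_failures is a hypothetical helper
# that uses only the standard FP/FN definitions.
import numpy as np

def split_failures(predictions, targets, filenames, threshold=0.5):
    """Return (false_positive_files, false_negative_files).

    A false positive is a negative sample (target == 0) scored at or above
    the threshold; a false negative is a positive sample (target == 1)
    scored below it.
    """
    preds = np.asarray(predictions).reshape(-1)
    targs = np.asarray(targets).reshape(-1)
    fp_files = [f for f, p, t in zip(filenames, preds, targs)
                if t < 0.5 and p >= threshold]
    fn_files = [f for f, p, t in zip(filenames, preds, targs)
                if t >= 0.5 and p < threshold]
    return fp_files, fn_files
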
def __init__(self, parser=None):
    parser = parser or ArgumentParser()
    add_to_parser(parser, self.usage, True)
    args = TrainData.parse_args(parser)
    self.args = args = self.process_args(args) or args

    if args.invert_samples and not args.samples_file:
        parser.error('You must specify --samples-file when using --invert-samples')
    if args.samples_file and not isfile(args.samples_file):
        # invert_samples is a flag, so only samples_file can name a missing file
        parser.error('No such file: ' + args.samples_file)
    if not 0.0 <= args.sensitivity <= 1.0:
        parser.error('sensitivity must be between 0.0 and 1.0')

    output_folder = os.path.join(args.folder, splitext(args.model)[0])
    if not os.path.exists(output_folder):
        print('Creating output folder:', output_folder)
        os.makedirs(output_folder)
    args.model = os.path.join(output_folder, args.model)

    inject_params(args.model)
    save_params(args.model)

    self.train, self.test = self.load_data(self.args)

    set_loss_bias(1.0 - args.sensitivity)
    params = ModelParams(skip_acc=args.no_validation,
                         extra_metrics=args.extra_metrics)
    self.model = create_model(args.model, params)
    self.model.summary()

    # LambdaCallback is used below, so it must be imported alongside the others
    from keras.callbacks import ModelCheckpoint, TensorBoard, LambdaCallback
    checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                 save_best_only=args.save_best)

    # Persist the epoch counter across runs in a '<model>.epoch' file
    epoch_file = splitext(args.model)[0] + '.epoch'
    epoch_fiti = Fitipy(epoch_file)
    self.epoch = epoch_fiti.read().read(0, int)

    def on_epoch_end(_epoch, _logs):
        self.epoch += 1
        epoch_fiti.write().write(self.epoch, str)

    self.model_base = splitext(self.args.model)[0]

    if args.samples_file:
        self.samples, self.hash_to_ind = self.load_sample_data(args.samples_file, self.train)
    else:
        self.samples = set()
        self.hash_to_ind = {}

    self.callbacks = [
        checkpoint,
        TensorBoard(log_dir=self.model_base + '.logs'),
        LambdaCallback(on_epoch_end=on_epoch_end)
    ]

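# The Fitipy calls above persist a single integer (the epoch counter) in a
# '<model>.epoch' file between runs. A dependency-free equivalent, shown only
# as a hypothetical sketch of that behavior:
import os

def read_epoch(path, default=0):
    """Read a persisted epoch counter, returning a default if absent."""
    if os.path.isfile(path):
        with open(path) as f:
            text = f.read().strip()
            if text:
                return int(text)
    return default

def write_epoch(path, epoch):
    """Persist the epoch counter as plain text."""
    with open(path, 'w') as f:
        f.write(str(epoch))
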
def main():
    args = TrainData.parse_args(create_parser(usage))
    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    data_files = data.train_files if args.use_train else data.test_files
    listener = PocketsphinxListener(args.key_phrase, args.dict_file,
                                    args.hmm_folder, args.threshold)

    print('Data:', data)
    stats = test_pocketsphinx(listener, data_files)
    show_stats(stats, not args.no_filenames)

def main():
    args = TrainData.parse_args(create_parser(usage))

    for i in (join(args.folder, 'not-wake-word', 'generated'),
              join(args.folder, 'test', 'not-wake-word', 'generated')):
        makedirs(i, exist_ok=True)

    trainer = IncrementalTrainer(args)
    try:
        trainer.train_incremental()
    except KeyboardInterrupt:
        print()

def main():
    parser = create_parser(usage)
    parser.add_argument('models', nargs='*',
                        help='List of model filenames in format: wake-word.yy-mm-dd.net')
    args = TrainData.parse_args(parser)

    if not (bool(args.pocketsphinx_dict) == bool(args.pocketsphinx_folder) ==
            bool(args.pocketsphinx_wake_word)):
        parser.error('Must pass all or no Pocketsphinx arguments')

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    data_files = data.train_files if args.use_train else data.test_files
    print('Data:', data)

    metrics = {}

    if args.pocketsphinx_dict and args.pocketsphinx_folder and args.pocketsphinx_wake_word:
        if not isfile(args.pocketsphinx_dict):
            parser.error('No such file: ' + args.pocketsphinx_dict)
        if not isdir(args.pocketsphinx_folder):
            parser.error('No such folder: ' + args.pocketsphinx_folder)
        listener = PocketsphinxListener(args.pocketsphinx_wake_word,
                                        args.pocketsphinx_dict,
                                        args.pocketsphinx_folder,
                                        args.pocketsphinx_threshold)
        stats = test_pocketsphinx(listener, data_files)
        metrics[args.pocketsphinx_dict] = stats_to_dict(stats)

    for model_name in args.models:
        print('Calculating', model_name + '...')
        # Each model carries its own parameters, so reload the data per model
        inject_params(model_name)

        train, test = data.load(args.use_train, not args.use_train)
        inputs, targets = train if args.use_train else test
        predictions = Listener.find_runner(model_name)(model_name).predict(inputs)

        stats = Stats(predictions, targets, sum(data_files, []))

        print('----', model_name, '----')
        print(stats.counts_str())
        print()
        print(stats.summary_str())
        print()
        metrics[model_name] = stats.to_dict(args.threshold)

    print('Writing to:', args.output)
    with open(args.output, 'w') as f:
        json.dump(metrics, f)

def main():
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)

    data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
    train, test = data.load(args.use_train, not args.use_train)
    inputs, targets = train if args.use_train else test
    filenames = sum(data.train_files if args.use_train else data.test_files, [])

    predictions = load_precise_model(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)

def main():
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    inputs, targets = train if args.use_train else test
    filenames = sum(data.train_files if args.use_train else data.test_files, [])

    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)

def __init__(self):
    parser = create_parser(usage)
    self.args = args = TrainData.parse_args(parser)
    self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
    self.vals_buffer = np.zeros(pr.buffer_samples, dtype=float)

    params = ModelParams(skip_acc=args.no_validation,
                         extra_metrics=args.extra_metrics,
                         loss_bias=1.0 - args.sensitivity)
    self.model = create_model(args.model, params)
    self.listener = Listener('', args.chunk_size, runner_cls=lambda x: None)

    # LambdaCallback is used below, so it must be imported alongside the others
    from keras.callbacks import ModelCheckpoint, TensorBoard, LambdaCallback
    checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                 save_best_only=args.save_best)
    epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
    self.epoch = epoch_fiti.read().read(0, int)

    def on_epoch_end(_epoch, _logs):
        self.epoch += 1
        epoch_fiti.write().write(self.epoch, str)

    self.model_base = splitext(self.args.model)[0]

    self.callbacks = [
        checkpoint,
        TensorBoard(log_dir=self.model_base + '.logs'),
        LambdaCallback(on_epoch_end=on_epoch_end)
    ]

    self.data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    pos_files, neg_files = self.data.train_files
    # cycle() already returns an iterator, so wrapping it in iter() is redundant
    self.neg_files_it = cycle(neg_files)
    self.pos_files_it = cycle(pos_files)

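# The cycle() iterators above give the incremental trainer an endless,
# repeating stream of positive and negative sample files. A self-contained
# demonstration of the pattern, with made-up filenames:
from itertools import cycle

files = ['a.wav', 'b.wav']
stream = cycle(files)
print(next(stream), next(stream), next(stream))  # -> a.wav b.wav a.wav
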
def main():
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train)
    inputs, targets = train if args.use_train else test
    filenames = sum(data.train_files if args.use_train else data.test_files, [])

    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)

def main():
    parser = create_parser(usage)
    parser.add_argument('models', nargs='*',
                        help='Either Keras (.net) or TensorFlow (.pb) models to test')
    args = TrainData.parse_args(parser)

    if not args.models and not args.input_file and args.folder:
        args.input_file = args.folder
    if bool(args.models) == bool(args.input_file):
        parser.error('Please specify either a list of models or an input file')

    if not args.output_file:
        load_plt()  # Error early if matplotlib not installed

    import numpy as np

    if args.models:
        data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
        print('Data:', data)
        filenames = sum(data.train_files if args.use_train else data.test_files, [])
        loader = CachedDataLoader(partial(data.load, args.use_train,
                                          not args.use_train, shuffle=False))
        model_data = calc_stats(args.models, loader, args.use_train, filenames)
    else:
        model_data = {
            name: Stats.from_np_dict(data)
            for name, data in np.load(args.input_file)['data'].item().items()
        }

    for name, stats in model_data.items():
        print('=== {} ===\n{}\n\n{}\n'.format(name, stats.counts_str(),
                                              stats.summary_str()))

    if args.output_file:
        np.savez(args.output_file,
                 data={name: stats.to_np_dict()
                       for name, stats in model_data.items()})
    else:
        plt = load_plt()
        decoder = ThresholdDecoder(pr.threshold_config, pr.threshold_center)
        thresholds = [decoder.encode(i)
                      for i in np.linspace(0.0, 1.0, args.resolution)[1:-1]]
        for model_name, stats in model_data.items():
            x = [stats.false_positives(i) for i in thresholds]
            y = [stats.false_negatives(i) for i in thresholds]
            plt.plot(x, y, marker='x', linestyle='-', label=model_name)
            if args.labels:
                # Distinct names avoid shadowing the x/y lists being plotted
                for xp, yp, threshold in zip(x, y, thresholds):
                    plt.annotate('{:.4f}'.format(threshold), (xp, yp))
        plt.legend()
        plt.xlabel('False Positives')
        plt.ylabel('False Negatives')
        plt.show()

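# The plot above traces the false-positive/false-negative trade-off as the
# decision threshold sweeps from 0 to 1. A minimal sketch of computing such a
# curve directly from raw scores (error_curve is a hypothetical helper; the
# real script delegates the counting to Stats and ThresholdDecoder):
import numpy as np

def error_curve(predictions, targets, num_points=100):
    """Count FPs and FNs at evenly spaced thresholds (endpoints excluded)."""
    preds = np.asarray(predictions).reshape(-1)
    positive = np.asarray(targets).reshape(-1) >= 0.5
    thresholds = np.linspace(0.0, 1.0, num_points)[1:-1]
    fps = [int((preds[~positive] >= t).sum()) for t in thresholds]
    fns = [int((preds[positive] < t).sum()) for t in thresholds]
    return thresholds, fps, fns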