def main():
    """Stream prediction confidences for audio piped in on stdin.

    A ``Listener`` is fed from ``sys.stdin.buffer`` and every confidence it
    returns is written as one ASCII line to the process's real stdout.
    While the function runs, ``sys.stdout`` is pointed at ``sys.stderr`` so
    that ordinary ``print`` output cannot mix into the confidence stream;
    the original ``sys.stdout`` is restored on the way out, including when
    argument parsing exits.

    The loop ends on ``EOFError`` or ``KeyboardInterrupt``.
    """
    # Presumably quiets TensorFlow's native logging - confirm against TF docs
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
        parser = create_parser(usage)
        parser.add_argument('-v', '--version', action='version',
                            version=__version__)
        parser.add_argument(
            'chunk_size', type=int, nargs='?', default=-1,
            # Trailing space keeps the two sentences apart once the adjacent
            # literals are joined
            help='Number of bytes to read before making a prediction. '
                 'Higher values are less computationally expensive')
        # Show the expected shell redirection in the usage line
        parser.usage = (
            parser.format_usage().strip().replace('usage: ', '')
            + ' < audio.wav'
        )
        args = parser.parse_args()

        if sys.stdin.isatty():
            parser.error('Please pipe audio via stdin using < audio.wav')

        listener = Listener(args.model_name, args.chunk_size)

        try:
            while True:
                conf = listener.update(sys.stdin.buffer)
                stdout.buffer.write((str(conf) + '\n').encode('ascii'))
                stdout.buffer.flush()
        except (EOFError, KeyboardInterrupt):
            pass
    finally:
        # Undo the redirection so callers of main() get their stdout back
        sys.stdout = stdout
def __init__(self, args):
    """Create the listener, engine and runner used by this script.

    :param args: parsed arguments; ``model``, ``chunk_size``,
        ``trigger_level`` and ``sensitivity`` are read here
    """
    super().__init__(args)

    chunk_size = args.chunk_size
    self.listener = Listener(args.model, chunk_size)
    # Zero-filled buffer whose length comes from the listener's parameters
    buffer_len = self.listener.pr.buffer_samples
    self.audio_buffer = np.zeros(buffer_len, dtype=float)

    self.engine = ListenerEngine(self.listener, chunk_size)
    # Predictions go through this object's own get_prediction
    self.engine.get_prediction = self.get_prediction

    self.runner = PreciseRunner(
        self.engine,
        args.trigger_level,
        sensitivity=args.sensitivity,
        on_activation=self.on_activation,
        on_prediction=self.on_prediction,
    )

    # Nine-digit random id plus a counter starting at zero
    self.session_id = '%09d' % randint(0, 999999999)
    self.chunk_num = 0
def run(self):
    """Write one confidence per line to stdout for audio read from stdin.

    ``sys.stdout`` is temporarily replaced by ``sys.stderr`` and restored
    in the ``finally`` clause.  ``EOFError`` and ``KeyboardInterrupt`` end
    the loop silently.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    real_stdout = sys.stdout
    sys.stdout = sys.stderr
    listener = Listener(self.args.model_name, self.args.chunk_size)

    try:
        while True:
            confidence = listener.update(sys.stdin.buffer)
            line = str(confidence) + '\n'
            real_stdout.buffer.write(line.encode('ascii'))
            real_stdout.buffer.flush()
    except (EOFError, KeyboardInterrupt):
        pass
    finally:
        sys.stdout = real_stdout
def main():
    """Run a listener and print '!' or '.' for each prediction.

    On activation ``activate_notify`` is called and, when ``--save-dir`` is
    set, the rolling audio buffer is written to a numbered wav file.
    """
    args = create_parser(usage).parse_args()
    print('chunk_size: ', args.chunk_size)

    listener = Listener(args.model, args.chunk_size)
    # Rolling window of the most recent audio, zero-filled to start
    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)

    def on_activation():
        """Notify and optionally save the buffered audio."""
        global chunk_num
        activate_notify()
        if not args.save_dir:
            return
        filename = args.save_prefix + session_id + '.' + str(chunk_num) + '.wav'
        nm = join(args.save_dir, filename)
        save_audio(nm, audio_buffer)
        print()
        print('Saved to ' + nm + '.')
        chunk_num += 1

    def on_prediction(conf):
        """Print a single marker character without a newline."""
        marker = '!' if conf > 0.8 else '.'
        print(marker, end='', flush=True)

    def get_prediction(chunk):
        """Slide the new audio into the buffer, then run the listener."""
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        kept = audio_buffer[len(audio):]
        audio_buffer = np.concatenate((kept, audio))
        return listener.update(chunk)

    engine = ListenerEngine(listener, args.chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(
        engine, args.threshold,
        on_activation=on_activation, on_prediction=on_prediction
    )
    runner.start()
    Event().wait()  # Wait forever
def main():
    """Run a model over the train or test set and print its statistics.

    Unless ``--no-filenames`` is given, the false-positive and
    false-negative filenames are listed before the counts and summary.
    """
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    if args.use_train:
        inputs, targets = train
        file_groups = data.train_files
    else:
        inputs, targets = test
        file_groups = data.test_files
    # Flatten the groups of filenames into one list
    filenames = sum(file_groups, [])

    runner = Listener.find_runner(args.model)(args.model)
    predictions = runner.predict(inputs)
    stats = Stats(predictions, targets, filenames)

    print('Data:', data)

    if not args.no_filenames:
        sections = (
            ('=== False Positives ===',
             stats.calc_filenames(False, True, args.threshold)),
            ('=== False Negatives ===',
             stats.calc_filenames(False, False, args.threshold)),
        )
        for title, names in sections:
            print(title)
            print('\n'.join(names))
            print()

    print(stats.counts_str(args.threshold))
    print()
    print(stats.summary_str(args.threshold))
def __init__(self):
    """Prepare output folders, the model file and the listener.

    Creates the two ``not-wake-word/generated`` folders (top level and
    under ``test``), and creates and saves a model when the model file does
    not exist yet.
    """
    super().__init__(create_parser(usage))
    args = self.args

    for root in (args.folder, join(args.folder, 'test')):
        makedirs(join(root, 'not-wake-word', 'generated'), exist_ok=True)

    self.trained_fns = load_trained_fns(args.model)
    self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

    if not isfile(args.model):
        params = ModelParams(
            skip_acc=args.no_validation,
            extra_metrics=args.extra_metrics,
        )
        new_model = create_model(args.model, params)
        new_model.save(args.model)

    self.listener = Listener(
        args.model, args.chunk_size, runner_cls=KerasRunner
    )
def __init__(self, args):
    """Load previous state, tagged data and the listener for training.

    :param args: parsed arguments; the model path, checkpoint options,
        tag locations and chunk size are read from it
    """
    self.args = args
    model_path = args.model

    self.trained_fns = load_trained_fns(model_path)
    params = inject_params(model_path)
    self.audio_buffer = np.zeros(params.buffer_samples, dtype=float)

    # Imported here rather than at module level, as in the original
    from keras.callbacks import ModelCheckpoint
    self.checkpoint = ModelCheckpoint(
        model_path,
        monitor=args.metric_monitor,
        save_best_only=args.save_best,
    )

    tagged = TrainData.from_tags(args.tags_file, args.tags_folder)
    self.tags_data = tagged.load(True, not args.no_validation)

    if not isfile(model_path):
        fresh_model = create_model(
            model_path, args.no_validation, args.extra_metrics
        )
        fresh_model.save(model_path)

    self.listener = Listener(
        model_path, args.chunk_size, runner_cls=KerasRunner
    )
def __init__(self, connection, address):
    """Store the connection and start a runner fed from a fresh stream.

    :param connection: the connection object kept on ``self.connection``
    :param address: the peer address kept on ``self.address``
    """
    self.address = address
    self.connection = connection  # type: socket.socket
    self.stream = ReadWriteStream()

    listener = Listener(MODEL_NAME, CHUNK_SIZE)
    engine = ListenerEngine(listener, CHUNK_SIZE)
    self.runner = PreciseRunner(
        engine,
        1,
        stream=self.stream,
        on_activation=self.on_activation,
        on_prediction=self.on_prediction,
    )
    self.runner.start()
def __init__(self, args):
    """Prepare output folders, build the model and attach it to a listener.

    :param args: parsed arguments forwarded to the parent constructor
    """
    super().__init__(args)
    opts = self.args

    # Both destinations for generated false activations must exist
    for root in (opts.folder, join(opts.folder, 'test')):
        makedirs(join(root, 'not-wake-word', 'generated'), exist_ok=True)

    self.trained_fns = load_trained_fns(opts.model)
    self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

    params = ModelParams(
        skip_acc=opts.no_validation,
        extra_metrics=opts.extra_metrics,
        loss_bias=1.0 - opts.sensitivity,
    )
    model = create_model(opts.model, params)

    self.listener = Listener(
        opts.model, opts.chunk_size, runner_cls=KerasRunner
    )
    # Replace the listener's runner and hand it the model built above
    keras_runner = KerasRunner(opts.model)
    self.listener.runner = keras_runner
    keras_runner.model = model

    self.samples_since_train = 0
def __init__(self):
    """Parse arguments and set up model, callbacks and sample iterators."""
    self.args = args = TrainData.parse_args(create_parser(usage))

    buffer_len = pr.buffer_samples
    self.audio_buffer = np.zeros(buffer_len, dtype=float)
    self.vals_buffer = np.zeros(buffer_len, dtype=float)

    params = ModelParams(
        skip_acc=args.no_validation,
        extra_metrics=args.extra_metrics,
        loss_bias=1.0 - args.sensitivity,
    )
    self.model = create_model(args.model, params)
    # The listener gets a runner factory that always returns None
    self.listener = Listener('', args.chunk_size, runner_cls=lambda x: None)

    from keras.callbacks import ModelCheckpoint, TensorBoard
    checkpoint = ModelCheckpoint(
        args.model,
        monitor=args.metric_monitor,
        save_best_only=args.save_best,
    )

    # The epoch counter is kept in a '<model>.epoch' file
    epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
    self.epoch = epoch_fiti.read().read(0, int)

    def on_epoch_end(_first, _second):
        """Advance the epoch counter and write it back to its file."""
        self.epoch += 1
        epoch_fiti.write().write(self.epoch, str)

    self.model_base = splitext(self.args.model)[0]
    tensorboard = TensorBoard(log_dir=self.model_base + '.logs')
    self.callbacks = [
        checkpoint,
        tensorboard,
        LambdaCallback(on_epoch_end=on_epoch_end),
    ]

    self.data = TrainData.from_both(
        args.tags_file, args.tags_folder, args.folder
    )
    pos_files, neg_files = self.data.train_files
    # Endless iterators over the negative and positive training files
    self.neg_files_it = iter(cycle(neg_files))
    self.pos_files_it = iter(cycle(pos_files))
def main():
    """Evaluate Pocketsphinx and/or model files and write metrics as JSON.

    The three Pocketsphinx arguments must be given together or not at all.
    Results are collected in a dict keyed by the Pocketsphinx dictionary
    path or the model name and dumped to ``args.output``.
    """
    parser = create_parser(usage)
    parser.add_argument(
        'models', nargs='*',
        help='List of model filenames in format: wake-word.yy-mm-dd.net')
    args = TrainData.parse_args(parser)

    ps_dict = args.pocketsphinx_dict
    ps_folder = args.pocketsphinx_folder
    ps_wake_word = args.pocketsphinx_wake_word
    # All three flags must share the same truthiness
    if len({bool(ps_dict), bool(ps_folder), bool(ps_wake_word)}) != 1:
        parser.error('Must pass all or no Pocketsphinx arguments')

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    if args.use_train:
        data_files = data.train_files
    else:
        data_files = data.test_files
    print('Data:', data)

    metrics = {}

    if ps_dict and ps_folder and ps_wake_word:
        if not isfile(ps_dict):
            parser.error('No such file: ' + ps_dict)
        if not isdir(ps_folder):
            parser.error('No such folder: ' + ps_folder)
        listener = PocketsphinxListener(
            ps_wake_word, ps_dict, ps_folder, args.pocketsphinx_threshold
        )
        stats = test_pocketsphinx(listener, data_files)
        metrics[ps_dict] = stats_to_dict(stats)

    for model_name in args.models:
        print('Calculating', model_name + '...')
        # Parameters are injected per model before the data is loaded
        inject_params(model_name)

        train, test = data.load(args.use_train, not args.use_train)
        inputs, targets = train if args.use_train else test
        runner = Listener.find_runner(model_name)(model_name)
        predictions = runner.predict(inputs)

        stats = Stats(predictions, targets, sum(data_files, []))

        print('----', model_name, '----')
        print(stats.counts_str())
        print()
        print(stats.summary_str())
        print()
        metrics[model_name] = stats.to_dict(args.threshold)

    print('Writing to:', args.output)
    with open(args.output, 'w') as f:
        json.dump(metrics, f)
def calc_stats(model_files, loader, use_train, filenames):
    """Run every model on its data and collect a Stats object per model.

    :param model_files: iterable of model paths
    :param loader: object whose ``load_for(model)`` returns (train, test)
    :param use_train: evaluate on the train split when true, else test
    :param filenames: passed through to ``Stats``
    :return: dict mapping each model's base name (no extension) to Stats
    """
    results = {}
    for model_path in model_files:
        train, test = loader.load_for(model_path)
        if use_train:
            inputs, targets = train
        else:
            inputs, targets = test

        print('Running network...')
        runner = Listener.find_runner(model_path)(model_path)
        predictions = runner.predict(inputs)
        print(inputs.shape, targets.shape)

        print('Generating statistics...')
        stats = Stats(predictions, targets, filenames)
        report = '\n' + stats.counts_str() + '\n\n' + stats.summary_str() + '\n'
        print(report)

        stem = splitext(model_path)[0]
        results[basename(stem)] = stats

    return results
def main():
    """Run a model over the train or test set and show its statistics."""
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train)
    if args.use_train:
        inputs, targets = train
        file_groups = data.train_files
    else:
        inputs, targets = test
        file_groups = data.test_files
    # Flatten the groups of filenames into one list
    filenames = sum(file_groups, [])

    runner = Listener.find_runner(args.model)(args.model)
    predictions = runner.predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
def main():
    """Run a listener and visualise each prediction on the terminal.

    In light mode a '!' or '.' is printed per prediction; otherwise a bar
    of at most 80 columns is drawn, with the part past the sensitivity
    cutoff shown in lower case.
    """
    args = create_parser(usage).parse_args()
    sensitivity = 0.5

    listener = Listener(args.model, args.chunk_size)
    # Rolling window of the most recent audio, zero-filled to start
    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)

    def on_activation():
        """Notify and optionally save the buffered audio."""
        global chunk_num
        activate_notify()
        if not args.save_dir:
            return
        filename = args.save_prefix + session_id + '.' + str(chunk_num) + '.wav'
        nm = join(args.save_dir, filename)
        save_audio(nm, audio_buffer)
        print()
        print('Saved to ' + nm + '.')
        chunk_num += 1

    def on_prediction(conf):
        """Print one marker or one bar line for this confidence."""
        if args.light_mode:
            marker = '!' if conf > 0.7 else '.'
            print(marker, end='', flush=True)
            return
        max_width = 80
        width = min(get_terminal_size()[0], max_width)
        filled = int(round(conf * width))
        bar = 'X' * filled + '-' * (width - filled)
        cutoff = round((1.0 - sensitivity) * width)
        head, tail = bar[:cutoff], bar[cutoff:]
        print(head + tail.replace('X', 'x'))

    def get_prediction(chunk):
        """Slide the new audio into the buffer, then run the listener."""
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        kept = audio_buffer[len(audio):]
        audio_buffer = np.concatenate((kept, audio))
        return listener.update(chunk)

    engine = ListenerEngine(listener, args.chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(
        engine, args.threshold, sensitivity=sensitivity,
        on_activation=on_activation, on_prediction=on_prediction
    )
    runner.start()
    Event().wait()  # Wait forever
def main():
    """Listen with the fixed './ok-sunshine.net' model and greet on detection.

    A greeting is printed when the confidence drops below 0.5 after having
    been above 0.5, tracked through the global ``detecting_flag``.
    """
    args = create_parser(usage).parse_args()

    # The model path is fixed here; args.model is not consulted
    sunshine_model = './ok-sunshine.net'
    listener = Listener(sunshine_model, args.chunk_size)
    # Rolling window of the most recent audio, zero-filled to start
    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)

    def on_activation():
        """Notify and optionally save the buffered audio."""
        global chunk_num
        activate_notify()
        if not args.save_dir:
            return
        filename = args.save_prefix + session_id + '.' + str(chunk_num) + '.wav'
        nm = join(args.save_dir, filename)
        save_audio(nm, audio_buffer)
        print()
        print('Saved to ' + nm + '.')
        chunk_num += 1

    def on_prediction(conf):
        """Raise the flag above 0.5; greet and clear it once below 0.5."""
        global detecting_flag
        if conf > 0.5:
            detecting_flag = True
        elif conf < 0.5 and detecting_flag:
            print(colored("Yeah! I'm Here.", 'green'))
            detecting_flag = False

    def get_prediction(chunk):
        """Slide the new audio into the buffer, then run the listener."""
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        kept = audio_buffer[len(audio):]
        audio_buffer = np.concatenate((kept, audio))
        return listener.update(chunk)

    engine = ListenerEngine(listener, args.chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(
        engine, args.threshold,
        on_activation=on_activation, on_prediction=on_prediction
    )
    runner.start()
    Event().wait()  # Wait forever
def main():
    """Start the wake-word detection ROS node and spin until shutdown.

    On activation an attention sound is played, connectivity is probed with
    an HTTP request, and either the speech-perception service is called or
    an apology is published together with a SADNESS emotion request.
    """
    rospy.init_node('wake_word_detection_node')
    print("node is up")

    model_path = res_path + "/stevie_10_06.pb"
    listener = Listener(model_path, chunk_size)
    # Rolling window of the most recent audio, zero-filled to start
    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)

    def on_activation():
        """React to a detected wake word."""
        print("activate")
        playsound(res_path + "/attention.wav")
        try:
            # Connectivity probe; the response itself is discarded
            requests.get('http://www.google.com')
            try:
                response = stop_speech_perception_service(True)
                print(response)
            except rospy.ServiceException as exc:
                print("Service did not process request: " + str(exc))
        except requests.ConnectionError:
            print("no internet")
            apology = (
                "I'm sorry. I am not connected to the internet now and cannot answer"
            )
            speak_pub.publish(apology)
            set_emotion_service(state="SADNESS", timeout=5500, restore=True)

    def on_prediction(conf):
        """Print a dot for every prediction."""
        print(".")

    def get_prediction(chunk):
        """Slide the new audio into the buffer, then run the listener."""
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        kept = audio_buffer[len(audio):]
        audio_buffer = np.concatenate((kept, audio))
        return listener.update(chunk)

    engine = ListenerEngine(listener, chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(
        engine,
        trigger_level=3,
        sensitivity=0.5,
        on_activation=on_activation,
        on_prediction=on_prediction,
    )
    runner.start()

    print("spinning")
    rospy.spin()
def run(self):
    """Evaluate Pocketsphinx and/or model files and write metrics as JSON.

    Results are collected in a dict keyed by the Pocketsphinx dictionary
    path or the model name and dumped to ``args.output``.
    """
    args = self.args
    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    if args.use_train:
        data_files = data.train_files
    else:
        data_files = data.test_files
    print('Data:', data)

    metrics = {}

    if self.is_pocketsphinx:
        script = PocketsphinxTestScript.create(
            key_phrase=args.pocketsphinx_wake_word,
            dict_file=args.pocketsphinx_dict,
            hmm_folder=args.pocketsphinx_folder,
            threshold=args.pocketsphinx_threshold,
        )
        ww_files, nww_files = data_files
        script.run_test(ww_files, 'Wake Word', 1.0)
        script.run_test(nww_files, 'Not Wake Word', 0.0)
        ps_stats = script.get_stats()
        metrics[args.pocketsphinx_dict] = ps_stats.to_dict(args.threshold)

    for model_name in args.models:
        print('Calculating', model_name + '...')
        # Parameters are injected per model before the data is loaded
        inject_params(model_name)

        train, test = data.load(args.use_train, not args.use_train)
        inputs, targets = train if args.use_train else test
        runner = Listener.find_runner(model_name)(model_name)
        predictions = runner.predict(inputs)

        stats = Stats(predictions, targets, sum(data_files, []))

        print('----', model_name, '----')
        print(stats.counts_str())
        print()
        print(stats.summary_str())
        print()
        metrics[model_name] = stats.to_dict(args.threshold)

    print('Writing to:', args.output)
    with open(args.output, 'w') as f:
        json.dump(metrics, f)
def main():
    """Run a listener and visualise each prediction on the terminal.

    In basic mode a '!' or '.' is printed per prediction; otherwise a bar
    of at most 80 columns is drawn, with the part past the sensitivity
    cutoff shown in lower case.
    """
    args = create_parser(usage).parse_args()

    listener = Listener(args.model, args.chunk_size)
    # Rolling window of the most recent audio, zero-filled to start
    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)

    def on_activation():
        """Notify that the wake word was detected."""
        activate_notify()
        # TODO: trigger VMSE

    def on_prediction(conf):
        """Print one marker or one bar line for this confidence."""
        if args.basic_mode:
            marker = '!' if conf > 0.7 else '.'
            print(marker, end='', flush=True)
            return
        max_width = 80
        width = min(get_terminal_size()[0], max_width)
        filled = int(round(conf * width))
        bar = 'X' * filled + '-' * (width - filled)
        cutoff = round((1.0 - args.sensitivity) * width)
        head, tail = bar[:cutoff], bar[cutoff:]
        print(head + tail.replace('X', 'x'))

    def get_prediction(chunk):
        """Slide the new audio into the buffer, then run the listener."""
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        kept = audio_buffer[len(audio):]
        audio_buffer = np.concatenate((kept, audio))
        return listener.update(chunk)

    engine = ListenerEngine(listener, args.chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(
        engine, args.trigger_level, sensitivity=args.sensitivity,
        on_activation=on_activation, on_prediction=on_prediction
    )
    runner.start()
    Event().wait()  # Wait forever
def main():
    """Run a model over the train or test set and show its statistics.

    The model path is printed before the predictions are computed.
    """
    args = TrainData.parse_args(create_parser(usage))
    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train)
    if args.use_train:
        inputs, targets = train
        file_groups = data.train_files
    else:
        inputs, targets = test
        file_groups = data.test_files
    # Flatten the groups of filenames into one list
    filenames = sum(file_groups, [])

    print(args.model)
    runner = Listener.find_runner(args.model)(args.model)
    predictions = runner.predict(inputs)

    stats = calc_stats(filenames, targets, predictions)
    print('Data:', data)
    show_stats(stats, not args.no_filenames)
class IncrementalTrainer:
    """Save false activations found in random audio and retrain on them."""

    def __init__(self, args):
        """Load previous state, tagged data and the listener for training.

        :param args: parsed arguments; the model path, checkpoint options,
            tag locations and chunk size are read from it
        """
        self.args = args
        model_path = args.model

        self.trained_fns = load_trained_fns(model_path)
        params = inject_params(model_path)
        self.audio_buffer = np.zeros(params.buffer_samples, dtype=float)

        # Imported here rather than at module level, as in the original
        from keras.callbacks import ModelCheckpoint
        self.checkpoint = ModelCheckpoint(
            model_path,
            monitor=args.metric_monitor,
            save_best_only=args.save_best,
        )

        tagged = TrainData.from_tags(args.tags_file, args.tags_folder)
        self.tags_data = tagged.load(True, not args.no_validation)

        if not isfile(model_path):
            fresh_model = create_model(
                model_path, args.no_validation, args.extra_metrics
            )
            fresh_model.save(model_path)

        self.listener = Listener(
            model_path, args.chunk_size, runner_cls=KerasRunner
        )

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder_data = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder_data.load(
            True, not self.args.no_validation
        )
        tagged_train, tagged_test = self.tags_data[0], self.tags_data[1]
        train_data = TrainData.merge(train_data, tagged_train)
        test_data = TrainData.merge(test_data, tagged_test)

        print()
        try:
            self.listener.runner.model.fit(
                *train_data,
                self.args.batch_size,
                self.args.epochs,
                validation_data=test_data,
                callbacks=[self.checkpoint]
            )
        finally:
            # The model is saved even when fitting is interrupted
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        # One random draw per file decides where its activations are saved
        save_test = random() > 0.8
        samples_since_train = 0
        audio = load_audio(fn)
        chunk_size = self.args.chunk_size
        num_chunks = len(audio) // chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, chunk_size)):
            progress = str(i * 100. / num_chunks) + '%'
            print('\r' + progress, end='', flush=True)

            kept = self.audio_buffer[len(chunk):]
            self.audio_buffer = np.concatenate((kept, chunk))
            conf = self.listener.update(chunk)

            if conf > 0.5:
                samples_since_train += 1
                stem = splitext(basename(fn))[0]
                name = join(
                    self.args.folder,
                    'test' if save_test else '',
                    'not-wake-word',
                    'generated',
                    stem + '-' + str(i) + '.wav',
                )
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            enough_samples = samples_since_train >= self.args.delay_samples
            if not save_test and enough_samples and self.args.epochs > 0:
                samples_since_train = 0
                self.retrain()

    def train_incremental(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100% ')

            # Persist the list of processed files after each one
            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
def __init__(self, args):
    """Inject the model's parameters, create its runner and an audio buffer.

    :param args: parsed arguments forwarded to the parent constructor
    """
    super().__init__(args)
    model_path = self.args.model
    inject_params(model_path)

    runner_cls = Listener.find_runner(model_path)
    self.runner = runner_cls(model_path)
    self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
def __init__(self):
    """Parse arguments, inject the model's parameters and create its runner."""
    parser = create_parser(usage)
    self.args = parser.parse_args()

    model_path = self.args.model
    inject_params(model_path)

    runner_cls = Listener.find_runner(model_path)
    self.runner = runner_cls(model_path)
    self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
class TrainIncrementalScript(TrainScript):
    """Save false activations found in random audio and retrain on them."""

    # NOTE(review): the usage text is reproduced exactly as it appears in
    # this file, where it is flattened onto one line - confirm whether the
    # Usage parser expects line breaks between entries.
    usage = Usage(
        ' Train a model to inhibit activation by marking false activations and retraining'
        ' :-e --epochs int 1 Number of epochs to train before continuing evaluation'
        ' :-ds --delay-samples int 10 Number of false activations to save before re-training'
        ' :-c --chunk-size int 2048 Number of samples between testing the neural network'
        ' :-r --random-data-folder str data/random Folder with properly encoded wav files of'
        ' random audio that should not cause an activation'
        ' :-th --threshold float 0.5 Network output to be considered activated'
        ' ... '
    ) | TrainScript.usage

    def __init__(self, args):
        """Prepare output folders, build the model and attach it to a listener.

        :param args: parsed arguments forwarded to the parent constructor
        """
        super().__init__(args)
        opts = self.args

        # Both destinations for generated false activations must exist
        for root in (opts.folder, join(opts.folder, 'test')):
            makedirs(join(root, 'not-wake-word', 'generated'), exist_ok=True)

        self.trained_fns = load_trained_fns(opts.model)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(
            skip_acc=opts.no_validation,
            extra_metrics=opts.extra_metrics,
            loss_bias=1.0 - opts.sensitivity,
        )
        model = create_model(opts.model, params)

        self.listener = Listener(
            opts.model, opts.chunk_size, runner_cls=KerasRunner
        )
        # Replace the listener's runner and hand it the model built above
        keras_runner = KerasRunner(opts.model)
        self.listener.runner = keras_runner
        keras_runner.model = model

        self.samples_since_train = 0

    @staticmethod
    def load_data(args: Any):
        """Load the tagged data set, with validation data unless disabled."""
        tagged = TrainData.from_tags(args.tags_file, args.tags_folder)
        return tagged.load(True, not args.no_validation)

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder_data = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder_data.load(
            True, not self.args.no_validation
        )
        train_data = TrainData.merge(train_data, self.sampled_data)
        test_data = TrainData.merge(test_data, self.test)
        train_inputs, train_outputs = train_data

        print()
        try:
            self.listener.runner.model.fit(
                train_inputs,
                train_outputs,
                self.args.batch_size,
                self.epoch + self.args.epochs,
                validation_data=test_data,
                callbacks=self.callbacks,
                initial_epoch=self.epoch
            )
        finally:
            # The model is saved even when fitting is interrupted
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        # One random draw per file decides where its activations are saved
        save_test = random() > 0.8
        audio = load_audio(fn)
        chunk_size = self.args.chunk_size
        num_chunks = len(audio) // chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, chunk_size)):
            progress = str(i * 100. / num_chunks) + '%'
            print('\r' + progress, end='', flush=True)

            kept = self.audio_buffer[len(chunk):]
            self.audio_buffer = np.concatenate((kept, chunk))
            conf = self.listener.update(chunk)

            if conf > self.args.threshold:
                self.samples_since_train += 1
                stem = splitext(basename(fn))[0]
                name = join(
                    self.args.folder,
                    'test' if save_test else '',
                    'not-wake-word',
                    'generated',
                    stem + '-' + str(i) + '.wav',
                )
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            enough_samples = (
                self.samples_since_train >= self.args.delay_samples
            )
            if not save_test and enough_samples and self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()

    def run(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100% ')

            # Persist the list of processed files after each one
            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
def main():
    """Control a wake-word runner through commands read from stdin.

    One command per line:

    * ``start normal`` / ``start safe`` - start a runner; on activation
      either print "activated" or save the buffered audio to a wav file.
    * ``start normal visual`` / ``start safe visual`` - as above, and also
      draw a confidence bar for every prediction.
    * ``stop`` - stop the current runner (ignored if none was started).
    * ``running`` - print the runner's ``running`` flag (``False`` if no
      runner was started yet).

    Unrecognised ``start ...`` lines are reported on stderr and ignored.
    When stdin reaches end-of-file the runner, if running, is stopped and
    the function returns.
    """
    args = create_parser(usage).parse_args()
    os.chdir(os.getcwd() + "/Precise")

    # State shared with the nested callbacks; filled in by "start" commands.
    # Initialising it here keeps "stop"/"running" safe before any start.
    listener = None
    runner = None
    audio_buffer = None

    def on_activation_normal():
        """Report the activation on stdout."""
        print("activated\n", flush=True)

    def on_activation_safe():
        """Save the buffered audio to a numbered wav file."""
        global chunk_num
        nm = join(
            args.save_dir,
            args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
        save_audio(nm, audio_buffer)
        print()
        print('Saved to ' + nm + '.')
        chunk_num += 1

    def on_prediction(conf):
        """Draw a bar of at most 80 columns for this confidence."""
        max_width = 80
        width = min(get_terminal_size()[0], max_width)
        units = int(round(conf * width))
        bar = 'X' * units + '-' * (width - units)
        cutoff = round((1.0 - args.sensitivity) * width)
        print(bar[:cutoff] + bar[cutoff:].replace('X', 'x') + "\n", flush=True)

    def get_prediction(chunk):
        """Slide the new audio into the buffer, then run the listener."""
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        audio_buffer = np.concatenate((audio_buffer[len(audio):], audio))
        return listener.update(chunk)

    # command -> (activation callback, draw prediction bar?)
    start_commands = {
        "start normal": (on_activation_normal, False),
        "start safe": (on_activation_safe, False),
        "start normal visual": (on_activation_normal, True),
        "start safe visual": (on_activation_safe, True),
    }

    while True:
        raw_line = sys.stdin.readline()
        if not raw_line:
            # readline() returns '' at end-of-file; without this check the
            # loop would spin forever once the controlling side closes stdin
            if runner is not None and runner.running:
                runner.stop()
            break

        line = raw_line.rstrip()
        if "start" in line:
            mode = start_commands.get(line)
            if mode is None:
                # Previously this fell through to runner.start() on a stale
                # or unbound runner
                print('Ignoring unknown command: ' + line, file=sys.stderr)
                continue
            activation_callback, visual = mode

            if os.path.isfile("./model.pb"):
                listener = Listener("model.pb", args.chunk_size)
            else:
                listener = Listener("model.net", args.chunk_size)
            engine = ListenerEngine(listener, args.chunk_size)

            callbacks = {'on_activation': activation_callback}
            # The safe callback saves audio_buffer, so the buffer has to be
            # allocated and fed in safe mode even without the visual bar
            if visual or activation_callback is on_activation_safe:
                audio_buffer = np.zeros(listener.pr.buffer_samples,
                                        dtype=float)
                engine.get_prediction = get_prediction
            if visual:
                callbacks['on_prediction'] = on_prediction

            runner = PreciseRunner(engine, args.trigger_level,
                                   sensitivity=args.sensitivity, **callbacks)
            runner.start()
        elif line == "stop":
            if runner is not None:
                runner.stop()
        elif line == "running":
            # Flushed like the other replies so a piped reader sees it
            print(runner is not None and runner.running, flush=True)
class ListenScript(BaseScript):
    """Run a model through a PreciseRunner and report each prediction."""

    # NOTE(review): the usage text is reproduced exactly as it appears in
    # this file, where it is flattened onto one line - confirm whether the
    # Usage parser expects line breaks between entries.
    usage = Usage(
        ' Run a model on microphone audio input'
        ' :model str Either Keras (.net) or TensorFlow (.pb) model to run'
        ' :-c --chunk-size int 2048 Samples between inferences'
        ' :-l --trigger-level int 3 Number of activated chunks to cause an activation'
        ' :-s --sensitivity float 0.5 Network output required to be considered activated'
        ' :-b --basic-mode Report using . or ! rather than a visual representation'
        ' :-d --save-dir str - Folder to save false positives'
        ' :-p --save-prefix str - Prefix for saved filenames '
    )

    def __init__(self, args):
        """Create the listener, engine and runner used by this script.

        :param args: parsed arguments; ``model``, ``chunk_size``,
            ``trigger_level`` and ``sensitivity`` are read here
        """
        super().__init__(args)

        chunk_size = args.chunk_size
        self.listener = Listener(args.model, chunk_size)
        # Zero-filled buffer whose length comes from the listener's parameters
        buffer_len = self.listener.pr.buffer_samples
        self.audio_buffer = np.zeros(buffer_len, dtype=float)

        self.engine = ListenerEngine(self.listener, chunk_size)
        # Predictions go through this object's own get_prediction
        self.engine.get_prediction = self.get_prediction

        self.runner = PreciseRunner(
            self.engine,
            args.trigger_level,
            sensitivity=args.sensitivity,
            on_activation=self.on_activation,
            on_prediction=self.on_prediction,
        )

        # Nine-digit random id plus a counter starting at zero
        self.session_id = '%09d' % randint(0, 999999999)
        self.chunk_num = 0

    def on_activation(self):
        """Notify and, when a save dir is set, write the buffered audio."""
        activate_notify()
        if not self.args.save_dir:
            return

        filename = (
            self.args.save_prefix + self.session_id + '.'
            + str(self.chunk_num) + '.wav'
        )
        nm = join(self.args.save_dir, filename)
        save_audio(nm, self.audio_buffer)
        print()
        print('Saved to ' + nm + '.')
        self.chunk_num += 1

    def on_prediction(self, conf):
        """Print one marker (basic mode) or one bar line for ``conf``."""
        if self.args.basic_mode:
            marker = '!' if conf > 0.7 else '.'
            print(marker, end='', flush=True)
            return

        max_width = 80
        width = min(get_terminal_size()[0], max_width)
        filled = int(round(conf * width))
        bar = 'X' * filled + '-' * (width - filled)
        # Everything past the cutoff column is shown in lower case
        cutoff = round((1.0 - self.args.sensitivity) * width)
        head, tail = bar[:cutoff], bar[cutoff:]
        print(head + tail.replace('X', 'x'))

    def get_prediction(self, chunk):
        """Slide the new audio into the buffer, then run the listener."""
        audio = buffer_to_audio(chunk)
        kept = self.audio_buffer[len(audio):]
        self.audio_buffer = np.concatenate((kept, audio))
        return self.listener.update(chunk)

    def run(self):
        """Start the runner and block the calling thread indefinitely."""
        self.runner.start()
        Event().wait()  # Wait forever
class IncrementalTrainer(Trainer):
    """Save false activations found in random audio and retrain on them."""

    def __init__(self):
        """Prepare output folders, build the model and attach it to a listener."""
        super().__init__(create_parser(usage))
        opts = self.args

        # Both destinations for generated false activations must exist
        for root in (opts.folder, join(opts.folder, 'test')):
            makedirs(join(root, 'not-wake-word', 'generated'), exist_ok=True)

        self.trained_fns = load_trained_fns(opts.model)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(
            skip_acc=opts.no_validation,
            extra_metrics=opts.extra_metrics,
            loss_bias=1.0 - opts.sensitivity,
        )
        model = create_model(opts.model, params)

        self.listener = Listener(
            opts.model, opts.chunk_size, runner_cls=KerasRunner
        )
        # Replace the listener's runner and hand it the model built above
        keras_runner = KerasRunner(opts.model)
        self.listener.runner = keras_runner
        keras_runner.model = model

        self.samples_since_train = 0

    @staticmethod
    def load_data(args: Any):
        """Load the tagged data set, with validation data unless disabled."""
        tagged = TrainData.from_tags(args.tags_file, args.tags_folder)
        return tagged.load(True, not args.no_validation)

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder_data = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder_data.load(
            True, not self.args.no_validation
        )
        train_data = TrainData.merge(train_data, self.sampled_data)
        test_data = TrainData.merge(test_data, self.test)
        train_inputs, train_outputs = train_data

        print()
        try:
            self.listener.runner.model.fit(
                train_inputs,
                train_outputs,
                self.args.batch_size,
                self.epoch + self.args.epochs,
                validation_data=test_data,
                callbacks=self.callbacks,
                initial_epoch=self.epoch
            )
        finally:
            # The model is saved even when fitting is interrupted
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        # One random draw per file decides where its activations are saved
        save_test = random() > 0.8
        audio = load_audio(fn)
        chunk_size = self.args.chunk_size
        num_chunks = len(audio) // chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, chunk_size)):
            progress = str(i * 100. / num_chunks) + '%'
            print('\r' + progress, end='', flush=True)

            kept = self.audio_buffer[len(chunk):]
            self.audio_buffer = np.concatenate((kept, chunk))
            conf = self.listener.update(chunk)

            if conf > self.args.threshold:
                self.samples_since_train += 1
                stem = splitext(basename(fn))[0]
                name = join(
                    self.args.folder,
                    'test' if save_test else '',
                    'not-wake-word',
                    'generated',
                    stem + '-' + str(i) + '.wav',
                )
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            enough_samples = (
                self.samples_since_train >= self.args.delay_samples
            )
            if not save_test and enough_samples and self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()

    def run(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100% ')

            # Persist the list of processed files after each one
            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
class TrainGeneratedScript(BaseScript):
    """A trainer that runs on generated data by overlaying wakewords on background audio"""

    usage = Usage('''
        Train a model on infinitely generated batches

        :model str
            Keras .net model file to load from and write to

        :-e --epochs int 100
            Number of epochs to train on

        :-b --batch-size int 200
            Number of samples in each batch

        :-t --steps-per-epoch int 100
            Number of steps that are considered an epoch

        :-c --chunk-size int 2048
            Number of audio samples between generating a training sample

        :-r --random-data-folder str data/random
            Folder with properly encoded wav files of random audio that should not cause an activation

        :-s --sensitivity float 0.2
            Weighted loss bias. Higher values decrease increase positives

        :-sb --save-best
            Only save the model each epoch if its stats improve

        :-nv --no-validation
            Disable accuracy and validation calculation to improve speed during training

        :-mm --metric-monitor str loss
            Metric used to determine when to save

        :-em --extra-metrics
            Add extra metrics during training

        :-p --save-prob float 0.0
            Probability of saving audio into debug/ww and debug/nww folders

        ...
    ''') | TrainData.usage

    def __init__(self, args):
        """
        Create the model, the rolling buffers, the training callbacks and the
        endlessly cycling iterators over positive and negative sample files

        Args:
            args: Parsed command line arguments as described by `usage`
        """
        super().__init__(args)
        # Rolling windows over the most recent audio and its per-sample labels.
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
        self.vals_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(skip_acc=args.no_validation,
                             extra_metrics=args.extra_metrics,
                             loss_bias=1.0 - args.sensitivity)
        self.model = create_model(args.model, params)
        # Only update_vectors() and clear() are called on this listener in
        # this class, so it is built with a runner factory that returns None.
        self.listener = Listener('', args.chunk_size, runner_cls=lambda x: None)

        from keras.callbacks import ModelCheckpoint, TensorBoard
        checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                     save_best_only=args.save_best)
        epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(_a, _b):
            """Advance the epoch counter and persist it next to the model"""
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        self.callbacks = [
            checkpoint,
            TensorBoard(log_dir=self.model_base + '.logs', ),
            LambdaCallback(on_epoch_end=on_epoch_end)
        ]

        self.data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
        pos_files, neg_files = self.data.train_files
        self.neg_files_it = iter(cycle(neg_files))
        self.pos_files_it = iter(cycle(pos_files))

    def layer_with(self, sample: np.ndarray, value: int) -> np.ndarray:
        """Create an identical 2d array where the second row is filled with value"""
        b = np.full((2, len(sample)), value, dtype=float)
        b[0] = sample
        return b

    def generate_wakeword_pieces(self, volume):
        """
        Generates chunks of audio that represent the wakeword stream

        Endlessly alternates between a sample file (positive or negative with
        equal probability) normalized to `volume` and labelled with its target,
        and a stretch of zeros labelled 0 that is between 0.5 and 2.5 times
        pr.sample_rate samples long. Every piece is a 2-row array as built
        by layer_with(): row 0 is audio, row 1 is the label
        """
        while True:
            target = 1 if random() > 0.5 else 0
            it = self.pos_files_it if target else self.neg_files_it
            sample_file = next(it)
            yield self.layer_with(
                self.normalize_volume_to(load_audio(sample_file), volume), target)
            yield self.layer_with(
                np.zeros(int(pr.sample_rate * (0.5 + 2.0 * random()))), 0)

    def chunk_audio_pieces(self, pieces, chunk_size):
        """
        Convert chunks of audio into a series of equally sized pieces

        Samples that do not fill a whole chunk are carried over and prepended
        to the next piece, so the resulting stream is continuous
        """
        left_over = np.array([])
        for piece in pieces:
            if left_over.size == 0:
                combined = piece
            else:
                combined = np.concatenate([left_over, piece], axis=-1)

            # Count what chunk_audio really hands out instead of deriving the
            # remainder arithmetically; len(piece) on a 2-row piece is the row
            # count (2), not the number of samples.
            # NOTE(review): assumes chunk_audio yields consecutive,
            # non-overlapping slices starting at index 0 - confirm.
            consumed = 0
            for chunk in chunk_audio(combined.T, chunk_size):
                consumed += len(chunk)
                yield chunk.T
            left_over = combined[..., consumed:]

    def calc_volume(self, sample: np.ndarray):
        """Find the RMS of the audio"""
        return sqrt(np.mean(np.square(sample)))

    def normalize_volume_to(self, sample, volume):
        """
        Normalize the volume to a certain RMS

        A sample whose RMS is exactly zero cannot be scaled; it is returned as
        an unscaled float copy rather than dividing by zero and producing NaNs
        """
        current_volume = self.calc_volume(sample)
        if current_volume == 0:
            return np.array(sample, dtype=float)
        return volume * sample / current_volume

    def merge(self, a, b, ratio):
        """Perform a weighted sum of a and b. ratio=1.0 means 100% of b and 0% of a"""
        return (1.0 - ratio) * a + ratio * b

    @staticmethod
    def max_run_length(x: np.ndarray, val: int):
        """Finds the maximum continuous length of the given value in the sequence"""
        if x.size == 0:
            return 0
        else:
            # Positions where the value changes mark the end of each run.
            y = np.array(x[1:] != x[:-1])
            i = np.append(np.where(y), len(x) - 1)
            run_lengths = np.diff(np.append(-1, i))
            run_length_values = x[i]
            return max([
                rl for rl, v in zip(run_lengths, run_length_values) if v == val
            ], default=0)

    def vectors_from_fn(self, fn: str):
        """
        Run through a single background audio file, overlaying with wake words.
        Generates (mfccs, target) where mfccs is a series of mfcc values and
        target is a single integer classification of the target network output for that chunk
        """
        audio = load_audio(fn)
        audio_volume = self.calc_volume(audio)
        audio_volume *= 0.4 + 0.5 * random()
        audio = self.normalize_volume_to(audio, audio_volume)

        self.listener.clear()
        chunked_bg = chunk_audio(audio, self.args.chunk_size)
        chunked_ww = self.chunk_audio_pieces(
            self.generate_wakeword_pieces(audio_volume), self.args.chunk_size)

        for i, (chunk_bg, (chunk_ww, targets)) in enumerate(zip(chunked_bg, chunked_ww)):
            chunk = self.merge(chunk_bg, chunk_ww, 0.6)
            self.vals_buffer = np.concatenate(
                (self.vals_buffer[len(targets):], targets))
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            mfccs = self.listener.update_vectors(chunk)

            # Fraction of the label window taken up by the longest run of 1s.
            percent_overlapping = self.max_run_length(
                self.vals_buffer, 1) / len(self.vals_buffer)

            if self.vals_buffer[-1] == 0 and percent_overlapping > 0.8:
                target = 1
            elif percent_overlapping < 0.5:
                target = 0
            else:
                # Neither clearly positive nor clearly negative: skip it.
                continue

            if random() > 1.0 - self.args.save_prob:
                name = splitext(basename(fn))[0]
                wav_file = join('debug', 'ww' if target == 1 else 'nww',
                                '{} - {}.wav'.format(name, i))
                save_audio(wav_file, self.audio_buffer)
            yield mfccs, target

    @staticmethod
    def samples_to_batches(samples: Iterable, batch_size: int):
        """
        Chunk a series of network inputs and outputs into larger batches

        The final batch may be smaller than batch_size. The generator finishes
        once the samples are exhausted
        """
        it = iter(samples)
        while True:
            batch_in, batch_out = [], []
            with suppress(StopIteration):
                for _ in range(batch_size):
                    sample_in, sample_out = next(it)
                    batch_in.append(sample_in)
                    batch_out.append(sample_out)
            if not batch_in:
                # A plain return ends the generator; raising StopIteration
                # here would surface as RuntimeError (PEP 479).
                return
            yield np.array(batch_in), np.array(batch_out)

    def generate_samples(self):
        """
        Generate training samples (network inputs and outputs)

        Loops over the shuffled background files forever

        Raises:
            ValueError: If the random data folder contains no wav files
        """
        filenames = glob_all(self.args.random_data_folder, '*.wav')
        if not filenames:
            # Without this the loop below would spin forever yielding nothing.
            raise ValueError(
                'No wav files found in: ' + str(self.args.random_data_folder))
        shuffle(filenames)
        while True:
            for fn in filenames:
                for x, y in self.vectors_from_fn(fn):
                    yield x, y

    def run(self):
        """Train the model on randomly generated batches"""
        _, test_data = self.data.load(train=False, test=True)
        try:
            self.model.fit_generator(
                self.samples_to_batches(self.generate_samples(), self.args.batch_size),
                steps_per_epoch=self.args.steps_per_epoch,
                epochs=self.epoch + self.args.epochs,
                validation_data=test_data,
                callbacks=self.callbacks,
                initial_epoch=self.epoch
            )
        finally:
            # Always persist the model and its parameters, even when
            # training is interrupted.
            self.model.save(self.args.model)
            save_params(self.args.model)
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical
import numpy as np
import os
import librosa
from keras import backend as K
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io
from sonopy import mfcc_spec, chop_array, power_spec, filterbanks, safe_log, dct
from precise.network_runner import Listener

# Debug script: feed a fixed recording to the listener in hand-picked chunk
# sizes and print the value returned for every chunk.
samples, sample_rate = librosa.load("/tmp/fixed.wav", sr=16000)
listener = Listener("qqq.pb", -1)

chunk_sizes = (4096, 4096, 4096, 3532, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096)

# Walk through the recording with a moving offset; each step hands the next
# `size` samples to the listener.
offset = 0
for size in chunk_sizes:
    end = offset + size
    print(listener.update(samples[offset:end]))
    offset = end

# This code produces 21 outputs
# bakerloo produces 75. Hmm.