def main():
    """Run a Precise model over audio piped through stdin.

    Prints one confidence value per prediction to the *original* stdout;
    stdout is redirected to stderr for the duration so that library noise
    (e.g. TensorFlow banners) cannot corrupt the numeric output stream.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # silence TensorFlow C++ info/warning logs
    stdout = sys.stdout
    sys.stdout = sys.stderr  # keep the data stream clean; restored in finally below

    parser = create_parser(usage)
    parser.add_argument('-v', '--version', action='version', version=__version__)
    parser.add_argument(
        'chunk_size', type=int, nargs='?', default=-1,
        # Fixed: the two adjacent literals previously concatenated without a
        # separating space ("prediction.Higher")
        help='Number of bytes to read before making a prediction. '
             'Higher values are less computationally expensive')
    parser.usage = parser.format_usage().strip().replace('usage: ', '') + ' < audio.wav'
    args = parser.parse_args()

    if sys.stdin.isatty():
        parser.error('Please pipe audio via stdin using < audio.wav')

    listener = Listener(args.model_name, args.chunk_size)

    try:
        while True:
            conf = listener.update(sys.stdin.buffer)
            stdout.buffer.write((str(conf) + '\n').encode('ascii'))
            stdout.buffer.flush()
    except (EOFError, KeyboardInterrupt):
        pass
    finally:
        # Previously the redirection leaked past this function; restore it
        sys.stdout = stdout
def run(self):
    """Stream one confidence value per prediction from stdin audio.

    Redirects stdout to stderr while running so library chatter cannot
    pollute the numeric output; the real stdout is restored on exit.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # quiet TensorFlow C++ logging
    real_stdout = sys.stdout
    sys.stdout = sys.stderr

    listener = Listener(self.args.model_name, self.args.chunk_size)

    try:
        while True:
            confidence = listener.update(sys.stdin.buffer)
            line = (str(confidence) + '\n').encode('ascii')
            real_stdout.buffer.write(line)
            real_stdout.buffer.flush()
    except (EOFError, KeyboardInterrupt):
        # Normal termination: end of piped audio or Ctrl-C
        pass
    finally:
        sys.stdout = real_stdout
class TrainIncrementalScript(TrainScript):
    """Mine false activations from random audio and retrain to suppress them.

    Streams wav files of non-wake-word audio through the listener; any chunk
    whose confidence exceeds the threshold is saved as a generated
    not-wake-word sample, and the model is periodically retrained on the
    accumulated samples.
    """
    usage = Usage('''
        Train a model to inhibit activation by marking
        false activations and retraining

        :-e --epochs int 1
            Number of epochs to train before continuing evaluation

        :-ds --delay-samples int 10
            Number of false activations to save before re-training

        :-c --chunk-size int 2048
            Number of samples between testing the neural network

        :-r --random-data-folder str data/random
            Folder with properly encoded wav files of
            random audio that should not cause an activation

        :-th --threshold float 0.5
            Network output to be considered activated

        ...
    ''') | TrainScript.usage

    def __init__(self, args):
        super().__init__(args)

        # Make sure the destination folders for mined samples exist
        # (both the training and the held-out test variant)
        for i in (
                join(self.args.folder, 'not-wake-word', 'generated'),
                join(self.args.folder, 'test', 'not-wake-word', 'generated')
        ):
            makedirs(i, exist_ok=True)

        # Filenames already processed in previous sessions, so reruns skip them
        self.trained_fns = load_trained_fns(self.args.model)
        # Rolling window of the most recent audio, used when saving activations
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(
            skip_acc=self.args.no_validation, extra_metrics=self.args.extra_metrics,
            loss_bias=1.0 - self.args.sensitivity
        )
        model = create_model(self.args.model, params)
        # Force a KerasRunner and hand it the freshly created/loaded model so
        # retrain() can call .fit() on it directly
        self.listener = Listener(self.args.model, self.args.chunk_size,
                                 runner_cls=KerasRunner)
        self.listener.runner = KerasRunner(self.args.model)
        self.listener.runner.model = model
        # False activations saved since the last retrain
        self.samples_since_train = 0

    @staticmethod
    def load_data(args: Any):
        # Load (train, test) data pairs from the tags file/folder;
        # validation data is skipped when --no-validation is set
        data = TrainData.from_tags(args.tags_file, args.tags_folder)
        return data.load(True, not args.no_validation)

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder.load(True, not self.args.no_validation)

        # Combine freshly mined folder data with the tagged data loaded earlier
        train_data = TrainData.merge(train_data, self.sampled_data)
        test_data = TrainData.merge(test_data, self.test)
        train_inputs, train_outputs = train_data
        print()
        try:
            self.listener.runner.model.fit(
                train_inputs, train_outputs, self.args.batch_size,
                self.epoch + self.args.epochs, validation_data=test_data,
                callbacks=self.callbacks, initial_epoch=self.epoch
            )
        finally:
            # Persist the model even if training is interrupted
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        # ~20% of mined samples go to the test set instead of training
        save_test = random() > 0.8
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            # Slide the rolling buffer forward by one chunk
            self.audio_buffer = np.concatenate((self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > self.args.threshold:
                # False activation: save the buffered audio as a new
                # not-wake-word training (or test) sample
                self.samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '',
                            'not-wake-word', 'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            # Retrain once enough new false activations have accumulated
            if not save_test and self.samples_since_train >= self.args.delay_samples and \
                    self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()

    def run(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100% ')

            # Record completion so interrupted sessions can resume
            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
class ListenScript(BaseScript):
    """Run a wake-word model against live microphone input, visualizing each
    prediction and optionally saving the audio around false activations."""
    usage = Usage('''
        Run a model on microphone audio input

        :model str
            Either Keras (.net) or TensorFlow (.pb) model to run

        :-c --chunk-size int 2048
            Samples between inferences

        :-l --trigger-level int 3
            Number of activated chunks to cause an activation

        :-s --sensitivity float 0.5
            Network output required to be considered activated

        :-b --basic-mode
            Report using . or ! rather than a visual representation

        :-d --save-dir str -
            Folder to save false positives

        :-p --save-prefix str -
            Prefix for saved filenames
    ''')

    def __init__(self, args):
        super().__init__(args)
        self.listener = Listener(args.model, args.chunk_size)
        # Rolling window of recent audio, written out on activation
        self.audio_buffer = np.zeros(self.listener.pr.buffer_samples, dtype=float)
        self.engine = ListenerEngine(self.listener, args.chunk_size)
        self.engine.get_prediction = self.get_prediction
        self.runner = PreciseRunner(
            self.engine, args.trigger_level, sensitivity=args.sensitivity,
            on_activation=self.on_activation, on_prediction=self.on_prediction
        )
        # Random 9-digit id distinguishes saved files from different sessions
        self.session_id = '%09d' % randint(0, 999999999)
        self.chunk_num = 0

    def on_activation(self):
        """Notify the user and, if configured, save the triggering audio."""
        activate_notify()

        if self.args.save_dir:
            filename = (self.args.save_prefix + self.session_id +
                        '.' + str(self.chunk_num) + '.wav')
            path = join(self.args.save_dir, filename)
            save_audio(path, self.audio_buffer)
            print()
            print('Saved to ' + path + '.')
            self.chunk_num += 1

    def on_prediction(self, conf):
        """Render one network confidence value to the terminal."""
        if self.args.basic_mode:
            print('!' if conf > 0.7 else '.', end='', flush=True)
            return

        # Full mode: a bar of X's proportional to confidence, lowercased
        # past the sensitivity cutoff
        width = min(get_terminal_size()[0], 80)
        filled = int(round(conf * width))
        bar = 'X' * filled + '-' * (width - filled)
        cutoff = round((1.0 - self.args.sensitivity) * width)
        print(bar[:cutoff] + bar[cutoff:].replace('X', 'x'))

    def get_prediction(self, chunk):
        """Update the rolling buffer and return the listener's confidence."""
        decoded = buffer_to_audio(chunk)
        self.audio_buffer = np.concatenate(
            (self.audio_buffer[len(decoded):], decoded))
        return self.listener.update(chunk)

    def run(self):
        self.runner.start()
        Event().wait()  # Wait forever
class IncrementalTrainer(Trainer):
    """Mine false activations from random audio files and retrain the model
    to suppress them (parser-based variant of the incremental trainer)."""

    def __init__(self):
        super().__init__(create_parser(usage))

        # Ensure destination folders for mined not-wake-word samples exist
        for i in (join(self.args.folder, 'not-wake-word', 'generated'),
                  join(self.args.folder, 'test', 'not-wake-word', 'generated')):
            makedirs(i, exist_ok=True)

        # Filenames already processed in earlier sessions (skipped on rerun)
        self.trained_fns = load_trained_fns(self.args.model)
        # Rolling window of the most recent audio samples
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(skip_acc=self.args.no_validation,
                             extra_metrics=self.args.extra_metrics,
                             loss_bias=1.0 - self.args.sensitivity)
        model = create_model(self.args.model, params)
        # Force a KerasRunner and attach the model so retrain() can .fit() it
        self.listener = Listener(self.args.model, self.args.chunk_size,
                                 runner_cls=KerasRunner)
        self.listener.runner = KerasRunner(self.args.model)
        self.listener.runner.model = model
        # Count of false activations saved since the last retrain
        self.samples_since_train = 0

    @staticmethod
    def load_data(args: Any):
        # Load (train, test) pairs from tagged data; skip validation data
        # when --no-validation is set
        data = TrainData.from_tags(args.tags_file, args.tags_folder)
        return data.load(True, not args.no_validation)

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder.load(True, not self.args.no_validation)

        # Merge newly mined folder samples with previously loaded tagged data
        train_data = TrainData.merge(train_data, self.sampled_data)
        test_data = TrainData.merge(test_data, self.test)
        train_inputs, train_outputs = train_data
        print()
        try:
            self.listener.runner.model.fit(train_inputs, train_outputs,
                                           self.args.batch_size,
                                           self.epoch + self.args.epochs,
                                           validation_data=test_data,
                                           callbacks=self.callbacks,
                                           initial_epoch=self.epoch)
        finally:
            # Always persist the model, even on interrupt mid-fit
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        # ~20% of mined samples are routed to the test set
        save_test = random() > 0.8
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            # Advance the rolling buffer by one chunk
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > self.args.threshold:
                # False activation: save buffered audio as a generated sample
                self.samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '',
                            'not-wake-word', 'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            # Retrain once enough false activations have accumulated
            if not save_test and self.samples_since_train >= self.args.delay_samples and \
                    self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()

    def run(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100% ')

            # Record the file as done so interrupted runs can resume
            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
class IncrementalTrainer:
    """Older standalone incremental trainer: mines false activations from
    random audio and retrains, using a ModelCheckpoint callback and a fixed
    0.5 activation threshold."""

    def __init__(self, args):
        self.args = args
        # Filenames already processed in earlier sessions (skipped on rerun)
        self.trained_fns = load_trained_fns(args.model)
        pr = inject_params(args.model)
        # Rolling window of the most recent audio samples
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        # Imported lazily to avoid paying Keras startup cost at module import
        from keras.callbacks import ModelCheckpoint
        self.checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                          save_best_only=args.save_best)
        data = TrainData.from_tags(args.tags_file, args.tags_folder)
        # (train, test) tuple of tagged data, merged with folder data later
        self.tags_data = data.load(True, not args.no_validation)

        # Create and save an initial model if none exists on disk yet
        if not isfile(args.model):
            create_model(args.model, args.no_validation, args.extra_metrics).save(args.model)
        self.listener = Listener(args.model, args.chunk_size,
                                 runner_cls=KerasRunner)

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder.load(True, not self.args.no_validation)

        # Merge freshly mined folder data with the tagged (train, test) data
        train_data = TrainData.merge(train_data, self.tags_data[0])
        test_data = TrainData.merge(test_data, self.tags_data[1])
        print()
        try:
            self.listener.runner.model.fit(*train_data, self.args.batch_size,
                                           self.args.epochs,
                                           validation_data=test_data,
                                           callbacks=[self.checkpoint])
        finally:
            # Persist the model even if training is interrupted
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        # ~20% of mined samples are routed to the test set
        save_test = random() > 0.8
        # Note: unlike the newer variant, this counter is per-file, not
        # persisted across files
        samples_since_train = 0
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            # Advance the rolling buffer by one chunk
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > 0.5:  # hard-coded activation threshold in this version
                samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '',
                            'not-wake-word', 'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            # Retrain once enough false activations have accumulated
            if not save_test and samples_since_train >= self.args.delay_samples and self.args.epochs > 0:
                samples_since_train = 0
                self.retrain()

    def train_incremental(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100% ')

            # Record the file as done so interrupted runs can resume
            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
# Debug/scratch script: feed a wav file through a Listener in fixed-size
# chunks and print one confidence value per chunk, to compare against the
# prediction count seen elsewhere.
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical
import numpy as np
import os
import librosa
from keras import backend as K
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io
from sonopy import mfcc_spec, chop_array, power_spec, filterbanks, safe_log, dct

from precise.network_runner import Listener

# Load the clip resampled to 16 kHz (presumably the model's expected rate
# — confirm against the model's params)
samples, sample_rate = librosa.load("/tmp/fixed.wav", sr=16000)

listener = Listener("qqq.pb", -1)

copy = samples[:]
# Hard-coded chunk sizes (mostly 4096 samples, one 3532); each update()
# call prints one confidence value
for i in (4096, 4096, 4096, 3532, 4096, 4096, 4096, 4096, 4096, 4096, 4096,
          4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096):
    chunk = copy[:i]
    print(listener.update(chunk))
    copy = copy[i:]

# This code produces 21 outputs
# bakerloo produces 75. Hmm.