Esempio n. 1
0
def main():
    """Print a model confidence for each piece of audio piped in on stdin.

    Parses the command line, builds a ``Listener`` from the parsed model name
    and chunk size, then writes one confidence per line (ASCII) to the
    original stdout until ``EOFError`` or ``KeyboardInterrupt`` ends the loop.

    While this function runs, ``sys.stdout`` points at ``sys.stderr``; the
    original stream is restored before returning, including when argument
    parsing exits early.
    """
    # NOTE(review): presumably quiets TensorFlow's native logging - confirm.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    # Anything printed through sys.stdout from here on lands on stderr, so
    # only the confidences written through the saved handle reach stdout.
    stdout = sys.stdout
    sys.stdout = sys.stderr

    try:
        parser = create_parser(usage)
        parser.add_argument('-v',
                            '--version',
                            action='version',
                            version=__version__)
        parser.add_argument(
            'chunk_size',
            type=int,
            nargs='?',
            default=-1,
            # The trailing space keeps the two sentences apart once the
            # adjacent string literals are concatenated.
            help='Number of bytes to read before making a prediction. '
            'Higher values are less computationally expensive')
        parser.usage = parser.format_usage().strip().replace('usage: ',
                                                             '') + ' < audio.wav'
        args = parser.parse_args()

        # Audio must be piped in; an interactive terminal has nothing to read.
        if sys.stdin.isatty():
            parser.error('Please pipe audio via stdin using < audio.wav')

        listener = Listener(args.model_name, args.chunk_size)

        try:
            while True:
                conf = listener.update(sys.stdin.buffer)
                stdout.buffer.write((str(conf) + '\n').encode('ascii'))
                stdout.buffer.flush()
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C is the normal way out of the loop.
            pass
    finally:
        # Undo the redirection so callers get their stdout back.
        sys.stdout = stdout
Esempio n. 2
0
    def run(self):
        """Write one model confidence per line to the original stdout.

        ``sys.stdout`` is pointed at ``sys.stderr`` for the duration of the
        call and put back afterwards. The loop ends on ``EOFError`` or
        ``KeyboardInterrupt``.
        """
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        # Keep a handle on the real stdout while print() output goes to stderr.
        real_stdout, sys.stdout = sys.stdout, sys.stderr
        listener = Listener(self.args.model_name, self.args.chunk_size)

        try:
            while True:
                confidence = listener.update(sys.stdin.buffer)
                line = str(confidence) + '\n'
                real_stdout.buffer.write(line.encode('ascii'))
                real_stdout.buffer.flush()
        except (EOFError, KeyboardInterrupt):
            # Running out of input or being interrupted simply stops the loop.
            pass
        finally:
            sys.stdout = real_stdout
class TrainIncrementalScript(TrainScript):
    """Walk through random audio, save chunks that activate the network as
    generated not-wake-word samples, and retrain once enough have piled up."""

    usage = Usage('''
        Train a model to inhibit activation by
        marking false activations and retraining

        :-e --epochs int 1
            Number of epochs to train before continuing evaluation

        :-ds --delay-samples int 10
            Number of false activations to save before re-training

        :-c --chunk-size int 2048
            Number of samples between testing the neural network

        :-r --random-data-folder str data/random
            Folder with properly encoded wav files of
            random audio that should not cause an activation

        :-th --threshold float 0.5
            Network output to be considered activated

        ...
    ''') | TrainScript.usage

    def __init__(self, args):
        """Create the output folders, build the model and wire up the listener."""
        super().__init__(args)

        generated_dirs = [
            join(self.args.folder, 'not-wake-word', 'generated'),
            join(self.args.folder, 'test', 'not-wake-word', 'generated'),
        ]
        for path in generated_dirs:
            makedirs(path, exist_ok=True)

        self.trained_fns = load_trained_fns(self.args.model)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(
            skip_acc=self.args.no_validation,
            extra_metrics=self.args.extra_metrics,
            loss_bias=1.0 - self.args.sensitivity,
        )
        model = create_model(self.args.model, params)
        self.listener = Listener(self.args.model, self.args.chunk_size, runner_cls=KerasRunner)
        # Swap in a fresh runner and point it at the model created above.
        runner = KerasRunner(self.args.model)
        self.listener.runner = runner
        runner.model = model
        # Activations saved since the last retraining session.
        self.samples_since_train = 0

    @staticmethod
    def load_data(args: Any):
        """Load the data described by the tags file and tags folder in ``args``."""
        return TrainData.from_tags(args.tags_file, args.tags_folder).load(
            True, not args.no_validation
        )

    def retrain(self):
        """Run one training session, picking up any new data on the filesystem.

        The model is saved to ``self.args.model`` whether or not fitting
        completes.
        """
        dataset = TrainData.from_folder(self.args.folder)
        folder_train, folder_test = dataset.load(True, not self.args.no_validation)

        merged_train = TrainData.merge(folder_train, self.sampled_data)
        merged_test = TrainData.merge(folder_test, self.test)
        inputs, outputs = merged_train
        print()
        try:
            self.listener.runner.model.fit(
                inputs,
                outputs,
                self.args.batch_size,
                self.epoch + self.args.epochs,
                validation_data=merged_test,
                callbacks=self.callbacks,
                initial_epoch=self.epoch,
            )
        finally:
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Process one audio file chunk by chunk, saving every activation.

        Each chunk whose confidence exceeds ``self.args.threshold`` has the
        current audio buffer written to the generated not-wake-word folder.
        The file's activations go to the test folder when a random draw
        exceeds 0.8; retraining is only triggered for non-test files.
        """
        save_test = random() > 0.8
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        stem = splitext(basename(fn))[0]
        subset = 'test' if save_test else ''

        for index, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            percent = index * 100. / num_chunks
            print('\r' + str(percent) + '%', end='', flush=True)
            # Slide the rolling buffer forward by one chunk.
            self.audio_buffer = np.concatenate((self.audio_buffer[len(chunk):], chunk))
            confidence = self.listener.update(chunk)
            if confidence > self.args.threshold:
                self.samples_since_train += 1
                wav_name = stem + '-' + str(index) + '.wav'
                out_path = join(self.args.folder, subset, 'not-wake-word', 'generated', wav_name)
                save_audio(out_path, self.audio_buffer)
                print()
                print('Saved to:', out_path)

            retrain_due = (
                not save_test
                and self.samples_since_train >= self.args.delay_samples
                and self.args.epochs > 0
            )
            if retrain_due:
                self.samples_since_train = 0
                self.retrain()

    def run(self):
        """
        Go through every wav file in the random data folder, skipping files
        already recorded as trained, and record each file once processed
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn not in self.trained_fns:
                print('Starting file ' + fn + '...')
                self.train_on_audio(fn)
                print('\r100%                 ')

                self.trained_fns.append(fn)
                save_trained_fns(self.trained_fns, self.args.model)
            else:
                print('Skipping ' + fn + '...')
Esempio n. 4
0
class ListenScript(BaseScript):
    """Run a model on live audio, display each prediction and optionally
    save the buffered audio whenever an activation fires."""

    usage = Usage('''
        Run a model on microphone audio input

        :model str
            Either Keras (.net) or TensorFlow (.pb) model to run

        :-c --chunk-size int 2048
            Samples between inferences

        :-l --trigger-level int 3
            Number of activated chunks to cause an activation

        :-s --sensitivity float 0.5
            Network output required to be considered activated

        :-b --basic-mode
            Report using . or ! rather than a visual representation

        :-d --save-dir str -
            Folder to save false positives

        :-p --save-prefix str -
            Prefix for saved filenames
    ''')

    def __init__(self, args):
        """Build the listener, engine and runner and hook up the callbacks."""
        super().__init__(args)
        self.listener = Listener(args.model, args.chunk_size)
        buffer_len = self.listener.pr.buffer_samples
        self.audio_buffer = np.zeros(buffer_len, dtype=float)
        self.engine = ListenerEngine(self.listener, args.chunk_size)
        # Route the engine's predictions through this object so the audio
        # buffer is kept up to date.
        self.engine.get_prediction = self.get_prediction
        self.runner = PreciseRunner(
            self.engine,
            args.trigger_level,
            sensitivity=args.sensitivity,
            on_activation=self.on_activation,
            on_prediction=self.on_prediction,
        )
        # Random zero-padded id plus a counter, both used in saved filenames.
        self.session_id = '%09d' % randint(0, 999999999)
        self.chunk_num = 0

    def on_activation(self):
        """Notify, then save the current audio buffer if a save dir is set."""
        activate_notify()

        if not self.args.save_dir:
            return

        filename = (self.args.save_prefix + self.session_id + '.' +
                    str(self.chunk_num) + '.wav')
        nm = join(self.args.save_dir, filename)
        save_audio(nm, self.audio_buffer)
        print()
        print('Saved to ' + nm + '.')
        self.chunk_num += 1

    def on_prediction(self, conf):
        """Print ``conf`` as a single character or as a bar of X/x/- marks."""
        if self.args.basic_mode:
            symbol = '!' if conf > 0.7 else '.'
            print(symbol, end='', flush=True)
            return

        max_width = 80
        width = min(get_terminal_size()[0], max_width)
        filled = int(round(conf * width))
        bar = 'X' * filled + '-' * (width - filled)
        cutoff = round((1.0 - self.args.sensitivity) * width)
        # Marks at or beyond the cutoff column are shown in lower case.
        head, tail = bar[:cutoff], bar[cutoff:]
        print(head + tail.replace('X', 'x'))

    def get_prediction(self, chunk):
        """Append the chunk's audio to the rolling buffer and return the
        listener's result for the chunk."""
        audio = buffer_to_audio(chunk)
        remaining = self.audio_buffer[len(audio):]
        self.audio_buffer = np.concatenate((remaining, audio))
        return self.listener.update(chunk)

    def run(self):
        """Start the runner and block indefinitely."""
        self.runner.start()
        # Nothing ever sets this event, so the wait never returns.
        Event().wait()
class IncrementalTrainer(Trainer):
    """Trainer that saves false activations found in random audio and
    retrains the model after enough of them have been collected."""

    def __init__(self):
        """Parse arguments via the base class, prepare folders and the listener."""
        super().__init__(create_parser(usage))

        base = self.args.folder
        targets = (
            join(base, 'not-wake-word', 'generated'),
            join(base, 'test', 'not-wake-word', 'generated'),
        )
        for target in targets:
            makedirs(target, exist_ok=True)

        self.trained_fns = load_trained_fns(self.args.model)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        model = create_model(
            self.args.model,
            ModelParams(
                skip_acc=self.args.no_validation,
                extra_metrics=self.args.extra_metrics,
                loss_bias=1.0 - self.args.sensitivity,
            ),
        )
        self.listener = Listener(
            self.args.model, self.args.chunk_size, runner_cls=KerasRunner
        )
        # Replace the listener's runner and attach the model created above.
        self.listener.runner = KerasRunner(self.args.model)
        self.listener.runner.model = model
        # Counts activations saved since the previous retrain.
        self.samples_since_train = 0

    @staticmethod
    def load_data(args: Any):
        """Load the tagged data referenced by ``args``."""
        tagged = TrainData.from_tags(args.tags_file, args.tags_folder)
        with_validation = not args.no_validation
        return tagged.load(True, with_validation)

    def retrain(self):
        """Fit the model on folder data merged with the sampled data.

        New files in the data folder are picked up on each call, and the
        model is saved even when fitting raises.
        """
        source = TrainData.from_folder(self.args.folder)
        loaded = source.load(True, not self.args.no_validation)
        train_part, test_part = loaded

        train_part = TrainData.merge(train_part, self.sampled_data)
        test_part = TrainData.merge(test_part, self.test)
        xs, ys = train_part
        print()
        try:
            self.listener.runner.model.fit(
                xs, ys, self.args.batch_size, self.epoch + self.args.epochs,
                validation_data=test_part,
                callbacks=self.callbacks,
                initial_epoch=self.epoch,
            )
        finally:
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Feed one audio file through the listener and save its activations.

        Activations are written under the test folder when a random draw
        exceeds 0.8, otherwise under the training folder. Only training
        files can trigger a retrain.
        """
        save_test = random() > 0.8
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for n, piece in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(n * 100. / num_chunks) + '%', end='', flush=True)
            # Drop the oldest samples and append the new chunk.
            kept = self.audio_buffer[len(piece):]
            self.audio_buffer = np.concatenate((kept, piece))
            if self.listener.update(piece) > self.args.threshold:
                self.samples_since_train += 1
                base_name = splitext(basename(fn))[0]
                name = join(
                    self.args.folder,
                    'test' if save_test else '',
                    'not-wake-word',
                    'generated',
                    base_name + '-' + str(n) + '.wav',
                )
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            if save_test:
                continue
            if self.samples_since_train < self.args.delay_samples:
                continue
            if self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()

    def run(self):
        """
        Iterate over the wav files in the random data folder, processing each
        one not yet listed as trained and recording it afterwards
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            already_done = fn in self.trained_fns
            if already_done:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100%                 ')

            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
class IncrementalTrainer:
    """Saves false activations found in random audio files and retrains the
    model on them together with the tagged data loaded at start-up."""

    def __init__(self, args):
        """Load state, set up the checkpoint callback and create the listener.

        A model file is created and saved first when ``args.model`` does not
        exist yet.
        """
        self.args = args
        self.trained_fns = load_trained_fns(args.model)
        pr = inject_params(args.model)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        from keras.callbacks import ModelCheckpoint
        self.checkpoint = ModelCheckpoint(
            args.model,
            monitor=args.metric_monitor,
            save_best_only=args.save_best,
        )
        tagged = TrainData.from_tags(args.tags_file, args.tags_folder)
        self.tags_data = tagged.load(True, not args.no_validation)

        if not isfile(args.model):
            fresh_model = create_model(args.model, args.no_validation,
                                       args.extra_metrics)
            fresh_model.save(args.model)
        self.listener = Listener(
            args.model, args.chunk_size, runner_cls=KerasRunner
        )

    def retrain(self):
        """Fit the model on folder data merged with the tagged data.

        Data is re-read from the folder on every call, and the model is
        saved whether or not fitting succeeds.
        """
        from_folder = TrainData.from_folder(self.args.folder)
        fs_train, fs_test = from_folder.load(True, not self.args.no_validation)

        tagged_train, tagged_test = self.tags_data[0], self.tags_data[1]
        training = TrainData.merge(fs_train, tagged_train)
        validation = TrainData.merge(fs_test, tagged_test)
        print()
        try:
            self.listener.runner.model.fit(
                *training,
                self.args.batch_size,
                self.args.epochs,
                validation_data=validation,
                callbacks=[self.checkpoint],
            )
        finally:
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Process a single audio file, saving chunks that score above 0.5.

        The activation counter is local, so it starts from zero for every
        file. Files randomly assigned to the test set never trigger a
        retrain.
        """
        save_test = random() > 0.8
        samples_since_train = 0
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        subset = 'test' if save_test else ''
        stem = splitext(basename(fn))[0]

        for idx, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            progress = str(idx * 100. / num_chunks)
            print('\r' + progress + '%', end='', flush=True)
            # Shift the rolling buffer left and append the new chunk.
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            score = self.listener.update(chunk)
            if score > 0.5:
                samples_since_train += 1
                name = join(self.args.folder, subset, 'not-wake-word',
                            'generated', stem + '-' + str(idx) + '.wav')
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            enough_samples = samples_since_train >= self.args.delay_samples
            if not save_test and enough_samples and self.args.epochs > 0:
                samples_since_train = 0
                self.retrain()

    def train_incremental(self):
        """
        Go through the wav files in the random data folder, handling each one
        that is not yet listed as trained and recording it when done
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn not in self.trained_fns:
                print('Starting file ' + fn + '...')
                self.train_on_audio(fn)
                print('\r100%                 ')

                self.trained_fns.append(fn)
                save_trained_fns(self.trained_fns, self.args.model)
                continue

            print('Skipping ' + fn + '...')
Esempio n. 7
0
import keras
from keras.models import Sequential
from keras.models import load_model

from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical
import numpy as np
import os
import librosa
from keras import backend as K
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io
from sonopy import mfcc_spec, chop_array, power_spec, filterbanks, safe_log, dct
from precise.network_runner import Listener

samples, sample_rate = librosa.load("/tmp/fixed.wav", sr=16000)

listener = Listener("qqq.pb", -1)
copy = samples[:]
# Feed the recording to the listener in 21 consecutive slices:
# three of 4096 samples, one of 3532, then seventeen more of 4096.
chunk_sizes = [4096] * 3 + [3532] + [4096] * 17
for i in chunk_sizes:
    # Take the next slice off the front and keep the remainder for later.
    chunk, copy = copy[:i], copy[i:]
    print(listener.update(chunk))

# This code produces 21 outputs
# bakerloo produces 75. Hmm.