Esempio n. 1
0
def main():
    """Read raw audio from stdin and print one wake-word confidence per line.

    sys.stdout is redirected to stderr for the duration of the loop so that
    stray library logging cannot corrupt the machine-readable confidence
    stream, which is written to the saved real stdout instead.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # silence TensorFlow info/warning logs
    stdout = sys.stdout
    sys.stdout = sys.stderr

    parser = create_parser(usage)
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=__version__)
    parser.add_argument(
        'chunk_size',
        type=int,
        nargs='?',
        default=-1,
        help='Number of bytes to read before making a prediction.'
        'Higher values are less computationally expensive')
    # Append the stdin-redirection hint to the auto-generated usage string
    parser.usage = parser.format_usage().strip().replace('usage: ',
                                                         '') + ' < audio.wav'
    args = parser.parse_args()

    if sys.stdin.isatty():
        parser.error('Please pipe audio via stdin using < audio.wav')

    listener = Listener(args.model_name, args.chunk_size)

    try:
        while True:
            conf = listener.update(sys.stdin.buffer)
            stdout.buffer.write((str(conf) + '\n').encode('ascii'))
            stdout.buffer.flush()
    except (EOFError, KeyboardInterrupt):
        pass
    finally:
        # Bug fix: restore the real stdout (was previously left pointing at
        # stderr forever), matching the equivalent run() implementation.
        sys.stdout = stdout
Esempio n. 2
0
 def __init__(self, args):
     """Set up the listener, rolling audio buffer and runner from CLI args."""
     super().__init__(args)
     self.listener = Listener(args.model, args.chunk_size)
     # Rolling window of the most recent audio, sized to the model's input
     self.audio_buffer = np.zeros(self.listener.pr.buffer_samples,
                                  dtype=float)
     self.engine = ListenerEngine(self.listener, args.chunk_size)
     # Route predictions through our wrapper so the audio buffer stays current
     self.engine.get_prediction = self.get_prediction
     self.runner = PreciseRunner(self.engine,
                                 args.trigger_level,
                                 sensitivity=args.sensitivity,
                                 on_activation=self.on_activation,
                                 on_prediction=self.on_prediction)
     # Random session id used in saved-clip filenames; chunk_num counts clips
     self.session_id, self.chunk_num = '%09d' % randint(0, 999999999), 0
Esempio n. 3
0
    def run(self):
        """Stream raw audio from stdin through the model, writing one
        confidence value per line to the original stdout."""
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # quiet TensorFlow logging
        stdout = sys.stdout
        # Divert prints/library logs to stderr so stdout stays machine-readable
        sys.stdout = sys.stderr
        listener = Listener(self.args.model_name, self.args.chunk_size)

        try:
            while True:
                conf = listener.update(sys.stdin.buffer)
                stdout.buffer.write((str(conf) + '\n').encode('ascii'))
                stdout.buffer.flush()
        except (EOFError, KeyboardInterrupt):
            pass
        finally:
            sys.stdout = stdout  # always restore the real stdout
Esempio n. 4
0
def main():
    """Run a wake-word model on live audio, printing '!'/'.' per prediction
    and optionally saving the audio around each activation."""
    args = create_parser(usage).parse_args()
    
    print('chunk_size: ', args.chunk_size)

    def on_activation():
        """Notify the user and optionally save the triggering audio clip."""
        activate_notify()

        if args.save_dir:
            # NOTE(review): chunk_num and session_id look like module-level
            # globals defined elsewhere in the file — confirm they exist
            global chunk_num
            nm = join(args.save_dir, args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
            save_audio(nm, audio_buffer)
            print()
            print('Saved to ' + nm + '.')
            chunk_num += 1

    def on_prediction(conf):
        # '!' marks a confident chunk, '.' a quiet one (0.8 display threshold)
        print('!' if conf > 0.8 else '.', end='', flush=True)

    listener = Listener(args.model, args.chunk_size)
    # Rolling buffer of the most recent audio, sized to the model's input
    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)

    def get_prediction(chunk):
        """Keep the rolling buffer current, then run the model on the chunk."""
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        audio_buffer = np.concatenate((audio_buffer[len(audio):], audio))
        return listener.update(chunk)

    engine = ListenerEngine(listener, args.chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(engine, args.threshold, on_activation=on_activation,
                           on_prediction=on_prediction)
    runner.start()
    Event().wait()  # Wait forever
Esempio n. 5
0
def main():
    """Evaluate a model over a tagged dataset and print statistics,
    optionally listing false-positive and false-negative filenames."""
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    inputs, targets = train if args.use_train else test

    # Flatten the per-category file lists into one ordered list of names
    if args.use_train:
        file_lists = data.train_files
    else:
        file_lists = data.test_files
    filenames = sum(file_lists, [])

    runner_cls = Listener.find_runner(args.model)
    predictions = runner_cls(args.model).predict(inputs)
    stats = Stats(predictions, targets, filenames)

    print('Data:', data)

    if not args.no_filenames:
        fp_files = stats.calc_filenames(False, True, args.threshold)
        fn_files = stats.calc_filenames(False, False, args.threshold)
        print('=== False Positives ===')
        print('\n'.join(fp_files))
        print()
        print('=== False Negatives ===')
        print('\n'.join(fn_files))
        print()
    print(stats.counts_str(args.threshold))
    print()
    print(stats.summary_str(args.threshold))
Esempio n. 6
0
    def __init__(self):
        """Prepare output folders, load training history and build the model
        (creating a fresh one on disk if none exists yet)."""
        super().__init__(create_parser(usage))

        # Ensure both the train and test 'generated' folders exist
        for i in (join(self.args.folder, 'not-wake-word', 'generated'),
                  join(self.args.folder, 'test', 'not-wake-word',
                       'generated')):
            makedirs(i, exist_ok=True)

        self.trained_fns = load_trained_fns(self.args.model)
        # Rolling audio window sized to the model input
        # NOTE(review): `pr` appears to be a module-level params object — confirm
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        if not isfile(self.args.model):
            params = ModelParams(skip_acc=self.args.no_validation,
                                 extra_metrics=self.args.extra_metrics)
            create_model(self.args.model, params).save(self.args.model)
        self.listener = Listener(self.args.model,
                                 self.args.chunk_size,
                                 runner_cls=KerasRunner)
    def __init__(self, args):
        """Load training state, tagged data and the Keras model for
        incremental retraining."""
        self.args = args
        self.trained_fns = load_trained_fns(args.model)
        pr = inject_params(args.model)
        # Rolling audio window sized to the model input
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        # Imported lazily so keras is only loaded when this class is used
        from keras.callbacks import ModelCheckpoint
        self.checkpoint = ModelCheckpoint(args.model,
                                          monitor=args.metric_monitor,
                                          save_best_only=args.save_best)
        data = TrainData.from_tags(args.tags_file, args.tags_folder)
        self.tags_data = data.load(True, not args.no_validation)

        # Create a fresh model on disk if none exists yet
        if not isfile(args.model):
            create_model(args.model, args.no_validation,
                         args.extra_metrics).save(args.model)
        self.listener = Listener(args.model,
                                 args.chunk_size,
                                 runner_cls=KerasRunner)
Esempio n. 8
0
 def __init__(self, connection, address):
     """Start a wake-word runner fed by a per-client audio stream.

     The client socket is kept for later use; audio pushed into
     self.stream is consumed by the PreciseRunner.
     """
     self.address = address
     self.connection = connection  # type: socket.socket
     self.stream = ReadWriteStream()
     # trigger_level=1: a single activated chunk triggers on_activation
     self.runner = PreciseRunner(
         ListenerEngine(Listener(MODEL_NAME, CHUNK_SIZE), CHUNK_SIZE),
         1, stream=self.stream, on_activation=self.on_activation,
         on_prediction=self.on_prediction
     )
     self.runner.start()
    def __init__(self, args):
        """Prepare output folders and build a fresh in-memory model whose
        runner is swapped in for incremental training."""
        super().__init__(args)

        # Ensure both train and test 'generated' folders exist
        for i in (
                join(self.args.folder, 'not-wake-word', 'generated'),
                join(self.args.folder, 'test', 'not-wake-word', 'generated')
        ):
            makedirs(i, exist_ok=True)

        self.trained_fns = load_trained_fns(self.args.model)
        # NOTE(review): `pr` appears to be a module-level params object — confirm
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(
            skip_acc=self.args.no_validation, extra_metrics=self.args.extra_metrics,
            loss_bias=1.0 - self.args.sensitivity
        )
        model = create_model(self.args.model, params)
        self.listener = Listener(self.args.model, self.args.chunk_size, runner_cls=KerasRunner)
        # Replace the runner's model with the freshly created one so training
        # operates on it directly rather than the on-disk weights
        self.listener.runner = KerasRunner(self.args.model)
        self.listener.runner.model = model
        self.samples_since_train = 0
Esempio n. 10
0
    def __init__(self):
        """Build the model, Keras callbacks and training-data iterators.

        Keeps a persistent epoch counter on disk (via Fitipy) so training
        can resume across invocations.
        """
        parser = create_parser(usage)
        self.args = args = TrainData.parse_args(parser)
        # NOTE(review): `pr` appears to be a module-level params object — confirm
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
        self.vals_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(skip_acc=args.no_validation,
                             extra_metrics=args.extra_metrics,
                             loss_bias=1.0 - args.sensitivity)
        self.model = create_model(args.model, params)
        # Dummy listener: no model file and a no-op runner; only used for its
        # audio-chunk bookkeeping
        self.listener = Listener('',
                                 args.chunk_size,
                                 runner_cls=lambda x: None)

        # Imported lazily so keras is only loaded when this script runs
        from keras.callbacks import ModelCheckpoint, TensorBoard
        checkpoint = ModelCheckpoint(args.model,
                                     monitor=args.metric_monitor,
                                     save_best_only=args.save_best)
        # Persist the epoch count next to the model file ('<model>.epoch')
        epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(a, b):
            # Bump and persist the epoch counter after every epoch
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        self.callbacks = [
            checkpoint,
            TensorBoard(log_dir=self.model_base + '.logs', ),
            LambdaCallback(on_epoch_end=on_epoch_end)
        ]

        self.data = TrainData.from_both(args.tags_file, args.tags_folder,
                                        args.folder)
        # Endless round-robin iterators over positive/negative training files
        pos_files, neg_files = self.data.train_files
        self.neg_files_it = iter(cycle(neg_files))
        self.pos_files_it = iter(cycle(pos_files))
Esempio n. 11
0
def main():
    """Benchmark one or more wake-word models (and optionally a Pocketsphinx
    baseline) on a dataset, writing the metrics to a JSON file."""
    parser = create_parser(usage)
    parser.add_argument(
        'models',
        nargs='*',
        help='List of model filenames in format: wake-word.yy-mm-dd.net')
    args = TrainData.parse_args(parser)
    # Pocketsphinx options must be given all together or not at all
    if not (bool(args.pocketsphinx_dict) == bool(args.pocketsphinx_folder) ==
            bool(args.pocketsphinx_wake_word)):
        parser.error('Must pass all or no Pocketsphinx arguments')

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    data_files = data.train_files if args.use_train else data.test_files
    print('Data:', data)

    metrics = {}

    if args.pocketsphinx_dict and args.pocketsphinx_folder and args.pocketsphinx_wake_word:
        if not isfile(args.pocketsphinx_dict):
            parser.error('No such file: ' + args.pocketsphinx_dict)
        if not isdir(args.pocketsphinx_folder):
            parser.error('No such folder: ' + args.pocketsphinx_folder)
        listener = PocketsphinxListener(args.pocketsphinx_wake_word,
                                        args.pocketsphinx_dict,
                                        args.pocketsphinx_folder,
                                        args.pocketsphinx_threshold)
        stats = test_pocketsphinx(listener, data_files)
        metrics[args.pocketsphinx_dict] = stats_to_dict(stats)

    for model_name in args.models:
        print('Calculating', model_name + '...')
        # Each model may carry its own feature params; inject before loading data
        inject_params(model_name)

        train, test = data.load(args.use_train, not args.use_train)
        inputs, targets = train if args.use_train else test
        predictions = Listener.find_runner(model_name)(model_name).predict(
            inputs)

        # Flatten per-category file lists into one list of filenames
        stats = Stats(predictions, targets, sum(data_files, []))

        print('----', model_name, '----')
        print(stats.counts_str())
        print()
        print(stats.summary_str())
        print()
        metrics[model_name] = stats.to_dict(args.threshold)

    print('Writing to:', args.output)
    with open(args.output, 'w') as f:
        json.dump(metrics, f)
Esempio n. 12
0
def calc_stats(model_files, loader, use_train, filenames):
    """Run each model over its data split and collect a Stats per model.

    Returns a dict mapping the model's basename (without extension) to its
    Stats object.
    """
    results = {}
    for model_path in model_files:
        train, test = loader.load_for(model_path)
        if use_train:
            inputs, targets = train
        else:
            inputs, targets = test
        print('Running network...')
        runner = Listener.find_runner(model_path)(model_path)
        predictions = runner.predict(inputs)
        print(inputs.shape, targets.shape)

        print('Generating statistics...')
        stats = Stats(predictions, targets, filenames)
        print('\n' + stats.counts_str() + '\n\n' + stats.summary_str() + '\n')

        key = basename(splitext(model_path)[0])
        results[key] = stats
    return results
Esempio n. 13
0
def main():
    """Evaluate a model over a tagged dataset and display its statistics."""
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train)
    # Pick the requested split and its matching (flattened) filename list
    if args.use_train:
        inputs, targets = train
        file_lists = data.train_files
    else:
        inputs, targets = test
        file_lists = data.test_files
    filenames = sum(file_lists, [])

    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
def main():
    """Run a wake-word model on live audio, drawing a confidence bar (or a
    '!'/'.' stream in light mode) and optionally saving activations."""
    args = create_parser(usage).parse_args()
    sensitivity = 0.5  # fixed runner sensitivity and bar cutoff position

    def on_activation():
        """Notify the user and optionally save the triggering audio clip."""
        activate_notify()

        if args.save_dir:
            # NOTE(review): chunk_num and session_id look like module-level
            # globals defined elsewhere in the file — confirm they exist
            global chunk_num
            nm = join(
                args.save_dir,
                args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
            save_audio(nm, audio_buffer)
            print()
            print('Saved to ' + nm + '.')
            chunk_num += 1

    def on_prediction(conf):
        if args.light_mode:
            # Compact output: '!' above the 0.7 display threshold, else '.'
            print('!' if conf > 0.7 else '.', end='', flush=True)
        else:
            # Draw a bar of X's proportional to confidence; X's past the
            # sensitivity cutoff are lowercased to mark the trigger region
            max_width = 80
            width = min(get_terminal_size()[0], max_width)
            units = int(round(conf * width))
            bar = 'X' * units + '-' * (width - units)
            cutoff = round((1.0 - sensitivity) * width)
            print(bar[:cutoff] + bar[cutoff:].replace('X', 'x'))

    listener = Listener(args.model, args.chunk_size)
    # Rolling buffer of the most recent audio, sized to the model's input
    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)

    def get_prediction(chunk):
        """Keep the rolling buffer current, then run the model on the chunk."""
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        audio_buffer = np.concatenate((audio_buffer[len(audio):], audio))
        return listener.update(chunk)

    engine = ListenerEngine(listener, args.chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(engine,
                           args.threshold,
                           sensitivity=sensitivity,
                           on_activation=on_activation,
                           on_prediction=on_prediction)
    runner.start()
    Event().wait()  # Wait forever
Esempio n. 15
0
def main():
    """Listen for the 'ok sunshine' wake word on live audio, announcing each
    detection and optionally saving the triggering clip."""
    args = create_parser(usage).parse_args()

    def on_activation():
        """Notify the user and optionally save the triggering audio clip."""
        activate_notify()

        if args.save_dir:
            # NOTE(review): chunk_num and session_id look like module-level
            # globals defined elsewhere in the file — confirm they exist
            global chunk_num
            nm = join(
                args.save_dir,
                args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
            save_audio(nm, audio_buffer)
            print()
            print('Saved to ' + nm + '.')
            chunk_num += 1

    def on_prediction(conf):
        # Announce once per detection: latch on rising above 0.5, report and
        # reset once confidence falls back below 0.5
        global detecting_flag
        # print('!' if conf > 0.5 else '.', end='', flush=True)
        if conf > 0.5:
            detecting_flag = True
        if conf < 0.5 and detecting_flag:
            print(colored("Yeah! I'm Here.", 'green'))
            detecting_flag = False

    sunshine_model = './ok-sunshine.net'  # hard-coded model path
    listener = Listener(sunshine_model, args.chunk_size)
    # Rolling buffer of the most recent audio, sized to the model's input
    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)

    def get_prediction(chunk):
        """Keep the rolling buffer current, then run the model on the chunk."""
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        audio_buffer = np.concatenate((audio_buffer[len(audio):], audio))
        return listener.update(chunk)

    engine = ListenerEngine(listener, args.chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(engine,
                           args.threshold,
                           on_activation=on_activation,
                           on_prediction=on_prediction)
    runner.start()
    Event().wait()  # Wait forever
def main():
    """ROS wake-word node: on activation, play an attention sound and pause
    speech perception; fall back to a spoken apology when offline."""
    rospy.init_node('wake_word_detection_node')
    print("node is up")

    def on_activation():
        print("activate")
        playsound(res_path + "/attention.wav")
        try:
            # Cheap connectivity probe before engaging the speech pipeline
            requests.get('http://www.google.com')
            try:
                response = stop_speech_perception_service(True)
                print(response)
            except rospy.ServiceException as exc:
                print("Service did not process request: " + str(exc))
        except requests.ConnectionError:
            print("no internet")
            speak_pub.publish(
                "I'm sorry. I am not connected to the internet now and cannot answer"
            )
            set_emotion_service(state="SADNESS", timeout=5500, restore=True)

    def on_prediction(conf):
        # Heartbeat dot for every processed chunk
        print(".")

    # NOTE(review): chunk_size/res_path appear to be module-level — confirm
    listener = Listener(res_path + "/stevie_10_06.pb", chunk_size)
    # Rolling buffer of the most recent audio, sized to the model's input
    audio_buffer = np.zeros(listener.pr.buffer_samples, dtype=float)

    def get_prediction(chunk):
        """Keep the rolling buffer current, then run the model on the chunk."""
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        audio_buffer = np.concatenate((audio_buffer[len(audio):], audio))
        return listener.update(chunk)

    engine = ListenerEngine(listener, chunk_size)
    engine.get_prediction = get_prediction
    runner = PreciseRunner(engine,
                           trigger_level=3,
                           sensitivity=0.5,
                           on_activation=on_activation,
                           on_prediction=on_prediction)
    runner.start()
    print("spinning")
    rospy.spin()  # hand control to the ROS event loop
Esempio n. 17
0
    def run(self):
        """Benchmark the configured models (and optionally a Pocketsphinx
        baseline) on the dataset, writing metrics to a JSON file."""
        args = self.args
        data = TrainData.from_both(args.tags_file, args.tags_folder,
                                   args.folder)
        data_files = data.train_files if args.use_train else data.test_files
        print('Data:', data)

        metrics = {}

        if self.is_pocketsphinx:
            script = PocketsphinxTestScript.create(
                key_phrase=args.pocketsphinx_wake_word,
                dict_file=args.pocketsphinx_dict,
                hmm_folder=args.pocketsphinx_folder,
                threshold=args.pocketsphinx_threshold)
            # Wake-word files should score 1.0, non-wake-word files 0.0
            ww_files, nww_files = data_files
            script.run_test(ww_files, 'Wake Word', 1.0)
            script.run_test(nww_files, 'Not Wake Word', 0.0)
            stats = script.get_stats()
            metrics[args.pocketsphinx_dict] = stats.to_dict(args.threshold)

        for model_name in args.models:
            print('Calculating', model_name + '...')
            # Each model may carry its own feature params; inject before loading
            inject_params(model_name)

            train, test = data.load(args.use_train, not args.use_train)
            inputs, targets = train if args.use_train else test
            predictions = Listener.find_runner(model_name)(model_name).predict(
                inputs)

            # Flatten per-category file lists into one list of filenames
            stats = Stats(predictions, targets, sum(data_files, []))

            print('----', model_name, '----')
            print(stats.counts_str())
            print()
            print(stats.summary_str())
            print()
            metrics[model_name] = stats.to_dict(args.threshold)

        print('Writing to:', args.output)
        with open(args.output, 'w') as f:
            json.dump(metrics, f)
Esempio n. 18
0
def main():
    """Run a wake-word model on live audio, visualizing confidence as a bar
    (or as a '!'/'.' stream in basic mode)."""
    args = create_parser(usage).parse_args()

    def on_activation():
        activate_notify()
        # TODO: trigger VMSE

    def on_prediction(conf):
        if args.basic_mode:
            marker = '!' if conf > 0.7 else '.'
            print(marker, end='', flush=True)
        else:
            # Bar of X's proportional to confidence; X's past the sensitivity
            # cutoff are lowercased to mark the trigger region
            max_width = 80
            width = min(get_terminal_size()[0], max_width)
            units = int(round(conf * width))
            bar = 'X' * units + '-' * (width - units)
            cutoff = round((1.0 - args.sensitivity) * width)
            print(bar[:cutoff] + bar[cutoff:].replace('X', 'x'))

    wake_listener = Listener(args.model, args.chunk_size)
    # Rolling window of the most recent audio, sized to the model input
    rolling_audio = np.zeros(wake_listener.pr.buffer_samples, dtype=float)

    def get_prediction(chunk):
        nonlocal rolling_audio
        decoded = buffer_to_audio(chunk)
        rolling_audio = np.concatenate((rolling_audio[len(decoded):], decoded))
        return wake_listener.update(chunk)

    listener_engine = ListenerEngine(wake_listener, args.chunk_size)
    listener_engine.get_prediction = get_prediction
    precise_runner = PreciseRunner(listener_engine,
                                   args.trigger_level,
                                   sensitivity=args.sensitivity,
                                   on_activation=on_activation,
                                   on_prediction=on_prediction)
    precise_runner.start()
    Event().wait()  # Wait forever
def main():
    """Evaluate a model over a tagged dataset and display its statistics.

    Identical runtime behavior to before; the large amount of commented-out
    debug code has been removed for readability.
    """
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)

    train, test = data.load(args.use_train, not args.use_train)
    inputs, targets = train if args.use_train else test

    # Flatten the per-category file lists into one ordered list of names
    filenames = sum(data.train_files if args.use_train else data.test_files,
                    [])

    print(args.model)

    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)

    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
class IncrementalTrainer:
    """Incrementally train a wake-word model against random audio.

    Streams random (non-wake-word) audio through the model; every false
    activation is saved as a new negative sample, and the model is
    periodically retrained on the growing dataset.
    """

    def __init__(self, args):
        """Load training state, tagged data and the Keras model."""
        self.args = args
        self.trained_fns = load_trained_fns(args.model)
        pr = inject_params(args.model)
        # Rolling audio window sized to the model input
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        # Imported lazily so keras is only loaded when this class is used
        from keras.callbacks import ModelCheckpoint
        self.checkpoint = ModelCheckpoint(args.model,
                                          monitor=args.metric_monitor,
                                          save_best_only=args.save_best)
        data = TrainData.from_tags(args.tags_file, args.tags_folder)
        self.tags_data = data.load(True, not args.no_validation)

        # Create a fresh model on disk if none exists yet
        if not isfile(args.model):
            create_model(args.model, args.no_validation,
                         args.extra_metrics).save(args.model)
        self.listener = Listener(args.model,
                                 args.chunk_size,
                                 runner_cls=KerasRunner)

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder.load(True, not self.args.no_validation)

        # Combine freshly generated samples with the tagged dataset
        train_data = TrainData.merge(train_data, self.tags_data[0])
        test_data = TrainData.merge(test_data, self.tags_data[1])
        print()
        try:
            self.listener.runner.model.fit(*train_data,
                                           self.args.batch_size,
                                           self.args.epochs,
                                           validation_data=test_data,
                                           callbacks=[self.checkpoint])
        finally:
            # Persist weights even if training is interrupted
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        # ~20% of generated samples are routed to the test set
        save_test = random() > 0.8
        samples_since_train = 0
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > 0.5:
                # False activation on random audio: save it as a negative sample
                samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '',
                            'not-wake-word', 'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            # Retrain once enough new negatives have accumulated (train-set only)
            if not save_test and samples_since_train >= self.args.delay_samples and self.args.epochs > 0:
                samples_since_train = 0
                self.retrain()

    def train_incremental(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100%                 ')

            # Record the file as processed so reruns skip it
            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
Esempio n. 21
0
 def __init__(self, args):
     """Load the model runner and a rolling audio buffer from CLI args."""
     super().__init__(args)
     inject_params(self.args.model)
     # find_runner picks the runner class from the model file extension
     self.runner = Listener.find_runner(self.args.model)(self.args.model)
     # NOTE(review): `pr` appears to be a module-level params object — confirm
     self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
Esempio n. 22
0
 def __init__(self):
     """Parse CLI args, then load the model runner and audio buffer."""
     self.args = create_parser(usage).parse_args()
     inject_params(self.args.model)
     # find_runner picks the runner class from the model file extension
     self.runner = Listener.find_runner(self.args.model)(self.args.model)
     # NOTE(review): `pr` appears to be a module-level params object — confirm
     self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
Esempio n. 23
0
class TrainIncrementalScript(TrainScript):
    """Incrementally train a model to suppress false activations.

    Streams random (non-wake-word) audio through the model; chunks that
    activate the network are saved as new negative samples and the model
    is periodically retrained on the growing dataset.
    """

    usage = Usage('''
        Train a model to inhibit activation by
        marking false activations and retraining

        :-e --epochs int 1
            Number of epochs to train before continuing evaluation

        :-ds --delay-samples int 10
            Number of false activations to save before re-training

        :-c --chunk-size int 2048
            Number of samples between testing the neural network

        :-r --random-data-folder str data/random
            Folder with properly encoded wav files of
            random audio that should not cause an activation

        :-th --threshold float 0.5
            Network output to be considered activated

        ...
    ''') | TrainScript.usage

    def __init__(self, args):
        """Prepare output folders and swap a fresh in-memory model into the
        listener's runner for incremental training."""
        super().__init__(args)

        # Ensure both train and test 'generated' folders exist
        for i in (
                join(self.args.folder, 'not-wake-word', 'generated'),
                join(self.args.folder, 'test', 'not-wake-word', 'generated')
        ):
            makedirs(i, exist_ok=True)

        self.trained_fns = load_trained_fns(self.args.model)
        # NOTE(review): `pr` appears to be a module-level params object — confirm
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(
            skip_acc=self.args.no_validation, extra_metrics=self.args.extra_metrics,
            loss_bias=1.0 - self.args.sensitivity
        )
        model = create_model(self.args.model, params)
        self.listener = Listener(self.args.model, self.args.chunk_size, runner_cls=KerasRunner)
        # Train on the freshly built model rather than the on-disk weights
        self.listener.runner = KerasRunner(self.args.model)
        self.listener.runner.model = model
        self.samples_since_train = 0

    @staticmethod
    def load_data(args: Any):
        """Load the tagged dataset (train plus optional validation split)."""
        data = TrainData.from_tags(args.tags_file, args.tags_folder)
        return data.load(True, not args.no_validation)

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder.load(True, not self.args.no_validation)

        # Combine freshly generated samples with the sampled dataset
        train_data = TrainData.merge(train_data, self.sampled_data)
        test_data = TrainData.merge(test_data, self.test)
        train_inputs, train_outputs = train_data
        print()
        try:
            # Resume epoch numbering from self.epoch for continuous logs
            self.listener.runner.model.fit(
                train_inputs, train_outputs, self.args.batch_size, self.epoch + self.args.epochs,
                validation_data=test_data, callbacks=self.callbacks, initial_epoch=self.epoch
            )
        finally:
            # Persist weights even if training is interrupted
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        # ~20% of generated samples are routed to the test set
        save_test = random() > 0.8
        audio = load_audio(fn)
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            self.audio_buffer = np.concatenate((self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > self.args.threshold:
                # False activation on random audio: save as a negative sample
                self.samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '', 'not-wake-word',
                            'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            # Retrain once enough new negatives accumulate (train-set only)
            if not save_test and self.samples_since_train >= self.args.delay_samples and \
                    self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()

    def run(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100%                 ')

            # Record the file as processed so reruns skip it
            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
Esempio n. 24
0
def main():
    """Control a wake-word runner via simple text commands on stdin.

    Commands: 'start normal', 'start safe', 'start normal visual',
    'start safe visual', 'stop', 'running'. 'safe' mode saves the audio of
    each activation; 'visual' mode prints a confidence bar per prediction.
    """
    args = create_parser(usage).parse_args()
    os.chdir(os.getcwd() + "/Precise")  # models are looked up relative to ./Precise

    def on_activation_normal():
        print("activated\n", flush=True)

    def on_activation_safe():
        # NOTE(review): chunk_num and session_id look like module-level
        # globals defined elsewhere — confirm. Also note audio_buffer is only
        # bound in the 'visual' start branches, so plain 'start safe' would
        # raise NameError here on activation — verify intended.
        global chunk_num
        nm = join(
            args.save_dir,
            args.save_prefix + session_id + '.' + str(chunk_num) + '.wav')
        save_audio(nm, audio_buffer)
        print()
        print('Saved to ' + nm + '.')
        chunk_num += 1

    def on_prediction(conf):
        # Bar of X's proportional to confidence; X's past the sensitivity
        # cutoff are lowercased to mark the trigger region
        max_width = 80
        width = min(get_terminal_size()[0], max_width)
        units = int(round(conf * width))
        bar = 'X' * units + '-' * (width - units)
        cutoff = round((1.0 - args.sensitivity) * width)
        print(bar[:cutoff] + bar[cutoff:].replace('X', 'x') + "\n", flush=True)

    def get_prediction(chunk):
        # Keep the rolling audio buffer current, then run the model
        nonlocal audio_buffer
        audio = buffer_to_audio(chunk)
        audio_buffer = np.concatenate((audio_buffer[len(audio):], audio))
        return listener.update(chunk)

    # Command loop: read one command per line from stdin
    while True:
        line = sys.stdin.readline().rstrip()

        if ("start" in line):
            # Prefer a TensorFlow .pb model if present, else the Keras .net
            if (os.path.isfile("./model.pb")):
                listener = Listener("model.pb", args.chunk_size)
            else:
                listener = Listener("model.net", args.chunk_size)
            engine = ListenerEngine(listener, args.chunk_size)
            if ("visual" not in line):
                if (line == "start normal"):
                    runner = PreciseRunner(engine,
                                           args.trigger_level,
                                           sensitivity=args.sensitivity,
                                           on_activation=on_activation_normal)
                elif (line == "start safe"):
                    runner = PreciseRunner(engine,
                                           args.trigger_level,
                                           sensitivity=args.sensitivity,
                                           on_activation=on_activation_safe)
            else:
                if (line == "start normal visual"):
                    audio_buffer = np.zeros(listener.pr.buffer_samples,
                                            dtype=float)
                    engine.get_prediction = get_prediction
                    runner = PreciseRunner(engine,
                                           args.trigger_level,
                                           sensitivity=args.sensitivity,
                                           on_activation=on_activation_normal,
                                           on_prediction=on_prediction)
                elif (line == "start safe visual"):
                    audio_buffer = np.zeros(listener.pr.buffer_samples,
                                            dtype=float)
                    engine.get_prediction = get_prediction
                    runner = PreciseRunner(engine,
                                           args.trigger_level,
                                           sensitivity=args.sensitivity,
                                           on_activation=on_activation_safe,
                                           on_prediction=on_prediction)
            runner.start()
        elif (line == "stop"):
            # NOTE(review): 'stop'/'running' before any 'start' would raise
            # NameError on `runner` — verify intended usage order
            runner.stop()
        elif (line == "running"):
            print(runner.running)
Esempio n. 25
0
class ListenScript(BaseScript):
    """Stream microphone audio through a wake-word model, showing activations.

    Predictions are rendered either as a simple ./! stream (basic mode) or as
    a terminal-width bar; activations can optionally be archived as wav files.
    """
    usage = Usage('''
        Run a model on microphone audio input

        :model str
            Either Keras (.net) or TensorFlow (.pb) model to run

        :-c --chunk-size int 2048
            Samples between inferences

        :-l --trigger-level int 3
            Number of activated chunks to cause an activation

        :-s --sensitivity float 0.5
            Network output required to be considered activated

        :-b --basic-mode
            Report using . or ! rather than a visual representation

        :-d --save-dir str -
            Folder to save false positives

        :-p --save-prefix str -
            Prefix for saved filenames
    ''')

    def __init__(self, args):
        super().__init__(args)
        # Wire the pipeline: model -> engine -> runner, routing both the raw
        # prediction hook and the activation callback back into this object.
        self.listener = Listener(args.model, args.chunk_size)
        self.audio_buffer = np.zeros(self.listener.pr.buffer_samples,
                                     dtype=float)
        self.engine = ListenerEngine(self.listener, args.chunk_size)
        self.engine.get_prediction = self.get_prediction
        self.runner = PreciseRunner(self.engine,
                                    args.trigger_level,
                                    sensitivity=args.sensitivity,
                                    on_activation=self.on_activation,
                                    on_prediction=self.on_prediction)
        # Random session id keeps saved filenames from colliding across runs
        self.session_id = '%09d' % randint(0, 999999999)
        self.chunk_num = 0

    def on_activation(self):
        """Notify the user and, if configured, archive the triggering audio."""
        activate_notify()

        if self.args.save_dir:
            filename = (self.args.save_prefix + self.session_id + '.' +
                        str(self.chunk_num) + '.wav')
            nm = join(self.args.save_dir, filename)
            save_audio(nm, self.audio_buffer)
            print()
            print('Saved to ' + nm + '.')
            self.chunk_num += 1

    def on_prediction(self, conf):
        """Render one network output: a ./! character or a confidence bar."""
        if self.args.basic_mode:
            print('!' if conf > 0.7 else '.', end='', flush=True)
        else:
            # Bar is capped at 80 columns; the part past the sensitivity
            # cutoff is lowercased so the threshold position stays visible.
            width = min(get_terminal_size()[0], 80)
            filled = int(round(conf * width))
            bar = 'X' * filled + '-' * (width - filled)
            cutoff = round((1.0 - self.args.sensitivity) * width)
            print(bar[:cutoff] + bar[cutoff:].replace('X', 'x'))

    def get_prediction(self, chunk):
        """Slide the rolling audio buffer forward and run inference on chunk."""
        new_audio = buffer_to_audio(chunk)
        kept = self.audio_buffer[len(new_audio):]
        self.audio_buffer = np.concatenate((kept, new_audio))
        return self.listener.update(chunk)

    def run(self):
        self.runner.start()
        Event().wait()  # Block the main thread forever; runner works in background
class IncrementalTrainer(Trainer):
    """Mines false activations from random audio and retrains on them.

    Streams long non-wake-word recordings through the current model; whenever
    the model fires above the threshold, the triggering audio window is saved
    as a generated negative sample.  Once enough new samples accumulate, the
    model is retrained on the enlarged dataset.
    """
    def __init__(self):
        super().__init__(create_parser(usage))

        # Ensure the destination folders for generated negatives exist,
        # for both the train and the test split
        for i in (join(self.args.folder, 'not-wake-word', 'generated'),
                  join(self.args.folder, 'test', 'not-wake-word',
                       'generated')):
            makedirs(i, exist_ok=True)

        # Filenames already processed by previous sessions (skipped in run())
        self.trained_fns = load_trained_fns(self.args.model)
        # Rolling window of the most recent pr.buffer_samples audio samples
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(skip_acc=self.args.no_validation,
                             extra_metrics=self.args.extra_metrics,
                             loss_bias=1.0 - self.args.sensitivity)
        model = create_model(self.args.model, params)
        # Force a Keras runner, then inject the freshly created/loaded model
        # so that inference (listener.update) and training share weights
        self.listener = Listener(self.args.model,
                                 self.args.chunk_size,
                                 runner_cls=KerasRunner)
        self.listener.runner = KerasRunner(self.args.model)
        self.listener.runner.model = model
        # Count of false activations collected since the last retrain
        self.samples_since_train = 0

    @staticmethod
    def load_data(args: Any):
        """Load (train, test) data described by the tags file/folder in args."""
        data = TrainData.from_tags(args.tags_file, args.tags_folder)
        return data.load(True, not args.no_validation)

    def retrain(self):
        """Train for a session, pulling in any new data from the filesystem"""
        folder = TrainData.from_folder(self.args.folder)
        train_data, test_data = folder.load(True, not self.args.no_validation)

        # Merge folder data with the pre-sampled data held by the base class
        train_data = TrainData.merge(train_data, self.sampled_data)
        test_data = TrainData.merge(test_data, self.test)
        train_inputs, train_outputs = train_data
        print()
        try:
            self.listener.runner.model.fit(train_inputs,
                                           train_outputs,
                                           self.args.batch_size,
                                           self.epoch + self.args.epochs,
                                           validation_data=test_data,
                                           callbacks=self.callbacks,
                                           initial_epoch=self.epoch)
        finally:
            # Persist weights even if training is interrupted
            self.listener.runner.model.save(self.args.model)

    def train_on_audio(self, fn: str):
        """Run through a single audio file"""
        # random() > 0.8: ~20% of files contribute to the test split instead
        save_test = random() > 0.8
        audio = load_audio(fn)
        # NOTE(review): num_chunks is 0 for files shorter than chunk_size,
        # which would make the progress division below raise — confirm inputs
        num_chunks = len(audio) // self.args.chunk_size

        self.listener.clear()

        for i, chunk in enumerate(chunk_audio(audio, self.args.chunk_size)):
            print('\r' + str(i * 100. / num_chunks) + '%', end='', flush=True)
            # Slide the rolling audio window forward by one chunk
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            conf = self.listener.update(chunk)
            if conf > self.args.threshold:
                # False activation: save the current window as a negative sample
                self.samples_since_train += 1
                name = splitext(basename(fn))[0] + '-' + str(i) + '.wav'
                name = join(self.args.folder, 'test' if save_test else '',
                            'not-wake-word', 'generated', name)
                save_audio(name, self.audio_buffer)
                print()
                print('Saved to:', name)

            # Retrain once enough new negatives accumulated (train files only)
            if not save_test and self.samples_since_train >= self.args.delay_samples and \
                    self.args.epochs > 0:
                self.samples_since_train = 0
                self.retrain()

    def run(self):
        """
        Begin reading through audio files, saving false
        activations and retraining when necessary
        """
        for fn in glob_all(self.args.random_data_folder, '*.wav'):
            if fn in self.trained_fns:
                print('Skipping ' + fn + '...')
                continue

            print('Starting file ' + fn + '...')
            self.train_on_audio(fn)
            print('\r100%                 ')

            # Record the file so future sessions skip it
            self.trained_fns.append(fn)
            save_trained_fns(self.trained_fns, self.args.model)
Esempio n. 27
0
class TrainGeneratedScript(BaseScript):
    """A trainer that runs on generated data by overlaying wakewords on background audio."""
    usage = Usage('''
        Train a model on infinitely generated batches

        :model str
            Keras .net model file to load from and write to

        :-e --epochs int 100
            Number of epochs to train on

        :-b --batch-size int 200
            Number of samples in each batch

        :-t --steps-per-epoch int 100
            Number of steps that are considered an epoch

        :-c --chunk-size int 2048
            Number of audio samples between generating a training sample

        :-r --random-data-folder str data/random
            Folder with properly encoded wav files of
            random audio that should not cause an activation

        :-s --sensitivity float 0.2
            Weighted loss bias. Higher values decrease increase positives

        :-sb --save-best
            Only save the model each epoch if its stats improve

        :-nv --no-validation
            Disable accuracy and validation calculation
            to improve speed during training

        :-mm --metric-monitor str loss
            Metric used to determine when to save

        :-em --extra-metrics
            Add extra metrics during training

        :-p --save-prob float 0.0
            Probability of saving audio into debug/ww and debug/nww folders

        ...
    ''') | TrainData.usage

    def __init__(self, args):
        super().__init__(args)
        # Rolling windows of the most recent audio samples and their 0/1 labels
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
        self.vals_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(skip_acc=args.no_validation,
                             extra_metrics=args.extra_metrics,
                             loss_bias=1.0 - args.sensitivity)
        self.model = create_model(args.model, params)
        # Listener is used only for feature extraction (update_vectors);
        # no inference runner is needed, hence the null runner_cls
        self.listener = Listener('',
                                 args.chunk_size,
                                 runner_cls=lambda x: None)

        from keras.callbacks import ModelCheckpoint, TensorBoard
        checkpoint = ModelCheckpoint(args.model,
                                     monitor=args.metric_monitor,
                                     save_best_only=args.save_best)
        # Epoch counter is persisted across runs in a sidecar '.epoch' file
        epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(_a, _b):
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        self.callbacks = [
            checkpoint,
            TensorBoard(log_dir=self.model_base + '.logs', ),
            LambdaCallback(on_epoch_end=on_epoch_end)
        ]

        self.data = TrainData.from_both(args.tags_file, args.tags_folder,
                                        args.folder)
        pos_files, neg_files = self.data.train_files
        # Cycle forever over the wake-word / not-wake-word sample files
        self.neg_files_it = iter(cycle(neg_files))
        self.pos_files_it = iter(cycle(pos_files))

    def layer_with(self, sample: np.ndarray, value: int) -> np.ndarray:
        """Create an identical 2d array where the second row is filled with value"""
        b = np.full((2, len(sample)), value, dtype=float)
        b[0] = sample
        return b

    def generate_wakeword_pieces(self, volume):
        """Generates chunks of audio that represent the wakeword stream

        Alternates (randomly) between wake-word and non-wake-word samples,
        each followed by 0.5-2.5 s of labeled silence.
        """
        while True:
            target = 1 if random() > 0.5 else 0
            it = self.pos_files_it if target else self.neg_files_it
            sample_file = next(it)
            yield self.layer_with(
                self.normalize_volume_to(load_audio(sample_file), volume),
                target)
            yield self.layer_with(
                np.zeros(int(pr.sample_rate * (0.5 + 2.0 * random()))), 0)

    def chunk_audio_pieces(self, pieces, chunk_size):
        """Convert chunks of audio into a series of equally sized pieces"""
        left_over = np.array([])
        for piece in pieces:
            if left_over.size == 0:
                combined = piece
            else:
                combined = np.concatenate([left_over, piece], axis=-1)
            for chunk in chunk_audio(combined.T, chunk_size):
                yield chunk.T
            # Carry over the tail of *combined* (along the sample axis) that
            # didn't fill a whole chunk.  The previous code sliced `piece`
            # along axis 0 — on the (2, n) arrays from layer_with, len(piece)
            # is 2, so it kept the entire piece and replayed data.
            remainder = combined.shape[-1] % chunk_size
            left_over = combined[..., -remainder:] if remainder else np.array([])

    def calc_volume(self, sample: np.ndarray):
        """Find the RMS of the audio"""
        return sqrt(np.mean(np.square(sample)))

    def normalize_volume_to(self, sample, volume):
        """Normalize the volume to a certain RMS"""
        return volume * sample / self.calc_volume(sample)

    def merge(self, a, b, ratio):
        """Perform a weighted sum of a and b. ratio=1.0 means 100% of b and 0% of a"""
        return (1.0 - ratio) * a + ratio * b

    @staticmethod
    def max_run_length(x: np.ndarray, val: int):
        """Finds the maximum continuous length of the given value in the sequence"""
        if x.size == 0:
            return 0
        else:
            # Indices where the value changes, plus the final index
            y = np.array(x[1:] != x[:-1])
            i = np.append(np.where(y), len(x) - 1)
            run_lengths = np.diff(np.append(-1, i))
            run_length_values = x[i]
            return max([
                rl for rl, v in zip(run_lengths, run_length_values) if v == val
            ],
                       default=0)

    def vectors_from_fn(self, fn: str):
        """
        Run through a single background audio file, overlaying with wake words.
        Generates (mfccs, target) where mfccs is a series of mfcc values and
        target is a single integer classification of the target network output for that chunk
        """
        audio = load_audio(fn)
        audio_volume = self.calc_volume(audio)
        audio_volume *= 0.4 + 0.5 * random()
        audio = self.normalize_volume_to(audio, audio_volume)

        self.listener.clear()
        chunked_bg = chunk_audio(audio, self.args.chunk_size)
        chunked_ww = self.chunk_audio_pieces(
            self.generate_wakeword_pieces(audio_volume), self.args.chunk_size)

        for i, (chunk_bg, (chunk_ww,
                           targets)) in enumerate(zip(chunked_bg, chunked_ww)):
            chunk = self.merge(chunk_bg, chunk_ww, 0.6)
            self.vals_buffer = np.concatenate(
                (self.vals_buffer[len(targets):], targets))
            self.audio_buffer = np.concatenate(
                (self.audio_buffer[len(chunk):], chunk))
            mfccs = self.listener.update_vectors(chunk)
            percent_overlapping = self.max_run_length(
                self.vals_buffer, 1) / len(self.vals_buffer)

            # Positive sample: a wake word dominated the window and has just
            # ended (latest label back to 0). Ambiguous overlaps are skipped.
            if self.vals_buffer[-1] == 0 and percent_overlapping > 0.8:
                target = 1
            elif percent_overlapping < 0.5:
                target = 0
            else:
                continue

            if random() > 1.0 - self.args.save_prob:
                name = splitext(basename(fn))[0]
                wav_file = join('debug', 'ww' if target == 1 else 'nww',
                                '{} - {}.wav'.format(name, i))
                save_audio(wav_file, self.audio_buffer)
            yield mfccs, target

    @staticmethod
    def samples_to_batches(samples: Iterable, batch_size: int):
        """Chunk a series of network inputs and outputs into larger batches"""
        it = iter(samples)
        while True:
            batch_in, batch_out = [], []
            with suppress(StopIteration):
                for i in range(batch_size):
                    sample_in, sample_out = next(it)
                    batch_in.append(sample_in)
                    batch_out.append(sample_out)
            if not batch_in:
                # PEP 479: raising StopIteration inside a generator becomes a
                # RuntimeError on Python 3.7+, so end the generator with return
                return
            yield np.array(batch_in), np.array(batch_out)

    def generate_samples(self):
        """Generate training samples (network inputs and outputs)"""
        filenames = glob_all(self.args.random_data_folder, '*.wav')
        shuffle(filenames)
        while True:
            for fn in filenames:
                for x, y in self.vectors_from_fn(fn):
                    yield x, y

    def run(self):
        """Train the model on randomly generated batches"""
        _, test_data = self.data.load(train=False, test=True)
        try:
            self.model.fit_generator(self.samples_to_batches(
                self.generate_samples(), self.args.batch_size),
                                     steps_per_epoch=self.args.steps_per_epoch,
                                     epochs=self.epoch + self.args.epochs,
                                     validation_data=test_data,
                                     callbacks=self.callbacks,
                                     initial_epoch=self.epoch)
        finally:
            # Save weights and params even if training is interrupted
            self.model.save(self.args.model)
            save_params(self.args.model)
Esempio n. 28
0
import keras
from keras.models import Sequential
from keras.models import load_model

from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import to_categorical
import numpy as np
import os
import librosa
from keras import backend as K
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io
from sonopy import mfcc_spec, chop_array, power_spec, filterbanks, safe_log, dct
from precise.network_runner import Listener

# Debug script: feed a fixed wav through the listener in hand-picked chunk
# sizes and print each confidence value.
samples, sample_rate = librosa.load("/tmp/fixed.wav", sr=16000)

listener = Listener("qqq.pb", -1)
remaining = samples[:]
chunk_sizes = (4096, 4096, 4096, 3532, 4096, 4096, 4096, 4096, 4096, 4096,
               4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096, 4096,
               4096)
for size in chunk_sizes:
    chunk, remaining = remaining[:size], remaining[size:]
    print(listener.update(chunk))

# This code produces 21 outputs
# bakerloo produces 75. Hmm.