Example #1
0
def main():
    """Run one model over the selected data split and print its statistics.

    The split (train or test) is chosen by ``args.use_train``. Data is loaded
    with ``shuffle=False`` and paired with the flattened file list of the same
    split when building ``Stats``. Unless ``args.no_filenames`` is set, the
    file names returned by ``stats.calc_filenames`` are printed under the
    false-positive and false-negative headings before the count and summary
    strings.
    """
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    if args.use_train:
        inputs, targets = train
        file_groups = data.train_files
    else:
        inputs, targets = test
        file_groups = data.test_files

    # Flatten the per-category file lists into a single list.
    filenames = sum(file_groups, [])
    runner_cls = Listener.find_runner(args.model)
    predictions = runner_cls(args.model).predict(inputs)
    stats = Stats(predictions, targets, filenames)

    print('Data:', data)

    if not args.no_filenames:
        sections = (
            ('=== False Positives ===',
             stats.calc_filenames(False, True, args.threshold)),
            ('=== False Negatives ===',
             stats.calc_filenames(False, False, args.threshold)),
        )
        for heading, names in sections:
            print(heading)
            print('\n'.join(names))
            print()
    print(stats.counts_str(args.threshold))
    print()
    print(stats.summary_str(args.threshold))
Example #2
0
def main():
    """Write noised copies of every audio file in the dataset to an output folder.

    Each train and test file under ``args.folder`` is loaded, mixed with noise
    ``args.inflation_factor`` times at a random ratio between
    ``args.noise_ratio_low`` and ``args.noise_ratio_high``, and saved under
    ``args.output_folder`` at the same relative path. Copies after the first
    get a numeric suffix before the extension. If the tags file lives inside
    ``args.folder`` it is copied across as well.
    """
    args = create_parser(usage).parse_args()
    args.tags_file = abspath(args.tags_file) if args.tags_file else None
    args.folder = abspath(args.folder)
    args.output_folder = abspath(args.output_folder)
    noise_min, noise_max = args.noise_ratio_low, args.noise_ratio_high

    data = TrainData.from_both(args.tags_file, args.folder, args.folder)
    noise_data = NoiseData(args.noise_folder)
    print('Data:', data)

    def translate_filename(source: str, n=0) -> str:
        """Map a path inside ``args.folder`` to its path in the output folder.

        For ``n > 0`` the number is inserted before the extension
        (``base.N.ext``) so repeated copies do not overwrite each other.
        """
        assert source.startswith(args.folder)
        relative_file = source[len(args.folder):].strip(os.path.sep)
        if n > 0:
            base, ext = splitext(relative_file)
            relative_file = base + '.' + str(n) + ext
        return join(args.output_folder, relative_file)

    all_filenames = sum(data.train_files + data.test_files, [])
    # Progress runs from 0% at the first file to 100% at the last one. With a
    # single file the denominator would be zero, so clamp it to 1.
    progress_denominator = max(len(all_filenames) - 1, 1)
    for i, filename in enumerate(all_filenames):
        print('{0:.2%}  \r'.format(i / progress_denominator), end='', flush=True)

        audio = load_audio(filename)
        for n in range(args.inflation_factor):
            noise_ratio = noise_min + (noise_max - noise_min) * random()
            altered = noise_data.noised_audio(audio, noise_ratio)
            output_filename = translate_filename(filename, n)

            makedirs(dirname(output_filename), exist_ok=True)
            save_audio(output_filename, altered)

    print('Done!')

    # Only a tags file located inside the source folder has a well-defined
    # destination in the output folder.
    if args.tags_file and args.tags_file.startswith(args.folder):
        shutil.copy2(args.tags_file, translate_filename(args.tags_file))
Example #3
0
def main():
    """Train the model at ``args.model`` and save it when training ends.

    Parameters are injected from and saved alongside the model path, the
    dataset is loaded (the test split only when ``args.no_validation`` is not
    set), and the array shapes are printed. The process exits with status 1
    when either training array has a zero-sized dimension. The model is saved
    in a ``finally`` clause, so it is written after normal completion, after a
    Ctrl+C, and also when ``fit`` raises.
    """
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)
    save_params(args.model)

    data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
    print('Data:', data)
    train_data, test_data = data.load(True, not args.no_validation)
    inputs, outputs = train_data

    print('Inputs shape:', inputs.shape)
    print('Outputs shape:', outputs.shape)

    if test_data:
        test_inputs, test_outputs = test_data[0], test_data[1]
        print('Test inputs shape:', test_inputs.shape)
        print('Test outputs shape:', test_outputs.shape)

    if any(0 in array.shape for array in (inputs, outputs)):
        print('Not enough data to train')
        exit(1)

    model = create_model(args.model, args.no_validation, args.extra_metrics)
    model.summary()

    # Imported here rather than at module level, as in the original.
    from keras.callbacks import ModelCheckpoint
    callbacks = [
        ModelCheckpoint(args.model, monitor=args.metric_monitor,
                        save_best_only=args.save_best)
    ]

    try:
        model.fit(inputs, outputs, 5000, args.epochs,
                  validation_data=test_data, callbacks=callbacks)
    except KeyboardInterrupt:
        # Finish the interrupted console line before the model is saved.
        print()
    finally:
        model.save(args.model)
Example #4
0
def main():
    """Run a Pocketsphinx listener over the selected split and show its stats.

    The train or test file lists are chosen by ``args.use_train``; file names
    are included in the output unless ``args.no_filenames`` is set.
    """
    args = TrainData.parse_args(create_parser(usage))
    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    if args.use_train:
        data_files = data.train_files
    else:
        data_files = data.test_files
    listener = PocketsphinxListener(
        args.key_phrase, args.dict_file, args.hmm_folder, args.threshold
    )

    print('Data:', data)
    results = test_pocketsphinx(listener, data_files)
    show_stats(results, not args.no_filenames)
    def __init__(self):
        """Set up the optimizer, make sure test data exists, drop checkpoints.

        After the parent initializer runs, a ``BlackBoxOptimizer`` is created
        from ``args.trials_name``. If ``self.test`` is falsy, the test split
        is loaded from the dataset. Every ``ModelCheckpoint`` instance is then
        removed from ``self.callbacks``.
        """
        super().__init__(create_parser(usage))
        self.bb = BlackBoxOptimizer(file=self.args.trials_name)
        if not self.test:
            args = self.args
            data = TrainData.from_both(args.tags_file, args.tags_folder,
                                       args.folder)
            _, self.test = data.load(False, True)

        from keras.callbacks import ModelCheckpoint
        # Collect first, then remove, so the list is not mutated while it is
        # being iterated.
        checkpoint_callbacks = [
            callback for callback in self.callbacks
            if isinstance(callback, ModelCheckpoint)
        ]
        for callback in checkpoint_callbacks:
            self.callbacks.remove(callback)
Example #6
0
    def run(self):
        """Compute or load per-model statistics, then save or plot them.

        With ``args.models`` set, statistics are calculated for each model on
        the selected split (data loaded unshuffled through a
        ``CachedDataLoader``). Otherwise they are read from
        ``args.input_file`` and their count and summary strings are printed.

        With ``args.output_file`` set, the statistics are written with
        ``np.savez``. Otherwise false negatives are plotted against false
        positives for each model across a range of thresholds, with optional
        per-point threshold labels when ``args.labels`` is set.
        """
        args = self.args
        if args.models:
            data = TrainData.from_both(args.tags_file, args.tags_folder,
                                       args.folder)
            print('Data:', data)
            filenames = sum(
                data.train_files if args.use_train else data.test_files, [])
            loader = CachedDataLoader(
                partial(data.load,
                        args.use_train,
                        not args.use_train,
                        shuffle=False))
            model_data = calc_stats(args.models, loader, args.use_train,
                                    filenames)
        else:
            # The 'data' entry is a dict stored as a pickled object array (see
            # the np.savez call below). NumPy >= 1.16.3 refuses to load such
            # arrays unless allow_pickle=True is passed.
            # NOTE(review): unpickling can execute arbitrary code; only pass
            # input files from a trusted source.
            saved = np.load(args.input_file, allow_pickle=True)['data'].item()
            model_data = {
                name: Stats.from_np_dict(data)
                for name, data in saved.items()
            }
            for name, stats in model_data.items():
                print('=== {} ===\n{}\n\n{}\n'.format(name, stats.counts_str(),
                                                      stats.summary_str()))

        if args.output_file:
            np.savez(args.output_file,
                     data={
                         name: stats.to_np_dict()
                         for name, stats in model_data.items()
                     })
        else:
            plt = load_plt()
            decoder = ThresholdDecoder(pr.threshold_config,
                                       pr.threshold_center)
            # Drop the first and last linspace points (0.0 and 1.0).
            thresholds = [
                decoder.encode(i)
                for i in np.linspace(0.0, 1.0, args.resolution)[1:-1]
            ]
            for model_name, stats in model_data.items():
                x = [stats.false_positives(i) for i in thresholds]
                y = [stats.false_negatives(i) for i in thresholds]
                plt.plot(x, y, marker='x', linestyle='-', label=model_name)
                if args.labels:
                    # Separate names so the series lists are not rebound while
                    # they are being iterated.
                    for point_x, point_y, threshold in zip(x, y, thresholds):
                        plt.annotate('{:.4f}'.format(threshold),
                                     (point_x, point_y))

            plt.legend()
            plt.xlabel('False Positives')
            plt.ylabel('False Negatives')
            plt.show()
Example #7
0
def main():
    """Evaluate Pocketsphinx and/or a list of models and write metrics as JSON.

    The Pocketsphinx arguments must be given all together or not at all. When
    given, the dictionary file and model folder are checked for existence and
    a Pocketsphinx listener is tested on the selected split. Each model in
    ``args.models`` is then run over the same split and its count and summary
    strings are printed. All metrics are collected in one dict, keyed by the
    Pocketsphinx dictionary path or the model name, and dumped to
    ``args.output``.
    """
    parser = create_parser(usage)
    parser.add_argument(
        'models',
        nargs='*',
        help='List of model filenames in format: wake-word.yy-mm-dd.net')
    args = TrainData.parse_args(parser)
    if not (bool(args.pocketsphinx_dict) == bool(args.pocketsphinx_folder) ==
            bool(args.pocketsphinx_wake_word)):
        parser.error('Must pass all or no Pocketsphinx arguments')

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    data_files = data.train_files if args.use_train else data.test_files
    print('Data:', data)

    metrics = {}

    if args.pocketsphinx_dict and args.pocketsphinx_folder and args.pocketsphinx_wake_word:
        if not isfile(args.pocketsphinx_dict):
            parser.error('No such file: ' + args.pocketsphinx_dict)
        if not isdir(args.pocketsphinx_folder):
            parser.error('No such folder: ' + args.pocketsphinx_folder)
        listener = PocketsphinxListener(args.pocketsphinx_wake_word,
                                        args.pocketsphinx_dict,
                                        args.pocketsphinx_folder,
                                        args.pocketsphinx_threshold)
        stats = test_pocketsphinx(listener, data_files)
        metrics[args.pocketsphinx_dict] = stats_to_dict(stats)

    # The flattened file list does not depend on the model, so build it once.
    filenames = sum(data_files, [])

    for model_name in args.models:
        print('Calculating', model_name + '...')
        # Parameters are per-model, so the data is reloaded for each one.
        inject_params(model_name)

        # shuffle=False keeps the loaded samples in file order so that
        # predictions line up with `filenames` passed to Stats.
        train, test = data.load(args.use_train, not args.use_train,
                                shuffle=False)
        inputs, targets = train if args.use_train else test
        predictions = Listener.find_runner(model_name)(model_name).predict(
            inputs)

        stats = Stats(predictions, targets, filenames)

        print('----', model_name, '----')
        print(stats.counts_str())
        print()
        print(stats.summary_str())
        print()
        metrics[model_name] = stats.to_dict(args.threshold)

    print('Writing to:', args.output)
    with open(args.output, 'w') as f:
        json.dump(metrics, f)
Example #8
0
def main():
    """Run the model at ``args.model`` over the selected split and show stats.

    The split is chosen by ``args.use_train``. Predictions come from
    ``load_precise_model`` and are combined with the flattened file list and
    the targets by ``calc_stats``. File names are shown unless
    ``args.no_filenames`` is set.
    """
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
    train, test = data.load(args.use_train, not args.use_train)
    if args.use_train:
        inputs, targets = train
        file_groups = data.train_files
    else:
        inputs, targets = test
        file_groups = data.test_files

    # Flatten the per-category file lists into a single list.
    filenames = sum(file_groups, [])
    model = load_precise_model(args.model)
    predictions = model.predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
def main():
    """Run the model at ``args.model`` over the selected split and show stats.

    The split is chosen by ``args.use_train`` and loaded with
    ``shuffle=False``. Predictions from the runner class found for the model
    are combined with the flattened file list and the targets by
    ``calc_stats``. File names are shown unless ``args.no_filenames`` is set.
    """
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    if args.use_train:
        inputs, targets = train
        file_groups = data.train_files
    else:
        inputs, targets = test
        file_groups = data.test_files

    # Flatten the per-category file lists into a single list.
    filenames = sum(file_groups, [])
    runner_cls = Listener.find_runner(args.model)
    predictions = runner_cls(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
    def load_data(args: Any) -> Tuple[tuple, tuple]:
        """Load the train split and, unless disabled, the test split.

        Prints the dataset and the shapes of the loaded arrays. Exits the
        process with status 1 when either training array has a zero-sized
        dimension.

        Args:
            args: Parsed arguments providing ``tags_file``, ``tags_folder``,
                ``folder`` and ``no_validation``.

        Returns:
            The ``(train, test)`` pair returned by ``data.load``.
        """
        data = TrainData.from_both(args.tags_file, args.tags_folder,
                                   args.folder)
        print('Data:', data)
        train, test = data.load(True, not args.no_validation)
        train_inputs, train_outputs = train[0], train[1]

        print('Inputs shape:', train_inputs.shape)
        print('Outputs shape:', train_outputs.shape)

        if test:
            print('Test inputs shape:', test[0].shape)
            print('Test outputs shape:', test[1].shape)

        if any(0 in array.shape for array in (train_inputs, train_outputs)):
            print('Not enough data to train')
            exit(1)

        return train, test
Example #11
0
    def run(self):
        """Evaluate Pocketsphinx and/or the listed models; write metrics as JSON.

        When ``self.is_pocketsphinx`` is set, a ``PocketsphinxTestScript`` is
        run over the wake-word and not-wake-word files of the selected split.
        Each model in ``args.models`` is then run over the same split and its
        count and summary strings are printed. All metrics are collected in
        one dict, keyed by the Pocketsphinx dictionary path or the model name,
        and dumped to ``args.output``.
        """
        args = self.args
        data = TrainData.from_both(args.tags_file, args.tags_folder,
                                   args.folder)
        data_files = data.train_files if args.use_train else data.test_files
        print('Data:', data)

        metrics = {}

        if self.is_pocketsphinx:
            script = PocketsphinxTestScript.create(
                key_phrase=args.pocketsphinx_wake_word,
                dict_file=args.pocketsphinx_dict,
                hmm_folder=args.pocketsphinx_folder,
                threshold=args.pocketsphinx_threshold)
            ww_files, nww_files = data_files
            script.run_test(ww_files, 'Wake Word', 1.0)
            script.run_test(nww_files, 'Not Wake Word', 0.0)
            stats = script.get_stats()
            metrics[args.pocketsphinx_dict] = stats.to_dict(args.threshold)

        # The flattened file list does not depend on the model; build it once.
        filenames = sum(data_files, [])

        for model_name in args.models:
            print('Calculating', model_name + '...')
            # Parameters are per-model, so the data is reloaded for each one.
            inject_params(model_name)

            # shuffle=False keeps the loaded samples in file order so that
            # predictions line up with `filenames` passed to Stats.
            train, test = data.load(args.use_train, not args.use_train,
                                    shuffle=False)
            inputs, targets = train if args.use_train else test
            predictions = Listener.find_runner(model_name)(model_name).predict(
                inputs)

            stats = Stats(predictions, targets, filenames)

            print('----', model_name, '----')
            print(stats.counts_str())
            print()
            print(stats.summary_str())
            print()
            metrics[model_name] = stats.to_dict(args.threshold)

        print('Writing to:', args.output)
        with open(args.output, 'w') as f:
            json.dump(metrics, f)
Example #12
0
    def run(self):
        """Test the wake-word and not-wake-word files and print the results.

        The train or test file lists are chosen by ``args.use_train``. All
        statistics are reported at a fixed threshold of 0.5. The file names
        returned by ``stats.calc_filenames`` are printed under the
        false-positive and false-negative headings unless
        ``args.no_filenames`` is set.
        """
        args = self.args
        data = TrainData.from_both(args.tags_file, args.tags_folder,
                                   args.folder)
        print('Data:', data)

        file_groups = data.train_files if args.use_train else data.test_files
        ww_files, nww_files = file_groups
        self.run_test(ww_files, 'Wake Word', 1.0)
        self.run_test(nww_files, 'Not Wake Word', 0.0)
        stats = self.get_stats()

        threshold = 0.5
        if not self.args.no_filenames:
            sections = (
                ('=== False Positives ===',
                 stats.calc_filenames(False, True, threshold)),
                ('=== False Negatives ===',
                 stats.calc_filenames(False, False, threshold)),
            )
            for heading, names in sections:
                print(heading)
                print('\n'.join(names))
                print()
        print(stats.counts_str(threshold))
        print()
        print(stats.summary_str(threshold))
    def __init__(self):
        """Parse arguments and build the model, callbacks and file iterators.

        Creates two zero-filled buffers of ``pr.buffer_samples`` floats, the
        model, a ``Listener`` whose runner factory returns ``None``, and three
        callbacks: a checkpoint, TensorBoard logging to ``<model base>.logs``,
        and a lambda callback that increments the epoch counter and writes it
        to ``<model base>.epoch``. Finally the dataset is loaded and endless
        iterators over the two training file lists are created.
        """
        parser = create_parser(usage)
        args = TrainData.parse_args(parser)
        self.args = args
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
        self.vals_buffer = np.zeros(pr.buffer_samples, dtype=float)

        model_params = ModelParams(
            skip_acc=args.no_validation,
            extra_metrics=args.extra_metrics,
            loss_bias=1.0 - args.sensitivity,
        )
        self.model = create_model(args.model, model_params)
        self.listener = Listener(
            '', args.chunk_size, runner_cls=lambda x: None
        )

        from keras.callbacks import ModelCheckpoint, TensorBoard
        checkpoint = ModelCheckpoint(
            args.model,
            monitor=args.metric_monitor,
            save_best_only=args.save_best,
        )
        epoch_path = splitext(args.model)[0] + '.epoch'
        epoch_fiti = Fitipy(epoch_path)
        # Start from the stored epoch count, falling back to 0.
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(_epoch, _logs):
            # Both callback arguments are ignored; only the counter matters.
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        tensorboard = TensorBoard(log_dir=self.model_base + '.logs')
        epoch_tracker = LambdaCallback(on_epoch_end=on_epoch_end)
        self.callbacks = [checkpoint, tensorboard, epoch_tracker]

        self.data = TrainData.from_both(
            args.tags_file, args.tags_folder, args.folder
        )
        pos_files, neg_files = self.data.train_files
        self.neg_files_it = iter(cycle(neg_files))
        self.pos_files_it = iter(cycle(pos_files))
def main():
    """Run the model at ``args.model`` over the selected split and show stats.

    The split is chosen by ``args.use_train``. The model path is printed
    before predicting. Predictions are combined with the flattened file list
    and the targets by ``calc_stats``; file names are shown unless
    ``args.no_filenames`` is set.
    """
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)

    # shuffle=False keeps the loaded samples in file order so that targets and
    # predictions line up with `filenames` below.
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    inputs, targets = train if args.use_train else test

    # Flatten the per-category file lists into a single list.
    filenames = sum(data.train_files if args.use_train else data.test_files,
                    [])

    print(args.model)

    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)

    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
Example #15
0
def main():
    """Compute or load per-model statistics, then save or plot them.

    Exactly one of a model list or an input file must be supplied. If neither
    is given but ``args.folder`` is, the folder argument is used as the input
    file. With models, statistics are calculated on the selected split (data
    loaded unshuffled through a ``CachedDataLoader``). With an input file,
    they are read from it and their count and summary strings are printed.

    With ``args.output_file`` set, the statistics are written with
    ``np.savez``. Otherwise false negatives are plotted against false
    positives for each model across a range of thresholds, with optional
    per-point threshold labels when ``args.labels`` is set.
    """
    parser = create_parser(usage)
    parser.add_argument(
        'models',
        nargs='*',
        help='Either Keras (.net) or TensorFlow (.pb) models to test')
    args = TrainData.parse_args(parser)
    if not args.models and not args.input_file and args.folder:
        args.input_file = args.folder
    if bool(args.models) == bool(args.input_file):
        parser.error('Please specify either a list of models or an input file')

    if not args.output_file:
        load_plt()  # Error early if matplotlib not installed
    import numpy as np

    if args.models:
        data = TrainData.from_both(args.tags_file, args.tags_folder,
                                   args.folder)
        print('Data:', data)
        filenames = sum(
            data.train_files if args.use_train else data.test_files, [])
        loader = CachedDataLoader(
            partial(data.load,
                    args.use_train,
                    not args.use_train,
                    shuffle=False))
        model_data = calc_stats(args.models, loader, args.use_train, filenames)
    else:
        # The 'data' entry is a dict stored as a pickled object array (see the
        # np.savez call below). NumPy >= 1.16.3 refuses to load such arrays
        # unless allow_pickle=True is passed.
        # NOTE(review): unpickling can execute arbitrary code; only pass input
        # files from a trusted source.
        saved = np.load(args.input_file, allow_pickle=True)['data'].item()
        model_data = {
            name: Stats.from_np_dict(data)
            for name, data in saved.items()
        }
        for name, stats in model_data.items():
            print('=== {} ===\n{}\n\n{}\n'.format(name, stats.counts_str(),
                                                  stats.summary_str()))

    if args.output_file:
        np.savez(args.output_file,
                 data={
                     name: stats.to_np_dict()
                     for name, stats in model_data.items()
                 })
    else:
        plt = load_plt()
        decoder = ThresholdDecoder(pr.threshold_config, pr.threshold_center)
        # Drop the first and last linspace points (0.0 and 1.0).
        thresholds = [
            decoder.encode(i)
            for i in np.linspace(0.0, 1.0, args.resolution)[1:-1]
        ]
        for model_name, stats in model_data.items():
            x = [stats.false_positives(i) for i in thresholds]
            y = [stats.false_negatives(i) for i in thresholds]
            plt.plot(x, y, marker='x', linestyle='-', label=model_name)
            if args.labels:
                # Separate names so the series lists are not rebound while
                # they are being iterated.
                for point_x, point_y, threshold in zip(x, y, thresholds):
                    plt.annotate('{:.4f}'.format(threshold),
                                 (point_x, point_y))

        plt.legend()
        plt.xlabel('False Positives')
        plt.ylabel('False Negatives')
        plt.show()