예제 #1
0
def main():
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)
    save_params(args.model)

    data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
    print('Data:', data)
    (inputs, outputs), test_data = data.load(True, not args.no_validation)

    print('Inputs shape:', inputs.shape)
    print('Outputs shape:', outputs.shape)

    if test_data:
        print('Test inputs shape:', test_data[0].shape)
        print('Test outputs shape:', test_data[1].shape)

    if 0 in inputs.shape or 0 in outputs.shape:
        print('Not enough data to train')
        exit(1)

    model = create_model(args.model, args.no_validation, args.extra_metrics)
    model.summary()

    from keras.callbacks import ModelCheckpoint
    checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                 save_best_only=args.save_best)

    try:
        model.fit(inputs, outputs, 5000, args.epochs, validation_data=test_data,
                  callbacks=[checkpoint])
    except KeyboardInterrupt:
        print()
    finally:
        model.save(args.model)
예제 #2
0
def main():
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    inputs, targets = train if args.use_train else test

    filenames = sum(data.train_files if args.use_train else data.test_files,
                    [])
    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = Stats(predictions, targets, filenames)

    print('Data:', data)

    if not args.no_filenames:
        fp_files = stats.calc_filenames(False, True, args.threshold)
        fn_files = stats.calc_filenames(False, False, args.threshold)
        print('=== False Positives ===')
        print('\n'.join(fp_files))
        print()
        print('=== False Negatives ===')
        print('\n'.join(fn_files))
        print()
    print(stats.counts_str(args.threshold))
    print()
    print(stats.summary_str(args.threshold))
예제 #3
0
    def __init__(self, parser=None):
        parser = parser or ArgumentParser()
        add_to_parser(parser, self.usage, True)
        args = TrainData.parse_args(parser)
        self.args = args = self.process_args(args) or args

        if args.invert_samples and not args.samples_file:
            parser.error(
                'You must specify --samples-file when using --invert-samples')
        if args.samples_file and not isfile(args.samples_file):
            parser.error('No such file: ' +
                         (args.invert_samples or args.samples_file))
        if not 0.0 <= args.sensitivity <= 1.0:
            parser.error('sensitivity must be between 0.0 and 1.0')

        output_folder = os.path.join(args.folder, splitext(args.model)[0])
        if not os.path.exists(output_folder):
            print('Creating output folder:', output_folder)
            os.makedirs(output_folder)

        args.model = os.path.join(output_folder, args.model)

        inject_params(args.model)
        save_params(args.model)
        self.train, self.test = self.load_data(self.args)

        set_loss_bias(1.0 - args.sensitivity)
        params = ModelParams(skip_acc=args.no_validation,
                             extra_metrics=args.extra_metrics)
        self.model = create_model(args.model, params)
        self.model.summary()

        from keras.callbacks import ModelCheckpoint, TensorBoard
        checkpoint = ModelCheckpoint(args.model,
                                     monitor=args.metric_monitor,
                                     save_best_only=args.save_best)
        epoch_file = splitext(args.model)[0]
        epoch_file = os.path.join(epoch_file + '.epoch')
        epoch_fiti = Fitipy(epoch_file)
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(a, b):
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        if args.samples_file:
            self.samples, self.hash_to_ind = self.load_sample_data(
                args.samples_file, self.train)
        else:
            self.samples = set()
            self.hash_to_ind = {}

        self.callbacks = [
            checkpoint,
            TensorBoard(log_dir=self.model_base + '.logs', ),
            LambdaCallback(on_epoch_end=on_epoch_end)
        ]
예제 #4
0
def main():
    args = TrainData.parse_args(create_parser(usage))
    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    data_files = data.train_files if args.use_train else data.test_files
    listener = PocketsphinxListener(args.key_phrase, args.dict_file,
                                    args.hmm_folder, args.threshold)

    print('Data:', data)
    stats = test_pocketsphinx(listener, data_files)
    show_stats(stats, not args.no_filenames)
def main():
    args = TrainData.parse_args(create_parser(usage))

    for i in (join(args.folder, 'not-wake-word', 'generated'),
              join(args.folder, 'test', 'not-wake-word', 'generated')):
        makedirs(i, exist_ok=True)

    trainer = IncrementalTrainer(args)
    try:
        trainer.train_incremental()
    except KeyboardInterrupt:
        print()
예제 #6
0
def main():
    parser = create_parser(usage)
    parser.add_argument(
        'models',
        nargs='*',
        help='List of model filenames in format: wake-word.yy-mm-dd.net')
    args = TrainData.parse_args(parser)
    if not (bool(args.pocketsphinx_dict) == bool(args.pocketsphinx_folder) ==
            bool(args.pocketsphinx_wake_word)):
        parser.error('Must pass all or no Pocketsphinx arguments')

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    data_files = data.train_files if args.use_train else data.test_files
    print('Data:', data)

    metrics = {}

    if args.pocketsphinx_dict and args.pocketsphinx_folder and args.pocketsphinx_wake_word:
        if not isfile(args.pocketsphinx_dict):
            parser.error('No such file: ' + args.pocketsphinx_dict)
        if not isdir(args.pocketsphinx_folder):
            parser.error('No such folder: ' + args.pocketsphinx_folder)
        listener = PocketsphinxListener(args.pocketsphinx_wake_word,
                                        args.pocketsphinx_dict,
                                        args.pocketsphinx_folder,
                                        args.pocketsphinx_threshold)
        stats = test_pocketsphinx(listener, data_files)
        metrics[args.pocketsphinx_dict] = stats_to_dict(stats)

    for model_name in args.models:
        print('Calculating', model_name + '...')
        inject_params(model_name)

        train, test = data.load(args.use_train, not args.use_train)
        inputs, targets = train if args.use_train else test
        predictions = Listener.find_runner(model_name)(model_name).predict(
            inputs)

        stats = Stats(predictions, targets, sum(data_files, []))

        print('----', model_name, '----')
        print(stats.counts_str())
        print()
        print(stats.summary_str())
        print()
        metrics[model_name] = stats.to_dict(args.threshold)

    print('Writing to:', args.output)
    with open(args.output, 'w') as f:
        json.dump(metrics, f)
예제 #7
0
def main():
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
    train, test = data.load(args.use_train, not args.use_train)
    inputs, targets = train if args.use_train else test

    filenames = sum(data.train_files if args.use_train else data.test_files,
                    [])
    predictions = load_precise_model(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
예제 #8
0
def main():
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    inputs, targets = train if args.use_train else test

    filenames = sum(data.train_files if args.use_train else data.test_files,
                    [])
    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
예제 #9
0
    def __init__(self):
        parser = create_parser(usage)
        self.args = args = TrainData.parse_args(parser)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
        self.vals_buffer = np.zeros(pr.buffer_samples, dtype=float)

        params = ModelParams(skip_acc=args.no_validation,
                             extra_metrics=args.extra_metrics,
                             loss_bias=1.0 - args.sensitivity)
        self.model = create_model(args.model, params)
        self.listener = Listener('',
                                 args.chunk_size,
                                 runner_cls=lambda x: None)

        from keras.callbacks import ModelCheckpoint, TensorBoard
        checkpoint = ModelCheckpoint(args.model,
                                     monitor=args.metric_monitor,
                                     save_best_only=args.save_best)
        epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(a, b):
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        self.callbacks = [
            checkpoint,
            TensorBoard(log_dir=self.model_base + '.logs', ),
            LambdaCallback(on_epoch_end=on_epoch_end)
        ]

        self.data = TrainData.from_both(args.tags_file, args.tags_folder,
                                        args.folder)
        pos_files, neg_files = self.data.train_files
        self.neg_files_it = iter(cycle(neg_files))
        self.pos_files_it = iter(cycle(pos_files))
예제 #10
0
def main():
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    #print(data)
    #print(args.use_train)

    train, test = data.load(args.use_train, not args.use_train)
    #print(train)
    #print(test)
    inputs, targets = train if args.use_train else test
    #print(inputs)

    filenames = sum(data.train_files if args.use_train else data.test_files,
                    [])

    #for i in range(0, len(targets)):
    #    print(filenames[i], "\t", targets[i])

    print(args.model)

    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)

    #for i, filename in enumerate(filenames):
    #print(filename, "\t", predictions[i], targets[i])
    #print(predictions[i])

    #print (predictions)
    #print (len(predictions))

    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
예제 #11
0
def main():
    parser = create_parser(usage)
    parser.add_argument(
        'models',
        nargs='*',
        help='Either Keras (.net) or TensorFlow (.pb) models to test')
    args = TrainData.parse_args(parser)
    if not args.models and not args.input_file and args.folder:
        args.input_file = args.folder
    if bool(args.models) == bool(args.input_file):
        parser.error('Please specify either a list of models or an input file')

    if not args.output_file:
        load_plt()  # Error early if matplotlib not installed
    import numpy as np

    if args.models:
        data = TrainData.from_both(args.tags_file, args.tags_folder,
                                   args.folder)
        print('Data:', data)
        filenames = sum(
            data.train_files if args.use_train else data.test_files, [])
        loader = CachedDataLoader(
            partial(data.load,
                    args.use_train,
                    not args.use_train,
                    shuffle=False))
        model_data = calc_stats(args.models, loader, args.use_train, filenames)
    else:
        model_data = {
            name: Stats.from_np_dict(data)
            for name, data in np.load(args.input_file)['data'].item().items()
        }
        for name, stats in model_data.items():
            print('=== {} ===\n{}\n\n{}\n'.format(name, stats.counts_str(),
                                                  stats.summary_str()))

    if args.output_file:
        np.savez(args.output_file,
                 data={
                     name: stats.to_np_dict()
                     for name, stats in model_data.items()
                 })
    else:
        plt = load_plt()
        decoder = ThresholdDecoder(pr.threshold_config, pr.threshold_center)
        thresholds = [
            decoder.encode(i)
            for i in np.linspace(0.0, 1.0, args.resolution)[1:-1]
        ]
        for model_name, stats in model_data.items():
            x = [stats.false_positives(i) for i in thresholds]
            y = [stats.false_negatives(i) for i in thresholds]
            plt.plot(x, y, marker='x', linestyle='-', label=model_name)
            if args.labels:
                for x, y, threshold in zip(x, y, thresholds):
                    plt.annotate('{:.4f}'.format(threshold), (x, y))

        plt.legend()
        plt.xlabel('False Positives')
        plt.ylabel('False Negatives')
        plt.show()