Code example #1
def main():
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)
    save_params(args.model)

    data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
    print('Data:', data)
    (inputs, outputs), test_data = data.load(True, not args.no_validation)

    print('Inputs shape:', inputs.shape)
    print('Outputs shape:', outputs.shape)

    if test_data:
        print('Test inputs shape:', test_data[0].shape)
        print('Test outputs shape:', test_data[1].shape)

    if 0 in inputs.shape or 0 in outputs.shape:
        print('Not enough data to train')
        exit(1)

    model = create_model(args.model, args.no_validation, args.extra_metrics)
    model.summary()

    from keras.callbacks import ModelCheckpoint
    checkpoint = ModelCheckpoint(args.model, monitor=args.metric_monitor,
                                 save_best_only=args.save_best)

    try:
        model.fit(inputs, outputs, batch_size=5000, epochs=args.epochs,
                  validation_data=test_data, callbacks=[checkpoint])
    except KeyboardInterrupt:
        print()
    finally:
        model.save(args.model)
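
Every example on this page leans on `inject_params` and `save_params` from mycroft-precise. As a rough mental model, they appear to read and write a `<model>.params` JSON file and keep a module-level `ListenerParams` instance (`pr`) in sync; a minimal sketch under that assumption (the field names and defaults below are illustrative only):

import json
from os.path import isfile


class ListenerParams:
    """Vectorization/network hyperparameters shared as a module-level singleton."""
    def __init__(self):
        self.n_mfcc = 13      # illustrative defaults, not the real ones
        self.n_features = 29


pr = ListenerParams()  # the global that the snippets on this page read from


def inject_params(model_name: str) -> ListenerParams:
    """Load <model>.params (JSON) into the global pr, if the file exists."""
    params_file = model_name + '.params'
    if isfile(params_file):
        with open(params_file) as f:
            pr.__dict__.update(json.load(f))
    return pr


def save_params(model_name: str):
    """Write the current global pr back to <model>.params."""
    with open(model_name + '.params', 'w') as f:
        json.dump(pr.__dict__, f)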
Code example #2
    def load_for(self, model: str) -> Tuple[list, list]:
        """Inject the model's parameters and reload the data if they changed."""
        inject_params(model)
        if self.prev_cache != pr.vectorization_md5_hash():
            self.prev_cache = pr.vectorization_md5_hash()
            self.data = self.loader()
        return self.data
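
The cache key here is `pr.vectorization_md5_hash()`, so data is only reloaded when a parameter that affects feature extraction changes. One plausible implementation of such a hash (an assumption, not the upstream code):

import hashlib


def vectorization_md5_hash(self) -> str:
    """MD5 over the parameter fields that influence vectorization."""
    items = sorted(self.__dict__.items())
    return hashlib.md5(str(items).encode()).hexdigest()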
Code example #3
def load_precise_model(model_name: str) -> Any:
    """Loads a Keras model from file, handling custom loss function"""
    if not model_name.endswith('.net'):
        print('Warning: unknown model type:', model_name)

    inject_params(model_name)
    return load_keras().models.load_model(model_name)
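
Nothing here passes `custom_objects` to `load_model`, so the custom loss mentioned in the docstring is presumably registered inside `load_keras()` before deserialization. A sketch of that pattern (`weighted_log_loss` matches the name used by mycroft-precise; the body and the `LOSS_BIAS` handling are assumptions):

LOSS_BIAS = 0.9  # weight of the negative class; see set_loss_bias in example #5


def load_keras():
    """Import keras and attach the custom loss so load_model can resolve it."""
    import keras
    from keras import backend as K

    def weighted_log_loss(yt, yp):
        # cross-entropy with separately weighted positive/negative terms
        pos_loss = -yt * K.log(yp + K.epsilon())
        neg_loss = -(1. - yt) * K.log(1. - yp + K.epsilon())
        return LOSS_BIAS * K.mean(neg_loss) + (1. - LOSS_BIAS) * K.mean(pos_loss)

    keras.losses.weighted_log_loss = weighted_log_loss
    return keras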
Code example #4
def main():
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    inputs, targets = train if args.use_train else test

    filenames = sum(data.train_files if args.use_train else data.test_files,
                    [])
    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = Stats(predictions, targets, filenames)

    print('Data:', data)

    if not args.no_filenames:
        fp_files = stats.calc_filenames(False, True, args.threshold)
        fn_files = stats.calc_filenames(False, False, args.threshold)
        print('=== False Positives ===')
        print('\n'.join(fp_files))
        print()
        print('=== False Negatives ===')
        print('\n'.join(fn_files))
        print()
    print(stats.counts_str(args.threshold))
    print()
    print(stats.summary_str(args.threshold))
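
`Listener.find_runner(args.model)` returns a runner class, which is immediately instantiated with the model path and asked to `predict`. A plausible extension-based dispatch, with stubbed runner classes (the mapping itself is an assumption):

class KerasRunner:
    """Stub: upstream wraps a Keras model and exposes predict()."""
    def __init__(self, model_name: str):
        ...


class TensorFlowRunner:
    """Stub: upstream wraps a frozen TensorFlow graph."""
    def __init__(self, model_name: str):
        ...


def find_runner(model_name: str) -> type:
    """Pick a runner class based on the model file extension."""
    runners = {'.net': KerasRunner, '.pb': TensorFlowRunner}
    ext = model_name[model_name.rfind('.'):]
    if ext not in runners:
        raise ValueError('Unknown model type: ' + model_name)
    return runners[ext]


# usage, as in the snippet above:
# predictions = find_runner(args.model)(args.model).predict(inputs)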
Code example #5
    def __init__(self, parser=None):
        parser = parser or ArgumentParser()
        add_to_parser(parser, self.usage, True)
        args = TrainData.parse_args(parser)
        self.args = args = self.process_args(args) or args

        if args.invert_samples and not args.samples_file:
            parser.error(
                'You must specify --samples-file when using --invert-samples')
        if args.samples_file and not isfile(args.samples_file):
            parser.error('No such file: ' + args.samples_file)
        if not 0.0 <= args.sensitivity <= 1.0:
            parser.error('sensitivity must be between 0.0 and 1.0')

        output_folder = os.path.join(args.folder, splitext(args.model)[0])
        if not os.path.exists(output_folder):
            print('Creating output folder:', output_folder)
            os.makedirs(output_folder)

        args.model = os.path.join(output_folder, args.model)

        inject_params(args.model)
        save_params(args.model)
        self.train, self.test = self.load_data(self.args)

        set_loss_bias(1.0 - args.sensitivity)
        params = ModelParams(skip_acc=args.no_validation,
                             extra_metrics=args.extra_metrics)
        self.model = create_model(args.model, params)
        self.model.summary()

        from keras.callbacks import ModelCheckpoint, TensorBoard, LambdaCallback
        checkpoint = ModelCheckpoint(args.model,
                                     monitor=args.metric_monitor,
                                     save_best_only=args.save_best)
        epoch_file = splitext(args.model)[0] + '.epoch'
        epoch_fiti = Fitipy(epoch_file)
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(a, b):
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        if args.samples_file:
            self.samples, self.hash_to_ind = self.load_sample_data(
                args.samples_file, self.train)
        else:
            self.samples = set()
            self.hash_to_ind = {}

        self.callbacks = [
            checkpoint,
            TensorBoard(log_dir=self.model_base + '.logs'),
            LambdaCallback(on_epoch_end=on_epoch_end)
        ]
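
The epoch counter survives restarts via Fitipy and the chained `read().read(0, int)` / `write().write(self.epoch, str)` calls. A file-backed stand-in with the same surface (an assumption about the interface these snippets use, not Fitipy's actual internals):

from os.path import isfile


class _Reader:
    def __init__(self, filename: str):
        self.filename = filename

    def read(self, default, typ):
        """Return the file contents parsed with typ, or default if missing."""
        if not isfile(self.filename):
            return default
        with open(self.filename) as f:
            text = f.read().strip()
        return typ(text) if text else default


class _Writer:
    def __init__(self, filename: str):
        self.filename = filename

    def write(self, value, typ):
        """Serialize value with typ and overwrite the file."""
        with open(self.filename, 'w') as f:
            f.write(typ(value))


class Fitipy:
    """Tiny file-backed value store with the interface used above."""
    def __init__(self, filename: str):
        self.filename = filename

    def read(self) -> _Reader:
        return _Reader(self.filename)

    def write(self) -> _Writer:
        return _Writer(self.filename)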
Code example #6
File: eval.py Project: jarkovic/veracrypt-voice
def main():
    parser = create_parser(usage)
    parser.add_argument(
        'models',
        nargs='*',
        help='List of model filenames in format: wake-word.yy-mm-dd.net')
    args = TrainData.parse_args(parser)
    if not (bool(args.pocketsphinx_dict) == bool(args.pocketsphinx_folder) ==
            bool(args.pocketsphinx_wake_word)):
        parser.error('Must pass all or no Pocketsphinx arguments')

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    data_files = data.train_files if args.use_train else data.test_files
    print('Data:', data)

    metrics = {}

    if args.pocketsphinx_dict and args.pocketsphinx_folder and args.pocketsphinx_wake_word:
        if not isfile(args.pocketsphinx_dict):
            parser.error('No such file: ' + args.pocketsphinx_dict)
        if not isdir(args.pocketsphinx_folder):
            parser.error('No such folder: ' + args.pocketsphinx_folder)
        listener = PocketsphinxListener(args.pocketsphinx_wake_word,
                                        args.pocketsphinx_dict,
                                        args.pocketsphinx_folder,
                                        args.pocketsphinx_threshold)
        stats = test_pocketsphinx(listener, data_files)
        metrics[args.pocketsphinx_dict] = stats_to_dict(stats)

    for model_name in args.models:
        print('Calculating', model_name + '...')
        inject_params(model_name)

        train, test = data.load(args.use_train, not args.use_train)
        inputs, targets = train if args.use_train else test
        predictions = Listener.find_runner(model_name)(model_name).predict(
            inputs)

        stats = Stats(predictions, targets, sum(data_files, []))

        print('----', model_name, '----')
        print(stats.counts_str())
        print()
        print(stats.summary_str())
        print()
        metrics[model_name] = stats.to_dict(args.threshold)

    print('Writing to:', args.output)
    with open(args.output, 'w') as f:
        json.dump(metrics, f)
Code example #7
File: test.py Project: vlinhd11/mycroft-precise
def main():
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.db_file, args.db_folder, args.data_dir)
    train, test = data.load(args.use_train, not args.use_train)
    inputs, targets = train if args.use_train else test

    filenames = sum(data.train_files if args.use_train else data.test_files,
                    [])
    predictions = load_precise_model(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
Code example #8
def main():
    args = create_parser(usage).parse_args()
    import numpy as np

    model_data = {
        name: Stats.from_np_dict(data)
        for name, data in np.load(args.input_file)['data'].item().items()
    }
    model_name = args.model_key or basename(splitext(args.model)[0])

    if model_name not in model_data:
        print("Could not find model '{}' in saved models in stats file: {}".
              format(model_name, list(model_data)))
        raise SystemExit(1)

    stats = model_data[model_name]

    save_spots = (stats.outputs != 0) & (stats.outputs != 1)
    if save_spots.sum() == 0:
        print('No data (or all NaN)')
        return

    stats.outputs = stats.outputs[save_spots]
    stats.targets = stats.targets[save_spots]
    inv = -np.log(1 / stats.outputs - 1)

    pos = np.extract(stats.targets > 0.5, inv)
    pos_mu = pos.mean().item()
    pos_std = np.sqrt(np.mean((pos - pos_mu)**2)) * args.smoothing

    print('Peak: {:.2f} mu, {:.2f} std'.format(pos_mu, pos_std))
    pr = inject_params(args.model)
    pr.__dict__.update(threshold_config=((pos_mu, pos_std), ))
    save_params(args.model)
    print('Saved params to {}.params'.format(args.model))
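
The `inv = -np.log(1 / stats.outputs - 1)` line is the inverse sigmoid (logit), so `pos_mu` and `pos_std` describe a Gaussian fitted to the positive activations in logit space; that pair is what gets stored as `threshold_config`. A decoder can then turn a raw output into a calibrated confidence with the Gaussian CDF; a sketch under that assumption:

import math


def decode(raw_output: float, mu: float, std: float) -> float:
    """Map a raw sigmoid output to a calibrated confidence in [0, 1]."""
    if raw_output <= 0.0 or raw_output >= 1.0:
        return float(raw_output > 0.5)
    logit = -math.log(1.0 / raw_output - 1.0)   # inverse sigmoid
    # Gaussian CDF of the fitted positive-activation distribution
    return 0.5 * (1.0 + math.erf((logit - mu) / (std * math.sqrt(2.0))))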
Code example #9
def main():
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train, shuffle=False)
    inputs, targets = train if args.use_train else test

    filenames = sum(data.train_files if args.use_train else data.test_files,
                    [])
    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
Code example #10
    def __init__(self, model_name: str, chunk_size: int = -1, runner_cls: type = None):
        self.window_audio = np.array([])
        self.pr = inject_params(model_name)
        self.mfccs = np.zeros((self.pr.n_features, self.pr.n_mfcc))
        self.chunk_size = chunk_size
        runner_cls = runner_cls or self.find_runner(model_name)
        self.runner = runner_cls(model_name)
        self.threshold_decoder = ThresholdDecoder(self.pr.threshold_config,
                                                  self.pr.threshold_center)
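
A typical way to drive this class, assuming the `update(chunk)` method that the upstream Listener exposes (the model filename and chunk layout below are placeholders):

# hypothetical usage sketch
listener = Listener('hey-computer.net', chunk_size=2048)
with open('audio.raw', 'rb') as f:            # 16-bit mono PCM at pr.sample_rate
    while True:
        chunk = f.read(2048)
        if not chunk:
            break
        confidence = listener.update(chunk)   # float in [0, 1]
        if confidence > 0.5:
            print('Wake word!')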
Code example #11
File: train.py Project: poveteen/CS-460-Project
    def __init__(self, args):
        super().__init__(args)

        if args.invert_samples and not args.samples_file:
            raise ValueError(
                'You must specify --samples-file when using --invert-samples')
        if args.samples_file and not isfile(args.samples_file):
            raise ValueError('No such file: ' + args.samples_file)
        if not 0.0 <= args.sensitivity <= 1.0:
            raise ValueError('sensitivity must be between 0.0 and 1.0')

        inject_params(args.model)
        save_params(args.model)
        params = ModelParams(skip_acc=args.no_validation,
                             extra_metrics=args.extra_metrics,
                             loss_bias=1.0 - args.sensitivity,
                             freeze_till=args.freeze_till)
        self.model = create_model(args.model, params)
        self.train, self.test = self.load_data(self.args)

        from keras.callbacks import ModelCheckpoint, TensorBoard, LambdaCallback
        checkpoint = ModelCheckpoint(args.model,
                                     monitor=args.metric_monitor,
                                     save_best_only=args.save_best)
        epoch_fiti = Fitipy(splitext(args.model)[0] + '.epoch')
        self.epoch = epoch_fiti.read().read(0, int)

        def on_epoch_end(_a, _b):
            self.epoch += 1
            epoch_fiti.write().write(self.epoch, str)

        self.model_base = splitext(self.args.model)[0]

        if args.samples_file:
            self.samples, self.hash_to_ind = self.load_sample_data(
                args.samples_file, self.train)
        else:
            self.samples = set()
            self.hash_to_ind = {}

        self.callbacks = [
            checkpoint,
            TensorBoard(log_dir=self.model_base + '.logs'),
            LambdaCallback(on_epoch_end=on_epoch_end)
        ]
Code example #12
File: eval.py Project: poveteen/CS-460-Project
    def run(self):
        args = self.args
        data = TrainData.from_both(args.tags_file, args.tags_folder,
                                   args.folder)
        data_files = data.train_files if args.use_train else data.test_files
        print('Data:', data)

        metrics = {}

        if self.is_pocketsphinx:
            script = PocketsphinxTestScript.create(
                key_phrase=args.pocketsphinx_wake_word,
                dict_file=args.pocketsphinx_dict,
                hmm_folder=args.pocketsphinx_folder,
                threshold=args.pocketsphinx_threshold)
            ww_files, nww_files = data_files
            script.run_test(ww_files, 'Wake Word', 1.0)
            script.run_test(nww_files, 'Not Wake Word', 0.0)
            stats = script.get_stats()
            metrics[args.pocketsphinx_dict] = stats.to_dict(args.threshold)

        for model_name in args.models:
            print('Calculating', model_name + '...')
            inject_params(model_name)

            train, test = data.load(args.use_train, not args.use_train)
            inputs, targets = train if args.use_train else test
            predictions = Listener.find_runner(model_name)(model_name).predict(
                inputs)

            stats = Stats(predictions, targets, sum(data_files, []))

            print('----', model_name, '----')
            print(stats.counts_str())
            print()
            print(stats.summary_str())
            print()
            metrics[model_name] = stats.to_dict(args.threshold)

        print('Writing to:', args.output)
        with open(args.output, 'w') as f:
            json.dump(metrics, f)
Code example #13
    def __init__(self,
                 model_name: str,
                 chunk_size: int = -1,
                 runner_cls: type = None):
        self.window_audio = np.array([])
        self.pr = inject_params(model_name)
        self.features = np.zeros((self.pr.n_features, self.pr.feature_size))
        self.chunk_size = chunk_size
        runner_cls = runner_cls or self.find_runner(model_name)
        self.runner = runner_cls(model_name)
        self.mfcc = import_module('speechpy.feature').mfcc
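
This variant computes features on the fly via speechpy rather than reading cached vectors. The call presumably mirrors upstream vectorization; the parameter names on `pr` are taken from the other snippets, and the keyword names follow `speechpy.feature.mfcc` (treat the exact mapping as an assumption):

import numpy as np


def update_features(self, new_audio: np.ndarray) -> np.ndarray:
    """Append audio to the rolling window and recompute MFCC features."""
    self.window_audio = np.concatenate((self.window_audio, new_audio))
    return self.mfcc(self.window_audio,
                     sampling_frequency=self.pr.sample_rate,
                     frame_length=self.pr.window_t,   # seconds per frame
                     frame_stride=self.pr.hop_t,      # seconds between frames
                     num_cepstral=self.pr.n_mfcc,
                     num_filters=self.pr.n_filt,
                     fft_length=self.pr.n_fft)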
Code example #14
    def __init__(self,
                 model_name: str,
                 chunk_size: int = -1,
                 runner_cls: type = None):
        print('model_name:', model_name)
        print('chunk_size:', chunk_size)

        self.window_audio = np.array([])
        self.pr = inject_params(model_name)
        self.mfccs = np.zeros((self.pr.n_features, self.pr.n_mfcc))
        self.chunk_size = chunk_size
        runner_cls = runner_cls or self.find_runner(model_name)
        self.runner = runner_cls(model_name)
Code example #15
def main():
    args = TrainData.parse_args(create_parser(usage))

    inject_params(args.model)

    data = TrainData.from_both(args.tags_file, args.tags_folder, args.folder)
    train, test = data.load(args.use_train, not args.use_train)
    inputs, targets = train if args.use_train else test

    filenames = sum(data.train_files if args.use_train else data.test_files,
                    [])

    print(args.model)
    predictions = Listener.find_runner(args.model)(args.model).predict(inputs)
    stats = calc_stats(filenames, targets, predictions)

    print('Data:', data)
    show_stats(stats, not args.no_filenames)
Code example #16
    def __init__(self, args):
        self.args = args
        self.trained_fns = load_trained_fns(args.model)
        pr = inject_params(args.model)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)

        from keras.callbacks import ModelCheckpoint
        self.checkpoint = ModelCheckpoint(args.model,
                                          monitor=args.metric_monitor,
                                          save_best_only=args.save_best)
        data = TrainData.from_tags(args.tags_file, args.tags_folder)
        self.tags_data = data.load(True, not args.no_validation)

        if not isfile(args.model):
            create_model(args.model, args.no_validation,
                         args.extra_metrics).save(args.model)
        self.listener = Listener(args.model,
                                 args.chunk_size,
                                 runner_cls=KerasRunner)
Code example #17
    def __init__(self):
        self.args = create_parser(usage).parse_args()
        pr = inject_params(self.args.model)
        self.runner = Listener.find_runner(self.args.model)(self.args.model)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)
Code example #18
    def __init__(self, args):
        super().__init__(args)
        pr = inject_params(self.args.model)
        self.runner = Listener.find_runner(self.args.model)(self.args.model)
        self.audio_buffer = np.zeros(pr.buffer_samples, dtype=float)