Exemple #1
0
    def _iter(self):
        """Yield examples from every configured dataset.

        Each dataset in ``self.dataset_dir`` except ``'lapsbm'`` is resolved
        by name and yielded as training data; LapsBM is loaded last with
        ``split=True`` to supply the validation/test partitions.

        Yields:
            dict with keys ``duration``, ``input``, ``label``, ``speaker``
            and ``dataset``.
        """
        for name, path in self.dataset_dir.items():

            # LapsBM is reserved for the valid/test split handled below.
            if name == 'lapsbm':
                continue

            try:
                dataset_cls = get_from_module('datasets*', name, regex=True)
                dataset = dataset_cls(dataset_dir=path)

                for d in dataset._iter():
                    yield {
                        'duration': d['duration'],
                        'input': d['input'],
                        'label': d['label'],
                        # Prefix the speaker id with the dataset name so
                        # speaker ids stay unique across merged datasets.
                        'speaker': '%s_%s' % (str(dataset), d['speaker']),
                        'dataset': 'train'
                    }
            except ValueError as e:
                self._logger.warning('Skipping dataset %s: %s' % (name, e))

        # Test and valid set
        lapsbm = LapsBM(dataset_dir=self.dataset_dir['lapsbm'], split=True)
        for d in lapsbm._iter():
            yield {
                'duration': d['duration'],
                'input': d['input'],
                'label': d['label'],
                # BUG FIX: the original used `str(dataset)` here, which is the
                # stale loop variable from the training loop above (wrong
                # prefix, and a NameError when every training dataset was
                # skipped). Prefix with the LapsBM instance instead.
                'speaker': '%s_%s' % (str(lapsbm), d['speaker']),
                'dataset': d['dataset']
            }
Exemple #2
0
    def _iter(self):
        """Yield examples from every configured dataset.

        Each dataset in ``self.dataset_dir`` except ``'cv_corpus_v1'`` is
        resolved by name and yielded as training data; the Common Voice
        corpus (CVC) is loaded last to supply its own valid/test partitions.

        Yields:
            dict with keys ``duration``, ``input``, ``label`` and
            ``dataset`` (plus ``speaker`` for training examples,
            ``audio_file`` for CVC examples).
        """
        for name, path in self.dataset_dir.items():

            # Common Voice is reserved for the valid/test split below.
            if name == 'cv_corpus_v1':
                continue

            try:
                dataset_cls = get_from_module('datasets*', name, regex=True)
                dataset = dataset_cls(dataset_dir=path)
                # FIX: replaced a leftover debug `print(dataset)` with the
                # class logger already used for warnings below.
                self._logger.debug('Iterating over dataset %s' % str(dataset))
                for d in dataset._iter():
                    yield {
                        'duration': d['duration'],
                        'input': d['input'],
                        'label': d['label'],
                        # Prefix the speaker id with the dataset name so
                        # speaker ids stay unique across merged datasets.
                        'speaker': '%s_%s' % (str(dataset), d['speaker']),
                        'dataset': 'train'
                    }
            except ValueError as e:
                self._logger.warning('Skipping dataset %s: %s' % (name, e))

        # Test and valid set
        cvc = CVC(dataset_dir=self.dataset_dir['cv_corpus_v1'])
        for d in cvc._iter():
            yield {
                'duration': d['duration'],
                'input': d['input'],
                'label': d['label'],
                'audio_file': d['audio_file'],
                'dataset': d['dataset']
            }
Exemple #3
0
    # NOTE(review): fragment of a prediction script — the enclosing function
    # and the tail of the final DatasetIterator call are outside this excerpt.
    if args.dataset and args.file:
        print('Both dataset and file args was set. Ignoring file args.')

    # GPU configuration
    setup_gpu(args.gpu, args.allow_growth)

    # Loading model
    model, meta = load_model(args.model, return_meta=True,
                             mode='predict', decoder=(not args.no_decoder))

    # Rebuild the full argument set: training-time args overridden by any
    # non-default CLI args given now.
    args = HParams(**meta['training_args']).update(vars(args_nondefault))

    # Features extractor
    input_parser = utils.get_from_module('preprocessing.audio',
                                         args.input_parser,
                                         params=args.input_parser_params)

    # Recovering text parser
    label_parser = utils.get_from_module('preprocessing.text',
                                         args.label_parser,
                                         params=args.label_parser_params)

    # Dataset mode takes precedence over single-file mode (see warning above).
    if args.dataset is not None:
        data_gen = DatasetGenerator(input_parser, label_parser,
                                    batch_size=1, seed=0, mode='predict',
                                    shuffle=False)
        test_flow = data_gen.flow_from_fname(args.dataset,
                                             datasets=args.subset)
    else:
        # Truncated in this excerpt: the DatasetIterator call continues
        # beyond the visible lines.
        test_flow = DatasetIterator(np.array([args.file]), None,
Exemple #4
0
def get(identifier):
    """Resolve *identifier* to an objective defined in this module."""
    namespace = globals()
    return get_from_module(identifier, namespace, 'objective')
def get(identifier):
    """Resolve *identifier* to an initialization defined in this module."""
    namespace = globals()
    return get_from_module(identifier, namespace, 'initialization')
Exemple #6
0
def get(identifier):
    """Look up an optimizer by *identifier* and return an instance of it."""
    return get_from_module(identifier,
                           globals(),
                           'optimizer',
                           instantiate=True)
Exemple #7
0
def get(identifier):
    """Resolve *identifier* to an activation function in this module."""
    namespace = globals()
    return get_from_module(identifier, namespace, 'activation function')
Exemple #8
0
    # NOTE(review): fragment of a dataset-preprocessing script — the argparse
    # parser is constructed above, outside this excerpt.
    parser.add_argument('--parser', type=str, required=True)
    parser.add_argument('--parser_params', nargs='+', default=[])

    parser.add_argument('--output_file', type=str, default=None)

    parser.add_argument('--input_parser', type=str, default=None)
    parser.add_argument('--input_parser_params', nargs='+', default=[])

    parser.add_argument('--label_parser', type=str, default=None)
    parser.add_argument('--label_parser_params', nargs='+', default=[])

    parser.add_argument('--override', action='store_true')

    args = parser.parse_args()

    # `parser` is rebound here: from the argparse parser to the dataset
    # parser class resolved by (regex) name.
    parser = utils.get_from_module('datasets*', args.parser, regex=True)
    print('input args ', args.input_parser_params)
    # Feature extractor for the audio inputs.
    input_parser = utils.get_from_module('preprocessing.audio',
                                         args.input_parser,
                                         params=args.input_parser_params)
    # Text parser for the transcription labels.
    label_parser = utils.get_from_module('preprocessing.text',
                                         args.label_parser,
                                         params=args.label_parser_params)

    # Instantiate the dataset with any extra parser params from the CLI.
    dataset = parser(args.dataset_dir,
                     **HParams().parse(args.parser_params).values())

    # Serialize the preprocessed dataset to HDF5.
    output_file = dataset.to_h5(fname=args.output_file,
                                input_parser=input_parser,
                                label_parser=label_parser,
                                override=args.override)
Exemple #9
0
def get(identifier):
    """Return the activation function named *identifier* from this module."""
    return get_from_module(identifier,
                           globals(),
                           'activation function')
Exemple #10
0
        # NOTE(review): fragment of a training script — the enclosing
        # `if args.load:` branch header and the tail of `model.compile(`
        # are outside this excerpt.
        model, meta = load_model(args.load, return_meta=True)

        logger.info('Loading parameters...')
        # Restore training-time args, overridden by any non-default CLI args.
        args = HParams(**meta['training_args']).update(vars(args_nondefault))

        # Resume from the epoch count stored in the checkpoint metadata.
        epoch_offset = len(meta['epochs'])
        logger.info('Current epoch: %d' % epoch_offset)

        if args_nondefault.lr:
            logger.info('Setting current learning rate to %f...' % args.lr)
            K.set_value(model.optimizer.lr, args.lr)

    else:
        logger.info('Creating model...')
        # Recovering all valid models
        model_fn = utils.get_from_module('core.models', args.model)
        # Loading model
        model = model_fn(**(HParams().parse(args.model_params).values()))

        logger.info('Setting the optimizer...')
        # Optimization
        # NOTE(review): no fallback branch — an unrecognized --opt value
        # leaves `opt` unbound.
        if args.opt.strip().lower() == 'sgd':
            opt = SGD(lr=args.lr,
                      momentum=args.momentum,
                      clipnorm=args.clipnorm)
        elif args.opt.strip().lower() == 'adam':
            opt = Adam(lr=args.lr, clipnorm=args.clipnorm)

        # Compile with dummy loss
        model.compile(loss={
            'ctc': ctc_dummy_loss,
Exemple #11
0
        # NOTE(review): fragment of a speech-recognition script — the
        # enclosing scope (and the `sr.Recognizer` instance `r`) is outside
        # this excerpt.
        for audio_fname in args.source:
            with sr.AudioFile(audio_fname) as source:
                audios.append((r.record(source), audio_fname))
                # read the entire audio file

    if args.model is not None:
        setup_gpu(args.gpu, args.allow_growth)

        model, meta = utils.load_model(args.model,
                                       return_meta=True,
                                       mode='predict')
        training_args = meta['training_args']

        # Features extractor
        input_parser = utils.get_from_module('preprocessing.audio',
                                             training_args['feats'],
                                             params=training_args['feats_params'])

        # Recovering text parser
        label_parser = utils.get_from_module('preprocessing.text',
                                             training_args['label_parser'],
                                             params=training_args['label_parser_params']
                                            )

        # Iterate over the recorded file names only; the raw audio objects
        # are not fed to the model here.
        data_it = DatasetIterator(np.array([f for a, f in audios]),
                                  input_parser=input_parser,
                                  label_parser=label_parser)

        model_predictions = model.predict_generator(
            data_it, val_samples=len(audios))
Exemple #12
0
def get(identifier):
    """Look up an optimizer by *identifier* and return an instance of it."""
    return get_from_module(identifier, globals(), 'optimizer', instantiate=True)
Exemple #13
0
def get(identifier):
    """Resolve *identifier* to an initialization defined in this module."""
    return get_from_module(identifier,
                           globals(),
                           'initialization')
def get(identifier, kwargs=None):
    """Instantiate the optimizer named *identifier*.

    ``kwargs`` (optional dict) is forwarded to the optimizer constructor.
    """
    return get_from_module(identifier, globals(), 'optimizer',
                           instantiate=True, kwargs=kwargs)
            
Exemple #15
0
if __name__ == '__main__':
    # Command-line interface.
    cli = argparse.ArgumentParser(
        description=
        'Generates a preprocessed dataset (hdf5 file) by providing the path to the dataset and the correct parser.'
    )
    cli.add_argument('--dataset_dir', type=str, default=None)
    cli.add_argument('--parser', type=str, required=True)
    cli.add_argument('--parser_params', nargs='+', default=[])
    cli.add_argument('--output_file', type=str, default=None)
    cli.add_argument('--override', action='store_true')
    args = cli.parse_args()

    # Resolve the dataset parser class by (regex) name.
    dataset_cls = utils.get_from_module('datasets*', args.parser, regex=True)

    # Fixed feature and label extractors for this script.
    input_parser = preprocessing.MFCC()
    label_parser = preprocessing.SimpleCharParser()

    # Instantiate the dataset with any extra parser params from the CLI.
    dataset = dataset_cls(args.dataset_dir,
                          **HParams().parse(args.parser_params).values())

    # Serialize the preprocessed dataset to HDF5.
    output_file = dataset.to_h5(fname=args.output_file,
                                input_parser=input_parser,
                                label_parser=label_parser,
                                override=args.override)

    print('Dataset %s saved at %s' % (dataset_cls.name, output_file))