def run(cls, args):
        """Compute features for a corpus from parsed command-line arguments.

        Reads the corpus and output directories through
        ``cls._parse_io_dirs``, logs to ``features.log`` in the output
        directory, configures a ``features.Features`` recipe from
        ``args`` and runs it. When ``args.h5f`` is set, the resulting
        ``feats.scp`` is additionally exported to ``feats.h5f``.

        Side effect: ``('use-energy', 'false')`` is appended to
        ``cls.parsed_options`` when no parsed option name contains
        'use-energy'.

        """
        # use-energy defaults to false here instead of true, unless it
        # has been given explicitly in the options. TODO unify that
        # with the overloaded_options above
        if not any('use-energy' in opt[0] for opt in cls.parsed_options):
            cls.parsed_options.append(('use-energy', 'false'))

        corpus_dir, output_dir = cls._parse_io_dirs(args, 'features')

        log_file = os.path.join(output_dir, 'features.log')
        log = utils.logger.get_log(log_file, verbose=args.verbose)

        corpus = Corpus.load(corpus_dir, validate=args.validate, log=log)

        # setup the features recipe from the command-line arguments
        recipe = features.Features(corpus, output_dir, log=log)
        recipe.type = cls.feat_name
        # pitch and cmvn arguments are converted with str2bool
        # ('true' to True)
        recipe.use_pitch = utils.str2bool(args.pitch)
        recipe.use_cmvn = utils.str2bool(args.cmvn)
        recipe.delta_order = args.delta_order
        recipe.features_options = cls.parsed_options
        recipe.njobs = args.njobs
        # the recipe is deleted unless args.recipe is set
        recipe.delete_recipe = not args.recipe
        recipe.compute()

        # nothing more to do if the h5features export is not asked for
        if not args.h5f:
            return

        recipe.log.info('exporting Kaldi ark features to h5features...')
        scp_file = os.path.join(recipe.output_dir, 'feats.scp')
        h5f_file = os.path.join(recipe.output_dir, 'feats.h5f')
        kaldi.scp_to_h5f(scp_file, h5f_file)
def test_features(pitch, ftype, corpus, tmpdir):
    """Compute features on a 3-utterance subcorpus and check the outputs.

    The features are computed with ``nbc`` coefficients ('num-ceps'
    for mfcc and plp, 'num-mel-bins' otherwise), then converted to
    h5features and read back to check the feature dimension and the
    utterance ids.

    ``pitch`` and ``ftype`` select the pitch option and the features
    type; ``corpus`` and ``tmpdir`` are presumably pytest fixtures
    (defined outside of this function).

    """
    output_dir = str(tmpdir.mkdir('feats'))
    flog = os.path.join(output_dir, 'feats.log')
    log = utils.logger.get_log(flog)

    # keep only 3 utterances for testing speed
    subcorpus = corpus.subcorpus(list(corpus.utts())[0:3])
    utt_ids = list(subcorpus.utts())
    assert len(utt_ids) == 3

    # features with few coefficients (or few mel bins)
    nbc = 3
    feat = features.Features(subcorpus, output_dir, log=log)
    feat.type = ftype
    feat.njobs = 1
    feat.use_pitch = pitch
    feat.delete_recipe = False
    feat.features_options.append(
        ('num-ceps' if ftype in ('mfcc', 'plp') else 'num-mel-bins', nbc))

    try:
        feat.compute()
    except RuntimeError:
        import sys

        # on failure, dump the available logs and configuration to
        # stdout to help debugging
        diagnostics = (
            flog,
            os.path.join(
                output_dir, 'recipe',
                'exp/make_mfcc/features/make_mfcc_pitch_features.1.log'),
            os.path.join(output_dir, 'recipe/conf/mfcc.conf'))

        for path in diagnostics:
            # these files are specific to mfcc with pitch and may not
            # exist for other configurations: skip the missing ones
            # so as not to hide the original error
            if not os.path.isfile(path):
                continue
            with open(path, 'r') as stream:
                sys.stdout.write(stream.read())

        # bare raise preserves the original traceback
        raise

    # # actually ERROR is in the vocabulary so this test fails...
    # assert_no_expr_in_log(flog, 'error')

    # basic asserts on files
    assert os.path.isfile(os.path.join(output_dir, 'meta.txt'))
    features.Features.check_features(output_dir)

    # convert to h5features and read it back
    h5 = os.path.join(output_dir, 'feats.h5')
    ark.scp_to_h5f(os.path.join(output_dir, 'feats.scp'), h5)
    data = h5features.Reader(h5, 'features').read()

    # check we have nbc or nbc+3 channels
    dim = data.features()[0].shape[1]
    exp = nbc + 3 if pitch else nbc
    assert dim == exp, 'bad dim: {}, expected {}'.format(dim, exp)

    # check utt_ids in h5f are consistent with corpus (same number of
    # utterances, same ids in the same order)
    times = data.dict_labels()
    assert len(times.keys()) == len(utt_ids)
    for t, c in zip(times.keys(), utt_ids):
        assert t == c
def test_monophone_cmvn_bad(corpus, tmpdir, lang_args):
    """Check a monophone model rejects features computed without cmvn.

    Features are computed with pitch and cmvn disabled and a delta
    order of 0. The parameters check of a monophone model built on
    those features must then raise an IOError mentioning 'cmvn'.

    """
    # compute features, deliberately without cmvn
    feats_path = str(tmpdir.mkdir('feats'))
    extractor = features.Features(corpus, feats_path)
    extractor.use_pitch = False
    extractor.use_cmvn = False
    extractor.delta_order = 0
    extractor.compute()

    # the acoustic model must complain about the missing cmvn
    model_path = str(tmpdir.mkdir('am_mono'))
    model = acoustic.Monophone(corpus, feats_path, model_path, lang_args)
    with pytest.raises(IOError) as excinfo:
        model.check_parameters()

    message = str(excinfo.value)
    assert 'cmvn' in message