예제 #1
0
def test_extract_features(utterances_index, features):
    """Check frame counts and dimensions of features extracted without CMVN.

    Three extractions are compared, each looking only at the first
    utterance of the index it is given:

    * the provided index, without pitch,
    * the provided index, with pitch (3 extra columns expected),
    * a single utterance 'u1' restricted to the ``(0, 1)`` segment of the
      first wav, without pitch (fewer frames, same number of columns).

    """
    def extract_first(index, with_pitch):
        # features of the first utterance in `index`, CMVN always disabled
        settings = pipeline.get_default_config(
            features, with_cmvn=False, with_pitch=with_pitch)
        extracted = pipeline.extract_features(settings, index)
        return extracted[index[0][0]]

    # reference: whole utterance, no pitch
    plain = extract_first(utterances_index, with_pitch=False)
    assert plain.is_valid()
    assert plain.shape[0] == 140
    assert plain.dtype == np.float32

    # adding pitch keeps the frames and appends 3 columns
    pitched = extract_first(utterances_index, with_pitch=True)
    assert pitched.is_valid()
    assert pitched.shape[0] == 140
    assert pitched.shape[1] == plain.shape[1] + 3

    # a segmented utterance built on the first wav of the index: the last
    # two fields are presumably tstart/tstop in seconds
    segment_index = [('u1', utterances_index[0][1], 0, 1)]
    segment = extract_first(segment_index, with_pitch=False)
    assert segment.is_valid()
    assert segment.shape[0] == 98
    assert segment.shape[1] == plain.shape[1]
예제 #2
0
def test_cmvn(utterances_index, by_speaker, with_vad):
    """Check the shape of MFCC features when CMVN is enabled.

    The pipeline is configured for MFCC with CMVN but without pitch nor
    delta, the ``by_speaker`` and ``with_vad`` CMVN options being given
    as arguments. The features of the first utterance must be valid and
    have 140 frames of 13 coefficients.

    """
    settings = pipeline.get_default_config(
        'mfcc', with_cmvn=True, with_pitch=False, with_delta=False)
    cmvn_options = settings['cmvn']
    cmvn_options['by_speaker'] = by_speaker
    cmvn_options['with_vad'] = with_vad

    extracted = pipeline.extract_features(settings, utterances_index)
    first = extracted[utterances_index[0][0]]
    assert first.is_valid()
    assert (first.shape[0], first.shape[1]) == (140, 13)
예제 #3
0
def command_extract(args):
    """Extract features from an utterances index and save them to a file.

    Sets up the logger from ``args.quiet`` / ``args.verbose``, validates
    the output and input files, reads the utterances index, runs
    ``pipeline.extract_features`` and saves the result to
    ``args.output_file``.

    Every validation failure is reported through the logger and makes
    the function return early, before any extraction is attempted.

    Parameters
    ----------
    args : namespace
        Parsed command-line arguments. The attributes read here are
        ``quiet``, ``verbose``, ``output_file``, ``config``,
        ``utts_index`` and ``njobs``.

    """
    # setup the logger (level given by -q/-v arguments): 0 is warning, 1
    # is info, anything else is debug
    if args.quiet:
        log = utils.null_logger()
    else:
        level = {0: 'warning', 1: 'info'}.get(args.verbose, 'debug')
        log = utils.get_logger(name='speech-features', level=level)
    # forward the initialized log to shennong
    utils._logger = log

    # make sure the output file is not already existing and have a
    # valid extension
    output_file = args.output_file
    if os.path.exists(output_file):
        log.error('output file already exist: %s', output_file)
        return
    output_ext = os.path.splitext(output_file)[1]
    valid_extensions = tuple(supported_extensions().keys())
    if output_ext not in valid_extensions:
        log.error(
            'output file has an unsupported extension "%s", must be in %s',
            output_ext, ", ".join(valid_extensions))
        return

    # make sure the input config and utterances index exist: report every
    # missing file, then abort instead of failing later on open()
    missing = [filename for filename in (args.config, args.utts_index)
               if not os.path.exists(filename)]
    for filename in missing:
        log.error('input file not found: %s', filename)
    if missing:
        return

    # read the utterances file as a list of lists, ignore empty lines
    # in the file (the context manager closes the file once read)
    with open(args.utts_index, 'r') as index_file:
        stripped = (line.strip() for line in index_file)
        utterances = [line.split(' ') for line in stripped if line]

    # run the pipeline
    features = pipeline.extract_features(
        args.config, utterances, njobs=args.njobs, log=log)

    # save the features
    log.info('saving the features to %s', output_file)
    features.save(output_file)
예제 #4
0
def main():
    """Extract features for a corpus and save them as an h5f file.

    Command-line entry point. The utterances are read from
    ``<data_directory>/<corpus>.utts`` (one utterance per line, fields
    separated by a single space), the features are extracted with
    ``pipeline.extract_features`` using the given configuration file and
    saved to ``<data_directory>/features/<corpus>_<config name>.h5f``.

    Raises
    ------
    ValueError
        If the data directory or the configuration file does not exist.

    """
    # parse input arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('data_directory', help='input/output data directory')
    parser.add_argument('config_file', help='YAML configuration file')
    parser.add_argument(
        'corpus', choices=['english', 'xitsonga'], help='corpus to process')
    parser.add_argument(
        '-j', '--njobs', type=int, default=4, metavar='<int>',
        help='number of parallel jobs (default to %(default)s)')
    parser.add_argument(
        '-v', '--verbose', action='store_true', help='increase log level')
    args = parser.parse_args()

    # check and setup arguments
    data_directory = args.data_directory
    if not os.path.isdir(data_directory):
        raise ValueError(f'directory not found: {data_directory}')
    config = args.config_file
    if not os.path.isfile(config):
        raise ValueError(f'file not found: {config}')
    os.makedirs(os.path.join(data_directory, 'features'), exist_ok=True)
    log = get_logger(level='debug' if args.verbose else 'info')

    # load input utterances: strip each line so the last field does not
    # carry the trailing newline, and ignore empty lines
    utts_file = os.path.join(data_directory, f'{args.corpus}.utts')
    with open(utts_file, 'r') as stream:
        stripped = (line.strip() for line in stream)
        utterances = [line.split(' ') for line in stripped if line]

    # extract the features
    features = pipeline.extract_features(
        config, utterances, njobs=args.njobs, log=log)

    # save them, swapping the extension of the configuration file (whatever
    # it is) for .h5f so the output name always ends with .h5f
    config_name = os.path.splitext(os.path.basename(config))[0]
    h5f_file = os.path.join(
        data_directory, 'features', f'{args.corpus}_{config_name}.h5f')
    features.save(h5f_file)
예제 #5
0
def test_check_speakers(utterances_index, capsys):
    """Check how the pipeline handles speaker information for CMVN.

    Three cases are covered:

    * the default configuration with an index entry made of a single
      field (no speaker) must raise a ValueError,
    * with CMVN disabled, extraction succeeds and the log states that
      CMVN is disabled,
    * with CMVN enabled but not by speaker, the log states that CMVN is
      applied by utterance.

    Log messages are read from the captured standard error.

    """
    log = utils.get_logger(level='info')

    config = pipeline.get_default_config('mfcc')
    with pytest.raises(ValueError) as err:
        pipeline.extract_features(config, [(utterances_index[0][1], )],
                                  log=log)
    # match on the exception message itself: str() of the ExceptionInfo
    # wrapper is a (possibly truncated) repr on pytest >= 5
    assert 'no speaker information provided' in str(err.value)

    capsys.readouterr()  # clean the buffer
    config = pipeline.get_default_config('mfcc', with_cmvn=False)
    pipeline.extract_features(config, utterances_index, log=log)
    log_out = capsys.readouterr().err
    assert 'cmvn' not in log_out
    assert '(CMVN disabled)' in log_out

    config = pipeline.get_default_config('mfcc', with_cmvn=True)
    config['cmvn']['by_speaker'] = False
    pipeline.extract_features(config, utterances_index, log=log)
    log_out = capsys.readouterr().err
    assert 'cmvn by utterance' in log_out
    assert '(CMVN by speaker disabled)' in log_out
예제 #6
0
def test_config_bad(utterances_index):
    """Check the pipeline behavior on invalid or incomplete configurations.

    The first part ensures a ValueError with an explicit message is
    raised for an unknown features type, a configuration without any
    features processor, a configuration with two of them, and a
    configuration with an unknown key. The second part ensures
    ``pipeline._init_config`` fills in the entries removed from the
    'cmvn' and 'pitch' sections.

    Messages are matched against ``str(err.value)``, the exception
    itself: str() of the ExceptionInfo wrapper is a (possibly truncated)
    repr on pytest >= 5.

    """
    with pytest.raises(ValueError) as err:
        pipeline.get_default_config('bad')
    assert 'invalid features "bad"' in str(err.value)

    # no features extraction processor at all
    config = pipeline.get_default_config('mfcc')
    del config['mfcc']
    with pytest.raises(ValueError) as err:
        pipeline.extract_features(config, utterances_index)
    assert 'the configuration does not define any features' in str(err.value)

    # two features extraction processors
    config = pipeline.get_default_config('mfcc')
    config['plp'] = config['mfcc']
    with pytest.raises(ValueError) as err:
        pipeline.extract_features(config, utterances_index)
    assert 'more than one features extraction processor' in str(err.value)

    # unknown top-level key
    config = pipeline.get_default_config('mfcc')
    config['invalid'] = config['mfcc']
    with pytest.raises(ValueError) as err:
        pipeline.extract_features(config, utterances_index)
    assert 'invalid keys in configuration' in str(err.value)

    # a missing cmvn/with_vad entry is expected to come back truthy
    config = pipeline.get_default_config('mfcc')
    del config['cmvn']['with_vad']
    parsed = pipeline._init_config(config)
    assert 'cmvn' in parsed
    assert parsed['cmvn']['with_vad']

    # a missing cmvn/by_speaker entry is expected to come back falsy
    config = pipeline.get_default_config('mfcc')
    del config['cmvn']['by_speaker']
    c = pipeline._init_config(config)
    assert not c['cmvn']['by_speaker']

    # a missing pitch/postprocessing entry is expected to be an empty dict
    config = pipeline.get_default_config('mfcc')
    del config['pitch']['postprocessing']
    c = pipeline._init_config(config)
    assert c['pitch']['postprocessing'] == {}
예제 #7
0
        c for c in pairs.columns if len(c) > 2 and c[-2:] == "_1"
    ]]
    items_1.columns = [c[:-2] for c in items_1.columns]
    items_2 = pairs[[
        c for c in pairs.columns if len(c) > 2 and c[-2:] == "_2"
    ]]
    items_2.columns = [c[:-2] for c in items_2.columns]
    if set(items_1.columns) != set(items_2.columns):
        eprint("""Issue with pair file (<F>):
columns don't match""".replace("<F>", str(args.pair_file)))
        sys.exit(1)
    if not set(['file', 'onset', 'offset', 'speaker']).issubset(
            items_1.columns):
        eprint("""Issue with pair file (<F>): missing 'file', 'speaker',
'onset', or 'offset' column(s)""".replace("<F>", str(args.pair_file)))
        sys.exit(1)
    items = pd.concat([items_1, items_2], sort=True).drop_duplicates()
    file_spk_ = items[['file', 'speaker']].drop_duplicates()
    utterance_index = [(str(i), ) + tuple(x)
                       for (i, x) in enumerate(file_spk_.values)]
    utterance_ids = dict(((f, s), uid) for (uid, f, s) in utterance_index)

    features = snpipeline.extract_features(args.shennong_config_file,
                                           utterance_index,
                                           njobs=args.njobs)
    pairs['distance'] = calculate_distances(pairs,
                                            features,
                                            utterance_ids,
                                            njobs=args.njobs)
    pairs.to_csv(args.output_file, index=False)
예제 #8
0
def test_extract_features_full(ext, wav_file, wav_file_8k, wav_file_float32,
                               capsys, tmpdir):
    """Run the default MFCC pipeline on a demanding index and check it all.

    The index mixes three wav files (one of them sampled at 8k, going by
    its name), two speakers and segmented utterances, and the extraction
    runs on 2 parallel jobs. The test checks the log messages, the
    features dtype, properties and shape, the CMVN normalization, and
    finally that the collection survives a save / load round trip to a
    file with the extension `ext`.

    """
    utt_ids = ('u1', 'u2', 'u3')

    # entries are (utterance, wav, speaker, tstart, tstop)
    index = [('u1', wav_file, 's1', 0, 1),
             ('u2', wav_file_float32, 's2', 1, 1.2),
             ('u3', wav_file_8k, 's1', 1, 3)]

    # VAD is switched off because it can alter the cmvn result (far from
    # (0, 1) when the signal includes non-voiced frames)
    config = pipeline.get_default_config('mfcc')
    config['cmvn']['with_vad'] = False

    feats = pipeline.extract_features(
        config, index, njobs=2, log=utils.get_logger())

    # the expected log messages must have been written to stderr
    stderr = capsys.readouterr().err
    for expected in ('INFO - get 3 utterances from 2 speakers in 3 wavs',
                     'WARNING - several sample rates found in wav files'):
        assert expected in stderr

    for utt in utt_ids:
        assert utt in feats
        assert feats[utt].dtype == np.float32

    # check properties
    p1, p2, p3 = (feats[utt].properties for utt in utt_ids)
    audio1, audio2, audio3 = p1['audio'], p2['audio'], p3['audio']
    assert audio1['file'] == wav_file
    assert audio1['duration'] == 1.0
    assert audio2['file'] == wav_file_float32
    assert audio2['duration'] == pytest.approx(0.2)
    assert audio3['file'] == wav_file_8k
    # 3s are requested but only duration - tstart is available
    assert audio3['duration'] < 0.5
    assert p1['mfcc'] == p2['mfcc']
    assert p1['mfcc']['sample_rate'] != p3['mfcc']['sample_rate']
    expected_keys = {
        'audio', 'mfcc', 'cmvn', 'pitch', 'delta', 'speaker', 'pipeline'
    }
    assert p1.keys() == expected_keys
    assert p1.keys() == p2.keys() == p3.keys()
    assert p1['pipeline'] == p2['pipeline'] == p3['pipeline']

    # check shape. mfcc*delta + pitch = 13 * 3 + 3 = 42
    for utt, nframes in (('u1', 98), ('u2', 18), ('u3', 40)):
        assert feats[utt].shape == (nframes, 42)

    def mfcc(utt):
        # the first 13 columns of the features, on which cmvn is checked
        return feats[utt].data[:, :13]

    # check cmvn: u2 is the only utterance of speaker s2
    assert mfcc('u2').mean() == pytest.approx(0.0, abs=1e-6)
    assert mfcc('u2').std() == pytest.approx(1.0, abs=1e-6)

    # u1 and u3 share speaker s1: normalization holds on the pooled
    # frames, and is at least as good as on each utterance taken alone
    pooled = np.vstack((mfcc('u1'), mfcc('u3')))
    assert pooled.mean() == pytest.approx(0.0, abs=1e-6)
    assert pooled.std() == pytest.approx(1.0, abs=1e-6)
    for utt in ('u1', 'u3'):
        assert np.abs(pooled.mean()) <= np.abs(mfcc(utt).mean())
        assert np.abs(pooled.std() - 1.0) <= np.abs(mfcc(utt).std() - 1.0)

    # save / load the features
    filename = str(tmpdir.join('feats' + ext))
    feats.save(filename)
    assert FeaturesCollection.load(filename) == feats