예제 #1
0
def command_extract(args):
    """Extract features from an utterances index and save them to a file.

    Configures the logger from the verbosity options, validates the
    output and input files, reads the utterances index, runs the
    extraction pipeline and saves the resulting features. Any
    validation failure is logged as an error and the function returns
    without extracting anything.

    Parameters
    ----------
    args : namespace
        Parsed command-line arguments. The attributes read here are
        ``quiet``, ``verbose``, ``output_file``, ``config``,
        ``utts_index`` and ``njobs``.

    """
    # setup the logger (level given by -q/-v arguments)
    if args.quiet:
        log = utils.null_logger()
    else:
        if args.verbose == 0:
            level = 'warning'
        elif args.verbose == 1:
            level = 'info'
        else:  # verbose >= 2
            level = 'debug'
        log = utils.get_logger(name='speech-features', level=level)
    # forward the initialized log to shennong
    utils._logger = log

    # make sure the output file is not already existing and have a
    # valid extension
    output_file = args.output_file
    if os.path.exists(output_file):
        log.error('output file already exist: %s', output_file)
        return
    output_ext = os.path.splitext(output_file)[1]
    supported = list(supported_extensions().keys())
    if output_ext not in supported:
        log.error(
            'output file has an unsupported extension "%s", must be in %s',
            output_ext, ", ".join(supported))
        return

    # make sure the input config and wavs_index exists. Report every
    # missing file before giving up, so the user can fix them all at
    # once, then stop here instead of crashing later on a missing file
    missing = [
        filename for filename in (args.config, args.utts_index)
        if not os.path.exists(filename)]
    for filename in missing:
        log.error('input file not found: %s', filename)
    if missing:
        return

    # read the utterances file as a list of lists, ignore empty lines
    # in the file (the context manager closes the file once read)
    with open(args.utts_index, 'r') as index_file:
        stripped_lines = (line.strip() for line in index_file)
        utterances = [line.split(' ') for line in stripped_lines if line]

    # run the pipeline
    features = pipeline.extract_features(
        args.config, utterances, njobs=args.njobs, log=log)

    # save the features
    log.info('saving the features to %s', output_file)
    features.save(output_file)
예제 #2
0
def main():
    """Compute MFCC features on a directory of wavs and analyze serializers.

    Parses the command line (an input directory of wav files and an
    optional output directory defaulting to ``/tmp``), loads every wav
    file found, computes MFCC features on them and runs
    ``analyze_serializer`` for each supported file extension. The
    collected results are handed to ``print_results``.

    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('data_dir', help='input directory with wavs')
    parser.add_argument(
        'output_dir', nargs='?', default='/tmp',
        help='output directory (created files are deleted at exit)')
    args = parser.parse_args()

    # load each wav file, indexed by its basename
    audio_data = {}
    for wav in list_files_with_extension(args.data_dir, '.wav'):
        audio_data[os.path.basename(wav)] = Audio.load(wav)

    # cumulated duration of the loaded audio, truncated to whole seconds
    nseconds = int(sum(audio.duration for audio in audio_data.values()))
    total_duration = datetime.timedelta(seconds=nseconds)
    print('found {} wav files, total duration of {}'.format(
        len(audio_data), str(total_duration)))

    # compute the features (default MFCC), measuring the elapsed time
    print('computing MFCC features...')
    start = datetime.datetime.now()
    processor = MfccProcessor()
    processed = {}
    for name, audio in audio_data.items():
        processed[name] = processor.process(audio)
    features = FeaturesCollection(**processed)
    stop = datetime.datetime.now()
    print('took {}'.format(stop - start))

    # save the features in all the supported formats
    per_extension = {}
    for ext in supported_extensions().keys():
        per_extension[ext] = analyze_serializer(
            features, ext, args.output_dir)

    print_results({'duration': total_duration, 'data': per_extension})
예제 #3
0
                                                  for v in (True, False)])
def test_cmvn(utterances_index, by_speaker, with_vad):
    """Check the MFCC pipeline with CMVN for the given cmvn options.

    Features are extracted with pitch and delta disabled, then the
    features of the first utterance in the index are checked to be
    valid and of shape (140, 13).

    """
    config = pipeline.get_default_config(
        'mfcc', with_cmvn=True, with_pitch=False, with_delta=False)

    # override the cmvn options with the parametrized values
    config['cmvn']['by_speaker'] = by_speaker
    config['cmvn']['with_vad'] = with_vad

    collection = pipeline.extract_features(config, utterances_index)

    # the utterance name is the first field of the first index entry
    first_name = utterances_index[0][0]
    first_features = collection[first_name]
    assert first_features.is_valid()

    nrows = first_features.shape[0]
    assert nrows == 140
    ncols = first_features.shape[1]
    assert ncols == 13


@pytest.mark.parametrize('ext', supported_extensions().keys())
def test_extract_features_full(ext, wav_file, wav_file_8k, wav_file_float32,
                               capsys, tmpdir):
    # difficult case with parallel jobs, different sampling rates,
    # speakers and segments
    index = [('u1', wav_file, 's1', 0, 1),
             ('u2', wav_file_float32, 's2', 1, 1.2),
             ('u3', wav_file_8k, 's1', 1, 3)]
    config = pipeline.get_default_config('mfcc')

    # disable VAD because it can alter the cmvn result (far from (0,
    # 1) when the signal includes non-voiced frames)
    config['cmvn']['with_vad'] = False

    feats = pipeline.extract_features(config,
                                      index,
예제 #4
0
def test_get_serializer_byext(ext):
    """Check the serializer class obtained from a file extension.

    The serializer requested for the file ``'foo' + ext`` must be an
    instance of the class registered for `ext`, and that file must not
    exist on disk once the serializer has been obtained.

    """
    filename = 'foo' + ext
    serializer = serializers.get_serializer(
        FeaturesCollection, filename, None)

    assert not os.path.isfile(filename)

    expected_class = serializers.supported_extensions()[ext]
    assert isinstance(serializer, expected_class)