Beispiel #1
0
def evaluate(args):
    """Print the aggregate WER and CER of every API found in a results file.

    The JSON file at ``args.file`` is read as a mapping from utterance id to
    a dict that holds the reference text under ``'ref'`` and one transcript
    per API under every other key. Reference and transcripts are lower-cased
    and passed through ``_parse_numbers`` together with ``args.lang`` before
    being scored.

    For each API the values returned by ``wer`` and ``cer`` are summed over
    all utterances and divided by the total number of reference tokens and
    reference characters respectively; the result is printed as a
    percentage, one line per API in sorted API-name order.
    (Presumably ``wer``/``cer`` return error counts rather than rates, given
    this normalisation -- confirm against their definitions.)

    Args:
        args: Object exposing ``file`` (path of the JSON results file) and
            ``lang`` (forwarded to ``_parse_numbers``).
    """
    filepath = args.file
    lang = args.lang

    # JSON text is UTF-8; do not depend on the locale's default encoding,
    # which mis-decodes non-ASCII transcripts on some platforms.
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    total_wer = {}
    total_cer = {}
    num_tokens = {}
    num_chars = {}
    for utterance in tqdm(data):
        entry = data[utterance]
        ref = _parse_numbers(entry['ref'].lower(), lang)
        # The reference lengths do not depend on the API, so compute once.
        ref_tokens = len(ref.split())
        ref_chars = len(ref)

        for api, raw_transcript in entry.items():
            if api == 'ref':
                continue

            transcript = _parse_numbers(raw_transcript.lower(), lang)

            # NOTE(review): the transcript is passed before the reference;
            # confirm this matches the argument order wer/cer expect.
            wer_inst = wer(transcript, ref)
            cer_inst = cer(transcript, ref)

            total_wer[api] = total_wer.get(api, 0) + wer_inst
            total_cer[api] = total_cer.get(api, 0) + cer_inst
            num_tokens[api] = num_tokens.get(api, 0) + ref_tokens
            num_chars[api] = num_chars.get(api, 0) + ref_chars

    # Sorted so the report order is stable from run to run.
    for api in sorted(total_wer):
        tokens = num_tokens[api]
        chars = num_chars[api]
        # An API scored only against empty references has nothing to
        # normalise by; report NaN instead of aborting the whole report.
        if tokens:
            wer_pct = (float(total_wer[api]) / tokens) * 100
        else:
            wer_pct = float('nan')
        if chars:
            cer_pct = (float(total_cer[api]) / chars) * 100
        else:
            cer_pct = float('nan')
        print("{} - WER: {:.02f}% CER: {:.02f}%".format(
            api, wer_pct, cer_pct))
def test_cer_5():
    """``error_rate.cer`` must raise ``ValueError`` for an empty reference."""
    empty_reference = ''
    hypothesis = 'Hypothesis'
    with pytest.raises(ValueError):
        error_rate.cer(empty_reference, hypothesis)
def test_cer_4():
    """Scoring a string against itself must give a CER of exactly zero."""
    text = 'werewolf'
    assert error_rate.cer(text, text) == 0.0
def test_cer_3():
    """A doubled space between words must not contribute to the CER.

    The hypothesis differs from the reference only by one extra space, and
    the expected character error rate is (numerically) zero.
    """
    reference = 'were wolf'
    hypothesis = 'were  wolf'
    result = error_rate.cer(reference, hypothesis)
    assert np.allclose(result, 0.0)
def test_cer_2():
    """With ``remove_space=True`` the expected CER for this pair is 0.125.

    Ignoring spaces, the hypothesis differs from the eight-character
    reference in a single character (1 / 8 = 0.125).
    """
    reference = 'werewolf'
    hypothesis = 'weae  wolf'
    result = error_rate.cer(reference, hypothesis, remove_space=True)
    assert np.allclose(result, 0.125)