def calculate_report(results_tuple):
    r'''
    This routine will calculate a WER report.
    It'll compute the `mean` WER and create ``Sample`` objects of the ``report_count`` top lowest
    loss items from the provided WER results tuple (only items with WER!=0 and ordered by their WER).
    '''
    samples = []
    items = list(zip(*results_tuple))
    total_levenshtein = 0.0
    total_label_length = 0.0
    for label, decoding, distance, loss in items:
        sample_wer = wer(label, decoding)
        sample = Sample(label, decoding, loss, distance, sample_wer)
        samples.append(sample)
        total_levenshtein += levenshtein(label.split(), decoding.split())
        total_label_length += float(len(label.split()))

    # Getting the WER from the accumulated levenshteins and lengths
    samples_wer = total_levenshtein / total_label_length

    # Filter out all items with WER=0
    samples = [s for s in samples if s.wer > 0]

    # Order the remaining items by their loss (lowest loss on top)
    samples.sort(key=lambda s: s.loss)

    # Take only the first report_count items
    samples = samples[:FLAGS.report_count]

    # Order this top FLAGS.report_count items by their WER (lowest WER on top)
    samples.sort(key=lambda s: s.wer)

    return samples_wer, samples
Exemple #2
0
def process_decode_result(item):
    label, decoding, distance, loss = item
    sample_wer = wer(label, decoding)
    return AttrDict({
        'src': label,
        'res': decoding,
        'loss': loss,
        'distance': distance,
        'wer': sample_wer,
        'levenshtein': levenshtein(label.split(), decoding.split()),
        'label_length': float(len(label.split())),
    })
def get_bestmatch_keywords_using_wer(str):
    global allwords
    if str in allwords:
        return str
    r = []
    str1 = ' '.join(list(str))
    for o in allwords:
        o1 = ' '.join(list(o))
        # print (type(o), type(str), o1, str1)
        r.append(wer(o1, str1))
    idx = np.argmin(np.array(r), axis=0)
    # print (idx)
    # print(str, allwords[idx])
    return allwords[idx]