コード例 #1
0
def main(fpath):
    """Log the mean word error rate (WER) of both engines for one folder.

    The first file matching ``*_df.b`` directly inside ``fpath`` is unpickled
    and used as a pandas DataFrame with the columns ``"Reference"``,
    ``"Deepspeech hypothesis"`` and ``"Livai hypothesis"``. Each row is scored
    with ``text.wer`` and ``wer.wer``; rows scoring above 1 (``text.wer``) or
    above 100 (``wer.wer``) are dropped before the statistics are logged.

    Rows with an empty reference cannot be scored (``text.wer`` raises
    ``ZeroDivisionError``), so they are counted as a complete miss in BOTH
    tables: 1.0 for ``text.wer`` and 100 for ``wer.wer``. Keeping the two
    tables on the same set of rows is what makes their averages comparable.

    Args:
        fpath: Directory containing the ``*_df.b`` pickle.

    Raises:
        IndexError: If no ``*_df.b`` file is found in ``fpath``.
    """
    # Path to the pickled data frame (the first match wins).
    # NOTE(review): pickle.load can execute arbitrary code stored in the
    # file; only point this at trusted data.
    fpath = glob.glob(fpath + "/*_df.b")[0]
    logging.info("Reading file: " + fpath)
    with open(fpath, "rb") as f:
        vdf = pickle.load(f)

    # Find the shape
    logging.info("vdf: Number of audio segments:{}".format(vdf.shape[0]))

    wer_v = []
    wer_wer = []
    for _, row in vdf.iterrows():
        ref = row["Reference"]
        ds_hyp = row["Deepspeech hypothesis"]
        la_hyp = row["Livai hypothesis"]
        if not ref:
            # An empty reference cannot be scored; record the worst score on
            # each scale so both tables describe the same rows.
            wer_v.append([1.0, 1.0])
            wer_wer.append([100, 100])
            continue
        wer_v.append([text.wer(ref, ds_hyp), text.wer(ref, la_hyp)])
        wer_wer.append([wer.wer(ref, ds_hyp), wer.wer(ref, la_hyp)])

    columns = ["WER for DS", "WER for LA"]

    # Push the text.py scores to a data frame for easier calculations and
    # remove every row where either engine scored above 1.
    werds_df = pd.DataFrame(wer_v, columns=columns)
    werds_df = werds_df[werds_df["WER for DS"] <= 1]
    werds_df = werds_df[werds_df["WER for LA"] <= 1]

    # Same for the wer.py scores, whose cut-off is 100.
    wer_df = pd.DataFrame(wer_wer, columns=columns)
    wer_df = wer_df[wer_df["WER for DS"] <= 100]
    wer_df = wer_df[wer_df["WER for LA"] <= 100]

    # ### Lower WER is better
    # Summary statistics; logged so they are visible outside a notebook.
    logging.info("text.py WER statistics:\n" + str(werds_df.describe()))
    logging.info("wer.py WER statistics:\n" + str(wer_df.describe()))

    # ### Average of WER
    logging.info("Mean of WER using text.py and wer.py are given below: ")
    logging.info("text.py WER average for " + fpath + ": " +
                 str(werds_df.mean()))
    logging.info("wer.py WER average for " + fpath + ": " + str(wer_df.mean()))
コード例 #2
0
def convert_to_wer(error):
    """Print the mean WER of one labelled set of transcripts.

    ``error`` is indexed as ``[label, references, hypotheses]``. Every
    reference is scored against the hypothesis at the same position with both
    ``text.wer`` and ``wer.wer``. A ``ZeroDivisionError`` from either scorer
    is replaced by a fallback score (1.0 for ``text.wer``, 100 for
    ``wer.wer``).

    Scores above 1 (``text.wer``) or above 100 (``wer.wer``) are discarded,
    then one line ``"<label> : <text.wer mean> : <wer.wer mean>"`` is printed.
    Nothing is returned.
    """
    references = error[1]
    hypotheses = error[2]
    column = "WER for DS"

    def _score(metric, fallback, ref, hyp):
        # Score one pair, substituting the fallback when the scorer divides
        # by zero.
        try:
            return metric(ref, hyp)
        except ZeroDivisionError:
            return fallback

    text_rows = []
    wer_rows = []
    for position, ref in enumerate(references):
        text_rows.append([_score(text.wer, 1.0, ref, hypotheses[position])])
        wer_rows.append([_score(wer.wer, 100, ref, hypotheses[position])])

    # One single-column table per scorer makes filtering and averaging easy.
    text_table = pd.DataFrame(text_rows, columns=[column])
    wer_table = pd.DataFrame(wer_rows, columns=[column])

    # Keep only scores within each scorer's cut-off (1 and 100 respectively).
    text_table = text_table.loc[text_table[column] <= 1]
    wer_table = wer_table.loc[wer_table[column] <= 100]

    # Lower WER is better.
    text_mean = text_table["WER for DS"].mean()
    wer_mean = wer_table["WER for DS"].mean()
    print("{} : {} : {}".format(error[0], text_mean, wer_mean))
コード例 #3
0
def convert_to_wer(fpath, model_type):
    """Compute the mean WER of one model for the transcripts in a folder.

    Reads ``output_df.b`` inside ``fpath``. The unpickled object is indexed
    as three parallel lists: item 0 holds the references, item 2 the
    hypotheses used when ``model_type == "la"``, and item 1 the hypotheses
    used for any other ``model_type``. Each pair is scored with ``text.wer``
    and ``wer.wer``; scores above 1 (``text.wer``) or above 100 (``wer.wer``)
    are dropped before the statistics are printed and logged.

    Entries with an empty reference cannot be scored (``text.wer`` raises
    ``ZeroDivisionError``), so they are counted as a complete miss in both
    tables: 1.0 for ``text.wer`` and 100 for ``wer.wer``.

    Args:
        fpath: Directory containing ``output_df.b``.
        model_type: ``"la"`` selects the third pickled list; anything else
            selects the second. Also used in the column label.

    Returns:
        ``[foldername, text_mean, wer_mean]`` where ``foldername`` is the
        base name of ``fpath`` and both means are pandas Series with the
        single label ``"WER for <model_type>"``.
    """
    foldername = os.path.basename(fpath)
    fpath = os.path.join(fpath, "output_df.b")
    # Read the binary file with lists.
    # NOTE(review): pickle.load can execute arbitrary code stored in the
    # file; only point this at trusted data.
    logging.info("Reading file: " + fpath)
    with open(fpath, "rb") as f:
        text_list = pickle.load(f)

    # Find the shape
    logging.info("Number of audio segments: {}".format(len(text_list[0])))

    reference = text_list[0]
    if model_type == "la":
        hypothesis = text_list[2]
    else:
        hypothesis = text_list[1]

    wer_v = []
    wer_wer = []
    for index, ref in enumerate(reference):
        if not ref:
            # An empty reference cannot be scored. Each row must hold exactly
            # one value (the tables have a single column), and both tables
            # get a worst-case entry so they describe the same segments.
            wer_v.append([1.0])
            wer_wer.append([100])
            continue
        wer_v.append([text.wer(ref, hypothesis[index])])
        wer_wer.append([wer.wer(ref, hypothesis[index])])

    # Push the scores to data frames for easier calculations
    col_name = "WER for " + model_type
    werds_df = pd.DataFrame(wer_v, columns=[col_name])
    wer_df = pd.DataFrame(wer_wer, columns=[col_name])

    # Remove all text.py scores above 1 and all wer.py scores above 100
    werds_df = werds_df[werds_df[col_name] <= 1]
    wer_df = wer_df[wer_df[col_name] <= 100]

    # ### Lower WER is better
    # Look at these stats
    print(werds_df.describe())
    print(wer_df.describe())

    # ### Average of WER
    logging.info("Mean of WER using text.py and wer.py are given below: ")
    logging.info("text.py WER average for " + fpath + ": " +
                 str(werds_df.mean()))
    logging.info("wer.py WER average for " + fpath + ": " + str(wer_df.mean()))

    return [foldername, werds_df.mean(), wer_df.mean()]
コード例 #4
0
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Jan  2 18:01:10 2018

@author: dalonlobo
"""
from __future__ import absolute_import, division, print_function

import glob
import os
import sys

import text

if __name__ == "__main__":
    # Score every sub-folder of ./temp_playlist that holds a reference
    # transcript (*_ref.txt) and a hypothesis transcript (*_hyp.txt).
    fpath = os.path.abspath("temp_playlist")
    # os.walk yields the playlist root first; [1:] keeps only the folders
    # below it. The yielded paths already start with the absolute root.
    folders = [root for root, _dirs, _files in os.walk(fpath)][1:]
    print(folders, file=sys.stderr)
    for folder in folders:
        ref_matches = glob.glob(os.path.join(folder, "*_ref.txt"))
        hyp_matches = glob.glob(os.path.join(folder, "*_hyp.txt"))
        if not ref_matches or not hyp_matches:
            # Nested or unrelated folders may lack transcripts; skip them
            # instead of crashing on an empty glob result.
            print("Skipping {}: missing *_ref.txt or *_hyp.txt".format(folder),
                  file=sys.stderr)
            continue
        ref_fpath, hyp_fpath = ref_matches[0], hyp_matches[0]
        print(ref_fpath, hyp_fpath)
        # Compare the reference against the hypothesis (not against itself)
        # and report the score rather than discarding it.
        with open(ref_fpath) as f1, open(hyp_fpath) as f2:
            print(text.wer(f1.read(), f2.read()))
コード例 #5
0
def compare_wer(ref, hyp):
    """Return ``text.wer`` for the contents of two text files.

    Args:
        ref: Path of the reference transcript.
        hyp: Path of the hypothesis transcript.

    Returns:
        Whatever ``text.wer`` returns for the two file contents.
    """
    # Both files stay open until the score has been computed.
    with open(ref, "r") as ref_file, open(hyp, "r") as hyp_file:
        reference_text = ref_file.read()
        hypothesis_text = hyp_file.read()
        return text.wer(reference_text, hypothesis_text)