def main(fpath):
    """Log WER statistics for the DeepSpeech and Livai hypotheses in a folder.

    The first file matching ``*_df.b`` inside ``fpath`` is unpickled and
    iterated with ``iterrows()``, reading the columns ``"Reference"``,
    ``"Deepspeech hypothesis"`` and ``"Livai hypothesis"`` from each row.
    Every row is scored with both ``text.wer`` and ``wer.wer``; summary
    statistics and the mean of each score set are written to the log.

    Args:
        fpath: Folder that contains the pickled ``*_df.b`` file.

    Raises:
        IndexError: If no ``*_df.b`` file exists in ``fpath``.
    """
    # Locate the pickled data frame inside the folder.
    matches = glob.glob(fpath + "/*_df.b")
    if not matches:
        # Same exception type the bare ``[0]`` lookup used to raise, but with
        # a message that says what is actually missing.
        raise IndexError("No '*_df.b' file found in: " + fpath)
    fpath = matches[0]
    logging.info("Reading file: %s", fpath)
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at files produced by a trusted pipeline.
    with open(fpath, "rb") as f:
        vdf = pickle.load(f)
    # Find the shape
    logging.info("vdf: Number of audio segments:{}".format(vdf.shape[0]))

    wer_v = []
    wer_wer = []
    for _, row in vdf.iterrows():
        ref = row["Reference"]
        ds_hyp = row["Deepspeech hypothesis"]
        la_hyp = row["Livai hypothesis"]
        if not ref:
            # Because text.wer throws ZeroDivisionError if ref is null.
            # NOTE(review): such rows are only counted in the text.py scores;
            # the wer.py scores skip them entirely.
            wer_v.append([1.0, 1.0])
            continue
        wer_v.append([text.wer(ref, ds_hyp), text.wer(ref, la_hyp)])
        wer_wer.append([wer.wer(ref, ds_hyp), wer.wer(ref, la_hyp)])

    columns = ["WER for DS", "WER for LA"]

    # Push the text.py WER to a data frame for easier calculations
    werds_df = pd.DataFrame(wer_v, columns=columns)
    # Remove all the values whose WER > 1
    werds_df = werds_df[werds_df["WER for DS"] <= 1]
    werds_df = werds_df[werds_df["WER for LA"] <= 1]

    # Push the wer.py WER to a data frame for easier calculations
    wer_df = pd.DataFrame(wer_wer, columns=columns)
    # Remove all the values whose WER > 100
    # (presumably wer.py reports a percentage -- TODO confirm)
    wer_df = wer_df[wer_df["WER for DS"] <= 100]
    wer_df = wer_df[wer_df["WER for LA"] <= 100]

    # Lower WER is better. The summary statistics used to be computed and
    # thrown away; log them so they are actually visible.
    logging.info("text.py WER statistics:\n%s", werds_df.describe())
    logging.info("wer.py WER statistics:\n%s", wer_df.describe())

    # Average of WER
    logging.info("Mean of WER using text.py and wer.py are given below: ")
    logging.info("text.py WER average for %s: %s", fpath, werds_df.mean())
    logging.info("wer.py WER average for %s: %s", fpath, wer_df.mean())
def convert_to_wer(error):
    """Print the mean text.py and wer.py WER for one set of transcripts.

    ``error`` is indexed positionally: ``error[0]`` is printed as the label,
    ``error[1]`` holds the references and ``error[2]`` the hypotheses, which
    are looked up by the position of each reference.

    A ``ZeroDivisionError`` from ``text.wer`` is scored as ``1.0`` and one
    from ``wer.wer`` as ``100``. Scores above ``1`` (text.py) or ``100``
    (wer.py) are dropped before the means are taken.
    """
    references = error[1]
    hypotheses = error[2]

    text_scores = []
    wer_scores = []
    for position, ref_text in enumerate(references):
        hyp_text = hypotheses[position]

        # Score with text.py, falling back to the worst value on failure
        try:
            text_score = text.wer(ref_text, hyp_text)
        except ZeroDivisionError:
            text_score = 1.0
        text_scores.append([text_score])

        # Score with wer.py, falling back to the worst value on failure
        try:
            wer_score = wer.wer(ref_text, hyp_text)
        except ZeroDivisionError:
            wer_score = 100
        wer_scores.append([wer_score])

    column = "WER for DS"

    # text.py scores: keep only rows with WER <= 1
    text_frame = pd.DataFrame(text_scores, columns=[column])
    text_mask = text_frame[column] <= 1
    text_frame = text_frame[text_mask]

    # wer.py scores: keep only rows with WER <= 100
    wer_frame = pd.DataFrame(wer_scores, columns=[column])
    wer_mask = wer_frame[column] <= 100
    wer_frame = wer_frame[wer_mask]

    # Lower WER is better; report "<label> : <text.py mean> : <wer.py mean>"
    label = error[0]
    text_mean = text_frame["WER for DS"].mean()
    wer_mean = wer_frame["WER for DS"].mean()
    print("{} : {} : {}".format(label, text_mean, wer_mean))
def convert_to_wer(fpath, model_type):
    """Compute WER statistics for one model from a pickled transcript file.

    Unpickles ``output_df.b`` from the folder ``fpath``. The loaded object is
    indexed positionally: index 0 holds the references, index 2 the
    hypotheses used when ``model_type == "la"`` and index 1 the hypotheses
    used for any other ``model_type``. Each reference/hypothesis pair is
    scored with ``text.wer`` and ``wer.wer``; summary statistics are printed
    and the means are logged.

    Args:
        fpath: Folder containing ``output_df.b``.
        model_type: Selects the hypothesis list (``"la"`` or anything else)
            and is used in the column name ``"WER for <model_type>"``.

    Returns:
        A list ``[foldername, text_mean, wer_mean]`` where ``foldername`` is
        ``os.path.basename(fpath)`` and the means are the results of
        ``DataFrame.mean()`` on the filtered text.py and wer.py scores.
    """
    foldername = os.path.basename(fpath)
    fpath = os.path.join(fpath, "output_df.b")
    # Read the binary file with lists
    logging.info("Reading file: %s", fpath)
    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at files produced by a trusted pipeline.
    with open(fpath, "rb") as f:
        text_list = pickle.load(f)
    # Find the shape
    logging.info("Number of audio segments: {}".format(len(text_list[0])))

    reference = text_list[0]
    if model_type == "la":
        hypothesis = text_list[2]
    else:
        hypothesis = text_list[1]

    wer_v = []
    wer_wer = []
    for index, ref in enumerate(reference):
        if not ref:
            # Because text.wer throws ZeroDivisionError if ref is null.
            # The row must have exactly one value: the frame built below has
            # a single column, and a two-value row made its construction fail.
            # NOTE(review): such rows are only counted in the text.py scores;
            # the wer.py scores skip them entirely.
            wer_v.append([1.0])
            continue
        wer_v.append([text.wer(ref, hypothesis[index])])
        wer_wer.append([wer.wer(ref, hypothesis[index])])

    col_name = "WER for " + model_type

    # Push the text.py WER to a data frame for easier calculations
    werds_df = pd.DataFrame(wer_v, columns=[col_name])
    # Remove all the values whose WER > 1
    werds_df = werds_df[werds_df[col_name] <= 1]

    # Push the wer.py WER to a data frame for easier calculations
    wer_df = pd.DataFrame(wer_wer, columns=[col_name])
    # Remove all the values whose WER > 100
    # (presumably wer.py reports a percentage -- TODO confirm)
    wer_df = wer_df[wer_df[col_name] <= 100]

    # Lower WER is better; look at these stats
    print(werds_df.describe())
    print(wer_df.describe())

    # Average of WER
    text_mean = werds_df.mean()
    wer_mean = wer_df.mean()
    logging.info("Mean of WER using text.py and wer.py are given below: ")
    logging.info("text.py WER average for %s: %s", fpath, text_mean)
    logging.info("wer.py WER average for %s: %s", fpath, wer_mean)
    return [foldername, text_mean, wer_mean]
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 2 18:01:10 2018

@author: dalonlobo
"""
from __future__ import absolute_import, division, print_function

import glob
import os
import sys

import text

if __name__ == "__main__":
    # Walk temp_playlist and print the WER between the first *_ref.txt and
    # the first *_hyp.txt found in each directory below it.
    fpath = os.path.abspath("temp_playlist")
    # os.walk yields the top directory first, then everything beneath it.
    folders = [root for root, _dirs, _files in os.walk(fpath)]
    # Drop the first entry (temp_playlist itself); only sub-folders are scored.
    subfolders = folders[1:]
    print(subfolders, file=sys.stderr)

    for folder in subfolders:
        # ``folder`` is already absolute because the walk started from an
        # absolute path, so it needs no further joining with ``fpath``.
        ref_matches = glob.glob(os.path.join(folder, "*_ref.txt"))
        hyp_matches = glob.glob(os.path.join(folder, "*_hyp.txt"))
        if not ref_matches or not hyp_matches:
            # Nested or incomplete folders would otherwise crash on ``[0]``.
            print("Skipping {}: missing *_ref.txt or *_hyp.txt".format(folder),
                  file=sys.stderr)
            continue
        ref_fpath = ref_matches[0]
        hyp_fpath = hyp_matches[0]
        print(ref_fpath, hyp_fpath)
        # Compare the reference against the hypothesis (not against itself)
        # and show the score instead of discarding it.
        with open(ref_fpath) as f1, open(hyp_fpath) as f2:
            print(text.wer(f1.read(), f2.read()))
def compare_wer(ref, hyp):
    """Return ``text.wer`` for the full contents of two files.

    Args:
        ref: Passed to ``open()`` to read the reference text.
        hyp: Passed to ``open()`` to read the hypothesis text.

    Returns:
        Whatever ``text.wer(reference_text, hypothesis_text)`` returns.
    """
    with open(ref, "r") as ref_file:
        with open(hyp, "r") as hyp_file:
            # Read the reference first, then the hypothesis, and score them
            # while both files are still open.
            reference_text = ref_file.read()
            hypothesis_text = hyp_file.read()
            return text.wer(reference_text, hypothesis_text)