def calculate_wer2(input_to_softmax, model_path, words=False):
    """Compute the mean word (or character) error rate of a CTC model
    over the cached validation features.

    Parameters
    ----------
    input_to_softmax : Keras model whose output feeds a softmax/CTC layer.
    model_path : path to the weights file to load into the model.
    words : if True score word-level WER, otherwise character-level.

    Returns
    -------
    float -- mean error rate over the evaluated samples.
    """
    wers = []
    input_to_softmax.load_weights(model_path)
    # Evaluate at most 100 samples.  BUG FIX: the original unconditionally
    # set the count to 100, which raises IndexError on validation sets
    # with fewer than 100 utterances.
    n_samples = min(100, len(data_gen.valid_texts))
    for index in range(n_samples):
        transcr = data_gen.valid_texts[index]
        # Features are pre-computed and cached instead of re-extracted:
        # data_point = data_gen.normalize(data_gen.featurize(audio_path))
        data_point = valid_cache[index]
        prediction = input_to_softmax.predict(
            np.expand_dims(data_point, axis=0))
        output_length = [input_to_softmax.output_length(data_point.shape[0])]
        # ctc_decode returns 0-based label ids; +1 maps them back to the
        # vocabulary expected by int_sequence_to_text.
        pred_ints = (K.eval(K.ctc_decode(prediction, output_length)[0][0])
                     + 1).flatten().tolist()
        pred = ''.join(int_sequence_to_text(pred_ints))
        if words:
            w = wer(transcr.split(), pred.split())
        else:
            w = wer(list(transcr), list(pred))
        wers.append(w)
        if index % 100 == 0:
            print(index, len(data_gen.valid_texts), wers[-1])
    print("FINAL WER:", sum(wers) / len(wers), "words:", words)
    return sum(wers) / len(wers)
def main():
    """Edge-computing ASR test run: transcribe on the edge device, fall
    back to the cloud when edge confidence is too low, optionally score
    WER against a reference transcript, and report timings."""
    parser = argparse.ArgumentParser(
        description='Edge Computing for ASR - Test Run')
    parser.add_argument('audio_file', type=str,
                        help='Path to the audio file to run (WAV format)')
    parser.add_argument('--ref_file', nargs="?", type=str,
                        help='Path to the audio reference file')
    parser.add_argument('--cloud_ip', type=str, nargs='?',
                        help='IP of cloud server')
    parser.add_argument('--cloud_port', type=int, nargs='?',
                        help='Port of cloud server')
    args = parser.parse_args()
    # TODO: set IP and Port
    # python_bind.set(IP)
    # TODO: check audio file!
    # TODO: check ref file!

    # Main Logic
    edge_args = get_args_edge()
    cloud_args = get_args_cloud_ds2()
    get_results_cloud = get_results_cloud_ds2

    edge_start_time = timer()
    edge_results = get_results_edge(edge_args, args.audio_file)
    edge_total_time = timer() - edge_start_time
    print("Edge Result:\n"+edge_results['edge_result'])
    print("Log Prob:"+str(edge_results['edge_log_prob']))

    # Only query the cloud when the edge result is not confident enough.
    # BUG FIX: the original referenced cloud_results/cloud_total_time
    # unconditionally below, raising NameError whenever the edge result
    # was good enough.
    cloud_results = None
    cloud_total_time = None
    if (edge_results['edge_log_prob']>0.1):
        print("Result is good enough")
    else:
        cloud_start_time = timer()
        cloud_results = get_results_cloud(args.audio_file, args=cloud_args)
        cloud_total_time = timer() - cloud_start_time
        print("Cloud Result:\n"+cloud_results['cloud_result'])

    # Calculate WER against the reference transcript, if provided.
    if args.ref_file:
        from wer import wer
        with open(args.ref_file, 'r') as ref_text:
            ref = ref_text.readlines()[0]
        edge_wer, edge_match_count, ref_token_count = wer(
            ref, edge_results['edge_result'])
        print('Edge WER: {:10.3%} ({:4d} / {:4d})'.format(
            edge_wer, edge_match_count, ref_token_count))
        if cloud_results is not None:
            cloud_wer, cloud_match_count, ref_token_count = wer(
                ref, cloud_results['cloud_result'])
            # BUG FIX: this line previously printed "Edge WER" again.
            print('Cloud WER: {:10.3%} ({:4d} / {:4d})'.format(
                cloud_wer, cloud_match_count, ref_token_count))

    # Collect the timing entries and strip the non-timing keys.
    times = dict(edge_results)
    if cloud_results is not None:
        times.update(cloud_results)
    times.pop('cloud_result', None)
    times.pop('edge_result', None)
    times.pop('edge_log_prob', None)
    print(edge_total_time)
    if cloud_total_time is not None:
        print(cloud_total_time)
    pprint.pprint(times)
def main(fpath):
    """Compare WER of the Deepspeech and Livai hypotheses stored in a
    pickled DataFrame, using both the text.py and wer.py scorers, and log
    the per-scorer means."""
    # Path to the pickled DataFrame derived from the mp4 file
    fpath = glob.glob(fpath + "/*_df.b")[0]
    logging.info("Reading file: " + fpath)
    with open(fpath, "rb") as f:
        vdf = pickle.load(f)
    # Find the shape
    logging.info("vdf: Number of audio segments:{}".format(vdf.shape[0]))

    wer_v = []
    wer_wer = []
    for val in vdf.iterrows():
        ref = val[1]["Reference"]
        ds_hyp = val[1]["Deepspeech hypothesis"]
        la_hyp = val[1]["Livai hypothesis"]
        if not ref:
            # text.wer throws ZeroDivisionError if ref is null; score the
            # pair as a complete miss instead.
            wer_v.append([1.0, 1.0])
            continue
        wer_v.append([text.wer(ref, ds_hyp), text.wer(ref, la_hyp)])
        wer_wer.append([wer.wer(ref, ds_hyp), wer.wer(ref, la_hyp)])

    # text.py WER is a ratio in [0, 1]; drop outlier rows above 1.
    werds_df = pd.DataFrame(wer_v, columns=["WER for DS", "WER for LA"])
    werds_df = werds_df[werds_df["WER for DS"] <= 1]
    werds_df = werds_df[werds_df["WER for LA"] <= 1]

    # wer.py WER is a percentage; drop outlier rows above 100.
    wer_df = pd.DataFrame(wer_wer, columns=["WER for DS", "WER for LA"])
    wer_df = wer_df[wer_df["WER for DS"] <= 100]
    wer_df = wer_df[wer_df["WER for LA"] <= 100]

    # Lower WER is better.  BUG FIX: the describe() results were computed
    # and silently discarded; print them as convert_to_wer does.
    print(werds_df.describe())
    print(wer_df.describe())

    # Average of WER
    logging.info("Mean of WER using text.py and wer.py are given below: ")
    logging.info("text.py WER average for " + fpath + ": " + str(werds_df.mean()))
    logging.info("wer.py WER average for " + fpath + ": " + str(wer_df.mean()))
def convert_to_wer(error):
    """Score each hypothesis against its reference with both the text.py
    (ratio) and wer.py (percent) scorers, then print the label from
    error[0] alongside the two mean WERs."""
    refs = error[1]
    hyps = error[2]

    text_scores = []
    werpy_scores = []
    for idx, ref in enumerate(refs):
        try:
            text_scores.append([text.wer(ref, hyps[idx])])
        except ZeroDivisionError:
            # Empty reference: full error on the ratio scale.
            text_scores.append([1.0])
        try:
            werpy_scores.append([wer.wer(ref, hyps[idx])])
        except ZeroDivisionError:
            # Empty reference: full error on the percent scale.
            werpy_scores.append([100])

    col_name = "WER for DS"
    # text.py WER is a ratio; discard rows above 1.
    text_df = pd.DataFrame(text_scores, columns=[col_name])
    text_df = text_df[text_df[col_name] <= 1]
    # wer.py WER is a percentage; discard rows above 100.
    werpy_df = pd.DataFrame(werpy_scores, columns=[col_name])
    werpy_df = werpy_df[werpy_df[col_name] <= 100]

    # Lower WER is better.
    print("{} : {} : {}".format(error[0], text_df["WER for DS"].mean(),
                                werpy_df["WER for DS"].mean()))
def generate_translate(input: TranslationInput):
    """Translate input.source and bucket every hypothesis into minor or
    major changes by its TER against input.reference."""
    print("we are in generate_translation function--------")
    if input.target_prefix:
        prefix_tokens = [tokenize(input.target_prefix)]
    else:
        prefix_tokens = None
    print("target_prefix:-", prefix_tokens)

    hypotheses = translator.translate_batch([tokenize(input.source)],
                                            target_prefix=prefix_tokens,
                                            num_hypotheses=input.num_hyp,
                                            return_alternatives=True,
                                            return_scores=True)

    api_results = dict(minor_changes=[], major_changes=[])
    for hyp in hypotheses[0]:
        candidate = detokenize(hyp['tokens'])
        print("translation", candidate)
        ter = wer(input.reference, candidate)
        entry = dict(translation=candidate,
                     score=hyp['score'],
                     ter=ter['wer_result'])
        # A TER below 20% counts as a minor change.
        if ter['wer_result'] < .2:
            api_results['minor_changes'].append(entry)
        else:
            api_results['major_changes'].append(entry)
    return api_results
def evaluate(testset):
    """Greedily decode every item in testset with a frozen copy of the
    model and print the overall symbol error rate."""
    net = model.copy()  # separate copy so evaluation state is isolated
    net.train = False
    total_symbol = 0
    error_symbol = 0
    for item in testset:
        net.reset_state()  # clear recurrent state between samples
        x_data = os.path.join(data_root, item[0])
        y_data = item[1]
        y, _ = forward_one_sample(net, x_data, y_data, SIL_idx, useGPU)
        if y is None:
            continue
        # Frame-wise posteriors followed by greedy (argmax) decoding.
        y_prob = [F.softmax(frame).data for frame in y]
        # Observe the model output by uncommenting the following line:
        # plot_ctc(y_prob)
        y_dec = [frame_prob.argmax() for frame_prob in y_prob]
        num_seq = utils.compress_seq(y_dec, SIL_idx)
        print('decode sequence: ', num_seq)
        print('target sequence: ', y_data)
        total_symbol += len(y_data)
        error_symbol += wer(y_data, num_seq)
    print('WER: ', str(float(error_symbol) / total_symbol * 100) + '%')
def main():
    """Read text from stdin, segment it with maxmatch (optionally the
    punctuation-aware variant), print the result, and optionally score it
    with WER."""
    sentence = sys.stdin.read()
    args = get_argparser().parse_args()
    dict_file = args.filename
    use_punct = args.p
    do_eval = args.e
    print(sentence)
    stripped = sentence.replace(" ", "")
    if use_punct:
        segmented = maxmatch_punct(stripped, get_dicts(dict_file))
    else:
        segmented = maxmatch(stripped, get_surface_csv(dict_file))
    print(' '.join(segmented))
    if do_eval:
        wer.wer(sentence, segmented)
def convert_to_wer(fpath, model_type):
    """Load pickled (reference, ds_hyp, la_hyp) lists and report WER
    statistics for the selected model using both the text.py (ratio) and
    wer.py (percent) scorers.

    Parameters
    ----------
    fpath : directory containing output_df.b.
    model_type : "la" scores the Livai hypotheses, anything else scores
        the Deepspeech ones.

    Returns
    -------
    [foldername, mean text.py WER, mean wer.py WER]
    """
    foldername = os.path.basename(fpath)
    fpath = os.path.join(fpath, "output_df.b")
    # Read the binary file with lists
    logging.info("Reading file: " + fpath)
    with open(fpath, "rb") as f:
        text_list = pickle.load(f)
    # Find the shape
    logging.info("Number of audio segments: {}".format(len(text_list[0])))

    reference = text_list[0]
    if model_type == "la":
        hypothesis = text_list[2]
    else:
        hypothesis = text_list[1]

    wer_v = []
    wer_wer = []
    for index, ref in enumerate(reference):
        if not ref:
            # text.wer throws ZeroDivisionError if ref is null.
            # BUG FIX: the original appended a two-element row [1.0, 1.0]
            # to a one-column frame (ValueError in pd.DataFrame) and added
            # nothing to the wer.py list for null refs.
            wer_v.append([1.0])
            wer_wer.append([100])
            continue
        wer_v.append([text.wer(ref, hypothesis[index])])
        wer_wer.append([wer.wer(ref, hypothesis[index])])

    col_name = "WER for " + model_type
    # text.py WER is a ratio; drop rows above 1.
    werds_df = pd.DataFrame(wer_v, columns=[col_name])
    werds_df = werds_df[werds_df[col_name] <= 1]
    # wer.py WER is a percentage; drop rows above 100.
    wer_df = pd.DataFrame(wer_wer, columns=[col_name])
    wer_df = wer_df[wer_df[col_name] <= 100]

    # Lower WER is better.
    print(werds_df.describe())
    print(wer_df.describe())

    # Average of WER
    logging.info("Mean of WER using text.py and wer.py are given below: ")
    logging.info("text.py WER average for " + fpath + ": " + str(werds_df.mean()))
    logging.info("wer.py WER average for " + fpath + ": " + str(wer_df.mean()))
    return [foldername, werds_df.mean(), wer_df.mean()]
def calculate_ctm_mistakes(gold_ctms_df, created_ctms_df):
    """Compare gold and created CTM word timings file-by-file.

    For every token the WER alignment marks OK or SUB, record the
    (start, end) time differences between the created and the gold entry;
    count and report insertions and deletions per file.

    Both arguments are DataFrames with Filename/start/end/token columns
    (presumably one row per word occurrence -- TODO confirm).
    Returns a list of [start_difference, end_difference] pairs.
    """
    ctm_mistakes_seconds = []
    list_of_filenames = gold_ctms_df["Filename"].unique().tolist()
    for filename in list_of_filenames:
        df_current_gold_ctm = gold_ctms_df.loc[
            gold_ctms_df['Filename'] == filename][["start", "end", "token"]]
        df_current_created_ctm = created_ctms_df.loc[
            created_ctms_df['Filename'] == filename][["start", "end", "token"]]
        # Alignment rows look like ["OP", "REF", "HYP"]:
        # "OK","SUB","INS", "***", "DEL", "***"
        wer_results, token_comparisons = \
            wer(df_current_gold_ctm["token"].tolist(),
                df_current_created_ctm["token"].tolist(),
                True)
        # Iterate three things in lockstep: the alignment ops, the gold
        # rows, and the created rows.
        gold_iterator = df_current_gold_ctm.itertuples()
        created_iterator = df_current_created_ctm.itertuples()
        number_of_dels = 0
        number_of_ins = 0
        # token_comparisons[0] is skipped -- presumably a header row.
        for comparison_row in token_comparisons[1:]:
            if comparison_row[0] == "OK" or comparison_row[0] == "SUB":
                # Both sides advance; compare their timings.
                gold_ctm_row = next(gold_iterator)
                created_ctm_row = next(created_iterator)
                # Sanity check: an OK op must carry identical tokens.
                if (comparison_row[0] == "OK") and (gold_ctm_row.token != created_ctm_row.token):
                    print("gold token is {}, but created {}".format(
                        gold_ctm_row.token, created_ctm_row.token))
                start_difference = created_ctm_row.start - gold_ctm_row.start
                end_difference = created_ctm_row.end - gold_ctm_row.end
                ctm_mistakes_seconds.append([start_difference, end_difference])
            elif comparison_row[0] == "INS":
                # Token exists only in the created CTM.
                created_ctm_row = next(created_iterator)
                number_of_ins += 1
            elif comparison_row[0] == "DEL":
                # Token exists only in the gold CTM.
                gold_ctm_row = next(gold_iterator)
                number_of_dels += 1
            else:
                print("Something went terribly wrong")
                break
        print("for {} the number of dels was {} and ins {}".format(
            filename, number_of_dels, number_of_ins))
    return ctm_mistakes_seconds
def score_per_wer(batch_size=16):
    """Evaluate the grapheme-to-phoneme model on the validation set.

    For every word, compares the predicted phoneme sequence against all
    CMUdict pronunciations and keeps the lowest WER; also counts exact
    matches for a word-level error rate.

    Returns the mean per-word error rate (float).
    """
    # Load vocabulary mappings.  BUG FIX: the original leaked both file
    # handles via pickle.load(open(...)).
    with open('/data/lisa/exp/kumarrit/vctk/phon2code.pkl') as f:
        phon_to_idx = pickle.load(f)
    with open('/data/lisa/exp/kumarrit/vctk/char2code.pkl') as f:
        char_to_idx = pickle.load(f)
    # .items() / range() replace the Python-2-only iteritems() / xrange()
    # and behave identically here.
    idx_to_char = {x: y for y, x in char_to_idx.items()}
    remove_num = lambda c: re.sub(r'\d+', '', c.lower())
    model = nltk.corpus.cmudict.dict()
    start = time.time()
    valid_costs = []
    valid_itr = data_loader('valid', batch_size)
    count_exact_match = 0
    iteration = 0
    for ch, ch_mask, ph, ph_mask in valid_itr:
        iteration += 1
        pred_phons = predict_fn(ch, ch_mask)
        assert pred_phons.shape[0] == batch_size, "Incorrect Shape"
        for i in range(batch_size):
            tmp_wer = []
            # Reference characters: truncate at the 0 terminator.
            ref = ch[i].flatten().tolist()
            end_idx = ref.index(0)
            ref = ref[:end_idx]
            hyp = pred_phons[i].flatten().tolist()
            try:
                end_idx = hyp.index(0)
                hyp = hyp[:end_idx]
            except ValueError:
                # No terminator emitted: keep the whole hypothesis.
                # (BUG FIX: was a bare except.)
                pass
            # All CMUdict pronunciations of the word; ref[1:] skips the
            # leading token -- presumably a start marker, TODO confirm.
            phs = model[''.join([idx_to_char[x] for x in ref[1:]])]
            for ph in phs:
                ph = [phon_to_idx[remove_num(y)] for y in ph]
                tmp_wer += [wer(ph, hyp)]
                if len(ph) == len(hyp) and (np.asarray(hyp) == np.asarray(ph)).all():
                    count_exact_match += 1
            valid_costs.append(min(tmp_wer))
    # BUG FIX: the exact-match rate divided by iteration * 16 even when
    # batch_size != 16; use the actual batch size.
    print("Validation Completed! PER: {} WER: {} time: {}".format(
        np.mean(valid_costs),
        100 - 100 * (count_exact_match / (iteration * float(batch_size))),
        time.time() - start))
    return np.mean(valid_costs)
def get_features_tgt(self, target, parallelsentence):
    """Calculates word error rate for the given target sentence, against
    the reference sentence.

    @param target: The target sentence to be scored
    @type target: sentence.sentence.SimpleSentence
    @param parallelsentence: sentence pair providing the reference
    @rtype: dict
    @return: dictionary containing the 'ref-wer' attribute, or an empty
        dict when no reference is available
    """
    target = target.get_string()
    try:
        ref = parallelsentence.get_reference().get_string()
    except Exception:
        # BUG FIX: was a bare except, which also swallowed
        # KeyboardInterrupt/SystemExit.
        log.error("No reference. Aborting WER calculation")
        return {}
    # wer() expects the hypothesis string and a list of references.
    wer_value = wer(target, [ref])
    return {'ref-wer': str(wer_value)}
def get_wer(self):
    """Read the reference and test transcripts, tokenize on whitespace,
    and return their word error rate.

    BUG FIX: the original used the Python-2-only file() builtin, leaked
    both file handles, and discarded the wer result entirely.
    """
    with open(self.args.refpath) as ref_file:
        ref_tokens = ref_file.read().split()
    with open(self.args.tspath) as hyp_file:
        hyp_tokens = hyp_file.read().split()
    return wer.wer(ref_tokens, hyp_tokens)
from wer import wer

# Demo: score a slightly corrupted hypothesis (h) against its reference
# (r), both pre-tokenized into word lists.  The hypothesis differs from
# the reference by a handful of insertions, deletions and substitutions
# ("iss", "string"/"strings", "words"/"word", ...).
r = [
    'In', 'computational', 'linguistics', 'and', 'computer', 'science',
    ',', 'edit', 'distance', 'is', 'a', 'way', 'of', 'quantifying', 'how',
    'dissimilar', 'two', 'strings', 'are', 'to', 'one', 'another', 'by',
    'counting', 'the', 'minimum', 'number', 'of', 'operations', 'required',
    'to', 'transform', 'one', 'string', 'into', 'the', 'other.', 'Edit',
    'distances', 'find', 'applications', 'in', 'natural', 'language',
    'processing,', 'where', 'automatic', 'spelling', 'correction', 'can',
    'determine', 'candidate', 'corrections', 'for', 'a', 'misspelled',
    'word', 'by', 'selecting', 'words', 'from', 'a', 'dictionary', 'that',
    'have', 'a', 'low', 'distance', 'to', 'the', 'word', 'in', 'question'
]
h = [
    'In', 'linguistics', 'and', 'computer', 'science', 'theory', ',',
    'edit', 'distance', 'iss', 'a', 'way', 'of', 'quantifying', 'how',
    'dissimilar', 'the', 'two', 'string', 'is', 'to', 'one', 'another',
    'by', 'counting', 'the', 'number', 'of', 'operations', 'required',
    'to', 'transform', 'one', 'string', 'into', 'the', 'other.', 'Edit',
    'distances', 'find', 'applications', 'in', 'natural', 'language',
    'processing,', 'where', 'automatic', 'spelling', 'correction', 'can',
    'determine', 'candidate', 'corrections', 'for', 'a', 'misspelled',
    'word', 'by', 'selecting', 'words', 'from', 'a', 'dictionary', 'that',
    'have', 'a', 'low', 'distance', 'to', 'the', 'words', 'in', 'question'
]
# The return value is discarded -- presumably wer() prints its own
# report; confirm against wer.py.
wer(r, h)
# train_inputs2=np.append(train_inputs.values(), axis=1)
# print(type(train_inputs2))
# print (num_examples)
# print(d)

# Greedily decode every training example and report the mean WER against
# the original transcripts.
totalwer = 0
# BUG FIX: the original iterated range(1, num_examples) with index i-1,
# skipping the last example while still dividing by num_examples.
for i in range(num_examples):
    feed2 = {
        inputs: train_inputs[i],
        targets: train_targets[i],
        seq_len: train_seq_len[i]
    }
    d = session.run(decoded[0], feed_dict=feed2)
    # print (d)
    # Sparse-tensor values -> characters.
    str_decoded = ''.join([chr(x) for x in np.asarray(d[1]) + FIRST_INDEX])
    # Replacing blank label to none
    str_decoded = str_decoded.replace(chr(ord('z') + 1), '')
    # Replacing space label to space
    str_decoded = str_decoded.replace(chr(ord('a') - 1), ' ')
    # print('Original:\n%s' % original[i])
    # print('Decoded:\n%s' % str_decoded)
    # BUG FIX: the original called decoded.split() -- `decoded` is the TF
    # decode op, not the decoded string.
    totalwer = totalwer + wer(original[i].split(), str_decoded.split())
print("average wer = " + str(totalwer / num_examples))
# (removed a stray trailing `x` statement, which raised NameError)
# save_path = saver.save(session, "./orange18.ckpt")
# print("Model saved in file: %s" % save_path)
def main():
    """Train a seq2seq conversation model, tracking train/test loss, BLEU
    and WER per epoch, with periodic checkpointing and early stopping."""
    ###########################
    #### create dictionary ####
    ###########################
    if os.path.exists('./data/corpus/dictionary.dict'):
        if args.lang == 'ja':
            corpus = JaConvCorpus(file_path=None, batch_size=batchsize,
                                  size_filter=True)
        else:
            corpus = ConvCorpus(file_path=None, batch_size=batchsize)
        corpus.load(load_dir='./data/corpus/')
    else:
        if args.lang == 'ja':
            corpus = JaConvCorpus(file_path=data_file, batch_size=batchsize,
                                  size_filter=True)
        else:
            corpus = ConvCorpus(file_path=data_file, batch_size=batchsize)
        corpus.save(save_dir='./data/corpus/')
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))

    ######################
    #### create model ####
    ######################
    model = Seq2Seq(len(corpus.dic.token2id), feature_num=feature_num,
                    hidden_num=hidden_num, batch_size=batchsize,
                    gpu_flg=args.gpu)
    if args.gpu >= 0:
        model.to_gpu()
    optimizer = optimizers.Adam(alpha=0.001)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5))
    # optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    ##########################
    #### create ID corpus ####
    ##########################
    input_mat = []
    output_mat = []
    max_input_ren = max_output_ren = 0
    for input_text, output_text in zip(corpus.posts, corpus.cmnts):
        # convert to list
        # input_text.reverse()  # encode words in a reverse order
        # input_text.insert(0, corpus.dic.token2id["<eos>"])
        output_text.append(corpus.dic.token2id["<eos>"])
        # update max sentence length
        max_input_ren = max(max_input_ren, len(input_text))
        max_output_ren = max(max_output_ren, len(output_text))
        input_mat.append(input_text)
        output_mat.append(output_text)
    # padding: inputs are left-padded, outputs right-padded
    for li in input_mat:
        insert_num = max_input_ren - len(li)
        for _ in range(insert_num):
            li.insert(0, corpus.dic.token2id['<pad>'])
    for li in output_mat:
        insert_num = max_output_ren - len(li)
        for _ in range(insert_num):
            li.append(corpus.dic.token2id['<pad>'])
    # create batch matrix (time-major: rows are timesteps)
    input_mat = np.array(input_mat, dtype=np.int32).T
    output_mat = np.array(output_mat, dtype=np.int32).T
    # separate corpus into Train and Test
    perm = np.random.permutation(len(corpus.posts))
    test_input_mat = input_mat[:, perm[0:0 + testsize]]
    test_output_mat = output_mat[:, perm[0:0 + testsize]]
    train_input_mat = input_mat[:, perm[testsize:]]
    train_output_mat = output_mat[:, perm[testsize:]]
    list_of_references = []
    for text_ndarray in test_output_mat.T:
        reference = text_ndarray.tolist()
        # BUG FIX: `w_id is not -1` relied on CPython small-int identity
        # caching; use value comparison.
        references = [[w_id for w_id in reference if w_id != -1]]
        list_of_references.append(references)

    #############################
    #### train seq2seq model ####
    #############################
    accum_loss = 0
    train_loss_data = []
    test_loss_data = []
    bleu_score_data = []
    wer_score_data = []
    for num, epoch in enumerate(range(n_epoch)):
        total_loss = test_loss = 0
        batch_num = 0
        perm = np.random.permutation(len(corpus.posts) - testsize)
        # for training
        for i in range(0, len(corpus.posts) - testsize, batchsize):
            # select batch data
            input_batch = train_input_mat[:, perm[i:i + batchsize]]
            output_batch = train_output_mat[:, perm[i:i + batchsize]]
            # Encode a sentence
            model.initialize()  # initialize cell
            model.encode(input_batch, train=True)
            # Decode from encoded context, teacher-forcing the targets
            end_batch = xp.array(
                [corpus.dic.token2id["<start>"] for _ in range(batchsize)])
            first_words = output_batch[0]
            loss, predict_mat = model.decode(end_batch, first_words, train=True)
            next_ids = first_words
            accum_loss += loss
            for w_ids in output_batch[1:]:
                loss, predict_mat = model.decode(next_ids, w_ids, train=True)
                next_ids = w_ids
                accum_loss += loss
            # learn model
            model.cleargrads()  # initialize all grad to zero
            accum_loss.backward()  # back propagation
            optimizer.update()
            total_loss += float(accum_loss.data)
            batch_num += 1
            print('Epoch: ', num, 'Batch_num', batch_num,
                  'batch loss: {:.2f}'.format(float(accum_loss.data)))
            accum_loss = 0
        # for testing
        list_of_hypotheses = []
        for i in range(0, testsize, batchsize):
            # select test batch data
            input_batch = test_input_mat[:, i:i + batchsize]
            output_batch = test_output_mat[:, i:i + batchsize]
            # Encode a sentence
            model.initialize()  # initialize cell
            # NOTE(review): encode/decode use train=True during evaluation
            # -- looks unintentional (dropout stays on); confirm.
            model.encode(input_batch, train=True)
            # Decode from encoded context, feeding back the argmax
            end_batch = xp.array(
                [corpus.dic.token2id["<start>"] for _ in range(batchsize)])
            first_words = output_batch[0]
            loss, predict_mat = model.decode(end_batch, first_words, train=True)
            next_ids = xp.argmax(predict_mat.data, axis=1)
            # BUG FIX: was `test_loss += loss` (a Variable) here but
            # `loss.data` below; accumulate raw values consistently so
            # float(test_loss) works.
            test_loss += loss.data
            if args.gpu >= 0:
                hypotheses = [cuda.to_cpu(next_ids)]
            else:
                hypotheses = [next_ids]
            for w_ids in output_batch[1:]:
                loss, predict_mat = model.decode(next_ids, w_ids, train=True)
                next_ids = xp.argmax(predict_mat.data, axis=1)
                test_loss += loss.data
                if args.gpu >= 0:
                    hypotheses.append(cuda.to_cpu(next_ids))
                else:
                    hypotheses.append(next_ids)
            # collect hypotheses for calculating BLEU score
            hypotheses = np.array(hypotheses).T
            for hypothesis in hypotheses:
                text_list = hypothesis.tolist()
                # BUG FIX: `is not -1` -> `!= -1` (value comparison)
                list_of_hypotheses.append(
                    [w_id for w_id in text_list if w_id != -1])
        # calculate BLEU score from test (develop) data
        bleu_score = nltk.translate.bleu_score.corpus_bleu(
            list_of_references, list_of_hypotheses,
            weights=(0.25, 0.25, 0.25, 0.25))
        bleu_score_data.append(bleu_score)
        print('Epoch: ', num, 'BLEU SCORE: ', bleu_score)
        # calculate WER score from test (develop) data
        wer_score = 0
        for index, references in enumerate(list_of_references):
            wer_score += wer(references[0], list_of_hypotheses[index])
        wer_score /= len(list_of_references)
        wer_score_data.append(wer_score)
        print('Epoch: ', num, 'WER SCORE: ', wer_score)
        # save model and optimizer
        if (epoch + 1) % 10 == 0:
            print('-----', epoch + 1, ' times -----')
            print('save the model and optimizer')
            serializers.save_hdf5('data/' + str(epoch) + '.model', model)
            serializers.save_hdf5('data/' + str(epoch) + '.state', optimizer)
        # display the on-going status
        print('Epoch: ', num, 'Train loss: {:.2f}'.format(total_loss),
              'Test loss: {:.2f}'.format(float(test_loss)))
        train_loss_data.append(float(total_loss / batch_num))
        test_loss_data.append(float(test_loss))
        # evaluate a test loss: stop when the last 10 losses rose at every
        # step.  BUG FIX: `> 9` could never be true (end_flg holds at most
        # len(check_loss) - 1 == 9 entries), so early stopping was dead.
        check_loss = test_loss_data[-10:]  # check out the last 10 loss data
        end_flg = [
            j for j in range(len(check_loss) - 1)
            if check_loss[j] < check_loss[j + 1]
        ]
        if len(check_loss) == 10 and len(end_flg) == 9:
            print('Probably it is over-fitting. So stop to learn...')
            break
    # save loss data
    with open('./data/loss_train_data.pkl', 'wb') as f:
        pickle.dump(train_loss_data, f)
    with open('./data/loss_test_data.pkl', 'wb') as f:
        pickle.dump(test_loss_data, f)
    with open('./data/bleu_score_data.pkl', 'wb') as f:
        pickle.dump(bleu_score_data, f)
    with open('./data/wer_score_data.pkl', 'wb') as f:
        pickle.dump(wer_score_data, f)
def get_wer(s1, s2):
    """Symmetric WER: the mean of wer(a, b) and wer(b, a) over the
    whitespace-tokenized input strings."""
    tokens_a = s1.split()
    tokens_b = s2.split()
    forward = wer(tokens_a, tokens_b)
    backward = wer(tokens_b, tokens_a)
    return 0.5 * forward + 0.5 * backward
# Transcribe matched clean/enhanced wav pairs with the Google Speech API
# and report the mean WER of the enhanced audio against the clean audio.
# (Relies on `r` (a Recognizer) and `enhanced_dir` defined elsewhere.)
clean_dir = "/mnt/gv0/user_sylar/segan_data/clean_test_wav_16k"
file_name = os.listdir(clean_dir)
wer_list = []
for f in file_name:
    print(clean_dir + '/' + f, enhanced_dir + '/' + f)
    with sr.WavFile(clean_dir + '/' + f) as source:
        clean_audio = r.record(source)
    with sr.WavFile(enhanced_dir + '/' + f) as source2:
        enhanced_audio = r.record(source2)
    # Retry until the API call succeeds or the audio is unintelligible.
    while True:
        try:
            gt = r.recognize_google(clean_audio)
            result = r.recognize_google(enhanced_audio)
            encoded_gt = gt.encode('utf8').split()
            encoded_r = result.encode('utf8').split()
            print(encoded_gt, encoded_r)
            score = wer(encoded_gt, encoded_r)
            wer_list.append(score)
            print(score)
            break
        except sr.RequestError:
            # No internet connection
            # NOTE(review): no backoff or retry limit here -- a persistent
            # outage loops forever; confirm that is intended.
            print("No internet connection")
        except sr.UnknownValueError:
            # Unintelligible audio counts as a 100% error rate.
            print("Sorry sir, but, I could not understand what you said!")
            wer_list.append(100.0)
            break
print(np.mean(np.array(wer_list)))
# (Tail of a Kaldi decode helper whose `def` lies above this chunk.)
    if kaldi_decoder.decode_wav_file(wav_file):
        s, l = kaldi_decoder.get_decoded_string()
        return s
    else:
        return "***ERROR: decoding of %s failed." % wav_file


def append_to_file(file, line):
    """Append a single newline-terminated line to the given file path."""
    with open(file, 'a') as f:
        f.write(line + '\n')


AUDIO_DIR = 'audio'

# files.csv rows: <wav filename>,<reference transcript>
with open(AUDIO_DIR + '/' + 'files.csv', 'r') as files:
    for file in files.readlines():
        splitter = file.split(',')
        if splitter[1][-1] == '\n':
            # strip the trailing newline from the reference column
            splitter[1] = splitter[1][0:-1]
        audio_file = AUDIO_DIR + '/' + splitter[0]
        recognized_text = deepspeech.recognize(audio_file)
        print(recognized_text)
        # wer() presumably returns two string fields here -- TODO confirm
        # against the wer module; the result rows are hyp,ref,w,r.
        w, r = wer(splitter[1].split(), recognized_text.split())
        append_to_file('deepspeech.csv', recognized_text + ',' + splitter[1] + ',' + w + ',' + r)
        '''recognized_text = kaldi_recognize(audio_file)
        w, r = wer(splitter[1].split(), recognized_text.split())
        append_to_file('kaldi.csv', recognized_text + ',' + splitter[1] + ',' + w + ',' + r)
        recognized_text = openseq2seq.recognize(audio_file)
        w, r = wer(splitter[1].split(), recognized_text.split())
        append_to_file('openseq2seq.csv', recognized_text + ',' + splitter[1] + ',' + w + ',' + r)'''
import sys
from wer import wer

# Compare each ground-truth line with the corresponding maxmatch output
# line and print the mean WER over all compared pairs.
with open("ground_truth.txt") as g_truth, open("mm_result.txt") as maxmatch:
    g = g_truth.readline()
    m = maxmatch.readline()
    # taking mean.  BUG FIX: the counter started at 1, so the original
    # divided by (number of pairs + 1), biasing the mean low; also fixed
    # the "numorator" typo and the files now close on error too.
    numerator = 0.0
    count = 0
    while m and g:
        numerator += float(wer(g, m))
        g = g_truth.readline()
        m = maxmatch.readline()
        count += 1

# Guard against empty input files.
print(numerator / count if count else 0.0)
def analytic_score_sentences(self, sentence_tuples):
    """Return the mean WER over (hypothesis, reference) sentence pairs."""
    scores = []
    for hypothesis, reference in sentence_tuples:
        # wer() takes the hypothesis and a list of references.
        scores.append(wer(hypothesis, [reference]))
    return {'ref-wer': average(scores)}