def test_wer_4(self):
    """Several substituted words should produce a WER of ~0.2667."""
    reference = ('the wood flamed up splendidly under the large brewing copper '
                 'and it sighed so deeply')
    hypothesis = ('the wood flame do splendidly under the large brewing copper '
                  'and its side so deeply')
    # Compare against the hand-computed rate within a tight float tolerance.
    computed = error_rate.wer(reference, hypothesis)
    self.assertTrue(abs(computed - 0.2666666667) < 1e-6)
def test_wer_2(self):
    """Two substitutions ('gamewell' -> 'came well') give a WER of ~0.1333."""
    reference = ('as any in england i would say said gamewell proudly that is '
                 'in his day')
    hypothesis = ('as any in england i would say said came well proudly that is '
                  'in his day')
    # Compare against the hand-computed rate within a tight float tolerance.
    computed = error_rate.wer(reference, hypothesis)
    self.assertTrue(abs(computed - 0.1333333) < 1e-6)
def tune():
    """Tune decoder parameters alpha and beta on one minibatch.

    Builds a grid of (alpha, beta) candidates from the command-line ranges,
    decodes a single batch of ``args.num_samples`` utterances with each
    candidate pair via CTC beam search, and prints the average WER for each.
    """
    # Clearer guard-clause form of the original `if not x >= 0` checks.
    if args.num_alphas < 0:
        raise ValueError("num_alphas must be non-negative!")
    if args.num_betas < 0:
        raise ValueError("num_betas must be non-negative!")

    data_generator = DataGenerator(
        vocab_filepath=args.vocab_path,
        mean_std_filepath=args.mean_std_path,
        augmentation_config='{}',  # no augmentation while tuning
        specgram_type=args.specgram_type,
        num_threads=1)
    batch_reader = data_generator.batch_reader_creator(
        manifest_path=args.tune_manifest,
        batch_size=args.num_samples,
        sortagrad=False,
        shuffle_method=None)
    # BUG FIX: `reader.next()` is Python-2-only; the builtin next() works on
    # both Python 2 and 3.
    tune_data = next(batch_reader())
    # Decode token-id transcripts back to text for WER scoring.
    target_transcripts = [
        ''.join([data_generator.vocab_list[token] for token in transcript])
        for _, transcript in tune_data
    ]

    ds2_model = DeepSpeech2Model(
        vocab_size=data_generator.vocab_size,
        num_conv_layers=args.num_conv_layers,
        num_rnn_layers=args.num_rnn_layers,
        rnn_layer_size=args.rnn_layer_size,
        use_gru=args.use_gru,
        pretrained_model_path=args.model_path,
        share_rnn_weights=args.share_rnn_weights)

    # Create the (alpha, beta) grid for the search.
    cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas)
    cand_betas = np.linspace(args.beta_from, args.beta_to, args.num_betas)
    params_grid = [(alpha, beta) for alpha in cand_alphas
                   for beta in cand_betas]

    # Evaluate each candidate pair on the same minibatch.
    for alpha, beta in params_grid:
        result_transcripts = ds2_model.infer_batch(
            infer_data=tune_data,
            decoding_method='ctc_beam_search',
            beam_alpha=alpha,
            beam_beta=beta,
            beam_size=args.beam_size,
            cutoff_prob=args.cutoff_prob,
            vocab_list=data_generator.vocab_list,
            language_model_path=args.lang_model_path,
            num_processes=args.num_proc_bsearch)
        wer_sum, num_ins = 0.0, 0
        for target, result in zip(target_transcripts, result_transcripts):
            wer_sum += wer(target, result)
            num_ins += 1
        # Guard against an empty batch so we never divide by zero.
        avg_wer = wer_sum / num_ins if num_ins > 0 else float('inf')
        print("alpha = %f\tbeta = %f\tWER = %f" % (alpha, beta, avg_wer))
def test_wer_5(self):
    """A single substitution in a long sentence gives a WER of ~0.027."""
    reference = ('all the morning they trudged up the mountain path and at noon '
                 'unc and ojo sat on a fallen tree trunk and ate the last of '
                 'the bread which the old munchkin had placed in his pocket')
    hypothesis = ('all the morning they trudged up the mountain path and at noon '
                  'unc in ojo sat on a fallen tree trunk and ate the last of '
                  'the bread which the old munchkin had placed in his pocket')
    # Compare against the hand-computed rate within a tight float tolerance.
    computed = error_rate.wer(reference, hypothesis)
    self.assertTrue(abs(computed - 0.027027027) < 1e-6)
def test_wer_3(self):
    """Mixed substitutions, insertions and deletions give a WER of ~0.2692."""
    reference = ('the lieutenant governor lilburn w boggs afterward governor '
                 'was a pronounced mormon hater and throughout the period of '
                 'the troubles he manifested sympathy with the persecutors')
    hypothesis = ('the lieutenant governor little bit how bags afterward '
                  'governor was a pronounced warman hater and throughout the '
                  'period of th troubles he manifests sympathy with the '
                  'persecutors')
    # Compare against the hand-computed rate within a tight float tolerance.
    computed = error_rate.wer(reference, hypothesis)
    self.assertTrue(abs(computed - 0.2692307692) < 1e-6)
def test_wer_7(self):
    """A whitespace-only reference must make wer() raise ValueError."""
    with self.assertRaises(ValueError):
        error_rate.wer(' ', 'Hypothesis sentence')
def test_wer_6(self):
    """Identical reference and hypothesis must score exactly zero."""
    sentence = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night'
    self.assertEqual(error_rate.wer(sentence, sentence), 0.0)
def test_wer_1(self):
    """A heavily garbled hypothesis should yield a WER of ~0.7692."""
    reference = 'i UM the PHONE IS i LEFT THE portable PHONE UPSTAIRS last night'
    hypothesis = ('i GOT IT TO the FULLEST i LOVE TO portable FROM OF STORES last '
                  'night')
    # Compare against the hand-computed rate within a tight float tolerance.
    computed = error_rate.wer(reference, hypothesis)
    self.assertTrue(abs(computed - 0.769230769231) < 1e-6)