def run_test(init_op, dataset):
    """Run one full pass over *dataset*, decode every batch, print a report.

    Prints progress, the aggregate WER/CER/mean loss and the first
    FLAGS.report_count samples, then returns the sample list produced by
    calculate_report().
    """
    # Per-sample accumulators, filled batch by batch.
    collected_files = []
    collected_losses = []
    collected_preds = []
    collected_truths = []

    bar = create_progressbar(prefix='Test epoch | ',
                             widgets=['Steps: ', progressbar.Counter(),
                                      ' | ', progressbar.Timer()]).start()
    log_progress('Test epoch...')

    steps_done = 0

    # Point the shared iterator at the requested dataset.
    session.run(init_op)

    # Single pass: fetch filenames/logits/losses, beam-search decode immediately.
    while True:
        try:
            fetched = session.run([batch_wav_filename, transposed, loss,
                                   batch_x_len, batch_y])
        except tf.errors.OutOfRangeError:
            break
        cur_files, cur_logits, cur_loss, cur_lengths, cur_transcripts = fetched

        beam_results = ctc_beam_search_decoder_batch(cur_logits, cur_lengths,
                                                     Config.alphabet,
                                                     FLAGS.beam_width,
                                                     num_processes=num_processes,
                                                     scorer=scorer,
                                                     cutoff_prob=FLAGS.cutoff_prob,
                                                     cutoff_top_n=FLAGS.cutoff_top_n)

        collected_preds.extend(result[0][1] for result in beam_results)
        collected_truths.extend(sparse_tensor_value_to_texts(cur_transcripts, Config.alphabet))
        collected_files.extend(name.decode('UTF-8') for name in cur_files)
        collected_losses.extend(cur_loss)

        steps_done += 1
        bar.update(steps_done)

    bar.finish()

    wer, cer, samples = calculate_report(collected_files, collected_truths,
                                         collected_preds, collected_losses)
    mean_loss = np.mean(collected_losses)

    # Take only the first report_count items
    report_samples = itertools.islice(samples, FLAGS.report_count)

    print('Test on %s - WER: %f, CER: %f, loss: %f' % (dataset, wer, cer, mean_loss))
    print('-' * 80)
    for sample in report_samples:
        print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.cer, sample.loss))
        print(' - wav: file://%s' % sample.wav_filename)
        print(' - src: "%s"' % sample.src)
        print(' - res: "%s"' % sample.res)
        print('-' * 80)

    return samples
def main(args, _):
    """Transcribe every wav listed in args.csv with a pool of TFLite worker
    processes and report WER/CER against the reference transcripts.

    Orchestration only: jobs go into `work_todo`, workers push results onto
    `work_done`, and the aggregate report plus optional dumps are produced
    from the drained results.
    """
    manager = Manager()
    work_todo = JoinableQueue()   # this is where we are going to store input data
    work_done = manager.Queue()   # this where we are gonna push them out

    processes = []
    for i in range(args.proc):
        worker_process = Process(target=tflite_worker,
                                 args=(args.model, args.lm, args.trie,
                                       work_todo, work_done, i),
                                 daemon=True,
                                 name='tflite_process_{}'.format(i))
        worker_process.start()  # Launch reader() as a separate python process
        processes.append(worker_process)
    print([x.name for x in processes])

    wavlist = []
    ground_truths = []
    predictions = []
    losses = []
    wav_filenames = []

    with open(args.csv, 'r') as csvfile:
        csvreader = csv.DictReader(csvfile)
        count = 0
        for row in csvreader:
            count += 1
            # Relative paths are relative to the folder the CSV file is in
            if not os.path.isabs(row['wav_filename']):
                row['wav_filename'] = os.path.join(os.path.dirname(args.csv),
                                                   row['wav_filename'])
            work_todo.put({'filename': row['wav_filename'],
                           'transcript': row['transcript']})
            # BUG FIX: the original used extend() on a str, which appended one
            # list entry per *character* of the path; we want one entry per wav.
            wav_filenames.append(row['wav_filename'])

    print('Totally %d wav entries found in csv\n' % count)
    work_todo.join()
    print('\nTotally %d wav file transcripted' % work_done.qsize())

    # Drain results; TFLite exposes no loss value, so 0.0 is a placeholder.
    while not work_done.empty():
        msg = work_done.get()
        losses.append(0.0)
        ground_truths.append(msg['ground_truth'])
        predictions.append(msg['prediction'])
        wavlist.append(msg['wav'])

    # NOTE(review): wav_filenames is in CSV order while the result lists are in
    # queue-completion order — confirm calculate_report tolerates that.
    wer, cer, samples = calculate_report(wav_filenames, ground_truths,
                                         predictions, losses)
    mean_loss = np.mean(losses)

    print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))

    if args.dump:
        with open(args.dump + '.txt', 'w') as ftxt, open(args.dump + '.out', 'w') as fout:
            for wav, txt, out in zip(wavlist, ground_truths, predictions):
                ftxt.write('%s %s\n' % (wav, txt))
                fout.write('%s %s\n' % (wav, out))
        print('Reference texts dumped to %s.txt' % args.dump)
        print('Transcription dumped to %s.out' % args.dump)
def main():
    """Parse CLI options, fan wav-transcription jobs out to TFLite worker
    processes and print the aggregate WER/CER over the whole CSV."""
    arg_parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
    arg_parser.add_argument('--model', required=True,
                            help='Path to the model (protocol buffer binary file)')
    arg_parser.add_argument('--alphabet', required=True,
                            help='Path to the configuration file specifying the alphabet used by the network')
    arg_parser.add_argument('--lm', required=True,
                            help='Path to the language model binary file')
    arg_parser.add_argument('--trie', required=True,
                            help='Path to the language model trie file created with native_client/generate_trie')
    arg_parser.add_argument('--csv', required=True,
                            help='Path to the CSV source file')
    arg_parser.add_argument('--proc', required=False, default=cpu_count(), type=int,
                            help='Number of processes to spawn, defaulting to number of CPUs')
    args = arg_parser.parse_args()

    work_todo = JoinableQueue()  # this is where we are going to store input data
    work_done = Queue()          # this where we are gonna push them out

    # Spawn the worker pool; each worker owns its own TFLite interpreter.
    processes = []
    for worker_index in range(args.proc):
        worker = Process(target=tflite_worker,
                         args=(args.model, args.alphabet, args.lm, args.trie,
                               work_todo, work_done, worker_index),
                         daemon=True,
                         name='tflite_process_{}'.format(worker_index))
        worker.start()  # Launch reader() as a separate python process
        processes.append(worker)
    print([x.name for x in processes])

    ground_truths = []
    predictions = []
    losses = []

    # Queue one job per CSV row, then wait for the pool to drain the queue.
    with open(args.csv, 'r') as csvfile:
        for row in csv.DictReader(csvfile):
            work_todo.put({'filename': row['wav_filename'],
                           'transcript': row['transcript']})
    work_todo.join()

    # Collect results; TFLite reports no loss, so 0.0 is a placeholder.
    while not work_done.empty():
        msg = work_done.get()
        losses.append(0.0)
        ground_truths.append(msg['ground_truth'])
        predictions.append(msg['prediction'])

    distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

    wer, cer, samples = calculate_report(ground_truths, predictions, distances, losses)
    mean_loss = np.mean(losses)

    print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
def run_test(init_op, dataset):
    """Evaluate *dataset* in two passes and return the report samples.

    Pass one runs the acoustic model over every batch, keeping logits,
    losses, sequence lengths and reference transcripts; pass two beam-search
    decodes the stored logits, after which a WER/CER report is printed.
    """
    acoustic_logits = []
    acoustic_losses = []
    batch_seq_lens = []
    references = []

    bar = create_progressbar(prefix='Computing acoustic model predictions | ',
                             widgets=['Steps: ', progressbar.Counter(),
                                      ' | ', progressbar.Timer()]).start()
    log_progress('Computing acoustic model predictions...')

    num_steps = 0

    # Point the shared iterator at the requested dataset.
    session.run(init_op)

    # Pass 1: collect logits and losses for the whole set.
    while True:
        try:
            fetched = session.run([transposed, loss, batch_x_len, batch_y])
        except tf.errors.OutOfRangeError:
            break
        cur_logits, cur_loss, cur_lengths, cur_transcripts = fetched
        num_steps += 1
        bar.update(num_steps)
        acoustic_logits.append(cur_logits)
        acoustic_losses.extend(cur_loss)
        batch_seq_lens.append(cur_lengths)
        references.extend(sparse_tensor_value_to_texts(cur_transcripts, Config.alphabet))
    bar.finish()

    # Pass 2: beam-search decode the stored logits.
    decoded_texts = []
    bar = create_progressbar(max_value=num_steps,
                             prefix='Decoding predictions | ').start()
    log_progress('Decoding predictions...')
    for cur_logits, cur_lengths in bar(zip(acoustic_logits, batch_seq_lens)):
        beam_results = ctc_beam_search_decoder_batch(cur_logits, cur_lengths,
                                                     Config.alphabet,
                                                     FLAGS.beam_width,
                                                     num_processes=num_processes,
                                                     scorer=scorer)
        decoded_texts.extend(result[0][1] for result in beam_results)

    edit_distances = [levenshtein(truth, pred)
                      for truth, pred in zip(references, decoded_texts)]

    wer, cer, samples = calculate_report(references, decoded_texts,
                                         edit_distances, acoustic_losses)
    mean_loss = np.mean(acoustic_losses)

    # Take only the first report_count items
    report_samples = itertools.islice(samples, FLAGS.report_count)

    print('Test on %s - WER: %f, CER: %f, loss: %f' % (dataset, wer, cer, mean_loss))
    print('-' * 80)
    for sample in report_samples:
        print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
        print(' - src: "%s"' % sample.src)
        print(' - res: "%s"' % sample.res)
        print('-' * 80)

    return samples
def run_test(init_op, dataset):
    """Run a test epoch, print a console report and dump per-sample results
    to FLAGS.result_dir/result.csv.

    The CSV gets two rows per sample (reference text, then decoded text) plus
    a trailing row carrying the average WER. Returns the sample list from
    calculate_report().
    """
    wav_filenames = []
    losses = []
    predictions = []
    ground_truths = []

    bar = create_progressbar(prefix='Test epoch | ',
                             widgets=['Steps: ', progressbar.Counter(),
                                      ' | ', progressbar.Timer()]).start()
    log_progress('Test epoch...')

    step_count = 0

    # Initialize iterator to the appropriate dataset
    session.run(init_op)

    # First pass, compute losses and transposed logits for decoding
    while True:
        try:
            batch_wav_filenames, batch_logits, batch_loss, batch_lengths, batch_transcripts = \
                session.run([batch_wav_filename, transposed, loss, batch_x_len, batch_y])
        except tf.errors.OutOfRangeError:
            break

        decoded = ctc_beam_search_decoder_batch(
            batch_logits, batch_lengths, Config.alphabet, FLAGS.beam_width,
            num_processes=num_processes, scorer=scorer,
            cutoff_prob=FLAGS.cutoff_prob, cutoff_top_n=FLAGS.cutoff_top_n)

        predictions.extend(d[0][1] for d in decoded)
        ground_truths.extend(sparse_tensor_value_to_texts(batch_transcripts, Config.alphabet))
        wav_filenames.extend(wav_filename.decode('UTF-8')
                             for wav_filename in batch_wav_filenames)
        losses.extend(batch_loss)

        step_count += 1
        bar.update(step_count)

    bar.finish()

    wer, cer, samples = calculate_report(wav_filenames, ground_truths,
                                         predictions, losses)
    mean_loss = np.mean(losses)

    # Take only the first report_count items for the console report and the
    # first report_count2 items for the CSV dump.
    # NOTE(review): assumes calculate_report() returns a list; were it a
    # one-shot iterator, the second islice would resume after the first.
    report_samples = itertools.islice(samples, FLAGS.report_count)
    report_samples2 = itertools.islice(samples, FLAGS.report_count2)

    print('Test on %s - WER: %f, CER: %f, loss: %f' % (dataset, wer, cer, mean_loss))
    print('-' * 80)
    for sample in report_samples:
        print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.cer, sample.loss))
        print(' - wav: file://%s' % sample.wav_filename)
        print(' - src: "%s"' % sample.src)
        print(' - res: "%s"' % sample.res)
        print('-' * 80)

    wer_sum = 0
    dumped_count = 0
    avg_wer = 0
    result_csv = os.path.join(FLAGS.result_dir, "result.csv")
    # BUG FIX: files fed to csv writers must be opened with newline='' or the
    # writer emits blank lines on platforms with \r\n translation.
    with open(result_csv, 'w', encoding='utf-8', newline='') as f:
        writer = mycsv.DictWriter(f, fieldnames=['wav_filename', 'text'])
        writer.writeheader()
        for sample in report_samples2:
            # One row for the reference text, one for the decoded text.
            writer.writerow({"wav_filename": sample.wav_filename, "text": sample.src})
            writer.writerow({"wav_filename": sample.wav_filename, "text": sample.res})
            wer_sum += sample.wer
            dumped_count += 1
        # BUG FIX: average over the samples actually written, not over
        # FLAGS.report_count2 — the epoch may contain fewer samples than that
        # (the old divisor deflated the average and could divide by zero).
        avg_wer = wer_sum / dumped_count if dumped_count else 0
        writer.writerow({"wav_filename": avg_wer, "text": avg_wer})
    print("*************avg_wer*************", avg_wer)
    return samples
def run_test(init_op, dataset):
    """Evaluate the model on *dataset* and return (wer, cer, mean_loss).

    Unlike the sample-reporting variants, this one returns aggregate metrics
    only; console output is gated on the enclosing scope's `verbose` flag and
    an optional summary line is appended to `result_file`.
    """
    wav_filenames = []
    losses = []
    predictions = []
    ground_truths = []

    bar = create_progressbar(prefix='Test epoch | ',
                             widgets=['Steps: ', progressbar.Counter(),
                                      ' | ', progressbar.Timer()]).start()
    log_progress('Test epoch...')

    step_count = 0

    # Initialize iterator to the appropriate dataset
    session.run(init_op)

    # Single pass: compute losses/logits and decode each batch immediately.
    while True:
        try:
            batch_wav_filenames, batch_logits, batch_loss, batch_lengths, batch_transcripts = \
                session.run([batch_wav_filename, transposed, loss, batch_x_len, batch_y])
        except tf.errors.OutOfRangeError:
            break

        decoded = ctc_beam_search_decoder_batch(
            batch_logits, batch_lengths, Config.alphabet, FLAGS.beam_width,
            num_processes=num_processes, scorer=scorer,
            cutoff_prob=FLAGS.cutoff_prob, cutoff_top_n=FLAGS.cutoff_top_n)

        predictions.extend(d[0][1] for d in decoded)
        ground_truths.extend(sparse_tensor_value_to_texts(batch_transcripts, Config.alphabet))
        wav_filenames.extend(wav_filename.decode('UTF-8')
                             for wav_filename in batch_wav_filenames)
        losses.extend(batch_loss)

        step_count += 1
        bar.update(step_count)

    bar.finish()

    wer, cer, samples = calculate_report(wav_filenames, ground_truths,
                                         predictions, losses)
    mean_loss = np.mean(losses)

    # Removed a dead `report_samples = itertools.islice(...)` — nothing in
    # this variant iterates the per-sample report.

    if verbose:
        print('Test on %s - WER: %f, CER: %f, loss: %f' % (dataset, wer, cer, mean_loss))
        print('-' * 80)

    if result_file:
        # `random` is a boolean from the enclosing scope (it shadows the
        # stdlib module name) selecting the pruning-strategy label.
        pruning_type = 'score-based' if not random else 'random'
        result_string = '''Results for evaluating model with pruning percentage of {}% and {} pruning:
    Test on {} - WER: {}, CER: {}, loss: {}
    '''.format(prune_percentage * 100, pruning_type, dataset, wer, cer, mean_loss)
        write_to_file(result_file, result_string, 'a+')

    return wer, cer, mean_loss
def main():
    """CLI driver: transcribe the wavs listed in --csv with a pool of TFLite
    worker processes and print aggregate WER/CER, optionally dumping the
    reference and decoded transcripts to text files.
    """
    parser = argparse.ArgumentParser(description='Computing TFLite accuracy')
    parser.add_argument('--model', required=True,
                        help='Path to the model (protocol buffer binary file)')
    parser.add_argument(
        '--alphabet', required=True,
        help=
        'Path to the configuration file specifying the alphabet used by the network'
    )
    parser.add_argument('--lm', required=True,
                        help='Path to the language model binary file')
    parser.add_argument(
        '--trie', required=True,
        help=
        'Path to the language model trie file created with native_client/generate_trie'
    )
    parser.add_argument('--csv', required=True,
                        help='Path to the CSV source file')
    parser.add_argument(
        '--proc', required=False, default=cpu_count(), type=int,
        help='Number of processes to spawn, defaulting to number of CPUs')
    parser.add_argument(
        '--dump', required=False, action='store_true', default=False,
        help=
        'Dump the results as text file, with one line for each wav: "wav transcription"'
    )
    args = parser.parse_args()

    manager = Manager()
    # Jobs go into work_todo; each worker pushes its result onto work_done.
    work_todo = JoinableQueue()  # this is where we are going to store input data
    work_done = manager.Queue()  # this where we are gonna push them out

    # Spawn one TFLite worker process per requested core.
    processes = []
    for i in range(args.proc):
        worker_process = Process(target=tflite_worker,
                                 args=(args.model, args.alphabet, args.lm,
                                       args.trie, work_todo, work_done, i),
                                 daemon=True,
                                 name='tflite_process_{}'.format(i))
        worker_process.start()  # Launch reader() as a separate python process
        processes.append(worker_process)
    print([x.name for x in processes])

    wavlist = []
    ground_truths = []
    predictions = []
    losses = []

    # Enqueue one transcription job per CSV row.
    with open(args.csv, 'r') as csvfile:
        csvreader = csv.DictReader(csvfile)
        count = 0
        for row in csvreader:
            count += 1
            work_todo.put({
                'filename': row['wav_filename'],
                'transcript': row['transcript']
            })
    print('Totally %d wav entries found in csv\n' % count)
    # Block until every queued job has been task_done()'d by a worker.
    work_todo.join()
    print('\nTotally %d wav file transcripted' % work_done.qsize())

    # Drain results; losses are 0.0 placeholders (TFLite reports no loss).
    while not work_done.empty():
        msg = work_done.get()
        losses.append(0.0)
        ground_truths.append(msg['ground_truth'])
        predictions.append(msg['prediction'])
        wavlist.append(msg['wav'])

    # NOTE(review): calculate_report is called with three positional args
    # here, while sibling versions pass four — confirm the signature this
    # file's import actually provides.
    wer, cer, _ = calculate_report(ground_truths, predictions, losses)
    mean_loss = np.mean(losses)

    print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))

    if args.dump:
        # --dump is a boolean flag, so the output files are named after the
        # CSV path itself (<csv>.txt for references, <csv>.out for decodes).
        with open(args.csv + '.txt', 'w') as ftxt, open(args.csv + '.out', 'w') as fout:
            for wav, txt, out in zip(wavlist, ground_truths, predictions):
                ftxt.write('%s %s\n' % (wav, txt))
                fout.write('%s %s\n' % (wav, out))
        print('Reference texts dumped to %s.txt' % args.csv)
        print('Transcription dumped to %s.out' % args.csv)
def evaluate(test_csvs, create_model, try_loading):
    """Build an evaluation graph for the model, restore the best (or latest)
    checkpoint and run a two-pass test: acoustic predictions first, then
    beam-search decoding.

    Returns the sample list produced by calculate_report().
    """
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    Config.alphabet)

    test_set = create_dataset(test_csvs,
                              batch_size=FLAGS.test_batch_size,
                              cache_path=FLAGS.test_cached_features_path)
    it = test_set.make_one_shot_iterator()
    (batch_x, batch_x_len), batch_y = it.get_next()

    # One rate per layer
    no_dropout = [None] * 6
    logits, _ = create_model(batch_x=batch_x,
                             seq_length=batch_x_len,
                             dropout=no_dropout)

    # Transpose to batch major and apply softmax for decoder
    transposed = tf.nn.softmax(tf.transpose(logits, [1, 0, 2]))

    loss = tf.nn.ctc_loss(labels=batch_y,
                          inputs=logits,
                          sequence_length=batch_x_len)

    # Created for its graph side effect (so the saver can restore it); the
    # value itself is not read here.
    global_step = tf.train.get_or_create_global_step()

    with tf.Session(config=Config.session_config) as session:
        # Create a saver using variables from the above newly created graph
        saver = tf.train.Saver()

        # Restore variables from training checkpoint
        loaded = try_loading(session, saver, 'best_dev_checkpoint', 'best validation')
        if not loaded:
            loaded = try_loading(session, saver, 'checkpoint', 'most recent')
        if not loaded:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'.format(FLAGS.checkpoint_dir))
            exit(1)

        logitses = []
        losses = []
        seq_lengths = []
        ground_truths = []

        print('Computing acoustic model predictions...')
        bar = progressbar.ProgressBar(widgets=['Steps: ', progressbar.Counter(),
                                               ' | ', progressbar.Timer()])

        step_count = 0

        # First pass, compute losses and transposed logits for decoding
        while True:
            try:
                logits, loss_, lengths, transcripts = session.run(
                    [transposed, loss, batch_x_len, batch_y])
            except tf.errors.OutOfRangeError:
                break
            step_count += 1
            bar.update(step_count)
            logitses.append(logits)
            losses.extend(loss_)
            seq_lengths.append(lengths)
            ground_truths.extend(sparse_tensor_value_to_texts(transcripts, Config.alphabet))
        bar.finish()

        predictions = []

        # Get number of accessible CPU cores for this process
        # BUG FIX: the bare `except:` also swallowed KeyboardInterrupt and
        # SystemExit; cpu_count() signals failure with NotImplementedError.
        try:
            num_processes = cpu_count()
        except NotImplementedError:
            num_processes = 1

        print('Decoding predictions...')
        # NOTE(review): `widget=` (singular) looks like it should be
        # `widgets=[...]`; confirm the installed progressbar accepts/ignores it.
        bar = progressbar.ProgressBar(max_value=step_count,
                                      widget=progressbar.AdaptiveETA)

        # Second pass, decode logits and compute WER and edit distance metrics
        for logits, seq_length in bar(zip(logitses, seq_lengths)):
            decoded = ctc_beam_search_decoder_batch(logits, seq_length,
                                                    Config.alphabet,
                                                    FLAGS.beam_width,
                                                    num_processes=num_processes,
                                                    scorer=scorer)
            predictions.extend(d[0][1] for d in decoded)

        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

        wer, cer, samples = calculate_report(ground_truths, predictions,
                                             distances, losses)
        mean_loss = np.mean(losses)

        # Take only the first report_count items
        report_samples = itertools.islice(samples, FLAGS.report_count)

        print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
        print('-' * 80)
        for sample in report_samples:
            print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
            print(' - src: "%s"' % sample.src)
            print(' - res: "%s"' % sample.res)
            print('-' * 80)

        return samples
def evaluate(test_data, inference_graph):
    """Evaluate *test_data* while exporting per-layer embeddings per batch.

    On top of the usual two-pass WER/CER evaluation, this variant fetches
    three intermediate layers for every batch and saves them as .npy files
    under Config.LAYER4/LAYER5/LAYER6; decoded transcripts are also written
    under Config.TEXT. Returns the sample list from calculate_report().
    """
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    Config.alphabet)

    def create_windows(features):
        # Returns a zero-copy strided view of overlapping context windows.
        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2 * Config.n_context + 1
        features = np.lib.stride_tricks.as_strided(
            features,
            (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        return features

    # Create overlapping windows over the features
    test_data['features'] = test_data['features'].apply(create_windows)

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, layers = inference_graph

        # Intermediate layers to export as embeddings.
        layer_4 = layers['rnn_output']
        layer_5 = layers['layer_5']
        layer_6 = layers['layer_6']

        # Transpose to batch major for decoder
        transposed = tf.transpose(outputs['outputs'], [1, 0, 2])

        labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None],
                                   name="labels")
        label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size],
                                          name="label_lengths")

        # We add 1 to all elements of the transcript to avoid any zero values
        # since we use that as an end-of-sequence token for converting the batch
        # into a SparseTensor. So here we convert the placeholder back into a
        # SparseTensor and subtract ones to get the real labels.
        sparse_labels = tf.contrib.layers.dense_to_sparse(labels_ph)
        neg_ones = tf.SparseTensor(sparse_labels.indices,
                                   -1 * tf.ones_like(sparse_labels.values),
                                   sparse_labels.dense_shape)
        sparse_labels = tf.sparse_add(sparse_labels, neg_ones)

        loss = tf.nn.ctc_loss(labels=sparse_labels,
                              inputs=layers['raw_logits'],
                              sequence_length=inputs['input_lengths'])

        # Create a saver using variables from the above newly created graph
        # (streaming-state variables are excluded from the restore mapping).
        mapping = {
            v.op.name: v
            for v in tf.global_variables()
            if not v.op.name.startswith('previous_state_')
        }
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'.format(FLAGS.checkpoint_dir))
            exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        logitses = []
        losses = []

        ## To Print the embeddings
        layer_4s = []
        layer_5s = []
        layer_6s = []

        print('Computing acoustic model predictions...')
        batch_count = len(test_data) // FLAGS.test_batch_size
        print('Batch Count: ', batch_count)
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # First pass, compute losses and transposed logits for decoding
        for batch in bar(split_data(test_data, FLAGS.test_batch_size)):
            session.run(outputs['initialize_state'])

            #TODO: Need to remove it to generalize for greater batch size!
            assert FLAGS.test_batch_size == 1, 'Embedding Extraction will only work for Batch Size = 1 for now!'

            features = pad_to_dense(batch['features'].values)
            features_len = batch['features_len'].values
            labels = pad_to_dense(batch['transcript'].values + 1)
            label_lengths = batch['transcript_len'].values

            logits, loss_, lay4, lay5, lay6 = session.run(
                [transposed, loss, layer_4, layer_5, layer_6],
                feed_dict={
                    inputs['input']: features,
                    inputs['input_lengths']: features_len,
                    labels_ph: labels,
                    label_lengths_ph: label_lengths
                })

            logitses.append(logits)
            losses.extend(loss_)
            layer_4s.append(lay4)
            layer_5s.append(lay5)
            layer_6s.append(lay6)

            print('Saving to Files: ')
            #lay4.tofile('embeddings/lay4.txt')
            #lay5.tofile('embeddings/lay5.txt')
            #lay6.tofile('embeddings/lay6.txt')
            # np.save('embeddings/lay41.npy', lay4)

            # One .npy per layer per wav, named after the batch's single file
            # (valid because test_batch_size == 1 is asserted above).
            filename = batch.fname.iloc[0]
            save_np_array(lay4, Config.LAYER4 + filename + '.npy')
            save_np_array(lay5, Config.LAYER5 + filename + '.npy')
            save_np_array(lay6, Config.LAYER6 + filename + '.npy')

            # print('\nLayer 4 Shape: ', load_np_array('embeddings/lay41.npy').shape)
            # print('\nLayer 4 Shape: ', np.load('embeddings/lay41.npy').shape)
            print('Layer 5 Shape: ', lay5.shape)
            print('Layer 6 Shape: ', lay6.shape)
            print('LAYER4: ', Config.LAYER4)

        ground_truths = []
        predictions = []
        fnames = []

        print('Decoding predictions...')
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # Get number of accessible CPU cores for this process
        try:
            num_processes = cpu_count()
        except:
            num_processes = 1

        # Second pass, decode logits and compute WER and edit distance metrics
        for logits, batch in bar(
                zip(logitses, split_data(test_data, FLAGS.test_batch_size))):
            seq_lengths = batch['features_len'].values.astype(np.int32)
            decoded = ctc_beam_search_decoder_batch(logits, seq_lengths,
                                                    Config.alphabet,
                                                    FLAGS.beam_width,
                                                    num_processes=num_processes,
                                                    scorer=scorer)
            #print('Batch\n', batch)
            ground_truths.extend(
                Config.alphabet.decode(l) for l in batch['transcript'])
            fnames.extend([l for l in batch['fname']])
            #fnames.append(batch['fname'])
            #print(fnames)
            predictions.extend(d[0][1] for d in decoded)

        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

        # NOTE(review): this calculate_report takes an extra fnames argument —
        # confirm it matches the helper imported by this file.
        wer, cer, samples = calculate_report(ground_truths, predictions,
                                             distances, losses, fnames)
        print('Sample Lengths: ', len(samples))
        mean_loss = np.mean(losses)

        # Take only the first report_count items
        report_samples = itertools.islice(samples, FLAGS.report_count)
        print(report_samples)

        print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
        print('-' * 80)
        count = 0
        for sample in report_samples:
            count += 1
            # Persist the decoded transcript next to the embeddings.
            with open(Config.TEXT + sample.fname + '.txt', 'w') as f:
                f.write(sample.res)
            print("File Name: ", sample.fname)
            print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
            print(' - src: "%s"' % sample.src)
            print(' - res: "%s"' % sample.res)
            print('-' * 80)
        print('Total Count: ', count)

        return samples
def run_test(init_op, dataset):
    """Two-pass evaluation of *dataset*: collect acoustic predictions, then
    decode them, print a WER/CER report and return the report samples."""
    logitses = []
    losses = []
    seq_lengths = []
    ground_truths = []

    bar = create_progressbar(
        prefix='Computing acoustic model predictions | ',
        widgets=['Steps: ', progressbar.Counter(), ' | ',
                 progressbar.Timer()]).start()
    log_progress('Computing acoustic model predictions...')

    # Initialize iterator to the appropriate dataset
    session.run(init_op)

    step_count = 0
    exhausted = False
    # First pass, compute losses and transposed logits for decoding
    while not exhausted:
        try:
            logits, loss_, lengths, transcripts = session.run(
                [transposed, loss, batch_x_len, batch_y])
        except tf.errors.OutOfRangeError:
            exhausted = True
        else:
            step_count += 1
            bar.update(step_count)
            logitses.append(logits)
            losses.extend(loss_)
            seq_lengths.append(lengths)
            ground_truths.extend(
                sparse_tensor_value_to_texts(transcripts, Config.alphabet))
    bar.finish()

    predictions = []
    bar = create_progressbar(max_value=step_count,
                             prefix='Decoding predictions | ').start()
    log_progress('Decoding predictions...')

    # Second pass, decode logits and compute WER and edit distance metrics
    for logits, seq_length in bar(zip(logitses, seq_lengths)):
        for candidates in ctc_beam_search_decoder_batch(
                logits, seq_length, Config.alphabet, FLAGS.beam_width,
                num_processes=num_processes, scorer=scorer):
            # Keep only the transcript of the best beam per utterance.
            predictions.append(candidates[0][1])

    distances = [levenshtein(truth, pred)
                 for truth, pred in zip(ground_truths, predictions)]

    wer, cer, samples = calculate_report(ground_truths, predictions,
                                         distances, losses)
    mean_loss = np.mean(losses)

    # Take only the first report_count items
    report_samples = itertools.islice(samples, FLAGS.report_count)

    print('Test on %s - WER: %f, CER: %f, loss: %f' %
          (dataset, wer, cer, mean_loss))
    print('-' * 80)
    for sample in report_samples:
        print('WER: %f, CER: %f, loss: %f' %
              (sample.wer, sample.distance, sample.loss))
        print(' - src: "%s"' % sample.src)
        print(' - res: "%s"' % sample.res)
        print('-' * 80)

    return samples
def evaluate(test_data, inference_graph):
    """Two-pass evaluation of *test_data* against a restored checkpoint.

    Pass one feeds batches through the inference graph, collecting the
    batch-major logits and CTC losses; pass two beam-search decodes them,
    prints a WER/CER report and returns the sample list from
    calculate_report().
    """
    scorer = Scorer(FLAGS.lm_alpha, FLAGS.lm_beta,
                    FLAGS.lm_binary_path, FLAGS.lm_trie_path,
                    Config.alphabet)

    def create_windows(features):
        # Returns a zero-copy strided view of overlapping context windows.
        num_strides = len(features) - (Config.n_context * 2)

        # Create a view into the array with overlapping strides of size
        # numcontext (past) + 1 (present) + numcontext (future)
        window_size = 2*Config.n_context+1
        features = np.lib.stride_tricks.as_strided(
            features,
            (num_strides, window_size, Config.n_input),
            (features.strides[0], features.strides[0], features.strides[1]),
            writeable=False)

        return features

    # Create overlapping windows over the features
    test_data['features'] = test_data['features'].apply(create_windows)

    with tf.Session(config=Config.session_config) as session:
        inputs, outputs, layers = inference_graph

        # Transpose to batch major for decoder
        transposed = tf.transpose(outputs['outputs'], [1, 0, 2])

        labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None],
                                   name="labels")
        label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size],
                                          name="label_lengths")

        # We add 1 to all elements of the transcript to avoid any zero values
        # since we use that as an end-of-sequence token for converting the batch
        # into a SparseTensor. So here we convert the placeholder back into a
        # SparseTensor and subtract ones to get the real labels.
        sparse_labels = tf.contrib.layers.dense_to_sparse(labels_ph)
        neg_ones = tf.SparseTensor(sparse_labels.indices,
                                   -1 * tf.ones_like(sparse_labels.values),
                                   sparse_labels.dense_shape)
        sparse_labels = tf.sparse_add(sparse_labels, neg_ones)

        loss = tf.nn.ctc_loss(labels=sparse_labels,
                              inputs=layers['raw_logits'],
                              sequence_length=inputs['input_lengths'])

        # Create a saver using variables from the above newly created graph
        # (streaming-state variables are excluded from the restore mapping).
        mapping = {v.op.name: v
                   for v in tf.global_variables()
                   if not v.op.name.startswith('previous_state_')}
        saver = tf.train.Saver(mapping)

        # Restore variables from training checkpoint
        checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not checkpoint:
            log_error('Checkpoint directory ({}) does not contain a valid checkpoint state.'.format(FLAGS.checkpoint_dir))
            exit(1)

        checkpoint_path = checkpoint.model_checkpoint_path
        saver.restore(session, checkpoint_path)

        logitses = []
        losses = []

        print('Computing acoustic model predictions...')
        batch_count = len(test_data) // FLAGS.test_batch_size
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # First pass, compute losses and transposed logits for decoding
        for batch in bar(split_data(test_data, FLAGS.test_batch_size)):
            session.run(outputs['initialize_state'])

            features = pad_to_dense(batch['features'].values)
            features_len = batch['features_len'].values
            labels = pad_to_dense(batch['transcript'].values + 1)
            label_lengths = batch['transcript_len'].values

            logits, loss_ = session.run([transposed, loss],
                                        feed_dict={
                                            inputs['input']: features,
                                            inputs['input_lengths']: features_len,
                                            labels_ph: labels,
                                            label_lengths_ph: label_lengths
                                        })

            logitses.append(logits)
            losses.extend(loss_)

        ground_truths = []
        predictions = []

        print('Decoding predictions...')
        bar = progressbar.ProgressBar(max_value=batch_count,
                                      widget=progressbar.AdaptiveETA)

        # Get number of accessible CPU cores for this process
        # BUG FIX: the bare `except:` also swallowed KeyboardInterrupt and
        # SystemExit; cpu_count() signals failure with NotImplementedError.
        try:
            num_processes = cpu_count()
        except NotImplementedError:
            num_processes = 1

        # Second pass, decode logits and compute WER and edit distance metrics
        for logits, batch in bar(
                zip(logitses, split_data(test_data, FLAGS.test_batch_size))):
            seq_lengths = batch['features_len'].values.astype(np.int32)
            decoded = ctc_beam_search_decoder_batch(logits, seq_lengths,
                                                    Config.alphabet,
                                                    FLAGS.beam_width,
                                                    num_processes=num_processes,
                                                    scorer=scorer)
            ground_truths.extend(
                Config.alphabet.decode(l) for l in batch['transcript'])
            predictions.extend(d[0][1] for d in decoded)

        distances = [levenshtein(a, b) for a, b in zip(ground_truths, predictions)]

        wer, cer, samples = calculate_report(ground_truths, predictions,
                                             distances, losses)
        mean_loss = np.mean(losses)

        # Take only the first report_count items
        report_samples = itertools.islice(samples, FLAGS.report_count)

        print('Test - WER: %f, CER: %f, loss: %f' % (wer, cer, mean_loss))
        print('-' * 80)
        for sample in report_samples:
            print('WER: %f, CER: %f, loss: %f' % (sample.wer, sample.distance, sample.loss))
            print(' - src: "%s"' % sample.src)
            print(' - res: "%s"' % sample.res)
            print('-' * 80)

        return samples