def decode_dataset(logits, test_dataset, batch_size, lm_alpha, lm_beta, mesh_x, mesh_y, labels, grid_index):
    """Re-score pre-computed acoustic logits with a beam-search + LM decoder.

    Used for LM hyper-parameter grid search: `logits[i]` holds the
    (probabilities, sizes) numpy pair for batch `i`, and the function
    returns `[grid_index, mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]`
    so the caller can place the result on its search mesh.
    Relies on module-level `args` for decoder configuration.
    """
    print("Beginning decode for {}, {}".format(lm_alpha, lm_beta))
    loader = AudioDataLoader(test_dataset, batch_size=batch_size, num_workers=0)
    blank = labels.index('_')
    greedy = GreedyDecoder(labels, blank_index=blank)
    beam = BeamCTCDecoder(labels, beam_width=args.beam_width,
                          cutoff_top_n=args.cutoff_top_n, blank_index=blank,
                          lm_path=args.lm_path, alpha=lm_alpha, beta=lm_beta,
                          num_processes=1)
    total_wer = 0
    total_cer = 0
    for batch_idx, batch in enumerate(loader):
        inputs, targets, input_percentages, target_sizes = batch
        # Targets arrive flattened; slice them back into per-utterance pieces.
        split_targets = []
        cursor = 0
        for size in target_sizes:
            split_targets.append(targets[cursor:cursor + size])
            cursor += size
        probs = torch.from_numpy(logits[batch_idx][0])
        prob_sizes = torch.from_numpy(logits[batch_idx][1])
        decoded_output, _ = beam.decode(probs, prob_sizes)
        target_strings = greedy.convert_to_strings(split_targets)
        batch_wer = 0
        batch_cer = 0
        for utt_idx, truth in enumerate(target_strings):
            transcript = decoded_output[utt_idx][0]
            reference = truth[0]
            # Per-utterance rates, normalized by reference length.
            batch_wer += beam.wer(transcript, reference) / float(len(reference.split()))
            batch_cer += beam.cer(transcript, reference) / float(len(reference))
        total_wer += batch_wer
        total_cer += batch_cer
    # Average per-utterance rates over the whole dataset.
    wer = total_wer / len(loader.dataset)
    cer = total_cer / len(loader.dataset)
    return [grid_index, mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]
# NOTE(review): fragment of a larger evaluation batch loop — the enclosing
# `for ... in test_loader`, plus model/decoder/accumulator definitions, are
# outside this view. Indentation reconstructed from the mangled source;
# confirm nesting against the original file.
offset = 0
# Unflatten the concatenated target tensor into per-utterance slices.
for size in target_sizes:
    split_targets.append(targets[offset:offset + size])
    offset += size
# Model returns a 3-tuple here; first element unused at this call site.
_, out, output_sizes = model(inputs, input_sizes)
if args.save_output:
    # add output to data array, and continue
    output_data.append((out.cpu().numpy(), output_sizes.numpy()))
decoded_output, _ = decoder.decode(out, output_sizes)
target_strings = target_decoder.convert_to_strings(split_targets)
for x in range(len(target_strings)):
    # Best hypothesis vs. reference transcription for utterance x.
    transcript, reference = decoded_output[x][0], target_strings[x][0]
    wer_inst = decoder.wer(transcript, reference)
    cer_inst = decoder.cer(transcript, reference)
    # Corpus-level accumulation: raw edit counts plus token/char totals,
    # so the final rates are length-weighted (unlike the per-utterance
    # averaging used by other variants in this file).
    total_wer += wer_inst
    total_cer += cer_inst
    num_tokens += len(reference.split())
    num_chars += len(reference)
    if args.verbose:
        print("Ref:", reference.lower())
        print("Hyp:", transcript.lower())
        print("WER:", float(wer_inst) / len(reference.split()),
              "CER:", float(cer_inst) / len(reference), "\n")
# Final corpus-level rates — presumably outside the batch loop; confirm.
wer = float(total_wer) / num_tokens
cer = float(total_cer) / num_chars
# NOTE(review): fragment of an older evaluation loop (pre-dates the API that
# returns output sizes from the model). Enclosing loop and definitions of
# model/decoder/total_* are outside this view; nesting reconstructed — confirm.
for size in target_sizes:
    split_targets.append(targets[offset:offset + size])
    offset += size
if args.cuda:
    inputs = inputs.cuda()
out = model(inputs)
out = out.transpose(0, 1)  # TxNxH
seq_length = out.size(0)
# Recover per-utterance output lengths from the padded fraction;
# NOTE mul_ mutates input_percentages in place.
sizes = input_percentages.mul_(int(seq_length)).int()
# Older decoder API: decode() returns strings directly (no tuple).
decoded_output = decoder.decode(out.data, sizes)
target_strings = decoder.process_strings(
    decoder.convert_to_strings(split_targets))
wer, cer = 0, 0
for x in range(len(target_strings)):
    # Per-utterance rates normalized by reference length.
    wer += decoder.wer(decoded_output[x], target_strings[x]) / float(
        len(target_strings[x].split()))
    cer += decoder.cer(decoded_output[x], target_strings[x]) / float(
        len(target_strings[x]))
total_cer += cer
total_wer += wer
# Dataset-level averages — presumably outside the batch loop; confirm.
wer = total_wer / len(test_loader.dataset)
cer = total_cer / len(test_loader.dataset)
print('Test Summary \t'
      'Average WER {wer:.3f}\t'
      'Average CER {cer:.3f}\t'.format(wer=wer * 100, cer=cer * 100))
# NOTE(review): fragment — the matching `if` for this `else:` and the
# surrounding batch loop are outside this view. Indentation reconstructed.
else:
    decoded_output, _, = decoder.decode(out.data, sizes)
    target_strings = target_decoder.convert_to_strings(split_targets)
    wer, cer = 0, 0
    for x in range(len(target_strings)):
        transcript, reference = decoded_output[x][0], target_strings[x][0]
        if args.save_path or args.eval:
            # Write "text(utterance-id)" lines for external scoring tools.
            # NOTE(review): bytes + str concatenation — Python 2 only.
            ref_file.write(reference.encode('utf-8') + "(" + audio_ids[x] + ")\n")
            trans_file.write(transcript.encode('utf-8') + "(" + audio_ids[x] + ")\n")
            cp = cp + 1
        # Placement of this reset relative to the block above is a
        # best-effort reconstruction — confirm against the original file.
        wer_inst = 0
        if args.eval == 'concept':
            # Concept error rate evaluation: map both strings to named-entity
            # form and accumulate raw concept edit counts.
            new_ref = convert_to_NE(reference)
            new_hyp = convert_to_NE(transcript)
            n_concept_err += decoder.wer(new_hyp, new_ref)
            n_concept_ref += len(new_ref.split())
            # print " wer_inst ", wer_inst
        else:
            wer_inst = decoder.wer(transcript, reference) / float(len(reference.split()))
            cer_inst = decoder.cer(transcript, reference) / float(len(reference))
            wer += wer_inst
            cer += cer_inst
        if args.verbose:
            print("Ref:", reference.lower())
            print("Hyp:", transcript.lower())
            # NOTE(review): on the 'concept' path cer_inst is never assigned,
            # so this print would raise NameError — confirm/fix upstream.
            print("WER:", wer_inst, "CER:", cer_inst, "\n")
    if args.nbest_path != None:
        # Append the beam_width-best list for this batch to the n-best file.
        nbest_file = open(args.nbest_path, 'a')
# NOTE(review): fragment of a timing-instrumented evaluation loop —
# beforeInferenceTime/afterInferenceTime, the enclosing `for i, ...` loop,
# and the accumulators are set outside this view. Nesting reconstructed.
beforeDecoderTime = time.time()
# Track pure model-forward time per batch.
avgTime.append(afterInferenceTime - beforeInferenceTime)
try:
    decoded_output, _, = decoder.decode(out.data, sizes)
except Exception as e:
    # NOTE(review): broad catch silently skips any batch whose decode
    # fails — deliberate best-effort behavior, but hides real errors.
    continue
target_strings = target_decoder.convert_to_strings(split_targets)
wer, cer = 0, 0
afterDecoderTime = time.time()
print('inferenceTime Total {}, only decodingTime {}, model outputTime {}'
      ''.format((afterDecoderTime - beforeInferenceTime),
                (afterDecoderTime - beforeDecoderTime),
                (afterInferenceTime - beforeInferenceTime)))
for x in range(len(target_strings)):
    transcript, reference = decoded_output[x][0], target_strings[x][0]
    # Per-utterance rates normalized by reference length.
    wer_inst = decoder.wer(transcript, reference) / float(len(reference.split()))
    cer_inst = decoder.cer(transcript, reference) / float(len(reference))
    wer += wer_inst
    cer += cer_inst
    if args.verbose:
        print("Ref:", reference.lower())
        print("Hyp:", transcript.lower())
        print("WER:", wer_inst, "CER:", cer_inst, "\n")
total_cer += cer
total_wer += wer
# Running average over utterances seen so far (assumes full batches).
temp = (i + 1) * args.batch_size
if args.verbose:
    print("average_wer: ", total_wer / temp, "average_cer:", total_cer / temp)
# Fragment ends mid-branch; the continuation is outside this view.
if decoder is not None:
    wer = total_wer / len(test_loader.dataset)
# NOTE(review): fragment of an evaluation loop that can also run decoder-less
# purely to dump logits. Enclosing loop and accumulators are outside this
# view; nesting reconstructed — confirm against the original file.
if args.cuda:
    inputs = inputs.cuda()
out, output_sizes = model(inputs, input_sizes)
if decoder is None:
    # add output to data array, and continue
    output_data.append((out.numpy(), output_sizes.numpy()))
    continue
decoded_output, _ = decoder.decode(out.data, output_sizes.data)
target_strings = target_decoder.convert_to_strings(split_targets)
for x in range(len(target_strings)):
    transcript, reference = decoded_output[x][0], target_strings[x][0]
    wer_inst = decoder.wer(transcript, reference)
    cer_inst = decoder.cer(transcript, reference)
    # Length-weighted corpus totals (raw edit counts / token+char counts).
    total_wer += wer_inst
    total_cer += cer_inst
    num_tokens += len(reference.split())
    num_chars += len(reference)
    if args.verbose:
        print("Ref:", reference.lower())
        print("Hyp:", transcript.lower())
        print("WER:", float(wer_inst) / len(reference.split()),
              "CER:", float(cer_inst) / len(reference), "\n")
# Summary — presumably after the batch loop; fragment is truncated below.
if decoder is not None:
    wer = float(total_wer) / num_tokens
    cer = float(total_cer) / num_chars
    print('Test Summary \t'
def decode_dataset(logits, test_dataset, batch_size, lm_alpha, lm_beta, mesh_x, mesh_y, index, labels, eval):
    """Re-score pre-computed logits with a beam/LM decoder; optional concept eval.

    `logits[i]` holds the (probabilities, sizes) numpy pair for batch `i`.
    When `eval == 'concept'`, reference/transcription files are written and
    scored with an external perl script, and the concept error rate replaces
    WER in the returned row. Returns
    `[mesh_x, mesh_y, lm_alpha, lm_beta, wer_or_coner, cer]`.
    Relies on module-level `args` for decoder/file configuration.
    """
    print("Beginning decode for {}, {}".format(lm_alpha, lm_beta))
    test_loader = AudioDataLoader(test_dataset, batch_size=batch_size, num_workers=0)
    target_decoder = GreedyDecoder(labels, blank_index=labels.index('_'))
    decoder = BeamCTCDecoder(labels, beam_width=args.beam_width,
                             cutoff_top_n=args.cutoff_top_n,
                             blank_index=labels.index('_'), lm_path=args.lm_path,
                             alpha=lm_alpha, beta=lm_beta, num_processes=1)
    model_name = re.sub('.json.pth.tar', '', os.path.basename(args.model_path))
    # BUG FIX: both files must default to None — the original closed
    # ref_file/trans_file unconditionally, crashing when eval != 'concept'
    # (trans_file was not even initialized on that path).
    ref_file = None
    trans_file = None
    eval_dir = None
    if eval == 'concept':
        eval_dir = "%s/%s/%s" % (os.path.dirname(args.output_path), model_name, index)
        if not os.path.exists(eval_dir):
            os.makedirs(eval_dir)
        manifest_stem = re.sub('.csv', '', os.path.basename(args.test_manifest))
        ref_file = open("%s/%s_reference.txt" % (eval_dir, manifest_stem), 'w')
        trans_file = open("%s/%s_transcription.txt" % (eval_dir, manifest_stem), 'w')
    total_cer, total_wer = 0, 0
    for i, (data) in enumerate(test_loader):
        inputs, targets, input_percentages, target_sizes, audio_ids = data
        # unflatten targets
        split_targets = []
        offset = 0
        for size in target_sizes:
            split_targets.append(targets[offset:offset + size])
            offset += size
        out = torch.from_numpy(logits[i][0])
        sizes = torch.from_numpy(logits[i][1])
        decoded_output, _, _, _, _ = decoder.decode(out, sizes)
        target_strings = target_decoder.convert_to_strings(split_targets)
        wer, cer = 0, 0
        for x in range(len(target_strings)):
            transcript, reference = decoded_output[x][0], target_strings[x][0]
            if eval == 'concept':
                # BUG FIX: files are opened in text mode, so write str —
                # the original concatenated bytes (.encode) with str, a
                # TypeError on Python 3. Format: "text(utterance-id)".
                ref_file.write("%s(%s)\n" % (reference, audio_ids[x]))
                trans_file.write("%s(%s)\n" % (transcript, audio_ids[x]))
            # Per-utterance rates, normalized by reference length.
            wer_inst = decoder.wer(transcript, reference) / float(
                len(reference.split()))
            cer_inst = decoder.cer(transcript, reference) / float(
                len(reference))
            wer += wer_inst
            cer += cer_inst
        total_cer += cer
        total_wer += wer
    # BUG FIX: only close the files that were actually opened.
    if ref_file is not None:
        ref_file.close()
    if trans_file is not None:
        trans_file.close()
    wer = total_wer / len(test_loader.dataset)
    cer = total_cer / len(test_loader.dataset)
    if eval == 'concept':
        # Concept error rate evaluation via external sclite-style perl script.
        cmd = "perl /lium/buster1/ghannay/deepSpeech2/deepspeech.pytorch/data/eval.sclit_cer.pl %s" % (
            eval_dir)
        print("cmd ", cmd)
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, shell=True)
        coner, error = p.communicate()
        print(" coner ", coner)
        # Script is expected to print a bare percentage on stdout.
        return [mesh_x, mesh_y, lm_alpha, lm_beta, float(coner) / 100, cer]
    else:
        return [mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]
# unflatten targets split_targets = [] offset = 0 for size in target_sizes: split_targets.append(targets[offset:offset + size]) offset += size if args.cuda: inputs = inputs.cuda() out = model(inputs) out = out.transpose(0, 1) # TxNxH seq_length = out.size(0) sizes = input_percentages.mul_(int(seq_length)).int() decoded_output = decoder.decode(out.data, sizes) target_strings = decoder.process_strings(decoder.convert_to_strings(split_targets)) wer, cer = 0, 0 for x in range(len(target_strings)): wer += decoder.wer(decoded_output[x], target_strings[x]) / float(len(target_strings[x].split())) cer += decoder.cer(decoded_output[x], target_strings[x]) / float(len(target_strings[x])) total_cer += cer total_wer += wer wer = total_wer / len(test_loader.dataset) cer = total_cer / len(test_loader.dataset) print('Test Summary \t' 'Average WER {wer:.3f}\t' 'Average CER {cer:.3f}\t'.format(wer=wer * 100, cer=cer * 100))
def decode_dataset(logits, test_dataset, batch_size, lm_alpha, lm_beta, mesh_x, mesh_y, labels):
    """Re-score pre-computed logits with a beam/LM decoder (FeatLoader variant).

    `logits[i]` holds the (probabilities, sizes) numpy pair for batch `i`.
    Returns `[mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]` for the caller's
    LM-parameter search mesh. Relies on module-level `args` and `logger`.
    """
    print("Beginning decode for {}, {}".format(lm_alpha, lm_beta))
    test_loader = FeatLoader(test_dataset, batch_size=batch_size, num_workers=0)
    target_decoder = GreedyDecoder(labels, blank_index=labels.index('_'))
    decoder = BeamCTCDecoder(labels, beam_width=args.beam_width,
                             cutoff_top_n=args.cutoff_top_n,
                             blank_index=labels.index('_'), lm_path=args.lm_path,
                             alpha=lm_alpha, beta=lm_beta, num_processes=1)
    total_cer, total_wer = 0, 0
    # decoding_log = []
    for i, (data) in enumerate(test_loader):
        inputs, targets, input_percentages, target_sizes = data
        # unflatten targets
        split_targets = []
        offset = 0
        for size in target_sizes:
            split_targets.append(targets[offset:offset + size])
            offset += size
        out = torch.from_numpy(logits[i][0])
        sizes = torch.from_numpy(logits[i][1])
        decoded_output, _ = decoder.decode(out, sizes)
        target_strings = target_decoder.convert_to_strings(split_targets)
        wer, cer = 0, 0
        for x in range(len(target_strings)):
            transcript, reference = decoded_output[x][0], target_strings[x][0]
            # Per-utterance rates normalized by reference length.
            wer_inst = decoder.wer(transcript, reference) / float(
                len(reference.split()))
            cer_inst = decoder.cer(transcript, reference) / float(
                len(reference))
            wer += wer_inst
            cer += cer_inst
            # ver1: write result to logFile — can't do this because the
            # multiprocessing workers cannot share the file handle:
            # logFile.write('decoding : ' + transcript)
            # logFIle.write('reference : ' + reference)
            # logFile.write('WER = ' + str(wer_inst) + ', CER = ' + str(cer_inst))
            # Randomly sample a fraction of utterances to print in detail.
            if (random.uniform(0, 1) < float(args.detail_log_print_prob)):
                print('decoding : ' + transcript)
                print('reference : ' + reference)
                print('WER = ' + str(wer_inst) + ', CER = ' + str(cer_inst))
                print(' ')
            # ver1 (abandoned): collect decoding log in memory
            # decoding_log_sample = []
            # decoding_log_sample.append(transcript)
            # decoding_log_sample.append(reference)
            # decoding_log.append(decoding_log_sample)
            # ver2 (abandoned): logging module — thread safe but does not
            # write anything to file from the worker processes:
            # logging.info('decoding : ' + transcript)
            # logging.info('reference : ' + reference)
            # logging.info('WER = ' + str(wer_inst) + ', CER = ' + str(cer_inst))
            # ver3: ERROR level is a workaround to force output through the
            # multiprocessing workers' logger despite the configured level.
            # NOTE(review): nesting reconstructed from mangled source —
            # confirm these are unconditional, not inside the random-print if.
            logger.error('decoding : ' + transcript)
            logger.error('reference : ' + reference)
            logger.error('WER = ' + str(wer_inst) + ', CER = ' + str(cer_inst))
            logger.error(' ')
        total_cer += cer
        total_wer += wer
    # Average per-utterance rates over the whole dataset.
    wer = total_wer / len(test_loader.dataset)
    cer = total_cer / len(test_loader.dataset)
    return [mesh_x, mesh_y, lm_alpha, lm_beta, wer, cer]