# WER evaluation, PyTorch version: forward the features through the network,
# decode, and score. `E` is the Kaldi-interface toolkit imported elsewhere in
# this script.
def wer_fun(model, feat, normalizeBias):
    global args
    # Transform the KaldiDict feature data in order to forward it through the network.
    temp = E.KaldiDict()
    utts = feat.utts
    with torch.no_grad():
        for index, utt in enumerate(utts):
            data = torch.Tensor(feat[utt][:, np.newaxis, :])
            data = torch.autograd.Variable(data)
            if args.gpu >= 0:
                data = data.cuda(args.gpu)
            out1, out2 = model(data, is_training=False, device=args.gpu)
            out = out1.cpu().detach().numpy() - normalizeBias
            temp[utt] = out
            print("(testing) Forward network {}/{}".format(index + 1, len(utts)),
                  end=" " * 20 + '\r')
    # Transform KaldiDict to KaldiArk format.
    print('(testing) Transform to ark', end=" " * 20 + '\r')
    amp = temp.ark
    # Decode and obtain the lattice.
    hmm = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_test/final.mdl'
    hclg = args.TIMITpath + '/exp/tri3/graph/HCLG.fst'
    lexicon = args.TIMITpath + '/exp/tri3/graph/words.txt'
    print('(testing) Generate Lattice', end=" " * 20 + '\r')
    lattice = E.decode_lattice(amp, hmm, hclg, lexicon, args.minActive,
                               args.maxActive, args.maxMemory, args.beam,
                               args.latBeam, args.acwt)
    # Sweep the language-model weight from minLmwt to maxLmwt and get the 1-best words.
    print('(testing) Get 1-best words', end=" " * 20 + '\r')
    outs = lattice.get_1best(lmwt=args.minLmwt, maxLmwt=args.maxLmwt,
                             outFile=args.outDir + '/outRaw')
    # If the reference file does not exist yet, make it.
    phonemap = args.TIMITpath + '/conf/phones.60-48-39.map'
    outFilter = args.TIMITpath + '/local/timit_norm_trans.pl -i - -m {} -from 48 -to 39'.format(phonemap)
    if not os.path.isfile(args.outDir + '/test_filt.txt'):
        refText = args.TIMITpath + '/data/test/text'
        cmd = 'cat {} | {} > {}/test_filt.txt'.format(refText, outFilter, args.outDir)
        (_, _) = E.run_shell_cmd(cmd)
    # Score the WER at each language-model weight and keep the smallest one.
    print('(testing) Score', end=" " * 20 + '\r')
    minWER = None
    for k in range(args.minLmwt, args.maxLmwt + 1, 1):
        cmd = 'cat {} | {} > {}/test_prediction_filt.txt'.format(outs[k], outFilter, args.outDir)
        (_, _) = E.run_shell_cmd(cmd)
        os.remove(outs[k])
        score = E.wer('{}/test_filt.txt'.format(args.outDir),
                      '{}/test_prediction_filt.txt'.format(args.outDir), mode='all')
        if minWER is None or score['WER'] < minWER:
            minWER = score['WER']
        os.remove('{}/test_prediction_filt.txt'.format(args.outDir))
    return minWER
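# Note: the PyTorch variant above subtracts `normalizeBias` directly from the
# network output, so the model is expected to emit log-probabilities. If a model
# returns raw logits instead, a log-softmax would be needed first; a minimal
# sketch of that step (an assumption about the model, not part of the original):
#
#     out = torch.log_softmax(out1, dim=1).cpu().detach().numpy() - normalizeBias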
# WER evaluation, Chainer version of the same routine.
def wer_fun(model, testFeat, normalizeBias):
    global args
    # Forward the decode test data through the network.
    temp = E.KaldiDict()
    print('(testing) Forward network', end=" " * 20 + '\r')
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        for utt in testFeat.keys():
            data = [cp.array(testFeat[utt], dtype=cp.float32)]
            out1, out2 = model(data)
            out = F.log_softmax(out1, axis=1)
            out.to_cpu()
            temp[utt] = out.array - normalizeBias
    # Transform KaldiDict to KaldiArk format.
    print('(testing) Transform to ark', end=" " * 20 + '\r')
    amp = temp.ark
    # Decode and obtain the lattice.
    hmm = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_test/final.mdl'
    hclg = args.TIMITpath + '/exp/tri3/graph/HCLG.fst'
    lexicon = args.TIMITpath + '/exp/tri3/graph/words.txt'
    print('(testing) Generate Lattice', end=" " * 20 + '\r')
    lattice = E.decode_lattice(amp, hmm, hclg, lexicon, args.minActive,
                               args.maxActive, args.maxMemory, args.beam,
                               args.latBeam, args.acwt)
    # Sweep the language-model weight from minLmwt to maxLmwt and get the 1-best words.
    print('(testing) Get 1-best words', end=" " * 20 + '\r')
    outs = lattice.get_1best(lmwt=args.minLmwt, maxLmwt=args.maxLmwt,
                             outFile=args.outDir + '/outRaw')
    # If the reference file does not exist yet, make it.
    phonemap = args.TIMITpath + '/conf/phones.60-48-39.map'
    outFilter = args.TIMITpath + '/local/timit_norm_trans.pl -i - -m {} -from 48 -to 39'.format(phonemap)
    if not os.path.isfile(args.outDir + '/test_filt.txt'):
        refText = args.TIMITpath + '/data/test/text'
        cmd = 'cat {} | {} > {}/test_filt.txt'.format(refText, outFilter, args.outDir)
        (_, _) = E.run_shell_cmd(cmd)
    # Score the WER at each language-model weight and keep the smallest one.
    print('(testing) Score WER', end=" " * 20 + '\r')
    minWER = None
    for k in range(args.minLmwt, args.maxLmwt + 1, 1):
        cmd = 'cat {} | {} > {}/test_prediction_filt.txt'.format(outs[k], outFilter, args.outDir)
        (_, _) = E.run_shell_cmd(cmd)
        os.remove(outs[k])
        score = E.wer('{}/test_filt.txt'.format(args.outDir),
                      '{}/test_prediction_filt.txt'.format(args.outDir), mode='all')
        if minWER is None or score['WER'] < minWER:
            minWER = score['WER']
        os.remove('{}/test_prediction_filt.txt'.format(args.outDir))
    return minWER
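# A minimal usage sketch of wer_fun (the names `mlp` and `bias` are placeholders;
# `args` and a prepared `testFeat` are assumed to come from the surrounding script):
#
#     wer = wer_fun(mlp, testFeat, bias)
#     print('Test WER: {}%'.format(wer))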
def decode_test(outDimPdf=1968, outDimPho=48):
    global args
    if args.preModel == '':
        raise Exception("Expected a pretrained model.")
    elif not os.path.isfile(args.preModel):
        raise Exception("No such file: {}.".format(args.preModel))

    print("\n############## Parameter Configuration ##############")

    # Show the configuration and also write it to file.
    def configLog(message, f):
        print(message)
        f.write(message + '\n')

    f = open(args.outDir + '/configure', "w")
    configLog('Start System Time:{}'.format(
        datetime.datetime.now().strftime("%Y-%m-%d %X")), f)
    configLog('Host Name:{}'.format(socket.gethostname()), f)
    configLog('Fix Random Seed:{}'.format(args.randomSeed), f)
    configLog('GPU ID:{}'.format(args.gpu), f)
    configLog('Pretrained Model:{}'.format(args.preModel), f)
    configLog('Output Folder:{}'.format(args.outDir), f)
    configLog('Use CMVN:{}'.format(args.useCMVN), f)
    configLog('Splice N Frames:{}'.format(args.splice), f)
    configLog('Add N Deltas:{}'.format(args.delta), f)
    configLog('Normalize Chunk:{}'.format(args.normalizeChunk), f)
    configLog('Normalize AMP:{}'.format(args.normalizeAMP), f)
    configLog('Decode Minimum Active:{}'.format(args.minActive), f)
    configLog('Decode Maximum Active:{}'.format(args.maxActive), f)
    configLog('Decode Maximum Memory:{}'.format(args.maxMemory), f)
    configLog('Decode Beam:{}'.format(args.beam), f)
    configLog('Decode Lattice Beam:{}'.format(args.latBeam), f)
    configLog('Decode Acoustic Weight:{}'.format(args.acwt), f)
    configLog('Decode Minimum Language Weight:{}'.format(args.minLmwt), f)
    configLog('Decode Maximum Language Weight:{}'.format(args.maxLmwt), f)
    f.close()

    print("\n############## Decode Test ##############")

    # ------------------ STEP 1: Load Pretrained Model ------------------
    print('Load Model...')
    # Initialize the model. The input dimension grows with deltas and splicing.
    featDim = 40
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)
    model = MLP(featDim, outDimPdf, outDimPho)
    chainer.serializers.load_npz(args.preModel, model)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)

    # ------------------ STEP 2: Prepare Test Data ------------------
    print('Prepare decode test data...')
    # fMLLR feature file.
    testFilePath = args.TIMITpath + '/data-fmllr-tri3/test/feats.scp'
    testFeat = E.load(testFilePath)
    # Apply CMVN.
    if args.useCMVN:
        testUttSpk = args.TIMITpath + '/data-fmllr-tri3/test/utt2spk'
        testCmvnState = args.TIMITpath + '/data-fmllr-tri3/test/cmvn.ark'
        testFeat = E.use_cmvn(testFeat, testCmvnState, testUttSpk)
    # Add deltas.
    if args.delta > 0:
        testFeat = E.add_delta(testFeat, args.delta)
    # Splice frames.
    if args.splice > 0:
        testFeat = testFeat.splice(args.splice)
    # Transform to array.
    testFeat = testFeat.array
    # Normalize the chunk.
    if args.normalizeChunk:
        testFeat = testFeat.normalize()
    # Normalize the acoustic model output.
    if args.normalizeAMP:
        # Compute pdf counts in order to normalize the acoustic model's posterior probabilities.
        countFile = args.outDir + '/pdfs_counts.txt'
        # Get the statistics file.
        if not os.path.isfile(countFile):
            trainAliFile = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali/ali.*.gz'
            _ = E.analyze_counts(aliFile=trainAliFile, outFile=countFile)
        with open(countFile) as f:
            line = f.readline().strip().strip("[]").strip()
        # Get the AMP bias value: the log prior of each pdf.
        counts = np.array(list(map(float, line.split())), dtype=np.float32)
        normalizeBias = np.log(counts / np.sum(counts))
    else:
        normalizeBias = 0

    # ------------------ STEP 3: Decode ------------------
    temp = E.KaldiDict()
    print('Compute Test WER: Forward network', end=" " * 20 + '\r')
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        for utt in testFeat.keys():
            data = cp.array(testFeat[utt], dtype=cp.float32)
            out1, out2 = model(data)
            out = F.log_softmax(out1, axis=1)
            out.to_cpu()
            temp[utt] = out.array - normalizeBias
    # Transform KaldiDict to KaldiArk format.
    print('Compute Test WER: Transform to ark', end=" " * 20 + '\r')
    amp = temp.ark
    # Decode and obtain the lattice.
    hmm = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_test/final.mdl'
    hclg = args.TIMITpath + '/exp/tri3/graph/HCLG.fst'
    lexicon = args.TIMITpath + '/exp/tri3/graph/words.txt'
    print('Compute Test WER: Generate Lattice', end=" " * 20 + '\r')
    lattice = E.decode_lattice(amp, hmm, hclg, lexicon, args.minActive,
                               args.maxActive, args.maxMemory, args.beam,
                               args.latBeam, args.acwt)
    # Sweep the language-model weight from minLmwt to maxLmwt and get the 1-best words.
    print('Compute Test WER: Get 1-best', end=" " * 20 + '\r')
    outs = lattice.get_1best(lmwt=args.minLmwt, maxLmwt=args.maxLmwt,
                             outFile=args.outDir + '/outRaw.txt')

    # ------------------ STEP 4: Score ------------------
    # If the reference file does not exist yet, make it.
    phonemap = args.TIMITpath + '/conf/phones.60-48-39.map'
    outFilter = args.TIMITpath + '/local/timit_norm_trans.pl -i - -m {} -from 48 -to 39'.format(phonemap)
    if not os.path.isfile(args.outDir + '/test_filt.txt'):
        refText = args.TIMITpath + '/data/test/text'
        cmd = 'cat {} | {} > {}/test_filt.txt'.format(refText, outFilter, args.outDir)
        (_, _) = E.run_shell_cmd(cmd)
    # Score the WER at each language-model weight and keep the smallest one.
    print('Compute Test WER: Compute WER', end=" " * 20 + '\r')
    minWER = (None, None)
    for k in range(args.minLmwt, args.maxLmwt + 1, 1):
        cmd = 'cat {} | {} > {}/translation_{}.txt'.format(outs[k], outFilter, args.outDir, k)
        (_, _) = E.run_shell_cmd(cmd)
        os.remove(outs[k])
        score = E.wer('{}/test_filt.txt'.format(args.outDir),
                      '{}/translation_{}.txt'.format(args.outDir, k), mode='all')
        if minWER[0] is None or score['WER'] < minWER[0]:
            minWER = (score['WER'], k)
    # Report the best result using the weight that achieved it, not the last one tried.
    print("Best WER: {}% at {}/translation_{}.txt".format(
        minWER[0], args.outDir, minWER[1]))
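# A minimal entry-point sketch (an assumption, not part of the original script):
# it presumes `args` has already been filled by the argparse setup defined
# elsewhere in this file before decode_test() runs.
if __name__ == '__main__':
    decode_test(outDimPdf=1968, outDimPho=48)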