def wer_fun(model, feat, normalizeBias):
    """Decode ``feat`` with ``model`` and return the lowest WER that was scored.

    Every utterance is forwarded through the network, the outputs are decoded
    into a lattice, and the 1-best result is scored once for every language
    model weight in ``args.minLmwt .. args.maxLmwt`` (inclusive).

    Relies on the module-level ``args`` namespace (``gpu``, ``TIMITpath``,
    ``outDir``, the decoding options and the language-weight range) and on the
    Kaldi wrapper bound to ``E``.

    Args:
        model: Callable invoked as ``model(data, is_training=False,
            device=args.gpu)``; it must return two values, of which only the
            first is used.
        feat: Feature container exposing ``.utts`` and indexable by utterance
            id. Each item is indexed with ``[:, np.newaxis, :]`` before being
            turned into a tensor (presumably a 2-D frames-by-dim array; confirm
            against the caller).
        normalizeBias: Value subtracted from the network output of every
            utterance before decoding.

    Returns:
        The smallest ``score['WER']`` over the scored language weights, or
        ``None`` when the language-weight range is empty.
    """
    # Forward pass: collect the bias-corrected network output per utterance.
    temp = E.KaldiDict()
    utts = feat.utts
    total = len(utts)
    with torch.no_grad():
        # Count from 1 so the progress display ends at total/total.
        for index, utt in enumerate(utts, start=1):
            data = torch.Tensor(feat[utt][:, np.newaxis, :])
            if args.gpu >= 0:
                data = data.cuda(args.gpu)
            out1, _ = model(data, is_training=False, device=args.gpu)
            temp[utt] = out1.cpu().detach().numpy() - normalizeBias
            print("(testing) Forward network {}/{}".format(index, total),
                  end=" " * 20 + '\r')

    # Convert the collected outputs to ark format for the decoder.
    print('(testing) Transform to ark', end=" " * 20 + '\r')
    amp = temp.ark

    # Decode and obtain the lattice.
    hmm = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_test/final.mdl'
    hclg = args.TIMITpath + '/exp/tri3/graph/HCLG.fst'
    lexicon = args.TIMITpath + '/exp/tri3/graph/words.txt'
    print('(testing) Generate Lattice', end=" " * 20 + '\r')
    lattice = E.decode_lattice(amp, hmm, hclg, lexicon, args.minActive,
                               args.maxActive, args.maxMemory, args.beam,
                               args.latBeam, args.acwt)

    # Get the 1-best output for each language weight in the configured range.
    print('(testing) Get 1-best words', end=" " * 20 + '\r')
    outs = lattice.get_1best(lmwt=args.minLmwt,
                             maxLmwt=args.maxLmwt,
                             outFile=args.outDir + '/outRaw')

    # Build the filtered reference transcription once; later calls reuse it.
    phonemap = args.TIMITpath + '/conf/phones.60-48-39.map'
    outFilter = args.TIMITpath + '/local/timit_norm_trans.pl -i - -m {} -from 48 -to 39'.format(
        phonemap)
    refFile = '{}/test_filt.txt'.format(args.outDir)
    predFile = "{}/test_prediction_filt.txt".format(args.outDir)
    if not os.path.isfile(refFile):
        refText = args.TIMITpath + '/data/test/text'
        E.run_shell_cmd('cat {} | {} > {}'.format(refText, outFilter, refFile))

    # Score every language weight and keep the smallest WER.
    print('(testing) Score', end=" " * 20 + '\r')
    minWER = None
    for k in range(args.minLmwt, args.maxLmwt + 1):
        E.run_shell_cmd('cat {} | {} > {}'.format(outs[k], outFilter, predFile))
        os.remove(outs[k])
        score = E.wer(refFile, predFile, mode='all')
        if minWER is None or score['WER'] < minWER:
            minWER = score['WER']

    # The prediction file only exists if at least one weight was scored.
    if os.path.isfile(predFile):
        os.remove(predFile)
    return minWER
# Example #2
0
 def wer_fun(model, testFeat, normalizeBias):
     """Decode ``testFeat`` with a Chainer ``model`` and return the lowest WER.

     Every utterance is forwarded through the network in inference mode, the
     log-softmax outputs are decoded into a lattice, and the 1-best result is
     scored once for every language model weight in
     ``args.minLmwt .. args.maxLmwt`` (inclusive).

     Relies on the module-level ``args`` namespace (``TIMITpath``, ``outDir``,
     the decoding options and the language-weight range) and on the Kaldi
     wrapper bound to ``E``.

     Args:
         model: Callable invoked as ``model([array])``; it must return two
             values, of which only the first is used.
         testFeat: Mapping from utterance id to a feature array; each array is
             copied to a ``cp.float32`` array before the forward pass.
         normalizeBias: Value subtracted from the log-softmax output of every
             utterance before decoding.

     Returns:
         The smallest ``score['WER']`` over the scored language weights, or
         ``None`` when the language-weight range is empty.
     """
     # Forward pass in inference mode, without building a backprop graph.
     temp = E.KaldiDict()
     print('(testing) Forward network', end=" " * 20 + '\r')
     with chainer.using_config('train', False), chainer.no_backprop_mode():
         for utt in testFeat.keys():
             data = [cp.array(testFeat[utt], dtype=cp.float32)]
             out1, _ = model(data)
             out = F.log_softmax(out1, axis=1)
             # Move the result to host memory before the bias is subtracted.
             out.to_cpu()
             temp[utt] = out.array - normalizeBias

     # Convert the collected outputs to ark format for the decoder.
     print('(testing) Transform to ark', end=" " * 20 + '\r')
     amp = temp.ark

     # Decode and obtain the lattice.
     hmm = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_test/final.mdl'
     hclg = args.TIMITpath + '/exp/tri3/graph/HCLG.fst'
     lexicon = args.TIMITpath + '/exp/tri3/graph/words.txt'
     print('(testing) Generate Lattice', end=" " * 20 + '\r')
     lattice = E.decode_lattice(amp, hmm, hclg, lexicon, args.minActive,
                                args.maxActive, args.maxMemory, args.beam,
                                args.latBeam, args.acwt)

     # Get the 1-best output for each language weight in the configured range.
     print('(testing) Get 1-best words', end=" " * 20 + '\r')
     outs = lattice.get_1best(lmwt=args.minLmwt,
                              maxLmwt=args.maxLmwt,
                              outFile=args.outDir + '/outRaw')

     # Build the filtered reference transcription once; later calls reuse it.
     phonemap = args.TIMITpath + '/conf/phones.60-48-39.map'
     outFilter = args.TIMITpath + '/local/timit_norm_trans.pl -i - -m {} -from 48 -to 39'.format(
         phonemap)
     refFile = '{}/test_filt.txt'.format(args.outDir)
     predFile = "{}/test_prediction_filt.txt".format(args.outDir)
     if not os.path.isfile(refFile):
         refText = args.TIMITpath + '/data/test/text'
         E.run_shell_cmd('cat {} | {} > {}'.format(refText, outFilter, refFile))

     # Score every language weight and keep the smallest WER.
     print('(testing) Score WER', end=" " * 20 + '\r')
     minWER = None
     for k in range(args.minLmwt, args.maxLmwt + 1):
         E.run_shell_cmd('cat {} | {} > {}'.format(outs[k], outFilter, predFile))
         os.remove(outs[k])
         score = E.wer(refFile, predFile, mode='all')
         if minWER is None or score['WER'] < minWER:
             minWER = score['WER']
         # The filtered prediction is a per-weight temporary; drop it right away.
         os.remove(predFile)
     return minWER
def decode_test(outDimPdf=1968, outDimPho=48):
    """Decode the test set with a pretrained model and report the best WER.

    Steps:
      1. Validate ``args.preModel``, then print the run configuration and write
         it to ``<outDir>/configure``.
      2. Build an ``MLP`` and load the pretrained weights from
         ``args.preModel``.
      3. Load the test features and apply the optional CMVN, delta, splice and
         normalisation steps selected in ``args``.
      4. Forward every utterance, decode a lattice and extract the 1-best
         output for each language weight in ``args.minLmwt .. args.maxLmwt``.
      5. Score each output against the filtered reference and print the lowest
         WER together with the translation file that produced it.

    Relies on the module-level ``args`` namespace and on the Kaldi wrapper
    bound to ``E``.

    Args:
        outDimPdf: Second argument passed to the ``MLP`` constructor.
        outDimPho: Third argument passed to the ``MLP`` constructor.

    Raises:
        Exception: If ``args.preModel`` is empty or is not an existing file.
    """
    if args.preModel == '':
        raise Exception("Expected Pretrained Model.")
    elif not os.path.isfile(args.preModel):
        raise Exception("No such file:{}.".format(args.preModel))

    print("\n############## Parameters Configure ##############")

    # Echo each configuration line to stdout and to the configure file.
    def configLog(message, f):
        print(message)
        f.write(message + '\n')

    configEntries = [
        ('Start System Time',
         datetime.datetime.now().strftime("%Y-%m-%d %X")),
        ('Host Name', socket.gethostname()),
        ('Fix Random Seed', args.randomSeed),
        ('GPU ID', args.gpu),
        ('Pretrained Model', args.preModel),
        ('Output Folder', args.outDir),
        ('Use CMVN', args.useCMVN),
        ('Splice N Frames', args.splice),
        ('Add N Deltas', args.delta),
        ('Normalize Chunk', args.normalizeChunk),
        ('Normalize AMP', args.normalizeAMP),
        ('Decode Minimum Active', args.minActive),
        ('Decode Maximum Active', args.maxActive),
        ('Decode Maximum Memory', args.maxMemory),
        ('Decode Beam', args.beam),
        ('Decode Lattice Beam', args.latBeam),
        ('Decode Acoustic Weight', args.acwt),
        ('Decode minimum Language Weight', args.minLmwt),
        ('Decode maximum Language Weight', args.maxLmwt),
    ]
    # The context manager closes the file even if a write fails.
    with open(args.outDir + '/configure', "w") as f:
        for label, value in configEntries:
            configLog('{}:{}'.format(label, value), f)

    print("\n############## Decode Test ##############")

    #------------------ STEP 1: Load Pretrained Model ------------------

    print('Load Model...')
    # The input dimension grows with the delta and splice settings, matching
    # the feature transforms applied in step 2.
    featDim = 40
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)
    model = MLP(featDim, outDimPdf, outDimPho)
    chainer.serializers.load_npz(args.preModel, model)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)

    #------------------ STEP 2: Prepare Test Data ------------------

    print('Prepare decode test data...')
    # fMLLR features
    testFilePath = args.TIMITpath + '/data-fmllr-tri3/test/feats.scp'
    testFeat = E.load(testFilePath)
    # Optional CMVN
    if args.useCMVN:
        testUttSpk = args.TIMITpath + '/data-fmllr-tri3/test/utt2spk'
        testCmvnState = args.TIMITpath + '/data-fmllr-tri3/test/cmvn.ark'
        testFeat = E.use_cmvn(testFeat, testCmvnState, testUttSpk)
    # Optional deltas
    if args.delta > 0:
        testFeat = E.add_delta(testFeat, args.delta)
    # Optional frame splicing
    if args.splice > 0:
        testFeat = testFeat.splice(args.splice)
    # Switch to the array representation
    testFeat = testFeat.array
    # Optional normalisation
    if args.normalizeChunk:
        testFeat = testFeat.normalize()
    # Optional normalisation of the acoustic model output
    if args.normalizeAMP:
        # The bias is the log of the relative pdf counts read from countFile.
        countFile = args.outDir + '/pdfs_counts.txt'
        # Generate the statistics file only when it is missing.
        if not os.path.isfile(countFile):
            trainAliFile = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali/ali.*.gz'
            E.analyze_counts(aliFile=trainAliFile, outFile=countFile)
        with open(countFile) as f:
            line = f.readline().strip().strip("[]").strip()
        counts = np.array(list(map(float, line.split())), dtype=np.float32)
        normalizeBias = np.log(counts / np.sum(counts))
    else:
        normalizeBias = 0

    #------------------ STEP 3: Decode  ------------------

    temp = E.KaldiDict()
    print('Compute Test WER: Forward network', end=" " * 20 + '\r')
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        for utt in testFeat.keys():
            # NOTE(review): the input is always a CuPy array, even when
            # args.gpu < 0 leaves the model on the CPU - confirm that a
            # CPU-only run is supported.
            data = cp.array(testFeat[utt], dtype=cp.float32)
            out1, _ = model(data)
            out = F.log_softmax(out1, axis=1)
            # Move the result to host memory before the bias is subtracted.
            out.to_cpu()
            temp[utt] = out.array - normalizeBias
    # Convert the collected outputs to ark format for the decoder.
    print('Compute Test WER: Transform to ark', end=" " * 20 + '\r')
    amp = temp.ark
    # Decode and obtain the lattice.
    hmm = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_test/final.mdl'
    hclg = args.TIMITpath + '/exp/tri3/graph/HCLG.fst'
    lexicon = args.TIMITpath + '/exp/tri3/graph/words.txt'
    print('Compute Test WER: Generate Lattice', end=" " * 20 + '\r')
    lattice = E.decode_lattice(amp, hmm, hclg, lexicon, args.minActive,
                               args.maxActive, args.maxMemory, args.beam,
                               args.latBeam, args.acwt)
    # Get the 1-best output for each language weight in the configured range.
    print('Compute Test WER: Get 1Best', end=" " * 20 + '\r')
    outs = lattice.get_1best(lmwt=args.minLmwt,
                             maxLmwt=args.maxLmwt,
                             outFile=args.outDir + '/outRaw.txt')

    #------------------ STEP 4: Score  ------------------

    # Build the filtered reference transcription once; later runs reuse it.
    phonemap = args.TIMITpath + '/conf/phones.60-48-39.map'
    outFilter = args.TIMITpath + '/local/timit_norm_trans.pl -i - -m {} -from 48 -to 39'.format(
        phonemap)
    refFile = '{}/test_filt.txt'.format(args.outDir)
    if not os.path.isfile(refFile):
        refText = args.TIMITpath + '/data/test/text'
        E.run_shell_cmd('cat {} | {} > {}'.format(refText, outFilter, refFile))

    # Score every language weight; remember the best WER and its weight.
    print('Compute Test WER: compute WER', end=" " * 20 + '\r')
    bestWER = None
    bestLmwt = None
    for k in range(args.minLmwt, args.maxLmwt + 1):
        transFile = "{}/tanslation_{}.txt".format(args.outDir, k)
        E.run_shell_cmd('cat {} | {} > {}'.format(outs[k], outFilter, transFile))
        os.remove(outs[k])
        score = E.wer(refFile, transFile, mode='all')
        if bestWER is None or score['WER'] < bestWER:
            bestWER = score['WER']
            bestLmwt = k

    # Report the file of the best-scoring weight, not the last one visited.
    print("Best WER:{}% at {}/tanslation_{}.txt".format(
        bestWER, args.outDir, bestLmwt))