예제 #1
0
def process_feat_ali(training=True):
  """Load the fMLLR feature and its pdf/phone alignments for one subset.

  Args:
    training: if True, load the "train" subset; otherwise the "dev" subset.

  Returns:
    A 3-tuple ``(feat, pdfAli, phoneAli)`` of renamed exkaldi archives.
  """
  Name = "train" if training else "dev"

  # Raw fMLLR feature of the chosen subset.
  feat = exkaldi.load_feat(f"{args.expDir}/train_lstm/data/{Name}/fmllr.ark")

  # Optional per-speaker cepstral mean/variance normalization.
  if args.useCMVN:
    cmvn = exkaldi.load_cmvn(f"{args.expDir}/train_lstm/data/{Name}/cmvn_of_fmllr.ark")
    feat = exkaldi.use_cmvn(feat, cmvn, f"{args.expDir}/train_lstm/data/{Name}/utt2spk")
    del cmvn

  # Optional delta features and frame splicing.
  if args.delta > 0:
    feat = feat.add_delta(args.delta)
  if args.splice > 0:
    feat = feat.splice(args.splice)

  feat = feat.to_numpy()
  if args.normalizeFeat:
    feat = feat.normalize(std=True)

  # Frame-level labels stored as numpy archives.
  pdfAlignment = exkaldi.load_ali(f"{args.expDir}/train_lstm/data/{Name}/pdfID.npy")
  phoneAlignment = exkaldi.load_ali(f"{args.expDir}/train_lstm/data/{Name}/phoneID.npy")

  # Rename the archives so downstream code can address them by name.
  feat.rename("feat")
  pdfAlignment.rename("pdfID")
  phoneAlignment.rename("phoneID")

  return feat, pdfAlignment, phoneAlignment
예제 #2
0
def prepare_test_data(postProbDim):
  """Prepare test features, an output-normalization bias and the reference text.

  Args:
    postProbDim: dimension of the acoustic model's posterior output.

  Returns:
    A 3-tuple ``(feat, normalizeBias, trans)``.
  """
  feat = exkaldi.load_feat(f"{args.expDir}/train_lstm/data/test/fmllr.ark")

  # Optional per-speaker CMVN.
  if args.useCMVN:
    cmvn = exkaldi.load_cmvn(f"{args.expDir}/train_lstm/data/test/cmvn_of_fmllr.ark")
    feat = exkaldi.use_cmvn(feat, cmvn, utt2spk=f"{args.expDir}/train_lstm/data/test/utt2spk")
    del cmvn

  # Optional deltas and splicing.
  if args.delta > 0:
    feat = feat.add_delta(args.delta)
  if args.splice > 0:
    feat = feat.splice(args.splice)

  feat = feat.to_numpy()
  if args.normalizeFeat:
    feat = feat.normalize(std=True)

  # Bias applied to the acoustic model output at decode time; computed from
  # the training pdf alignment when normalization is requested.
  normalizeBias = 0
  if args.normalizeAMP:
    ali = exkaldi.load_ali(f"{args.expDir}/train_lstm/data/train/pdfID.npy", aliType="pdfID")
    normalizeBias = exkaldi.nn.compute_postprob_norm(ali, postProbDim)

  # Reference transcription, mapped from the 48-phone to the 39-phone set.
  trans = exkaldi.load_transcription(f"{args.expDir}/train_lstm/data/test/text")
  convertTable = exkaldi.load_list_table(f"{args.expDir}/dict/phones.48_to_39.map")
  trans = trans.convert(convertTable)

  return feat, normalizeBias, trans
예제 #3
0
 def loadChunkData(iterator, feat, otherArgs):
     """Preprocess one chunk of feature data for the data iterator.

     <feat> is a KaldiArk object. Returns trainable numpy data with the pdf
     and phone labels concatenated to the feature dimension.
     """
     global args
     uttSpk, cmvnState, labelPdf, labelPho, toDo = otherArgs
     # Optional CMVN, deltas and splicing.
     if args.useCMVN:
         feat = E.use_cmvn(feat, cmvnState, uttSpk)
     if args.delta > 0:
         feat = E.add_delta(feat, args.delta)
     if args.splice > 0:
         feat = feat.splice(args.splice)
     # To KaldiDict, sorted by utterance frame length.
     feat = feat.array.sort(by='frame')
     if args.normalizeChunk:
         feat = feat.normalize()
     # Append labels along the feature dimension.
     datas = feat.concat([labelPdf, labelPho], axis=1)
     # Curriculum-style frame cutting: allow longer segments as epochs grow.
     if toDo == 'train':
         for minEpoch, maxFrames in ((4, 1000), (3, 800), (2, 400), (1, 200), (0, 100)):
             if iterator.epoch >= minEpoch:
                 datas = datas.cut(maxFrames)
                 break
     # Merge into trainable numpy data.
     datas, _ = datas.merge(keepDim=True, sortFrame=True)
     return datas
예제 #4
0
def compute_mfcc():
    """Compute MFCC features and CMVN statistics for train/dev/test sets.

    For each subset this writes raw MFCC, CMVN statistics, and CMVN-applied
    MFCC archives under ``exp/mfcc/<subset>/``.
    """
    featOutDir = os.path.join("exp", "mfcc")
    exkaldi.utils.make_dependent_dirs(featOutDir, pathIsFile=False)

    for Name in ["train", "dev", "test"]:
        print(f"Compute {Name} MFCC feature.")
        subsetDir = os.path.join(featOutDir, Name)
        exkaldi.utils.make_dependent_dirs(subsetDir, pathIsFile=False)
        dataDir = os.path.join("exp", "data", Name)

        # Raw MFCC feature (energy disabled).
        feat = exkaldi.compute_mfcc(
            wavFile=os.path.join(dataDir, "wav.scp"),
            config={"--use-energy": "false"},
        )
        feat.save(os.path.join(subsetDir, "raw_mfcc.ark"))
        print(f"Generate raw MFCC feature done.")

        # Per-speaker CMVN statistics.
        cmvn = exkaldi.compute_cmvn_stats(
            feat=feat,
            spk2utt=os.path.join(dataDir, "spk2utt"),
        )
        cmvn.save(os.path.join(subsetDir, "cmvn.ark"))
        print(f"Generate CMVN statistics done.")

        # Apply CMVN and save the normalized feature.
        feat = exkaldi.use_cmvn(
            feat=feat,
            cmvn=cmvn,
            utt2spk=os.path.join(dataDir, "utt2spk"),
        )
        feat.save(os.path.join(subsetDir, "mfcc_cmvn.ark"))
        print(f"Generate MFCC feature (applied CMVN) done.")
    print("Compute MFCC done.")
예제 #5
0
def prepare_data(training=True):
    """Build a shuffled frame-level (feature, pdfID) tuple dataset.

    Args:
        training: if True use the "train_clean_5" subset, else "dev_clean_2".

    Returns:
        A 2-tuple ``(featDim, dataset)`` — the feature dimension and the
        shuffled tuple dataset.
    """
    flag = "train_clean_5" if training else "dev_clean_2"

    print(f"{flag}: Load feature...")
    featsFile = f"{args.root}/{args.feat}/raw_{args.feat}_{flag}.*.ark"
    feats = exkaldi.load_feat(featsFile)

    # Optional per-speaker CMVN.
    if args.cmn:
        print(f"{flag}: Use cmvn...")
        cmvnFile = f"{args.root}/{args.feat}/cmvn_{flag}.ark"
        cmvn = exkaldi.load_cmvn(cmvnFile)
        feats = exkaldi.use_cmvn(feats, cmvn, utt2spk=f"{args.root}/data/{flag}/utt2spk")
        del cmvn

    # Optional deltas and splicing.
    if args.delta > 0:
        print(f"{flag}: Add delta...")
        feats = feats.add_delta(args.delta)
    if args.splice > 0:
        print(f"{flag}: Splice feature...")
        feats = feats.splice(args.splice)

    feats = feats.to_numpy()
    featDim = feats.dim

    print(f"{flag}: Load alignment...")
    # NOTE(review): alignment paths are rooted at {args.feat} while features
    # use {args.root} — looks inconsistent; confirm against the experiment
    # directory layout before changing.
    ali = exkaldi.load_ali(f"{args.feat}/exp/tri3b_ali_{flag}/ali.*.gz")
    print(f"{flag}: Get pdf alignment...")
    pdfAli = ali.to_numpy(aliType="pdfID",
                          hmm=f"{args.feat}/exp/tri3b_ali_{flag}/final.mdl")
    del ali

    # Name the archives, pair them frame by frame, and shuffle.
    feats.rename("feat")
    pdfAli.rename("pdfID")
    print(f"{flag}: Tuple dataset...")
    dataset = exkaldi.tuple_dataset([feats, pdfAli], frameLevel=True)
    random.shuffle(dataset)

    return featDim, dataset
 def loadChunkData(iterator, feat, otherArgs):
     """Preprocess one chunk of feature data for the data iterator.

     <feat> is a KaldiArk object. Returns trainable numpy data with the pdf
     and phone labels concatenated to the feature dimension.
     """
     global args
     uttSpk, cmvnState, labelPdf, labelPho = otherArgs
     # Optional CMVN, deltas and splicing.
     if args.useCMVN:
         feat = E.use_cmvn(feat, cmvnState, uttSpk)
     if args.delta > 0:
         feat = E.add_delta(feat, args.delta)
     if args.splice > 0:
         feat = feat.splice(args.splice)
     # To KaldiDict.
     feat = feat.array
     if args.normalizeChunk:
         feat = feat.normalize()
     # Append labels along the feature dimension and merge to numpy data.
     datas = feat.concat([labelPdf, labelPho], axis=1)
     datas, _ = datas.merge(keepDim=False, sort=False)
     return datas
def decode_test(outDimPdf=1968, outDimPho=48):
    """Decode the test set with a pretrained MLP acoustic model and score WER.

    Loads the pretrained model, forwards the test fMLLR features, decodes a
    lattice with Kaldi, rescores it over a range of language-model weights,
    and prints the best WER.

    Args:
        outDimPdf: output dimension of the pdf (senone) branch of the model.
        outDimPho: output dimension of the phone branch of the model.

    Raises:
        Exception: if ``args.preModel`` is empty or is not an existing file.
    """
    global args

    if args.preModel == '':
        raise Exception("Expected Pretrained Model.")
    elif not os.path.isfile(args.preModel):
        raise Exception("No such file:{}.".format(args.preModel))

    print("\n############## Parameters Configure ##############")

    # Show configure information and write them to file
    def configLog(message, f):
        print(message)
        f.write(message + '\n')

    f = open(args.outDir + '/configure', "w")
    configLog(
        'Start System Time:{}'.format(
            datetime.datetime.now().strftime("%Y-%m-%d %X")), f)
    configLog('Host Name:{}'.format(socket.gethostname()), f)
    configLog('Fix Random Seed:{}'.format(args.randomSeed), f)
    configLog('GPU ID:{}'.format(args.gpu), f)
    configLog('Pretrained Model:{}'.format(args.preModel), f)
    configLog('Output Folder:{}'.format(args.outDir), f)
    configLog('Use CMVN:{}'.format(args.useCMVN), f)
    configLog('Splice N Frames:{}'.format(args.splice), f)
    configLog('Add N Deltas:{}'.format(args.delta), f)
    configLog('Normalize Chunk:{}'.format(args.normalizeChunk), f)
    configLog('Normalize AMP:{}'.format(args.normalizeAMP), f)
    configLog('Decode Minimum Active:{}'.format(args.minActive), f)
    configLog('Decode Maximum Active:{}'.format(args.maxActive), f)
    configLog('Decode Maximum Memory:{}'.format(args.maxMemory), f)
    configLog('Decode Beam:{}'.format(args.beam), f)
    configLog('Decode Lattice Beam:{}'.format(args.latBeam), f)
    configLog('Decode Acoustic Weight:{}'.format(args.acwt), f)
    configLog('Decode minimum Language Weight:{}'.format(args.minLmwt), f)
    configLog('Decode maximum Language Weight:{}'.format(args.maxLmwt), f)
    f.close()

    print("\n############## Decode Test ##############")

    #------------------ STEP 1: Load Pretrained Model ------------------

    print('Load Model...')
    # Initialize model with the feature dimension implied by delta/splice.
    featDim = 40
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)
    model = MLP(featDim, outDimPdf, outDimPho)
    chainer.serializers.load_npz(args.preModel, model)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)

    #------------------ STEP 2: Prepare Test Data ------------------

    print('Prepare decode test data...')
    # Fmllr file
    testFilePath = args.TIMITpath + '/data-fmllr-tri3/test/feats.scp'
    testFeat = E.load(testFilePath)
    # Use CMVN
    if args.useCMVN:
        testUttSpk = args.TIMITpath + '/data-fmllr-tri3/test/utt2spk'
        testCmvnState = args.TIMITpath + '/data-fmllr-tri3/test/cmvn.ark'
        testFeat = E.use_cmvn(testFeat, testCmvnState, testUttSpk)
    # Add delta
    if args.delta > 0:
        testFeat = E.add_delta(testFeat, args.delta)
    # Splice frames
    if args.splice > 0:
        testFeat = testFeat.splice(args.splice)
    # Transform to array
    testFeat = testFeat.array
    # Normalize
    if args.normalizeChunk:
        testFeat = testFeat.normalize()
    # Normalize acoustic model output
    if args.normalizeAMP:
        # Compute pdf counts in order to normalize acoustic model posterior probability.
        countFile = args.outDir + '/pdfs_counts.txt'
        # Get statistics file
        if not os.path.isfile(countFile):
            trainAliFile = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali/ali.*.gz'
            _ = E.analyze_counts(aliFile=trainAliFile, outFile=countFile)
        with open(countFile) as f:
            line = f.readline().strip().strip("[]").strip()
        # Get AMP bias value
        counts = np.array(list(map(float, line.split())), dtype=np.float32)
        normalizeBias = np.log(counts / np.sum(counts))
    else:
        normalizeBias = 0

    #------------------ STEP 3: Decode  ------------------

    temp = E.KaldiDict()
    print('Compute Test WER: Forward network', end=" " * 20 + '\r')
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        for utt in testFeat.keys():
            data = cp.array(testFeat[utt], dtype=cp.float32)
            out1, out2 = model(data)
            out = F.log_softmax(out1, axis=1)
            out.to_cpu()
            temp[utt] = out.array - normalizeBias
    # Tansform KaldiDict to KaldiArk format
    print('Compute Test WER: Transform to ark', end=" " * 20 + '\r')
    amp = temp.ark
    # Decode and obtain lattice
    hmm = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_test/final.mdl'
    hclg = args.TIMITpath + '/exp/tri3/graph/HCLG.fst'
    lexicon = args.TIMITpath + '/exp/tri3/graph/words.txt'
    print('Compute Test WER: Generate Lattice', end=" " * 20 + '\r')
    lattice = E.decode_lattice(amp, hmm, hclg, lexicon, args.minActive,
                               args.maxActive, args.maxMemory, args.beam,
                               args.latBeam, args.acwt)
    # Change language weight from 1 to 10, get the 1best words.
    print('Compute Test WER: Get 1Best', end=" " * 20 + '\r')
    outs = lattice.get_1best(lmwt=args.minLmwt,
                             maxLmwt=args.maxLmwt,
                             outFile=args.outDir + '/outRaw.txt')

    #------------------ STEP 4: Score  ------------------

    # If reference file is not existed, make it.
    phonemap = args.TIMITpath + '/conf/phones.60-48-39.map'
    outFilter = args.TIMITpath + '/local/timit_norm_trans.pl -i - -m {} -from 48 -to 39'.format(
        phonemap)
    if not os.path.isfile(args.outDir + '/test_filt.txt'):
        refText = args.TIMITpath + '/data/test/text'
        cmd = 'cat {} | {} > {}/test_filt.txt'.format(refText, outFilter,
                                                      args.outDir)
        (_, _) = E.run_shell_cmd(cmd)
    # Score WER and find the smallest one.
    print('Compute Test WER: compute WER', end=" " * 20 + '\r')
    minWER = (None, None)
    for k in range(args.minLmwt, args.maxLmwt + 1, 1):
        cmd = 'cat {} | {} > {}/tanslation_{}.txt'.format(
            outs[k], outFilter, args.outDir, k)
        (_, _) = E.run_shell_cmd(cmd)
        os.remove(outs[k])
        score = E.wer('{}/test_filt.txt'.format(args.outDir),
                      "{}/tanslation_{}.txt".format(args.outDir, k),
                      mode='all')
        if minWER[0] is None or score['WER'] < minWER[0]:
            minWER = (score['WER'], k)

    # BUG FIX: report the language weight that achieved the best WER
    # (minWER[1]); previously this printed the loop variable `k`, which is
    # always args.maxLmwt after the loop ends.
    print("Best WER:{}% at {}/tanslation_{}.txt".format(
        minWER[0], args.outDir, minWER[1]))
def train_model():
    """Train the DNN (MLP) acoustic model on TIMIT fMLLR features.

    Builds train/dev data iterators, trains with momentum SGD under a
    step-wise learning-rate schedule, evaluates dev loss每 epoch, computes
    the test-set WER after each epoch, and saves the model via the supporter.

    Reads its configuration from the module-level ``args``.
    """
    global args

    print("\n############## Parameters Configure ##############")

    # Show configure information and write them to file
    def configLog(message, f):
        print(message)
        f.write(message + '\n')

    f = open(args.outDir + '/configure', "w")
    configLog(
        'Start System Time:{}'.format(
            datetime.datetime.now().strftime("%Y-%m-%d %X")), f)
    configLog('Host Name:{}'.format(socket.gethostname()), f)
    configLog('Fix Random Seed:{}'.format(args.randomSeed), f)
    configLog('Mini Batch Size:{}'.format(args.batchSize), f)
    configLog('GPU ID:{}'.format(args.gpu), f)
    configLog('Train Epochs:{}'.format(args.epoch), f)
    configLog('Output Folder:{}'.format(args.outDir), f)
    configLog('Dropout Rate:{}'.format(args.dropout), f)
    configLog('Use CMVN:{}'.format(args.useCMVN), f)
    configLog('Splice N Frames:{}'.format(args.splice), f)
    configLog('Add N Deltas:{}'.format(args.delta), f)
    configLog('Normalize Chunk:{}'.format(args.normalizeChunk), f)
    configLog('Normalize AMP:{}'.format(args.normalizeAMP), f)
    configLog('Decode Minimum Active:{}'.format(args.minActive), f)
    configLog('Decode Maximum Active:{}'.format(args.maxActive), f)
    configLog('Decode Maximum Memory:{}'.format(args.maxMemory), f)
    configLog('Decode Beam:{}'.format(args.beam), f)
    configLog('Decode Lattice Beam:{}'.format(args.latBeam), f)
    configLog('Decode Acoustic Weight:{}'.format(args.acwt), f)
    configLog('Decode minimum Language Weight:{}'.format(args.minLmwt), f)
    configLog('Decode maximum Language Weight:{}'.format(args.maxLmwt), f)
    f.close()

    print("\n############## Train DNN Acoustic Model ##############")

    #------------------------ STEP 1: Prepare Training and Validation Data -----------------------------

    print('Prepare Data Iterator...')
    # Prepare fMLLR feature files
    trainScpFile = args.TIMITpath + '/data-fmllr-tri3/train/feats.scp'
    devScpFile = args.TIMITpath + '/data-fmllr-tri3/dev/feats.scp'
    # Prepare training labels (alignment data)
    trainAliFile = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali/ali.*.gz'
    trainLabelPdf = E.get_ali(trainAliFile)
    trainLabelPho = E.get_ali(trainAliFile, returnPhone=True)
    # Phone IDs are shifted to start from 0.
    for i in trainLabelPho.keys():
        trainLabelPho[i] = trainLabelPho[i] - 1
    # Prepare validation labels (alignment data)
    devAliFile = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_dev/ali.*.gz'
    devLabelPdf = E.get_ali(devAliFile)
    devLabelPho = E.get_ali(devAliFile, returnPhone=True)
    for i in devLabelPho.keys():
        devLabelPho[i] = devLabelPho[i] - 1
    # prepare CMVN files
    trainUttSpk = args.TIMITpath + '/data-fmllr-tri3/train/utt2spk'
    trainCmvnState = args.TIMITpath + '/data-fmllr-tri3/train/cmvn.ark'
    devUttSpk = args.TIMITpath + '/data-fmllr-tri3/dev/utt2spk'
    devCmvnState = args.TIMITpath + '/data-fmllr-tri3/dev/cmvn.ark'

    # Now we try to make training-iterator and validation-iterator.
    # Firstly, customize a function to process feature data.
    def loadChunkData(iterator, feat, otherArgs):
        # <feat> is a KaldiArk object
        global args
        uttSpk, cmvnState, labelPdf, labelPho = otherArgs
        # use CMVN
        if args.useCMVN:
            feat = E.use_cmvn(feat, cmvnState, uttSpk)
        # Add delta
        if args.delta > 0:
            feat = E.add_delta(feat, args.delta)
        # Splice front-back n frames
        if args.splice > 0:
            feat = feat.splice(args.splice)
        # Transform to KaldiDict
        feat = feat.array
        # Normalize
        if args.normalizeChunk:
            feat = feat.normalize()
        # Concatenate data-label in dimension
        datas = feat.concat([labelPdf, labelPho], axis=1)
        # Transform to trainable numpy data
        datas, _ = datas.merge(keepDim=False, sort=False)
        return datas

    # Then get data iterators
    train = E.DataIterator(trainScpFile,
                           loadChunkData,
                           args.batchSize,
                           chunks=5,
                           shuffle=True,
                           otherArgs=(trainUttSpk, trainCmvnState,
                                      trainLabelPdf, trainLabelPho))
    print('Generate train dataset done. Chunks:{} / Batch size:{}'.format(
        train.chunks, train.batchSize))
    dev = E.DataIterator(devScpFile,
                         loadChunkData,
                         args.batchSize,
                         chunks=1,
                         shuffle=False,
                         otherArgs=(devUttSpk, devCmvnState, devLabelPdf,
                                    devLabelPho))
    print(
        'Generate validation dataset done. Chunks:{} / Batch size:{}.'.format(
            dev.chunks, dev.batchSize))

    #--------------------------------- STEP 2: Prepare Model --------------------------

    print('Prepare Model...')
    # Initialize model: input dimension grows with deltas and splicing.
    featDim = 40
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)
    model = MLP(featDim, trainLabelPdf.target, trainLabelPho.target)
    if args.gpu >= 0:
        model.to_gpu(args.gpu)
    # Initialize optimizer with a step-wise learning rate schedule.
    lr = [(0, 0.08), (10, 0.04), (15, 0.02), (17, 0.01), (19, 0.005),
          (22, 0.0025), (25, 0.001)]
    print('Learning Rate (epoch,newLR):', lr)
    optimizer = chainer.optimizers.MomentumSGD(lr[0][1], momentum=0.0)
    lr.pop(0)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(0.0))
    # Prepare a supporter to help handling training information.
    supporter = E.Supporter(args.outDir)

    #------------------ STEP 3: Prepare Decoding Test Data and Function ------------------

    print('Prepare decoding test data...')
    # fMLLR file of test data
    testFilePath = args.TIMITpath + '/data-fmllr-tri3/test/feats.scp'
    testFeat = E.load(testFilePath)
    # Using CMVN
    if args.useCMVN:
        testUttSpk = args.TIMITpath + '/data-fmllr-tri3/test/utt2spk'
        testCmvnState = args.TIMITpath + '/data-fmllr-tri3/test/cmvn.ark'
        testFeat = E.use_cmvn(testFeat, testCmvnState, testUttSpk)
    # Add delta
    if args.delta > 0:
        testFeat = E.add_delta(testFeat, args.delta)
    # Splice frames
    if args.splice > 0:
        testFeat = testFeat.splice(args.splice)
    # Transform to array
    testFeat = testFeat.array
    # Normalize
    if args.normalizeChunk:
        testFeat = testFeat.normalize()
    # Normalize acoustic model output
    if args.normalizeAMP:
        # Compute pdf counts in order to normalize acoustic model posterior probability.
        countFile = args.outDir + '/pdfs_counts.txt'
        # Get statistics file
        if not os.path.isfile(countFile):
            _ = E.analyze_counts(aliFile=trainAliFile, outFile=countFile)
        with open(countFile) as f:
            line = f.readline().strip().strip("[]").strip()
        # Get AMP bias value
        counts = np.array(list(map(float, line.split())), dtype=np.float32)
        normalizeBias = np.log(counts / np.sum(counts))
    else:
        normalizeBias = 0
    # Now, design a function to compute WER score
    def wer_fun(model, testFeat, normalizeBias):
        global args
        # Use decode test data to forward network
        temp = E.KaldiDict()
        print('(testing) Forward network', end=" " * 20 + '\r')
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            for utt in testFeat.keys():
                data = cp.array(testFeat[utt], dtype=cp.float32)
                out1, out2 = model(data)
                out = F.log_softmax(out1, axis=1)
                out.to_cpu()
                temp[utt] = out.array - normalizeBias
        # Tansform KaldiDict to KaldiArk format
        print('(testing) Transform to ark', end=" " * 20 + '\r')
        amp = temp.ark
        # Decoding to obtain a lattice
        hmm = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_test/final.mdl'
        hclg = args.TIMITpath + '/exp/tri3/graph/HCLG.fst'
        lexicon = args.TIMITpath + '/exp/tri3/graph/words.txt'
        print('(testing) Generate Lattice', end=" " * 20 + '\r')
        lattice = E.decode_lattice(amp, hmm, hclg, lexicon, args.minActive,
                                   args.maxActive, args.maxMemory, args.beam,
                                   args.latBeam, args.acwt)
        # Change language weight from 1 to 10, get the 1best words.
        print('(testing) Get 1-best words', end=" " * 20 + '\r')
        outs = lattice.get_1best(lmwt=args.minLmwt,
                                 maxLmwt=args.maxLmwt,
                                 outFile=args.outDir + '/outRaw.txt')
        # If reference file is not existed, make it.
        phonemap = args.TIMITpath + '/conf/phones.60-48-39.map'
        outFilter = args.TIMITpath + '/local/timit_norm_trans.pl -i - -m {} -from 48 -to 39'.format(
            phonemap)
        if not os.path.isfile(args.outDir + '/test_filt.txt'):
            refText = args.TIMITpath + '/data/test/text'
            cmd = 'cat {} | {} > {}/test_filt.txt'.format(
                refText, outFilter, args.outDir)
            (_, _) = E.run_shell_cmd(cmd)
        # Score WER and find the smallest one.
        print('(testing) Score', end=" " * 20 + '\r')
        minWER = None
        for k in range(args.minLmwt, args.maxLmwt + 1, 1):
            cmd = 'cat {} | {} > {}/test_prediction_filt.txt'.format(
                outs[k], outFilter, args.outDir)
            (_, _) = E.run_shell_cmd(cmd)
            os.remove(outs[k])
            score = E.wer('{}/test_filt.txt'.format(args.outDir),
                          "{}/test_prediction_filt.txt".format(args.outDir),
                          mode='all')
            if minWER is None or score['WER'] < minWER:
                minWER = score['WER']
        return minWER

    #-------------------------- STEP 4: Train Model ---------------------------

    # While first epoch, the epoch size is computed gradually, so the prograss information will be inaccurate.
    print('Now Start to Train')
    print(
        'Note that: The first epoch will be doing the statistics of total data size gradually.'
    )
    print(
        'Note that: We will evaluate the WER of test dataset after epoch which will cost a few seconds.'
    )

    # Preprocessing batch data which is getten from data iterator
    def convert(batch):
        batch = cp.array(batch, dtype=cp.float32)
        data = batch[:, 0:-2]
        label1 = cp.array(batch[:, -2], dtype=cp.int32)
        label2 = cp.array(batch[:, -1], dtype=cp.int32)
        return data, label1, label2

    # We will save model during training loop, so prepare a model-save function
    def saveFunc(fileName, model):
        global args
        copymodel = model.copy()
        if args.gpu >= 0:
            copymodel.to_cpu()
        chainer.serializers.save_npz(fileName, copymodel)

    # Start training loop
    for e in range(args.epoch):
        supporter.send_report({'epoch': e})
        print()
        i = 1
        usedTime = 0
        # Train
        while True:
            start = time.time()
            # Get data >> Forward network >> Loss back propagation >> Update
            batch = train.next()
            data, label1, label2 = convert(batch)
            # BUG FIX: the config key is 'train' (lowercase). The original
            # 'Train' set a nonexistent flag; training mode only worked
            # because chainer's 'train' config defaults to True.
            with chainer.using_config('train', True):
                h1, h2 = model(data)
                L1 = F.softmax_cross_entropy(h1, label1)
                L2 = F.softmax_cross_entropy(h2, label2)
                loss = L1 + L2
                acc = F.accuracy(F.softmax(h1, axis=1), label1)
            model.cleargrads()
            loss.backward()
            optimizer.update()
            # Compute time cost
            ut = time.time() - start
            usedTime += ut
            print(
                "(training) Epoch:{}>>>{}% Chunk:{}>>>{}% Iter:{} Used-time:{}s Batch-loss:{} Speed:{}iters/s"
                .format(e, int(100 * train.epochProgress), train.chunk,
                        int(100 * train.chunkProgress), i, int(usedTime),
                        "%.4f" % (float(loss.array)), "%.2f" % (1 / ut)),
                end=" " * 5 + '\r')
            i += 1
            supporter.send_report({'train_loss': loss, 'train_acc': acc})
            # If forward all data, break
            if train.isNewEpoch:
                break
        print()
        i = 1
        usedTime = 0
        # Validate
        while True:
            start = time.time()
            # Get data >> Forward network >> Score
            batch = dev.next()
            data, label1, label2 = convert(batch)
            with chainer.using_config('train',
                                      False), chainer.no_backprop_mode():
                h1, h2 = model(data)
                loss = F.softmax_cross_entropy(h1, label1)
                acc = F.accuracy(F.softmax(h1, axis=1), label1)
            # Compute time cost
            ut = time.time() - start
            usedTime += ut
            print(
                "(Validating) Epoch:{}>>>{}% Chunk:{}>>>{}% Iter:{} Used-time:{}s Batch-loss:{} Speed:{}iters/s"
                .format(e, int(100 * dev.epochProgress), dev.chunk,
                        int(100 * dev.chunkProgress), i, int(usedTime),
                        "%.4f" % (float(loss.array)), "%.2f" % (1 / ut)),
                end=" " * 5 + '\r')
            i += 1
            supporter.send_report({'dev_loss': loss, 'dev_acc': acc})
            # If forward all data, break
            if dev.isNewEpoch:
                break
        print()
        # Compute WER score
        WERscore = wer_fun(model, testFeat, normalizeBias)
        supporter.send_report({'test_wer': WERscore, 'lr': optimizer.lr})
        # Collect all information of this epoch that is reported before, and show them at display
        supporter.collect_report(plot=True)
        # Save model
        supporter.save_model(saveFunc, models={'MLP': model})
        # Change learning rate
        if len(lr) > 0 and supporter.judge('epoch', '>=', lr[0][0]):
            optimizer.lr = lr[0][1]
            lr.pop(0)

    print("DNN Acoustic Model training done.")
    print("The final model has been saved as:", supporter.finalModel)
    print('Over System Time:', datetime.datetime.now().strftime("%Y-%m-%d %X"))
예제 #9
0
def main():
    """Compute MFCC features and CMVN statistics for train/dev/test sets.

    Parses command-line options, then for each subset computes raw MFCC
    (the train set optionally in parallel), CMVN statistics, and the
    CMVN-applied feature, saving each archive under ``<expDir>/mfcc/``.
    """
    # ------------- Parse arguments from command line ----------------------
    args.describe(
        "This program is used to compute MFCC feature and CMVN statistics")
    args.add("--expDir",
             abbr="-e",
             dtype=str,
             default="exp",
             discription="The data and output path of current experiment.")
    args.add("--useEnergy",
             abbr="-u",
             dtype=bool,
             default=False,
             discription="Whether add energy to MFCC feature.")
    args.add(
        "--parallel",
        abbr="-p",
        dtype=int,
        default=4,
        minV=1,
        maxV=10,
        discription=
        "The number of parallel process to compute train feature of train dataset."
    )
    args.parse()
    # Take a backup of the parsed arguments.
    args.print_args()  # print arguments to display
    args.save(os.path.join(args.expDir, "conf", "compute_mfcc.args"))

    # ---------- Compute mfcc feature of train, dev and test dataset -----------
    mfccConfig = {"--use-energy": "true" if args.useEnergy else "false"}

    for Name in ["train", "dev", "test"]:
        print(f"Compute {Name} MFCC feature.")

        if Name == "train" and args.parallel > 1:
            # Split the wav list and compute the train feature with multiple processes.
            wavFiles = exkaldi.utils.split_txt_file(
                os.path.join(args.expDir, "data", "train", "wav.scp"),
                chunks=args.parallel,
            )
            feats = exkaldi.compute_mfcc(wavFiles,
                                         config=mfccConfig,
                                         outFile=os.path.join(
                                             args.expDir, "mfcc", "train",
                                             "raw_mfcc.ark"))
            feat = exkaldi.merge_archives(feats)
        else:
            feat = exkaldi.compute_mfcc(
                os.path.join(args.expDir, "data", Name, "wav.scp"),
                config=mfccConfig,
            )
            feat.save(os.path.join(args.expDir, "mfcc", Name, "raw_mfcc.ark"))
        print(f"Generate raw MFCC feature done.")

        # Per-speaker CMVN statistics.
        cmvn = exkaldi.compute_cmvn_stats(
            feat=feat,
            spk2utt=os.path.join(args.expDir, "data", Name, "spk2utt"),
        )
        cmvn.save(os.path.join(args.expDir, "mfcc", Name, "cmvn.ark"))
        print(f"Generate CMVN statistics done.")

        # Apply CMVN and save the normalized feature.
        feat = exkaldi.use_cmvn(
            feat=feat,
            cmvn=cmvn,
            utt2spk=os.path.join(args.expDir, "data", Name, "utt2spk"),
        )
        feat.save(os.path.join(args.expDir, "mfcc", Name, "mfcc_cmvn.ark"))
        print(f"Generate MFCC feature (applied CMVN) done.")

    print("Compute MFCC done.")
예제 #10
0
def output_probability():
    """Forward fmllr features through a trained DNN and dump log-probabilities.

    Parses command-line options, restores the model checkpoint given by
    ``--predictModel``, then for each of the train/test/dev sets applies the
    same front-end pipeline used at training time (optional CMVN, delta,
    splice, normalize), forwards the network and saves the per-utterance
    log-softmax pdf probabilities to ``{expDir}/train_dnn/prob/{Name}.ark``
    for a later realignment step.

    Reads/writes the module-level ``args`` object; returns None.
    """
    # ------------- Parse arguments from command line ----------------------
    # 1. Add a discription of this program
    args.discribe(
        "This program is used to output DNN probability for realigning")
    # 2. Add options
    args.add("--expDir",
             abbr="-e",
             dtype=str,
             default="exp",
             discription="The data and output path of current experiment.")
    args.add("--dropout",
             abbr="-d",
             dtype=float,
             default=0.2,
             discription="Dropout.")
    args.add("--useCMVN",
             dtype=bool,
             default=False,
             discription="Wether apply CMVN to fmllr feature.")
    args.add(
        "--splice",
        dtype=int,
        default=10,
        discription="Splice how many frames to head and tail for Fmllr feature."
    )
    args.add("--delta",
             dtype=int,
             default=2,
             discription="Wether add delta to fmllr feature.")
    args.add("--normalizeFeat",
             dtype=bool,
             default=True,
             discription="Wether normalize the chunk dataset.")
    args.add("--predictModel",
             abbr="-m",
             dtype=str,
             default="",
             discription="If not void, skip training. Do decoding only.")
    # 3. Then start to parse arguments.
    args.parse()

    # A checkpoint is mandatory: this tool only forwards an existing model.
    declare.is_file(args.predictModel)

    dims = exkaldi.load_list_table(f"{args.expDir}/train_dnn/data/dims")
    featDim = int(dims["fmllr"])
    pdfDim = int(dims["pdfs"])
    phoneDim = int(dims["phones"])

    # Initialize model. Delta and splice both multiply the per-frame input
    # dimension, so the network input size must be expanded to match.
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)

    model = make_DNN_model(featDim, pdfDim, phoneDim)
    model.load_weights(args.predictModel)
    print(f"Restorage model from: {args.predictModel}")

    for Name in ["train", "test", "dev"]:
        print(f"Processing: {Name} dataset")
        feat = exkaldi.load_feat(
            f"{args.expDir}/train_dnn/data/{Name}/fmllr.ark")

        if args.useCMVN:
            print("Apply CMVN")
            cmvn = exkaldi.load_cmvn(
                f"{args.expDir}/train_dnn/data/{Name}/cmvn_of_fmllr.ark")
            feat = exkaldi.use_cmvn(
                feat,
                cmvn,
                utt2spk=f"{args.expDir}/train_dnn/data/{Name}/utt2spk")
            del cmvn  # release CMVN statistics early

        if args.delta > 0:
            print("Add delta to feature")
            feat = feat.add_delta(args.delta)

        if args.splice > 0:
            print("Splice feature")
            feat = feat.splice(args.splice)

        feat = feat.to_numpy()
        if args.normalizeFeat:
            print("Normalize")
            feat = feat.normalize(std=True)

        outProb = {}
        print("Forward model...")
        for utt, mat in feat.items():
            # Model returns (pdf output, phone output); only the pdf head
            # is needed for realignment.
            predPdf, predPhone = model(mat, training=False)
            outProb[utt] = exkaldi.nn.log_softmax(predPdf.numpy(), axis=1)

        # Save as a Kaldi binary archive for the realignment step.
        outProb = exkaldi.load_prob(outProb).to_bytes()
        outProb.save(f"{args.expDir}/train_dnn/prob/{Name}.ark")
        print("Save done!")
예제 #11
0
def compute_dev_wer():
  """Decode the dev_clean_2 set with each saved acoustic-model checkpoint
  (newest epoch first) and score WER across a range of LM weights.

  Reads paths and decoding configuration from the module-level ``args``.
  Writes the best ``(WER, LMWT, model path)`` tuple to
  ``{args.testModelDir}/best_wer`` — even when the loop is interrupted,
  thanks to the ``finally`` block. Returns None.
  """
  flag = "dev_clean_2"

  # Load raw features and apply the same front-end pipeline used in training.
  featsFile = f"{args.root}/{args.feat}/raw_{args.feat}_{flag}.*.ark"
  feats = exkaldi.load_feat(featsFile)

  if args.cmn:
    print("Use cmvn...")
    cmvnFile = f"{args.root}/{args.feat}/cmvn_{flag}.ark"
    cmvn = exkaldi.load_cmvn(cmvnFile)
    feats = exkaldi.use_cmvn(feats,cmvn,utt2spk=f"{args.root}/data/{flag}/utt2spk")
    del cmvn

  if args.delta > 0:
    print("Add delta...")
    feats = feats.add_delta(args.delta)

  if args.splice > 0:
    print("Splice feature...")
    feats = feats.splice(args.splice)

  feats = feats.to_numpy()
  featDim = feats.dim

  # Query the output dimensions from the alignment HMM, then release it.
  hmm = exkaldi.load_hmm(f"{args.root}/exp/tri3b_ali_train_clean_5/final.mdl")
  pdfDim = hmm.info.pdfs
  phoneDim = hmm.info.phones
  del hmm

  print("featDim:",featDim,"pdfDim:",pdfDim,"phoneDim:",phoneDim)
  minWER = None

  try:
    # Try epochs from newest to oldest; skip missing checkpoints.
    for ModelPathID in range(args.epoch,0,-1):
      ModelPath = f"{args.testModelDir}/model_ep{ModelPathID}.h5"
      if not os.path.isfile(ModelPath):
        continue

      print("Use Model:",ModelPath)
      decodeOut = ModelPath[:-3]  # strip ".h5": per-model decode directory
      exkaldi.utils.make_dependent_dirs(decodeOut,pathIsFile=False)

      model = make_DNN_acoustic_model(featDim,pdfDim)
      model.load_weights(ModelPath)

      print("Forward...")
      result = {}
      for uttID in feats.keys():
        pdfP = model(feats[uttID],training=False)
        result[uttID] = exkaldi.nn.log_softmax(pdfP.numpy(),axis=1)

      amp = exkaldi.load_prob(result)
      hmmFile = f"{args.root}/exp/tri3b_ali_dev_clean_2/final.mdl"
      HCLGFile = f"{args.root}/exp/tri3b/graph_tgsmall/HCLG.fst"
      table = f"{args.root}/exp/tri3b/graph_tgsmall/words.txt"
      trans = f"{args.root}/data/dev_clean_2/text"

      print("Decoding...")
      lat = exkaldi.decode.wfst.nn_decode(
                                          prob=amp.subset(chunks=4), 
                                          hmm=hmmFile, 
                                          HCLGFile=HCLGFile, 
                                          symbolTable=table,
                                          beam=10,
                                          latBeam=8,
                                          acwt=0.1,
                                          minActive=200,
                                          maxActive=7000,
                                          outFile=os.path.join(decodeOut,"lat")
                                        )
      lat = exkaldi.merge_archives(lat)

      print("Scoring...")
      # Sweep the language-model weight and keep the smallest WER seen so far.
      for LMWT in range(1,10,1):
        result = lat.get_1best(table,hmmFile,lmwt=LMWT,acwt=0.1,phoneLevel=False)
        result = exkaldi.hmm.transcription_from_int(result,table)
        result.save( os.path.join(decodeOut,f"trans.{LMWT}") )

        score = exkaldi.decode.score.wer(ref=trans,hyp=result,mode="present")
        print("LMWT: ",LMWT ,"WER: ",score.WER)
        if minWER is None or score.WER < minWER[0]:
          minWER = (score.WER, LMWT, ModelPath)
  finally:
    # Persist the best result even if decoding stopped part-way through.
    if minWER is not None:
      print("Best WER:",minWER)
      with open(f"{args.testModelDir}/best_wer","w") as fw:
        fw.write(str(minWER))
예제 #12
0
def train_model():
    """Train a GRU acoustic model on TIMIT fmllr features with PyTorch.

    Uses the module-level ``args`` namespace for all configuration. Logs the
    configuration to ``<outDir>/configure``, builds train/dev data iterators
    over Kaldi fmllr features with pdf and phone alignment labels, trains a
    dual-output GRU (pdf head + phone head) for ``args.epoch`` epochs, scores
    test-set WER from epoch index 2 onward, and saves checkpoints through the
    exkaldi ``Supporter``. Returns None.
    """
    global args

    print("\n############## Parameters Configure ##############")

    # Show configure information and write them to file
    def configLog(message, f):
        print(message)
        f.write(message + '\n')

    f = open(args.outDir + '/configure', "w")
    configLog(
        'Start System Time:{}'.format(
            datetime.datetime.now().strftime("%Y-%m-%d %X")), f)
    configLog('Host Name:{}'.format(socket.gethostname()), f)
    configLog('Fix Random Seed:{}'.format(args.randomSeed), f)
    configLog('Mini Batch Size:{}'.format(args.batchSize), f)
    configLog('GPU ID:{}'.format(args.gpu), f)
    configLog('Train Epochs:{}'.format(args.epoch), f)
    configLog('Output Folder:{}'.format(args.outDir), f)
    configLog('GRU layers:{}'.format(args.layer), f)
    configLog('GRU hidden nodes:{}'.format(args.hiddenNode), f)
    configLog('GRU dropout:{}'.format(args.dropout), f)
    configLog('Use CMVN:{}'.format(args.useCMVN), f)
    configLog('Splice N Frames:{}'.format(args.splice), f)
    configLog('Add N Deltas:{}'.format(args.delta), f)
    configLog('Normalize Chunk:{}'.format(args.normalizeChunk), f)
    configLog('Normalize AMP:{}'.format(args.normalizeAMP), f)
    configLog('Decode Minimum Active:{}'.format(args.minActive), f)
    configLog('Decode Maximum Active:{}'.format(args.maxActive), f)
    configLog('Decode Maximum Memory:{}'.format(args.maxMemory), f)
    configLog('Decode Beam:{}'.format(args.beam), f)
    configLog('Decode Lattice Beam:{}'.format(args.latBeam), f)
    configLog('Decode Acoustic Weight:{}'.format(args.acwt), f)
    configLog('Decode minimum Language Weight:{}'.format(args.minLmwt), f)
    configLog('Decode maximum Language Weight:{}'.format(args.maxLmwt), f)
    f.close()

    print("\n############## Train GRU Acoustic Model ##############")

    #----------------- STEP 1: Prepare Train Data -----------------
    print('Prepare data iterator...')
    # Fmllr data file
    trainScpFile = args.TIMITpath + '/data-fmllr-tri3/train/feats.scp'
    devScpFile = args.TIMITpath + '/data-fmllr-tri3/dev/feats.scp'
    # Alignment label file
    trainAliFile = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali/ali.*.gz'
    devAliFile = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_dev/ali.*.gz'
    # Load label
    trainLabelPdf = E.load_ali(trainAliFile)
    trainLabelPho = E.load_ali(trainAliFile, returnPhone=True)
    # Shift phone IDs down by 1 (presumably to make them 0-based class
    # targets for NLLLoss — confirm against the phone table).
    for i in trainLabelPho.keys():
        trainLabelPho[i] = trainLabelPho[i] - 1
    devLabelPdf = E.load_ali(devAliFile)
    devLabelPho = E.load_ali(devAliFile, returnPhone=True)
    for i in devLabelPho.keys():
        devLabelPho[i] = devLabelPho[i] - 1
    # CMVN file
    trainUttSpk = args.TIMITpath + '/data-fmllr-tri3/train/utt2spk'
    trainCmvnState = args.TIMITpath + '/data-fmllr-tri3/train/cmvn.ark'
    devUttSpk = args.TIMITpath + '/data-fmllr-tri3/dev/utt2spk'
    devCmvnState = args.TIMITpath + '/data-fmllr-tri3/dev/cmvn.ark'

    # Design a process function: called by the DataIterator for every chunk
    # to turn raw Kaldi features + labels into trainable numpy batches.
    def loadChunkData(iterator, feat, otherArgs):
        # <feat> is KaldiArk object
        global args
        uttSpk, cmvnState, labelPdf, labelPho, toDo = otherArgs
        # use CMVN
        if args.useCMVN:
            feat = E.use_cmvn(feat, cmvnState, uttSpk)
        # Add delta
        if args.delta > 0:
            feat = E.add_delta(feat, args.delta)
        # Splice front-back n frames
        if args.splice > 0:
            feat = feat.splice(args.splice)
        # Transform to KaldiDict and sort them by frame length
        feat = feat.array.sort(by='frame')
        # Normalize
        if args.normalizeChunk:
            feat = feat.normalize()
        # Concatenate label
        datas = feat.concat([labelPdf, labelPho], axis=1)
        # cut frames: the truncation limit grows with the epoch
        # (100 frames at epoch 0 up to 1000 from epoch 4 on), so early
        # epochs train on shorter sequences.
        if toDo == 'train':
            if iterator.epoch >= 4:
                datas = datas.cut(1000)
            elif iterator.epoch >= 3:
                datas = datas.cut(800)
            elif iterator.epoch >= 2:
                datas = datas.cut(400)
            elif iterator.epoch >= 1:
                datas = datas.cut(200)
            elif iterator.epoch >= 0:
                datas = datas.cut(100)
        # Transform trainable numpy data
        datas, _ = datas.merge(keepDim=True, sortFrame=True)
        return datas

    # Make data iterator
    train = E.DataIterator(trainScpFile,
                           loadChunkData,
                           args.batchSize,
                           chunks=5,
                           shuffle=False,
                           otherArgs=(trainUttSpk, trainCmvnState,
                                      trainLabelPdf, trainLabelPho, 'train'))
    print('Generate train dataset. Chunks:{} / Batch size:{}'.format(
        train.chunks, train.batchSize))
    dev = E.DataIterator(devScpFile,
                         loadChunkData,
                         args.batchSize,
                         chunks=1,
                         shuffle=False,
                         otherArgs=(devUttSpk, devCmvnState, devLabelPdf,
                                    devLabelPho, 'dev'))
    print('Generate validation dataset. Chunks:{} / Batch size:{}.'.format(
        dev.chunks, dev.batchSize))
    print("Done.")

    print('Prepare model...')
    # Base fmllr dimension is 40; delta and splice multiply the network
    # input size, mirroring the pipeline in loadChunkData.
    featDim = 40
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)
    model = GRU(featDim, trainLabelPdf.target, trainLabelPho.target)
    lossfunc = nn.NLLLoss()
    if args.gpu >= 0:
        model = model.cuda(args.gpu)
        lossfunc = lossfunc.cuda(args.gpu)
    print('Generate model done.')

    print('Prepare optimizer and supporter...')
    # Learning-rate schedule: list of (epoch, lr) pairs consumed as epochs
    # pass (see the end of the training loop).
    lr = [(0, 0.0004)]
    print('Learning Rate:', lr)
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=lr[0][1],
                                    alpha=0.95,
                                    eps=1e-8,
                                    weight_decay=0,
                                    momentum=0,
                                    centered=False)
    lr.pop(0)
    supporter = E.Supporter(args.outDir)
    print('Done.')

    print('Prepare test data...')
    # Fmllr file
    testFilePath = args.TIMITpath + '/data-fmllr-tri3/test/feats.scp'
    testFeat = E.load(testFilePath)
    # Use CMVN
    if args.useCMVN:
        testUttSpk = args.TIMITpath + '/data-fmllr-tri3/test/utt2spk'
        testCmvnState = args.TIMITpath + '/data-fmllr-tri3/test/cmvn.ark'
        testFeat = E.use_cmvn(testFeat, testCmvnState, testUttSpk)
    # Add delta
    if args.delta > 0:
        testFeat = E.add_delta(testFeat, args.delta)
    # Splice frames
    if args.splice > 0:
        testFeat = testFeat.splice(args.splice)
    # Transform to array
    testFeat = testFeat.array
    # Normalize
    if args.normalizeChunk:
        testFeat = testFeat.normalize()
    # Normalize acoustic model output
    if args.normalizeAMP:
        # compute pdf counts in order to normalize acoustic model posterior probability.
        countFile = args.outDir + '/pdfs_counts.txt'
        if not os.path.isfile(countFile):
            _ = E.analyze_counts(aliFile=trainAliFile, outFile=countFile)
        with open(countFile) as f:
            line = f.readline().strip().strip("[]").strip()
        counts = np.array(list(map(float, line.split())), dtype=np.float32)
        # Log of the pdf prior distribution; subtracted from network output
        # in wer_fun below.
        normalizeBias = np.log(counts / np.sum(counts))
    else:
        normalizeBias = 0
    print('Done.')

    print('Prepare test data decode and score function...')

    # Design a function to compute WER of test data
    def wer_fun(model, feat, normalizeBias):
        global args
        # Tranform the formate of KaldiDict feature data in order to forward network
        temp = E.KaldiDict()
        utts = feat.utts
        with torch.no_grad():
            for index, utt in enumerate(utts):
                data = torch.Tensor(feat[utt][:, np.newaxis, :])
                data = torch.autograd.Variable(data)
                if args.gpu >= 0:
                    data = data.cuda(args.gpu)
                out1, out2 = model(data, is_training=False, device=args.gpu)
                # Subtract the log-prior bias (0 when normalizeAMP is off)
                # from the pdf-head output before decoding.
                out = out1.cpu().detach().numpy() - normalizeBias
                temp[utt] = out
                print("(testing) Forward network {}/{}".format(
                    index, len(utts)),
                      end=" " * 20 + '\r')
        # Tansform KaldiDict to KaldiArk format
        print('(testing) Transform to ark', end=" " * 20 + '\r')
        amp = temp.ark
        # Decode and obtain lattice
        hmm = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_test/final.mdl'
        hclg = args.TIMITpath + '/exp/tri3/graph/HCLG.fst'
        lexicon = args.TIMITpath + '/exp/tri3/graph/words.txt'
        print('(testing) Generate Lattice', end=" " * 20 + '\r')
        lattice = E.decode_lattice(amp, hmm, hclg, lexicon, args.minActive,
                                   args.maxActive, args.maxMemory, args.beam,
                                   args.latBeam, args.acwt)
        # Change language weight from 1 to 10, get the 1best words.
        print('(testing) Get 1-best words', end=" " * 20 + '\r')
        outs = lattice.get_1best(lmwt=args.minLmwt,
                                 maxLmwt=args.maxLmwt,
                                 outFile=args.outDir + '/outRaw')
        # If reference file is not existed, make it.
        # TIMIT convention: map 48 phones down to 39 before scoring.
        phonemap = args.TIMITpath + '/conf/phones.60-48-39.map'
        outFilter = args.TIMITpath + '/local/timit_norm_trans.pl -i - -m {} -from 48 -to 39'.format(
            phonemap)
        if not os.path.isfile(args.outDir + '/test_filt.txt'):
            refText = args.TIMITpath + '/data/test/text'
            cmd = 'cat {} | {} > {}/test_filt.txt'.format(
                refText, outFilter, args.outDir)
            (_, _) = E.run_shell_cmd(cmd)
        # Score WER and find the smallest one.
        print('(testing) Score', end=" " * 20 + '\r')
        minWER = None
        for k in range(args.minLmwt, args.maxLmwt + 1, 1):
            cmd = 'cat {} | {} > {}/test_prediction_filt.txt'.format(
                outs[k], outFilter, args.outDir)
            (_, _) = E.run_shell_cmd(cmd)
            os.remove(outs[k])
            score = E.wer('{}/test_filt.txt'.format(args.outDir),
                          "{}/test_prediction_filt.txt".format(args.outDir),
                          mode='all')
            if minWER == None or score['WER'] < minWER:
                minWER = score['WER']
        os.remove("{}/test_prediction_filt.txt".format(args.outDir))
        return minWER

    print('Done.')

    print('Now Start to Train')
    for e in range(args.epoch):
        print()
        i = 0
        usedTime = 0
        supporter.send_report({'epoch': e})
        # Train
        model.train()
        while True:
            start = time.time()
            # Get batch data and label
            batch = train.next()
            batch, lengths = E.pad_sequence(batch, shuffle=True, pad=0)
            batch = torch.Tensor(batch)
            # Last two feature columns are the concatenated pdf and phone
            # labels (see loadChunkData); the rest is the network input.
            data, label1, label2 = batch[:, :, 0:-2], batch[:, :,
                                                            -2], batch[:, :,
                                                                       -1]
            data = torch.autograd.Variable(data)
            label1 = torch.autograd.Variable(label1).view(-1).long()
            label2 = torch.autograd.Variable(label2).view(-1).long()
            # Send to GPU if use
            if args.gpu >= 0:
                data = data.cuda(args.gpu)
                label1 = label1.cuda(args.gpu)
                label2 = label2.cuda(args.gpu)
            # Clear grad
            optimizer.zero_grad()
            # Forward model
            out1, out2 = model(data, is_training=True, device=args.gpu)
            # Loss back propagation: multi-task sum of the pdf-head and
            # phone-head losses.
            loss1 = lossfunc(out1, label1)
            loss2 = lossfunc(out2, label2)
            loss = loss1 + loss2
            loss.backward()
            # Update parameter
            optimizer.step()
            # Compute accuracy (on the pdf head only)
            pred = torch.max(out1, dim=1)[1]
            acc = 1 - torch.mean((pred != label1).float())
            # Record train information
            supporter.send_report({
                'train_loss': float(loss),
                'train_acc': float(acc)
            })
            ut = time.time() - start
            usedTime += ut
            batchLoss = float(loss.cpu().detach().numpy())
            print(
                "(training) Epoch:{}/{}% Chunk:{}/{}% Iter:{} Used-time:{}s Batch-loss:{} Speed:{}iters/s"
                .format(e, int(100 * train.epochProgress), train.chunk,
                        int(100 * train.chunkProgress), i, int(usedTime),
                        "%.4f" % (batchLoss), "%.2f" % (1 / ut)),
                end=" " * 5 + '\r')
            i += 1
            # If forwarded all data, break
            if train.isNewEpoch:
                break
        # Evaluation
        model.eval()
        with torch.no_grad():
            while True:
                start = time.time()
                # Get batch data and label
                batch = dev.next()
                batch, lengths = E.pad_sequence(batch, shuffle=True, pad=0)
                maxLen, bSize, _ = batch.shape
                batch = torch.Tensor(batch)
                data, label1, label2 = batch[:, :,
                                             0:-2], batch[:, :,
                                                          -2], batch[:, :, -1]
                data = torch.autograd.Variable(data)
                label1 = torch.autograd.Variable(label1).view(-1).long()
                # Send to GPU if use
                if args.gpu >= 0:
                    data = data.cuda(args.gpu)
                    label1 = label1.cuda(args.gpu)
                # Forward model
                out1, out2 = model(data, is_training=False, device=args.gpu)
                # Compute accuracy of padded label
                pred = torch.max(out1, dim=1)[1]
                acc_pad = 1 - torch.mean((pred != label1).float())
                # Compute accuracy of not padded label. This should be more correct.
                label = label1.cpu().numpy().reshape([maxLen, bSize])
                pred = pred.cpu().numpy().reshape([maxLen, bSize])
                label = E.unpack_padded_sequence(label, lengths)
                pred = E.unpack_padded_sequence(pred, lengths)
                acc_nopad = E.accuracy(pred, label)
                # Record evaluation information
                supporter.send_report({
                    'dev_acc_pad': float(acc_pad),
                    'dev_acc_nopad': acc_nopad
                })
                ut = time.time() - start
                usedTime += ut
                # NOTE(review): `loss` here is left over from the last
                # training iteration — no validation loss is computed, so
                # the Batch-loss printed below is stale.
                batchLoss = float(loss.cpu().detach().numpy())
                print(
                    "(Validating) Epoch:{}/{}% Chunk:{}/{}% Iter:{} Used-time:{}s Batch-loss:{} Speed:{}iters/s"
                    .format(e, int(100 * dev.epochProgress), dev.chunk,
                            int(100 * dev.chunkProgress), i, int(usedTime),
                            "%.4f" % (batchLoss), "%.2f" % (1 / ut)),
                    end=" " * 5 + '\r')
                i += 1
                # If forwarded all data, break
                if dev.isNewEpoch:
                    break
            print()
            # Compute test WER from epoch index 2 onward (i.e. the 3rd
            # epoch, since e starts at 0).
            if e >= 2:
                minWER = wer_fun(model, testFeat, normalizeBias)
                supporter.send_report({'test_wer': minWER})
        # one epoch is over so collect information
        supporter.collect_report(plot=True)

        # Save model
        def saveFunc(archs):
            fileName, model = archs
            torch.save(model.state_dict(), fileName)

        supporter.save_arch(saveFunc, arch={'GRU': model})
        # Change learning rate when the schedule's next epoch is reached
        if len(lr) > 0 and supporter.judge('epoch', '>=', lr[0][0]):
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr[0][1]
            lr.pop(0)

    print("GRU Acoustic Model training done.")
    print("The final model has been saved as:", supporter.finalArch["GRU"])
    print('Over System Time:', datetime.datetime.now().strftime("%Y-%m-%d %X"))