import os
import time
import random
import datetime

import numpy as np
import tensorflow as tf
from tensorflow import keras

import exkaldi
# NOTE: the import paths of `args` and `declare` are assumptions based on the
# exkaldi examples; adjust them to match your exkaldi version if necessary.
# Helper components used below (prepare_DNN_data, prepare_LSTM_data,
# process_feat_ali, make_generator, make_DNN_model, make_LSTM_model,
# DataIterator, EvaluateWER, ModelSaver) are defined elsewhere in this recipe.
from exkaldi import args, declare


def prepare_test_data(postProbDim):

    feat = exkaldi.load_feat(f"{args.expDir}/train_lstm/data/test/fmllr.ark")

    if args.useCMVN:
        cmvn = exkaldi.load_cmvn(f"{args.expDir}/train_lstm/data/test/cmvn_of_fmllr.ark")
        feat = exkaldi.use_cmvn(feat, cmvn, utt2spk=f"{args.expDir}/train_lstm/data/test/utt2spk")
        del cmvn

    if args.delta > 0:
        feat = feat.add_delta(args.delta)

    if args.splice > 0:
        feat = feat.splice(args.splice)

    feat = feat.to_numpy()
    if args.normalizeFeat:
        feat = feat.normalize(std=True)

    # Optionally normalize the acoustic model output: estimate a per-class
    # bias from the training alignment so posteriors can be rescaled before
    # decoding.
    if args.normalizeAMP:
        ali = exkaldi.load_ali(f"{args.expDir}/train_lstm/data/train/pdfID.npy", aliType="pdfID")
        normalizeBias = exkaldi.nn.compute_postprob_norm(ali, postProbDim)
    else:
        normalizeBias = 0

    # Reference transcription, mapped from the 48-phone to the 39-phone set.
    trans = exkaldi.load_transcription(f"{args.expDir}/train_lstm/data/test/text")
    convertTable = exkaldi.load_list_table(f"{args.expDir}/dict/phones.48_to_39.map")
    trans = trans.convert(convertTable)

    return feat, normalizeBias, trans
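
# A sketch (an assumption about, not a copy of, exkaldi's implementation) of
# what the normalization bias amounts to: subtracting log state-priors from
# the network's log-posteriors turns them into the scaled log-likelihoods an
# HMM decoder expects. `pdfAli` stands for a flat array of pdf IDs taken from
# the training alignment, as loaded above.
def _example_log_prior_bias(pdfAli, postProbDim):
    prior = np.bincount(pdfAli, minlength=postProbDim) / len(pdfAli)
    return -np.log(prior + 1e-10)  # add this to log-softmax output before decoding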
def decode_score_test(prob, trans, outDir):

    trans.save(os.path.join(outDir, "ref.txt"))

    hmmFile = f"{args.expDir}/train_sat/final.mdl"
    HCLGFile = f"{args.expDir}/train_sat/graph/HCLG.{args.order}.fst"
    lexicons = exkaldi.load_lex(f"{args.expDir}/dict/lexicons.lex")
    phoneMap = exkaldi.load_list_table(f"{args.expDir}/dict/phones.48_to_39.map")

    # Decode the posteriors through the WFST graph.
    lat = exkaldi.decode.wfst.nn_decode(
        prob=prob,
        hmm=hmmFile,
        HCLGFile=HCLGFile,
        symbolTable=lexicons("words"),
        beam=args.beam,
        latBeam=args.latBeam,
        acwt=args.acwt,
        minActive=200,
        maxActive=7000,
    )

    # Grid-search the insertion penalty and LM weight; keep the configuration
    # with the lowest phone error rate.
    minWER = None
    for penalty in [0., 0.5, 1.0]:
        for LMWT in range(1, 15):
            newLat = lat.add_penalty(penalty)
            result = newLat.get_1best(lexicons("phones"), hmmFile, lmwt=LMWT, acwt=1, phoneLevel=True)
            result = exkaldi.hmm.transcription_from_int(result, lexicons("phones"))
            result = result.convert(phoneMap)

            fileName = f"{outDir}/penalty_{penalty}_lmwt_{LMWT}.txt"
            result.save(fileName)

            score = exkaldi.decode.score.wer(ref=trans, hyp=result, mode="present")
            if minWER is None or score.WER < minWER[0]:
                minWER = (score.WER, fileName)

    with open(f"{outDir}/best_PER", "w") as fw:
        fw.write(f"{minWER[0]}% {minWER[1]}")

    return minWER[0]
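
# A minimal usage sketch tying the two helpers above together. It assumes, as
# elsewhere in this recipe, that the model's first output is pdf-level logits,
# and that the bias returned by prepare_test_data is added to the
# log-posteriors (presumably what the EvaluateWER callback does as well).
def _example_score_model(model, pdfDim, outDir):
    testFeat, testBias, testTrans = prepare_test_data(postProbDim=pdfDim)
    prob = {}
    for utt, mat in testFeat.items():
        pdfLogits, _ = model(mat, training=False)
        prob[utt] = exkaldi.nn.log_softmax(pdfLogits.numpy(), axis=1) + testBias
    return decode_score_test(exkaldi.load_prob(prob), testTrans, outDir)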
def main():

    # ------------- Parse arguments from command line ----------------------
    # 1. Add a description of this program.
    # (`discribe`/`discription` below are spelled as in the exkaldi
    # argument-parser API, so they are kept as-is.)
    args.discribe("This program is used to train triphone DNN acoustic model with Tensorflow")

    # 2. Add options.
    args.add("--expDir", abbr="-e", dtype=str, default="exp",
             discription="The data and output path of current experiment.")
    args.add("--LDAsplice", dtype=int, default=3,
             discription="Splice how many frames to head and tail for LDA feature.")
    args.add("--randomSeed", dtype=int, default=1234, discription="Random seed.")
    args.add("--batchSize", abbr="-b", dtype=int, default=128, discription="Mini batch size.")
    args.add("--gpu", abbr="-g", dtype=str, default="all", choices=["all", "0", "1"],
             discription="Use GPU.")
    args.add("--epoch", dtype=int, default=30, discription="Epochs.")
    args.add("--testStartEpoch", dtype=int, default=5,
             discription="Start to evaluate test dataset.")
    args.add("--dropout", abbr="-d", dtype=float, default=0.2, discription="Dropout.")
    args.add("--useCMVN", dtype=bool, default=False,
             discription="Whether to apply CMVN to fMLLR feature.")
    args.add("--splice", dtype=int, default=10,
             discription="Splice how many frames to head and tail for fMLLR feature.")
    args.add("--delta", dtype=int, default=2,
             discription="Order of deltas to add to fMLLR feature.")
    args.add("--normalizeFeat", dtype=bool, default=True,
             discription="Whether to normalize the chunk dataset.")
    args.add("--normalizeAMP", dtype=bool, default=False,
             discription="Whether to normalize the post-probability.")
    args.add("--order", abbr="-o", dtype=int, default=6, discription="Language model order.")
    args.add("--beam", dtype=int, default=13, discription="Decode beam size.")
    args.add("--latBeam", dtype=int, default=6, discription="Lattice beam size.")
    args.add("--acwt", dtype=float, default=0.083333, discription="Acoustic model weight.")
    args.add("--predictModel", abbr="-m", dtype=str, default="",
             discription="If not empty, skip training and do decoding only.")

    # 3. Parse arguments.
    args.parse()

    # 4. Take a backup of the arguments.
    argsLogFile = os.path.join(args.expDir, "conf", "train_dnn.args")
    args.save(argsLogFile)

    random.seed(args.randomSeed)
    np.random.seed(args.randomSeed)
    tf.random.set_seed(args.randomSeed)

    # ------------- Prepare data for DNN training ----------------------
    if not os.path.isfile(f"./{args.expDir}/train_dnn/data/dims"):
        prepare_DNN_data()

    # ------------- Prepare the output directory ----------------------
    stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    outDir = f"{args.expDir}/train_dnn/out_{stamp}"
    exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

    # ------------------------ Training and validation -----------------------------
    dims = exkaldi.load_list_table(f"{args.expDir}/train_dnn/data/dims")
    featDim = int(dims["fmllr"])
    pdfDim = int(dims["pdfs"])
    phoneDim = int(dims["phones"])

    # The input dimension grows with deltas and splicing.
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)

    if len(args.predictModel.strip()) == 0:

        print('Prepare Data Iterator...')
        # Prepare fMLLR feature iterators.
        trainDataset = process_feat_ali(training=True)
        traindataLen = len(trainDataset)
        train_gen = tf.data.Dataset.from_generator(
            lambda: make_generator(trainDataset),
            (tf.float32, {"pdfID": tf.int32, "phoneID": tf.int32})
        ).batch(args.batchSize).prefetch(3)
        steps_per_epoch = traindataLen // args.batchSize

        devDataset = process_feat_ali(training=False)
        devdataLen = len(devDataset)
        dev_gen = tf.data.Dataset.from_generator(
            lambda: make_generator(devDataset),
            (tf.float32, {"pdfID": tf.int32, "phoneID": tf.int32})
        ).batch(args.batchSize).prefetch(3)
        validation_steps = devdataLen // args.batchSize

        print('Prepare test data')
        testFeat, testBias, testTrans = prepare_test_data(postProbDim=pdfDim)

        def train_step():
            model = make_DNN_model(featDim, pdfDim, phoneDim)
            model.summary()

            model.compile(
                loss={
                    "pdfID": keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                    "phoneID": keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                },
                loss_weights={"pdfID": 1, "phoneID": 1},
                metrics={
                    "pdfID": keras.metrics.SparseCategoricalAccuracy(),
                    "phoneID": keras.metrics.SparseCategoricalAccuracy(),
                },
                optimizer=keras.optimizers.SGD(0.08, momentum=0.0),
            )

            def lrScheduler(epoch):
                # Fixed schedule: 0.08 for the first epochs, then halved
                # step by step down to 0.001.
                if epoch > 25:
                    return 0.001
                elif epoch > 22:
                    return 0.0025
                elif epoch > 19:
                    return 0.005
                elif epoch > 17:
                    return 0.01
                elif epoch > 15:
                    return 0.02
                elif epoch > 10:
                    return 0.04
                else:
                    return 0.08

            model.fit(
                x=train_gen,
                steps_per_epoch=steps_per_epoch,
                epochs=args.epoch,
                validation_data=dev_gen,
                validation_steps=validation_steps,
                verbose=1,
                initial_epoch=0,
                callbacks=[
                    keras.callbacks.EarlyStopping(patience=5, verbose=1),
                    keras.callbacks.TensorBoard(log_dir=outDir),
                    keras.callbacks.LearningRateScheduler(lrScheduler),
                    EvaluateWER(model, testFeat, testBias, testTrans, outDir),
                    ModelSaver(model, outDir),
                ],
            )

        print("Using GPU: ", args.gpu)
        if args.gpu != "all":
            os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
            train_step()
        else:
            my_strategy = tf.distribute.MirroredStrategy()
            with my_strategy.scope():
                train_step()

    else:
        declare.is_file(args.predictModel)

        model = make_DNN_model(featDim, pdfDim, phoneDim)
        model.summary()
        model.load_weights(args.predictModel)

        print('Prepare test data')
        testFeat, testBias, testTrans = prepare_test_data(postProbDim=pdfDim)

        scorer = EvaluateWER(model, testFeat, testBias, testTrans, outDir)
        logs = {}
        scorer.on_epoch_end(5, logs)
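
# make_generator is defined elsewhere in this recipe; this sketch only shows
# the contract assumed by tf.data.Dataset.from_generator above: one
# (feature, {"pdfID", "phoneID"}) pair per frame, matching the declared output
# types, repeated indefinitely because Keras draws steps_per_epoch batches per
# epoch. The (feat, pdfID, phoneID) record layout of `dataset` is an assumption.
def _example_make_generator(dataset):
    while True:
        for feat, pdfID, phoneID in dataset:
            yield feat, {"pdfID": pdfID, "phoneID": phoneID}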
def output_probability():

    # ------------- Parse arguments from command line ----------------------
    # 1. Add a description of this program.
    args.discribe("This program is used to output DNN probability for realigning")

    # 2. Add options.
    args.add("--expDir", abbr="-e", dtype=str, default="exp",
             discription="The data and output path of current experiment.")
    args.add("--dropout", abbr="-d", dtype=float, default=0.2, discription="Dropout.")
    args.add("--useCMVN", dtype=bool, default=False,
             discription="Whether to apply CMVN to fMLLR feature.")
    args.add("--splice", dtype=int, default=10,
             discription="Splice how many frames to head and tail for fMLLR feature.")
    args.add("--delta", dtype=int, default=2,
             discription="Order of deltas to add to fMLLR feature.")
    args.add("--normalizeFeat", dtype=bool, default=True,
             discription="Whether to normalize the chunk dataset.")
    args.add("--predictModel", abbr="-m", dtype=str, default="",
             discription="If not empty, skip training and do decoding only.")

    # 3. Parse arguments.
    args.parse()

    declare.is_file(args.predictModel)

    dims = exkaldi.load_list_table(f"{args.expDir}/train_dnn/data/dims")
    featDim = int(dims["fmllr"])
    pdfDim = int(dims["pdfs"])
    phoneDim = int(dims["phones"])

    # The input dimension grows with deltas and splicing.
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)

    model = make_DNN_model(featDim, pdfDim, phoneDim)
    model.load_weights(args.predictModel)
    print(f"Restored model from: {args.predictModel}")

    for Name in ["train", "test", "dev"]:
        print(f"Processing: {Name} dataset")
        feat = exkaldi.load_feat(f"{args.expDir}/train_dnn/data/{Name}/fmllr.ark")

        if args.useCMVN:
            print("Apply CMVN")
            cmvn = exkaldi.load_cmvn(f"{args.expDir}/train_dnn/data/{Name}/cmvn_of_fmllr.ark")
            feat = exkaldi.use_cmvn(feat, cmvn,
                                    utt2spk=f"{args.expDir}/train_dnn/data/{Name}/utt2spk")
            del cmvn

        if args.delta > 0:
            print("Add delta to feature")
            feat = feat.add_delta(args.delta)

        if args.splice > 0:
            print("Splice feature")
            feat = feat.splice(args.splice)

        feat = feat.to_numpy()
        if args.normalizeFeat:
            print("Normalize")
            feat = feat.normalize(std=True)

        outProb = {}
        print("Forward model...")
        for utt, mat in feat.items():
            predPdf, predPhone = model(mat, training=False)
            outProb[utt] = exkaldi.nn.log_softmax(predPdf.numpy(), axis=1)

        # Save the log-probabilities as a Kaldi binary archive.
        outProb = exkaldi.load_prob(outProb).to_bytes()
        outProb.save(f"{args.expDir}/train_dnn/prob/{Name}.ark")
        print("Save done!")
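
# For reference, a numerically stable equivalent of exkaldi.nn.log_softmax on
# a [frames, classes] matrix (an illustration, not exkaldi's source code):
def _example_log_softmax(mat, axis=1):
    shifted = mat - mat.max(axis=axis, keepdims=True)  # guard exp() against overflow
    return shifted - np.log(np.exp(shifted).sum(axis=axis, keepdims=True))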
def main():

    # ------------- Parse arguments from command line ----------------------
    # 1. Add a description of this program.
    args.discribe("This program is used to train triphone LSTM acoustic model with Tensorflow")

    # 2. Add options.
    args.add("--expDir", abbr="-e", dtype=str, default="exp",
             discription="The data and output path of current experiment.")
    args.add("--LDAsplice", dtype=int, default=3,
             discription="Splice how many frames to head and tail for LDA feature.")
    args.add("--randomSeed", dtype=int, default=1234, discription="Random seed.")
    args.add("--batchSize", abbr="-b", dtype=int, default=8, discription="Mini batch size.")
    args.add("--gpu", abbr="-g", dtype=str, default="all", choices=["all", "0", "1"],
             discription="Use GPU.")
    args.add("--epoch", dtype=int, default=30, discription="Epochs.")
    args.add("--testStartEpoch", dtype=int, default=5,
             discription="Start to evaluate test dataset.")
    args.add("--dropout", abbr="-d", dtype=float, default=0.2, discription="Dropout.")
    args.add("--useCMVN", dtype=bool, default=False,
             discription="Whether to apply CMVN to fMLLR feature.")
    args.add("--splice", dtype=int, default=0,
             discription="Splice how many frames to head and tail for fMLLR feature.")
    args.add("--delta", dtype=int, default=2,
             discription="Order of deltas to add to fMLLR feature.")
    args.add("--normalizeFeat", dtype=bool, default=True,
             discription="Whether to normalize the chunk dataset.")
    args.add("--normalizeAMP", dtype=bool, default=False,
             discription="Whether to normalize the post-probability.")
    args.add("--order", abbr="-o", dtype=int, default=6, discription="Language model order.")
    args.add("--beam", dtype=int, default=13, discription="Decode beam size.")
    args.add("--latBeam", dtype=int, default=6, discription="Lattice beam size.")
    args.add("--acwt", dtype=float, default=0.083333, discription="Acoustic model weight.")
    args.add("--predictModel", abbr="-m", dtype=str, default="",
             discription="If not empty, skip training and do decoding only.")

    # 3. Parse arguments.
    args.parse()

    # 4. Take a backup of the arguments.
    args.save(f"./{args.expDir}/conf/train_lstm.args")

    random.seed(args.randomSeed)
    np.random.seed(args.randomSeed)
    tf.random.set_seed(args.randomSeed)

    # ------------- Prepare data for LSTM training ----------------------
    if not os.path.isfile(f"./{args.expDir}/train_lstm/data/dims"):
        prepare_LSTM_data()

    # ------------- Prepare the output directory ----------------------
    stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    outDir = f"{args.expDir}/train_lstm/out_{stamp}"
    exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

    # ------------------------ Training and validation -----------------------------
    dims = exkaldi.load_list_table(f"{args.expDir}/train_lstm/data/dims")
    featDim = int(dims["fmllr"])
    pdfDim = int(dims["pdfs"])
    phoneDim = int(dims["phones"])

    # The input dimension grows with deltas and splicing.
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)

    if len(args.predictModel.strip()) == 0:

        print('Prepare Data Iterator...')
        # Prepare fMLLR feature iterators.
        trainIterator = DataIterator(batchSize=args.batchSize, training=True)
        devIterator = DataIterator(batchSize=args.batchSize, training=False)

        print('Prepare test data')
        testFeat, testBias, testTrans = prepare_test_data(postProbDim=pdfDim)

        metrics = {
            "train_loss": keras.metrics.Mean(name="train/loss", dtype=tf.float32),
            "train_pdfID_accuracy": keras.metrics.Mean(name="train/pdfID_accuracy", dtype=tf.float32),
            "train_phoneID_accuracy": keras.metrics.Mean(name="train/phoneID_accuracy", dtype=tf.float32),
            "dev_loss": keras.metrics.Mean(name="eval/loss", dtype=tf.float32),
            "dev_pdfID_accuracy": keras.metrics.Mean(name="eval/pdfID_accuracy", dtype=tf.float32),
            "dev_phoneID_accuracy": keras.metrics.Mean(name="eval/phoneID_accuracy", dtype=tf.float32),
        }

        def train_step(model, optimizer, batch):
            feat, pdfAli, phoneAli = batch
            with tf.GradientTape() as tape:
                pdfPred, phonePred = model(feat, training=True)
                L1 = keras.losses.sparse_categorical_crossentropy(pdfAli, pdfPred, from_logits=True)
                L2 = keras.losses.sparse_categorical_crossentropy(phoneAli, phonePred, from_logits=True)
                loss = L1 + L2
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))

            metrics["train_loss"](loss)
            A1 = keras.metrics.sparse_categorical_accuracy(pdfAli, pdfPred)
            metrics["train_pdfID_accuracy"](A1)
            A2 = keras.metrics.sparse_categorical_accuracy(phoneAli, phonePred)
            metrics["train_phoneID_accuracy"](A2)

            return (float(np.mean(L1.numpy())), float(np.mean(L2.numpy())),
                    float(np.mean(A1.numpy())), float(np.mean(A2.numpy())))

        def dev_step(model, batch):
            feat, pdfAli, phoneAli = batch
            pdfPred, phonePred = model(feat, training=False)
            L1 = keras.losses.sparse_categorical_crossentropy(pdfAli, pdfPred, from_logits=True)
            L2 = keras.losses.sparse_categorical_crossentropy(phoneAli, phonePred, from_logits=True)
            loss = L1 + L2

            metrics["dev_loss"](loss)
            A1 = keras.metrics.sparse_categorical_accuracy(pdfAli, pdfPred)
            metrics["dev_pdfID_accuracy"](A1)
            A2 = keras.metrics.sparse_categorical_accuracy(phoneAli, phonePred)
            metrics["dev_phoneID_accuracy"](A2)

            return (float(np.mean(L1.numpy())), float(np.mean(L2.numpy())),
                    float(np.mean(A1.numpy())), float(np.mean(A2.numpy())))

        def main_loop():
            model = make_LSTM_model(args, featDim, pdfDim, phoneDim)
            model.summary()
            optimizer = keras.optimizers.RMSprop(learning_rate=0.0004, rho=0.95,
                                                 momentum=0.0, epsilon=1e-07)
            scorer = EvaluateWER(model, testFeat, testBias, testTrans, outDir)
            modelSaver = ModelSaver(model, outDir)

            for e in range(args.epoch):
                # Training
                startTime = time.time()
                for i in range(trainIterator.epochSize):
                    batch = trainIterator.next()
                    pdfLoss, phoneLoss, pdfAcc, phoneAcc = train_step(model, optimizer, batch)
                    tf.print(f"\rtraining: {i}/{trainIterator.epochSize} "
                             f"pdfID loss {pdfLoss:.3f} phoneID loss {phoneLoss:.3f} "
                             f"pdfID accuracy {pdfAcc:.3f} phoneID accuracy {phoneAcc:.3f}",
                             end="\t")
                tf.print()

                # Evaluate
                for i in range(devIterator.epochSize):
                    batch = devIterator.next()
                    pdfLoss, phoneLoss, pdfAcc, phoneAcc = dev_step(model, batch)
                    tf.print(f"\revaluate: {i}/{devIterator.epochSize} "
                             f"pdfID loss {pdfLoss:.3f} phoneID loss {phoneLoss:.3f} "
                             f"pdfID accuracy {pdfAcc:.3f} phoneID accuracy {phoneAcc:.3f}",
                             end="\t")
                tf.print()

                # Test
                tf.print("testing:", end=" ")
                testWER = scorer.test(e)
                tf.print()

                endTime = time.time()
                message = f"epoch {e} "
                for name in metrics.keys():
                    message += f"{name} {float(metrics[name].result().numpy()):.3f} "
                    metrics[name].reset_states()
                message += f"test PER {testWER:.2f} time cost {int(endTime - startTime)}s"
                tf.print(message)

                modelSaver.save(e)

        main_loop()

    else:
        declare.is_file(args.predictModel)

        model = make_LSTM_model(args, featDim, pdfDim, phoneDim)
        model.summary()
        model.load_weights(args.predictModel)

        print('Prepare test data')
        testFeat, testBias, testTrans = prepare_test_data(postProbDim=pdfDim)

        scorer = EvaluateWER(model, testFeat, testBias, testTrans, outDir)
        scorer.test(0)
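
# make_LSTM_model is defined elsewhere in this recipe; this is only a plausible
# sketch of its contract (layer sizes and cell choices are assumptions): input
# is a [batch, frames, featDim] sequence and the outputs are frame-level pdfID
# and phoneID logits, as consumed by train_step/dev_step above.
def _example_make_LSTM_model(args, featDim, pdfDim, phoneDim):
    inputs = keras.Input(shape=(None, featDim))
    x = keras.layers.LSTM(512, return_sequences=True, dropout=args.dropout)(inputs)
    x = keras.layers.LSTM(512, return_sequences=True, dropout=args.dropout)(x)
    pdfID = keras.layers.Dense(pdfDim, name="pdfID")(x)        # logits, no softmax
    phoneID = keras.layers.Dense(phoneDim, name="phoneID")(x)  # logits, no softmax
    return keras.Model(inputs=inputs, outputs=[pdfID, phoneID])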