Exemple #1
0
def prepare_test_data(postProbDim):
  """Load test features, the acoustic-model output bias and the reference text.

  The fMLLR features stored under ``<expDir>/train_lstm/data/test`` are
  optionally CMVN-processed, extended with deltas and spliced (all driven by
  the module-level ``args``), converted with ``to_numpy()`` and optionally
  normalized.

  Args:
    postProbDim: forwarded to ``exkaldi.nn.compute_postprob_norm`` together
      with the training pdf-ID alignment; only used when
      ``args.normalizeAMP`` is set.

  Returns:
    A tuple ``(feat, normalizeBias, trans)``. ``normalizeBias`` is ``0`` when
    ``args.normalizeAMP`` is false. ``trans`` is the test transcription after
    conversion with the ``phones.48_to_39.map`` table.
  """
  testFeat = exkaldi.load_feat( f"{args.expDir}/train_lstm/data/test/fmllr.ark" )

  # Optional CMVN, using the statistics and utt2spk map stored with the test set.
  if args.useCMVN:
    cmvnStats = exkaldi.load_cmvn( f"{args.expDir}/train_lstm/data/test/cmvn_of_fmllr.ark" )
    testFeat = exkaldi.use_cmvn(testFeat, cmvnStats, utt2spk=f"{args.expDir}/train_lstm/data/test/utt2spk")
    del cmvnStats

  # Optional context expansion: deltas first, then frame splicing.
  if args.delta > 0:
    testFeat = testFeat.add_delta(args.delta)
  if args.splice > 0:
    testFeat = testFeat.splice(args.splice)

  testFeat = testFeat.to_numpy()
  if args.normalizeFeat:
    testFeat = testFeat.normalize(std=True)

  # Bias for normalizing the acoustic model output; stays 0 unless requested.
  bias = 0
  if args.normalizeAMP:
    trainAli = exkaldi.load_ali(f"{args.expDir}/train_lstm/data/train/pdfID.npy", aliType="pdfID")
    bias = exkaldi.nn.compute_postprob_norm(trainAli,postProbDim)

  # Reference transcription, converted with the 48-to-39 phone map.
  refTrans = exkaldi.load_transcription(f"{args.expDir}/train_lstm/data/test/text")
  phoneMap = exkaldi.load_list_table(f"{args.expDir}/dict/phones.48_to_39.map")

  return testFeat, bias, refTrans.convert(phoneMap)
Exemple #2
0
def decode_score_test(prob, trans, outDir):
    """Decode acoustic-model output and search for the best-scoring setting.

    A lattice is produced with ``exkaldi.decode.wfst.nn_decode`` and then
    rescored over a grid of word-insertion penalties (0, 0.5, 1.0) and LM
    weights (1..14). For each grid point the phone-level 1-best result is
    converted with the ``phones.48_to_39.map`` table, saved under ``outDir``
    and scored against ``trans``.

    Args:
        prob: acoustic-model output passed as ``prob`` to ``nn_decode``.
        trans: reference transcription; saved as ``ref.txt`` and used as the
            scoring reference.
        outDir: directory receiving ``ref.txt``, one hypothesis file per grid
            point and the ``best_PER`` summary.

    Returns:
        The lowest ``score.WER`` value found over the grid.
    """
    trans.save(os.path.join(outDir, "ref.txt"))

    hmmFile = f"{args.expDir}/train_sat/final.mdl"
    HCLGFile = f"{args.expDir}/train_sat/graph/HCLG.{args.order}.fst"
    lexicons = exkaldi.load_lex(f"{args.expDir}/dict/lexicons.lex")
    phoneMap = exkaldi.load_list_table(
        f"{args.expDir}/dict/phones.48_to_39.map")

    lat = exkaldi.decode.wfst.nn_decode(
        prob=prob,
        hmm=hmmFile,
        HCLGFile=HCLGFile,
        symbolTable=lexicons("words"),
        beam=args.beam,
        latBeam=args.latBeam,
        acwt=args.acwt,
        minActive=200,
        maxActive=7000,
    )

    # (score, hypothesis file) of the best grid point seen so far.
    minWER = None
    for penalty in [0., 0.5, 1.0]:
        # The penalized lattice does not depend on the LM weight, so build it
        # once per penalty instead of once per (penalty, LMWT) pair.
        # NOTE(review): assumes add_penalty returns a new lattice and
        # get_1best leaves it unmodified - confirm against exkaldi.
        newLat = lat.add_penalty(penalty)
        for LMWT in range(1, 15, 1):
            result = newLat.get_1best(lexicons("phones"),
                                      hmmFile,
                                      lmwt=LMWT,
                                      acwt=1,
                                      phoneLevel=True)
            result = exkaldi.hmm.transcription_from_int(
                result, lexicons("phones"))
            result = result.convert(phoneMap)
            fileName = f"{outDir}/penalty_{penalty}_lmwt_{LMWT}.txt"
            result.save(fileName)
            score = exkaldi.decode.score.wer(ref=trans,
                                             hyp=result,
                                             mode="present")
            # Strict "<" keeps the first grid point on ties.
            if minWER is None or score.WER < minWER[0]:
                minWER = (score.WER, fileName)

    with open(f"{outDir}/best_PER", "w") as fw:
        fw.write(f"{minWER[0]}% {minWER[1]}")

    return minWER[0]
Exemple #3
0
def main():
    """Train, or only evaluate, the triphone DNN acoustic model.

    Options are registered on the module-level ``args`` object, parsed and
    backed up to ``<expDir>/conf/train_dnn.args``. When ``--predictModel`` is
    empty, a model is built with ``make_DNN_model`` and fitted on the
    train/dev generators, with the test set evaluated through the
    ``EvaluateWER`` callback. Otherwise the given weights are loaded and the
    test evaluation is run once.
    """
    # ------------- Parse arguments from command line ----------------------
    # 1. Add a description of this program
    args.discribe(
        "This program is used to train triphone DNN acoustic model with Tensorflow"
    )
    # 2. Add options
    args.add("--expDir",
             abbr="-e",
             dtype=str,
             default="exp",
             discription="The data and output path of current experiment.")
    args.add(
        "--LDAsplice",
        dtype=int,
        default=3,
        discription="Splice how many frames to head and tail for LDA feature.")
    args.add("--randomSeed",
             dtype=int,
             default=1234,
             discription="Random seed.")
    args.add("--batchSize",
             abbr="-b",
             dtype=int,
             default=128,
             discription="Mini batch size.")
    args.add("--gpu",
             abbr="-g",
             dtype=str,
             default="all",
             choices=["all", "0", "1"],
             discription="Use GPU.")
    args.add("--epoch", dtype=int, default=30, discription="Epoches.")
    args.add("--testStartEpoch",
             dtype=int,
             default=5,
             discription="Start to evaluate test dataset.")
    args.add("--dropout",
             abbr="-d",
             dtype=float,
             default=0.2,
             discription="Dropout.")
    args.add("--useCMVN",
             dtype=bool,
             default=False,
             discription="Wether apply CMVN to fmllr feature.")
    args.add(
        "--splice",
        dtype=int,
        default=10,
        discription="Splice how many frames to head and tail for Fmllr feature."
    )
    args.add("--delta",
             dtype=int,
             default=2,
             discription="Wether add delta to fmllr feature.")
    args.add("--normalizeFeat",
             dtype=bool,
             default=True,
             discription="Wether normalize the chunk dataset.")
    args.add("--normalizeAMP",
             dtype=bool,
             default=False,
             discription="Wether normalize the post-probability.")
    args.add("--order",
             abbr="-o",
             dtype=int,
             default=6,
             discription="Language model order.")
    args.add("--beam", dtype=int, default=13, discription="Decode beam size.")
    args.add("--latBeam",
             dtype=int,
             default=6,
             discription="Lattice beam size.")
    args.add("--acwt",
             dtype=float,
             default=0.083333,
             discription="Acoustic model weight.")
    args.add("--predictModel",
             abbr="-m",
             dtype=str,
             default="",
             discription="If not void, skip training. Do decoding only.")
    # 3. Then start to parse arguments.
    args.parse()
    # 4. Take a backup of arguments
    argsLogFile = os.path.join(args.expDir, "conf", "train_dnn.args")
    args.save(argsLogFile)

    # Restrict the visible GPUs before any TensorFlow work is done. The
    # environment variable is only read when TensorFlow first initialises its
    # devices, and building the tf.data pipelines below may already trigger
    # that, so setting it just before training can be too late.
    if args.gpu != "all":
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    random.seed(args.randomSeed)
    np.random.seed(args.randomSeed)
    tf.random.set_seed(args.randomSeed)

    # ------------- Prepare data for dnn training ----------------------
    # The "dims" file doubles as the marker that data preparation was done.
    if not os.path.isfile(f"./{args.expDir}/train_dnn/data/dims"):
        prepare_DNN_data()

    # ------------- Prepare the output directory ----------------------
    stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    outDir = f"{args.expDir}/train_dnn/out_{stamp}"
    exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

    #------------------------ Training and Validation -----------------------------
    dims = exkaldi.load_list_table(f"{args.expDir}/train_dnn/data/dims")
    featDim = int(dims["fmllr"])
    pdfDim = int(dims["pdfs"])
    phoneDim = int(dims["phones"])

    # The model input dimension grows with the delta order and splice width.
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)

    if len(args.predictModel.strip()) == 0:
        print('Prepare Data Iterator...')
        # Prepare fMLLR feature files
        trainDataset = process_feat_ali(training=True)
        traindataLen = len(trainDataset)
        train_gen = tf.data.Dataset.from_generator(
            lambda: make_generator(trainDataset), (tf.float32, {
                "pdfID": tf.int32,
                "phoneID": tf.int32
            })).batch(args.batchSize).prefetch(3)
        steps_per_epoch = traindataLen // args.batchSize

        devDataset = process_feat_ali(training=False)
        devdataLen = len(devDataset)
        dev_gen = tf.data.Dataset.from_generator(
            lambda: make_generator(devDataset), (tf.float32, {
                "pdfID": tf.int32,
                "phoneID": tf.int32
            })).batch(args.batchSize).prefetch(3)
        validation_steps = devdataLen // args.batchSize

        print('Prepare test data')
        testFeat, testBias, testTrans = prepare_test_data(postProbDim=pdfDim)

        # Learning-rate steps as (epoch threshold, rate), highest threshold
        # first; the first threshold the epoch exceeds wins.
        lrSteps = (
            (25, 0.001),
            (22, 0.0025),
            (19, 0.005),
            (17, 0.01),
            (15, 0.02),
            (10, 0.04),
        )

        def lrScheduler(epoch):
            """Return the learning rate to use for the given epoch index."""
            for threshold, rate in lrSteps:
                if epoch > threshold:
                    return rate
            return 0.08

        def train_step():
            """Build, compile and fit the two-output DNN model."""
            model = make_DNN_model(featDim, pdfDim, phoneDim)
            model.summary()

            model.compile(
                loss={
                    "pdfID":
                    keras.losses.SparseCategoricalCrossentropy(
                        from_logits=True),
                    "phoneID":
                    keras.losses.SparseCategoricalCrossentropy(
                        from_logits=True),
                },
                loss_weights={
                    "pdfID": 1,
                    "phoneID": 1
                },
                metrics={
                    "pdfID": keras.metrics.SparseCategoricalAccuracy(),
                    "phoneID": keras.metrics.SparseCategoricalAccuracy(),
                },
                optimizer=keras.optimizers.SGD(0.08, momentum=0.0),
            )

            model.fit(
                x=train_gen,
                steps_per_epoch=steps_per_epoch,
                epochs=args.epoch,
                validation_data=dev_gen,
                validation_steps=validation_steps,
                verbose=1,
                initial_epoch=0,
                callbacks=[
                    keras.callbacks.EarlyStopping(patience=5, verbose=1),
                    keras.callbacks.TensorBoard(log_dir=outDir),
                    keras.callbacks.LearningRateScheduler(lrScheduler),
                    EvaluateWER(model, testFeat, testBias, testTrans, outDir),
                    ModelSaver(model, outDir),
                ],
            )

        print("Using GPU: ", args.gpu)
        if args.gpu != "all":
            # Device visibility was already restricted right after parsing.
            train_step()
        else:
            # With all GPUs visible, build and train under a mirrored strategy.
            my_strategy = tf.distribute.MirroredStrategy()
            with my_strategy.scope():
                train_step()

    else:
        declare.is_file(args.predictModel)

        model = make_DNN_model(featDim, pdfDim, phoneDim)
        model.summary()

        model.load_weights(args.predictModel)

        print('Prepare test data')
        testFeat, testBias, testTrans = prepare_test_data(postProbDim=pdfDim)
        scorer = EvaluateWER(model, testFeat, testBias, testTrans, outDir)

        # NOTE(review): the hard-coded epoch 5 equals the default of
        # --testStartEpoch; presumably the callback skips earlier epochs -
        # confirm against EvaluateWER before changing either value.
        logs = {}
        scorer.on_epoch_end(5, logs)
Exemple #4
0
def output_probability():
    """Write the DNN's log-softmax pdf output for the train, test and dev sets.

    Options are registered on the module-level ``args`` object and parsed. The
    weights named by ``--predictModel`` are loaded into a model built with
    ``make_DNN_model``. For each dataset the fMLLR features are processed
    according to the options, pushed through the model utterance by utterance,
    and the log-softmax of the first model output is saved to
    ``<expDir>/train_dnn/prob/<dataset>.ark``.
    """
    # ------------- Parse arguments from command line ----------------------
    args.discribe(
        "This program is used to output DNN probability for realigning")

    # Each entry is (option name, keyword arguments for args.add); the options
    # are registered in this order.
    optionSpecs = (
        ("--expDir",
         dict(abbr="-e",
              dtype=str,
              default="exp",
              discription="The data and output path of current experiment.")),
        ("--dropout",
         dict(abbr="-d", dtype=float, default=0.2, discription="Dropout.")),
        ("--useCMVN",
         dict(dtype=bool,
              default=False,
              discription="Wether apply CMVN to fmllr feature.")),
        ("--splice",
         dict(dtype=int,
              default=10,
              discription="Splice how many frames to head and tail for Fmllr feature.")),
        ("--delta",
         dict(dtype=int,
              default=2,
              discription="Wether add delta to fmllr feature.")),
        ("--normalizeFeat",
         dict(dtype=bool,
              default=True,
              discription="Wether normalize the chunk dataset.")),
        ("--predictModel",
         dict(abbr="-m",
              dtype=str,
              default="",
              discription="If not void, skip training. Do decoding only.")),
    )
    for optionName, optionKwargs in optionSpecs:
        args.add(optionName, **optionKwargs)
    args.parse()

    declare.is_file(args.predictModel)

    dimTable = exkaldi.load_list_table(f"{args.expDir}/train_dnn/data/dims")
    featDim = int(dimTable["fmllr"])
    pdfDim = int(dimTable["pdfs"])
    phoneDim = int(dimTable["phones"])

    # The model input dimension grows with the delta order and splice width.
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)

    model = make_DNN_model(featDim, pdfDim, phoneDim)
    model.load_weights(args.predictModel)
    print(f"Restorage model from: {args.predictModel}")

    for Name in ["train", "test", "dev"]:
        print(f"Processing: {Name} dataset")
        features = exkaldi.load_feat(
            f"{args.expDir}/train_dnn/data/{Name}/fmllr.ark")

        if args.useCMVN:
            print("Apply CMVN")
            cmvnStats = exkaldi.load_cmvn(
                f"{args.expDir}/train_dnn/data/{Name}/cmvn_of_fmllr.ark")
            features = exkaldi.use_cmvn(
                features,
                cmvnStats,
                utt2spk=f"{args.expDir}/train_dnn/data/{Name}/utt2spk")
            del cmvnStats

        if args.delta > 0:
            print("Add delta to feature")
            features = features.add_delta(args.delta)

        if args.splice > 0:
            print("Splice feature")
            features = features.splice(args.splice)

        features = features.to_numpy()
        if args.normalizeFeat:
            print("Normalize")
            features = features.normalize(std=True)

        print("Forward model...")
        logProbs = {}
        for uttID, frames in features.items():
            # The model returns two outputs; only the first (pdf) one is kept.
            pdfLogits, _ = model(frames, training=False)
            logProbs[uttID] = exkaldi.nn.log_softmax(pdfLogits.numpy(), axis=1)

        # Saved from the to_bytes() form, as a .ark file.
        archive = exkaldi.load_prob(logProbs).to_bytes()
        archive.save(f"{args.expDir}/train_dnn/prob/{Name}.ark")
        print("Save done!")
Exemple #5
0
def main():
  """Train, or only evaluate, the triphone LSTM acoustic model.

  Options are registered on the module-level ``args`` object, parsed and
  backed up to ``./<expDir>/conf/train_lstm.args``. When ``--predictModel``
  is empty, a model is built with ``make_LSTM_model`` and trained with a
  hand-written loop: every epoch runs the training batches, the dev batches
  and a test-set evaluation, prints the epoch summary and saves the model.
  Otherwise the given weights are loaded and the test evaluation is run once.
  """
  # ------------- Parse arguments from command line ----------------------
  # 1. Add a description of this program
  args.discribe("This program is used to train triphone LSTM scoustic model with Tensorflow")
  # 2. Add options
  args.add("--expDir", abbr="-e", dtype=str, default="exp", discription="The data and output path of current experiment.")
  args.add("--LDAsplice", dtype=int, default=3, discription="Splice how many frames to head and tail for LDA feature.")
  args.add("--randomSeed", dtype=int, default=1234, discription="Random seed.")
  args.add("--batchSize", abbr="-b", dtype=int, default=8, discription="Mini batch size.")
  args.add("--gpu", abbr="-g", dtype=str, default="all", choices=["all","0","1"], discription="Use GPU.")
  args.add("--epoch", dtype=int, default=30, discription="Epoches.")
  args.add("--testStartEpoch", dtype=int, default=5, discription="Start to evaluate test dataset.")
  args.add("--dropout", abbr="-d", dtype=float, default=0.2, discription="Dropout.")
  args.add("--useCMVN", dtype=bool, default=False, discription="Wether apply CMVN to fmllr feature.")
  args.add("--splice", dtype=int, default=0, discription="Splice how many frames to head and tail for Fmllr feature.")
  args.add("--delta", dtype=int, default=2, discription="Wether add delta to fmllr feature.")
  args.add("--normalizeFeat", dtype=bool, default=True, discription="Wether normalize the chunk dataset.")
  args.add("--normalizeAMP", dtype=bool, default=False, discription="Wether normalize the post-probability.")
  args.add("--order", abbr="-o", dtype=int, default=6, discription="Language model order.")
  args.add("--beam", dtype=int, default=13, discription="Decode beam size.")
  args.add("--latBeam", dtype=int, default=6, discription="Lattice beam size.")
  args.add("--acwt", dtype=float, default=0.083333, discription="Acoustic model weight.")
  args.add("--predictModel", abbr="-m", dtype=str, default="", discription="If not void, skip training. Do decoding only.")
  # 3. Then start to parse arguments.
  args.parse()
  # 4. Take a backup of arguments
  args.save( f"./{args.expDir}/conf/train_lstm.args" )

  # --gpu was registered but never read in this function. Honour a specific
  # device choice by restricting visibility before any TensorFlow work is
  # done; "all" leaves the environment untouched.
  if args.gpu != "all":
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

  random.seed(args.randomSeed)
  np.random.seed(args.randomSeed)
  tf.random.set_seed(args.randomSeed)

  # ------------- Prepare data for lstm training ----------------------
  # The "dims" file doubles as the marker that data preparation was done.
  if not os.path.isfile(f"./{args.expDir}/train_lstm/data/dims"):
    prepare_LSTM_data()

  # ------------- Prepare the output directory ----------------------
  stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
  outDir = f"{args.expDir}/train_lstm/out_{stamp}"
  exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

  #------------------------ Training and Validation -----------------------------
  dims = exkaldi.load_list_table( f"{args.expDir}/train_lstm/data/dims" )
  featDim = int(dims["fmllr"])
  pdfDim = int(dims["pdfs"])
  phoneDim = int(dims["phones"])

  # The model input dimension grows with the delta order and splice width.
  if args.delta > 0:
    featDim *= (args.delta+1)
  if args.splice > 0:
    featDim *= (2*args.splice+1)

  if len(args.predictModel.strip()) == 0:

    print('Prepare Data Iterator...')
    # Prepare fMLLR feature files
    trainIterator = DataIterator(batchSize=args.batchSize, training=True)
    devIterator = DataIterator(batchSize=args.batchSize, training=False)

    print('Prepare test data')
    testFeat, testBias, testTrans = prepare_test_data(postProbDim=pdfDim)

    # Running means accumulated over one epoch and reset after reporting.
    epochMetrics = {
      "train_loss":keras.metrics.Mean(name="train/loss", dtype=tf.float32),
      "train_pdfID_accuracy":keras.metrics.Mean(name="train/pdfID_accuracy", dtype=tf.float32),
      "train_phoneID_accuracy":keras.metrics.Mean(name="train/phoneID_accuracy", dtype=tf.float32),
      "dev_loss":keras.metrics.Mean(name="eval/loss", dtype=tf.float32),
      "dev_pdfID_accuracy":keras.metrics.Mean(name="eval/pdfID_accuracy", dtype=tf.float32),
      "dev_phoneID_accuracy":keras.metrics.Mean(name="eval/phoneID_accuracy", dtype=tf.float32),
    }

    def record_batch(prefix, batch, pdfPred, phonePred, pdfLoss, phoneLoss):
      """Update the '<prefix>_*' epoch metrics and return the batch means.

      Returns a 4-tuple of floats: mean pdfID loss, mean phoneID loss,
      mean pdfID accuracy and mean phoneID accuracy of this batch.
      """
      _, pdfAli, phoneAli = batch
      epochMetrics[f"{prefix}_loss"](pdfLoss + phoneLoss)
      pdfAcc = keras.metrics.sparse_categorical_accuracy(pdfAli, pdfPred)
      epochMetrics[f"{prefix}_pdfID_accuracy"](pdfAcc)
      phoneAcc = keras.metrics.sparse_categorical_accuracy(phoneAli, phonePred)
      epochMetrics[f"{prefix}_phoneID_accuracy"](phoneAcc)
      return tuple(
        float(np.mean(value.numpy()))
        for value in (pdfLoss, phoneLoss, pdfAcc, phoneAcc)
      )

    def train_step(model,optimizer,batch):
      """Run one optimizer update on ``batch`` and return its batch means."""
      feat, pdfAli, phoneAli = batch
      with tf.GradientTape() as tape:
        pdfPred, phonePred = model(feat, training=True)
        pdfLoss = keras.losses.sparse_categorical_crossentropy(pdfAli, pdfPred, from_logits=True)
        phoneLoss = keras.losses.sparse_categorical_crossentropy(phoneAli, phonePred, from_logits=True)
        # Both output heads contribute equally to the training loss.
        loss = pdfLoss + phoneLoss
      gradients = tape.gradient(loss, model.trainable_variables)
      optimizer.apply_gradients(zip(gradients, model.trainable_variables))
      return record_batch("train", batch, pdfPred, phonePred, pdfLoss, phoneLoss)

    def dev_step(model,batch):
      """Evaluate ``batch`` without updating weights; return its batch means."""
      feat, pdfAli, phoneAli = batch
      pdfPred, phonePred = model(feat, training=False)
      pdfLoss = keras.losses.sparse_categorical_crossentropy(pdfAli, pdfPred, from_logits=True)
      phoneLoss = keras.losses.sparse_categorical_crossentropy(phoneAli, phonePred, from_logits=True)
      return record_batch("dev", batch, pdfPred, phonePred, pdfLoss, phoneLoss)

    def main_loop():
      """Train for ``args.epoch`` epochs, testing and saving after each one."""
      model = make_LSTM_model(args, featDim, pdfDim, phoneDim)
      model.summary()

      optimizer = keras.optimizers.RMSprop(learning_rate=0.0004, rho=0.95, momentum=0.0, epsilon=1e-07)

      scorer = EvaluateWER( model, testFeat, testBias, testTrans, outDir)
      modelSaver = ModelSaver(model, outDir)

      for e in range(args.epoch):
        # Training
        startTime = time.time()
        for i in range(trainIterator.epochSize):
          batch = trainIterator.next()
          pdfLoss, phoneLoss, pdfAcc, phoneAcc = train_step(model, optimizer, batch)
          tf.print(f"\rtraining: {i}/{trainIterator.epochSize} pdfID loss {pdfLoss:.3f} phoneID loss {phoneLoss:.3f} pdfID accuracy {pdfAcc:.3f} phoneID accuracy {phoneAcc:.3f}", end="\t")
        tf.print()
        # Evaluate
        for i in range(devIterator.epochSize):
          batch = devIterator.next()
          pdfLoss, phoneLoss, pdfAcc, phoneAcc = dev_step(model, batch)
          tf.print(f"\revaluate: {i}/{devIterator.epochSize} pdfID loss {pdfLoss:.3f} phoneID loss {phoneLoss:.3f} pdfID accuracy {pdfAcc:.3f} phoneID accuracy {phoneAcc:.3f}", end="\t")
        tf.print()
        # Test
        tf.print("testing:", end=" ")
        testWER = scorer.test(e)
        tf.print()

        # The reported time covers training, dev evaluation and testing.
        endTime = time.time()
        message = f"epoch {e} "
        for metricName, metric in epochMetrics.items():
          message += f"{metricName} {float(metric.result().numpy()):.3f} "
          metric.reset_states()
        message += f"test PER {testWER:.2f} time cost {int(endTime-startTime)}s"
        tf.print(message)

        modelSaver.save(e)

    main_loop()

  else:
    declare.is_file(args.predictModel)

    # Build the model with the same call as the training branch; this branch
    # previously omitted ``args`` and so disagreed with the call above.
    # NOTE(review): confirm against make_LSTM_model's actual signature.
    model = make_LSTM_model(args, featDim, pdfDim, phoneDim)
    model.summary()

    model.load_weights(args.predictModel)

    print('Prepare test data')
    testFeat, testBias, testTrans = prepare_test_data(postProbDim=pdfDim)
    scorer = EvaluateWER(model,testFeat,testBias,testTrans,outDir)

    scorer.test(0)