Example #1
def prepare_test_data(postProbDim):

  feat = exkaldi.load_feat( f"{args.expDir}/train_lstm/data/test/fmllr.ark" )

  if args.useCMVN:
    cmvn = exkaldi.load_cmvn( f"{args.expDir}/train_lstm/data/test/cmvn_of_fmllr.ark" )
    feat = exkaldi.use_cmvn(feat, cmvn, utt2spk=f"{args.expDir}/train_lstm/data/test/utt2spk")
    del cmvn

  if args.delta > 0:
    feat = feat.add_delta(args.delta)

  if args.splice > 0:
    feat = feat.splice(args.splice)

  feat = feat.to_numpy()
  if args.normalizeFeat:
    feat = feat.normalize(std=True)

  # Compute a bias used to normalize the acoustic model output
  if args.normalizeAMP:
    ali = exkaldi.load_ali(f"{args.expDir}/train_lstm/data/train/pdfID.npy", aliType="pdfID")
    normalizeBias = exkaldi.nn.compute_postprob_norm(ali,postProbDim)
  else:
    normalizeBias = 0
  
  # Reference transcription (map 48 phones to 39 phones)
  trans = exkaldi.load_transcription(f"{args.expDir}/train_lstm/data/test/text")
  convertTable = exkaldi.load_list_table(f"{args.expDir}/dict/phones.48_to_39.map")
  trans = trans.convert(convertTable)

  return feat, normalizeBias, trans
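
A minimal, exkaldi-free sketch of what a posterior-normalization bias such as the one returned by exkaldi.nn.compute_postprob_norm typically contains: log pdf priors counted from the training alignment. The helper name compute_log_prior_bias and the exact sign convention used when applying it are assumptions, not the library's API.

import numpy as np

def compute_log_prior_bias(pdf_ids, num_pdfs, floor=1e-10):
    # Count how often each pdf appears in the alignment, turn the counts
    # into priors, and return their logs. Subtracting such a vector from
    # log posteriors gives scaled log likelihoods for decoding.
    counts = np.bincount(np.asarray(pdf_ids, dtype=int), minlength=num_pdfs)
    priors = counts / max(counts.sum(), 1)
    return np.log(np.maximum(priors, floor))
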
Example #2
def process_feat_ali(training=True):

  if training:
    Name = "train"
  else:
    Name = "dev"

  feat = exkaldi.load_feat( f"{args.expDir}/train_lstm/data/{Name}/fmllr.ark" )

  if args.useCMVN:
    cmvn = exkaldi.load_cmvn(f"{args.expDir}/train_lstm/data/{Name}/cmvn_of_fmllr.ark")
    feat = exkaldi.use_cmvn(feat, cmvn, utt2spk=f"{args.expDir}/train_lstm/data/{Name}/utt2spk")
    del cmvn

  if args.delta > 0:
    feat = feat.add_delta(args.delta)

  if args.splice > 0:
    feat = feat.splice(args.splice)

  feat = feat.to_numpy()

  if args.normalizeFeat:
    feat = feat.normalize(std=True)
  
  pdfAli = exkaldi.load_ali( f"{args.expDir}/train_lstm/data/{Name}/pdfID.npy" )
  phoneAli = exkaldi.load_ali( f"{args.expDir}/train_lstm/data/{Name}/phoneID.npy" )
  
  feat.rename("feat")
  pdfAli.rename("pdfID")
  phoneAli.rename("phoneID")

  return feat, pdfAli, phoneAli
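
For reference, a self-contained numpy sketch of the effect of per-speaker CMVN as used above. exkaldi.use_cmvn operates on Kaldi archives; the dict-based apply_cmvn below is only an illustration, and its interface (dicts of {utt: matrix} plus an utt2spk mapping) is an assumption.

import numpy as np

def apply_cmvn(feat_dict, utt2spk, var_norm=False):
    # Accumulate per-speaker sums, squared sums and frame counts.
    stats = {}
    for utt, mat in feat_dict.items():
        spk = utt2spk[utt]
        s = stats.setdefault(spk, [0.0, 0.0, 0])
        s[0] = s[0] + mat.sum(axis=0)
        s[1] = s[1] + (mat ** 2).sum(axis=0)
        s[2] += mat.shape[0]
    # Normalize every utterance with its speaker's statistics.
    normed = {}
    for utt, mat in feat_dict.items():
        total, sq, n = stats[utt2spk[utt]]
        mean = total / n
        out = mat - mean
        if var_norm:
            std = np.sqrt(np.maximum(sq / n - mean ** 2, 1e-10))
            out = out / std
        normed[utt] = out
    return normed
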
Example #3
def prepare_data(training=True):

    if training:
        flag = "train_clean_5"
    else:
        flag = "dev_clean_2"

    print(f"{flag}: Load feature...")
    featsFile = f"{args.root}/{args.feat}/raw_{args.feat}_{flag}.*.ark"
    feats = exkaldi.load_feat(featsFile)

    if args.cmn:
        print(f"{flag}: Use cmvn...")
        cmvnFile = f"{args.root}/{args.feat}/cmvn_{flag}.ark"
        cmvn = exkaldi.load_cmvn(cmvnFile)
        feats = exkaldi.use_cmvn(feats,
                                 cmvn,
                                 utt2spk=f"{args.root}/data/{flag}/utt2spk")
        del cmvn

    if args.delta > 0:
        print(f"{flag}: Add delta...")
        feats = feats.add_delta(args.delta)

    if args.splice > 0:
        print(f"{flag}: Splice feature...")
        feats = feats.splice(args.splice)

    feats = feats.to_numpy()
    featDim = feats.dim

    print(f"{flag}: Load alignment...")
    ali = exkaldi.load_ali(f"{args.feat}/exp/tri3b_ali_{flag}/ali.*.gz")
    print(f"{flag}: Get pdf alignment...")
    pdfAli = ali.to_numpy(aliType="pdfID",
                          hmm=f"{args.feat}/exp/tri3b_ali_{flag}/final.mdl")
    del ali

    feats.rename("feat")
    pdfAli.rename("pdfID")
    #phoneAli.rename("phoneID")
    print(f"{flag}: Tuple dataset...")
    dataset = exkaldi.tuple_dataset([feats, pdfAli], frameLevel=True)
    random.shuffle(dataset)

    return featDim, dataset
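
After exkaldi.tuple_dataset with frameLevel=True, each record carries the field names set by rename() above. A hedged sketch of turning the shuffled frame-level dataset into training minibatches; the attribute access record.feat / record.pdfID is an assumption about the record type, so adjust it to whatever exkaldi actually returns.

import numpy as np

def minibatches(dataset, batch_size=256):
    # Yield (features, pdf labels) arrays from a shuffled frame-level dataset.
    for start in range(0, len(dataset), batch_size):
        chunk = dataset[start:start + batch_size]
        feats = np.stack([record.feat for record in chunk])
        labels = np.array([record.pdfID for record in chunk], dtype=np.int32)
        yield feats, labels
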
Example #4
def output_probability():

    # ------------- Parse arguments from command line ----------------------
    # 1. Add a description of this program
    args.discribe(
        "This program is used to output DNN probability for realigning")
    # 2. Add options
    args.add("--expDir",
             abbr="-e",
             dtype=str,
             default="exp",
             discription="The data and output path of current experiment.")
    args.add("--dropout",
             abbr="-d",
             dtype=float,
             default=0.2,
             discription="Dropout.")
    args.add("--useCMVN",
             dtype=bool,
             default=False,
             discription="Wether apply CMVN to fmllr feature.")
    args.add(
        "--splice",
        dtype=int,
        default=10,
        discription="Splice how many frames to head and tail for Fmllr feature."
    )
    args.add("--delta",
             dtype=int,
             default=2,
             discription="Wether add delta to fmllr feature.")
    args.add("--normalizeFeat",
             dtype=bool,
             default=True,
             discription="Wether normalize the chunk dataset.")
    args.add("--predictModel",
             abbr="-m",
             dtype=str,
             default="",
             discription="If not void, skip training. Do decoding only.")
    # 3. Then start to parse arguments.
    args.parse()

    declare.is_file(args.predictModel)

    dims = exkaldi.load_list_table(f"{args.expDir}/train_dnn/data/dims")
    featDim = int(dims["fmllr"])
    pdfDim = int(dims["pdfs"])
    phoneDim = int(dims["phones"])

    # Initialize model
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)

    model = make_DNN_model(featDim, pdfDim, phoneDim)
    model.load_weights(args.predictModel)
    print(f"Restorage model from: {args.predictModel}")

    for Name in ["train", "test", "dev"]:
        print(f"Processing: {Name} dataset")
        feat = exkaldi.load_feat(
            f"{args.expDir}/train_dnn/data/{Name}/fmllr.ark")

        if args.useCMVN:
            print("Apply CMVN")
            cmvn = exkaldi.load_cmvn(
                f"{args.expDir}/train_dnn/data/{Name}/cmvn_of_fmllr.ark")
            feat = exkaldi.use_cmvn(
                feat,
                cmvn,
                utt2spk=f"{args.expDir}/train_dnn/data/{Name}/utt2spk")
            del cmvn

        if args.delta > 0:
            print("Add delta to feature")
            feat = feat.add_delta(args.delta)

        if args.splice > 0:
            print("Splice feature")
            feat = feat.splice(args.splice)

        feat = feat.to_numpy()
        if args.normalizeFeat:
            print("Normalize")
            feat = feat.normalize(std=True)

        outProb = {}
        print("Forward model...")
        for utt, mat in feat.items():
            predPdf, predPhone = model(mat, training=False)
            outProb[utt] = exkaldi.nn.log_softmax(predPdf.numpy(), axis=1)

        #outProb = exkaldi.load_prob(outProb)
        #outProb.save(f"{args.expDir}/train_dnn/prob/{Name}.npy")
        outProb = exkaldi.load_prob(outProb).to_bytes()
        outProb.save(f"{args.expDir}/train_dnn/prob/{Name}.ark")
        print("Save done!")
Example #5
def prepare_DNN_data():

    print("Start to prepare data for DNN training")
    assert os.path.isdir(f"{args.expDir}/train_sat"), \
        "Please run previous programs up to SAT training."

    # Lexicons and Gmm-Hmm model
    lexicons = exkaldi.load_lex(f"{args.expDir}/dict/lexicons.lex")
    hmm = f"{args.expDir}/train_sat/final.mdl"
    tree = f"{args.expDir}/train_sat/tree"

    for Name in ["train", "dev", "test"]:

        exkaldi.utils.make_dependent_dirs(
            f"{args.expDir}/train_dnn/data/{Name}", pathIsFile=False)
        # Make LDA feature
        print(f"Make LDA feature for '{Name}'")
        feat = exkaldi.load_feat(f"{args.expDir}/mfcc/{Name}/mfcc_cmvn.ark")
        feat = feat.splice(left=args.LDAsplice, right=args.LDAsplice)
        feat = exkaldi.transform_feat(
            feat, matFile=f"{args.expDir}/train_lda_mllt/trans.mat")
        # Compile the aligning graph
        print(f"Compile aligning graph")
        transInt = exkaldi.hmm.transcription_to_int(
            transcription=f"{args.expDir}/data/{Name}/text",
            symbolTable=lexicons("words"),
            unkSymbol=lexicons("oov"),
        )
        graphFile = exkaldi.decode.wfst.compile_align_graph(
            hmm,
            tree,
            transcription=transInt,
            LFile=f"{args.expDir}/dict/L.fst",
            outFile=f"{args.expDir}/train_dnn/data/{Name}/align_graph",
            lexicons=lexicons,
        )
        # Align first time
        print(f"Align the first time")
        ali = exkaldi.decode.wfst.gmm_align(
            hmm,
            feat,
            alignGraphFile=graphFile,
            lexicons=lexicons,
        )
        # Estimate transform matrix
        print(f"Estimate fMLLR transform matrix")
        fmllrTransMat = exkaldi.hmm.estimate_fMLLR_matrix(
            aliOrLat=ali,
            lexicons=lexicons,
            aliHmm=hmm,
            feat=feat,
            spk2utt=f"{args.expDir}/data/{Name}/spk2utt",
        )
        fmllrTransMat.save(f"{args.expDir}/train_dnn/data/{Name}/trans.ark")
        # Transform feature
        print(f"Transform feature")
        feat = exkaldi.use_fmllr(
            feat,
            fmllrTransMat,
            utt2spk=f"{args.expDir}/data/{Name}/utt2spk",
        )
        # Align second time with new feature
        print(f"Align the second time")
        ali = exkaldi.decode.wfst.gmm_align(
            hmm,
            feat,
            alignGraphFile=graphFile,
            lexicons=lexicons,
        )
        # Save alignment and feature
        print(f"Save final fmllr feature and alignment")
        feat.save(f"{args.expDir}/train_dnn/data/{Name}/fmllr.ark")
        ali.save(f"{args.expDir}/train_dnn/data/{Name}/ali")
        # Transform alignment
        print(f"Generate pdf ID and phone ID alignment")
        ali.to_numpy(
            aliType="pdfID",
            hmm=hmm).save(f"{args.expDir}/train_dnn/data/{Name}/pdfID.npy")
        ali.to_numpy(
            aliType="phoneID",
            hmm=hmm).save(f"{args.expDir}/train_dnn/data/{Name}/phoneID.npy")
        del ali
        # Compute cmvn for fmllr feature
        print(f"Compute the CMVN for fmllr feature")
        cmvn = exkaldi.compute_cmvn_stats(
            feat, spk2utt=f"{args.expDir}/data/{Name}/spk2utt")
        cmvn.save(f"{args.expDir}/train_dnn/data/{Name}/cmvn_of_fmllr.ark")
        del cmvn
        del feat
        # copy spk2utt utt2spk and text file
        shutil.copyfile(f"{args.expDir}/data/{Name}/spk2utt",
                        f"{args.expDir}/train_dnn/data/{Name}/spk2utt")
        shutil.copyfile(f"{args.expDir}/data/{Name}/utt2spk",
                        f"{args.expDir}/train_dnn/data/{Name}/utt2spk")
        shutil.copyfile(f"{args.expDir}/data/{Name}/text",
                        f"{args.expDir}/train_dnn/data/{Name}/text")
        transInt.save(f"{args.expDir}/data/{Name}/text.int")

    print("Write feature and alignment dim information")
    dims = exkaldi.ListTable()
    feat = exkaldi.load_feat(f"{args.expDir}/train_dnn/data/test/fmllr.ark")
    dims["fmllr"] = feat.dim
    del feat
    hmm = exkaldi.hmm.load_hmm(f"{args.expDir}/train_sat/final.mdl")
    dims["phones"] = hmm.info.phones + 1
    dims["pdfs"] = hmm.info.pdfs
    del hmm
    dims.save(f"{args.expDir}/train_dnn/data/dims")
Example #6
def prepare_LSTM_data():

  print("Start to prepare data for LSTM training")
  declare.is_dir(f"{args.expDir}/train_dnn/prob", debug="Please run previous programs up to DNN training.")

  # Lexicons and Gmm-Hmm model
  lexicons = exkaldi.load_lex( f"{args.expDir}/dict/lexicons.lex" )
  hmm = f"{args.expDir}/train_sat/final.mdl"
  tree = f"{args.expDir}/train_sat/tree"

  for Name in ["train", "dev", "test"]:
    exkaldi.utils.make_dependent_dirs(f"{args.expDir}/train_lstm/data/{Name}", pathIsFile=False)
    # Load feature
    print(f"Make LDA feature for '{Name}'")
    feat = exkaldi.load_feat( f"{args.expDir}/mfcc/{Name}/mfcc_cmvn.ark" )
    feat = feat.splice(left=args.LDAsplice, right=args.LDAsplice)
    feat = exkaldi.transform_feat(feat, matFile=f"{args.expDir}/train_lda_mllt/trans.mat" )
    # Load probability for aligning (the file is large, so we use an index table)
    prob = exkaldi.load_index_table( f"{args.expDir}/train_dnn/prob/{Name}.ark" )
    # Copy the aligning graph compiled in the DNN data preparation step
    print(f"Copy aligning graph from DNN resources")
    shutil.copyfile( f"{args.expDir}/train_dnn/data/{Name}/align_graph",
                    f"{args.expDir}/train_lstm/data/{Name}/align_graph"
                  )
    # Align
    print("Align")
    ali = exkaldi.decode.wfst.nn_align(
                                    hmm,
                                    prob,
                                    alignGraphFile=f"{args.expDir}/train_lstm/data/{Name}/align_graph", 
                                    lexicons=lexicons,
                                    outFile=f"{args.expDir}/train_lstm/data/{Name}/ali",
                                )
    # Estimate transform matrix
    print("Estimate transform matrix")
    fmllrTransMat = exkaldi.hmm.estimate_fMLLR_matrix(
                                aliOrLat=ali,
                                lexicons=lexicons,
                                aliHmm=hmm,
                                feat=feat,
                                spk2utt=f"{args.expDir}/data/{Name}/spk2utt",
                                outFile=f"{args.expDir}/train_lstm/data/{Name}/trans.ark",
                            )
    # Transform feature
    print("Transform matrix")
    feat = exkaldi.use_fmllr(
                        feat,
                        fmllrTransMat,
                        utt2spk=f"{args.expDir}/data/{Name}/utt2spk",
                        outFile=f"{args.expDir}/train_lstm/data/{Name}/fmllr.ark",
                    )
    # Transform alignment (because 'ali' is an index table object, we need to fetch the alignment data before calling the 'to_numpy' method)
    ali = ali.fetch(arkType="ali")
    ali.to_numpy(aliType="pdfID",hmm=hmm).save( f"{args.expDir}/train_lstm/data/{Name}/pdfID.npy" )
    ali.to_numpy(aliType="phoneID",hmm=hmm).save( f"{args.expDir}/train_lstm/data/{Name}/phoneID.npy" )
    del ali
    # Compute cmvn for fmllr feature
    cmvn = exkaldi.compute_cmvn_stats(
                                  feat, 
                                  spk2utt=f"{args.expDir}/data/{Name}/spk2utt",
                                  outFile=f"{args.expDir}/train_lstm/data/{Name}/cmvn_of_fmllr.ark",
                                )
    del cmvn
    del feat
    # copy spk2utt utt2spk and text file
    shutil.copyfile( f"{args.expDir}/data/{Name}/spk2utt", f"{args.expDir}/train_lstm/data/{Name}/spk2utt")
    shutil.copyfile( f"{args.expDir}/data/{Name}/utt2spk", f"{args.expDir}/train_lstm/data/{Name}/utt2spk")
    shutil.copyfile( f"{args.expDir}/data/{Name}/text", f"{args.expDir}/train_lstm/data/{Name}/text" )

  print("Write feature and alignment dim information")
  dims = exkaldi.ListTable()
  feat = exkaldi.load_feat( f"{args.expDir}/train_lstm/data/test/fmllr.ark" ) 
  dims["fmllr"] = feat.dim
  del feat
  hmm = exkaldi.hmm.load_hmm( f"{args.expDir}/train_sat/final.mdl" )
  dims["phones"] = hmm.info.phones + 1
  dims["pdfs"] = hmm.info.pdfs
  del hmm
  dims.save( f"{args.expDir}/train_lstm/data/dims" )
Example #7
def compute_dev_wer():

  flag = "dev_clean_2"
  
  featsFile = f"{args.root}/{args.feat}/raw_{args.feat}_{flag}.*.ark"
  feats = exkaldi.load_feat(featsFile)

  if args.cmn:
    print("Use cmvn...")
    cmvnFile = f"{args.root}/{args.feat}/cmvn_{flag}.ark"
    cmvn = exkaldi.load_cmvn(cmvnFile)
    feats = exkaldi.use_cmvn(feats,cmvn,utt2spk=f"{args.root}/data/{flag}/utt2spk")
    del cmvn

  if args.delta > 0:
    print("Add delta...")
    feats = feats.add_delta(args.delta)

  if args.splice > 0:
    print("Splice feature...")
    feats = feats.splice(args.splice)
  
  feats = feats.to_numpy()
  featDim = feats.dim

  hmm = exkaldi.load_hmm(f"{args.root}/exp/tri3b_ali_train_clean_5/final.mdl")
  pdfDim = hmm.info.pdfs
  phoneDim = hmm.info.phones
  del hmm
  
  print("featDim:",featDim,"pdfDim:",pdfDim,"phoneDim:",phoneDim)
  minWER = None

  try:
    for ModelPathID in range(args.epoch,0,-1):
      #ModelPathID = args.epoch
      ModelPath = f"{args.testModelDir}/model_ep{ModelPathID}.h5"
      if not os.path.isfile(ModelPath):
        continue

      print("Use Model:",ModelPath)
      decodeOut = ModelPath[:-3]
      exkaldi.utils.make_dependent_dirs(decodeOut,pathIsFile=False)

      model = make_DNN_acoustic_model(featDim,pdfDim)
      model.load_weights(ModelPath)

      print("Forward...")
      result = {}
      for uttID in feats.keys():
        pdfP = model(feats[uttID],training=False)
        result[uttID] = exkaldi.nn.log_softmax(pdfP.numpy(),axis=1)

      amp = exkaldi.load_prob(result)
      hmmFile = f"{args.root}/exp/tri3b_ali_dev_clean_2/final.mdl"
      HCLGFile = f"{args.root}/exp/tri3b/graph_tgsmall/HCLG.fst"
      table = f"{args.root}/exp/tri3b/graph_tgsmall/words.txt"
      trans = f"{args.root}/data/dev_clean_2/text"

      print("Decoding...")
      lat = exkaldi.decode.wfst.nn_decode(
                                          prob=amp.subset(chunks=4), 
                                          hmm=hmmFile, 
                                          HCLGFile=HCLGFile, 
                                          symbolTable=table,
                                          beam=10,
                                          latBeam=8,
                                          acwt=0.1,
                                          minActive=200,
                                          maxActive=7000,
                                          outFile=os.path.join(decodeOut,"lat")
                                        )
      lat = exkaldi.merge_archives(lat)

      print("Scoring...")
      for LMWT in range(1,10,1):
        #newLat = lat.add_penalty(penalty)
        result = lat.get_1best(table,hmmFile,lmwt=LMWT,acwt=0.1,phoneLevel=False)
        result = exkaldi.hmm.transcription_from_int(result,table)
        result.save( os.path.join(decodeOut,f"trans.{LMWT}") )

        score = exkaldi.decode.score.wer(ref=trans,hyp=result,mode="present")
        print("LMWT: ",LMWT ,"WER: ",score.WER)
        if minWER is None or score.WER < minWER[0]:
          minWER = (score.WER, LMWT, ModelPath)
  finally:
    if minWER is not None:
      werOut = os.path.basename(decodeOut)
      print("Best WER:",minWER)
      with open(f"{args.testModelDir}/best_wer","w") as fw:
        fw.write(str(minWER))
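
The LMWT loop above keeps the lowest WER reported by exkaldi.decode.score.wer. As a reference point, a self-contained word error rate (word-level Levenshtein distance over the reference length) looks like the sketch below; it is an illustration only, not the scoring code Kaldi runs.

import numpy as np

def word_error_rate(ref_words, hyp_words):
    # Dynamic-programming edit distance over words, reported as a
    # percentage of the reference length.
    R, H = len(ref_words), len(hyp_words)
    d = np.zeros((R + 1, H + 1), dtype=np.int32)
    d[:, 0] = np.arange(R + 1)
    d[0, :] = np.arange(H + 1)
    for i in range(1, R + 1):
        for j in range(1, H + 1):
            sub = d[i - 1, j - 1] + (ref_words[i - 1] != hyp_words[j - 1])
            d[i, j] = min(d[i - 1, j] + 1, d[i, j - 1] + 1, sub)
    return 100.0 * d[R, H] / max(R, 1)
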
Example #8
def GMM_decode_fmllr_and_score(outDir, hmm, HCLGfile, tansformMatFile=None):

    exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

    lexicons = exkaldi.decode.graph.load_lex(
        os.path.join("exp", "dict", "lexicons.lex"))
    print(f"Load test feature.")
    featFile = os.path.join("exp", "mfcc", "test", "mfcc_cmvn.ark")
    feat = exkaldi.load_feat(featFile)
    if tansformMatFile is None:
        print("Feature type is delta")
        feat = feat.add_delta(order=2)
        print("Add 2-order deltas.")
    else:
        print("Feature type is lda+mllt")
        feat = feat.splice(left=3, right=3)
        feat = exkaldi.transform_feat(feat, tansformMatFile)
        print("Transform feature")

    ## 1. Estimate the primary transform matrix from an alignment or a lattice.
    ## We estimate it from a lattice, so we decode first.
    print("Decode the first time with original feature.")
    preLat = exkaldi.decode.wfst.gmm_decode(
        feat,
        hmm,
        HCLGfile,
        wordSymbolTable=lexicons("words"),
        beam=10,
        latBeam=6,
        acwt=0.083333,
        maxActive=2000,
    )
    preLat.save(os.path.join(outDir, "test_premary.lat"))

    print("Estimate the primary fMLLR transform matrix.")
    preTransMatrix = exkaldi.hmm.estimate_fMLLR_matrix(
        aliOrLat=preLat,
        lexicons=lexicons,
        aliHmm=hmm,
        feat=feat,
        adaHmm=None,
        silenceWeight=0.01,
        acwt=0.083333,
        spk2utt=os.path.join("exp", "data", "test", "spk2utt"),
    )
    del preLat
    ## 2. Transform the feature. We will use the new feature to estimate the secondary transform matrix from a lattice.
    print("Transform feature with primary matrix.")
    fmllrFeat = exkaldi.use_fmllr(
        feat,
        preTransMatrix,
        utt2spkFile=os.path.join("exp", "data", "test", "utt2spk"),
    )
    print("Decode the second time with primary fmllr feature.")
    secLat = exkaldi.decode.wfst.gmm_decode(
        fmllrFeat,
        hmm,
        HCLGfile,
        wordSymbolTable=lexicons("words"),
        beam=13,
        latBeam=6,
        acwt=0.083333,
        maxActive=7000,
        config={"--determinize-lattice": "false"},
    )
    print("Determinize secondary lattice.")
    thiLat = secLat.determinize(acwt=0.083333, beam=4)
    print("Estimate the secondary fMLLR transform matrix.")
    secTransMatrix = exkaldi.hmm.estimate_fMLLR_matrix(
        aliOrLat=thiLat,
        lexicons=lexicons,
        aliHmm=hmm,
        feat=fmllrFeat,
        adaHmm=None,
        silenceWeight=0.01,
        acwt=0.083333,
        spk2utt=os.path.join("exp", "data", "test", "spk2utt"),
    )
    del fmllrFeat
    del thiLat
    ## 3. Compose the primary matrix and secondary matrix and get the final transform matrix.
    print("Compose the primary and secondary transform matrix.")
    finalTransMatrix = exkaldi.hmm.compose_transform_matrixs(
        matA=preTransMatrix,
        matB=secTransMatrix,
        bIsAffine=True,
    )
    finalTransMatrix.save(os.path.join(outDir, "trans.ark"))
    print("Transform feature with final matrix.")
    ## 4. Transform the feature with the final transform matrix and use it to rescore
    ## the lattice generated in the second step, which yields the final lattice.
    finalFmllrFeat = exkaldi.use_fmllr(
        feat,
        finalTransMatrix,
        utt2spkFile=os.path.join("exp", "data", "test", "utt2spk"),
    )
    del finalTransMatrix
    print("Rescore secondary lattice.")
    lat = secLat.am_rescore(
        hmm=hmm,
        feat=finalFmllrFeat,
    )
    print("Determinize secondary lattice.")
    lat = lat.determinize(acwt=0.083333, beam=6)
    lat.save(os.path.join(outDir, "test.lat"))
    print("Generate lattice done.")

    phoneMapFile = os.path.join("exp", "dict", "phones.48_to_39.map")
    phoneMap = exkaldi.ListTable(name="48-39").load(phoneMapFile)
    refText = exkaldi.load_trans(os.path.join("exp", "data", "test",
                                              "text")).convert(phoneMap, None)
    refText.save(os.path.join(outDir, "ref.txt"))
    print("Generate reference text done.")

    print("Now score:")
    bestWER = (1000, 0, 0)
    bestResult = None
    for penalty in [0., 0.5, 1.0]:
        for LMWT in range(1, 11):
            # Add penalty
            newLat = lat.add_penalty(penalty)
            # Get 1-best result (word-level)
            result = newLat.get_1best(lexicons("words"),
                                      hmm,
                                      lmwt=LMWT,
                                      acwt=1)
            # Transform from int value format to text format
            result = exkaldi.hmm.transcription_from_int(
                result, lexicons("words"))
            # Transform 48-phones to 39-phones
            result = result.convert(phoneMap, None)
            # Compute WER
            score = exkaldi.decode.score.wer(ref=refText,
                                             hyp=result,
                                             mode="present")
            if score.WER < bestWER[0]:
                bestResult = result
                bestWER = (score.WER, penalty, LMWT)
            print(f"Penalty: {penalty}, LMWT: {LMWT}, WER: {score.WER}%")
    print("Score done. Save the best result.")
    bestResult.save(os.path.join(outDir, "hyp.txt"))
    with open(os.path.join(outDir, "best_WER"), "w") as fw:
        fw.write(f"WER {bestWER[0]}, penalty {bestWER[1]}, LMWT {bestWER[2]}")
Example #9
def GMM_decode_mfcc_and_score(outDir, hmm, HCLGfile, tansformMatFile=None):

    exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

    lexicons = exkaldi.decode.graph.load_lex(
        os.path.join("exp", "dict", "lexicons.lex"))
    print(f"Load test feature.")
    featFile = os.path.join("exp", "mfcc", "test", "mfcc_cmvn.ark")
    feat = exkaldi.load_feat(featFile)
    if tansformMatFile is None:
        print("Feature type is delta")
        feat = feat.add_delta(order=2)
        print("Add 2-order deltas.")
    else:
        print("Feature type is lda+mllt")
        feat = feat.splice(left=3, right=3)
        feat = exkaldi.transform_feat(feat, tansformMatFile)
        print("Transform feature")

    print("Start to decode")
    lat = exkaldi.decode.wfst.gmm_decode(feat,
                                         hmm,
                                         HCLGfile,
                                         wordSymbolTable=lexicons("words"),
                                         beam=13,
                                         latBeam=6,
                                         acwt=0.083333)
    lat.save(os.path.join(outDir, "test.lat"))
    print(f"Generate lattice done.")

    phoneMapFile = os.path.join("exp", "dict", "phones.48_to_39.map")
    phoneMap = exkaldi.ListTable(name="48-39").load(phoneMapFile)
    refText = exkaldi.load_trans(os.path.join("exp", "data", "test",
                                              "text")).convert(phoneMap, None)
    refText.save(os.path.join(outDir, "ref.txt"))
    print("Generate reference text done.")

    print("Now score:")
    bestWER = (1000, 0, 0)
    bestResult = None
    for penalty in [0., 0.5, 1.0]:
        for LMWT in range(1, 11):
            # Add penalty
            newLat = lat.add_penalty(penalty)
            # Get 1-best result (word-level)
            result = newLat.get_1best(lexicons("words"),
                                      hmm,
                                      lmwt=LMWT,
                                      acwt=1)
            # Transform from int value format to text format
            result = exkaldi.hmm.transcription_from_int(
                result, lexicons("words"))
            # Transform 48-phones to 39-phones
            result = result.convert(phoneMap, None)
            # Compute WER
            score = exkaldi.decode.score.wer(ref=refText,
                                             hyp=result,
                                             mode="present")
            if score.WER < bestWER[0]:
                bestResult = result
                bestWER = (score.WER, penalty, LMWT)
            print(f"Penalty: {penalty}, LMWT: {LMWT}, WER: {score.WER}%")
    print("Score done. Save the best result.")
    bestResult.save(os.path.join(outDir, "hyp.txt"))
    with open(os.path.join(outDir, "best_WER"), "w") as fw:
        fw.write(f"WER {bestWER[0]}, penalty {bestWER[1]}, LMWT {bestWER[2]}")