Beispiel #1
0
def prepare_test_data(postProbDim):

  feat = exkaldi.load_feat( f"{args.expDir}/train_lstm/data/test/fmllr.ark" )

  if args.useCMVN:
    cmvn = exkaldi.load_cmvn( f"{args.expDir}/train_lstm/data/test/cmvn_of_fmllr.ark" )
    feat = exkaldi.use_cmvn(feat, cmvn, utt2spk=f"{args.expDir}/train_lstm/data/test/utt2spk")
    del cmvn

  if args.delta > 0:
    feat = feat.add_delta(args.delta)

  if args.splice > 0:
    feat = feat.splice(args.splice)

  feat = feat.to_numpy()
  if args.normalizeFeat:
    feat = feat.normalize(std=True)

  # Normalize acoustic model output
  if args.normalizeAMP:
    ali = exkaldi.load_ali(f"{args.expDir}/train_lstm/data/train/pdfID.npy", aliType="pdfID")
    normalizeBias = exkaldi.nn.compute_postprob_norm(ali,postProbDim)
  else:
    normalizeBias = 0
  
  # ref transcription
  trans = exkaldi.load_transcription(f"{args.expDir}/train_lstm/data/test/text")
  convertTable = exkaldi.load_list_table(f"{args.expDir}/dict/phones.48_to_39.map")
  trans = trans.convert(convertTable)

  return feat, normalizeBias, trans
Beispiel #2
0
def process_feat_ali(training=True):

  if training:
    Name = "train"
  else:
    Name = "dev"

  feat = exkaldi.load_feat( f"{args.expDir}/train_lstm/data/{Name}/fmllr.ark" )

  if args.useCMVN:
      cmvn = exkaldi.load_cmvn(f"{args.expDir}/train_lstm/data/{Name}/cmvn_of_fmllr.ark")
      feat = exkaldi.use_cmvn(feat,cmvn,f"{args.expDir}/train_lstm/data/{Name}/utt2spk")
      del cmvn
  
  if args.delta > 0:
      feat = feat.add_delta(args.delta)

  if args.splice > 0:
      feat = feat.splice(args.splice)

  feat = feat.to_numpy()

  if args.normalizeFeat:
      feat = feat.normalize(std=True)
  
  pdfAli = exkaldi.load_ali( f"{args.expDir}/train_lstm/data/{Name}/pdfID.npy" )
  phoneAli = exkaldi.load_ali( f"{args.expDir}/train_lstm/data/{Name}/phoneID.npy" )
  
  feat.rename("feat")
  pdfAli.rename("pdfID")
  phoneAli.rename("phoneID")

  return feat, pdfAli, phoneAli
Beispiel #3
0
def prepare_data(training=True):

    if training:
        flag = "train_clean_5"
    else:
        flag = "dev_clean_2"

    print(f"{flag}: Load feature...")
    featsFile = f"{args.root}/{args.feat}/raw_{args.feat}_{flag}.*.ark"
    feats = exkaldi.load_feat(featsFile)

    if args.cmn:
        print(f"{flag}: Use cmvn...")
        cmvnFile = f"{args.root}/{args.feat}/cmvn_{flag}.ark"
        cmvn = exkaldi.load_cmvn(cmvnFile)
        feats = exkaldi.use_cmvn(feats,
                                 cmvn,
                                 utt2spk=f"{args.root}/data/{flag}/utt2spk")
        del cmvn

    if args.delta > 0:
        print(f"{flag}: Add delta...")
        feats = feats.add_delta(args.delta)

    if args.splice > 0:
        print(f"{flag}: Splice feature...")
        feats = feats.splice(args.splice)

    feats = feats.to_numpy()
    featDim = feats.dim

    print(f"{flag}: Load alignment...")
    ali = exkaldi.load_ali(f"{args.feat}/exp/tri3b_ali_{flag}/ali.*.gz")
    print(f"{flag}: Get pdf alignment...")
    pdfAli = ali.to_numpy(aliType="pdfID",
                          hmm=f"{args.feat}/exp/tri3b_ali_{flag}/final.mdl")
    del ali

    feats.rename("feat")
    pdfAli.rename("pdfID")
    #phoneAli.rename("phoneID")
    print(f"{flag}: Tuple dataset...")
    dataset = exkaldi.tuple_dataset([feats, pdfAli], frameLevel=True)
    random.shuffle(dataset)

    return featDim, dataset
Beispiel #4
0
def output_probability():

    # ------------- Parse arguments from command line ----------------------
    # 1. Add a discription of this program
    args.discribe(
        "This program is used to output DNN probability for realigning")
    # 2. Add options
    args.add("--expDir",
             abbr="-e",
             dtype=str,
             default="exp",
             discription="The data and output path of current experiment.")
    args.add("--dropout",
             abbr="-d",
             dtype=float,
             default=0.2,
             discription="Dropout.")
    args.add("--useCMVN",
             dtype=bool,
             default=False,
             discription="Wether apply CMVN to fmllr feature.")
    args.add(
        "--splice",
        dtype=int,
        default=10,
        discription="Splice how many frames to head and tail for Fmllr feature."
    )
    args.add("--delta",
             dtype=int,
             default=2,
             discription="Wether add delta to fmllr feature.")
    args.add("--normalizeFeat",
             dtype=bool,
             default=True,
             discription="Wether normalize the chunk dataset.")
    args.add("--predictModel",
             abbr="-m",
             dtype=str,
             default="",
             discription="If not void, skip training. Do decoding only.")
    # 3. Then start to parse arguments.
    args.parse()

    declare.is_file(args.predictModel)

    dims = exkaldi.load_list_table(f"{args.expDir}/train_dnn/data/dims")
    featDim = int(dims["fmllr"])
    pdfDim = int(dims["pdfs"])
    phoneDim = int(dims["phones"])

    # Initialize model
    if args.delta > 0:
        featDim *= (args.delta + 1)
    if args.splice > 0:
        featDim *= (2 * args.splice + 1)

    model = make_DNN_model(featDim, pdfDim, phoneDim)
    model.load_weights(args.predictModel)
    print(f"Restorage model from: {args.predictModel}")

    for Name in ["train", "test", "dev"]:
        print(f"Processing: {Name} dataset")
        feat = exkaldi.load_feat(
            f"{args.expDir}/train_dnn/data/{Name}/fmllr.ark")

        if args.useCMVN:
            print("Apply CMVN")
            cmvn = exkaldi.load_cmvn(
                f"{args.expDir}/train_dnn/data/{Name}/cmvn_of_fmllr.ark")
            feat = exkaldi.use_cmvn(
                feat,
                cmvn,
                utt2spk=f"{args.expDir}/train_dnn/data/{Name}/utt2spk")
            del cmvn

        if args.delta > 0:
            print("Add delta to feature")
            feat = feat.add_delta(args.delta)

        if args.splice > 0:
            print("Splice feature")
            feat = feat.splice(args.splice)

        feat = feat.to_numpy()
        if args.normalizeFeat:
            print("Normalize")
            feat = feat.normalize(std=True)

        outProb = {}
        print("Forward model...")
        for utt, mat in feat.items():
            predPdf, predPhone = model(mat, training=False)
            outProb[utt] = exkaldi.nn.log_softmax(predPdf.numpy(), axis=1)

        #outProb = exkaldi.load_prob(outProb)
        #outProb.save(f"{args.expDir}/train_dnn/prob/{Name}.npy")
        outProb = exkaldi.load_prob(outProb).to_bytes()
        outProb.save(f"{args.expDir}/train_dnn/prob/{Name}.ark")
        print("Save done!")
Beispiel #5
0
def compute_dev_wer():

  flag = "dev_clean_2"
  
  featsFile = f"{args.root}/{args.feat}/raw_{args.feat}_{flag}.*.ark"
  feats = exkaldi.load_feat(featsFile)

  if args.cmn:
    print("Use cmvn...")
    cmvnFile = f"{args.root}/{args.feat}/cmvn_{flag}.ark"
    cmvn = exkaldi.load_cmvn(cmvnFile)
    feats = exkaldi.use_cmvn(feats,cmvn,utt2spk=f"{args.root}/data/{flag}/utt2spk")
    del cmvn

  if args.delta > 0:
    print("Add delta...")
    feats = feats.add_delta(args.delta)

  if args.splice > 0:
    print("Splice feature...")
    feats = feats.splice(args.splice)
  
  feats = feats.to_numpy()
  featDim = feats.dim

  hmm = exkaldi.load_hmm(f"{args.root}/exp/tri3b_ali_train_clean_5/final.mdl")
  pdfDim = hmm.info.pdfs
  phoneDim = hmm.info.phones
  del hmm
  
  print("featDim:",featDim,"pdfDim:",pdfDim,"phoneDim:",phoneDim)
  minWER = None

  try:
    for ModelPathID in range(args.epoch,0,-1):
      #ModelPathID = args.epoch
      ModelPath = f"{args.testModelDir}/model_ep{ModelPathID}.h5"
      if not os.path.isfile(ModelPath):
        continue

      print("Use Model:",ModelPath)
      decodeOut = ModelPath[:-3]
      exkaldi.utils.make_dependent_dirs(decodeOut,pathIsFile=False)

      model = make_DNN_acoustic_model(featDim,pdfDim)
      model.load_weights(ModelPath)

      print("Forward...")
      result = {}
      for uttID in feats.keys():
        pdfP = model(feats[uttID],training=False)
        result[uttID] = exkaldi.nn.log_softmax(pdfP.numpy(),axis=1)

      amp = exkaldi.load_prob(result)
      hmmFile = f"{args.root}/exp/tri3b_ali_dev_clean_2/final.mdl"
      HCLGFile = f"{args.root}/exp/tri3b/graph_tgsmall/HCLG.fst"
      table = f"{args.root}/exp/tri3b/graph_tgsmall/words.txt"
      trans = f"{args.root}/data/dev_clean_2/text"

      print("Decoding...")
      lat = exkaldi.decode.wfst.nn_decode(
                                          prob=amp.subset(chunks=4), 
                                          hmm=hmmFile, 
                                          HCLGFile=HCLGFile, 
                                          symbolTable=table,
                                          beam=10,
                                          latBeam=8,
                                          acwt=0.1,
                                          minActive=200,
                                          maxActive=7000,
                                          outFile=os.path.join(decodeOut,"lat")
                                        )
      lat = exkaldi.merge_archives(lat)

      print("Scoring...")
      for LMWT in range(1,10,1):
        #newLat = lat.add_penalty(penalty)
        result = lat.get_1best(table,hmmFile,lmwt=LMWT,acwt=0.1,phoneLevel=False)
        result = exkaldi.hmm.transcription_from_int(result,table)
        result.save( os.path.join(decodeOut,f"trans.{LMWT}") )

        score = exkaldi.decode.score.wer(ref=trans,hyp=result,mode="present")
        print("LMWT: ",LMWT ,"WER: ",score.WER)
        if minWER == None or score.WER < minWER[0]:
          minWER = (score.WER, LMWT, ModelPath)
  finally:
    if minWER is not None:
      werOut = os.path.basename(decodeOut)
      print("Best WER:",minWER)
      with open(f"{args.testModelDir}/best_wer","w") as fw:
        fw.write(str(minWER))