def generate_data(wavFiles, expDir, sphFlag, sph2pipeTool, txtFileSuffix, phoneMap_60_to_48):
    """Write the wav.scp, utt2spk, spk2utt and text files for a set of waveforms.

    Args:
        wavFiles: iterable of waveform paths. The name of each file's parent
            directory is taken as the speaker ID.
        expDir: directory that receives the four output files.
        sphFlag: when true, each wav.scp entry is a sph2pipe command pipe
            rather than a plain file path.
        sph2pipeTool: path of the sph2pipe executable (used only if sphFlag).
        txtFileSuffix: extension (no dot) of the phone label file that sits
            next to each waveform; it replaces the last 3 characters of the path.
        phoneMap_60_to_48: mapping applied to every phone label read from the
            label files. A label missing from the map raises from the lookup.
    """
    wavScp = exkaldi.ListTable(name="wavScp")
    utt2spk = exkaldi.ListTable(name="utt2spk")
    spk2utt = exkaldi.ListTable(name="spk2utt")
    transcription = exkaldi.Transcription(name="trans")

    # The tail of the path is upper-cased before comparison, so only the
    # upper-case spellings are needed here.
    skippedTails = ("SA1.WAV", "SA2.WAV")

    for wavPath in wavFiles:
        if wavPath[-7:].upper() in skippedTails:
            continue

        speaker = os.path.basename(os.path.dirname(wavPath))
        # Utterance ID: "<speaker>_<file name without its 4-character extension>".
        uttID = f"{speaker}_{os.path.basename(wavPath)[0:-4]}"
        absWavPath = os.path.abspath(wavPath)

        # 1. wav.scp
        wavScp[uttID] = f"{sph2pipeTool} -f wav {absWavPath} |" if sphFlag else absWavPath

        # 2. utt2spk
        utt2spk[uttID] = speaker

        # 3. spk2utt (space-separated utterance IDs per speaker)
        if speaker in spk2utt.keys():
            spk2utt[speaker] += f" {uttID}"
        else:
            spk2utt[speaker] = f"{uttID}"

        # 4. transcription: the last field of every non-blank label line is the phone
        labelFile = wavPath[:-3] + txtFileSuffix
        phones = []
        with open(labelFile, "r", encoding="utf-8") as fr:
            for row in fr:
                fields = row.split()
                if not fields:
                    continue
                label = fields[-1]
                if label != "q":  # phone "q" is discarded
                    phones.append(phoneMap_60_to_48[label])
        transcription[uttID] = " ".join(phones)

    # Save to files
    for table, fileName in (
        (wavScp, "wav.scp"),
        (utt2spk, "utt2spk"),
        (spk2utt, "spk2utt"),
        (transcription, "text"),
    ):
        table.save(os.path.join(expDir, fileName))

    print(f"Generate data done: {expDir}.")
def main():
    """Build the pronunciation lexicon, lexicon/grammar FSTs, HMM topology and N-gram LM.

    Options come from the module-level ``args`` object. All outputs are written
    beneath ``args.expDir`` in the ``conf``, ``dict`` and ``lm`` sub-directories;
    the training and test transcriptions are read from ``data/train/text`` and
    ``data/test/text`` under the same root.
    """
    # ------------- Parse arguments from command line ----------------------
    # 1. Describe this program
    args.discribe("This program is used to make dictionaries and language model")
    # 2. Declare options
    args.add(
        "--expDir",
        abbr="-e",
        dtype=str,
        default="exp",
        discription="The data resources and output path of current experiment.",
    )
    args.add(
        "--order",
        abbr="-o",
        dtype=int,
        default=6,
        minV=1,
        maxV=6,
        discription="The maximum order of N-grams language model.",
    )
    # 3. Parse
    args.parse()
    # 4. Display the arguments and keep a backup of them
    args.print_args()
    args.save(os.path.join(args.expDir, "conf", "make_dict_and_LM.args"))

    # Output locations used throughout the rest of the function.
    dictDir = os.path.join(args.expDir, "dict")
    lmDir = os.path.join(args.expDir, "lm")
    pronFile = os.path.join(dictDir, "pronunciation.txt")
    disambigLFile = os.path.join(dictDir, "L_disambig.fst")
    arpaFile = os.path.join(lmDir, f"{args.order}grams.arpa")
    binaryLMFile = os.path.join(lmDir, f"{args.order}grams.binary")
    GFile = os.path.join(lmDir, f"G.{args.order}.fst")

    # ------- Make the word-pronunciation lexicon file ------
    trainTrans = exkaldi.load_transcription(
        os.path.join(args.expDir, "data", "train", "text")
    )
    # All words of the training transcription with their frequency, sorted.
    wordCount = trainTrans.count_word().sort()
    # Each word is its own pronunciation.
    word2pron = exkaldi.ListTable({word: word for word in wordCount.keys()})
    word2pron.save(pronFile)

    # ------- Make lexicons ------
    # 1. Generate the LexiconBank object from the word-pronunciation file.
    lexicons = exkaldi.decode.graph.lexicon_bank(
        pronFile,
        silWords={"sil": "sil"},
        unkSymbol={"sil": "sil"},
        optionalSilPhone="sil",
        extraQuestions=[],
        positionDependent=False,
        shareSilPdf=False,
        extraDisambigPhoneNumbers=1,
        extraDisambigWords=[],
    )
    # 2. Add two extra questions.
    for lexName in ("silence_phones", "nonsilence_phones"):
        lexicons.add_extra_question(lexicons(lexName))
    # 3. Save this lexicon bank for future use.
    lexicons.save(os.path.join(dictDir, "lexicons.lex"))
    print("Generate lexicon bank done.")

    # ------- Make Lexicon fst ------
    # 1. Plain lexicon fst
    exkaldi.decode.graph.make_L(
        lexicons,
        outFile=os.path.join(dictDir, "L.fst"),
        useSilprob=0.0,
        useDisambigLexicon=False,
    )
    print("Generate lexicon fst done.")
    # 2. Lexicon fst with disambiguation symbols
    exkaldi.decode.graph.make_L(
        lexicons,
        outFile=disambigLFile,
        useSilprob=0.0,
        useDisambigLexicon=True,
    )
    print("Generate disambiguation lexicon fst done.")

    # ------- Make topological structure for GMM-HMM ------
    exkaldi.hmm.make_topology(
        lexicons,
        outFile=os.path.join(dictDir, "topo"),
        numNonsilStates=3,
        numSilStates=5,
    )
    print("Generate topo file done.")

    # ------- Train N-Grams language model ------
    # 1. Train a LM with kenlm.
    # NOTE(review): the original comment says a 3-gram kenlm model was found to
    # be best, but the order actually used is args.order (default 6) - confirm.
    exkaldi.lm.train_ngrams_kenlm(
        lexicons,
        order=args.order,
        text=trainTrans,  # an exkaldi Transcription object is passed directly
        outFile=arpaFile,
        config={"--discount_fallback": True, "-S": "20%"},
    )
    print("Generate ARPA language model done.")

    # 2. Convert to binary format and load it to compute the perplexity.
    exkaldi.lm.arpa_to_binary(arpaFile=arpaFile, outFile=binaryLMFile)
    model = exkaldi.load_ngrams(binaryLMFile)
    # 3. Prepare test transcription
    testTrans = exkaldi.load_transcription(
        os.path.join(args.expDir, "data", "test", "text")
    )
    # 4. Score
    perScore = model.perplexity(testTrans)
    print(f"The weighted average perplexity of this model is: {perScore}.")
    del model
    del testTrans

    # ------- Make Grammar fst ------
    exkaldi.decode.graph.make_G(
        lexicons,
        arpaFile=arpaFile,
        outFile=GFile,
        order=args.order,
    )
    print("Make Grammar fst done.")

    # ------- Compose LG fst for further use ------
    exkaldi.decode.graph.compose_LG(
        LFile=disambigLFile,
        GFile=GFile,
        outFile=os.path.join(lmDir, f"LG.{args.order}.fst"),
    )
    print("Compose LG fst done.")
def main():
    """Prepare train/dev/test data directories from a TIMIT corpus.

    Options come from the module-level ``args`` object. The function detects
    whether the corpus uses upper- or lower-case names, probes whether the
    waveforms can be read by sph2pipe, builds the 60->48 and 48->39 phone
    maps from the Kaldi TIMIT recipe, and calls ``generate_data`` once per split.

    Raises:
        Exception: if the corpus root contains neither TRAIN/TEST nor train/test.
    """
    # ------------- Parse arguments from command line ----------------------
    # 1. Describe this program
    args.discribe("This program is used to prepare TIMIT data.")
    # 2. Declare options
    args.add(
        "--timitRoot",
        dtype=str,
        abbr="-t",
        default="/Corpus/TIMIT",
        discription="The root path of timit dataset.",
    )
    args.add(
        "--expDir",
        dtype=str,
        abbr="-e",
        default="exp",
        discription="The output path to save generated data.",
    )
    # 3. Parse
    args.parse()
    # 4. Keep a backup of the arguments
    args.save(os.path.join(args.expDir, "conf", "prepare_data.args"))

    # ------------- Do some preparative work ----------------------
    # Kaldi must be available.
    declare.kaldi_existed()
    # The sph2pipe tool is used if the TIMIT data is in sph format.
    sph2pipeTool = os.path.join(info.KALDI_ROOT, "tools", "sph2pipe_v2.5", "sph2pipe")
    declare.is_file("sph2pipe tool", sph2pipeTool)

    # ------------- Check TIMIT data format -------------
    # 1. Decide between the upper-case and lower-case corpus layout.
    declare.is_dir("TIMIT root directory", args.timitRoot)
    dirNames = os.listdir(args.timitRoot)
    if "TRAIN" in dirNames and "TEST" in dirNames:
        uppercaseFlag = True
    elif "train" in dirNames and "test" in dirNames:
        uppercaseFlag = False
    else:
        raise Exception("Wrong format of train or test data directories.")

    # Every corpus-dependent name differs only by letter case.
    fixCase = str.upper if uppercaseFlag else str.lower
    trainResourceDir = fixCase("train")
    testResourceDir = fixCase("test")
    wavFileSuffix = fixCase("wav")
    txtFileSuffix = fixCase("phn")
    # One known file, used only to probe the waveform format.
    testWavFile = os.path.join(
        args.timitRoot, trainResourceDir, fixCase("dr1"), fixCase("fcjf0"), fixCase("sa1.wav")
    )

    # 2. The data is treated as sph format when sph2pipe exits with code 0.
    formatCheckCmd = f"{sph2pipeTool} -f wav {testWavFile}"
    _, _, returnCode = exkaldi.utils.run_shell_command(formatCheckCmd, stderr="PIPE")
    sphFlag = returnCode == 0

    # --------- Generate phone-map dictionary --------
    # 1. Build the 60->48 and 48->39 category maps.
    # NOTE(review): the table name "69-48" looks like a typo for "60-48"; it is
    # kept because it is a runtime value.
    phoneMap_60_to_48 = exkaldi.ListTable(name="69-48")
    phoneMap_48_to_39 = exkaldi.ListTable(name="48-39")
    recipeConfDir = os.path.join(info.KALDI_ROOT, "egs", "timit", "s5", "conf")
    mapFile = os.path.join(recipeConfDir, "phones.60-48-39.map")
    declare.is_file("60-48-39 phone map", mapFile)
    with open(mapFile, "r", encoding="utf-8") as fr:
        for row in fr:
            fields = row.strip().split()
            # Rows with fewer than 3 columns (phone "q") are skipped for now.
            if len(fields) < 3:
                continue
            phoneMap_60_to_48[fields[0]] = fields[1]
            phoneMap_48_to_39[fields[1]] = fields[2]
    # 2. Save the 48->39 phone map for further use.
    phoneMap_48_to_39.save(os.path.join(args.expDir, "dict", "phones.48_to_39.map"))

    # --------- Generate train dataset --------
    trainWavs = glob.glob(
        os.path.join(args.timitRoot, trainResourceDir, "*", "*", f"*.{wavFileSuffix}")
    )
    generate_data(
        trainWavs,
        os.path.join(args.expDir, "data", "train"),
        sphFlag,
        sph2pipeTool,
        txtFileSuffix,
        phoneMap_60_to_48,
    )

    # --------- Generate dev and test data --------
    # Both splits are drawn from the test resource directory, selected by the
    # recipe's speaker lists.
    for split in ["dev", "test"]:
        spkListFile = os.path.join(recipeConfDir, f"{split}_spk.list")
        declare.is_file(f"speakers list for {split}", spkListFile)
        with open(spkListFile, "r", encoding="utf-8") as fr:
            speakerRows = fr.readlines()

        splitWavs = []
        for row in speakerRows:
            speaker = row.strip()
            if len(speaker) == 0:
                continue
            if uppercaseFlag:
                speaker = speaker.upper()
            pattern = os.path.join(
                args.timitRoot, testResourceDir, "*", speaker, f"*.{wavFileSuffix}"
            )
            splitWavs.extend(glob.glob(pattern))

        generate_data(
            splitWavs,
            os.path.join(args.expDir, "data", split),
            sphFlag,
            sph2pipeTool,
            txtFileSuffix,
            phoneMap_60_to_48,
        )
def prepare_DNN_data():
    """Generate fMLLR features, alignments and metadata for DNN training.

    For each of the train/dev/test splits this builds the LDA feature from the
    stored MFCC+CMVN feature, aligns it with the SAT GMM-HMM model, estimates
    and applies an fMLLR transform, aligns again, and writes the feature,
    alignment, pdf-ID / phone-ID arrays, CMVN statistics and copies of
    spk2utt / utt2spk / text under ``{args.expDir}/train_dnn/data/<split>``.
    Finally a ``dims`` table (feature dim, phone count + 1, pdf count) is saved.

    Raises:
        AssertionError: if ``{args.expDir}/train_sat`` does not exist.
    """
    print("Start to prepare data for DNN training")
    # Explicit raise instead of an `assert` statement so the precondition is
    # still enforced under `python -O`; the exception type is unchanged.
    if not os.path.isdir(f"{args.expDir}/train_sat"):
        raise AssertionError("Please run previous programs up to SAT training.")

    # Lexicons and GMM-HMM model
    lexicons = exkaldi.load_lex(f"{args.expDir}/dict/lexicons.lex")
    hmm = f"{args.expDir}/train_sat/final.mdl"
    tree = f"{args.expDir}/train_sat/tree"

    for Name in ["train", "dev", "test"]:
        exkaldi.utils.make_dependent_dirs(
            f"{args.expDir}/train_dnn/data/{Name}", pathIsFile=False)

        # Make LDA feature
        print(f"Make LDA feature for '{Name}'")
        feat = exkaldi.load_feat(f"{args.expDir}/mfcc/{Name}/mfcc_cmvn.ark")
        feat = feat.splice(left=args.LDAsplice, right=args.LDAsplice)
        feat = exkaldi.transform_feat(
            feat, matFile=f"{args.expDir}/train_lda_mllt/trans.mat")

        # Compile the aligning graph
        print("Compile aligning graph")
        transInt = exkaldi.hmm.transcription_to_int(
            transcription=f"{args.expDir}/data/{Name}/text",
            symbolTable=lexicons("words"),
            unkSymbol=lexicons("oov"),
        )
        graphFile = exkaldi.decode.wfst.compile_align_graph(
            hmm,
            tree,
            transcription=transInt,
            LFile=f"{args.expDir}/dict/L.fst",
            outFile=f"{args.expDir}/train_dnn/data/{Name}/align_graph",
            lexicons=lexicons,
        )

        # Align first time
        print("Align the first time")
        ali = exkaldi.decode.wfst.gmm_align(
            hmm,
            feat,
            alignGraphFile=graphFile,
            lexicons=lexicons,
        )

        # Estimate transform matrix
        print("Estimate fMLLR transform matrix")
        fmllrTransMat = exkaldi.hmm.estimate_fMLLR_matrix(
            aliOrLat=ali,
            lexicons=lexicons,
            aliHmm=hmm,
            feat=feat,
            spk2utt=f"{args.expDir}/data/{Name}/spk2utt",
        )
        fmllrTransMat.save(f"{args.expDir}/train_dnn/data/{Name}/trans.ark")

        # Transform feature
        print("Transform feature")
        feat = exkaldi.use_fmllr(
            feat,
            fmllrTransMat,
            utt2spk=f"{args.expDir}/data/{Name}/utt2spk",
        )

        # Align second time with the new feature
        print("Align the second time")
        ali = exkaldi.decode.wfst.gmm_align(
            hmm,
            feat,
            alignGraphFile=graphFile,
            lexicons=lexicons,
        )

        # Save alignment and feature
        print("Save final fmllr feature and alignment")
        feat.save(f"{args.expDir}/train_dnn/data/{Name}/fmllr.ark")
        ali.save(f"{args.expDir}/train_dnn/data/{Name}/ali")

        # Transform alignment
        print("Generate pdf ID and phone ID alignment")
        ali.to_numpy(aliType="pdfID", hmm=hmm).save(
            f"{args.expDir}/train_dnn/data/{Name}/pdfID.npy")
        ali.to_numpy(aliType="phoneID", hmm=hmm).save(
            f"{args.expDir}/train_dnn/data/{Name}/phoneID.npy")
        del ali

        # Compute CMVN for the fMLLR feature
        print("Compute the CMVN for fmllr feature")
        cmvn = exkaldi.compute_cmvn_stats(
            feat, spk2utt=f"{args.expDir}/data/{Name}/spk2utt")
        cmvn.save(f"{args.expDir}/train_dnn/data/{Name}/cmvn_of_fmllr.ark")
        del cmvn
        del feat

        # Copy spk2utt, utt2spk and text files
        shutil.copyfile(f"{args.expDir}/data/{Name}/spk2utt",
                        f"{args.expDir}/train_dnn/data/{Name}/spk2utt")
        shutil.copyfile(f"{args.expDir}/data/{Name}/utt2spk",
                        f"{args.expDir}/train_dnn/data/{Name}/utt2spk")
        shutil.copyfile(f"{args.expDir}/data/{Name}/text",
                        f"{args.expDir}/train_dnn/data/{Name}/text")
        # The int-format transcription goes next to the source text file.
        transInt.save(f"{args.expDir}/data/{Name}/text.int")

    # Runs after the loop: it reads the test-split feature written above.
    print("Write feature and alignment dim information")
    dims = exkaldi.ListTable()
    feat = exkaldi.load_feat(f"{args.expDir}/train_dnn/data/test/fmllr.ark")
    dims["fmllr"] = feat.dim
    del feat
    hmm = exkaldi.hmm.load_hmm(f"{args.expDir}/train_sat/final.mdl")
    dims["phones"] = hmm.info.phones + 1
    dims["pdfs"] = hmm.info.pdfs
    del hmm
    dims.save(f"{args.expDir}/train_dnn/data/dims")
def prepare_LSTM_data():
    """Generate fMLLR features, alignments and metadata for LSTM training.

    For each of the train/dev/test splits this rebuilds the LDA feature, aligns
    the split with the stored DNN output probabilities (reusing the aligning
    graph compiled for the DNN stage), estimates and applies an fMLLR
    transform, and writes the results under
    ``{args.expDir}/train_lstm/data/<split>``. A ``dims`` table (feature dim,
    phone count + 1, pdf count) is saved at the end.
    """
    print("Start to prepare data for LSTM training")
    declare.is_dir(f"{args.expDir}/train_dnn/prob",
                   debug="Please run previous programs up to DNN training.")

    # Lexicons and GMM-HMM model
    lexicons = exkaldi.load_lex(f"{args.expDir}/dict/lexicons.lex")
    hmm = f"{args.expDir}/train_sat/final.mdl"
    tree = f"{args.expDir}/train_sat/tree"

    for Name in ["train", "dev", "test"]:
        srcDir = f"{args.expDir}/data/{Name}"
        dnnDir = f"{args.expDir}/train_dnn/data/{Name}"
        outDir = f"{args.expDir}/train_lstm/data/{Name}"
        exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

        # Load the MFCC feature and turn it into the LDA feature
        print(f"Make LDA feature for '{Name}'")
        feat = exkaldi.load_feat(f"{args.expDir}/mfcc/{Name}/mfcc_cmvn.ark")
        feat = feat.splice(left=args.LDAsplice, right=args.LDAsplice)
        feat = exkaldi.transform_feat(
            feat, matFile=f"{args.expDir}/train_lda_mllt/trans.mat")

        # Probabilities used for aligning; the file is large, so an index
        # table is loaded instead of the data itself.
        prob = exkaldi.load_index_table(f"{args.expDir}/train_dnn/prob/{Name}.ark")

        # Reuse the aligning graph from the DNN stage
        print("Copy aligning graph from DNN resources")
        graphFile = f"{outDir}/align_graph"
        shutil.copyfile(f"{dnnDir}/align_graph", graphFile)

        # Align
        print("Align")
        ali = exkaldi.decode.wfst.nn_align(
            hmm,
            prob,
            alignGraphFile=graphFile,
            lexicons=lexicons,
            outFile=f"{outDir}/ali",
        )

        # Estimate transform matrix
        print("Estimate transform matrix")
        fmllrTransMat = exkaldi.hmm.estimate_fMLLR_matrix(
            aliOrLat=ali,
            lexicons=lexicons,
            aliHmm=hmm,
            feat=feat,
            spk2utt=f"{srcDir}/spk2utt",
            outFile=f"{outDir}/trans.ark",
        )

        # Transform feature
        print("Transform matrix")
        feat = exkaldi.use_fmllr(
            feat,
            fmllrTransMat,
            utt2spk=f"{srcDir}/utt2spk",
            outFile=f"{outDir}/fmllr.ark",
        )

        # 'ali' is an index table here, so the alignment data is fetched
        # before calling 'to_numpy'.
        ali = ali.fetch(arkType="ali")
        for aliType in ("pdfID", "phoneID"):
            ali.to_numpy(aliType=aliType, hmm=hmm).save(f"{outDir}/{aliType}.npy")
        del ali

        # Compute CMVN for the fMLLR feature; only the saved file is needed.
        exkaldi.compute_cmvn_stats(
            feat,
            spk2utt=f"{srcDir}/spk2utt",
            outFile=f"{outDir}/cmvn_of_fmllr.ark",
        )
        del feat

        # Copy spk2utt, utt2spk and text files
        for fileName in ("spk2utt", "utt2spk", "text"):
            shutil.copyfile(f"{srcDir}/{fileName}", f"{outDir}/{fileName}")

    # Runs after the loop: it reads the test-split feature written above.
    print("Write feature and alignment dim information")
    dims = exkaldi.ListTable()
    testFeat = exkaldi.load_feat(f"{args.expDir}/train_lstm/data/test/fmllr.ark")
    dims["fmllr"] = testFeat.dim
    del testFeat
    hmmModel = exkaldi.hmm.load_hmm(f"{args.expDir}/train_sat/final.mdl")
    dims["phones"] = hmmModel.info.phones + 1
    dims["pdfs"] = hmmModel.info.pdfs
    del hmmModel
    dims.save(f"{args.expDir}/train_lstm/data/dims")
def GMM_decode_fmllr_and_score(outDir, hmm, HCLGfile, tansformMatFile=None):
    """Decode the test set with two-pass fMLLR adaptation and score the result.

    The test MFCC+CMVN feature under ``exp/mfcc/test`` is turned into either a
    delta feature or an LDA+MLLT feature, decoded once to estimate a primary
    fMLLR matrix, decoded again on the adapted feature to estimate a secondary
    matrix, and the composed transform is used to rescore the second lattice.
    The final lattice is scored over a grid of word penalties and LM weights.

    Args:
        outDir: directory that receives ``test_premary.lat``, ``trans.ark``,
            ``test.lat``, ``ref.txt``, ``hyp.txt`` and ``best_WER``.
        hmm: GMM-HMM model passed to the decoding / estimation calls.
        HCLGfile: decoding graph passed to ``gmm_decode``.
        tansformMatFile: LDA+MLLT transform matrix file. If None, 2nd-order
            deltas are added instead of splicing and transforming.
    """
    exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

    lexicons = exkaldi.decode.graph.load_lex(
        os.path.join("exp", "dict", "lexicons.lex"))

    print("Load test feature.")
    featFile = os.path.join("exp", "mfcc", "test", "mfcc_cmvn.ark")
    feat = exkaldi.load_feat(featFile)

    if tansformMatFile is None:
        print("Feature type is delta")
        feat = feat.add_delta(order=2)
        print("Add 2-order deltas.")
    else:
        print("Feature type is lda+mllt")
        feat = feat.splice(left=3, right=3)
        feat = exkaldi.transform_feat(feat, tansformMatFile)
        print("Transform feature")

    ## 1. Estimate the primary transform matrix from alignment or lattice.
    ## It is estimated from a lattice here, so decode first.
    print("Decode the first time with original feature.")
    preLat = exkaldi.decode.wfst.gmm_decode(
        feat,
        hmm,
        HCLGfile,
        wordSymbolTable=lexicons("words"),
        beam=10,
        latBeam=6,
        acwt=0.083333,
        maxActive=2000,
    )
    preLat.save(os.path.join(outDir, "test_premary.lat"))

    print("Estimate the primary fMLLR transform matrix.")
    preTransMatrix = exkaldi.hmm.estimate_fMLLR_matrix(
        aliOrLat=preLat,
        lexicons=lexicons,
        aliHmm=hmm,
        feat=feat,
        adaHmm=None,
        silenceWeight=0.01,
        acwt=0.083333,
        spk2utt=os.path.join("exp", "data", "test", "spk2utt"),
    )
    del preLat

    ## 2. Transform feature. The new feature is used to estimate the
    ## secondary transform matrix from a lattice.
    print("Transform feature with primary matrix.")
    fmllrFeat = exkaldi.use_fmllr(
        feat,
        preTransMatrix,
        utt2spkFile=os.path.join("exp", "data", "test", "utt2spk"),
    )
    print("Decode the second time with primary fmllr feature.")
    secLat = exkaldi.decode.wfst.gmm_decode(
        fmllrFeat,
        hmm,
        HCLGfile,
        wordSymbolTable=lexicons("words"),
        beam=13,
        latBeam=6,
        acwt=0.083333,
        maxActive=7000,
        config={"--determinize-lattice": "false"},
    )
    print("Determinize secondary lattice.")
    thiLat = secLat.determinize(acwt=0.083333, beam=4)
    print("Estimate the secondary fMLLR transform matrix.")
    secTransMatrix = exkaldi.hmm.estimate_fMLLR_matrix(
        aliOrLat=thiLat,
        lexicons=lexicons,
        aliHmm=hmm,
        feat=fmllrFeat,
        adaHmm=None,
        silenceWeight=0.01,
        acwt=0.083333,
        spk2utt=os.path.join("exp", "data", "test", "spk2utt"),
    )
    del fmllrFeat
    del thiLat

    ## 3. Compose the primary and secondary matrices into the final transform.
    print("Compose the primary and secondary transform matrix.")
    finalTransMatrix = exkaldi.hmm.compose_transform_matrixs(
        matA=preTransMatrix,
        matB=secTransMatrix,
        bIsAffine=True,
    )
    finalTransMatrix.save(os.path.join(outDir, "trans.ark"))

    print("Transform feature with final matrix.")
    ## 4. Transform the original feature with the final matrix and rescore the
    ## lattice generated in the second step to obtain the final lattice.
    finalFmllrFeat = exkaldi.use_fmllr(
        feat,
        finalTransMatrix,
        utt2spkFile=os.path.join("exp", "data", "test", "utt2spk"),
    )
    del finalTransMatrix
    print("Rescore secondary lattice.")
    lat = secLat.am_rescore(
        hmm=hmm,
        feat=finalFmllrFeat,
    )
    print("Determinize secondary lattice.")
    lat = lat.determinize(acwt=0.083333, beam=6)
    lat.save(os.path.join(outDir, "test.lat"))
    print("Generate lattice done.")

    phoneMapFile = os.path.join("exp", "dict", "phones.48_to_39.map")
    phoneMap = exkaldi.ListTable(name="48-39").load(phoneMapFile)
    refText = exkaldi.load_trans(
        os.path.join("exp", "data", "test", "text")).convert(phoneMap, None)
    refText.save(os.path.join(outDir, "ref.txt"))
    print("Generate reference text done.")

    print("Now score:")
    # Start from +inf so the first scored result always becomes the current
    # best and `bestResult` can never stay None.
    bestWER = (float("inf"), 0, 0)
    bestResult = None
    for penalty in [0., 0.5, 1.0]:
        # The penalised lattice depends only on `penalty`, so it is built once
        # per penalty instead of once per (penalty, LMWT) pair.
        # NOTE(review): assumes get_1best does not modify the lattice - confirm.
        newLat = lat.add_penalty(penalty)
        for LMWT in range(1, 11):
            # Get 1-best result (word-level)
            result = newLat.get_1best(lexicons("words"), hmm, lmwt=LMWT, acwt=1)
            # Transform from int value format to text format
            result = exkaldi.hmm.transcription_from_int(result, lexicons("words"))
            # Transform 48-phones to 39-phones
            result = result.convert(phoneMap, None)
            # Compute WER
            score = exkaldi.decode.score.wer(ref=refText, hyp=result, mode="present")
            if score.WER < bestWER[0]:
                bestResult = result
                bestWER = (score.WER, penalty, LMWT)
            print(f"Penalty: {penalty}, LMWT: {LMWT}, WER: {score.WER}%")

    print("Score done. Save the best result.")
    bestResult.save(os.path.join(outDir, "hyp.txt"))
    with open(os.path.join(outDir, "best_WER"), "w") as fw:
        fw.write(f"WER {bestWER[0]}, penalty {bestWER[1]}, LMWT {bestWER[2]}")
def GMM_decode_mfcc_and_score(outDir, hmm, HCLGfile, tansformMatFile=None):
    """Decode the test set with a GMM-HMM model and score the result.

    The test MFCC+CMVN feature under ``exp/mfcc/test`` is turned into either a
    delta feature or an LDA+MLLT feature, decoded into a lattice, and the
    lattice is scored over a grid of word penalties and LM weights.

    Args:
        outDir: directory that receives ``test.lat``, ``ref.txt``, ``hyp.txt``
            and ``best_WER``.
        hmm: GMM-HMM model passed to the decoding calls.
        HCLGfile: decoding graph passed to ``gmm_decode``.
        tansformMatFile: LDA+MLLT transform matrix file. If None, 2nd-order
            deltas are added instead of splicing and transforming.
    """
    exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

    lexicons = exkaldi.decode.graph.load_lex(
        os.path.join("exp", "dict", "lexicons.lex"))

    print("Load test feature.")
    featFile = os.path.join("exp", "mfcc", "test", "mfcc_cmvn.ark")
    feat = exkaldi.load_feat(featFile)

    if tansformMatFile is None:
        print("Feature type is delta")
        feat = feat.add_delta(order=2)
        print("Add 2-order deltas.")
    else:
        print("Feature type is lda+mllt")
        feat = feat.splice(left=3, right=3)
        feat = exkaldi.transform_feat(feat, tansformMatFile)
        print("Transform feature")

    print("Start to decode")
    lat = exkaldi.decode.wfst.gmm_decode(
        feat,
        hmm,
        HCLGfile,
        wordSymbolTable=lexicons("words"),
        beam=13,
        latBeam=6,
        acwt=0.083333,
    )
    lat.save(os.path.join(outDir, "test.lat"))
    print("Generate lattice done.")

    phoneMapFile = os.path.join("exp", "dict", "phones.48_to_39.map")
    phoneMap = exkaldi.ListTable(name="48-39").load(phoneMapFile)
    refText = exkaldi.load_trans(
        os.path.join("exp", "data", "test", "text")).convert(phoneMap, None)
    refText.save(os.path.join(outDir, "ref.txt"))
    print("Generate reference text done.")

    print("Now score:")
    # Start from +inf so the first scored result always becomes the current
    # best and `bestResult` can never stay None.
    bestWER = (float("inf"), 0, 0)
    bestResult = None
    for penalty in [0., 0.5, 1.0]:
        # The penalised lattice depends only on `penalty`, so it is built once
        # per penalty instead of once per (penalty, LMWT) pair.
        # NOTE(review): assumes get_1best does not modify the lattice - confirm.
        newLat = lat.add_penalty(penalty)
        for LMWT in range(1, 11):
            # Get 1-best result (word-level)
            result = newLat.get_1best(lexicons("words"), hmm, lmwt=LMWT, acwt=1)
            # Transform from int value format to text format
            result = exkaldi.hmm.transcription_from_int(result, lexicons("words"))
            # Transform 48-phones to 39-phones
            result = result.convert(phoneMap, None)
            # Compute WER
            score = exkaldi.decode.score.wer(ref=refText, hyp=result, mode="present")
            if score.WER < bestWER[0]:
                bestResult = result
                bestWER = (score.WER, penalty, LMWT)
            print(f"Penalty: {penalty}, LMWT: {LMWT}, WER: {score.WER}%")

    print("Score done. Save the best result.")
    bestResult.save(os.path.join(outDir, "hyp.txt"))
    with open(os.path.join(outDir, "best_WER"), "w") as fw:
        fw.write(f"WER {bestWER[0]}, penalty {bestWER[1]}, LMWT {bestWER[2]}")
def prepare_data():
    """Prepare train/dev/test data directories under ``exp/data`` from TIMIT.

    The corpus location is read from the module-level ``timitRoot``. The
    function detects whether the corpus uses upper- or lower-case names, probes
    whether the waveforms can be read by sph2pipe, builds the 60->48 and 48->39
    phone maps from the Kaldi TIMIT recipe, and writes wav.scp, utt2spk,
    spk2utt and text for each split.

    Raises:
        Exception: if the sph2pipe tool is missing, if ``timitRoot`` is not a
            directory, or if it contains neither TRAIN/TEST nor train/test.
    """
    dataOutDir = os.path.join("exp", "data")
    exkaldi.utils.make_dependent_dirs(dataOutDir, pathIsFile=False)

    # Prepare tools
    ExkaldiInfo.vertify_kaldi_existed()
    sph2pipeTool = os.path.join(ExkaldiInfo.KALDI_ROOT, "tools", "sph2pipe_v2.5", "sph2pipe")
    # Test for the file itself: the earlier `os.path.join(...)` check returned a
    # non-empty string, which is always truthy, so the error could never fire.
    if not os.path.isfile(sph2pipeTool):
        raise Exception("Expected sph2pipe tool existed.")

    # Check TIMIT data format
    if not os.path.isdir(timitRoot):
        raise Exception(f"No such directory: {timitRoot}.")
    dirNames = os.listdir(timitRoot)
    if "TRAIN" in dirNames and "TEST" in dirNames:
        uppercaseFlag = True
        trainResourceDir = "TRAIN"
        testResourceDir = "TEST"
        # One known file, used only to probe the waveform format.
        testWavFile = os.path.join(timitRoot, "TRAIN", "DR1", "FCJF0", "SA1.WAV")
        wavFileSuffix = "WAV"
        txtFileSuffix = "PHN"
    elif "train" in dirNames and "test" in dirNames:
        uppercaseFlag = False
        trainResourceDir = "train"
        testResourceDir = "test"
        testWavFile = os.path.join(timitRoot, "train", "dr1", "fcjf0", "sa1.wav")
        wavFileSuffix = "wav"
        txtFileSuffix = "phn"
    else:
        raise Exception("Wrong format of train or test data directories.")

    # The data is treated as sph format when sph2pipe exits with code 0.
    formatCheckCmd = f"{sph2pipeTool} -f wav {testWavFile}"
    out, err, cod = exkaldi.utils.run_shell_command(
        formatCheckCmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    sphFlag = cod == 0

    # Build the 60->48 phone map and the 48->39 phone map.
    # NOTE(review): the table name "69-48" looks like a typo for "60-48"; it is
    # kept because it is a runtime value.
    phoneMap_60_to_48 = exkaldi.ListTable(name="69-48")
    phoneMap_48_to_39 = exkaldi.ListTable(name="48-39")
    mapFile = os.path.join(
        ExkaldiInfo.KALDI_ROOT, "egs", "timit", "s5", "conf", "phones.60-48-39.map")
    with open(mapFile, "r", encoding="utf-8") as fr:
        for line in fr:
            line = line.strip().split()
            # Rows with fewer than 3 columns carry no complete mapping.
            if len(line) < 3:
                continue
            phoneMap_60_to_48[line[0]] = line[1]
            phoneMap_48_to_39[line[1]] = line[2]
    phoneMap_48_to_39.save(os.path.join("exp", "dict", "phones.48_to_39.map"))

    def generate_data(wavFiles, outDir):
        """Write wav.scp, utt2spk, spk2utt and text for ``wavFiles`` into ``outDir``."""
        wavScp = exkaldi.ListTable(name="wavScp")
        utt2spk = exkaldi.ListTable(name="utt2spk")
        spk2utt = exkaldi.ListTable(name="spk2utt")
        transcription = exkaldi.ListTable(name="trans")

        for Name in wavFiles:
            # SA1/SA2 utterances are skipped; the tail is upper-cased first, so
            # only the upper-case spellings need to be listed.
            if Name[-7:].upper() in ("SA1.WAV", "SA2.WAV"):
                continue

            # The parent directory name is the speaker ID.
            speaker = os.path.basename(os.path.dirname(Name))
            uttID = speaker + "_" + os.path.basename(Name)[0:-4]
            wavFilePath = os.path.abspath(Name)

            # wav.scp
            if sphFlag:
                wavScp[uttID] = f"{sph2pipeTool} -f wav {wavFilePath} |"
            else:
                wavScp[uttID] = wavFilePath

            # utt2spk
            utt2spk[uttID] = speaker

            # spk2utt (space-separated utterance IDs per speaker)
            if speaker not in spk2utt.keys():
                spk2utt[speaker] = f"{uttID}"
            else:
                spk2utt[speaker] += f" {uttID}"

            # transcription: last field of every non-blank label line
            txtFile = Name[:-3] + txtFileSuffix
            phones = []
            with open(txtFile, "r", encoding="utf-8") as fr:
                for line in fr:
                    line = line.strip()
                    if len(line) == 0:
                        continue
                    phone = line.split()[-1]
                    if phone == "q":  # phone "q" is discarded
                        continue
                    phones.append(phoneMap_60_to_48[phone])
            transcription[uttID] = " ".join(phones)

        # Save to files
        wavScp.save(os.path.join(outDir, "wav.scp"))
        utt2spk.save(os.path.join(outDir, "utt2spk"))
        spk2utt.save(os.path.join(outDir, "spk2utt"))
        transcription.save(os.path.join(outDir, "text"))
        print(f"Generate data done: {outDir}.")

    # Generate train data
    wavFiles = glob.glob(
        os.path.join(timitRoot, trainResourceDir, "*", "*", f"*.{wavFileSuffix}"))
    generate_data(
        wavFiles=wavFiles,
        outDir=os.path.join(dataOutDir, "train"),
    )

    # Generate dev and test data. Both splits are drawn from the test resource
    # directory, selected by the recipe's speaker lists.
    for Name in ["dev", "test"]:
        spkListFile = os.path.join(
            ExkaldiInfo.KALDI_ROOT, "egs", "timit", "s5", "conf", f"{Name}_spk.list")
        with open(spkListFile, "r", encoding="utf-8") as fr:
            spkList = fr.readlines()
        wavFiles = []
        for spk in spkList:
            spk = spk.strip()
            if len(spk) == 0:
                continue
            if uppercaseFlag:
                spk = spk.upper()
            wavFiles.extend(
                glob.glob(
                    os.path.join(timitRoot, testResourceDir, "*", spk,
                                 f"*.{wavFileSuffix}")))
        generate_data(
            wavFiles=wavFiles,
            outDir=os.path.join(dataOutDir, Name),
        )