def compute_mfcc():
    """Compute MFCC features and CMVN statistics for every data subset.

    For each of the "train", "dev" and "test" subsets found under
    ``exp/data/<subset>`` three archives are written to ``exp/mfcc/<subset>``:

    * ``raw_mfcc.ark``  -- the raw MFCC feature (energy disabled),
    * ``cmvn.ark``      -- the CMVN statistics computed with ``spk2utt``,
    * ``mfcc_cmvn.ark`` -- the feature after applying CMVN with ``utt2spk``.
    """
    mfcc_root = os.path.join("exp", "mfcc")
    exkaldi.utils.make_dependent_dirs(mfcc_root, pathIsFile=False)

    for subset in ("train", "dev", "test"):
        print(f"Compute {subset} MFCC feature.")

        data_dir = os.path.join("exp", "data", subset)
        out_dir = os.path.join(mfcc_root, subset)
        exkaldi.utils.make_dependent_dirs(out_dir, pathIsFile=False)

        # Step 1: raw MFCC straight from the wav list.
        mfcc = exkaldi.compute_mfcc(
            wavFile=os.path.join(data_dir, "wav.scp"),
            config={"--use-energy": "false"},
        )
        mfcc.save(os.path.join(out_dir, "raw_mfcc.ark"))
        print("Generate raw MFCC feature done.")

        # Step 2: CMVN statistics from the raw feature.
        stats = exkaldi.compute_cmvn_stats(
            feat=mfcc,
            spk2utt=os.path.join(data_dir, "spk2utt"),
        )
        stats.save(os.path.join(out_dir, "cmvn.ark"))
        print("Generate CMVN statistics done.")

        # Step 3: apply CMVN. The name is rebound on purpose so the raw
        # feature is no longer referenced once the normalized one exists.
        mfcc = exkaldi.use_cmvn(
            feat=mfcc,
            cmvn=stats,
            utt2spk=os.path.join(data_dir, "utt2spk"),
        )
        mfcc.save(os.path.join(out_dir, "mfcc_cmvn.ark"))
        print("Generate MFCC feature (applied CMVN) done.")

    print("Compute MFCC done.")
def main():
    """Parse command-line options, then compute MFCC and CMVN for all subsets.

    Options (registered on the module-level ``args`` object):

    * ``--expDir`` / ``-e``: data and output root of the experiment.
    * ``--useEnergy`` / ``-u``: whether to add energy to the MFCC feature.
    * ``--parallel`` / ``-p``: number of chunks the train ``wav.scp`` is split
      into (1 to 10).

    The parsed arguments are printed and saved to
    ``<expDir>/conf/compute_mfcc.args``. For each of "train", "dev" and "test"
    the raw MFCC, the CMVN statistics and the CMVN-applied feature are then
    produced under ``<expDir>/mfcc/<subset>``.
    """
    # ------------- Parse arguments from command line ----------------------
    # 1. Describe this program.
    args.describe(
        "This program is used to compute MFCC feature and CMVN statistics"
    )

    # 2. Register the options.
    # NOTE: "discription" is the keyword spelling used by every args.add call
    # in this file; do not "correct" it without checking the library.
    args.add(
        "--expDir",
        abbr="-e",
        dtype=str,
        default="exp",
        discription="The data and output path of current experiment.",
    )
    args.add(
        "--useEnergy",
        abbr="-u",
        dtype=bool,
        default=False,
        discription="Whether add energy to MFCC feature.",
    )
    args.add(
        "--parallel",
        abbr="-p",
        dtype=int,
        default=4,
        minV=1,
        maxV=10,
        discription="The number of parallel process to compute train feature of train dataset.",
    )

    # 3. Parse.
    args.parse()

    # 4. Display the arguments and keep a backup of them on disk.
    args.print_args()
    args.save(os.path.join(args.expDir, "conf", "compute_mfcc.args"))

    # ---------- Compute mfcc feature of train, dev and test dataset -----------
    mfccConfig = {"--use-energy": "true" if args.useEnergy else "false"}

    for subset in ("train", "dev", "test"):
        print(f"Compute {subset} MFCC feature.")

        data_dir = os.path.join(args.expDir, "data", subset)
        out_dir = os.path.join(args.expDir, "mfcc", subset)
        raw_path = os.path.join(out_dir, "raw_mfcc.ark")

        # 1. Compute the raw feature.
        if subset == "train" and args.parallel > 1:
            # Multiple processes: split the wav list into chunks, compute each
            # chunk with results written through outFile, then merge them.
            wav_chunks = exkaldi.utils.split_txt_file(
                os.path.join(data_dir, "wav.scp"),
                chunks=args.parallel,
            )
            chunk_feats = exkaldi.compute_mfcc(
                wav_chunks,
                config=mfccConfig,
                outFile=raw_path,
            )
            feat = exkaldi.merge_archives(chunk_feats)
        else:
            # Single process: compute first, then save explicitly.
            feat = exkaldi.compute_mfcc(
                os.path.join(data_dir, "wav.scp"),
                config=mfccConfig,
            )
            feat.save(raw_path)
        print("Generate raw MFCC feature done.")

        # 2. Compute the CMVN statistics.
        cmvn = exkaldi.compute_cmvn_stats(
            feat=feat,
            spk2utt=os.path.join(data_dir, "spk2utt"),
        )
        cmvn.save(os.path.join(out_dir, "cmvn.ark"))
        print("Generate CMVN statistics done.")

        # 3. Apply CMVN and save the normalized feature.
        feat = exkaldi.use_cmvn(
            feat=feat,
            cmvn=cmvn,
            utt2spk=os.path.join(data_dir, "utt2spk"),
        )
        feat.save(os.path.join(out_dir, "mfcc_cmvn.ark"))
        print("Generate MFCC feature (applied CMVN) done.")

    print("Compute MFCC done.")
def prepare_DNN_data():
    """Prepare fMLLR features and alignments for DNN training.

    Reads the SAT model (``final.mdl`` and ``tree``) from
    ``<expDir>/train_sat`` and, for each of the "train", "dev" and "test"
    subsets, writes the following into ``<expDir>/train_dnn/data/<subset>``:

    * ``align_graph``        -- the compiled aligning graph,
    * ``trans.ark``          -- the estimated fMLLR transform matrix,
    * ``fmllr.ark``          -- the fMLLR-transformed feature,
    * ``ali``                -- the alignment from the second pass,
    * ``pdfID.npy`` / ``phoneID.npy`` -- the alignment converted to NumPy,
    * ``cmvn_of_fmllr.ark``  -- CMVN statistics of the fMLLR feature,
    * copies of ``spk2utt``, ``utt2spk`` and ``text``.

    The int-format transcription is saved as
    ``<expDir>/data/<subset>/text.int``. Finally the feature dimension, the
    phone count (+1) and the pdf count are saved to
    ``<expDir>/train_dnn/data/dims``.

    Raises:
        AssertionError: if ``<expDir>/train_sat`` is not a directory.
    """
    print("Start to prepare data for DNN training")

    # Explicit raise instead of an `assert` statement: asserts are removed
    # when Python runs with -O, which would let the pipeline continue without
    # the SAT outputs. The exception type and message are unchanged.
    if not os.path.isdir(f"{args.expDir}/train_sat"):
        raise AssertionError("Please run previous programs up to SAT training.")

    # Lexicons and GMM-HMM model (the model and tree are passed as file paths).
    lexicons = exkaldi.load_lex(f"{args.expDir}/dict/lexicons.lex")
    hmm = f"{args.expDir}/train_sat/final.mdl"
    tree = f"{args.expDir}/train_sat/tree"

    for Name in ["train", "dev", "test"]:
        srcDir = f"{args.expDir}/data/{Name}"
        outDir = f"{args.expDir}/train_dnn/data/{Name}"
        exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

        # Make LDA feature: splice the CMVN-applied MFCC, then apply the
        # transform matrix from the LDA+MLLT stage.
        print(f"Make LDA feature for '{Name}'")
        feat = exkaldi.load_feat(f"{args.expDir}/mfcc/{Name}/mfcc_cmvn.ark")
        feat = feat.splice(left=args.LDAsplice, right=args.LDAsplice)
        feat = exkaldi.transform_feat(
            feat,
            matFile=f"{args.expDir}/train_lda_mllt/trans.mat",
        )

        # Compile the aligning graph from the int-format transcription.
        print("Compile aligning graph")
        transInt = exkaldi.hmm.transcription_to_int(
            transcription=f"{srcDir}/text",
            symbolTable=lexicons("words"),
            unkSymbol=lexicons("oov"),
        )
        graphFile = exkaldi.decode.wfst.compile_align_graph(
            hmm,
            tree,
            transcription=transInt,
            LFile=f"{args.expDir}/dict/L.fst",
            outFile=f"{outDir}/align_graph",
            lexicons=lexicons,
        )

        # First alignment pass, on the LDA feature.
        print("Align the first time")
        ali = exkaldi.decode.wfst.gmm_align(
            hmm,
            feat,
            alignGraphFile=graphFile,
            lexicons=lexicons,
        )

        # Estimate the fMLLR transform matrix from the first alignment.
        print("Estimate fMLLR transform matrix")
        fmllrTransMat = exkaldi.hmm.estimate_fMLLR_matrix(
            aliOrLat=ali,
            lexicons=lexicons,
            aliHmm=hmm,
            feat=feat,
            spk2utt=f"{srcDir}/spk2utt",
        )
        fmllrTransMat.save(f"{outDir}/trans.ark")

        # Transform the feature with the fMLLR matrix.
        print("Transform feature")
        feat = exkaldi.use_fmllr(
            feat,
            fmllrTransMat,
            utt2spk=f"{srcDir}/utt2spk",
        )

        # Second alignment pass, on the fMLLR feature.
        print("Align the second time")
        ali = exkaldi.decode.wfst.gmm_align(
            hmm,
            feat,
            alignGraphFile=graphFile,
            lexicons=lexicons,
        )

        # Save alignment and feature.
        print("Save final fmllr feature and alignment")
        feat.save(f"{outDir}/fmllr.ark")
        ali.save(f"{outDir}/ali")

        # Convert the alignment to pdf-ID and phone-ID NumPy archives.
        print("Generate pdf ID and phone ID alignment")
        ali.to_numpy(aliType="pdfID", hmm=hmm).save(f"{outDir}/pdfID.npy")
        ali.to_numpy(aliType="phoneID", hmm=hmm).save(f"{outDir}/phoneID.npy")
        del ali  # drop the reference before the next large computation

        # Compute CMVN statistics for the fMLLR feature.
        print("Compute the CMVN for fmllr feature")
        cmvn = exkaldi.compute_cmvn_stats(feat, spk2utt=f"{srcDir}/spk2utt")
        cmvn.save(f"{outDir}/cmvn_of_fmllr.ark")
        del cmvn
        del feat

        # Copy spk2utt, utt2spk and text next to the generated data.
        for fileName in ("spk2utt", "utt2spk", "text"):
            shutil.copyfile(f"{srcDir}/{fileName}", f"{outDir}/{fileName}")

        # NOTE(review): text.int is written under data/<Name>, not under
        # train_dnn/data/<Name> like the other outputs -- confirm intended.
        transInt.save(f"{srcDir}/text.int")

    print("Write feature and alignment dim information")
    dims = exkaldi.ListTable()

    # The test-set feature is loaded only to read its dimension.
    feat = exkaldi.load_feat(f"{args.expDir}/train_dnn/data/test/fmllr.ark")
    dims["fmllr"] = feat.dim
    del feat

    hmmModel = exkaldi.hmm.load_hmm(f"{args.expDir}/train_sat/final.mdl")
    # presumably the +1 reserves one extra phone ID -- TODO confirm
    dims["phones"] = hmmModel.info.phones + 1
    dims["pdfs"] = hmmModel.info.pdfs
    del hmmModel

    dims.save(f"{args.expDir}/train_dnn/data/dims")
def prepare_LSTM_data():
    """Prepare fMLLR features and alignments for LSTM training.

    Requires ``<expDir>/train_dnn/prob`` to exist (checked with
    ``declare.is_dir``). For each of the "train", "dev" and "test" subsets the
    DNN probability archive is used for aligning, and the following are
    written into ``<expDir>/train_lstm/data/<subset>``: ``align_graph`` (copied
    from the DNN data), ``ali``, ``trans.ark``, ``fmllr.ark``, ``pdfID.npy``,
    ``phoneID.npy``, ``cmvn_of_fmllr.ark`` and copies of ``spk2utt``,
    ``utt2spk`` and ``text``. Finally the feature dimension, the phone count
    (+1) and the pdf count are saved to ``<expDir>/train_lstm/data/dims``.
    """
    print("Start to prepare data for LSTM training")

    declare.is_dir(
        f"{args.expDir}/train_dnn/prob",
        debug="Please run previous programs up to DNN training.",
    )

    # Lexicons and GMM-HMM model (the model and tree are kept as file paths).
    lexicons = exkaldi.load_lex(f"{args.expDir}/dict/lexicons.lex")
    hmm = f"{args.expDir}/train_sat/final.mdl"
    tree = f"{args.expDir}/train_sat/tree"

    for Name in ("train", "dev", "test"):
        src_dir = f"{args.expDir}/data/{Name}"
        dnn_dir = f"{args.expDir}/train_dnn/data/{Name}"
        lstm_dir = f"{args.expDir}/train_lstm/data/{Name}"
        graph_path = f"{lstm_dir}/align_graph"

        exkaldi.utils.make_dependent_dirs(lstm_dir, pathIsFile=False)

        # Load the CMVN-applied MFCC, splice it, and apply the LDA+MLLT matrix.
        print(f"Make LDA feature for '{Name}'")
        feat = exkaldi.load_feat(f"{args.expDir}/mfcc/{Name}/mfcc_cmvn.ark")
        feat = feat.splice(left=args.LDAsplice, right=args.LDAsplice)
        feat = exkaldi.transform_feat(
            feat,
            matFile=f"{args.expDir}/train_lda_mllt/trans.mat",
        )

        # Load the probability for aligning. The file is large, so an index
        # table is used instead of loading the data itself.
        prob = exkaldi.load_index_table(f"{args.expDir}/train_dnn/prob/{Name}.ark")

        # Reuse the aligning graph that the DNN stage already compiled.
        print("Copy aligning graph from DNN resources")
        shutil.copyfile(f"{dnn_dir}/align_graph", graph_path)

        # Align with the network probability.
        print("Align")
        ali = exkaldi.decode.wfst.nn_align(
            hmm,
            prob,
            alignGraphFile=graph_path,
            lexicons=lexicons,
            outFile=f"{lstm_dir}/ali",
        )

        # Estimate the fMLLR transform matrix.
        print("Estimate transform matrix")
        fmllrTransMat = exkaldi.hmm.estimate_fMLLR_matrix(
            aliOrLat=ali,
            lexicons=lexicons,
            aliHmm=hmm,
            feat=feat,
            spk2utt=f"{src_dir}/spk2utt",
            outFile=f"{lstm_dir}/trans.ark",
        )

        # Transform the feature with the fMLLR matrix.
        print("Transform matrix")
        feat = exkaldi.use_fmllr(
            feat,
            fmllrTransMat,
            utt2spk=f"{src_dir}/utt2spk",
            outFile=f"{lstm_dir}/fmllr.ark",
        )

        # 'ali' is an index table object here, so the alignment data must be
        # fetched before 'to_numpy' can be used.
        ali = ali.fetch(arkType="ali")
        pdf_ali = ali.to_numpy(aliType="pdfID", hmm=hmm)
        pdf_ali.save(f"{lstm_dir}/pdfID.npy")
        del pdf_ali
        phone_ali = ali.to_numpy(aliType="phoneID", hmm=hmm)
        phone_ali.save(f"{lstm_dir}/phoneID.npy")
        del phone_ali
        del ali

        # Compute CMVN statistics for the fMLLR feature (written via outFile).
        cmvn = exkaldi.compute_cmvn_stats(
            feat,
            spk2utt=f"{src_dir}/spk2utt",
            outFile=f"{lstm_dir}/cmvn_of_fmllr.ark",
        )
        del cmvn
        del feat

        # Copy spk2utt, utt2spk and text next to the generated data.
        for fileName in ("spk2utt", "utt2spk", "text"):
            shutil.copyfile(f"{src_dir}/{fileName}", f"{lstm_dir}/{fileName}")

    print("Write feature and alignment dim information")
    dims = exkaldi.ListTable()

    # The test-set feature is loaded only to read its dimension.
    feat = exkaldi.load_feat(f"{args.expDir}/train_lstm/data/test/fmllr.ark")
    dims["fmllr"] = feat.dim
    del feat

    hmm = exkaldi.hmm.load_hmm(f"{args.expDir}/train_sat/final.mdl")
    dims["phones"] = hmm.info.phones + 1
    dims["pdfs"] = hmm.info.pdfs
    del hmm

    dims.save(f"{args.expDir}/train_lstm/data/dims")