Example 1
def generate_data(wavFiles, expDir, sphFlag, sph2pipeTool, txtFileSuffix,
                  phoneMap_60_to_48):

    wavScp = exkaldi.ListTable(name="wavScp")
    utt2spk = exkaldi.ListTable(name="utt2spk")
    spk2utt = exkaldi.ListTable(name="spk2utt")
    transcription = exkaldi.Transcription(name="trans")

    for Name in wavFiles:
        if Name[-7:].upper() in ["SA1.WAV", "SA2.WAV"]:  # skip the two SA dialect sentences
            continue
        speaker = os.path.basename(os.path.dirname(Name))
        uttID = speaker + "_" + os.path.basename(Name)[0:-4]
        wavFilePath = os.path.abspath(Name)
        # 1. wav.scp
        if sphFlag:
            wavScp[uttID] = f"{sph2pipeTool} -f wav {wavFilePath} |"
        else:
            wavScp[uttID] = wavFilePath
        # 2. utt2spk
        utt2spk[uttID] = speaker
        # 3. spk2utt
        if speaker not in spk2utt.keys():
            spk2utt[speaker] = f"{uttID}"
        else:
            spk2utt[speaker] += f" {uttID}"
        # 4. transcription
        txtFile = Name[:-3] + txtFileSuffix
        phones = []
        with open(txtFile, "r", encoding="utf-8") as fr:
            lines = fr.readlines()
        for line in lines:
            line = line.strip()
            if len(line) == 0:
                continue
            phone = line.split()[-1]
            if phone == "q":  # discard phone "q"
                continue
            else:
                phone = phoneMap_60_to_48[phone]
            phones.append(phone)
        transcription[uttID] = " ".join(phones)
    # Save to files
    wavScp.save(os.path.join(expDir, "wav.scp"))
    utt2spk.save(os.path.join(expDir, "utt2spk"))
    spk2utt.save(os.path.join(expDir, "spk2utt"))
    transcription.save(os.path.join(expDir, "text"))
    print(f"Generate data done: {expDir}.")
Example 2
def main():

    # ------------- Parse arguments from command line ----------------------
    # 1. Add a description of this program
    args.discribe(
        "This program is used to make the dictionaries and the language model")
    # 2. Add options
    args.add(
        "--expDir",
        abbr="-e",
        dtype=str,
        default="exp",
        discription="The data resources and output path of current experiment."
    )
    args.add("--order",
             abbr="-o",
             dtype=int,
             default=6,
             minV=1,
             maxV=6,
             discription="The maximum order of N-grams language model.")
    # 3. Then start to parse arguments.
    args.parse()
    # 4. Take a backup of arguments
    args.print_args()  # print arguments to display
    argsLogFile = os.path.join(args.expDir, "conf", "make_dict_and_LM.args")
    args.save(argsLogFile)

    # ------- Make the word-pronunciation lexicon file ------
    textFile = os.path.join(args.expDir, "data", "train", "text")
    trainTrans = exkaldi.load_transcription(textFile)  # an exkaldi Transcription object
    wordCount = trainTrans.count_word().sort()  # accumulate all words and their frequencies in the transcription

    word2pron = exkaldi.ListTable(
        dict((word, word)
             for word in wordCount.keys()))  # word to pronunciation

    pronFile = os.path.join(args.expDir, "dict", "pronunciation.txt")
    word2pron.save(pronFile)  # save it to file
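    # Since the TIMIT transcriptions here are phone sequences, each "word" is
    # itself a phone, so every line of pronunciation.txt is simply
    # "<phone> <phone>", e.g. "aa aa" (illustrative).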

    # -------  Make lexicons ------
    # 1. Generate the LexiconBank object from the word-pronunciation file.
    # Depending on the task, about 20 lexicons will be generated and managed by the LexiconBank.
    lexicons = exkaldi.decode.graph.lexicon_bank(pronFile,
                                                 silWords={"sil": "sil"},
                                                 unkSymbol={"sil": "sil"},
                                                 optionalSilPhone="sil",
                                                 extraQuestions=[],
                                                 positionDependent=False,
                                                 shareSilPdf=False,
                                                 extraDisambigPhoneNumbers=1,
                                                 extraDisambigWords=[])

    # 2. Add two extra questions.
    lexicons.add_extra_question(lexicons("silence_phones"))
    lexicons.add_extra_question(lexicons("nonsilence_phones"))

    # 3. Save this lexicon bank for future use.
    lexicons.save(os.path.join(args.expDir, "dict", "lexicons.lex"))
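    # The saved bank can be reloaded later with exkaldi.load_lex(...), as the
    # training scripts in Examples 4 and 5 do.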
    print(f"Generate lexicon bank done.")

    # -------  Make Lexicon fst ------
    # 1. Generate the Lexicon fst
    exkaldi.decode.graph.make_L(lexicons,
                                outFile=os.path.join(args.expDir, "dict",
                                                     "L.fst"),
                                useSilprob=0.0,
                                useDisambigLexicon=False)
    print(f"Generate lexicon fst done.")
    # 1. Generate the disambig Lexicon fst
    exkaldi.decode.graph.make_L(lexicons,
                                outFile=os.path.join(args.expDir, "dict",
                                                     "L_disambig.fst"),
                                useSilprob=0.0,
                                useDisambigLexicon=True)
    print(f"Generate disambiguation lexicon fst done.")

    # -------  Make the topological structure for the GMM-HMM ------
    exkaldi.hmm.make_topology(
        lexicons,
        outFile=os.path.join(args.expDir, "dict", "topo"),
        numNonsilStates=3,
        numSilStates=5,
    )
    print(f"Generate topo file done.")

    # -------  Train the N-gram language model ------
    # 1. Train a LM.
    # We trained 2-, 3- and 4-gram models with both SRILM and KenLM and chose the best one,
    # which was the 3-gram model trained by KenLM. So we directly train that one here.
    exkaldi.lm.train_ngrams_kenlm(
        lexicons,
        order=args.order,
        text=trainTrans,  # If "text" receives an exkaldi Transcription object, the utterance IDs are dropped automatically.
        outFile=os.path.join(args.expDir, "lm", f"{args.order}grams.arpa"),
        config={
            "--discount_fallback": True,
            "-S": "20%"
        },
    )
    print(f"Generate ARPA language model done.")

    # 2. Then test this model by computing the perplexity.
    exkaldi.lm.arpa_to_binary(
        arpaFile=os.path.join(args.expDir, "lm", f"{args.order}grams.arpa"),
        outFile=os.path.join(args.expDir, "lm", f"{args.order}grams.binary"),
    )
    model = exkaldi.load_ngrams(
        os.path.join(args.expDir, "lm", f"{args.order}grams.binary")
    )  # "load_ngrams" also accepts an ARPA-format file.

    # 3. Prepare test transcription
    testTrans = exkaldi.load_transcription(
        os.path.join(args.expDir, "data", "test", "text"))

    # 4. score
    perScore = model.perplexity(testTrans)
    print(f"The weighted average perplexity of this model is: {perScore}.")
    del model
    del testTrans

    # ------- Make Grammar fst ------
    exkaldi.decode.graph.make_G(
        lexicons,
        arpaFile=os.path.join(args.expDir, "lm", f"{args.order}grams.arpa"),
        outFile=os.path.join(args.expDir, "lm", f"G.{args.order}.fst"),
        order=args.order)
    print(f"Make Grammar fst done.")

    # ------- Compose LG fst for further use ------
    exkaldi.decode.graph.compose_LG(
        LFile=os.path.join(args.expDir, "dict", "L_disambig.fst"),
        GFile=os.path.join(args.expDir, "lm", f"G.{args.order}.fst"),
        outFile=os.path.join(args.expDir, "lm", f"LG.{args.order}.fst"),
    )
    print(f"Compose LG fst done.")
Example 3
def main():

    # ------------- Parse arguments from command line ----------------------
    # 1. Add a description of this program
    args.discribe("This program is used to prepare TIMIT data.")
    # 2. Add some options
    args.add("--timitRoot", dtype=str, abbr="-t", default="/Corpus/TIMIT", discription="The root path of timit dataset.")
    args.add("--expDir", dtype=str, abbr="-e", default="exp", discription="The output path to save generated data.")
    # 3. Then start to parse arguments. 
    args.parse()
    # 4. Take a backup of arguments
    args.save( os.path.join(args.expDir,"conf","prepare_data.args") )

    # ------------- Do some preparatory work ----------------------
    # 1. Ensure Kaldi exists
    declare.kaldi_existed()
    # 2. The sph2pipe tool will be used if the TIMIT data is in sph format.
    sph2pipeTool = os.path.join(info.KALDI_ROOT,"tools","sph2pipe_v2.5","sph2pipe")
    declare.is_file("sph2pipe tool",sph2pipeTool)

    # ------------- Check TIMIT data format -------------
    # 1. Get the directory name
    declare.is_dir("TIMIT root directory", args.timitRoot)
    dirNames = os.listdir(args.timitRoot)
    if "TRAIN" in dirNames and "TEST" in dirNames:
        uppercaseFlag = True
        trainResourceDir = "TRAIN"
        testResourceDir = "TEST"
        testWavFile = os.path.join(args.timitRoot,"TRAIN","DR1","FCJF0","SA1.WAV") # used to test the file format
        wavFileSuffix = "WAV"
        txtFileSuffix = "PHN"
    elif "train" in dirNames and "test" in dirNames:
        uppercaseFlag = False
        trainResourceDir = "train"
        testResourceDir = "test"
        testWavFile = os.path.join(args.timitRoot,"train","dr1","fcjf0","sa1.wav") # used to test the file format
        wavFileSuffix = "wav"
        txtFileSuffix = "phn"
    else:
        raise Exception(f"Wrong format of train or test data directories.")
    # 2. check whether wave file is sph format.
    formatCheckCmd = f"{sph2pipeTool} -f wav {testWavFile}"
    out,err,cod = exkaldi.utils.run_shell_command(formatCheckCmd, stderr="PIPE")
    if cod == 0:
        sphFlag = True
    else:
        sphFlag = False
    
    # --------- Generate phone-map dictionaries --------
    # 1. Generate the 60-to-48 and 48-to-39 category mapping dictionaries
    phoneMap_60_to_48 = exkaldi.ListTable(name="60-48")
    phoneMap_48_to_39 = exkaldi.ListTable(name="48-39")
    mapFile = os.path.join(info.KALDI_ROOT,"egs","timit","s5","conf","phones.60-48-39.map")
    declare.is_file("60-48-39 phone map", mapFile) # Check that it exists
    with open(mapFile,"r",encoding="utf-8") as fr:
        lines = fr.readlines()
        for line in lines:
            line = line.strip().split()
            if len(line) < 3:  # phone "q" will be omitted temporarily
                continue
            phoneMap_60_to_48[line[0]] = line[1]
            phoneMap_48_to_39[line[1]] = line[2]
    # 2. Save the 48-39 phone map for further use.
    phoneMap_48_to_39.save(os.path.join(args.expDir,"dict","phones.48_to_39.map"))
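    # Illustrative layout of phones.60-48-39.map (three whitespace-separated
    # columns: 60-set, 48-set and 39-set phone; the exact rows are an assumption):
    #   aa    aa    aa
    #   ax-h  ax    ah
    #   q                (no 48/39 mapping, hence the len(line) < 3 check above)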

    # --------- Generate train dataset --------
    wavs = glob.glob(os.path.join(args.timitRoot,trainResourceDir,"*","*",f"*.{wavFileSuffix}"))
    out = os.path.join(args.expDir,"data","train")
    generate_data(wavs,out,sphFlag,sph2pipeTool,txtFileSuffix,phoneMap_60_to_48)

    # --------- Generate dev and test data --------
    for Name in ["dev", "test"]:
        spkListFile = os.path.join( info.KALDI_ROOT,"egs","timit","s5","conf",f"{Name}_spk.list" )
        declare.is_file(f"speakers list for {Name}", spkListFile) # Check whether or not it existed
        with open(spkListFile,"r",encoding="utf-8") as fr:
            spkList = fr.readlines()
        wavs = []
        for spk in spkList:
            spk = spk.strip()
            if len(spk) == 0:
                continue
            if uppercaseFlag:
                spk = spk.upper()
            wavs.extend(glob.glob(os.path.join(args.timitRoot,testResourceDir,"*",spk,f"*.{wavFileSuffix}")))
        
        out = os.path.join(args.expDir,"data",Name)
        generate_data(wavs,out,sphFlag,sph2pipeTool,txtFileSuffix,phoneMap_60_to_48)
Example 4
def prepare_DNN_data():

    print("Start to prepare data for DNN training")
    assert os.path.isdir(f"{args.expDir}/train_sat"
                         ), "Please run previous programs up to SAT training."

    # Lexicons and Gmm-Hmm model
    lexicons = exkaldi.load_lex(f"{args.expDir}/dict/lexicons.lex")
    hmm = f"{args.expDir}/train_sat/final.mdl"
    tree = f"{args.expDir}/train_sat/tree"

    for Name in ["train", "dev", "test"]:

        exkaldi.utils.make_dependent_dirs(
            f"{args.expDir}/train_dnn/data/{Name}", pathIsFile=False)
        # Make LDA feature
        print(f"Make LDA feature for '{Name}'")
        feat = exkaldi.load_feat(f"{args.expDir}/mfcc/{Name}/mfcc_cmvn.ark")
        feat = feat.splice(left=args.LDAsplice, right=args.LDAsplice)
        feat = exkaldi.transform_feat(
            feat, matFile=f"{args.expDir}/train_lda_mllt/trans.mat")
        # Compile the aligning graph
        print(f"Compile aligning graph")
        transInt = exkaldi.hmm.transcription_to_int(
            transcription=f"{args.expDir}/data/{Name}/text",
            symbolTable=lexicons("words"),
            unkSymbol=lexicons("oov"),
        )
        graphFile = exkaldi.decode.wfst.compile_align_graph(
            hmm,
            tree,
            transcription=transInt,
            LFile=f"{args.expDir}/dict/L.fst",
            outFile=f"{args.expDir}/train_dnn/data/{Name}/align_graph",
            lexicons=lexicons,
        )
        # Align first time
        print(f"Align the first time")
        ali = exkaldi.decode.wfst.gmm_align(
            hmm,
            feat,
            alignGraphFile=graphFile,
            lexicons=lexicons,
        )
        # Estimate transform matrix
        print(f"Estimate fMLLR transform matrix")
        fmllrTransMat = exkaldi.hmm.estimate_fMLLR_matrix(
            aliOrLat=ali,
            lexicons=lexicons,
            aliHmm=hmm,
            feat=feat,
            spk2utt=f"{args.expDir}/data/{Name}/spk2utt",
        )
        fmllrTransMat.save(f"{args.expDir}/train_dnn/data/{Name}/trans.ark")
        # Transform feature
        print(f"Transform feature")
        feat = exkaldi.use_fmllr(
            feat,
            fmllrTransMat,
            utt2spk=f"{args.expDir}/data/{Name}/utt2spk",
        )
        # Align second time with new feature
        print(f"Align the second time")
        ali = exkaldi.decode.wfst.gmm_align(
            hmm,
            feat,
            alignGraphFile=graphFile,
            lexicons=lexicons,
        )
        # Save alignment and feature
        print(f"Save final fmllr feature and alignment")
        feat.save(f"{args.expDir}/train_dnn/data/{Name}/fmllr.ark")
        ali.save(f"{args.expDir}/train_dnn/data/{Name}/ali")
        # Transform alignment
        print(f"Generate pdf ID and phone ID alignment")
        ali.to_numpy(
            aliType="pdfID",
            hmm=hmm).save(f"{args.expDir}/train_dnn/data/{Name}/pdfID.npy")
        ali.to_numpy(
            aliType="phoneID",
            hmm=hmm).save(f"{args.expDir}/train_dnn/data/{Name}/phoneID.npy")
        del ali
        # Compute cmvn for fmllr feature
        print(f"Compute the CMVN for fmllr feature")
        cmvn = exkaldi.compute_cmvn_stats(
            feat, spk2utt=f"{args.expDir}/data/{Name}/spk2utt")
        cmvn.save(f"{args.expDir}/train_dnn/data/{Name}/cmvn_of_fmllr.ark")
        del cmvn
        del feat
        # Copy the spk2utt, utt2spk and text files
        shutil.copyfile(f"{args.expDir}/data/{Name}/spk2utt",
                        f"{args.expDir}/train_dnn/data/{Name}/spk2utt")
        shutil.copyfile(f"{args.expDir}/data/{Name}/utt2spk",
                        f"{args.expDir}/train_dnn/data/{Name}/utt2spk")
        shutil.copyfile(f"{args.expDir}/data/{Name}/text",
                        f"{args.expDir}/train_dnn/data/{Name}/text")
        transInt.save(f"{args.expDir}/data/{Name}/text.int")

    print("Write feature and alignment dim information")
    dims = exkaldi.ListTable()
    feat = exkaldi.load_feat(f"{args.expDir}/train_dnn/data/test/fmllr.ark")
    dims["fmllr"] = feat.dim
    del feat
    hmm = exkaldi.hmm.load_hmm(f"{args.expDir}/train_sat/final.mdl")
    dims["phones"] = hmm.info.phones + 1
    dims["pdfs"] = hmm.info.pdfs
    del hmm
    dims.save(f"{args.expDir}/train_dnn/data/dims")
Example 5
def prepare_LSTM_data():

  print("Start to prepare data for LSTM training")
  declare.is_dir(f"{args.expDir}/train_dnn/prob", debug="Please run previous programs up to DNN training.")

  # Lexicons and Gmm-Hmm model
  lexicons = exkaldi.load_lex( f"{args.expDir}/dict/lexicons.lex" )
  hmm = f"{args.expDir}/train_sat/final.mdl"
  tree = f"{args.expDir}/train_sat/tree"

  for Name in ["train", "dev", "test"]:
    exkaldi.utils.make_dependent_dirs(f"{args.expDir}/train_lstm/data/{Name}", pathIsFile=False)
    # Load feature
    print(f"Make LDA feature for '{Name}'")
    feat = exkaldi.load_feat( f"{args.expDir}/mfcc/{Name}/mfcc_cmvn.ark" )
    feat = feat.splice(left=args.LDAsplice, right=args.LDAsplice)
    feat = exkaldi.transform_feat(feat, matFile=f"{args.expDir}/train_lda_mllt/trans.mat" )
    # Load probability for aligning (the file is large, so we use an index table)
    prob = exkaldi.load_index_table( f"{args.expDir}/train_dnn/prob/{Name}.ark" )
    # Copy the aligning graph from the DNN resources
    print("Copy aligning graph from DNN resources")
    shutil.copyfile( f"{args.expDir}/train_dnn/data/{Name}/align_graph",
                    f"{args.expDir}/train_lstm/data/{Name}/align_graph"
                  )
    # Align
    print("Align")
    ali = exkaldi.decode.wfst.nn_align(
                                    hmm,
                                    prob,
                                    alignGraphFile=f"{args.expDir}/train_lstm/data/{Name}/align_graph", 
                                    lexicons=lexicons,
                                    outFile=f"{args.expDir}/train_lstm/data/{Name}/ali",
                                )
    # Estimate transform matrix
    print("Estimate transform matrix")
    fmllrTransMat = exkaldi.hmm.estimate_fMLLR_matrix(
                                aliOrLat=ali,
                                lexicons=lexicons,
                                aliHmm=hmm,
                                feat=feat,
                                spk2utt=f"{args.expDir}/data/{Name}/spk2utt",
                                outFile=f"{args.expDir}/train_lstm/data/{Name}/trans.ark",
                            )
    # Transform feature
    print("Transform matrix")
    feat = exkaldi.use_fmllr(
                        feat,
                        fmllrTransMat,
                        utt2spk=f"{args.expDir}/data/{Name}/utt2spk",
                        outFile=f"{args.expDir}/train_lstm/data/{Name}/fmllr.ark",
                    )
    # Transform alignment (because "ali" is an index table object, we need to fetch the alignment data in order to use the "to_numpy" method)
    ali = ali.fetch(arkType="ali")
    ali.to_numpy(aliType="pdfID",hmm=hmm).save( f"{args.expDir}/train_lstm/data/{Name}/pdfID.npy" )
    ali.to_numpy(aliType="phoneID",hmm=hmm).save( f"{args.expDir}/train_lstm/data/{Name}/phoneID.npy" )
    del ali
    # Compute cmvn for fmllr feature
    cmvn = exkaldi.compute_cmvn_stats(
                                  feat, 
                                  spk2utt=f"{args.expDir}/data/{Name}/spk2utt",
                                  outFile=f"{args.expDir}/train_lstm/data/{Name}/cmvn_of_fmllr.ark",
                                )
    del cmvn
    del feat
    # Copy the spk2utt, utt2spk and text files
    shutil.copyfile( f"{args.expDir}/data/{Name}/spk2utt", f"{args.expDir}/train_lstm/data/{Name}/spk2utt")
    shutil.copyfile( f"{args.expDir}/data/{Name}/utt2spk", f"{args.expDir}/train_lstm/data/{Name}/utt2spk")
    shutil.copyfile( f"{args.expDir}/data/{Name}/text", f"{args.expDir}/train_lstm/data/{Name}/text" )

  print("Write feature and alignment dim information")
  dims = exkaldi.ListTable()
  feat = exkaldi.load_feat( f"{args.expDir}/train_lstm/data/test/fmllr.ark" ) 
  dims["fmllr"] = feat.dim
  del feat
  hmm = exkaldi.hmm.load_hmm( f"{args.expDir}/train_sat/final.mdl" )
  dims["phones"] = hmm.info.phones + 1
  dims["pdfs"] = hmm.info.pdfs
  del hmm
  dims.save( f"{args.expDir}/train_lstm/data/dims" )
Example 6
def GMM_decode_fmllr_and_score(outDir, hmm, HCLGfile, transformMatFile=None):

    exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

    lexicons = exkaldi.decode.graph.load_lex(
        os.path.join("exp", "dict", "lexicons.lex"))
    print(f"Load test feature.")
    featFile = os.path.join("exp", "mfcc", "test", "mfcc_cmvn.ark")
    feat = exkaldi.load_feat(featFile)
    if transformMatFile is None:
        print("Feature type is delta")
        feat = feat.add_delta(order=2)
        print("Add 2-order deltas.")
    else:
        print("Feature type is lda+mllt")
        feat = feat.splice(left=3, right=3)
        feat = exkaldi.transform_feat(feat, transformMatFile)
        print("Transform feature")

    ## 1. Estimate the primary transform matrix from an alignment or a lattice.
    ## We estimate it from a lattice, so we run a first decoding pass.
    print("Decode the first time with original feature.")
    preLat = exkaldi.decode.wfst.gmm_decode(
        feat,
        hmm,
        HCLGfile,
        wordSymbolTable=lexicons("words"),
        beam=10,
        latBeam=6,
        acwt=0.083333,
        maxActive=2000,
    )
    preLat.save(os.path.join(outDir, "test_primary.lat"))

    print("Estimate the primary fMLLR transform matrix.")
    preTransMatrix = exkaldi.hmm.estimate_fMLLR_matrix(
        aliOrLat=preLat,
        lexicons=lexicons,
        aliHmm=hmm,
        feat=feat,
        adaHmm=None,
        silenceWeight=0.01,
        acwt=0.083333,
        spk2utt=os.path.join("exp", "data", "test", "spk2utt"),
    )
    del preLat
    ## 2. Transform the feature. We use the new feature to estimate the secondary transform matrix from a lattice.
    print("Transform feature with primary matrix.")
    fmllrFeat = exkaldi.use_fmllr(
        feat,
        preTransMatrix,
        utt2spkFile=os.path.join("exp", "data", "test", "utt2spk"),
    )
    print("Decode the second time with primary fmllr feature.")
    secLat = exkaldi.decode.wfst.gmm_decode(
        fmllrFeat,
        hmm,
        HCLGfile,
        wordSymbolTable=lexicons("words"),
        beam=13,
        latBeam=6,
        acwt=0.083333,
        maxActive=7000,
        config={"--determinize-lattice": "false"},
    )
    print("Determinize secondary lattice.")
    thiLat = secLat.determinize(acwt=0.083333, beam=4)
    print("Estimate the secondary fMLLR transform matrix.")
    secTransMatrix = exkaldi.hmm.estimate_fMLLR_matrix(
        aliOrLat=thiLat,
        lexicons=lexicons,
        aliHmm=hmm,
        feat=fmllrFeat,
        adaHmm=None,
        silenceWeight=0.01,
        acwt=0.083333,
        spk2utt=os.path.join("exp", "data", "test", "spk2utt"),
    )
    del fmllrFeat
    del thiLat
    ## 3. Compose the primary and secondary matrices to get the final transform matrix.
    print("Compose the primary and secondary transform matrix.")
    finalTransMatrix = exkaldi.hmm.compose_transform_matrixs(
        matA=preTransMatrix,
        matB=secTransMatrix,
        bIsAffine=True,
    )
    finalTransMatrix.save(os.path.join(outDir, "trans.ark"))
    print("Transform feature with final matrix.")
    ## 4. Transform feature with the final transform matrix and use it to decode.
    ## We directly use the lattice generated in the second step. The final lattice is obtained.
    finalFmllrFeat = exkaldi.use_fmllr(
        feat,
        finalTransMatrix,
        utt2spkFile=os.path.join("exp", "data", "test", "utt2spk"),
    )
    del finalTransMatrix
    print("Rescore secondary lattice.")
    lat = secLat.am_rescore(
        hmm=hmm,
        feat=finalFmllrFeat,
    )
    print("Determinize secondary lattice.")
    lat = lat.determinize(acwt=0.083333, beam=6)
    lat.save(os.path.join(outDir, "test.lat"))
    print("Generate lattice done.")

    phoneMapFile = os.path.join("exp", "dict", "phones.48_to_39.map")
    phoneMap = exkaldi.ListTable(name="48-39").load(phoneMapFile)
    refText = exkaldi.load_trans(os.path.join("exp", "data", "test",
                                              "text")).convert(phoneMap, None)
    refText.save(os.path.join(outDir, "ref.txt"))
    print("Generate reference text done.")

    print("Now score:")
    bestWER = (1000, 0, 0)
    bestResult = None
    for penalty in [0., 0.5, 1.0]:
        for LMWT in range(1, 11):
            # Add penalty
            newLat = lat.add_penalty(penalty)
            # Get 1-best result (word-level)
            result = newLat.get_1best(lexicons("words"),
                                      hmm,
                                      lmwt=LMWT,
                                      acwt=1)
            # Transform from int value format to text format
            result = exkaldi.hmm.transcription_from_int(
                result, lexicons("words"))
            # Transform 48-phones to 39-phones
            result = result.convert(phoneMap, None)
            # Compute WER
            score = exkaldi.decode.score.wer(ref=refText,
                                             hyp=result,
                                             mode="present")
            if score.WER < bestWER[0]:
                bestResult = result
                bestWER = (score.WER, penalty, LMWT)
            print(f"Penalty: {penalty}, LMWT: {LMWT}, WER: {score.WER}%")
    print("Score done. Save the best result.")
    bestResult.save(os.path.join(outDir, "hyp.txt"))
    with open(os.path.join(outDir, "best_WER"), "w") as fw:
        fw.write(f"WER {bestWER[0]}, penalty {bestWER[1]}, LMWT {bestWER[2]}")
Example 7
def GMM_decode_mfcc_and_score(outDir, hmm, HCLGfile, transformMatFile=None):

    exkaldi.utils.make_dependent_dirs(outDir, pathIsFile=False)

    lexicons = exkaldi.decode.graph.load_lex(
        os.path.join("exp", "dict", "lexicons.lex"))
    print(f"Load test feature.")
    featFile = os.path.join("exp", "mfcc", "test", "mfcc_cmvn.ark")
    feat = exkaldi.load_feat(featFile)
    if transformMatFile is None:
        print("Feature type is delta")
        feat = feat.add_delta(order=2)
        print("Add 2-order deltas.")
    else:
        print("Feature type is lda+mllt")
        feat = feat.splice(left=3, right=3)
        feat = exkaldi.transform_feat(feat, transformMatFile)
        print("Transform feature")

    print("Start to decode")
    lat = exkaldi.decode.wfst.gmm_decode(feat,
                                         hmm,
                                         HCLGfile,
                                         wordSymbolTable=lexicons("words"),
                                         beam=13,
                                         latBeam=6,
                                         acwt=0.083333)
    lat.save(os.path.join(outDir, "test.lat"))
    print(f"Generate lattice done.")

    phoneMapFile = os.path.join("exp", "dict", "phones.48_to_39.map")
    phoneMap = exkaldi.ListTable(name="48-39").load(phoneMapFile)
    refText = exkaldi.load_trans(os.path.join("exp", "data", "test",
                                              "text")).convert(phoneMap, None)
    refText.save(os.path.join(outDir, "ref.txt"))
    print("Generate reference text done.")

    print("Now score:")
    bestWER = (1000, 0, 0)
    bestResult = None
    for penalty in [0., 0.5, 1.0]:
        for LMWT in range(1, 11):
            # Add penalty
            newLat = lat.add_penalty(penalty)
            # Get 1-best result (word-level)
            result = newLat.get_1best(lexicons("words"),
                                      hmm,
                                      lmwt=LMWT,
                                      acwt=1)
            # Transform from int value format to text format
            result = exkaldi.hmm.transcription_from_int(
                result, lexicons("words"))
            # Transform 48-phones to 39-phones
            result = result.convert(phoneMap, None)
            # Compute WER
            score = exkaldi.decode.score.wer(ref=refText,
                                             hyp=result,
                                             mode="present")
            if score.WER < bestWER[0]:
                bestResult = result
                bestWER = (score.WER, penalty, LMWT)
            print(f"Penalty: {penalty}, LMWT: {LMWT}, WER: {score.WER}%")
    print("Score done. Save the best result.")
    bestResult.save(os.path.join(outDir, "hyp.txt"))
    with open(os.path.join(outDir, "best_WER"), "w") as fw:
        fw.write(f"WER {bestWER[0]}, penalty {bestWER[1]}, LMWT {bestWER[2]}")
Example 8
def prepare_data():

    dataOutDir = os.path.join("exp", "data")
    exkaldi.utils.make_dependent_dirs(dataOutDir, pathIsFile=False)

    # Prepare tools
    ExkaldiInfo.vertify_kaldi_existed()
    sph2pipeTool = os.path.join(ExkaldiInfo.KALDI_ROOT, "tools",
                                "sph2pipe_v2.5", "sph2pipe")
    if not os.path.isfile(sph2pipeTool):
        raise Exception("Expected the sph2pipe tool to exist.")

    # Check TIMIT data format
    if not os.path.isdir(timitRoot):
        raise Exception(f"No such directory: {timitRoot}.")
    dirNames = os.listdir(timitRoot)
    if "TRAIN" in dirNames and "TEST" in dirNames:
        uppercaseFlag = True
        trainResourceDir = "TRAIN"
        testResourceDir = "TEST"
        testWavFile = os.path.join(timitRoot, "TRAIN", "DR1", "FCJF0",
                                   "SA1.WAV")
        wavFileSuffix = "WAV"
        txtFileSuffix = "PHN"
    elif "train" in dirNames and "test" in dirNames:
        uppercaseFlag = False
        trainResourceDir = "train"
        testResourceDir = "test"
        testWavFile = os.path.join(timitRoot, "train", "dr1", "fcjf0",
                                   "sa1.wav")
        wavFileSuffix = "wav"
        txtFileSuffix = "phn"
    else:
        raise Exception(f"Wrong format of train or test data directories.")
    formatCheckCmd = f"{sph2pipeTool}  -f wav {testWavFile}"
    out, err, cod = exkaldi.utils.run_shell_command(formatCheckCmd,
                                                    stdout=subprocess.PIPE,
                                                    stderr=subprocess.PIPE)
    if cod == 0:
        sphFlag = True
    else:
        sphFlag = False

    # Map phones from 60 categories to 48 categories and build the 48-to-39 mapping dictionary
    phoneMap_60_to_48 = exkaldi.ListTable(name="60-48")
    phoneMap_48_to_39 = exkaldi.ListTable(name="48-39")
    with open(os.path.join(ExkaldiInfo.KALDI_ROOT, "egs", "timit", "s5",
                           "conf", "phones.60-48-39.map"),
              "r",
              encoding="utf-8") as fr:
        lines = fr.readlines()
        for line in lines:
            line = line.strip().split()
            if len(line) < 3:
                continue
            phoneMap_60_to_48[line[0]] = line[1]
            phoneMap_48_to_39[line[1]] = line[2]
    phoneMap_48_to_39.save(os.path.join("exp", "dict", "phones.48_to_39.map"))

    # Define a function to generate the wav.scp, spk2utt, utt2spk and text files.
    def generate_data(wavFiles, outDir):
        wavScp = exkaldi.ListTable(name="wavScp")
        utt2spk = exkaldi.ListTable(name="utt2spk")
        spk2utt = exkaldi.ListTable(name="spk2utt")
        transcription = exkaldi.ListTable(name="trans")
        for Name in wavFiles:
            if Name[-7:].upper() in ["SA1.WAV", "SA2.WAV"]:
                # Skip the two SA dialect sentences
                continue
            speaker = os.path.basename(os.path.dirname(Name))
            uttID = speaker + "_" + os.path.basename(Name)[0:-4]
            wavFilePath = os.path.abspath(Name)
            # wav.scp
            if sphFlag:
                wavScp[uttID] = f"{sph2pipeTool} -f wav {wavFilePath} |"
            else:
                wavScp[uttID] = wavFilePath
            # utt2spk
            utt2spk[uttID] = speaker
            # spk2utt
            if speaker not in spk2utt.keys():
                spk2utt[speaker] = f"{uttID}"
            else:
                spk2utt[speaker] += f" {uttID}"
            # transcription
            txtFile = Name[:-3] + txtFileSuffix
            phones = []
            with open(txtFile, "r", encoding="utf-8") as fr:
                lines = fr.readlines()
            for line in lines:
                line = line.strip()
                if len(line) == 0:
                    continue
                phone = line.split()[-1]
                if phone == "q":
                    continue
                else:
                    phone = phoneMap_60_to_48[phone]
                phones.append(phone)
            transcription[uttID] = " ".join(phones)
        # Save to files
        wavScp.save(os.path.join(outDir, "wav.scp"))
        utt2spk.save(os.path.join(outDir, "utt2spk"))
        spk2utt.save(os.path.join(outDir, "spk2utt"))
        transcription.save(os.path.join(outDir, "text"))
        print(f"Generate data done: {outDir}.")

    # generate train data
    wavFiles = glob.glob(
        os.path.join(timitRoot, trainResourceDir, "*", "*",
                     f"*.{wavFileSuffix}"))
    generate_data(
        wavFiles=wavFiles,
        outDir=os.path.join(dataOutDir, "train"),
    )

    # generate dev and test data.
    for Name in ["dev", "test"]:
        spkListFile = os.path.join(ExkaldiInfo.KALDI_ROOT, "egs", "timit",
                                   "s5", "conf", f"{Name}_spk.list")
        with open(spkListFile, "r", encoding="utf-8") as fr:
            spkList = fr.readlines()
        wavFiles = []
        for spk in spkList:
            spk = spk.strip()
            if len(spk) == 0:
                continue
            if uppercaseFlag:
                spk = spk.upper()
            wavFiles.extend(
                glob.glob(
                    os.path.join(timitRoot, testResourceDir, "*", spk,
                                 f"*.{wavFileSuffix}")))
        generate_data(
            wavFiles=wavFiles,
            outDir=os.path.join(dataOutDir, Name),
        )
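prepare_data reads the module-level timitRoot and relies on os, glob, subprocess, exkaldi and the ExkaldiInfo object being imported at module scope; a minimal driver under those assumptions:

timitRoot = "/Corpus/TIMIT"  # adjust to the local TIMIT root

if __name__ == "__main__":
    prepare_data()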