Example #1
0
	def determinize(self, acwt=1.0, beam=6):
		'''
		Run pruned determinization on the lattice with the Kaldi
		<lattice-determinize-pruned> command.

		Args:
			<acwt>: acoustic scale, a float value which is not negative.
			<beam>: prune beam, an int value which is not negative.
		Return:
			A new Lattice object named "determinize(<name of this lattice>)". It keeps
			the word symbol table and HMM of this lattice.
		Raise:
			WrongOperation if this lattice is void.
			KaldiProcessError if the command exits abnormally or outputs nothing.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any lattice data.')

		assert isinstance(acwt, float) and acwt >= 0, "Expected <acwt> is positive float value."
		assert isinstance(beam, int) and beam >= 0, "Expected <beam> is positive int value."

		# The lattice is read from stdin and the result is written to stdout.
		command = " ".join([
			"lattice-determinize-pruned",
			f"--acoustic-scale={acwt}",
			f"--beam={beam}",
			"ark:-",
			"ark:-",
		])

		stdout, stderr, status = run_shell_command(
			command,
			stdin=subprocess.PIPE,
			stdout=subprocess.PIPE,
			stderr=subprocess.PIPE,
			inputs=self.data,
		)

		# A non-zero exit status or an empty output both mean failure.
		if status != 0 or stdout == b'':
			print(stderr.decode())
			raise KaldiProcessError("Failed to determinize lattice.")

		return Lattice(
			data=stdout,
			wordSymbolTable=self.wordSymbolTable,
			hmm=self.hmm,
			name=f"determinize({self.name})",
		)
Example #2
0
	def add_penalty(self, penalty=0):
		'''
		Add word insertion penalty to the lattice with the Kaldi
		<lattice-add-penalty> command.

		Args:
			<penalty>: word insertion penalty, an int or float value which is not negative.
		Return:
			A new Lattice object named "add_penalty(<name of this lattice>)". It keeps
			the word symbol table and HMM of this lattice.
		Raise:
			WrongOperation if this lattice is void.
			KaldiProcessError if the command exits abnormally or outputs nothing.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			# The message used to say "to scale", copied from scale(); report the real situation.
			raise WrongOperation('No any lattice data.')

		assert isinstance(penalty, (int,float)) and penalty >= 0, "Expected <penalty> is positive int or float value."

		cmd = f"lattice-add-penalty --word-ins-penalty={penalty} ark:- ark:-"

		out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

		# A non-zero exit status or an empty output both mean failure.
		if cod != 0 or out == b'':
			print(err.decode())
			raise KaldiProcessError("Failed to add penalty.")

		newName = f"add_penalty({self.name})"
		return Lattice(data=out, wordSymbolTable=self.wordSymbolTable, hmm=self.hmm, name=newName)
Example #3
0
def compute_postprob_norm(ali, posrProbDim):
    '''
    Compute alignment counts in order to normalize acoustic model posterior probability.
    For more help information, look at the Kaldi <analyze-counts> command.

    Args:
        <ali>: exkaldi NumpyAlignmentPhone or NumpyAlignmentPdf object.
        <posrProbDim>: the dimensionality of posterior probability.
    Return:
        A numpy array holding log(count / total count) for each dimension.
    Raise:
        UnsupportedType if <ali> is not one of the two supported alignment types.
        KaldiProcessError if <analyze-counts> exits abnormally or outputs nothing.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if type_name(ali) not in ["NumpyAlignmentPhone", "NumpyAlignmentPdf"]:
        # The previous message was truncated and did not name the accepted types.
        raise UnsupportedType(
            f'Expected exkaldi NumpyAlignmentPhone or NumpyAlignmentPdf object but got a {type_name(ali)}.'
        )

    cmd = f"analyze-counts --print-args=False --verbose=0 --binary=false --counts-dim={posrProbDim} ark:- -"
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=ali.data)
    if (isinstance(cod, int) and cod != 0) or out == b"":
        print(err.decode())
        raise KaldiProcessError('Analyze counts defailed.')

    # The text output is one bracketed vector: "[ c0 c1 ... ]".
    fields = out.decode().strip().strip("[]").strip().split()
    counts = np.array(fields, dtype=np.int32)
    countBias = np.log(counts / np.sum(counts))
    return countBias
Example #4
0
def compute_cmvn_stats(feat, spk2utt=None, name="cmvn"):
    '''
    Compute CMVN statistics with the Kaldi <compute-cmvn-stats> command.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <spk2utt>: None, spk2utt file path or exkaldi ScriptTable object.
                   If given, it is passed to the command with the --spk2utt option.
        <name>: a string used to name the returned object.

    Return:
        An exkaldi BytesCMVNStatistics object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    featType = type_name(feat)
    if featType == "BytesFeature":
        feat = feat.sort("utt")
    elif featType == "NumpyFeature":
        feat = feat.sort("utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected <feat> is a exkaldi feature object but got {featType}."
        )

    spk2uttTemp = tempfile.NamedTemporaryFile("w+", encoding="utf-8")
    try:
        spk2uttOption = ""
        if spk2utt is not None:
            if isinstance(spk2utt, str):
                if not os.path.isfile(spk2utt):
                    raise WrongPath(f"No such file:{spk2utt}.")
                table = ScriptTable(name="spk2utt").load(spk2utt)
            elif isinstance(spk2utt, ScriptTable):
                table = spk2utt
            else:
                raise UnsupportedType(
                    f"<spk2utt> should be a file path or ScriptTable object but got {type_name(spk2utt)}."
                )
            # The sorted table is written to the temporary file which the command reads by name.
            table.sort().save(spk2uttTemp)
            spk2uttTemp.seek(0)
            spk2uttOption = f" --spk2utt=ark:{spk2uttTemp.name}"

        command = f"compute-cmvn-stats{spk2uttOption} ark:- ark:-"

        stdout, stderr, status = run_shell_command(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            inputs=feat.data,
        )

        if (isinstance(status, int) and status != 0) or stdout == b'':
            print(stderr.decode())
            raise KaldiProcessError('Failed to compute CMVN statistics.')

        return BytesCMVNStatistics(stdout, name, indexTable=None)
    finally:
        spk2uttTemp.close()
Example #5
0
	def am_rescore(self, hmm, feat):
		'''
		Replace the acoustic scores of the lattice with the ones from a new HMM-GMM
		model, using the Kaldi <gmm-rescore-lattice> command.

		Args:
			<hmm>: exkaldi HMM object or model file path.
			<feat>: exkaldi feature object (BytesFeature or NumpyFeature).

		Return:
			A new Lattice object named "am_rescore(<name of this lattice>)". It keeps
			the word symbol table and HMM attached to this lattice.
		Raise:
			WrongOperation if this lattice is void.
			UnsupportedType if <hmm> or <feat> has an unsupported type.
			KaldiProcessError if the command exits abnormally or outputs nothing.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any lattice data.')

		hmmTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
		featTemp = tempfile.NamedTemporaryFile("wb+", suffix=".ark")
		try:
			if isinstance(hmm, str):
				assert os.path.isfile(hmm), f"No such file: {hmm}."
				hmmFile = hmm
			elif type_name(hmm) in ["BaseHMM", "MonophoneHMM", "TriphoneHMM"]:
				hmmTemp.write(hmm.data)
				hmmTemp.seek(0)
				hmmFile = hmmTemp.name
			else:
				raise UnsupportedType(f"<hmm> should be file path or exkaldi HMM object but got: {type_name(hmm)}.")

			if type_name(feat) == "BytesFeature":
				feat = feat.sort(by="utt")
			elif type_name(feat) == "NumpyFeature":
				# The feature is written to a binary archive below, so it must be converted to bytes.
				feat = feat.sort(by="utt").to_bytes()
			else:
				raise UnsupportedType(f"<feat> should be exkaldi feature object but got: {type_name(feat)}.")

			featTemp.write(feat.data)
			featTemp.seek(0)
			featFile = featTemp.name

			# Lattice comes from stdin; model and feature are read from files.
			cmd = f"gmm-rescore-lattice {hmmFile} ark:- ark:{featFile} ark:-"

			out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

			if cod != 0 or out == b'':
				print(err.decode())
				raise KaldiProcessError("Failed to rescore lattice.")

			newName = f"am_rescore({self.name})"
			return Lattice(data=out, wordSymbolTable=self.wordSymbolTable, hmm=self.hmm, name=newName)
		finally:
			hmmTemp.close()
			featTemp.close()
Example #6
0
def use_cmvn_sliding(feat, windowsSize=None, std=False):
    '''
    Apply sliding-window CMVN with the Kaldi <apply-cmvn-sliding> command.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <windowsSize>: window size, an int value. If None, the longest utterance
                       length rounded up to a multiple of 100 is used.
        <std>: a bool value. If True, variance is normalized too.

    Return:
        An exkaldi BytesFeature object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if isinstance(feat, BytesFeature):
        pass
    elif type_name(feat) == "NumpyFeature":
        feat = feat.to_bytes()
    else:
        raise UnsupportedType(
            f"Expected <feat> is an exkaldi feature object but got {type_name(feat)}."
        )

    if windowsSize is None:
        # feat.lens[1] is iterated as (utterance ID, frame count) pairs.
        featLen = feat.lens[1]
        maxLen = max(length for _, length in featLen)
        windowsSize = math.ceil(maxLen / 100) * 100
    else:
        assert isinstance(windowsSize,
                          int), "Expected <windowsSize> is an int value."

    # Equality (not truthiness) is kept on purpose so that accepted values do not change.
    normVars = 'true' if std == True else 'false'

    cmd = f'apply-cmvn-sliding --cmn-window={windowsSize} --min-cmn-window=100 --norm-vars={normVars} ark:- ark:-'
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=feat.data)
    if (isinstance(cod, int) and cod != 0) or out == b'':
        print(err.decode())
        raise KaldiProcessError('Failed to use sliding CMVN.')

    newName = f"cmvn({feat.name},{windowsSize})"
    return BytesFeature(out, newName, indexTable=None)
Example #7
0
    def loadArkScpFile(fileName, suffix):
        '''
        Read a feature file through the Kaldi <copy-feats> command.

        Args:
            <fileName>: path of the file to read.
            <suffix>: "ark" to read it as an archive; any other value reads it as a script file.
        Return:
            A BytesMatrix object built from the command output.
        '''
        ExkaldiInfo.vertify_kaldi_existed()

        specifier = "ark" if suffix == "ark" else "scp"
        command = f"copy-feats {specifier}:{fileName} ark:-"

        stdout, stderr, status = run_shell_command(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )

        # A non-zero integer exit status or an empty output both mean failure.
        if (isinstance(status, int) and status != 0) or stdout == b'':
            print(stderr.decode())
            raise KaldiProcessError('Copy feat defeated.')

        return BytesMatrix(stdout)
Example #8
0
	def scale(self, acwt=1, invAcwt=1, ac2lm=0, lmwt=1, lm2ac=0):
		'''
		Scale the scores of the lattice with the Kaldi <lattice-scale> command.

		Args:
			<acwt>: acoustic scale.
			<invAcwt>: inverse acoustic scale.
			<ac2lm>: acoustic to lm scale.
			<lmwt>: lm scale.
			<lm2ac>: lm to acoustic scale.
			All of them must not be negative.
		Return:
			A new Lattice object named "scale(<name of this lattice>)". It keeps
			the word symbol table and HMM of this lattice.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any lattice to scale.')

		for weight in (acwt, invAcwt, ac2lm, lmwt, lm2ac):
			assert weight >= 0, "Expected scale is positive value."

		command = " ".join([
			"lattice-scale",
			f"--acoustic-scale={acwt}",
			f"--acoustic2lm-scale={ac2lm}",
			f"--inv-acoustic-scale={invAcwt}",
			f"--lm-scale={lmwt}",
			f"--lm2acoustic-scale={lm2ac}",
			"ark:-",
			"ark:-",
		])

		stdout, stderr, status = run_shell_command(
			command,
			stdin=subprocess.PIPE,
			stdout=subprocess.PIPE,
			stderr=subprocess.PIPE,
			inputs=self.data,
		)

		# A non-zero exit status or an empty output both mean failure.
		if status != 0 or stdout == b'':
			print(stderr.decode())
			raise KaldiProcessError("Failed to scale lattice.")

		return Lattice(
			data=stdout,
			wordSymbolTable=self.wordSymbolTable,
			hmm=self.hmm,
			name=f"scale({self.name})",
		)
Example #9
0
def load_ali(target, aliType=None, name="ali", hmm=None):
    '''
    Load alignment data.

    Args:
        <target>: Python dict object, exkaldi alignment object, or a path string which is
                  expanded by list_files() into Kaldi alignment files and/or .npy files.
        <aliType>: None, or one of 'transitionID', 'phoneID', 'pdfID'. It decides which alignment object is returned.
        <name>: a non-empty string.
        <hmm>: file path or exkaldi HMM object. It is only needed when 'phoneID' or 'pdfID'
               alignment is converted from a Kaldi alignment file.
    Return:
        An exkaldi alignment object, or a list of them if the loaded files produced more than one kind.
    Raise:
        WrongOperation if <aliType> is invalid or no data was loaded.
        UnsupportedType if <target> or <hmm> has an unsupported type.
        WrongPath if <hmm> is a path which does not exist.
        ShellProcessError / KaldiProcessError if reading or converting a file failed.
    '''
    assert isinstance(
        name, str) and len(name) > 0, "Name shoud be a string avaliable."

    ExkaldiInfo.vertify_kaldi_existed()

    def numpy_type(kind):
        # Map <aliType> to the (type name, class) pair of the Numpy alignment it selects.
        if kind is None:
            return "NumpyAlignment", NumpyAlignment
        elif kind == "transitionID":
            return "NumpyAlignmentTrans", NumpyAlignmentTrans
        elif kind == "phoneID":
            return "NumpyAlignmentPhone", NumpyAlignmentPhone
        elif kind == "pdfID":
            return "NumpyAlignmentPdf", NumpyAlignmentPdf
        raise WrongOperation(
            f"<aliType> should be None, 'transitionID', 'phoneID' or 'pdfID' but got {kind}."
        )

    def transform(data, cmd):
        # Run <cmd> and parse its text output ("uttID id id ...", one utterance per line).
        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=data)
        # Either condition alone means failure (this used to require both).
        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to transform alignment.')

        result = {}
        for line in BytesIO(out).readlines():
            fields = line.decode().strip().split()
            if not fields:
                continue
            result[fields[0]] = np.array(fields[1:], dtype=np.int32)
        return result

    if isinstance(target, dict):
        _, aliClass = numpy_type(aliType)
        result = aliClass(target, name)
        result.check_format()
        return result

    elif type_name(target) in [
            "NumpyAlignment", "NumpyAlignmentTrans", "NumpyAlignmentPhone",
            "NumpyAlignmentPdf", "BytesAlignmentTrans"
    ]:
        result = copy.deepcopy(target)
        result.rename(name)
        return result

    elif isinstance(target, str):

        allFiles = list_files(target)

        # One accumulator per alignment kind; only the non-void ones are returned.
        results = {
            "NumpyAlignment": NumpyAlignment(),
            "NumpyAlignmentTrans": NumpyAlignmentTrans(),
            "NumpyAlignmentPhone": NumpyAlignmentPhone(),
            "NumpyAlignmentPdf": NumpyAlignmentPdf(),
            "BytesAlignmentTrans": BytesAlignmentTrans(),
        }

        for fileName in allFiles:
            fileName = os.path.abspath(fileName)

            if fileName.endswith(".npy"):
                loaded = __read_data_from_file(fileName, "npy")
                key, aliClass = numpy_type(aliType)
                results[key] += aliClass(loaded.data)
                continue

            if fileName.endswith('.gz'):
                cmd = f'gunzip -c {fileName}'
            else:
                cmd = f'cat {fileName}'

            if aliType is None or aliType == "transitionID":
                out, err, cod = run_shell_command(cmd,
                                                  stdout=subprocess.PIPE,
                                                  stderr=subprocess.PIPE)
                if (isinstance(cod, int) and cod != 0) or out == b'':
                    print(err.decode())
                    raise ShellProcessError(
                        "Failed to get the alignment data from file.")
                results["BytesAlignmentTrans"] += BytesAlignmentTrans(out)
                continue

            if aliType not in ("phoneID", "pdfID"):
                raise WrongOperation(
                    f"<aliType> should be None, 'transitionID', 'phoneID' or 'pdfID' but got {aliType}."
                )

            # Keep the temporary model file in its own variable so that it is
            # the object closed in "finally", whatever happens in between.
            hmmTemp = tempfile.NamedTemporaryFile("wb+")
            try:
                if type_name(hmm) in ("HMM", "MonophoneHMM", "TriphoneHMM"):
                    hmm.save(hmmTemp)
                    hmmFileName = hmmTemp.name
                elif isinstance(hmm, str):
                    if not os.path.isfile(hmm):
                        raise WrongPath(f"No such file:{hmm}.")
                    hmmFileName = hmm
                else:
                    raise UnsupportedType(
                        f"<hmm> should be a filePath or exkaldi HMM and its sub-class object. but got {type_name(hmm)}."
                    )

                # The conversion tool reads the alignment from the cat/gunzip stage through a pipe.
                if aliType == "phoneID":
                    cmd += f" | ali-to-phones --per-frame=true {hmmFileName} ark:- ark,t:-"
                    results["NumpyAlignmentPhone"] += NumpyAlignmentPhone(
                        transform(None, cmd))
                else:
                    cmd += f" | ali-to-pdf {hmmFileName} ark:- ark,t:-"
                    results["NumpyAlignmentPdf"] += NumpyAlignmentPdf(
                        transform(None, cmd))
            finally:
                hmmTemp.close()

        finalResult = []
        for obj in results.values():
            if not obj.is_void:
                obj.rename(name)
                finalResult.append(obj)

        if len(finalResult) == 0:
            raise WrongOperation(
                "<target> dose not include any data avaliable.")
        elif len(finalResult) == 1:
            finalResult = finalResult[0]

        return finalResult

    else:
        # Previously an unsupported <target> fell through and None was returned silently.
        raise UnsupportedType(
            f"<target> should be a dict object, exkaldi alignment object or file path but got {type_name(target)}."
        )
Example #10
0
def use_cmvn(feat, cmvn, utt2spk=None, std=False):
    '''
    Apply CMVN statistics to feature with the Kaldi <apply-cmvn> command.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <cmvn>: exkaldi CMVN statistics object (BytesCMVNStatistics or NumpyCMVNStatistics).
        <utt2spk>: None, utt2spk file path or ScriptTable object.
        <std>: If True, apply std normalization.

    Return:
        A new BytesFeature object. A NumpyFeature input is converted to bytes
        before processing and the result is not converted back.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if type_name(feat) == "BytesFeature":
        feat = feat.sort(by="utt")
    elif type_name(feat) == "NumpyFeature":
        feat = feat.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi feature but got {type_name(feat)}.")

    if type_name(cmvn) == "BytesCMVNStatistics":
        cmvn = cmvn.sort(by="utt")
    elif type_name(cmvn) == "NumpyCMVNStatistics":
        cmvn = cmvn.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi CMVN statistics but got {type_name(cmvn)}.")

    cmvnTemp = tempfile.NamedTemporaryFile('wb+', suffix='_cmvn.ark')
    utt2spkTemp = tempfile.NamedTemporaryFile('w+',
                                              suffix="_utt2spk",
                                              encoding="utf-8")
    try:
        cmvnTemp.write(cmvn.data)
        cmvnTemp.seek(0)

        # Kaldi options take the "--name=value" form. Written as "--norm-vars true",
        # the word "true" would be handed to the tool as an extra positional argument.
        stdOption = " --norm-vars=true" if std is True else ""

        if utt2spk is None:
            cmd = f'apply-cmvn{stdOption} ark:{cmvnTemp.name} ark:- ark:-'
        else:
            if isinstance(utt2spk, str):
                if not os.path.isfile(utt2spk):
                    raise WrongPath(f"No such file:{utt2spk}.")
                utt2spkSorted = ScriptTable(
                    name="utt2spk").load(utt2spk).sort()
            elif isinstance(utt2spk, ScriptTable):
                utt2spkSorted = utt2spk.sort()
            else:
                raise UnsupportedType(
                    f"<utt2spk> should be a file path or ScriptTable object but got {type_name(utt2spk)}."
                )
            utt2spkSorted.save(utt2spkTemp)
            utt2spkTemp.seek(0)

            cmd = f'apply-cmvn{stdOption} --utt2spk=ark:{utt2spkTemp.name} ark:{cmvnTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to apply CMVN statistics.')

        # NOTE(review): the old code tested type_name(feat) == "NumpyFeature" here, but <feat>
        # had already been converted to bytes, so that branch could never run. The dead branch
        # is removed and the effective return type (BytesFeature) is kept for callers.
        newName = f"cmvn({feat.name},{cmvn.name})"
        return BytesFeature(out, newName, indexTable=None)
    finally:
        cmvnTemp.close()
        utt2spkTemp.close()
Example #11
0
def __compute_feature(wavFile, kaldiTool, useSuffix=None, name="feat"):
    '''
    Compute feature from wave data with a Kaldi feature extraction command.

    Args:
        <wavFile>: a path string (expanded with the shell command "ls") or a ScriptTable object.
        <kaldiTool>: the Kaldi command (with its options) used to compute the feature.
        <useSuffix>: None, or a string ending with "scp" or "wav". It tells how to treat
                     files whose own suffix is neither "wav" nor "scp".
        <name>: a string used to name the returned feature.

    Return:
        An exkaldi BytesFeature object which merges the features of all files.
    '''
    if useSuffix is not None:
        assert isinstance(useSuffix, str), "Expected <useSuffix> is a string."
        useSuffix = useSuffix.strip().lower()[-3:]
    else:
        useSuffix = ""
    assert useSuffix in ["", "scp",
                         "wav"], 'Expected <useSuffix> is "scp" or "wav".'

    ExkaldiInfo.vertify_kaldi_existed()

    wavFileTemp = tempfile.NamedTemporaryFile("w+",
                                              suffix=".scp",
                                              encoding="utf-8")
    try:
        if isinstance(wavFile, str):
            if os.path.isdir(wavFile):
                raise WrongOperation(
                    f'Expected <wavFile> is file path but got a directory:{wavFile}.'
                )
            out, err, cod = run_shell_command(f'ls {wavFile}',
                                              stdout=subprocess.PIPE,
                                              stderr=subprocess.PIPE)
            if out == b'':
                raise WrongPath(f"No such file:{wavFile}.")
            allFiles = out.decode().strip().split('\n')
        elif isinstance(wavFile, ScriptTable):
            wavFile.sort().save(wavFileTemp)
            allFiles = [
                wavFileTemp.name,
            ]
        else:
            raise UnsupportedType(
                f'Expected filename-like string but got a {type_name(wavFile)}.'
            )

        results = []
        for filePath in allFiles:
            filePath = os.path.abspath(filePath)

            # The file's own suffix wins; <useSuffix> is only the fallback.
            suffix = filePath[-3:].lower()
            fileKind = suffix if suffix in ("wav", "scp") else useSuffix

            if fileKind == "wav":
                # Utterance ID is the file name without its last 4 characters and without dots.
                fileName = os.path.basename(filePath)
                uttID = "".join(fileName[0:-4].split("."))
                cmd = f"echo {uttID} {filePath} | {kaldiTool} scp,p:- ark:-"
            elif fileKind == "scp":
                cmd = f"{kaldiTool} scp,p:{filePath} ark:-"
            else:
                raise UnsupportedType(
                    'Unknown file suffix. You can declare it by making <useSuffix> "wav" or "scp".'
                )

            out, err, cod = run_shell_command(cmd,
                                              stdout=subprocess.PIPE,
                                              stderr=subprocess.PIPE)
            # The exit status is <cod>; this check used to test isinstance(out, int),
            # which is never true, so a non-zero exit status went unnoticed.
            if (isinstance(cod, int) and cod != 0) or out == b'':
                print(err.decode())
                raise KaldiProcessError(f'Failed to compute feature:{name}.')
            results.append(BytesFeature(out))
    finally:
        wavFileTemp.close()

    if len(results) == 0:
        raise WrongOperation("No any feature date in file path.")

    result = results[0]
    for other in results[1:]:
        result += other
    result.rename(name)
    return result
Example #12
0
	def get_1best(self, wordSymbolTable=None, hmm=None, lmwt=1, acwt=1.0, phoneLevel=False):
		'''
		Get the 1-best result of the lattice in text format with the Kaldi
		<lattice-best-path> command.

		Args:
			<wordSymbolTable>: None or file path or ListTable object or LexiconBank object.
			                   If None, the one attached to this lattice is used.
			<hmm>: None or file path or HMM object. If None, the one attached to this lattice is used.
			<lmwt>: language model weight, a non-negative int value.
			<acwt>: acoustic model weight.
			<phoneLevel>: If True, the lattice is first piped through <lattice-align-phones>
			              to return phone results.
		Return:
			An exkaldi Transcription object.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any data in lattice.')

		assert isinstance(lmwt, int) and lmwt >=0, "Expected <lmwt> is a non-negative int number."

		if wordSymbolTable is None:
			assert self.wordSymbolTable is not None, "<wordSymbolTable> is necessary because no wordSymbol table is avaliable."
			wordSymbolTable = self.wordSymbolTable

		if hmm is None:
			# This message used to mention the word symbol table by mistake.
			assert self.hmm is not None, "<hmm> is necessary because no HMM is available."
			hmm = self.hmm

		modelTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
		wordSymbolTemp = tempfile.NamedTemporaryFile("w+", suffix="_words.txt", encoding="utf-8")

		try:
			if isinstance(wordSymbolTable, str):
				assert os.path.isfile(wordSymbolTable), f"No such file: {wordSymbolTable}."
				wordsFile = wordSymbolTable
			elif type_name(wordSymbolTable) == "LexiconBank":
				if phoneLevel:
					wordSymbolTable.dump_dict("phones", wordSymbolTemp)
				else:
					wordSymbolTable.dump_dict("words", wordSymbolTemp)
				wordsFile = wordSymbolTemp.name
			elif type_name(wordSymbolTable) == "ListTable":
				wordSymbolTable.save(wordSymbolTemp)
				wordSymbolTemp.seek(0)
				wordsFile = wordSymbolTemp.name
			else:
				raise UnsupportedType(f"<wordSymbolTable> should be file name, LexiconBank object or ListTable object but got: {type_name(wordSymbolTable)}.")

			if isinstance(hmm, str):
				assert os.path.isfile(hmm), f"No such file: {hmm}."
				hmmFile = hmm
			elif type_name(hmm) in ["MonophoneHMM","TriphoneHMM"]:
				hmm.save(modelTemp)
				hmmFile = modelTemp.name
			else:
				raise UnsupportedType(f"<hmm> should be file name, exkaldi HMM object but got: {type_name(hmm)}.")

			if phoneLevel:
				cmd0 = f'lattice-align-phones --replace-output-symbols=true {hmmFile} ark:- ark:- | '
			else:
				cmd0 = ""

			cmd1 = f"lattice-best-path --lm-scale={lmwt} --acoustic-scale={acwt} --word-symbol-table={wordsFile} --verbose=2 ark:- ark,t:- "
			cmd = cmd0 + cmd1

			out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)
			if cod != 0 or out == b'':
				print(err.decode())
				raise KaldiProcessError('Failed to get 1-best from lattice.')

			newName = "1-best-phones" if phoneLevel else "1-best-words"
			results = Transcription(name=newName)
			# Each output line is "uttID text"; the text part may be missing.
			for line in out.decode().strip().split("\n"):
				fields = line.strip().split(maxsplit=1)
				if len(fields) == 0:
					continue
				elif len(fields) == 1:
					results[fields[0]] = " "
				else:
					results[fields[0]] = fields[1]
			return results

		finally:
			modelTemp.close()
			wordSymbolTemp.close()
Example #13
0
def gmm_decode(feat, hmm, HCLGFile, wordSymbolTable, beam=10, latBeam=8, acwt=1,
				minActive=200, maxActive=7000, maxMem=50000000, config=None, maxThreads=1):
	'''
	Decode by generating lattice from feature and GMM model with the Kaldi
	<gmm-latgen-faster> command (<gmm-latgen-faster-parallel> if <maxThreads> is larger than 1).

	Args:
		<feat>: An exkaldi feature object (BytesFeature or NumpyFeature).
		<hmm>: An exkaldi HMM object or file path.
		<HCLGFile>: HCLG file path.
		<wordSymbolTable>: words.txt file path or exkaldi LexiconBank object or exkaldi ListTable object.
		<beam>: value of the --beam option.
		<latBeam>: value of the --lattice-beam option.
		<acwt>: value of the --acoustic-scale option.
		<minActive>: value of the --min-active option.
		<maxActive>: value of the --max-active option.
		<maxMem>: value of the --max_mem option.
		<config>: None or a dict of extra options. It is checked with check_config('gmm_decode')
		          and its items are appended after the options above.
		<maxThreads>: the number of threads.

		Run shell command "gmm-latgen-faster" to look the meaning of the options.
	Return:
		An exkaldi Lattice object named "lat(<feature name>)".
	'''
	ExkaldiInfo.vertify_kaldi_existed()

	featType = type_name(feat)
	if featType == "NumpyFeature":
		feat = feat.to_bytes()
	elif featType != "BytesFeature":
		raise UnsupportedType(f"Expected <feat> is an exkaldi feature object but got: {featType}.")

	assert isinstance(HCLGFile, str), "<HCLGFile> should be a file path."
	if not os.path.isfile(HCLGFile):
		raise WrongPath(f"No such file:{HCLGFile}")

	# The command is collected piece by piece and joined right before it is run.
	if maxThreads > 1:
		pieces = [f"gmm-latgen-faster-parallel --num-threads={maxThreads} "]
	else:
		pieces = ["gmm-latgen-faster "]

	pieces.extend([
		'--allow-partial=true ',
		f'--min-active={minActive} ',
		f'--max-active={maxActive} ',
		f'--max_mem={maxMem} ',
		f'--beam={beam} ',
		f'--lattice-beam={latBeam} ',
		f'--acoustic-scale={acwt} ',
	])

	wordsTemp = tempfile.NamedTemporaryFile("w+", suffix="_words.txt", encoding="utf-8")
	modelTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")

	try:
		tableType = type_name(wordSymbolTable)
		if tableType == "LexiconBank":
			wordSymbolTable.dump_dict("words", wordsTemp)
			wordsFile = wordsTemp.name
		elif tableType == "ListTable":
			wordSymbolTable.save(wordsTemp)
			wordsTemp.seek(0)
			wordsFile = wordsTemp.name
		elif isinstance(wordSymbolTable, str):
			if not os.path.isfile(wordSymbolTable):
				raise WrongPath(f"No such file:{wordSymbolTable}.")
			wordsFile = wordSymbolTable
		else:
			raise UnsupportedType(f"<wordSymbolTable> should be a file path or exkaldi LexiconBank object but got {tableType}.")

		pieces.append(f'--word-symbol-table={wordsFile} ')

		if config is not None and check_config(name='gmm_decode', config=config):
			for option, value in config.items():
				if value is True:
					# A bool option set to True is given as a bare flag.
					pieces.append(f"{option} ")
				elif not isinstance(value, bool):
					pieces.append(f" {option}={value}")
				# A bool option set to False is left out.

		if type_name(hmm) in ["MonophoneHMM", "TriphoneHMM"]:
			modelTemp.write(hmm.data)
			modelTemp.seek(0)
			hmmFile = modelTemp.name
		elif isinstance(hmm, str):
			if not os.path.isfile(hmm):
				raise WrongPath(f"No such file:{hmm}.")
			hmmFile = hmm
		else:
			raise UnsupportedType(f"<hmm> should be exkaldi HMM object or file path but got {type_name(hmm)}.")

		kaldiTool = "".join(pieces)
		command = f'{kaldiTool} {hmmFile} {HCLGFile} ark:- ark:-'
		stdout, stderr, status = run_shell_command(
			command,
			stdin=subprocess.PIPE,
			stdout=subprocess.PIPE,
			stderr=subprocess.PIPE,
			inputs=feat.data,
		)

		# A non-zero exit status or an empty output both mean failure.
		if status != 0 or stdout == b'':
			print(stderr.decode())
			raise KaldiProcessError('Failed to generate lattice.')

		return Lattice(data=stdout, name=f"lat({feat.name})")

	finally:
		wordsTemp.close()
		modelTemp.close()
Example #14
0
def wer(ref, hyp, ignore=None, mode='all'):
    '''
    Compute WER (word error rate) between <ref> and <hyp>.

    Args:
        <ref>, <hyp>: exkaldi transcription object or file path.
        <ignore>: a symbol removed from both <ref> and <hyp> before scoring. It is placed
                  as-is into a sed 's/<ignore> //g' expression, so sed treats it as a pattern.
        <mode>: "all" or "present". Forwarded to the --mode option of compute-wer.
    Return:
        a namedtuple "Score" with the fields: WER, words, insErr, delErr, subErr, SER,
        sentences, wrongSentences, missedSentences.
    Raise:
        WrongPath: a given file path does not exist.
        UnsupportedType: <ref> or <hyp> is neither a Transcription object nor a string.
        WrongDataFormat: removing <ignore> failed or produced empty output.
        KaldiProcessError: compute-wer printed nothing to stdout.
    '''
    assert mode in ['all',
                    'present'], 'Expected <mode> to be "present" or "all".'
    ExkaldiInfo.vertify_kaldi_existed()

    def plain_file(target, label, temp):
        # Return a path readable by compute-wer: dump a Transcription into <temp>,
        # or validate and reuse a path given by the caller.
        if type_name(target) == "Transcription":
            target.save(temp)
            temp.seek(0)
            return temp.name
        if isinstance(target, str):
            if not os.path.isfile(target):
                raise WrongPath(f"No such file:{target}.")
            return target
        raise UnsupportedType(
            f'<{label}> should be exkaldi Transcription object or file path.')

    def filtered_file(target, label, temp):
        # Load the transcription text, strip "<ignore> " with sed, and store the
        # result in <temp>.
        if type_name(target) == "Transcription":
            text = target.save()
        elif isinstance(target, str):
            if not os.path.isfile(target):
                raise WrongPath(f"No such file:{target}.")
            with open(target, "r", encoding="utf-8") as fr:
                text = fr.read()
        else:
            raise UnsupportedType(
                f'<{label}> should be exkaldi Transcription object or file path.'
            )

        # sed writes to stdout (no shell redirection) so that the captured output
        # can be validated before it is written into the temporary file.
        cmd = f'sed "s/{ignore} //g"'
        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=text.encode())
        if cod != 0 or len(out) == 0:
            print(err.decode())
            raise WrongDataFormat(f"<{label}> has wrong data formation.")
        temp.write(out.decode())
        temp.flush()
        return temp.name

    makeFile = plain_file if ignore is None else filtered_file

    with tempfile.NamedTemporaryFile("w+", suffix=".txt", encoding="utf-8") as hypTemp, \
            tempfile.NamedTemporaryFile("w+", suffix=".txt", encoding="utf-8") as refTemp:
        hypFileName = makeFile(hyp, "hyp", hypTemp)
        # The reference is built from <ref> itself (not from <hyp>).
        refFileName = makeFile(ref, "ref", refTemp)

        cmd = f'compute-wer --text --mode={mode} ark:{refFileName} ark,p:{hypFileName}'
        scoreOut, scoreErr, _ = run_shell_command(cmd,
                                                  stdout=subprocess.PIPE,
                                                  stderr=subprocess.PIPE)

    if len(scoreOut) == 0:
        print(scoreErr.decode())
        raise KaldiProcessError("Failed to compute WER.")

    out = scoreOut.decode().split("\n")
    pattern1 = r'%WER (.*) \[ (.*) \/ (.*), (.*) ins, (.*) del, (.*) sub \]'
    pattern2 = r"%SER (.*) \[ (.*) \/ (.*) \]"
    pattern3 = r"Scored (.*) sentences, (.*) not present in hyp."
    s1 = re.findall(pattern1, out[0])[0]
    s2 = re.findall(pattern2, out[1])[0]
    s3 = re.findall(pattern3, out[2])[0]

    Score = namedtuple("Score", [
        "WER", "words", "insErr", "delErr", "subErr", "SER", "sentences",
        "wrongSentences", "missedSentences"
    ])
    # compute-wer prints "%WER p [ errors / words, ... ]" and "%SER p [ errors / sentences ]":
    # in both lines the count of errors comes first and the total second.
    return Score(
        float(s1[0]),  #WER
        int(s1[2]),  #words
        int(s1[3]),  #ins
        int(s1[4]),  #del
        int(s1[5]),  #sub
        float(s2[0]),  #SER
        int(s2[2]),  #sentences
        int(s2[1]),  #wrong sentences
        int(s3[1])  #missed sentences
    )
Example #15
0
def prepare_data():
    '''
    Generate the wav.scp, utt2spk, spk2utt and text files of the TIMIT train, dev and test sets.

    The corpus is read from the module-level <timitRoot> directory. Results are written
    to "exp/data/train", "exp/data/dev" and "exp/data/test", and the 48-to-39 phone map
    is saved to "exp/dict/phones.48_to_39.map".

    Raise:
        Exception: the sph2pipe tool is missing, <timitRoot> is not a directory, or
                   <timitRoot> holds neither TRAIN/TEST nor train/test sub directories.
    '''
    dataOutDir = os.path.join("exp", "data")
    exkaldi.utils.make_dependent_dirs(dataOutDir, pathIsFile=False)

    # Prepare tools
    ExkaldiInfo.vertify_kaldi_existed()
    sph2pipeTool = os.path.join(ExkaldiInfo.KALDI_ROOT, "tools",
                                "sph2pipe_v2.5", "sph2pipe")
    # Actually test the file system: a joined path string alone is always truthy.
    if not os.path.isfile(sph2pipeTool):
        raise Exception("Expected sph2pipe tool existed.")

    # Check TIMIT data format
    if not os.path.isdir(timitRoot):
        raise Exception(f"No such directory: {timitRoot}.")
    dirNames = os.listdir(timitRoot)
    if "TRAIN" in dirNames and "TEST" in dirNames:
        uppercaseFlag = True
        trainResourceDir = "TRAIN"
        testResourceDir = "TEST"
        testWavFile = os.path.join(timitRoot, "TRAIN", "DR1", "FCJF0",
                                   "SA1.WAV")
        wavFileSuffix = "WAV"
        txtFileSuffix = "PHN"
    elif "train" in dirNames and "test" in dirNames:
        uppercaseFlag = False
        trainResourceDir = "train"
        testResourceDir = "test"
        testWavFile = os.path.join(timitRoot, "train", "dr1", "fcjf0",
                                   "sa1.wav")
        wavFileSuffix = "wav"
        txtFileSuffix = "phn"
    else:
        raise Exception("Wrong format of train or test data directories.")

    # Probe one known file: if sph2pipe converts it successfully, every wav.scp entry
    # is written as a sph2pipe pipe command instead of a plain file path.
    formatCheckCmd = f"{sph2pipeTool}  -f wav {testWavFile}"
    out, err, cod = exkaldi.utils.run_shell_command(formatCheckCmd,
                                                    stdout=subprocess.PIPE,
                                                    stderr=subprocess.PIPE)
    sphFlag = (cod == 0)

    # Transform phones from 60 categories to 48 categories and generate the 48 to 39 transform dictionary
    phoneMap_60_to_48 = exkaldi.ListTable(name="60-48")
    phoneMap_48_to_39 = exkaldi.ListTable(name="48-39")
    mapFile = os.path.join(ExkaldiInfo.KALDI_ROOT, "egs", "timit", "s5",
                           "conf", "phones.60-48-39.map")
    with open(mapFile, "r", encoding="utf-8") as fr:
        for line in fr:
            fields = line.strip().split()
            # Lines with fewer than three columns carry no 48/39 mapping and are skipped.
            if len(fields) < 3:
                continue
            phoneMap_60_to_48[fields[0]] = fields[1]
            phoneMap_48_to_39[fields[1]] = fields[2]
    phoneMap_48_to_39.save(os.path.join("exp", "dict", "phones.48_to_39.map"))

    # Design a function to generate wav.scp, spk2utt, utt2spk, text files.
    def generate_data(wavFiles, outDir):
        '''
        Write wav.scp, utt2spk, spk2utt and text for <wavFiles> into <outDir>.

        Args:
            <wavFiles>: a list of wav file paths. The parent directory name of each file is used as the speaker ID.
            <outDir>: output directory.
        '''
        wavScp = exkaldi.ListTable(name="wavScp")
        utt2spk = exkaldi.ListTable(name="utt2spk")
        spk2utt = exkaldi.ListTable(name="spk2utt")
        transcription = exkaldi.ListTable(name="trans")
        for Name in wavFiles:
            # Skip the SA1 / SA2 utterances (suffix compared case-insensitively).
            if Name[-7:].upper() in ["SA1.WAV", "SA2.WAV"]:
                continue
            speaker = os.path.basename(os.path.dirname(Name))
            uttID = speaker + "_" + os.path.basename(Name)[0:-4]
            wavFilePath = os.path.abspath(Name)
            # wav.scp
            if sphFlag:
                wavScp[uttID] = f"{sph2pipeTool} -f wav {wavFilePath} |"
            else:
                wavScp[uttID] = wavFilePath
            # utt2spk
            utt2spk[uttID] = speaker
            # spk2utt
            if speaker not in spk2utt.keys():
                spk2utt[speaker] = f"{uttID}"
            else:
                spk2utt[speaker] += f" {uttID}"
            # transcription: the phone label is the last column of each line of the
            # file sharing the wav file's stem.
            txtFile = Name[:-3] + txtFileSuffix
            phones = []
            with open(txtFile, "r", encoding="utf-8") as fr:
                for line in fr:
                    line = line.strip()
                    if len(line) == 0:
                        continue
                    phone = line.split()[-1]
                    # "q" is dropped instead of being mapped.
                    if phone == "q":
                        continue
                    phones.append(phoneMap_60_to_48[phone])
            transcription[uttID] = " ".join(phones)
        # Save to files
        wavScp.save(os.path.join(outDir, "wav.scp"))
        utt2spk.save(os.path.join(outDir, "utt2spk"))
        spk2utt.save(os.path.join(outDir, "spk2utt"))
        transcription.save(os.path.join(outDir, "text"))
        print(f"Generate data done: {outDir}.")

    # generate train data
    wavFiles = glob.glob(
        os.path.join(timitRoot, trainResourceDir, "*", "*",
                     f"*.{wavFileSuffix}"))
    generate_data(
        wavFiles=wavFiles,
        outDir=os.path.join(dataOutDir, "train"),
    )

    # generate dev and test data.
    # Both sets are picked from the test resource directory by the speaker lists of the Kaldi TIMIT recipe.
    for Name in ["dev", "test"]:
        spkListFile = os.path.join(ExkaldiInfo.KALDI_ROOT, "egs", "timit",
                                   "s5", "conf", f"{Name}_spk.list")
        with open(spkListFile, "r", encoding="utf-8") as fr:
            spkList = fr.readlines()
        wavFiles = []
        for spk in spkList:
            spk = spk.strip()
            if len(spk) == 0:
                continue
            # Speaker directory names follow the case of the corpus layout.
            if uppercaseFlag:
                spk = spk.upper()
            wavFiles.extend(
                glob.glob(
                    os.path.join(timitRoot, testResourceDir, "*", spk,
                                 f"*.{wavFileSuffix}")))
        generate_data(
            wavFiles=wavFiles,
            outDir=os.path.join(dataOutDir, Name),
        )