Example #1
def merge_archives(archives):
	'''
	Merge multiple archives into one.
	In particular, exkaldi Lattice objects also support this operation.
	The archives are combined with the plus operation.

	Args:
		<archives>: a list or tuple of multiple exkaldi archive objects which are the same class.
	
	Return:
		a new archive object.
	'''
	declare.is_classes("archives",archives,(list,tuple))
	declare.not_void("archives",archives)
	
	if type_name(archives[0]) != "Lattice":
		declare.belong_classes("archives",archives[0],[BytesMatrix,BytesVector,ListTable,NumpyMatrix,NumpyVector])

	result = archives[0]
	typeName = type_name(archives[0])
	names = [archives[0].name]

	for ark in archives[1:]:
		assert type_name(ark) == typeName,f"All archives to be merged must be the same class but got: {typeName}!={type_name(ark)}."
		result += ark
		names.append(ark.name)
	
	names = ",".join(names)
	result.rename(f"merge({names})")
	return result
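
A minimal usage sketch (archive names are hypothetical; both inputs must be the same exkaldi class):

# feat1 and feat2 are assumed to be two NumpyFeature archives.
merged = merge_archives([feat1, feat2])
print(merged.name)  # e.g. "merge(feat1,feat2)"
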
Example #2
def use_fmllr(feat, transMatrix, utt2spkFile):
    '''
	Transform features with per-speaker fMLLR transform matrices.

	Args:
		<feat>: exkaldi feature object.
		<transMatrix>: exkaldi fMLLR transform matrix object.
		<utt2spkFile>: utt2spk file name.
	
	Return:
		a new exkaldi feature object.
	'''
    if type_name(feat) == "BytesFeature":
        bytesFlag = True
        feat = feat.sort(by="utt")
    elif type_name(feat) == "NumpyFeature":
        bytesFlag = False
        feat = feat.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"<feat> should be an exkaldi feature object but got: {type_name(feat)}."
        )

    if type_name(transMatrix) == "BytesFmllrMatrix":
        transMatrix = transMatrix.sort(by="utt")
    elif type_name(transMatrix) == "NumpyFmllrMatrix":
        transMatrix = transMatrix.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"<transMatrix> should exkaldi fMLLR transform matrix object but got: {type_name(transMatrix)}."
        )

    transTemp = tempfile.NamedTemporaryFile("wb+", suffix="_trans.ark")
    try:
        transTemp.write(transMatrix.data)
        transTemp.seek(0)

        cmd = f'transform-feats --utt2spk=ark:{utt2spkFile} ark:{transTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        if cod != 0:
            print(err.decode())
            raise KaldiProcessError(
                "Failed to transform feature to fMLLR feature.")
        else:
            newName = f"fmllr({feat.name})"
            newFeat = BytesFeature(out, name=newName)
            if bytesFlag:
                return newFeat
            else:
                return newFeat.to_numpy()
    finally:
        transTemp.close()
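
A minimal usage sketch, assuming fmllrMat was estimated beforehand; the utt2spk path is hypothetical:

# feat: exkaldi feature object; fmllrMat: exkaldi fMLLR transform matrix object.
fmllrFeat = use_fmllr(feat, fmllrMat, "data/train/utt2spk")
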
Example #3
def compute_cmvn_stats(feat, spk2utt=None, name="cmvn"):
    '''
	Compute CMVN statistics.

	Args:
		<feat>: exkaldi feature object.
		<spk2utt>: spk2utt file or exkaldi ScriptTable object.
		<name>: a string.

	Return:
		An exkaldi CMVN statistics object.
	'''
    ExkaldiInfo.vertify_kaldi_existed()

    if type_name(feat) == "BytesFeature":
        feat = feat.sort("utt")
    elif type_name(feat) == "NumpyFeature":
        feat = feat.sort("utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected <feat> to be an exkaldi feature object but got {type_name(feat)}."
        )

    spk2uttTemp = tempfile.NamedTemporaryFile("w+", encoding="utf-8")
    try:
        if spk2utt is None:
            cmd = 'compute-cmvn-stats ark:- ark:-'
        else:
            if isinstance(spk2utt, str):
                if not os.path.isfile(spk2utt):
                    raise WrongPath(f"No such file:{spk2utt}.")
                spk2uttSorted = ScriptTable(
                    name="spk2utt").load(spk2utt).sort()
                spk2uttSorted.save(spk2uttTemp)
            elif isinstance(spk2utt, ScriptTable):
                spk2uttSorted = spk2utt.sort()
                spk2uttSorted.save(spk2uttTemp)
            else:
                raise UnsupportedType(
                    f"<spk2utt> should be a file path or ScriptTable object but got {type_name(spk2utt)}."
                )
            spk2uttTemp.seek(0)

            cmd = f'compute-cmvn-stats --spk2utt=ark:{spk2uttTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to compute CMVN statistics.')
        else:
            return BytesCMVNStatistics(out, name, indexTable=None)
    finally:
        spk2uttTemp.close()
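
A minimal usage sketch; passing a spk2utt file yields per-speaker statistics (path hypothetical):

cmvn = compute_cmvn_stats(feat, spk2utt="data/train/spk2utt", name="cmvn")
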
Example #4
	def __init__(self, data=None, wordSymbolTable=None, hmm=None, name="lat"):
		super().__init__(data, name)
		if wordSymbolTable is not None:
			assert type_name(wordSymbolTable) == "ListTable", f"<wordSymbolTable> must be exkaldi ListTable object but got: {type_name(wordSymbolTable)}."
		if hmm is not None:
			assert type_name(hmm) in ["MonophoneHMM","TriphoneHMM"], f"<hmm> must be exkaldi HMM object but got: {type_name(hmm)}."
		
		self.__wordSymbolTable = wordSymbolTable
		self.__hmm = hmm
Example #5
def ctc_greedy_search(prob, vocabs, blankID=None):
    '''
    The best path decoding algorithm.

    Args:
        <prob>: an exkaldi probability object. This probability should be an output of a neural network with CTC loss function.
        <vocabs>: a list of vocabulary.
        <blankID>: specify the ID of the blank symbol. If None, use the last dimension of <prob>.
    Return:
        An exkaldi Transcription object of decoding results.  
    '''
    assert isinstance(
        vocabs,
        list), f"<vocabs> must be a list of vocabulary but got {vocabs}."

    if type_name(prob) == "BytesProbability":
        prob = prob.to_numpy()
    elif type_name(prob) == "NumpyProbability":
        pass
    else:
        raise UnsupportedType(
            f"<prob> should be an exkaldi probability object but got {type_name(prob)}."
        )

    probDim = prob.dim
    if len(vocabs) == probDim:
        if blankID is None:
            blankID = probDim - 1
        else:
            assert isinstance(
                blankID, int
            ) and 0 <= blankID < probDim, f"<blankID> {blankID} is out of range [0, {probDim-1}]."
    elif len(vocabs) == probDim - 1:
        if blankID is None:
            blankID = probDim - 1
        else:
            assert blankID == probDim - 1, f"The dimensionality of probability is {probDim} but there are only {len(vocabs)} words. In this case, blank ID must be {probDim-1} but got {blankID}."
    else:
        raise WrongDataFormat(
            f"The dimensionality of probability {probDim} does not match the number of words {len(vocabs)}."
        )

    results = Transcription(name="bestPathResult")
    for utt, pb in prob.items:
        assert isinstance(pb, np.ndarray) and len(
            pb.shape) == 2, "Unsupported probability matrix format."
        best_path = np.argmax(pb, 1)
        best_chars_collapsed = [
            vocabs[ID] for ID, _ in groupby(best_path) if ID != blankID
        ]
        try:
            results[utt] = " ".join(best_chars_collapsed)
        except Exception as e:
            print("<vocab> might has non-string items.")
            raise e
    return results
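
The collapse rule at the heart of this function can be checked in isolation. A self-contained sketch with an invented probability matrix, where the blank symbol takes the last dimension (ID 2):

import numpy as np
from itertools import groupby

vocabs = ["a", "b"]              # blank is ID 2, the last dimension
pb = np.array([[0.8, 0.1, 0.1],  # frame-wise probabilities over (a, b, blank)
               [0.7, 0.2, 0.1],
               [0.1, 0.1, 0.8],
               [0.1, 0.8, 0.1],
               [0.2, 0.7, 0.1]])
best_path = np.argmax(pb, 1)     # [0, 0, 2, 1, 1]
collapsed = [vocabs[ID] for ID, _ in groupby(best_path) if ID != 2]
print(" ".join(collapsed))       # -> "a b": repeats merged, blanks removed
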
Example #6
	def am_rescore(self, hmm, feat):
		"""
		Replace the acoustic scores with new HMM-GMM model.
		"""
		'''
		Determinize the lattice.

		Args:
			<hmm>: exkaldi HMM object or file path.

		Return:
			An new Lattice object.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No lattice data is available.')

		hmmTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
		featTemp = tempfile.NamedTemporaryFile("wb+", suffix="_feat.ark")
		try:
			if isinstance(hmm, str):
				assert os.path.isfile(hmm), f"No such file: {hmm}."
				hmmFile = hmm
			elif type_name(hmm) in ["BaseHMM", "MonophoneHMM", "TriphoneHMM"]:
				hmmTemp.write(hmm.data)
				hmmTemp.seek(0)
				hmmFile = hmmTemp.name
			else:
				raise UnsupportedType(f"<hmm> should be file path or exkaldi HMM object but got: {type_name(hmm)}.")
	
			if type_name(feat) == "BytesFeature":
				feat = feat.sort(by="utt")
			elif type_name(feat) == "NumpyFeature":
				feat = feat.sort(by="utt").to_numpy()
			else:
				raise UnsupportedType(f"<feat> should be exkaldi feature object but got: {type_name(feat)}.")

			featTemp.write(feat.data)
			featTemp.seek(0)
			featFile = featTemp.name

			cmd = f"gmm-rescore-lattice	{hmmFile} ark:- ark:{featFile} ark:-"

			out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

			if cod != 0 or out == b'':
				print(err.decode())
				raise KaldiProcessError("Failed to determinize lattice.")
			else:
				newName = f"am_rescore({self.name})"
				return Lattice(data=out, wordSymbolTable=self.wordSymbolTable, hmm=self.hmm, name=newName)
		finally:
			hmmTemp.close()
			featTemp.close()
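
A minimal usage sketch (object names hypothetical; <feat> must cover the utterances in the lattice):

rescored = lat.am_rescore(newHmm, feat)
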
Example #7
def ctc_greedy_search(prob, vocabs, blankID=None):
    '''
    The best path decoding algorithm.

    Args:
        <prob>: an exkaldi probability object. This probability should be an output of a neural network with CTC loss function.
        <vocabs>: a list of vocabulary.
        <blankID>: specify the ID of the blank symbol. If None, use the last dimension of <prob>.
    Return:
        An exkaldi Transcription object of decoding results.  
    '''
    declare.is_classes("vocabs", vocabs, list)

    declare.is_probability("prob", prob)
    if type_name(prob) == "BytesProb":
        prob = prob.to_numpy()
    elif type_name(prob) == "IndexTable":
        prob = prob.read_record("prob").to_numpy()

    probDim = prob.dim
    if len(vocabs) == probDim:
        if blankID is None:
            blankID = probDim - 1
        declare.is_positive_int("blankID", blackID)
        declare.in_boundary("blankID", blackID, 0, probDim - 1)
    elif len(vocabs) == probDim - 1:
        if blankID is None:
            blankID = probDim - 1
        else:
            assert blankID == probDim - 1, f"The dimensionality of probability is {probDim} but there are only {len(vocabs)} words. In this case, blank ID must be {probDim-1} but got {blankID}."
    else:
        raise WrongDataFormat(
            f"The dimensionality of probability {probDim} does not match the number of words {len(vocabs)}."
        )

    results = Transcription(name="bestPathResult")
    for utt, pb in prob.items:
        declare.is_classes("prob", prob, np.ndarray)
        declare.is_classes("the rank of matrix shape", len(pb.shape),
                           "expected rank", 2)
        best_path = np.argmax(pb, 1)
        best_chars_collapsed = [
            vocabs[ID] for ID, _ in groupby(best_path) if ID != blankID
        ]
        try:
            results[utt] = " ".join(best_chars_collapsed)
        except Exception as e:
            e.args = ("<vocab> might have non-string items.\n" + e.args[0], )
            raise e
    return results
Example #8
    def perplexity(self, sentence):
        '''
		Compute perplexity of a sentence.

		Args:
			<sentence>: a sentence whose words are separated by spaces and which has no boundary symbols, or an exkaldi Transcription object.

		Return:
			If <sentence> is string, return a perplexity value.
			Else return an exkaldi Metric object.
		'''
        def perplexity_one(one):
            if one.count(" ") < 1:
                print(
                    f"Warning: sentence doesn't seem to be separated by spaces or extremely short: {one}."
                )
            return self.__model.perplexity(one)

        if isinstance(sentence, str):
            return perplexity_one(sentence)
        elif type_name(sentence) == "Transcription":
            scores = {}
            for uttID, txt in sentence.items():
                assert isinstance(
                    txt, str
                ), f"Transcription should be a string of words but got: {type_name(txt)} at utt-ID {uttID}."
                scores[uttID] = perplexity_one(txt)
            return Metric(scores, name=f"LMperplexity({sentence.name})")
        else:
            raise UnsupportedType(
                f"<sentence> should be string or exkaldi Transcription object but got: {type_name(sentence)}."
            )
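
A minimal usage sketch (the model object and inputs are hypothetical):

ppl = lm.perplexity("how are you")   # a float for a plain string
metric = lm.perplexity(testTrans)    # an exkaldi Metric for a Transcription
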
Example #9
def load_trans(target, name="transcription"):
    '''
	Load transcription from file.

	Args:
		<target>: transcription file path, dict, Transcription or ScriptTable object.
		<name>: a string.

	Return:
		An exkaldi Transcription object.
	'''
    if type_name(target) in ["dict", "Transcription", "ScriptTable"]:
        for utt, utterance in target.items():
            assert isinstance(
                utt, str) and len(utt) > 0, "Utterance ID should be a string."
            assert isinstance(utterance,
                              str), "Utterance text should be a string."

        return Transcription(target, name)

    elif isinstance(target, str):
        assert os.path.isfile(target), f"No such file:{target}."

        result = Transcription(name=name)
        result.load(target)

        return result

    else:
        raise UnsupportedType(
            "<target> should be file path, dict object or ScriptTable object.")
Example #10
def compute_postprob_norm(ali, posrProbDim):
    '''
	Compute alignment counts in order to normalize acoustic model posterior probability.
	For more help information, look at the Kaldi <analyze-counts> command.

	Args:
		<ali>: exkaldi NumpyAlignmentPhone or NumpyAlignmentPdf object.
		<posrProbDim>: the dimensionality of posterior probability.
	Return:
		A numpy array of the normalization.
	'''
    ExkaldiInfo.vertify_kaldi_existed()

    if type_name(ali) in ["NumpyAlignmentPhone", "NumpyAlignmentPdf"]:
        pass
    else:
        raise UnsupportedType(
            f'Expected exkaldi NumpyAlignmentPhone or NumpyAlignmentPdf object but got a {type_name(ali)}.')

    cmd = f"analyze-counts --print-args=False --verbose=0 --binary=false --counts-dim={posrProbDim} ark:- -"
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=ali.data)
    if (isinstance(cod, int) and cod != 0) or out == b"":
        print(err.decode())
        raise KaldiProcessError('Failed to analyze counts.')
    else:
        out = out.decode().strip().strip("[]").strip().split()
        counts = np.array(out, dtype=np.int32)
        countBias = np.log(counts / np.sum(counts))
        return countBias
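
The returned bias is simply the log of the normalized counts. A toy check with invented counts:

import numpy as np

counts = np.array([30, 60, 10], dtype=np.int32)  # invented alignment counts
countBias = np.log(counts / np.sum(counts))      # log([0.3, 0.6, 0.1])
print(countBias)                                 # ~ [-1.204, -0.511, -2.303]
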
Example #11
def gmm_align(hmm, feat, trainGraphFile, transitionScale=1.0, acousticScale=0.1, 
				selfloopScale=0.1, beam=10, retry_beam=40, boost_silence=1.0, careful=False, name="ali", lexicons=None):
		'''
		Align acoustic feature with the Kaldi Viterbi algorithm.
			<lexicons>: None. If no any lexicons provided in DecisionTree, this is expected.
						In this step, we will use "context_indep" lexicon.
		'''
		if isinstance(hmm,str):
			assert os.path.isfile(hmm), f"No such file: {hmm}."
			assert type_name(lexicons) == "LexiconBank", "Expected <lexicons> is provided in this case."
			hmm = load_hmm(hmm, lexicons=lexicons)
		else:
			assert type_name(hmm) in ["BaseHMM","MonophoneHMM","TriphoneHMM"], f"<hmm> should be exkaldi HMM object but got: {hmm}."
		
		return hmm.align(feat, trainGraphFile, transitionScale, acousticScale, selfloopScale, 
						beam, retry_beam, boost_silence, careful, name, lexicons)
Example #12
def load_list_table(target, name="listTable"):
    '''
	Generate a list table object from dict object or file.

	Args:
		<target>: dict object or a file path.
	
	Return:
		a ListTable object.
	'''
    declare.is_classes("target", target, [dict, ListTable, str])

    newTable = ListTable(name=name)
    if type_name(target) in ["dict", "ListTable"]:
        newTable.update(target)
        return newTable

    else:
        files = list_files(target)
        for filePath in files:
            with open(filePath, "r", encoding="utf-8") as fr:
                lines = fr.readlines()
            for index, line in enumerate(lines, start=1):
                t = line.strip().split(maxsplit=1)
                if len(t) < 2:
                    raise WrongDataFormat(
                        f"Line Number: {index}\n" + f"Line Content: {line}\n" +
                        f"Missing paired key and value information in file:{filePath}."
                    )
                else:
                    newTable[t[0]] = t[1]

        return newTable
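
A minimal usage sketch; each line of the file must hold a key and a value (file name and contents invented):

# wav.scp:
#   utt1 /data/wav/utt1.wav
#   utt2 /data/wav/utt2.wav
table = load_list_table("wav.scp", name="wav")
print(table["utt1"])  # -> "/data/wav/utt1.wav"
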
Example #13
    def score(self, sentence, bos=True, eos=True):
        '''
		Score a sentence.

		Args:
			<sentence>: a string without boundary symbols, or an exkaldi Transcription object.
			<bos>: If True, add <s> to the head.
			<eos>: If True, add </s> to the tail.
		Return:
			If <sentence> is string, return a float log-value.
			Else, return an exkaldi Metric object.
		'''
        def score_one(one, bos, eos):
            if one.count(" ") < 1:
                print(
                    f"Warning: sentence doesn't seem to be separated by spaces or extremely short: {one}."
                )
            return self.__model.score(one, bos, eos)

        if isinstance(sentence, str):
            return score_one(sentence, bos, eos)
        elif type_name(sentence) == "Transcription":
            scores = {}
            for uttID, txt in sentence.items():
                assert isinstance(
                    txt, str
                ), f"Transcription should be a string of words but got: {type_name(txt)} at utt-ID {uttID}."
                scores[uttID] = score_one(txt, bos, eos)
            return Metric(scores, name=f"LMscore({sentence.name})")
        else:
            raise UnsupportedType(
                f"<sentence> should be string or exkaldi Transcription object but got: {type_name(sentence)}."
            )
Example #14
def transform_feat(feat, matrixFile):
    '''
	Transform feat by a transform matrix. Typically, LDA or MLLT matrices.

	Args:
		<feat>: exkaldi feature object.
		<matrixFile>: file name.
	
	Return:
		a new exkaldi feature object.
	'''
    assert isinstance(
        matrixFile, str
    ), f"<matrixFile> should be a file path but got: {type_name(matrixFile)}."
    if not os.path.isfile(matrixFile):
        raise WrongPath(f"No such file: {matrixFile}.")

    if type_name(feat) == "BytesFeature":
        bytesFlag = True
    elif type_name(feat) == "NumpyFeature":
        bytesFlag = False
        feat = feat.to_bytes()
    else:
        raise UnsupportedType(
            f"<feat> should be an exkaldi feature object but got: {type_name(feat)}."
        )

    cmd = f'transform-feats {matrixFile} ark:- ark:-'

    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=feat.data)

    if cod != 0:
        print(err.decode())
        raise KaldiProcessError("Failed to transform feature.")
    else:
        newName = f"tansform({feat.name})"
        newFeat = BytesFeature(out, name=newName)
        if bytesFlag:
            return newFeat
        else:
            return newFeat.to_numpy()
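
A minimal usage sketch (matrix file path hypothetical):

ldaFeat = transform_feat(feat, "exp/tri2/final.mat")
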
Example #15
def nn_align(hmm, prob, trainGraphFile, transitionScale=1.0, acousticScale=0.1, 
				selfloopScale=0.1, beam=10, retry_beam=40, name="ali"):
	'''
	Align the neural network acoustic output probability.
	'''
	if type_name(prob) == "BytesProbability":
		pass
	elif type_name(prob) == "NumpyProbability":
		prob = prob.to_bytes()
	else:
		raise UnsupportedType(f"Expected <prob> is an exkaldi probability object but got: {type_name(prob)}.")

	hmmTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
	try:
		if isinstance(hmm,str):
			assert os.path.isfile(hmm), f"No such file: {hmm}."
			hmmFile = hmm
		else:
			assert type_name(hmm) in ["BaseHMM","MonophoneHMM","TriphoneHMM"], f"<hmm> should be exkaldi HMM object but got: {hmm}."
			hmmTemp.write(hmm.data)
			hmmTemp.seek(0)
			hmmFile = hmmTemp.name
		
		cmd = f"align-compiled-mapped --transition-scale={transitionScale} --acoustic-scale={acousticScale} --self-loop-scale={selfloopScale} "
		cmd += f"--beam={beam} --retry-beam={retry_beam} {hmmFile} ark:{trainGraphFile} ark:- ark:-"

		out,err,cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=prob.data)

		if cod != 0:
			print(err.decode())
			raise KaldiProcessError("Failed to align probability.")
		else:
			return BytesAlignmentTrans(out,name=name)
	
	finally:
		hmmTemp.close()
Example #16
def use_cmvn_sliding(feat, windowsSize=None, std=False):
    '''
	Apply sliding CMVN statistics.

	Args:
		<feat>: exkaldi feature object.
		<windowsSize>: window size. If None, use a window size larger than the longest utterance in the feature.
		<std>: a bool value.

	Return:
		An exkaldi feature object.
	'''
    ExkaldiInfo.vertify_kaldi_existed()

    if isinstance(feat, BytesFeature):
        pass
    elif type_name(feat) == "NumpyFeature":
        feat = feat.to_bytes()
    else:
        raise UnsupportedType(
            f"Expected <feat> to be an exkaldi feature object but got {type_name(feat)}."
        )

    if windowsSize is None:
        featLen = feat.lens[1]
        maxLen = max([length for utt, length in featLen])
        windowsSize = math.ceil(maxLen / 100) * 100
    else:
        assert isinstance(windowsSize,
                          int), "Expected <windowsSize> to be an int value."

    if std is True:
        std = 'true'
    else:
        std = 'false'

    cmd = f'apply-cmvn-sliding --cmn-window={windowsSize} --min-cmn-window=100 --norm-vars={std} ark:- ark:-'
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=feat.data)
    if (isinstance(cod, int) and cod != 0) or out == b'':
        print(err.decode())
        raise KaldiProcessError('Failed to use sliding CMVN.')
    else:
        newName = f"cmvn({feat.name},{windowsSize})"
        return BytesFeature(out, newName, indexTable=None)
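
When <windowsSize> is None, the window is the longest utterance length rounded up to a multiple of 100 frames. A quick check of that rule with an invented length:

import math

maxLen = 537                                 # longest utterance, in frames
windowsSize = math.ceil(maxLen / 100) * 100  # -> 600
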
Example #17
    def send_report(self, info):
        '''
		Send information; it will be retained until you run the statistics with .collect_report().

		Args:
			<info>: a Python dict object including names and their values of int or float type,
					such as {"epoch":epoch,"train_loss":loss,"train_acc":acc}.
					The value can be a Python int or float, a NumPy int or float, or a NumPy ndarray with only one value.
		'''
        assert isinstance(info,
                          dict), "Expected <info> to be a Python dict object."

        for name, value in info.items():
            assert isinstance(name, str) and len(
                name
            ) > 0, f"The name of info should be a valid string but got {type_name(name)}."
            valueDtype = type_name(value)
            if valueDtype.startswith(
                    "int"):  # Python int object, Numpy int object
                pass

            elif valueDtype.startswith(
                    "float"):  # Python float object, Numpy float object
                self.currentFieldIsFloat[name] = True

            elif valueDtype == "ndarray" and value.shape == (
            ):  # Numpy ndarray with only one value
                if value.dtype == "float":
                    self.currentFieldIsFloat[name] = True
            else:
                raise UnsupportedType(
                    f"Expected int or float value but got {type_name(value)}.")

            name = name.lower()
            if not name in self.currentField.keys():
                self.currentField[name] = []
            self.currentField[name].append(value)
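
A minimal usage sketch (reporter object and field values hypothetical):

reporter.send_report({"epoch": 1, "train_loss": 0.73, "train_acc": 0.81})
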
Example #18
def gmm_decode(feat, hmm, HCLGFile, wordSymbolTable, beam=10, latBeam=8, acwt=1,
				minActive=200, maxActive=7000, maxMem=50000000, config=None, maxThreads=1):
	'''
	Decode by generating lattice from feature and GMM model.

	Args:
		<feat>: An exkaldi feature object.
		<hmm>: An exkaldi HMM object or file path.
		<HCLGFile>: HCLG file path.
		<wordSymbolTable>: words.txt file path or exkaldi LexiconBank object or exkaldi ListTable object.
		<beam>: beam size.
		<latBeam>: lattice beam size.
		<acwt>: acoustic model weight.
		<minActive>: .
		<maxActive>: .
		<maxMem>: .
		<config>: decode configure file.
		<maxThreads>: the number of threads.
		
		Some common options can be assigned directly. If you need more, set <config> to your own configuration; in that case, the common options provided here are ignored.
		You can use the .check_config('gmm_decode') function to see which options you can set.
		Also run the shell command "gmm-latgen-faster" to look up their meanings.
	Return:
		An exkaldi Lattice object.
	''' 
	ExkaldiInfo.vertify_kaldi_existed()

	if type_name(feat) == "BytesFeature":
		pass
	elif type_name(feat) == "NumpyFeature":
		feat = feat.to_bytes()
	else:
		raise UnsupportedType(f"Expected <feat> is an exkaldi feature object but got: {type_name(feat)}.")
		
	assert isinstance(HCLGFile, str), "<HCLGFile> should be a file path."
	if not os.path.isfile(HCLGFile):
		raise WrongPath(f"No such file:{HCLGFile}")

	if maxThreads > 1:
		kaldiTool = f"gmm-latgen-faster-parallel --num-threads={maxThreads} "
	else:
		kaldiTool = "gmm-latgen-faster " 

	kaldiTool += f'--allow-partial=true '
	kaldiTool += f'--min-active={minActive} '
	kaldiTool += f'--max-active={maxActive} '  
	kaldiTool += f'--max-mem={maxMem} '
	kaldiTool += f'--beam={beam} '
	kaldiTool += f'--lattice-beam={latBeam} '
	kaldiTool += f'--acoustic-scale={acwt} '

	wordsTemp = tempfile.NamedTemporaryFile("w+", suffix="_words.txt", encoding="utf-8")
	modelTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")

	try:
		if type_name(wordSymbolTable) == "LexiconBank":
			wordSymbolTable.dump_dict("words", wordsTemp)
			wordsFile = wordsTemp.name
		elif type_name(wordSymbolTable) == "ListTable":
			wordSymbolTable.save(wordsTemp)
			wordsTemp.seek(0)
			wordsFile = wordsTemp.name
		elif isinstance(wordSymbolTable, str):
			if not os.path.isfile(wordSymbolTable):
				raise WrongPath(f"No such file:{wordSymbolTable}.")
			else:
				wordsFile = wordSymbolTable
		else:
			raise UnsupportedType(f"<wordSymbolTable> should be a file path or exkaldi LexiconBank object but got {type_name(wordSymbolTable)}.")

		kaldiTool += f'--word-symbol-table={wordsFile} '

		if config is not None:
			if check_config(name='gmm_decode', config=config):
				for key,value in config.items():
					if isinstance(value, bool):
						if value is True:
							kaldiTool += f"{key} "
					else:
						kaldiTool += f" {key}={value}"

		if type_name(hmm) in ["MonophoneHMM", "TriphoneHMM"]:
			modelTemp.write(hmm.data)
			modelTemp.seek(0)
			hmmFile = modelTemp.name
		elif isinstance(hmm, str):
			if not os.path.isfile(hmm):
				raise WrongPath(f"No such file:{hmm}.")
			else:
				hmmFile = hmm
		else:
			raise UnsupportedType(f"<hmm> should be exkaldi HMM object or file path but got {type_name(hmm)}.")
		
		cmd = f'{kaldiTool} {hmmFile} {HCLGFile} ark:- ark:-'
		out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=feat.data)

		if cod != 0 or out == b'':
			print(err.decode())
			raise KaldiProcessError('Failed to generate lattice.')
		else:
			newName = f"lat({feat.name})"
			return Lattice(data=out, name=newName)
	
	finally:
		wordsTemp.close()
		modelTemp.close()
Example #19
def use_cmvn(feat, cmvn, utt2spk=None, std=False):
    '''
	Apply CMVN statistics to feature.

	Args:
		<feat>: exkaldi feature object.
		<cmvn>: exkaldi CMVN statistics object.
		<utt2spk>: utt2spk file path or ScriptTable object.
		<std>: If true, apply std normalization.

	Return:
		A new feature object.
	'''
    ExkaldiInfo.vertify_kaldi_existed()

    if type_name(feat) == "BytesFeature":
        feat = feat.sort(by="utt")
    elif type_name(feat) == "NumpyFeature":
        feat = feat.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi feature but got {type_name(feat)}.")

    if type_name(cmvn) == "BytesCMVNStatistics":
        cmvn = cmvn.sort(by="utt")
    elif type_name(cmvn) == "NumpyCMVNStatistics":
        cmvn = cmvn.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi CMVN statistics but got {type_name(cmvn)}.")

    cmvnTemp = tempfile.NamedTemporaryFile('wb+', suffix='_cmvn.ark')
    utt2spkTemp = tempfile.NamedTemporaryFile('w+',
                                              suffix="_utt2spk",
                                              encoding="utf-8")
    try:
        cmvnTemp.write(cmvn.data)
        cmvnTemp.seek(0)

        if std is True:
            stdOption = " --norm-vars true"
        else:
            stdOption = ""

        if utt2spk is None:
            cmd = f'apply-cmvn{stdOption} ark:{cmvnTemp.name} ark:- ark:-'
        else:
            if isinstance(utt2spk, str):
                if not os.path.isfile(utt2spk):
                    raise WrongPath(f"No such file:{utt2spk}.")
                utt2spkSorted = ScriptTable(
                    name="utt2spk").load(utt2spk).sort()
                utt2spkSorted.save(utt2spkTemp)
            elif isinstance(utt2spk, ScriptTable):
                utt2spkSorted = utt2spk.sort()
                utt2spkSorted.save(utt2spkTemp)
            else:
                raise UnsupportedType(
                    f"<utt2spk> should be a file path or ScriptTable object but got {type_name(utt2spk)}."
                )
            utt2spkTemp.seek(0)

            cmd = f'apply-cmvn{stdOption} --utt2spk=ark:{utt2spkTemp.name} ark:{cmvnTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to apply CMVN statistics.')
        else:
            newName = f"cmvn({feat.name},{cmvn.name})"
            if type_name(feat) == "NumpyFeature":
                return BytesFeature(out, newName, indexTable=None).to_numpy()
            else:
                return BytesFeature(out, newName, indexTable=None)
    finally:
        cmvnTemp.close()
        utt2spkTemp.close()
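
A minimal usage sketch combining this with compute_cmvn_stats above (paths hypothetical):

cmvn = compute_cmvn_stats(feat, spk2utt="data/train/spk2utt")
normFeat = use_cmvn(feat, cmvn, utt2spk="data/train/utt2spk", std=False)
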
Example #20
def load_index_table(target, name="index", useSuffix=None):
    '''
	Load an index table from a dict object or an archive table file.

	Args:
		<target>: dict object, .ark or .scp file, IndexTable object, or bytes archive object.
		<name>: a string.
		<useSuffix>: "ark" or "scp". We will check the file type by its suffix. 
								But if <target> is file path and not default suffix (ark or scp),you have to declare which type it is.

	Return:
		an exkaldi IndexTable object.
	'''
    newTable = IndexTable(name=name)

    if type_name(target) == "dict":
        for key, value in target.items():
            if isinstance(value, (list, tuple)):
                assert len(value) in [
                    3, 4
                ], f"Expected (frames,start index,data size[,file path]) but {value} does not match."
                newTable[key] = newTable.spec(*value)
            elif type_name(value) == "Index":
                newTable[key] = value
            else:
                raise WrongDataFormat(
                    f"Expected list or tuple but got wrong index info format: {value}."
                )

        return newTable

    elif type_name(target) == "IndexTable":
        newTable.update(target)
        return newTable

    elif isinstance(target, BytesArchive):
        newTable.update(target.indexTable)
        return newTable

    else:
        fileList = list_files(target)

        if useSuffix is not None:
            declare.is_valid_string("useSuffix", useSuffix)
            useSuffix = useSuffix.strip()[-3:].lower()
            declare.is_instances("useSuffix", useSuffix, ["ark", "scp"])
        else:
            useSuffix = ""

        for fileName in fileList:

            if fileName.rstrip().endswith(".ark"):
                t = __read_index_table_from_ark_file(fileName)
            elif fileName.rstrip().endswith(".scp"):
                t = __read_index_table_from_scp_file(fileName)
            elif useSuffix == "ark":
                t = __read_index_table_from_ark_file(fileName)
            elif useSuffix == "scp":
                t = __read_index_table_from_scp_file(fileName)
            else:
                raise UnsupportedType(
                    "Unknown file suffix. Specify <useSuffix> please.")

            newTable.update(t)

        return newTable
Example #21
def accuracy(ref, hyp, ignore=None, mode='all'):
    '''
	Compute the one-to-one matching score between two sequences.

	Args:
		<ref>,<hyp>: iterable objects like list, tuple or NumPy array. They will be flattened before scoring.
		<ignore>: ignore specific symbols.
		<mode>: If <mode> is "all", compute the one-to-one matching score. For example, if <ref> is (1,2,3,4) and <hyp> is (1,2,2,4), the score will be 0.75.
				 If <mode> is "present", only the members of <hyp> that appear in <ref> will be scored, regardless of position.
	Return:
		a namedtuple object of score information.
	'''
    assert type_name(ref) != "Transcription" and type_name(
        hyp
    ) != "Transcription", "Exkaldi Transcription objects are unsupported in this function."

    assert mode in ['all',
                    'present'], 'Expected <mode> to be "present" or "all".'

    x = flatten(ref)
    x = list(filter(lambda i: i != ignore, x))
    y = flatten(hyp)
    y = list(filter(lambda i: i != ignore, y))

    if mode == 'all':
        i = 0
        score = 0
        while True:
            if i >= len(x) or i >= len(y):
                break
            elif x[i] == y[i]:
                score += 1
            i += 1
        if i < len(x) or i < len(y):
            raise WrongOperation(
                '<ref> and <hyp> have different length to score.')
        else:
            if len(x) == 0:
                accuracy = 1.0
            else:
                accuracy = score / len(x)

            return namedtuple("Score",
                              ["accuracy", "items", "rightItems"])(accuracy,
                                                                   len(x),
                                                                   score)
    else:
        x = sorted(x)
        score = 0
        for i in y:
            if i in x:
                score += 1
        if len(y) == 0:
            if len(x) == 0:
                accuracy = 1.0
            else:
                accuracy = 0.0
        else:
            accuracy = score / len(y)

        return namedtuple("Score",
                          ["accuracy", "items", "rightItems"])(accuracy,
                                                               len(y), score)
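
The docstring example can be verified directly with toy sequences:

score = accuracy((1, 2, 3, 4), (1, 2, 2, 4), mode='all')
print(score.accuracy)    # -> 0.75 (3 of 4 positions match)
print(score.rightItems)  # -> 3
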
Example #22
def edit_distance(ref, hyp, ignore=None, mode='present'):
    '''
	Compute edit-distance score.

	Args:
		<ref>, <hyp>: Transcription objects or iterable objects like list, tuple or NumPy array. It will be flattened before scoring.
		<ignore>: Ignoring specific symbols.
		<mode>: When both are Transcription objects, if mode is 'present', skip the missed utterances.
	Return:
		a namedtuple object including score information.	
	'''
    if type_name(ref) == "Transcription":
        pass
    elif isinstance(ref, str):
        if not os.path.isfile(ref):
            raise WrongPath(f"No such file:{ref}.")
        else:
            ref = load_trans(ref)
    else:
        raise UnsupportedType(
            '<ref> should be exkaldi Transcription object or file path.')

    if type_name(hyp) == "Transcription":
        pass
    elif isinstance(hyp, str):
        if not os.path.isfile(hyp):
            raise WrongPath(f"No such file:{hyp}.")
        else:
            hyp = load_trans(hyp)
    else:
        raise UnsupportedType(
            '<hyp> should be exkaldi Transcription object or file path.')

    allED = 0
    words = 0
    SER = 0
    sentences = 0
    wrongSentences = 0
    missedSentences = 0

    ref = ref.sort()
    hyp = hyp.sort()
    for utt, hypTrans in hyp.items():
        try:
            refTrans = ref[utt]
        except KeyError as e:
            if mode == "all":
                raise Exception(
                    "Missing transcription in reference, set <mode> as 'all' to skip it."
                )
            else:
                missedSentences += 1
        else:
            sentences += 1
            refTrans = refTrans.split()
            hypTrans = hypTrans.split()
            ed, wds = pure_edit_distance(refTrans, hypTrans, ignore=ignore)
            allED += ed
            words += wds
            if ed > 0:
                wrongSentences += 1
    if sentences == 0:
        raise Exception("Missing all transcription in reference.")

    return namedtuple("Score", [
        "editDistance", "words", "SER", "sentences", "wrongSentences",
        "missedSentences"
    ])(allED, words, wrongSentences / sentences, sentences, wrongSentences,
       missedSentences)
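
A minimal usage sketch (transcription objects hypothetical); SER here is the fraction of scored sentences with a nonzero edit distance:

score = edit_distance(refTrans, hypTrans, mode='present')
print(score.editDistance, score.words, score.SER)
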
Example #23
	def get_1best(self, wordSymbolTable=None, hmm=None, lmwt=1, acwt=1.0, phoneLevel=False):
		'''
		Get the 1-best result in text format.

		Args:
			<wordSymbolTable>: None or file path or ListTable object or LexiconBank object.
			<hmm>: None or file path or HMM object.
			<lmwt>: language model weight.
			<acwt>: acoustic model weight.
			<phoneLevel>: If True, return phone results.
		Return:
			An exkaldi Transcription object.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No data in lattice.')

		assert isinstance(lmwt, int) and lmwt >= 0, "Expected <lmwt> to be a non-negative int value."

		if wordSymbolTable is None:
			assert self.wordSymbolTable is not None, "<wordSymbolTable> is necessary because no word-symbol table is available."
			wordSymbolTable = self.wordSymbolTable
		
		if hmm is None:
			assert self.hmm is not None, "<hmm> is necessary because no HMM is available."
			hmm = self.hmm

		modelTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
		wordSymbolTemp = tempfile.NamedTemporaryFile("w+", suffix="_words.txt", encoding="utf-8")

		try:
			if isinstance(wordSymbolTable, str):
				assert os.path.isfile(wordSymbolTable), f"No such file: {wordSymbolTable}."
				wordsFile = wordSymbolTable
			elif type_name(wordSymbolTable) == "LexiconBank":
				if phoneLevel:
					wordSymbolTable.dump_dict("phones", wordSymbolTemp)
				else:
					wordSymbolTable.dump_dict("words", wordSymbolTemp)
				wordsFile = wordSymbolTemp.name
			elif type_name(wordSymbolTable) == "ListTable":
				wordSymbolTable.save(wordSymbolTemp)
				wordSymbolTemp.seek(0)
				wordsFile = wordSymbolTemp.name
			else:
				raise UnsupportedType(f"<wordSymbolTable> should be file name, LexiconBank object or ListTable object but got: {type_name(wordSymbolTable)}.")

			if isinstance(hmm, str):
				assert os.path.isfile(hmm), f"No such file: {hmm}."
				hmmFile = hmm
			elif type_name(hmm) in ["MonophoneHMM","TriphoneHMM"]:
				hmm.save(modelTemp)
				hmmFile = modelTemp.name
			else:
				raise UnsupportedType(f"<hmm> should be file name, exkaldi HMM object but got: {type_name(hmm)}.")

			if phoneLevel:
				cmd0 = f'lattice-align-phones --replace-output-symbols=true {hmmFile} ark:- ark:- | '
			else:
				cmd0 = ""

			cmd1 = f"lattice-best-path --lm-scale={lmwt} --acoustic-scale={acwt} --word-symbol-table={wordsFile} --verbose=2 ark:- ark,t:- "
			cmd = cmd0 + cmd1

			out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)
			if cod != 0 or out == b'':
				print(err.decode())
				raise KaldiProcessError('Failed to get 1-best from lattice.')
			else:
				out = out.decode().strip().split("\n")
				if phoneLevel:
					newName = "1-best-phones"
				else:
					newName = "1-best-words"

				results = Transcription(name=newName)
				for re in out:
					re = re.strip().split(maxsplit=1)
					if len(re) == 0:
						continue
					elif len(re) == 1:
						results[re[0]] = " "
					else:
						results[re[0]] = re[1]
				return results

		finally:
			modelTemp.close()
			wordSymbolTemp.close()
Example #24
def __compute_feature(target,kaldiTool,useSuffix=None,name="feat",outFile=None):
	'''
	The base function to compute features.
	'''
	declare.kaldi_existed()

	if useSuffix != None:
		declare.is_valid_string("useSuffix",useSuffix)
		useSuffix = useSuffix.strip().lower()[-3:]
		declare.is_instances("useSuffix",useSuffix,["scp","wav"])
	else:
		useSuffix = ""	

	targets,kaldiTools,useSuffixs,names,outFiles = check_multiple_resources(target,kaldiTool,useSuffix,name,outFile=outFile)
	# pretreatment
	fromSegment = False
	with FileHandleManager() as fhm:

		segments = []
		for index,kaldiTool,target,useSuffix,name in zip(range(len(outFiles)),kaldiTools,targets,useSuffixs,names):
			
			declare.is_classes("target",target,["str","ListTable","WavSegment"])
			declare.is_valid_string("name",name)

			if isinstance(target,str):		
		
				allFiles = list_files(target)
				target = ListTable()

				for filePath in allFiles:
					filePath = filePath.strip()
					if filePath[-4:].lower() == ".wav":
						fileName = os.path.basename(filePath)
						uttID = fileName[0:-4].replace(".","")
						target[uttID] = filePath
					
					elif filePath[-4:].lower() == '.scp':
						target += load_list_table(filePath)
					
					elif "wav" == useSuffix:
						fileName = os.path.basename(filePath)
						uttID = fileName.replace(".","")
						target[uttID] = filePath

					elif "scp" == useSuffix:
						target += load_list_table(filePath)

					else:
						raise UnsupportedType('Unknown file suffix. You can declare whether <useSuffix> is "wav" or "scp".')
				
				if len(target) == 0:
					raise WrongDataFormat("There did not include any data to compute data in target.")

				targets[index] = target
			
			elif type_name(target) == "WavSegment":

				segTemp = fhm.create("w+",suffix=".seg",encoding="utf-8")
				target.save(segTemp)
				segments.append(segTemp.name)

				targets[index] = target.detach_wav()
				fromSegment = True

		if fromSegment:
			# define the command pattern
			cmdPattern = "extract-segments scp:{wavFile} {segment} ark:- | {kaldiTool} ark:- ark:{outFile}"
			# define resources
			resources = {"wavFile":targets,"segment":segments,"kaldiTool":kaldiTools,"outFile":outFiles}
		else:
			# define the command pattern
			cmdPattern = "{kaldiTool} scp:{wavFile} ark:{outFile}"
			# define resources
			resources = {"wavFile":targets,"kaldiTool":kaldiTools,"outFile":outFiles}

		# Run (inside the FileHandleManager context so the temporary segment files still exist)
		return run_kaldi_commands_parallel(resources,cmdPattern,analyzeResult=True,generateArchive="feat",archiveNames=names)
Example #25
def load_ali(target, aliType=None, name="ali", hmm=None):
    '''
	Load alignment data.

	Args:
		<target>: Python dict object, bytes object, exkaldi alignment object, kaldi alignment file or .npy file.
		<aliType>: None, or one of 'transitionID', 'phoneID', 'pdfID'. It will return different alignment object.
		<name>: a string.
		<hmm>: file path or exkaldi HMM object.
	Return:
		exkaldi alignment data objects.
	'''
    assert isinstance(
        name, str) and len(name) > 0, "Name should be a valid string."

    ExkaldiInfo.vertify_kaldi_existed()

    def transform(data, cmd):
        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=data)
        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to transform alignment.')
        else:
            result = {}
            sp = BytesIO(out)
            for line in sp.readlines():
                line = line.decode()
                line = line.strip().split()
                utt = line[0]
                matrix = np.array(line[1:], dtype=np.int32)
                result[utt] = matrix
            return result

    if isinstance(target, dict):
        if aliType is None:
            result = NumpyAlignment(target, name)
        elif aliType == "transitionID":
            result = NumpyAlignmentTrans(target, name)
        elif aliType == "phoneID":
            result = NumpyAlignmentPhone(target, name)
        elif aliType == "pdfID":
            result = NumpyAlignmentPdf(target, name)
        else:
            raise WrongOperation(
                f"<aliType> should be None, 'transitionID', 'phoneID' or 'pdfID' but got {aliType}."
            )
        result.check_format()
        return result

    elif type_name(target) in [
            "NumpyAlignment", "NumpyAlignmentTrans", "NumpyAlignmentPhone",
            "NumpyAlignmentPdf", "BytesAlignmentTrans"
    ]:
        result = copy.deepcopy(target)
        result.rename(name)
        return result

    elif isinstance(target, str):

        allFiles = list_files(target)

        results = {
            "NumpyAlignment": NumpyAlignment(),
            "NumpyAlignmentTrans": NumpyAlignmentTrans(),
            "NumpyAlignmentPhone": NumpyAlignmentPhone(),
            "NumpyAlignmentPdf": NumpyAlignmentPdf(),
            "BytesAlignmentTrans": BytesAlignmentTrans(),
        }

        for fileName in allFiles:
            fileName = os.path.abspath(fileName)

            if fileName.endswith(".npy"):
                temp = __read_data_from_file(fileName, "npy")
                if aliType is None:
                    temp = NumpyAlignment(temp.data)
                    results["NumpyAlignment"] += temp
                elif aliType == "transitionID":
                    temp = NumpyAlignmentTrans(temp.data)
                    results["NumpyAlignmentTrans"] += temp
                elif aliType == "phoneID":
                    temp = NumpyAlignmentPhone(temp.data)
                    results["NumpyAlignmentPhone"] += temp
                elif aliType == "pdfID":
                    temp = NumpyAlignmentPdf(temp.data)
                    results["NumpyAlignmentPdf"] += temp
                else:
                    raise WrongOperation(
                        f"<aliType> should be None, 'transitionID','phoneID' or 'pdfID' but got {aliType}."
                    )

            else:
                if fileName.endswith('.gz'):
                    cmd = f'gunzip -c {fileName}'
                else:
                    cmd = f'cat {fileName}'

                if aliType is None or aliType == "transitionID":
                    out, err, cod = run_shell_command(cmd,
                                                      stdout=subprocess.PIPE,
                                                      stderr=subprocess.PIPE)
                    if (isinstance(cod, int) and cod != 0) or out == b'':
                        print(err.decode())
                        raise ShellProcessError(
                            "Failed to get the alignment data from file.")
                    else:
                        temp = BytesAlignmentTrans(out)
                        results["BytesAlignmentTrans"] += temp

                else:
                    temp = tempfile.NamedTemporaryFile("wb+")
                    try:
                        if type_name(hmm) in ("HMM", "MonophoneHMM",
                                              "TriphoneHMM"):
                            hmm.save(temp)
                            hmmFileName = temp.name
                        elif isinstance(hmm, str):
                            if not os.path.isfile(hmm):
                                raise WrongPath(f"No such file:{hmm}.")
                            hmmFileName = hmm
                        else:
                            raise UnsupportedType(
                                f"<hmm> should be a file path or an exkaldi HMM (or sub-class) object, but got {type_name(hmm)}."
                            )

                        if aliType == "phoneID":
                            cmd += f" | ali-to-phones --per-frame=true {hmmFileName} ark:- ark,t:-"
                            temp = transform(None, cmd)
                            temp = NumpyAlignmentPhone(temp)
                            results["NumpyAlignmentPhone"] += temp

                        elif target == "pdfID":
                            cmd = f" | ali-to-pdf {hmmFileName} ark:- ark,t:-"
                            temp = transform(None, cmd)
                            temp = NumpyAlignmentPdf(temp)
                            results["NumpyAlignmentPdf"] += temp
                        else:
                            raise WrongOperation(
                                f"<aliType> should be 'transitionID', 'phoneID' or 'pdfID' but got {aliType}."
                            )

                    finally:
                        temp.close()

        finalResult = []
        for obj in results.values():
            if not obj.is_void:
                obj.rename(name)
                finalResult.append(obj)

        if len(finalResult) == 0:
            raise WrongOperation(
                "<target> does not include any valid data.")
        elif len(finalResult) == 1:
            finalResult = finalResult[0]

        return finalResult
Example #26
def wer(ref, hyp, ignore=None, mode='all'):
    '''
	Compute WER (word error rate) between <ref> and <hyp>. 

	Args:
		<ref>,<hyp>: exkaldi transcription object or file path.
		<ignore>: ignore a symbol.
		<mode>: "all" or "present".

	Return:
		a namedtuple of score information.
	'''
    declare.is_potential_transcription("ref", ref)
    declare.is_potential_transcription("hyp", hyp)
    declare.is_instances("mode", mode, ['all', 'present'])
    declare.kaldi_existed()

    if ignore is not None:
        declare.is_valid_string("ignore", ignore)

    with FileHandleManager() as fhm:

        if ignore is None:

            if type_name(hyp) == "Transcription":
                hypTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
                hyp.save(hypTemp)
                hyp = hypTemp.name

            if type_name(ref) == "Transcription":
                refTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
                ref.save(refTemp)
                ref = refTemp.name

            cmd = f'compute-wer --text --mode={mode} ark:{ref} ark,p:{hyp}'
            scoreOut, scoreErr, _ = run_shell_command(cmd,
                                                      stdout="PIPE",
                                                      stderr="PIPE")

        else:
            # remove the ignored symbol in hyp
            if type_name(hyp) == "Transcription":
                hyp = hyp.save()
            else:
                with open(hyp, "r", encoding="utf-8") as fr:
                    hyp = fr.read()
            hypTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
            cmd = f'sed "s/{ignore} //g" > {hypTemp.name}'
            hypOut, err, _ = run_shell_command(cmd,
                                               stdin="PIPE",
                                               stdout="PIPE",
                                               stderr="PIPE",
                                               inputs=hyp)
            if len(hypOut) == 0:
                raise WrongDataFormat("<hyp> has wrong data formation.",
                                      err.decode())
            # remove the ignored symbol in ref
            if type_name(ref) == "Transcription":
                ref = ref.save()
            else:
                with open(ref, "r", encoding="utf-8") as fr:
                    ref = fr.read()
            refTemp = fhm.create("w+", suffix=".txt", encoding="utf-8")
            cmd = f'sed "s/{ignore} //g" > {refTemp.name}'
            refOut, err, cod = run_shell_command(cmd,
                                                 stdin="PIPE",
                                                 stdout="PIPE",
                                                 stderr="PIPE",
                                                 inputs=ref)
            if cod != 0 or len(refOut) == 0:
                raise WrongDataFormat("<ref> has wrong data formation.",
                                      err.decode())
            # score
            cmd = f'compute-wer --text --mode={mode} ark:{refTemp.name} ark,p:{hypTemp.name}'
            scoreOut, scoreErr, _ = run_shell_command(cmd,
                                                      stdout="PIPE",
                                                      stderr="PIPE")

    if len(scoreOut) == 0:
        raise KaldiProcessError("Failed to compute WER.", scoreErr.decode())
    else:
        out = scoreOut.decode().split("\n")
        pattern1 = r'%WER (.*) \[ (.*) \/ (.*),(.*) ins,(.*) del,(.*) sub \]'
        pattern2 = r"%SER (.*) \[ (.*) \/ (.*) \]"
        pattern3 = r"Scored (.*) sentences,(.*) not present in hyp."
        s1 = re.findall(pattern1, out[0])[0]
        s2 = re.findall(pattern2, out[1])[0]
        s3 = re.findall(pattern3, out[2])[0]

        return namedtuple("Score", [
            "WER", "words", "insErr", "delErr", "subErr", "SER", "sentences",
            "wrongSentences", "missedSentences"
        ])(
            float(s1[0]),  #WER
            int(s1[2]),  #words
            int(s1[3]),  #ins
            int(s1[4]),  #del
            int(s1[5]),  #sub
            float(s2[0]),  #SER
            int(s2[1]),  #sentences
            int(s2[2]),  #wrong sentences
            int(s3[1])  #missed sentences
        )
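
A minimal usage sketch (file paths hypothetical):

score = wer("exp/decode/ref.txt", "exp/decode/hyp.txt", mode="all")
print(score.WER, score.SER)
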
Example #27
def ctc_prefix_beam_search(prob,
                           vocabs,
                           blankID=None,
                           beam=5,
                           cutoff=0.999,
                           strick=1.0,
                           lmFile=None,
                           alpha=1.0,
                           beta=0):
    '''
    Prefix beam search decoding algorithm. Lm score is supported.

    Args:
        <prob>: an exkaldi probability object. This probability should be an output of a neural network with CTC loss function.
                We expect the probability has not passed through any activation function, or it may generate wrong results.
        <vocabs>: a list of vocabulary.
        <blankID>: specify the ID of the blank symbol. If None, use the last dimension of <prob>.
        <beam>: the beam size.
        <cutoff>: the sum threshold to cut off dimensions whose probability is extremely small.
        <strick>: when the decoding results of two adjacent frames are the same, the probability of the latter will be reduced.
        <lmFile>: if not None, add language model scores to the beam.
        <alpha>: the weight of the LM score.
        <beta>: the length normalization weight of the LM score.
    Return:
        An exkaldi Transcription object of decoding results.  
    '''
    declare.is_classes("vocabs", vocabs, [tuple, list])

    declare.is_probability("prob", prob)
    if type_name(prob) == "BytesProb":
        prob = prob.to_numpy()
    elif type_name(prob) == "IndexTable":
        prob = prob.read_record("prob").to_numpy()

    if lmFile is not None:
        declare.is_file("lmFile", lmFile)
    else:
        lmFile = "none"

    probDim = prob.dim
    if len(vocabs) == probDim:
        if blankID is None:
            blankID = probDim - 1
        declare.is_positive_int("blankID", blackID)
        declare.in_boundary("blankID", blackID, 0, probDim - 1)

    elif len(vocabs) == probDim - 1:
        if blankID is None:
            blankID = probDim - 1
        else:
            assert blankID == probDim - 1, f"The dimensionality of probability is {probDim} but there are only {len(vocabs)} words. In this case, blank ID must be {probDim-1} but got {blankID}."
    else:
        raise WrongDataFormat(
            f"The dimensionality of probability {probDim} does not match the number of words {len(vocabs)}."
        )

    for ID, word in enumerate(vocabs):
        if len(word.strip()) == 0:
            raise WrongDataFormat(f"Found a vocab {word} unavaliable.")

    num_classes = len(vocabs)
    vocabs = " ".join(vocabs)

    sources = [
        vocabs.encode(),
    ]
    uttTemp = []
    for utt, pb in prob.items:
        declare.is_classes("prob", pb, np.ndarray)
        declare.equal("the rank of matrix shape", len(pb.shape),
                      "expected rank", 2)
        uttTemp.append(utt)  # keep utterance IDs in order for the later lookup
        pb = softmax(pb, axis=1)
        sources.append(f" {pb.shape[0]} ".encode() +
                       pb.astype("float32").tobytes())

    sources = b"".join(sources)

    cmd = os.path.join(sys.prefix, "exkaldisrc", "tools",
                       "prefix_beam_search_decode")
    cmd += " --num_files {}".format(prob.lens[0])
    cmd += " --num_classes {}".format(num_classes)
    cmd += " --blank_id {}".format(blankID)
    cmd += " --lm_model {}".format(lmFile)
    cmd += " --beam_size {}".format(beam)
    cmd += " --cutoff_prob {}".format(cutoff)
    cmd += " --alpha {}".format(alpha)
    cmd += " --beta {}".format(beta)

    out, err, _ = run_shell_command(cmd,
                                    stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    inputs=sources)

    if len(out) == 0:
        raise Exception("Failed to beam search decode.", err.decode())
    else:
        results = Transcription(name="beamSearchResults")
        out = out.decode().strip().split("file")
        for index, res in enumerate(out[1:]):
            res = res.strip().split("\n")
            if len(res) <= 1:
                # no hypothesis was produced for this utterance
                results[uttTemp[index]] = ""
            else:
                results[uttTemp[index]] = " ".join(res[1].strip().split()[1:])

        return results
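
A minimal usage sketch, assuming amOut is an exkaldi NumpyProb holding pre-activation network outputs and wordList is the decoding vocabulary (both names are hypothetical):

# Blank is assumed to occupy the last output dimension, so blankID may stay None.
trans = ctc_prefix_beam_search(amOut, wordList, blankID=None, beam=10, cutoff=0.999)
trans.save("decode_results.txt")
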
Ejemplo n.º 28
0
	def get_nbest(self, n, wordSymbolTable=None, hmm=None, acwt=1, phoneLevel=False, requireAli=False, requireCost=False):
		'''
		Get the N-best results in text format.

		Args:
			<n>: the number of best results to return.
			<wordSymbolTable>: file or ListTable object or LexiconBank object.
			<hmm>: file or HMM object.
			<acwt>: acoustic weight.
			<phoneLevel>: If True, return phone results.
			<requireAli>: If True, return alignment simultaneously.
			<requireCost>: If True, return acoustic model and language model cost simultaneously.

		Return:
			A list of exkaldi Transcription objects (and their Metric objects).
		'''
		assert isinstance(n, int) and n > 0, "Expected <n> to be a positive int value."
		assert isinstance(acwt, (int,float)) and acwt > 0, "Expected <acwt> to be a positive int or float value."
	
		if self.is_void:
			raise WrongOperation('No data in lattice.')
		
		if wordSymbolTable is None:
			assert self.wordSymbolTable is not None, "<wordSymbolTable> is necessary because no word symbol table is available."
			wordSymbolTable = self.wordSymbolTable
		
		if hmm is None:
			assert self.hmm is not None, "<hmm> is necessary because no HMM model is available."
			hmm = self.hmm

		wordSymbolTemp = tempfile.NamedTemporaryFile('w+', suffix="_words.txt", encoding='utf-8')
		modelTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
		outAliTemp = tempfile.NamedTemporaryFile('w+', suffix=".ali", encoding='utf-8')
		outCostFile_LM = tempfile.NamedTemporaryFile('w+', suffix=".cost", encoding='utf-8')
		outCostFile_AM = tempfile.NamedTemporaryFile('w+', suffix=".cost", encoding='utf-8')

		try:
			if isinstance(wordSymbolTable, str):
				assert os.path.isfile(wordSymbolTable), f"No such file: {wordSymbolTable}."
				wordsFile = wordSymbolTable
			elif type_name(wordSymbolTable) == "LexiconBank":
				if phoneLevel:
					wordSymbolTable.dump_dict("phones", wordSymbolTemp)
				else:
					wordSymbolTable.dump_dict("words", wordSymbolTemp)
				wordsFile = wordSymbolTemp.name
			elif type_name(wordSymbolTable) == "ListTable":
				wordSymbolTable.save(wordSymbolTemp)
				wordSymbolTemp.seek(0)
				wordsFile = wordSymbolTemp.name
			else:
				raise UnsupportedType(f"<wordSymbolTable> should be file name, LexiconBank object or ListTable object but got: {type_name(wordSymbolTable)}.")

			if isinstance(hmm, str):
				assert os.path.isfile(hmm), f"No such file: {hmm}."
				hmmFile = hmm
			elif type_name(hmm) in ["MonophoneHMM","TriphoneHMM"]:
				hmm.save(modelTemp)
				hmmFile = modelTemp.name
			else:
				raise UnsupportedType(f"<hmm> should be file name, exkaldi HMM object but got: {type_name(hmm)}.")

			if phoneLevel:
				cmd = f'lattice-align-phones --replace-output-symbols=true {hmmFile} ark:- ark:- | '
			else:
				cmd = ""			

			cmd += f'lattice-to-nbest --acoustic-scale={acwt} --n={n} ark:- ark:- |'
			cmd += f'nbest-to-linear ark:- ark,t:{outAliTemp.name} ark,t:-'   
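			# nbest-to-linear splits each N-best path into an alignment stream and a word stream
			# (plus LM-cost and AM-cost streams when appended below)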
			
			if requireCost:
				cmd += f' ark,t:{outCostFile_LM.name} ark,t:{outCostFile_AM.name}'

			out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)
			
			if cod != 0 or out == b'':
				print(err.decode())
				raise KaldiProcessError('Failed to get N best results.')
			
			def separate_n_bests(data):
				results = {}
				for index, trans in enumerate(data):
					trans = trans.strip().split(maxsplit=1)
					if len(trans) == 0:
						continue
					# strip the trailing "-N" rank suffix from the utterance key
					name = trans[0][0:(trans[0].rfind("-"))]
					if len(trans) == 1:
						res = " "
					else:
						res = trans[1]
					if name not in results.keys():
						results[name] = [res,]
					else:
						results[name].append(res)
				
				finalResults = []
				for uttID, nbests in results.items():
					for index, one in enumerate(nbests):
						if index > len(finalResults)-1:
							finalResults.append({})
						finalResults[index][uttID] = one

				return finalResults

			out = out.decode().strip().split("\n")

			out = separate_n_bests(out)
			NBEST = []
			for index, one in enumerate(out,start=1):
				name = f"{index}-best"
				NBEST.append( Transcription(one, name=name) )
			del out

			if requireCost:
				outCostFile_AM.seek(0)
				lines_AM = outCostFile_AM.read().strip().split("\n")
				lines_AM = separate_n_bests(lines_AM)
				AMSCORE = []
				for index, one in enumerate(lines_AM, start=1):
					name = f"AM-{index}-best"
					AMSCORE.append( Metric(one, name=name) )
				del lines_AM			

				outCostFile_LM.seek(0)
				lines_LM = outCostFile_LM.read().strip().split("\n")
				lines_LM = separate_n_bests(lines_LM)
				LMSCORE = []
				for index, one in enumerate(lines_LM, start=1):
					name = f"LM-{index}-best"
					LMSCORE.append( Metric(one, name=name) )
				del lines_LM

				finalResult = [NBEST,AMSCORE,LMSCORE]
			else:
				finalResult = [NBEST,]

			if requireAli:
				ALIGNMENT = []
				outAliTemp.seek(0)
				ali = outAliTemp.read().strip().split("\n")
				ali = separate_n_bests(ali)
				for index, one in enumerate(ali, start=1):
					name = f"{index}-best"
					temp = {}
					for key, value in one.items():
						value = value.strip().split()
						temp[key] = np.array(value, dtype=np.int32)
					ALIGNMENT.append( NumpyAlignmentTrans(temp, name=name) )
				del ali
				finalResult.append(ALIGNMENT)

			if len(finalResult) == 1:
				finalResult = finalResult[0]

			return finalResult
			 
		finally:
			wordSymbolTemp.close()
			modelTemp.close()
			outAliTemp.close()
			outCostFile_LM.close()
			outCostFile_AM.close()
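
A minimal usage sketch, assuming lat is an exkaldi Lattice that already carries its word symbol table and HMM model (the variable name is hypothetical):

# The return order follows the method body: N-best, AM cost, LM cost, alignment.
nbest, amCost, lmCost, align = lat.get_nbest(5, acwt=0.1, requireAli=True, requireCost=True)
print(nbest[0])  # the 1-best exkaldi Transcription object
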
Ejemplo n.º 29
0
def wer(ref, hyp, ignore=None, mode='all'):
    '''
	Compute WER (word error rate) between <ref> and <hyp>. 

	Args:
		<ref>, <hyp>: exkaldi transcription object or file path.
		<ignore>: a symbol to be removed from both texts before scoring.
		<mode>: "all" or "present".
	Return:
		a namedtuple of score information.
	'''
    assert mode in ['all',
                    'present'], 'Expected <mode> to be "present" or "all".'
    ExkaldiInfo.vertify_kaldi_existed()

    hypTemp = tempfile.NamedTemporaryFile("w+",
                                          suffix=".txt",
                                          encoding="utf-8")
    refTemp = tempfile.NamedTemporaryFile("w+",
                                          suffix=".txt",
                                          encoding="utf-8")
    try:
        if ignore is None:
            if type_name(hyp) == "Transcription":
                hyp.save(hypTemp)
                hypTemp.seek(0)
                hypFileName = hypTemp.name
            elif isinstance(hyp, str):
                if not os.path.isfile(hyp):
                    raise WrongPath(f"No such file:{hyp}.")
                else:
                    hypFileName = hyp
            else:
                raise UnsupportedType(
                    '<hyp> should be exkaldi Transcription object or file path.'
                )

            if type_name(ref) == "Transcription":
                ref.save(refTemp)
                refTemp.seek(0)
                refFileName = refTemp.name
            elif isinstance(ref, str):
                if not os.path.isfile(ref):
                    raise WrongPath(f"No such file:{ref}.")
                else:
                    refFileName = ref
            else:
                raise UnsupportedType(
                    '<ref> should be exkaldi Transcription object or file path.'
                )

            cmd = f'compute-wer --text --mode={mode} ark:{refFileName} ark,p:{hypFileName}'
            scoreOut, scoreErr, _ = run_shell_command(cmd,
                                                      stdout=subprocess.PIPE,
                                                      stderr=subprocess.PIPE)
        else:
            if type_name(hyp) == "Transcription":
                hyp = hyp.save()
            elif isinstance(hyp, str):
                if not os.path.isfile(hyp):
                    raise WrongPath(f"No such file:{hyp}.")
                else:
                    with open(hyp, "r", encoding="utf-8") as fr:
                        hyp = fr.read()
            else:
                raise UnsupportedType(
                    '<hyp> should be exkaldi Transcription object or file path.'
                )

            cmd = f'sed "s/{ignore} //g" > {hypTemp.name}'
            hypOut, err, _ = run_shell_command(cmd,
                                               stdin=subprocess.PIPE,
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE,
                                               inputs=hyp.encode())
            if len(hypOut) == 0:
                print(err.decode())
                raise WrongDataFormat("<hyp> has wrong data formation.")

            if type_name(ref) == "Transcription":
                ref = ref.save()
            elif isinstance(ref, str):
                if not os.path.isfile(ref):
                    raise WrongPath(f"No such file:{ref}.")
                else:
                    with open(ref, "r", encoding="utf-8") as fr:
                        ref = fr.read()
            else:
                raise UnsupportedType(
                    '<ref> should be exkaldi Transcription object or file path.'
                )

            cmd = f'sed "s/{ignore} //g" > {refTemp.name}'
            refOut, err, cod = run_shell_command(cmd,
                                                 stdin=subprocess.PIPE,
                                                 stdout=subprocess.PIPE,
                                                 stderr=subprocess.PIPE,
                                                 inputs=hyp.encode())
            if cod != 0 or len(refOut) == 0:
                print(err.decode())
                raise WrongDataFormat("<ref> has wrong data formation.")

            cmd = f'compute-wer --text --mode={mode} ark:{refTemp.name} ark,p:{hypTemp.name}'
            scoreOut, scoreErr, _ = run_shell_command(cmd,
                                                      stdout=subprocess.PIPE,
                                                      stderr=subprocess.PIPE)

    finally:
        hypTemp.close()
        refTemp.close()

    if len(scoreOut) == 0:
        print(scoreErr.decode())
        raise KaldiProcessError("Failed to compute WER.")

    else:
        out = scoreOut.decode().split("\n")
        pattern1 = r'%WER (.*) \[ (.*) / (.*), (.*) ins, (.*) del, (.*) sub \]'
        pattern2 = r"%SER (.*) \[ (.*) / (.*) \]"
        pattern3 = r"Scored (.*) sentences, (.*) not present in hyp."
        s1 = re.findall(pattern1, out[0])[0]
        s2 = re.findall(pattern2, out[1])[0]
        s3 = re.findall(pattern3, out[2])[0]

        return namedtuple("Score", [
            "WER", "words", "insErr", "delErr", "subErr", "SER", "sentences",
            "wrongSentences", "missedSentences"
        ])(
            float(s1[0]),  #WER
            int(s1[2]),  #words
            int(s1[3]),  #ins
            int(s1[4]),  #del
            int(s1[5]),  #sub
            float(s2[0]),  #SER
            int(s2[2]),  #sentences (denominator of the SER line)
            int(s2[1]),  #wrong sentences (numerator of the SER line)
            int(s3[1])  #missed sentences
        )
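
A minimal usage sketch, assuming refTrans and hypTrans are exkaldi Transcription objects (both names are hypothetical):

score = wer(refTrans, hypTrans, ignore=None, mode="all")
print(f"WER: {score.WER}%  ({score.insErr} ins, {score.delErr} del, {score.subErr} sub)")
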
Ejemplo n.º 30
0
def convert_field(prob, originVocabs, targetVocabs, retainOOV=False):
    '''
    Transform the dimensions of probability to a target field.

    Args:
        <prob>: an exkaldi probability object. This probability should be the output of a neural network.
        <originVocabs>: list of the original field vocabulary.
        <targetVocabs>: list of the target field vocabulary.
        <retainOOV>: If True, target words which are not in the original vocabulary will be kept, padded with the minimum probability of each frame.
    Return:
        A new exkaldi probability object and a list of the new target vocabulary.
    '''
    declare.is_classes("originVocabs", originVocabs, list)
    declare.is_classes("targetVocabs", targetVocabs, list)
    assert len(targetVocabs) > 0, "Target vocabulary is void."

    declare.is_probability("prob", prob)
    if type_name(prob) == "BytesProb":
        prob = prob.to_numpy()
    elif type_name(prob) == "IndexTable":
        prob = prob.read_record("prob").to_numpy()

    probDim = prob.dim
    declare.equal("the dimension of probability", probDim,
                  "the number of words", len(originVocabs))

    origin_w2i = dict((w, i) for i, w in enumerate(originVocabs))

    retainIDs = []
    newTargetVocabs = []
    for w in targetVocabs:
        try:
            ID = origin_w2i[w]
        except KeyError:
            if retainOOV is True:
                newTargetVocabs.append(w)
                retainIDs.append(None)
        else:
            newTargetVocabs.append(w)
            retainIDs.append(ID)

    results = {}
    for utt, pb in prob.items:
        declare.is_classes("prob", pb, np.ndarray)
        declare.equal("the rank of matrix shape", len(pb.shape),
                      "expected rank", 2)
        if retainOOV is True:
            padding = np.min(pb, axis=1)
        new = np.zeros(shape=(pb.shape[0], len(retainIDs)), dtype=np.float32)
        for index, i in enumerate(retainIDs):
            if i is None:
                # OOV target word: pad with the minimum probability of each frame
                new[:, index] = padding
            else:
                new[:, index] = pb[:, i]

        results[utt] = new

    newName = f"convert({prob.name})"
    return NumpyProb(data=results, name=newName), newTargetVocabs
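
A minimal usage sketch, assuming prob is an exkaldi probability object whose columns follow srcWords, and tgtWords is the vocabulary of the target field (all three names are hypothetical):

newProb, newVocabs = convert_field(prob, srcWords, tgtWords, retainOOV=True)
# Each frame now has len(newVocabs) columns; OOV targets are padded with the frame minimum.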