Ejemplo n.º 1
0
def compute_cmvn_stats(feat, spk2utt=None, name="cmvn"):
    '''
    Compute CMVN statistics of a feature with the Kaldi <compute-cmvn-stats> command.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <spk2utt>: None, a spk2utt file path or an exkaldi ScriptTable object.
                   When given, it is sorted, saved into a temporary file and passed with --spk2utt.
        <name>: a string, the name of the returned object.

    Return:
        An exkaldi BytesCMVNStatistics object.

    Raise:
        UnsupportedType: <feat> or <spk2utt> has an unexpected type.
        WrongPath: <spk2utt> is a string but not an existing file.
        KaldiProcessError: the Kaldi command returned a non-zero code or no output.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    featType = type_name(feat)
    if featType not in ("BytesFeature", "NumpyFeature"):
        raise UnsupportedType(
            f"Expected <feat> is a exkaldi feature object but got {featType}."
        )
    # The feature is always sorted by utterance ID and fed to Kaldi as bytes.
    feat = feat.sort("utt")
    if featType == "NumpyFeature":
        feat = feat.to_bytes()

    # The temporary file is closed when the "with" block is left, whatever happens inside.
    with tempfile.NamedTemporaryFile("w+", encoding="utf-8") as spk2uttTemp:
        if spk2utt is None:
            cmd = 'compute-cmvn-stats ark:- ark:-'
        else:
            if isinstance(spk2utt, str):
                if not os.path.isfile(spk2utt):
                    raise WrongPath(f"No such file:{spk2utt}.")
                table = ScriptTable(name="spk2utt").load(spk2utt)
            elif isinstance(spk2utt, ScriptTable):
                table = spk2utt
            else:
                raise UnsupportedType(
                    f"<spk2utt> should be a file path or ScriptTable object but got {type_name(spk2utt)}."
                )
            table.sort().save(spk2uttTemp)
            spk2uttTemp.seek(0)

            cmd = f'compute-cmvn-stats --spk2utt=ark:{spk2uttTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        failed = (isinstance(cod, int) and cod != 0) or out == b''
        if failed:
            print(err.decode())
            raise KaldiProcessError('Failed to compute CMVN statistics.')

        return BytesCMVNStatistics(out, name, indexTable=None)
Ejemplo n.º 2
0
def use_fmllr(feat, transMatrix, utt2spkFile):
    '''
    Apply fMLLR transform matrices to a feature with the Kaldi <transform-feats> command.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <transMatrix>: exkaldi fMLLR transform matrix object (BytesFmllrMatrix or NumpyFmllrMatrix).
        <utt2spkFile>: utt2spk file name, passed to the --utt2spk option.

    Return:
        A new exkaldi feature object named "fmllr(<original name>)".
        It is a bytes feature if <feat> was a BytesFeature, otherwise it is converted to numpy.
    '''
    featType = type_name(feat)
    if featType not in ("BytesFeature", "NumpyFeature"):
        raise UnsupportedType(
            f"<feat> should exkaldi feature object but got: {featType}."
        )
    # Remember the input flavour so the result can be returned in the same one.
    bytesFlag = featType == "BytesFeature"
    feat = feat.sort(by="utt")
    if not bytesFlag:
        feat = feat.to_bytes()

    matrixType = type_name(transMatrix)
    if matrixType not in ("BytesFmllrMatrix", "NumpyFmllrMatrix"):
        raise UnsupportedType(
            f"<transMatrix> should exkaldi fMLLR transform matrix object but got: {matrixType}."
        )
    transMatrix = transMatrix.sort(by="utt")
    if matrixType == "NumpyFmllrMatrix":
        transMatrix = transMatrix.to_bytes()

    # The transform matrices are handed to Kaldi through a temporary archive file,
    # which is closed when the "with" block is left.
    with tempfile.NamedTemporaryFile("wb+", suffix="_trans.ark") as transTemp:
        transTemp.write(transMatrix.data)
        transTemp.seek(0)

        cmd = f'transform-feats --utt2spk=ark:{utt2spkFile} ark:{transTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        if cod != 0:
            print(err.decode())
            raise KaldiProcessError(
                "Failed to transform feature to fMLLR feature.")

        newFeat = BytesFeature(out, name=f"fmllr({feat.name})")
        return newFeat if bytesFlag else newFeat.to_numpy()
Ejemplo n.º 3
0
	def am_rescore(self, hmm, feat):
		'''
		Replace the acoustic scores of this lattice with the ones of a new HMM-GMM model,
		using the Kaldi <gmm-rescore-lattice> command.

		Args:
			<hmm>: exkaldi HMM object (BaseHMM, MonophoneHMM or TriphoneHMM) or model file path.
			<feat>: exkaldi feature object (BytesFeature or NumpyFeature).

		Return:
			A new Lattice object named "am_rescore(<original name>)".

		Raise:
			WrongOperation: this lattice holds no data.
			UnsupportedType: <hmm> or <feat> has an unexpected type.
			KaldiProcessError: the Kaldi command failed or produced no output.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any lattice data.')

		hmmTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
		# The feature is written as a Kaldi archive, so give the file an archive suffix.
		featTemp = tempfile.NamedTemporaryFile("wb+", suffix=".ark")
		try:
			if isinstance(hmm, str):
				assert os.path.isfile(hmm), f"No such file: {hmm}."
				hmmFile = hmm
			elif type_name(hmm) in ["BaseHMM", "MonophoneHMM", "TriphoneHMM"]:
				hmmTemp.write(hmm.data)
				hmmTemp.seek(0)
				hmmFile = hmmTemp.name
			else:
				raise UnsupportedType(f"<hmm> should be file path or exkaldi HMM object but got: {type_name(hmm)}.")

			if type_name(feat) == "BytesFeature":
				feat = feat.sort(by="utt")
			elif type_name(feat) == "NumpyFeature":
				# The data is written to a binary file below, so a numpy feature
				# has to be converted to its bytes form (not to numpy again).
				feat = feat.sort(by="utt").to_bytes()
			else:
				raise UnsupportedType(f"<feat> should be exkaldi feature object but got: {type_name(feat)}.")

			featTemp.write(feat.data)
			featTemp.seek(0)
			featFile = featTemp.name

			# The lattice itself is streamed through stdin; model and feature are read from files.
			cmd = f"gmm-rescore-lattice {hmmFile} ark:- ark:{featFile} ark:-"

			out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)

			if cod != 0 or out == b'':
				print(err.decode())
				raise KaldiProcessError("Failed to rescore lattice.")
			else:
				newName = f"am_rescore({self.name})"
				return Lattice(data=out, wordSymbolTable=self.wordSymbolTable, hmm=self.hmm, name=newName)
		finally:
			hmmTemp.close()
			featTemp.close()
Ejemplo n.º 4
0
def run_shell_command(cmd,
                      stdin=None,
                      stdout=None,
                      stderr=None,
                      inputs=None,
                      env=None):
    '''
    Run a command in a subprocess and wait until it has finished.

    Args:
        <cmd>: a string (executed through the shell) or a list (executed directly).
        <stdin>, <stdout>, <stderr>: forwarded to subprocess.Popen.
        <inputs>: None, a string or bytes sent to the process. A string is encoded to bytes first.
        <env>: the environment of the process. If None, use ExkaldiInfo.ENV.

    Return:
        out, err, returnCode

    Raise:
        UnsupportedType: <cmd> or <inputs> has an unexpected type.
    '''
    if not isinstance(cmd, (str, list)):
        raise UnsupportedType(
            "<cmd> should be a string,  or a list of commands and its' options."
        )
    # Only a plain string command needs the shell to be parsed.
    useShell = isinstance(cmd, str)

    if env is None:
        env = ExkaldiInfo.ENV

    if isinstance(inputs, str):
        inputs = inputs.encode()
    elif inputs is not None and not isinstance(inputs, bytes):
        raise UnsupportedType(
            f"Expected <inputs> is string or bytes object but got {type_name(inputs)}."
        )

    process = subprocess.Popen(cmd,
                               shell=useShell,
                               stdin=stdin,
                               stdout=stdout,
                               stderr=stderr,
                               env=env)
    out, err = process.communicate(input=inputs)
    process.wait()

    return out, err, process.returncode
Ejemplo n.º 5
0
def load_ngrams(target, name="gram"):
    '''
    Load an N-Grams language model from an arpa or a binary file.

    Args:
        <target>: file path with suffix .arpa or .binary.
        <name>: a string, the name of the returned object.

    Return:
        a KenNGrams object.

    Raise:
        UnsupportedType: the suffix of <target> is neither .arpa nor .binary.
    '''
    declare.is_file("target", target)
    target = target.strip()

    with FileHandleManager() as fhm:

        # A binary model can be loaded as it is.
        if target.endswith(".binary"):
            return KenNGrams(target, name=name)

        if not target.endswith(".arpa"):
            raise UnsupportedType(
                f"Unknown suffix. Language model file should has a suffix .arpa or .binary but got: {target}."
            )

        # An arpa model is converted into a temporary binary file before loading.
        binaryTemp = fhm.create("wb+", suffix=".binary")
        arpa_to_binary(target, binaryTemp.name)
        binaryTemp.seek(0)
        ngrams = KenNGrams(binaryTemp.name, name=name)
        # Keep the path of the original arpa file on the model.
        ngrams._path = target
        return ngrams
Ejemplo n.º 6
0
def load_trans(target, name="transcription"):
    '''
    Load a transcription from a file or from a dict-like object.

    Args:
        <target>: transcription file path, Python dict object, exkaldi Transcription or ScriptTable object.
        <name>: a string.

    Return:
        An exkaldi Transcription object.

    Raise:
        UnsupportedType: <target> is none of the supported types.
    '''
    if type_name(target) in ["dict", "Transcription", "ScriptTable"]:
        # Every record must map a non-empty string ID to a string text.
        for uttID, text in target.items():
            assert isinstance(
                uttID,
                str) and len(uttID) > 0, "Utterance ID should be a string."
            assert isinstance(text, str), "Utterance text should a string."
        return Transcription(target, name)

    if isinstance(target, str):
        assert os.path.isfile(target), f"No such file:{target}."
        trans = Transcription(name=name)
        trans.load(target)
        return trans

    raise UnsupportedType(
        "<target> should be file path, dict object or ScriptTable object.")
Ejemplo n.º 7
0
def __read_one_record_from_ark(fp):
    '''
    Read one record (utterance) from the opened file pointer of a binary archive file
    and leave the handle at the start of the next record.
    It is used to generate bytes index table.

    Args:
        <fp>: a file handle opened in binary mode.

    Return:
        A tuple (utterance ID, frames, size of the whole record in bytes),
        or (None, None, None) if the end of the file has been reached.

    Raise:
        WrongDataFormat: the utterance ID, the binary symbol or the data type marker is missing or unknown.
        UnsupportedType: the record is a compressed matrix ("CM").
        In both cases the file handle is closed before raising.
    '''
    # Read the utterance ID up to the first space.
    # The raw bytes are collected and decoded as a whole, so that multi-byte
    # UTF-8 characters in the ID are not split in the middle.
    rawUtt = bytearray()
    while True:
        char = fp.read(1)
        if char == b'' or char == b' ':
            break
        rawUtt += char
    utt = rawUtt.decode().strip()
    if utt == '':
        if fp.read() == b'':
            return (None, None, None)
        else:
            fp.close()
            raise WrongDataFormat(
                "Miss utterance ID before utterance. This may not be complete Kaldi archeve table file."
            )
    # The record size is counted in bytes, so use the encoded length of the ID.
    uttSize = len(utt.encode())

    # Read data. The markers are compared as bytes: decoding arbitrary binary
    # content could fail before the format problem is reported.
    if fp.read(2) != b'\0B':
        fp.close()
        raise WrongDataFormat(
            "Miss binary symbol before utterance. This may not be Kaldi binary archeve table file."
        )

    sizeSymbol = fp.read(1)
    if sizeSymbol == b'\4':
        # An int32 count follows, then 5 bytes per item.
        frames = int(np.frombuffer(fp.read(4), dtype='int32', count=1)[0])
        fp.read(frames * 5)  # move the handle
        # ID + space(1) + binary symbol(2) + size symbol(1) + count(4)
        dataSize = uttSize + 8 + frames * 5
        return (utt, frames, dataSize)

    dataType = sizeSymbol + fp.read(2)
    if dataType == b'CM ':
        fp.close()
        raise UnsupportedType(
            "Unsupported to generate index table from compressed archive table. Please decompress it firstly."
        )
    elif dataType == b'FM ':
        sampleSize = 4
    elif dataType == b'DM ':
        sampleSize = 8
    else:
        fp.close()
        raise WrongDataFormat("This may not be Kaldi archeve table file.")

    # 10 bytes: size marker, rows (int32), size marker, columns (int32).
    _, rows, _, cols = np.frombuffer(fp.read(10),
                                     dtype="int8,int32,int8,int32",
                                     count=1)[0]
    rows = int(rows)
    cols = int(cols)
    fp.read(rows * cols * sampleSize)  # move the handle
    # ID + space(1) + binary symbol(2) + data type(3) + shape header(10)
    dataSize = uttSize + 16 + rows * cols * sampleSize
    return (utt, rows, dataSize)
Ejemplo n.º 8
0
    def perplexity(self, sentence):
        '''
        Compute the perplexity of one sentence or of every sentence of a transcription.

        Args:
            <sentence>: a string of words separated by blanks and without boundary symbols,
                        or an exkaldi Transcription object.

        Return:
            If <sentence> is a string, return a perplexity value.
            Else return an exkaldi Metric object named "LMperplexity(<transcription name>)".

        Raise:
            UnsupportedType: <sentence> is neither a string nor a Transcription object.
        '''
        def compute_one(text):
            # A text without any space is suspicious: only warn, then compute anyway.
            if " " not in text:
                print(
                    f"Warning: sentence doesn't seem to be separated by spaces or extremely short: {text}."
                )
            return self.__model.perplexity(text)

        if isinstance(sentence, str):
            return compute_one(sentence)

        if type_name(sentence) != "Transcription":
            raise UnsupportedType(
                f"<sentence> should be string or exkaldi Transcription object ut got: {type_name(sentence)}."
            )

        results = {}
        for uttID, txt in sentence.items():
            assert isinstance(
                txt, str
            ), f"Transcription should be string od words but got:{type_name(txt)} at utt-ID {uttID}."
            results[uttID] = compute_one(txt)
        return Metric(results, name=f"LMperplexity({sentence.name})")
Ejemplo n.º 9
0
    def score(self, sentence, bos=True, eos=True):
        '''
        Score one sentence or every sentence of a transcription with the language model.

        Args:
            <sentence>: a string without boundary symbols or an exkaldi Transcription object.
            <bos>: If True, add <s> to the head.
            <eos>: If True, add </s> to the tail.

        Return:
            If <sentence> is a string, return a float log-value.
            Else, return an exkaldi Metric object named "LMscore(<transcription name>)".

        Raise:
            UnsupportedType: <sentence> is neither a string nor a Transcription object.
        '''
        def compute_one(text):
            # A text without any space is suspicious: only warn, then score anyway.
            if " " not in text:
                print(
                    f"Warning: sentence doesn't seem to be separated by spaces or extremely short: {text}."
                )
            return self.__model.score(text, bos, eos)

        if isinstance(sentence, str):
            return compute_one(sentence)

        if type_name(sentence) != "Transcription":
            raise UnsupportedType(
                f"<sentence> should be string or exkaldi Transcription object ut got: {type_name(sentence)}."
            )

        results = {}
        for uttID, txt in sentence.items():
            assert isinstance(
                txt, str
            ), f"Transcription should be string od words but got:{type_name(txt)} at utt-ID {uttID}."
            results[uttID] = compute_one(txt)
        return Metric(results, name=f"LMscore({sentence.name})")
Ejemplo n.º 10
0
def compute_postprob_norm(ali, posrProbDim):
    '''
    Compute alignment counts in order to normalize acoustic model posterior probability.
    For more help information, look at the Kaldi <analyze-counts> command.

    Args:
        <ali>: exkaldi NumpyAlignmentPhone or NumpyAlignmentPdf object.
        <posrProbDim>: the dimensionality of posterior probability, passed to --counts-dim.

    Return:
        A numpy array of the normalization: the log of the relative counts.

    Raise:
        UnsupportedType: <ali> is not one of the expected alignment objects.
        KaldiProcessError: the Kaldi command failed or produced no output.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if type_name(ali) not in ["NumpyAlignmentPhone", "NumpyAlignmentPdf"]:
        raise UnsupportedType(
            f'Expected exkaldi NumpyAlignmentPhone or NumpyAlignmentPdf object but got a {type_name(ali)}.'
        )

    cmd = f"analyze-counts --print-args=False --verbose=0 --binary=false --counts-dim={posrProbDim} ark:- -"
    # NOTE(review): run_shell_command only accepts a string or bytes as <inputs>;
    # confirm that .data of the numpy alignment objects has such a form.
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=ali.data)
    if (isinstance(cod, int) and cod != 0) or out == b"":
        print(err.decode())
        raise KaldiProcessError('Failed to analyze counts.')

    # The counts are printed in text mode inside square brackets: "[ c0 c1 ... ]".
    fields = out.decode().strip().strip("[]").strip().split()
    counts = np.array(fields, dtype=np.int32)
    # A class with zero count gives -inf here, as the log of 0.
    countBias = np.log(counts / np.sum(counts))
    return countBias
Ejemplo n.º 11
0
def ctc_greedy_search(prob, vocabs, blankID=None):
    '''
    The best path (greedy) decoding algorithm for CTC output.
    For each utterance, take the best ID of every frame, merge consecutive repeats and drop the blank.

    Args:
        <prob>: An exkaldi probability object. This probability should be an output of Neural Network with CTC loss function.
        <vocabs>: a list of vocabulary. Its length must be the dimensionality of <prob>, or one less (blank excluded).
        <blankID>: specify the ID of blank symbol. If None, use the last dimensionality of <prob>.
    Return:
        An exkaldi Transcription object of decoding results.
    Raise:
        UnsupportedType: <prob> is not an exkaldi probability object.
        WrongDataFormat: the size of <vocabs> does not match the dimensionality of <prob>.
    '''
    assert isinstance(
        vocabs,
        list), f"<vocabs> must be a list of vocabulary but got {vocabs}."

    probType = type_name(prob)
    if probType == "BytesProbability":
        prob = prob.to_numpy()
    elif probType != "NumpyProbability":
        raise UnsupportedType(
            f"<prob> should be an exkaldi probability object but got {probType}."
        )

    probDim = prob.dim
    lastID = probDim - 1
    vocabSize = len(vocabs)

    if vocabSize not in (probDim, lastID):
        raise WrongDataFormat(
            f"The dimensibality of probability {probDim} does not match the numbers of words {vocabSize}."
        )

    if blankID is None:
        blankID = lastID
    elif vocabSize == probDim:
        # The blank has its own entry in <vocabs>, so any valid index is allowed.
        assert isinstance(
            blankID, int
        ) and 0 <= blankID < probDim, f"BlankID {blankID} is out of range of int sequences from 0 to {lastID}."
    else:
        # <vocabs> has no entry for the blank, so the blank must be the last dimension.
        assert blankID == lastID, f"The dimensibality of probability is {probDim} but only have {vocabSize} words. In this case, blank ID must be {lastID} but got {blankID}"

    results = Transcription(name="bestPathResult")
    # NOTE(review): <items> is accessed without a call; presumably it is a property
    # of the numpy probability object - confirm.
    for utt, pb in prob.items:
        assert isinstance(
            pb, np.ndarray
        ) and pb.ndim == 2, "Unsupported probability matrix formatation."
        framewiseBest = np.argmax(pb, axis=1)
        words = [
            vocabs[ID] for ID, _ in groupby(framewiseBest) if ID != blankID
        ]
        try:
            results[utt] = " ".join(words)
        except Exception as e:
            print("<vocab> might has non-string items.")
            raise e
    return results
Ejemplo n.º 12
0
 def loadNpyFile(fileName):
     '''
     Load an Exkaldi npy file, a sequence of (utterance ID, matrix) records, into a dict.

     Args:
         <fileName>: npy file path.

     Return:
         A dict mapping utterance ID to its matrix.

     Raise:
         UnsupportedType: the file cannot be loaded or has not the expected layout.
     '''
     try:
         # NOTE: allow_pickle=True unpickles objects from the file; only load files from trusted sources.
         records = np.load(fileName, allow_pickle=True)
         data = {record[0]: record[1] for record in records}
     except Exception as err:
         # Only real errors are converted; KeyboardInterrupt and SystemExit pass through.
         raise UnsupportedType(
             f'This is not a valid Exkaldi npy file: {fileName}.') from err
     return data
Ejemplo n.º 13
0
    def __init__(self, filePath, name="ngram"):
        '''
        Load a KenLM binary language model.

        Args:
            <filePath>: path of a KenLM binary model file.
            <name>: a string.

        Raise:
            UnsupportedType: the file does not start with the expected KenLM binary header.
        '''
        declare.is_file("filePath", filePath)

        # Check the header as raw bytes: decoding the first bytes of an arbitrary
        # binary file could raise UnicodeDecodeError before the format is checked.
        with open(filePath, "rb") as fr:
            header = fr.read(50).strip()
        if header != b"mmap lm http://kheafield.com/code format version 5":
            raise UnsupportedType(
                "This may be not a KenLM binary model format.")

        # The model lives in the kenlm object; the base class only gets a placeholder.
        super(KenNGrams, self).__init__(data=b"placeholder", name=name)
        self.__model = kenlm.Model(filePath)
        self._path = None
Ejemplo n.º 14
0
def load_fmllr(target, name="prob", useSuffix=None):
    '''
    Load fMLLR transform matrix data.

    Args:
        <target>: Python dict object, bytes object, exkaldi fMLLR matrix object, index table object,
                  .ark file, .scp file or npy file.
        <name>: a string.
        <useSuffix>: "ark" or "scp" or "npy". The file type is checked by its suffix,
                     so if <target> is a file path without a default suffix, declare its type here.

    Return:
        A BytesFmllrMatrix or NumpyFmllrMatrix object.

    Raise:
        UnsupportedType: <target> is none of the supported types.
    '''
    declare.is_valid_string("name", name)

    # Raw Python data: wrap it and verify its format.
    if isinstance(target, dict):
        matrix = NumpyFmllrMatrix(target, name)
        matrix.check_format()
        return matrix

    if isinstance(target, bytes):
        matrix = BytesFmllrMatrix(target, name)
        matrix.check_format()
        return matrix

    # Already an fMLLR matrix object: return a renamed deep copy.
    if isinstance(target, (NumpyFmllrMatrix, BytesFmllrMatrix)):
        matrix = copy.deepcopy(target)
        matrix.rename(name)
        return matrix

    # File path: the left operand of the sum follows the detected data type.
    if isinstance(target, str):
        bytesPart, numpyPart, dataType = __read_data_from_file(
            target, useSuffix)
        if dataType == "npy":
            matrix = NumpyFmllrMatrix(numpyPart) + BytesFmllrMatrix(bytesPart)
        else:
            matrix = BytesFmllrMatrix(bytesPart) + NumpyFmllrMatrix(numpyPart)
        matrix.rename(name)
        return matrix

    if isinstance(target, ArkIndexTable):
        return target.fetch(arkType="fmllrMat", name=name)

    raise UnsupportedType(
        f"Expected Python dict,bytes object,exkaldi fmllr matrix object,index table object or file path but got{type_name(target)}."
    )
Ejemplo n.º 15
0
    def __init__(self, filePath, name="ngram"):
        '''
        Load a KenLM binary language model.

        Args:
            <filePath>: path of a KenLM binary model file.
            <name>: a string.

        Raise:
            WrongPath: <filePath> is not an existing file.
            UnsupportedType: the file does not start with the expected KenLM binary header.
        '''
        assert isinstance(
            filePath,
            str), f"<filePath> should be string but got {type_name(filePath)}."
        if not os.path.isfile(filePath):
            raise WrongPath(f"No such file:{filePath}.")

        # Check the header as raw bytes: decoding the first bytes of an arbitrary
        # binary file could raise UnicodeDecodeError before the format is checked.
        with open(filePath, "rb") as fr:
            header = fr.read(50).strip()
        if header != b"mmap lm http://kheafield.com/code format version 5":
            raise UnsupportedType(
                "This is not a KenLM binary model formation.")

        # The model lives in the kenlm object; the base class only gets a placeholder.
        super(KenNGrams, self).__init__(data=b"kenlm", name=name)
        self.__model = kenlm.Model(filePath)
Ejemplo n.º 16
0
 def loadNpyFile(fileName):
     '''
     Load an Exkaldi npy file, a sequence of (utterance ID, matrix) records.

     Args:
         <fileName>: npy file path.

     Return:
         A NumpyMatrix object built from the {utterance ID: matrix} dict.

     Raise:
         UnsupportedType: the file cannot be loaded or has not the expected layout.
     '''
     try:
         # NOTE: allow_pickle=True unpickles objects from the file; only load files from trusted sources.
         records = np.load(fileName, allow_pickle=True)
         data = {record[0]: record[1] for record in records}
     except Exception as err:
         # Only real errors are converted; KeyboardInterrupt and SystemExit pass through.
         raise UnsupportedType(
             f'Expected "npy" data with exkaldi format but got {fileName}.') from err
     return NumpyMatrix(data)
Ejemplo n.º 17
0
def use_cmvn_sliding(feat, windowsSize=None, std=False):
    '''
    Apply sliding-window CMVN with the Kaldi <apply-cmvn-sliding> command.

    Args:
        <feat>: exkaldi feature object.
        <windowsSize>: windows size. If None, use the largest length found in the feature,
                       rounded up to a multiple of 100.
        <std>: a bool value. If True, the variance is normalized too (--norm-vars=true).

    Return:
        An exkaldi BytesFeature object named "cmvn(<original name>,<windows size>)".

    Raise:
        UnsupportedType: <feat> is not an exkaldi feature object.
        KaldiProcessError: the Kaldi command failed or produced no output.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if not isinstance(feat, BytesFeature):
        if type_name(feat) != "NumpyFeature":
            raise UnsupportedType(
                f"Expected <feat> is an exkaldi feature object but got {type_name(feat)}."
            )
        feat = feat.to_bytes()

    if windowsSize is None:
        # presumably feat.lens[1] yields (utterance ID, length) pairs - verify against the feature class.
        longest = max(length for utt, length in feat.lens[1])
        windowsSize = math.ceil(longest / 100) * 100
    else:
        assert isinstance(windowsSize,
                          int), "Expected <windowsSize> is an int value."

    # Kaldi expects the boolean option in lower-case text.
    normVars = 'true' if std == True else 'false'

    cmd = f'apply-cmvn-sliding --cmn-window={windowsSize} --min-cmn-window=100 --norm-vars={normVars} ark:- ark:-'
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=feat.data)

    failed = (isinstance(cod, int) and cod != 0) or out == b''
    if failed:
        print(err.decode())
        raise KaldiProcessError('Failed to use sliding CMVN.')

    return BytesFeature(out, f"cmvn({feat.name},{windowsSize})", indexTable=None)
Ejemplo n.º 18
0
def load_lat(target, name="lat"):
	'''
	Load lattice data.

	Args:
		<target>: bytes object or lattice file path. Files with the .gz suffix are decompressed with gunzip.
		<name>: a string.
	Return:
		A exkaldi lattice object.
	Raise:
		WrongDataFormat: a compressed file could not be decompressed.
		UnsupportedType: <target> is neither a bytes object nor a string.
	'''
	if isinstance(target, bytes):
		# Pass by keyword, as the file branch below does: the second positional
		# parameter of Lattice is not guaranteed to be the name.
		return Lattice(data=target, name=name)

	elif isinstance(target, str):
		# presumably list_files() expands <target> into a list of file paths - verify against its definition.
		allData = []
		for fileName in list_files(target):
			if fileName.endswith('.gz'):
				# A list command is run without the shell, so file names holding
				# spaces or shell metacharacters are handled safely.
				cmd = ['gunzip', '-c', fileName]
				out, err, _ = run_shell_command(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
				if out == b'':
					print(err.decode())
					raise WrongDataFormat('Failed to load Lattice.')
			else:
				try:
					with open(fileName, 'rb') as fr:
						out = fr.read()
				except Exception as e:
					print("Load lattice file defeated. Please make sure it is a lattice file avaliable.")
					raise e
			allData.append(out)

		# Every item is a bytes object here, so the pieces can be joined directly.
		return Lattice(data=b"".join(allData), name=name)

	else:
		raise UnsupportedType(f"Expected bytes object or lattice file but got: {type_name(target)}.")
Ejemplo n.º 19
0
def transform_feat(feat, matrixFile):
    '''
    Transform a feature by a transform matrix with the Kaldi <transform-feats> command.
    Typically, LDA, MLLT matrixes.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <matrixFile>: file name of the transform matrix.

    Return:
        A new exkaldi feature object. It is a bytes feature if <feat> was a BytesFeature,
        otherwise it is converted to numpy.

    Raise:
        WrongPath: <matrixFile> is not an existing file.
        UnsupportedType: <feat> is not an exkaldi feature object.
        KaldiProcessError: the Kaldi command returned a non-zero code.
    '''
    assert isinstance(
        matrixFile, str
    ), f"<transformMatrix> should be a file path but got: {type_name(matrixFile)}."
    if not os.path.isfile(matrixFile):
        raise WrongPath(f"No such file: {matrixFile}.")

    featType = type_name(feat)
    if featType not in ("BytesFeature", "NumpyFeature"):
        raise UnsupportedType(
            f"<feat> should exkaldi feature object but got: {featType}."
        )
    # Remember the input flavour so the result can be returned in the same one.
    bytesFlag = featType == "BytesFeature"
    if not bytesFlag:
        feat = feat.to_bytes()

    cmd = f'transform-feats {matrixFile} ark:- ark:-'

    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=feat.data)

    if cod != 0:
        print(err.decode())
        raise KaldiProcessError("Failed to transform feature.")

    newFeat = BytesFeature(out, name=f"tansform({feat.name})")
    return newFeat if bytesFlag else newFeat.to_numpy()
Ejemplo n.º 20
0
def flatten(item):
    '''
    Flatten an iterable object recursively.

    Args:
        <item>: iterable objects, string, list, tuple, set or NumPy array.
                An object which is not iterable is returned as a list of one item.

    Return:
        a list of flattened items.

    Raise:
        UnsupportedType: an element is none of int, float, str, list, tuple, set or NumPy array.
    '''
    if not isinstance(item, Iterable):
        return [item]

    flat = []
    for element in item:
        kind = type_name(element)
        if kind.startswith(("int", "float")):
            # Python or Numpy int/float value: a leaf.
            flat.append(element)
        elif kind.startswith("str"):
            # A string longer than one character is split into its characters.
            if len(element) > 1:
                flat.extend(flatten(element))
            else:
                flat.append(element)
        elif kind in ("list", "tuple", "set"):
            flat.extend(flatten(element))
        elif kind == "ndarray":
            # A 0-dimension array is kept as a leaf.
            if element.shape == ():
                flat.append(element)
            else:
                flat.extend(flatten(element))
        else:
            raise UnsupportedType(
                f"Expected list,tuple,set,str or Numpy array object but got {kind}."
            )

    return flat
Ejemplo n.º 21
0
def load_prob(target, name="prob", useSuffix=None):
    '''
    Load post probability data.

    Args:
        <target>: Python dict object, bytes object, exkaldi probability object, .ark file, .scp file, npy file.
        <name>: a string.
        <useSuffix>: a string. When target is file path, use this to specify the file type.
    Return:
        A BytesProbability or NumpyProbability object.
    Raise:
        UnsupportedType: <target> is none of the supported types.
    '''
    assert isinstance(
        name, str) and len(name) > 0, "Name shoud be a string avaliable."

    # Raw Python data: wrap it and verify its format.
    if isinstance(target, dict):
        prob = NumpyProbability(target, name)
        prob.check_format()
        return prob

    if isinstance(target, bytes):
        prob = BytesProbability(target, name)
        prob.check_format()
        return prob

    # Already a probability object: return a renamed deep copy.
    if isinstance(target, (NumpyProbability, BytesProbability)):
        prob = copy.deepcopy(target)
        prob.rename(name)
        return prob

    # File path: the kind of the result follows the kind of the loaded matrix.
    if isinstance(target, str):
        loaded = __read_data_from_file(target, useSuffix)
        if isinstance(loaded, BytesMatrix):
            return BytesProbability(loaded.data, name)
        return NumpyProbability(loaded.data, name)

    raise UnsupportedType(
        f"Expected Python dict, bytes object, exkaldi feature object or file path but got{type_name(target)}."
    )
Ejemplo n.º 22
0
    def send_report(self, info):
        '''
        Send information. It is retained until you do the statistics by using .collect_report().

        Args:
            <info>: a Python dict object including names and their values with int or float type,
                    such as {"epoch":epoch,"train_loss":loss,"train_acc":acc}.
                    The value can be Python int, float object, Numpy int, float object or Numpy ndarray with only one value.
                    Names are stored in lower case.

        Raise:
            UnsupportedType: a value is neither an int, a float nor a 0-dimension Numpy array.
        '''
        assert isinstance(info,
                          dict), "Expected <info> is a Python dict object."

        for name, value in info.items():
            assert isinstance(name, str) and len(
                name
            ) > 0, f"The name of info should be string avaliable but got {type_name(name)}."

            # Normalize the name before it is used as a key, so that the float
            # flags and the collected values share the same (lower-case) key.
            name = name.lower()

            valueDtype = type_name(value)
            if valueDtype.startswith("int"):
                # Python int object, Numpy int object
                pass
            elif valueDtype.startswith("float"):
                # Python float object, Numpy float object
                self.currentFieldIsFloat[name] = True
            elif valueDtype == "ndarray" and value.shape == ():
                # Numpy ndarray with only one value: flag every float precision,
                # like the Numpy float scalars above.
                if value.dtype.kind == "f":
                    self.currentFieldIsFloat[name] = True
            else:
                raise UnsupportedType(
                    f"Expected int or float value but got {type_name(value)}.")

            if name not in self.currentField.keys():
                self.currentField[name] = []
            self.currentField[name].append(value)
Ejemplo n.º 23
0
def nn_align(hmm, prob, trainGraphFile, transitionScale=1.0, acousticScale=0.1, 
				selfloopScale=0.1, beam=10, retry_beam=40, name="ali"):
	'''
	Align the neural network acoustic output probability with the Kaldi <align-compiled-mapped> command.

	Args:
		<hmm>: model file path or exkaldi HMM object (BaseHMM, MonophoneHMM or TriphoneHMM).
		<prob>: exkaldi probability object (BytesProbability or NumpyProbability).
		<trainGraphFile>: file name of the training graphs, read as an archive.
		<transitionScale>, <acousticScale>, <selfloopScale>, <beam>, <retry_beam>: options forwarded to the Kaldi command.
		<name>: a string, the name of the returned object.

	Return:
		An exkaldi BytesAlignmentTrans object.

	Raise:
		UnsupportedType: <prob> is not an exkaldi probability object.
		KaldiProcessError: the Kaldi command returned a non-zero code.
	'''
	probType = type_name(prob)
	if probType == "NumpyProbability":
		prob = prob.to_bytes()
	elif probType != "BytesProbability":
		raise UnsupportedType(f"Expected <prob> is an exkaldi probability object but got: {probType}.")

	# The temporary model file is only used when <hmm> is an object; it is closed on leaving the block.
	with tempfile.NamedTemporaryFile("wb+", suffix=".mdl") as hmmTemp:
		if isinstance(hmm, str):
			assert os.path.isfile(hmm), f"No such file: {hmm}."
			hmmFile = hmm
		else:
			assert type_name(hmm) in ["BaseHMM","MonophoneHMM","TriphoneHMM"], f"<hmm> should be exkaldi HMM object but got: {hmm}."
			hmmTemp.write(hmm.data)
			hmmTemp.seek(0)
			hmmFile = hmmTemp.name

		cmd = " ".join([
			"align-compiled-mapped",
			f"--transition-scale={transitionScale}",
			f"--acoustic-scale={acousticScale}",
			f"--self-loop-scale={selfloopScale}",
			f"--beam={beam}",
			f"--retry-beam={retry_beam}",
			f"{hmmFile}",
			f"ark:{trainGraphFile}",
			"ark:-",
			"ark:-",
		])

		out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=prob.data)

		if cod != 0:
			print(err.decode())
			raise KaldiProcessError("Failed to align probability.")

		return BytesAlignmentTrans(out, name=name)
Ejemplo n.º 24
0
def __read_data_from_file(fileName, useSuffix=None):
    '''
    Read matrix data from one or more files and merge it into one object.

    Args:
        <fileName>: a file-name string. It is expanded into a list of files by list_files().
        <useSuffix>: None, or a string whose last three characters are "ark", "scp" or "npy".
                     It is only used for files whose own suffix is none of these.
    Return:
        The merged data after check_format(). It is built on the numpy side when <useSuffix>
        is "npy", or when <useSuffix> is not given and the first file ends with "npy";
        otherwise it is built on the bytes side.
    '''
    if useSuffix is not None:
        assert isinstance(useSuffix, str), "Expected <useSuffix> is a string."
        useSuffix = useSuffix.strip().lower()[-3:]
    else:
        useSuffix = ""

    assert useSuffix in [
        "", "scp", "ark", "npy"
    ], f'Expected <useSuffix> is "ark", "scp" or "npy" but got "{useSuffix}".'

    if not isinstance(fileName, str):
        raise UnsupportedType(
            f'Expected <fileName> is file name-like string but got a {type_name(fileName)}.'
        )
    if os.path.isdir(fileName):
        raise WrongOperation(
            f"Expected file name but got a directory:{fileName}.")
    allFiles = list_files(fileName)

    allData_bytes = BytesMatrix()
    allData_numpy = NumpyMatrix()

    def loadNpyFile(npyFile):
        # NOTE(review): allow_pickle=True executes pickled code on load; only use it on trusted files.
        try:
            temp = np.load(npyFile, allow_pickle=True)
            # Each record is indexed as (utterance ID, matrix).
            data = {utt_mat[0]: utt_mat[1] for utt_mat in temp}
        except Exception as error:
            # Narrower than a bare "except" so Ctrl+C / SystemExit are not converted into a format error.
            raise UnsupportedType(
                f'Expected "npy" data with exkaldi format but got {npyFile}.'
            ) from error
        return NumpyMatrix(data)

    def loadArkScpFile(tableFile, suffix):
        ExkaldiInfo.vertify_kaldi_existed()

        if suffix == "ark":
            cmd = 'copy-feats ark:'
        else:
            cmd = 'copy-feats scp:'

        cmd += '{} ark:-'.format(tableFile)
        out, err, cod = run_shell_command(cmd,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE)
        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Copy feat defeated.')
        return BytesMatrix(out)

    # A dedicated loop name keeps the <fileName> argument intact.
    for oneFile in allFiles:
        sfx = oneFile[-3:].lower()
        if sfx not in ("npy", "ark", "scp"):
            # The file's own suffix wins; <useSuffix> is only the fallback.
            sfx = useSuffix

        if sfx == "npy":
            allData_numpy += loadNpyFile(oneFile)
        elif sfx in ("ark", "scp"):
            allData_bytes += loadArkScpFile(oneFile, sfx)
        else:
            raise UnsupportedType(
                'Unknown file suffix. You can assign the <useSuffix> with "scp", "ark" or "npy".'
            )

    # Guard the allFiles[0] lookup so an empty file list cannot raise IndexError.
    if useSuffix == "":
        wantNumpy = len(allFiles) > 0 and allFiles[0][-3:].lower() == "npy"
    else:
        wantNumpy = useSuffix == "npy"

    if wantNumpy:
        result = allData_numpy + allData_bytes.to_numpy()
    else:
        result = allData_bytes + allData_numpy.to_bytes()

    result.check_format()
    return result
Ejemplo n.º 25
0
def load_ali(target, aliType=None, name="ali", hmm=None):
    '''
    Load alignment data.

    Args:
        <target>: Python dict object, exkaldi alignment object, or a file path
                  (.npy file, or a kaldi alignment file, optionally gzip-compressed with a ".gz" suffix).
        <aliType>: None, or one of 'transitionID', 'phoneID', 'pdfID'. It decides which alignment object is returned.
        <name>: a non-empty string.
        <hmm>: file path or exkaldi HMM object. Only needed to convert a kaldi alignment
               file into 'phoneID' or 'pdfID' alignment.
    Return:
        An exkaldi alignment object. When files of <target> produce more than one kind of
        alignment object (for example .npy files mixed with kaldi files), a list of them.
    Raise:
        UnsupportedType if <target> or <hmm> has an unsupported type,
        WrongOperation if <aliType> is invalid or no data was loaded.
    '''
    assert isinstance(
        name, str) and len(name) > 0, "Name shoud be a string avaliable."

    ExkaldiInfo.vertify_kaldi_existed()

    def transform(data, cmd):
        # Run <cmd> and parse its text output ("utt id id id ...") into {utt: int32 array}.
        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=data)
        # Fail on a non-zero exit code OR empty output, the same rule used for plain file loading below.
        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to transform alignment.')

        result = {}
        for rawLine in BytesIO(out).readlines():
            fields = rawLine.decode().strip().split()
            if len(fields) == 0:
                # Skip blank lines instead of failing on fields[0].
                continue
            result[fields[0]] = np.array(fields[1:], dtype=np.int32)
        return result

    if isinstance(target, dict):
        if aliType is None:
            result = NumpyAlignment(target, name)
        elif aliType == "transitionID":
            result = NumpyAlignmentTrans(target, name)
        elif aliType == "phoneID":
            result = NumpyAlignmentPhone(target, name)
        elif aliType == "pdfID":
            result = NumpyAlignmentPdf(target, name)
        else:
            raise WrongOperation(
                f"<aliType> should be None, 'transitionID', 'phoneID' or 'pdfID' but got {aliType}."
            )
        result.check_format()
        return result

    elif type_name(target) in [
            "NumpyAlignment", "NumpyAlignmentTrans", "NumpyAlignmentPhone",
            "NumpyAlignmentPdf", "BytesAlignmentTrans"
    ]:
        # An existing alignment object is copied and renamed; <aliType> is not consulted here.
        result = copy.deepcopy(target)
        result.rename(name)
        return result

    elif isinstance(target, str):

        allFiles = list_files(target)

        # One accumulator per possible output type.
        results = {
            "NumpyAlignment": NumpyAlignment(),
            "NumpyAlignmentTrans": NumpyAlignmentTrans(),
            "NumpyAlignmentPhone": NumpyAlignmentPhone(),
            "NumpyAlignmentPdf": NumpyAlignmentPdf(),
            "BytesAlignmentTrans": BytesAlignmentTrans(),
        }

        for fileName in allFiles:
            fileName = os.path.abspath(fileName)

            if fileName.endswith(".npy"):
                loaded = __read_data_from_file(fileName, "npy")
                if aliType is None:
                    results["NumpyAlignment"] += NumpyAlignment(loaded.data)
                elif aliType == "transitionID":
                    results["NumpyAlignmentTrans"] += NumpyAlignmentTrans(
                        loaded.data)
                elif aliType == "phoneID":
                    results["NumpyAlignmentPhone"] += NumpyAlignmentPhone(
                        loaded.data)
                elif aliType == "pdfID":
                    results["NumpyAlignmentPdf"] += NumpyAlignmentPdf(
                        loaded.data)
                else:
                    raise WrongOperation(
                        f"<aliType> should be None, 'transitionID','phoneID' or 'pdfID' but got {aliType}."
                    )

            else:
                if fileName.endswith('.gz'):
                    cmd = f'gunzip -c {fileName}'
                else:
                    cmd = f'cat {fileName}'

                if aliType is None or aliType == "transitionID":
                    out, err, cod = run_shell_command(cmd,
                                                      stdout=subprocess.PIPE,
                                                      stderr=subprocess.PIPE)
                    if (isinstance(cod, int) and cod != 0) or out == b'':
                        print(err.decode())
                        raise ShellProcessError(
                            "Failed to get the alignment data from file.")
                    results["BytesAlignmentTrans"] += BytesAlignmentTrans(out)

                elif aliType in ("phoneID", "pdfID"):
                    # Keep the temp-file handle under its own name: it must stay
                    # bound until the "finally" below closes (and deletes) it.
                    hmmTemp = tempfile.NamedTemporaryFile("wb+")
                    try:
                        # "BaseHMM" is accepted as well, matching the type names nn_align checks.
                        if type_name(hmm) in ("HMM", "BaseHMM", "MonophoneHMM",
                                              "TriphoneHMM"):
                            hmm.save(hmmTemp)
                            hmmFileName = hmmTemp.name
                        elif isinstance(hmm, str):
                            if not os.path.isfile(hmm):
                                raise WrongPath(f"No such file:{hmm}.")
                            hmmFileName = hmm
                        else:
                            raise UnsupportedType(
                                f"<hmm> should be a filePath or exkaldi HMM and its sub-class object. but got {type_name(hmm)}."
                            )

                        # The converter is appended to the cat/gunzip stage so it reads the file from its stdin.
                        if aliType == "phoneID":
                            cmd += f" | ali-to-phones --per-frame=true {hmmFileName} ark:- ark,t:-"
                            results["NumpyAlignmentPhone"] += NumpyAlignmentPhone(
                                transform(None, cmd))
                        else:
                            cmd += f" | ali-to-pdf {hmmFileName} ark:- ark,t:-"
                            results["NumpyAlignmentPdf"] += NumpyAlignmentPdf(
                                transform(None, cmd))
                    finally:
                        hmmTemp.close()

                else:
                    raise WrongOperation(
                        f"<aliType> should be None, 'transitionID', 'phoneID' or 'pdfID' but got {aliType}."
                    )

        finalResult = []
        for obj in results.values():
            if not obj.is_void:
                obj.rename(name)
                finalResult.append(obj)

        if len(finalResult) == 0:
            raise WrongOperation(
                "<target> dose not include any data avaliable.")
        elif len(finalResult) == 1:
            finalResult = finalResult[0]

        return finalResult

    else:
        # Previously an unsupported <target> fell through and silently returned None.
        raise UnsupportedType(
            f"<target> should be a dict, exkaldi alignment object or file path but got {type_name(target)}."
        )
Ejemplo n.º 26
0
def record_voice(outFile,
                 seconds=None,
                 dtype="int8",
                 channels=1,
                 rate=16000,
                 chunkFrames=1000):
    '''
    Record voice from microphone and save it to a wav file (or return the samples).

    Args:
        <outFile>: wav file name; ".wav" is appended when it is missing.
                   If None, no file is written and the recorded samples are returned.
        <seconds>: If None, use ctrl+C to stop recording.
        <dtype>: 'int8','int16' or 'int32'.
        <channels>: channels, 1 or 2.
        <rate>: sample rate.
        <chunkFrames>: the frames every time to read from microphone stream.

    Return:
        the absolute path of out file, or, when <outFile> is None, a 1-d Numpy array
        of the raw samples whose data type matches <dtype>.
    '''

    if seconds is not None:
        assert isinstance(
            seconds, (int, float)
        ) and seconds > 0, f'Expected <seconds> is positive int or float value but got {type_name(seconds)}.'
    assert isinstance(
        rate, int) and rate > 0, f"<rate> shoule be positive int value."
    assert isinstance(
        chunkFrames, int
    ) and chunkFrames > 0, f"<chunkFrames> should be positive int value."
    assert channels in [1, 2
                        ], f"Expected <Channels> is 1 or 2 but got {channels}."
    if outFile is not None:
        # Checked before recording so a bad name does not throw away a finished recording.
        assert isinstance(
            outFile,
            str) and len(outFile) > 0, f"<outFile> should be a file name."

    # width: bytes per sample written to the wav header; npType: dtype used to decode raw bytes.
    if dtype == "int8":
        width = 1
        ft = pyaudio.paInt8
        npType = np.int8
        # NOTE(review): paInt8 is a signed format while 8-bit PCM wav data is
        # conventionally unsigned - confirm the saved 8-bit files play correctly.
    elif dtype == "int16":
        width = 2
        ft = pyaudio.paInt16
        npType = np.int16
    elif dtype == "int32":
        width = 4
        ft = pyaudio.paInt32
        npType = np.int32
    else:
        raise UnsupportedType(
            f"<dtype> should be int8, int16 or int32 but got {dtype}.")

    p = pyaudio.PyAudio()
    stream = None
    try:
        # Opened inside the try so PyAudio is still terminated if opening the stream fails.
        stream = p.open(format=ft,
                        channels=channels,
                        rate=rate,
                        input=True,
                        output=False)

        wavData = []
        if seconds is not None:
            # Count frames with integers: accumulating float seconds per chunk can drift.
            totalFrames = int(seconds * rate)
            fullChunks, lastRecordFrames = divmod(totalFrames, chunkFrames)
            for _ in range(fullChunks):
                wavData.append(stream.read(chunkFrames))
            if lastRecordFrames > 0:
                wavData.append(stream.read(lastRecordFrames))
        else:
            try:
                while True:
                    wavData.append(stream.read(chunkFrames))
            except KeyboardInterrupt:
                # ctrl+C is the documented way to stop an open-ended recording.
                pass

        rawData = b"".join(wavData)

        if outFile is None:
            # Decode with the recorded sample type; copy() keeps the returned array writable.
            return np.frombuffer(rawData, dtype=npType).copy()

        if outFile.rstrip()[-4:].lower() != ".wav":
            outFile += ".wav"
        with wave.open(outFile, 'wb') as wf:
            wf.setnchannels(channels)
            wf.setsampwidth(width)
            wf.setframerate(rate)
            wf.writeframes(rawData)
    finally:
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

    return os.path.abspath(outFile)
Ejemplo n.º 27
0
def decompress_feat(feat):
    '''
    Decompress a kaldi compressed feature whose data-type is "CM".

    Args:
        <feat>: an exkaldi bytes feature object.
    Return:
        A new exkaldi bytes feature object holding float ("FM") matrices, with the same name.
    Raise:
        WrongDataFormat if an entry does not carry the binary marker,
        UnsupportedType if an entry is not "CM" compressed data.

    This function is a cover of kaldi-io-for-python tools.
    For more information about it, please access to https://github.com/vesis84/kaldi-io-for-python/blob/master/kaldi_io/kaldi_io.py
    '''
    assert isinstance(
        feat,
        BytesFeature), "Expected <feat> is a exkaldi bytes feature object."

    def _read_compressed_mat(fd):
        # Read one compressed matrix from <fd>; return (float32 matrix of shape (rows, cols), rows, cols).

        # Format of header 'struct',
        global_header = np.dtype([('minvalue', 'float32'),
                                  ('range', 'float32'), ('num_rows', 'int32'),
                                  ('num_cols', 'int32')
                                  ])  # member '.format' is not written,
        per_col_header = np.dtype([('percentile_0', 'uint16'),
                                   ('percentile_25', 'uint16'),
                                   ('percentile_75', 'uint16'),
                                   ('percentile_100', 'uint16')])

        # Read global header,
        globmin, globrange, rows, cols = np.frombuffer(fd.read(16),
                                                       dtype=global_header,
                                                       count=1)[0]
        cols = int(cols)
        rows = int(rows)

        # The data is structed as [Colheader, ... , Colheader, Data, Data , .... ]
        #                         {           cols           }{     size         }
        col_headers = np.frombuffer(fd.read(cols * 8),
                                    dtype=per_col_header,
                                    count=cols)
        # Map the uint16 percentiles back to floats (1.5259e-05 is 1/65535).
        col_headers = np.array([
            np.array([x
                      for x in y]) * globrange * 1.52590218966964e-05 + globmin
            for y in col_headers
        ],
                               dtype=np.float32)
        # Positional shape: the "newshape" keyword of np.reshape is deprecated in recent NumPy.
        data = np.frombuffer(fd.read(cols * rows),
                             dtype='uint8',
                             count=cols * rows).reshape(cols,
                                                        rows)  # stored as col-major,

        mat = np.zeros((cols, rows), dtype='float32')
        p0 = col_headers[:, 0].reshape(-1, 1)
        p25 = col_headers[:, 1].reshape(-1, 1)
        p75 = col_headers[:, 2].reshape(-1, 1)
        p100 = col_headers[:, 3].reshape(-1, 1)
        mask_0_64 = (data <= 64)
        mask_193_255 = (data > 192)
        mask_65_192 = (~(mask_0_64 | mask_193_255))

        # Piecewise-linear decoding; each term is zeroed outside its own byte range by the mask.
        mat += (p0 + (p25 - p0) / 64. * data) * mask_0_64.astype(np.float32)
        mat += (p25 + (p75 - p25) / 128. *
                (data - 64)) * mask_65_192.astype(np.float32)
        mat += (p75 + (p100 - p75) / 63. *
                (data - 192)) * mask_193_255.astype(np.float32)

        return mat.T, rows, cols

    with BytesIO(feat.data) as sp:
        newData = []

        while True:
            # <data> collects the re-encoded entry: "<utt> " + "\0B" + "FM " + sizes + float matrix.
            data = b''
            uttBytes = b''
            while True:
                char = sp.read(1)
                data += char
                if char == b'' or char == b' ':
                    break
                uttBytes += char
            # Decode the whole ID at once: decoding byte by byte fails on multi-byte UTF-8 characters.
            utt = uttBytes.decode().strip()
            if utt == '':
                break

            # Compare the markers as raw bytes so corrupt input raises the intended
            # errors below instead of a UnicodeDecodeError.
            binarySymbol = sp.read(2)
            if binarySymbol != b'\0B':
                raise WrongDataFormat('Miss right binary symbol.')
            data += binarySymbol

            dataType = sp.read(3)
            if dataType != b'CM ':
                raise UnsupportedType("This is not a compressed binary data.")

            data += b'FM '
            matrix, rows, cols = _read_compressed_mat(sp)
            # Each size field is a byte 4 followed by a 4-byte unsigned integer.
            data += b'\04'
            data += struct.pack(np.dtype('uint32').char, rows)
            data += b'\04'
            data += struct.pack(np.dtype('uint32').char, cols)
            data += matrix.tobytes()
            newData.append(data)

    return BytesFeature(b''.join(newData), name=feat.name)
Ejemplo n.º 28
0
def use_cmvn(feat, cmvn, utt2spk=None, std=False):
    '''
    Apply CMVN statistics to feature.

    Args:
        <feat>: exkaldi feature object.
        <cmvn>: exkaldi CMVN statistics object.
        <utt2spk>: utt2spk file path or ScriptTable object.
        <std>: If True, apply std normalization.

    Return:
        A new feature object. It is a numpy feature when <feat> was a numpy feature,
        otherwise a bytes feature.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    # Remember the input flavour: <feat> is converted to bytes below, so it
    # can no longer be used to decide the type of the returned object.
    featType = type_name(feat)
    if featType == "BytesFeature":
        feat = feat.sort(by="utt")
    elif featType == "NumpyFeature":
        feat = feat.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi feature but got {type_name(feat)}.")

    if type_name(cmvn) == "BytesCMVNStatistics":
        cmvn = cmvn.sort(by="utt")
    elif type_name(cmvn) == "NumpyCMVNStatistics":
        cmvn = cmvn.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi CMVN statistics but got {type_name(cmvn)}.")

    cmvnTemp = tempfile.NamedTemporaryFile('wb+', suffix='_cmvn.ark')
    utt2spkTemp = tempfile.NamedTemporaryFile('w+',
                                              suffix="_utt2spk",
                                              encoding="utf-8")
    try:
        cmvnTemp.write(cmvn.data)
        cmvnTemp.seek(0)

        if std is True:
            # Kaldi options take the form --name=value; a separate "true" token
            # would be read as an extra positional argument.
            stdOption = " --norm-vars=true"
        else:
            stdOption = ""

        if utt2spk is None:
            cmd = f'apply-cmvn{stdOption} ark:{cmvnTemp.name} ark:- ark:-'
        else:
            if isinstance(utt2spk, str):
                if not os.path.isfile(utt2spk):
                    raise WrongPath(f"No such file:{utt2spk}.")
                utt2spkSorted = ScriptTable(
                    name="utt2spk").load(utt2spk).sort()
                utt2spkSorted.save(utt2spkTemp)
            elif isinstance(utt2spk, ScriptTable):
                utt2spkSorted = utt2spk.sort()
                utt2spkSorted.save(utt2spkTemp)
            else:
                raise UnsupportedType(
                    f"<utt2spk> should be a file path or ScriptTable object but got {type_name(utt2spk)}."
                )
            utt2spkTemp.seek(0)

            cmd = f'apply-cmvn{stdOption} --utt2spk=ark:{utt2spkTemp.name} ark:{cmvnTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to apply CMVN statistics.')

        newName = f"cmvn({feat.name},{cmvn.name})"
        result = BytesFeature(out, newName, indexTable=None)
        if featType == "NumpyFeature":
            return result.to_numpy()
        return result
    finally:
        cmvnTemp.close()
        utt2spkTemp.close()
Ejemplo n.º 29
0
def __compute_feature(wavFile, kaldiTool, useSuffix=None, name="feat"):
    '''
    Compute feature from wav file(s) by running a Kaldi feature tool.

    Args:
        <wavFile>: a file path string (expanded with "ls", so it may match several files)
                   or a ScriptTable object, which is saved to a temporary ".scp" file.
        <kaldiTool>: the command (with its options) run as "<kaldiTool> scp,p:<input> ark:-".
        <useSuffix>: None, or a string whose last three characters are "scp" or "wav".
                     It is only used for files whose own suffix is neither of them.
        <name>: name of the returned feature.
    Return:
        A BytesFeature object merged from the output of all files.
    '''
    if useSuffix is not None:
        assert isinstance(useSuffix, str), "Expected <useSuffix> is a string."
        useSuffix = useSuffix.strip().lower()[-3:]
    else:
        useSuffix = ""
    assert useSuffix in ["", "scp",
                         "wav"], 'Expected <useSuffix> is "scp" or "wav".'

    ExkaldiInfo.vertify_kaldi_existed()

    wavFileTemp = tempfile.NamedTemporaryFile("w+",
                                              suffix=".scp",
                                              encoding="utf-8")
    try:
        if isinstance(wavFile, str):
            if os.path.isdir(wavFile):
                raise WrongOperation(
                    f'Expected <wavFile> is file path but got a directory:{wavFile}.'
                )
            out, err, cod = run_shell_command(f'ls {wavFile}',
                                              stdout=subprocess.PIPE,
                                              stderr=subprocess.PIPE)
            if out == b'':
                raise WrongPath(f"No such file:{wavFile}.")
            allFiles = out.decode().strip().split('\n')
        elif isinstance(wavFile, ScriptTable):
            wavFile = wavFile.sort()
            wavFile.save(wavFileTemp)
            # Rewind (which also flushes) so the Kaldi tool sees the saved
            # content, as done after save() on other temp files in this module.
            wavFileTemp.seek(0)
            allFiles = [
                wavFileTemp.name,
            ]
        else:
            raise UnsupportedType(
                f'Expected filename-like string but got a {type_name(wavFile)}.'
            )

        results = []
        # A dedicated loop name keeps the <wavFile> argument intact.
        for onePath in allFiles:
            onePath = os.path.abspath(onePath)

            # The file's own suffix wins; <useSuffix> is only the fallback.
            sfx = onePath[-3:].lower()
            if sfx not in ("wav", "scp"):
                sfx = useSuffix

            if sfx == "wav":
                # Utterance ID: base name without its extension and without dots.
                # splitext gives the same ID as before for "*.wav" names and no
                # longer chops four arbitrary characters off other names.
                stem = os.path.splitext(os.path.basename(onePath))[0]
                uttID = "".join(stem.split("."))
                cmd = f"echo {uttID} {onePath} | {kaldiTool} scp,p:- ark:-"
            elif sfx == "scp":
                cmd = f"{kaldiTool} scp,p:{onePath} ark:-"
            else:
                raise UnsupportedType(
                    'Unknown file suffix. You can declare it by making <useSuffix> "wav" or "scp".'
                )

            out, err, cod = run_shell_command(cmd,
                                              stdout=subprocess.PIPE,
                                              stderr=subprocess.PIPE)
            # Test the exit code <cod>, not <out>: the old isinstance(out, int)
            # was never true, so a failing tool with partial output was accepted.
            if (isinstance(cod, int) and cod != 0) or out == b'':
                print(err.decode())
                raise KaldiProcessError(f'Failed to compute feature:{name}.')
            results.append(BytesFeature(out))
    finally:
        wavFileTemp.close()

    if len(results) == 0:
        raise WrongOperation("No any feature date in file path.")

    result = results[0]
    for other in results[1:]:
        result += other
    result.rename(name)
    return result
Ejemplo n.º 30
0
	def get_1best(self, wordSymbolTable=None, hmm=None, lmwt=1, acwt=1.0, phoneLevel=False):
		'''
		Get 1 best result with text formation.

		Args:
			<wordSymbolTable>: None or file path or ListTable object or LexiconBank object.
				If None, the word symbol table stored in this lattice is used.
			<hmm>: None or file path or HMM object. If None, the HMM stored in this lattice is used.
			<lmwt>: language model weight, a non-negative int.
			<acwt>: acoustic model weight.
			<phoneLevel>: If True, return phone results.
		Return:
			An exkaldi Transcription object.
		'''
		ExkaldiInfo.vertify_kaldi_existed()

		if self.is_void:
			raise WrongOperation('No any data in lattice.')

		assert isinstance(lmwt, int) and lmwt >=0, "Expected <lmwt> is a non-negative int number."

		if wordSymbolTable is None:
			assert self.wordSymbolTable is not None, "<wordSymbolTable> is necessary because no wordSymbol table is avaliable."
			wordSymbolTable = self.wordSymbolTable

		if hmm is None:
			# The message used to be a copy of the word-symbol-table one.
			assert self.hmm is not None, "<hmm> is necessary because no HMM is available."
			hmm = self.hmm

		modelTemp = tempfile.NamedTemporaryFile("wb+", suffix=".mdl")
		wordSymbolTemp = tempfile.NamedTemporaryFile("w+", suffix="_words.txt", encoding="utf-8")

		try:
			if isinstance(wordSymbolTable, str):
				assert os.path.isfile(wordSymbolTable), f"No such file: {wordSymbolTable}."
				wordsFile = wordSymbolTable
			elif type_name(wordSymbolTable) == "LexiconBank":
				# A lexicon bank holds both tables; dump the one matching the requested level.
				if phoneLevel:
					wordSymbolTable.dump_dict("phones", wordSymbolTemp)
				else:
					wordSymbolTable.dump_dict("words", wordSymbolTemp)
				wordsFile = wordSymbolTemp.name
			elif type_name(wordSymbolTable) == "ListTable":
				wordSymbolTable.save(wordSymbolTemp)
				wordSymbolTemp.seek(0)
				wordsFile = wordSymbolTemp.name
			else:
				raise UnsupportedType(f"<wordSymbolTable> should be file name, LexiconBank object or ListTable object but got: {type_name(wordSymbolTable)}.")

			if isinstance(hmm, str):
				assert os.path.isfile(hmm), f"No such file: {hmm}."
				hmmFile = hmm
			elif type_name(hmm) in ["MonophoneHMM","TriphoneHMM"]:
				hmm.save(modelTemp)
				hmmFile = modelTemp.name
			else:
				raise UnsupportedType(f"<hmm> should be file name, exkaldi HMM object but got: {type_name(hmm)}.")

			# For phone results the lattice is first piped through lattice-align-phones.
			if phoneLevel:
				cmd0 = f'lattice-align-phones --replace-output-symbols=true {hmmFile} ark:- ark:- | '
			else:
				cmd0 = ""

			cmd1 = f"lattice-best-path --lm-scale={lmwt} --acoustic-scale={acwt} --word-symbol-table={wordsFile} --verbose=2 ark:- ark,t:- "
			cmd = cmd0 + cmd1

			out, err, cod = run_shell_command(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, inputs=self.data)
			if cod != 0 or out == b'':
				print(err.decode())
				raise KaldiProcessError('Failed to get 1-best from lattice.')

			if phoneLevel:
				newName = "1-best-phones"
			else:
				newName = "1-best-words"

			results = Transcription(name=newName)
			# Each output line is "<utt> <text>"; the loop name avoids shadowing the "re" module.
			for line in out.decode().strip().split("\n"):
				fields = line.strip().split(maxsplit=1)
				if len(fields) == 0:
					continue
				elif len(fields) == 1:
					# An utterance with no text is kept, with a single space as its text.
					results[fields[0]] = " "
				else:
					results[fields[0]] = fields[1]
			return results

		finally:
			modelTemp.close()
			wordSymbolTemp.close()