def compute_cmvn_stats(feat, spk2utt=None, name="cmvn"):
    '''
    Compute CMVN statistics by piping the feature data through the Kaldi
    <compute-cmvn-stats> command.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <spk2utt>: None, a spk2utt file path, or an exkaldi ScriptTable object.
                   When given, it is sorted, written to a temporary file and
                   passed to the command via --spk2utt.
        <name>: a string used as the name of the returned object.
    Return:
        An exkaldi BytesCMVNStatistics object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    featType = type_name(feat)
    if featType == "BytesFeature":
        feat = feat.sort("utt")
    elif featType == "NumpyFeature":
        feat = feat.sort("utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected <feat> is a exkaldi feature object but got {type_name(feat)}."
        )

    # The temporary file is removed as soon as the with-block is left.
    with tempfile.NamedTemporaryFile("w+", encoding="utf-8") as spk2uttTemp:
        if spk2utt is None:
            cmd = 'compute-cmvn-stats ark:- ark:-'
        else:
            if isinstance(spk2utt, str):
                if not os.path.isfile(spk2utt):
                    raise WrongPath(f"No such file:{spk2utt}.")
                table = ScriptTable(name="spk2utt").load(spk2utt)
            elif isinstance(spk2utt, ScriptTable):
                table = spk2utt
            else:
                raise UnsupportedType(
                    f"<spk2utt> should be a file path or ScriptTable object but got {type_name(spk2utt)}."
                )
            table.sort().save(spk2uttTemp)
            # Rewind before the Kaldi command opens the file by name.
            spk2uttTemp.seek(0)
            cmd = f'compute-cmvn-stats --spk2utt=ark:{spk2uttTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        failed = (isinstance(cod, int) and cod != 0) or out == b''
        if failed:
            print(err.decode())
            raise KaldiProcessError('Failed to compute CMVN statistics.')
        return BytesCMVNStatistics(out, name, indexTable=None)
def use_fmllr(feat, transMatrix, utt2spkFile):
    '''
    Apply per-speaker fMLLR transform matrices to a feature with the Kaldi
    <transform-feats> command.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <transMatrix>: exkaldi fMLLR transform matrix object
                       (BytesFmllrMatrix or NumpyFmllrMatrix).
        <utt2spkFile>: utt2spk file name, passed to --utt2spk.
    Return:
        A new exkaldi feature object of the same kind (bytes or numpy) as <feat>.
    '''
    featType = type_name(feat)
    if featType == "BytesFeature":
        bytesFlag = True
        feat = feat.sort(by="utt")
    elif featType == "NumpyFeature":
        bytesFlag = False
        feat = feat.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"<feat> should exkaldi feature object but got: {type_name(feat)}."
        )

    matrixType = type_name(transMatrix)
    if matrixType == "BytesFmllrMatrix":
        transMatrix = transMatrix.sort(by="utt")
    elif matrixType == "NumpyFmllrMatrix":
        transMatrix = transMatrix.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"<transMatrix> should exkaldi fMLLR transform matrix object but got: {type_name(transMatrix)}."
        )

    # The matrices go to a temporary archive; the features go through stdin.
    with tempfile.NamedTemporaryFile("wb+", suffix="_trans.ark") as transTemp:
        transTemp.write(transMatrix.data)
        transTemp.seek(0)

        cmd = f'transform-feats --utt2spk=ark:{utt2spkFile} ark:{transTemp.name} ark:- ark:-'
        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)
        if cod != 0:
            print(err.decode())
            raise KaldiProcessError(
                "Failed to transform feature to fMLLR feature.")

        newFeat = BytesFeature(out, name=f"fmllr({feat.name})")
        return newFeat if bytesFlag else newFeat.to_numpy()
def am_rescore(self, hmm, feat):
    '''
    Replace the acoustic scores of this lattice using an HMM-GMM model,
    by running the Kaldi <gmm-rescore-lattice> command.

    Args:
        <hmm>: exkaldi HMM object (BaseHMM, MonophoneHMM, TriphoneHMM) or a model file path.
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
    Return:
        A new Lattice object carrying this lattice's wordSymbolTable and hmm.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if self.is_void:
        raise WrongOperation('No any lattice data.')

    # Both temporary files are closed (and removed) even if the second one
    # cannot be created or the command fails.
    with tempfile.NamedTemporaryFile("wb+", suffix=".mdl") as hmmTemp, \
            tempfile.NamedTemporaryFile("wb+", suffix=".ark") as featTemp:

        if isinstance(hmm, str):
            assert os.path.isfile(hmm), f"No such file: {hmm}."
            hmmFile = hmm
        elif type_name(hmm) in ["BaseHMM", "MonophoneHMM", "TriphoneHMM"]:
            hmmTemp.write(hmm.data)
            hmmTemp.seek(0)
            hmmFile = hmmTemp.name
        else:
            raise UnsupportedType(f"<hmm> should be file path or exkaldi HMM object but got: {type_name(hmm)}.")

        if type_name(feat) == "BytesFeature":
            feat = feat.sort(by="utt")
        elif type_name(feat) == "NumpyFeature":
            # The archive file needs raw bytes, so a numpy feature has to be
            # converted to its bytes form (the old code called to_numpy()).
            feat = feat.sort(by="utt").to_bytes()
        else:
            raise UnsupportedType(f"<feat> should be exkaldi feature object but got: {type_name(feat)}.")

        featTemp.write(feat.data)
        featTemp.seek(0)
        featFile = featTemp.name

        # The lattice itself is fed through stdin.
        cmd = f"gmm-rescore-lattice {hmmFile} ark:- ark:{featFile} ark:-"
        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=self.data)

        if cod != 0 or out == b'':
            print(err.decode())
            raise KaldiProcessError("Failed to rescore lattice.")

        newName = f"am_rescore({self.name})"
        return Lattice(data=out,
                       wordSymbolTable=self.wordSymbolTable,
                       hmm=self.hmm,
                       name=newName)
def run_shell_command(cmd, stdin=None, stdout=None, stderr=None, inputs=None, env=None):
    '''
    Run a command in a subprocess and wait for it to finish.

    Args:
        <cmd>: a string (run through the shell) or a list (run directly).
        <stdin>,<stdout>,<stderr>: forwarded to subprocess.Popen.
        <inputs>: None, a string or a bytes object sent to the process; a string is encoded first.
        <env>: environment of the process. If None, ExkaldiInfo.ENV is used.
    Return:
        out, err, returnCode
    '''
    if not isinstance(cmd, (str, list)):
        raise UnsupportedType(
            "<cmd> should be a string, or a list of commands and its' options."
        )
    # Only a plain string is handed to the shell.
    useShell = isinstance(cmd, str)

    if env is None:
        env = ExkaldiInfo.ENV

    if isinstance(inputs, str):
        inputs = inputs.encode()
    elif inputs is not None and not isinstance(inputs, bytes):
        raise UnsupportedType(
            f"Expected <inputs> is string or bytes object but got {type_name(inputs)}."
        )

    process = subprocess.Popen(cmd,
                               shell=useShell,
                               stdin=stdin,
                               stdout=stdout,
                               stderr=stderr,
                               env=env)
    out, err = process.communicate(input=inputs)
    process.wait()

    return out, err, process.returncode
def load_ngrams(target, name="gram"):
    '''
    Load an N-Grams language model from an arpa or binary file.

    Args:
        <target>: file path with suffix .arpa or .binary.
        <name>: a string used as the name of the returned object.
    Return:
        A KenNGrams object. For an .arpa file the model is first converted to a
        temporary binary file and the object's _path is set to the arpa path.
    '''
    declare.is_file("target", target)
    target = target.strip()

    with FileHandleManager() as fhm:
        if target.endswith(".binary"):
            return KenNGrams(target, name=name)

        if not target.endswith(".arpa"):
            raise UnsupportedType(
                f"Unknown suffix. Language model file should has a suffix .arpa or .binary but got: {target}."
            )

        # Convert the arpa text model into a temporary binary model first.
        binaryTemp = fhm.create("wb+", suffix=".binary")
        arpa_to_binary(target, binaryTemp.name)
        binaryTemp.seek(0)
        model = KenNGrams(binaryTemp.name, name=name)
        model._path = target
        return model
def load_trans(target, name="transcription"):
    '''
    Load a transcription from a file or from a dict-like object.

    Args:
        <target>: transcription file path, or a dict, Transcription or ScriptTable object
                  whose keys are utterance IDs and whose values are text strings.
        <name>: a string.
    Return:
        An exkaldi Transcription object.
    '''
    if type_name(target) in ["dict", "Transcription", "ScriptTable"]:
        # Validate every record before wrapping the mapping.
        for uttID, text in target.items():
            assert isinstance(
                uttID, str) and len(uttID) > 0, "Utterance ID should be a string."
            assert isinstance(text, str), "Utterance text should a string."
        return Transcription(target, name)

    if isinstance(target, str):
        assert os.path.isfile(target), f"No such file:{target}."
        loaded = Transcription(name=name)
        loaded.load(target)
        return loaded

    raise UnsupportedType(
        "<target> should be file path, dict object or ScriptTable object.")
def __read_one_record_from_ark(fp):
    '''
    Read one utterance record from the opened file pointer of a binary archive file.
    It is used to generate bytes index table.

    Args:
        <fp>: a file object opened in binary mode, positioned at the start of a record.
    Return:
        (utt, frames, dataSize): the utterance ID, the number of frames (rows) and the
        record size in bytes. (None, None, None) is returned at the end of the file.
    Raise:
        WrongDataFormat if the record does not look like a Kaldi binary archive record.
        UnsupportedType if the record is a compressed matrix.
    '''
    # Read the utterance ID. The raw bytes are collected and decoded once, so that
    # a multi-byte UTF-8 character is not decoded from a partial byte sequence.
    rawUtt = bytearray()
    while True:
        char = fp.read(1)
        if (char == b'') or (char == b' '):
            break
        rawUtt += char
    utt = bytes(rawUtt).decode().strip()

    if utt == '':
        if fp.read() == b'':
            return (None, None, None)
        else:
            fp.close()
            raise WrongDataFormat(
                "Miss utterance ID before utterance. This may not be complete Kaldi archeve table file."
            )

    # The record size is counted in bytes, so measure the encoded ID.
    uttSize = len(utt.encode())

    # Read the data header.
    binarySymbol = fp.read(2).decode()
    if binarySymbol != '\0B':
        fp.close()
        raise WrongDataFormat(
            "Miss binary symbol before utterance. This may not be Kaldi binary archeve table file."
        )

    sizeSymbol = fp.read(1).decode()
    if sizeSymbol == '\4':
        # A 4-byte int32 count follows; every entry then takes 5 bytes.
        frames = int(np.frombuffer(fp.read(4), dtype='int32', count=1)[0])
        fp.read(frames * 5)  # only move the handle past the payload
        # 8 = separator space (1) + "\0B" (2) + size symbol (1) + int32 count (4)
        dataSize = uttSize + 8 + frames * 5
        return (utt, frames, dataSize)

    dataType = sizeSymbol + fp.read(2).decode()
    if dataType == 'CM ':
        fp.close()
        raise UnsupportedType(
            "Unsupported to generate index table from compressed archive table. Please decompress it firstly."
        )
    elif dataType == 'FM ':
        sampleSize = 4
    elif dataType == 'DM ':
        sampleSize = 8
    else:
        fp.close()
        raise WrongDataFormat(
            f"This may not be Kaldi archeve table file.")

    s1, rows, s2, cols = np.frombuffer(fp.read(10),
                                       dtype="int8,int32,int8,int32",
                                       count=1)[0]
    rows = int(rows)
    cols = int(cols)
    fp.read(rows * cols * sampleSize)  # only move the handle past the payload
    # 16 = separator space (1) + "\0B" (2) + type token (3) + rows/cols header (10)
    dataSize = uttSize + 16 + rows * cols * sampleSize
    return (utt, rows, dataSize)
def perplexity(self, sentence):
    '''
    Compute the perplexity of a sentence with the wrapped language model.

    Args:
        <sentence>: a string of space-separated words without boundary symbols,
                    or an exkaldi Transcription object.
    Return:
        If <sentence> is a string, the value returned by the model's perplexity().
        Otherwise an exkaldi Metric object holding one value per utterance ID.
    '''

    def compute_single(text):
        # A text without any space is suspicious, but it is still scored.
        if " " not in text:
            print(
                f"Warning: sentence doesn't seem to be separated by spaces or extremely short: {text}."
            )
        return self.__model.perplexity(text)

    if isinstance(sentence, str):
        return compute_single(sentence)

    if type_name(sentence) != "Transcription":
        raise UnsupportedType(
            f"<sentence> should be string or exkaldi Transcription object ut got: {type_name(sentence)}."
        )

    scores = {}
    for uttID, txt in sentence.items():
        assert isinstance(
            txt, str
        ), f"Transcription should be string od words but got:{type_name(txt)} at utt-ID {uttID}."
        scores[uttID] = compute_single(txt)

    return Metric(scores, name=f"LMperplexity({sentence.name})")
def score(self, sentence, bos=True, eos=True):
    '''
    Score a sentence with the wrapped language model.

    Args:
        <sentence>: a string without boundary symbols, or an exkaldi Transcription object.
        <bos>: If True, add <s> to the head.
        <eos>: If True, add </s> to the tail.
    Return:
        If <sentence> is a string, the value returned by the model's score().
        Otherwise an exkaldi Metric object holding one value per utterance ID.
    '''

    def compute_single(text, withBos, withEos):
        # A text without any space is suspicious, but it is still scored.
        if " " not in text:
            print(
                f"Warning: sentence doesn't seem to be separated by spaces or extremely short: {text}."
            )
        return self.__model.score(text, withBos, withEos)

    if isinstance(sentence, str):
        return compute_single(sentence, bos, eos)

    if type_name(sentence) != "Transcription":
        raise UnsupportedType(
            f"<sentence> should be string or exkaldi Transcription object ut got: {type_name(sentence)}."
        )

    scores = {}
    for uttID, txt in sentence.items():
        assert isinstance(
            txt, str
        ), f"Transcription should be string od words but got:{type_name(txt)} at utt-ID {uttID}."
        scores[uttID] = compute_single(txt, bos, eos)

    return Metric(scores, name=f"LMscore({sentence.name})")
def compute_postprob_norm(ali, posrProbDim):
    '''
    Compute alignment counts in order to normalize acoustic model posterior probability.
    The counting is done by the Kaldi <analyze-counts> command.

    Args:
        <ali>: exkaldi NumpyAlignmentPhone or NumpyAlignmentPdf object.
        <posrProbDim>: the dimensionality of posterior probability, passed to --counts-dim.
    Return:
        A numpy array: the natural log of each count divided by the sum of all counts.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if type_name(ali) not in ["NumpyAlignmentPhone", "NumpyAlignmentPdf"]:
        raise UnsupportedType(
            f'Expected exkaldi AlignmentPhone or but got a {type_name(ali)}.')

    cmd = f"analyze-counts --print-args=False --verbose=0 --binary=false --counts-dim={posrProbDim} ark:- -"
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=ali.data)

    failed = (isinstance(cod, int) and cod != 0) or out == b""
    if failed:
        print(err.decode())
        raise KaldiProcessError('Analyze counts defailed.')

    # The text output is a bracketed vector: strip the brackets, split on whitespace.
    tokens = out.decode().strip().strip("[]").strip().split()
    counts = np.array(tokens, dtype=np.int32)
    return np.log(counts / np.sum(counts))
def ctc_greedy_search(prob, vocabs, blankID=None):
    '''
    The best path (greedy) decoding algorithm: take the arg-max of every frame,
    merge consecutive repeats and drop the blank symbol.

    Args:
        <prob>: An exkaldi probability object (BytesProbability or NumpyProbability).
        <vocabs>: a list of vocabulary. Its length must equal the dimensionality of <prob>,
                  or that dimensionality minus one.
        <blankID>: the ID of the blank symbol. If None, the last dimension of <prob> is used.
    Return:
        An exkaldi Transcription object of decoding results.
    '''
    assert isinstance(
        vocabs, list), f"<vocabs> must be a list of vocabulary but got {vocabs}."

    probType = type_name(prob)
    if probType == "BytesProbability":
        prob = prob.to_numpy()
    elif probType != "NumpyProbability":
        raise UnsupportedType(
            f"<prob> should be an exkaldi probability object but got {type_name(prob)}."
        )

    probDim = prob.dim
    lastID = probDim - 1
    vocabSize = len(vocabs)

    if vocabSize not in (probDim, lastID):
        raise WrongDataFormat(
            f"The dimensibality of probability {probDim} does not match the numbers of words {len(vocabs)}."
        )

    if blankID is None:
        blankID = lastID
    elif vocabSize == probDim:
        assert isinstance(
            blankID, int
        ) and 0 <= blankID < probDim, f"BlankID {blankID} is out of range of int sequences from 0 to {probDim-1}."
    else:
        # The vocabulary has no entry for the blank, so it can only be the last ID.
        assert blankID == lastID, f"The dimensibality of probability is {probDim} but only have {len(vocabs)} words. In this case, blank ID must be {probDim-1} but got {blankID}"

    results = Transcription(name="bestPathResult")
    # NOTE(review): `items` is read as an attribute here, not called - confirm it is a property.
    for utt, pb in prob.items:
        assert isinstance(pb, np.ndarray) and len(
            pb.shape) == 2, "Unsupported probability matrix formatation."
        bestPath = np.argmax(pb, 1)
        # groupby merges runs of identical IDs into one entry.
        collapsed = []
        for ID, _ in groupby(bestPath):
            if ID != blankID:
                collapsed.append(vocabs[ID])
        try:
            results[utt] = " ".join(collapsed)
        except Exception as e:
            print("<vocab> might has non-string items.")
            raise e

    return results
def loadNpyFile(fileName):
    '''
    Load an exkaldi-format .npy file into a dict.

    The file is expected to hold a sequence of records whose first item is the
    utterance ID and whose second item is the data of that utterance.

    Args:
        <fileName>: path of the .npy file.
    Return:
        A dict mapping record[0] to record[1].
    Raise:
        UnsupportedType if the file cannot be loaded or has another layout.
    '''
    try:
        # NOTE: allow_pickle=True unpickles objects, which can run arbitrary code.
        # Only load files from a trusted source.
        temp = np.load(fileName, allow_pickle=True)
        data = {record[0]: record[1] for record in temp}
    except Exception as e:
        # Catch Exception only, so that KeyboardInterrupt/SystemExit still
        # propagate, and keep the original error as the cause.
        raise UnsupportedType(
            f'This is not a valid Exkaldi npy file: {fileName}.') from e
    else:
        return data
def __init__(self, filePath, name="ngram"):
    '''
    Wrap a KenLM binary language model file.

    Args:
        <filePath>: path of a KenLM binary model file.
        <name>: a string.
    Raise:
        UnsupportedType if the file does not start with the expected KenLM header.
    '''
    declare.is_file("filePath", filePath)

    with open(filePath, "rb") as fr:
        # Undecodable bytes are replaced instead of raising UnicodeDecodeError, so
        # that any non-KenLM binary file is reported through UnsupportedType below.
        t = fr.read(50).decode(errors="replace").strip()
    if t != "mmap lm http://kheafield.com/code format version 5":
        raise UnsupportedType(
            "This may be not a KenLM binary model format.")

    super(KenNGrams, self).__init__(data=b"placeholder", name=name)
    self.__model = kenlm.Model(filePath)
    self._path = None
def load_fmllr(target, name="prob", useSuffix=None):
    '''
    Load fmllr transform matrix data.

    Args:
        <target>: Python dict object, bytes object, exkaldi fmllr matrix object,
                  index table object, or a file path (.ark, .scp or .npy file).
        <name>: a string.
        <useSuffix>: "ark" or "scp" or "npy". It is forwarded to the file reader
                     when <target> is a file path.
    Return:
        A BytesFmllrMatrix or NumpyFmllrMatrix object.
    '''
    declare.is_valid_string("name", name)

    if isinstance(target, dict):
        loaded = NumpyFmllrMatrix(target, name)
        loaded.check_format()
        return loaded

    if isinstance(target, bytes):
        loaded = BytesFmllrMatrix(target, name)
        loaded.check_format()
        return loaded

    if isinstance(target, (NumpyFmllrMatrix, BytesFmllrMatrix)):
        # Work on a copy so the caller's object keeps its own name.
        duplicate = copy.deepcopy(target)
        duplicate.rename(name)
        return duplicate

    if isinstance(target, str):
        bytesPart, numpyPart, dataType = __read_data_from_file(
            target, useSuffix)
        # The left operand of the addition is the one matching the file type.
        if dataType == "npy":
            merged = NumpyFmllrMatrix(numpyPart) + BytesFmllrMatrix(bytesPart)
        else:
            merged = BytesFmllrMatrix(bytesPart) + NumpyFmllrMatrix(numpyPart)
        merged.rename(name)
        return merged

    if isinstance(target, ArkIndexTable):
        return target.fetch(arkType="fmllrMat", name=name)

    raise UnsupportedType(
        f"Expected Python dict,bytes object,exkaldi fmllr matrix object,index table object or file path but got{type_name(target)}."
    )
def __init__(self, filePath, name="ngram"):
    '''
    Wrap a KenLM binary language model file.

    Args:
        <filePath>: path of a KenLM binary model file.
        <name>: a string.
    Raise:
        WrongPath if the file does not exist.
        UnsupportedType if the file does not start with the expected KenLM header.
    '''
    assert isinstance(
        filePath,
        str), f"<filePath> should be string but got {type_name(filePath)}."
    if not os.path.isfile(filePath):
        raise WrongPath(f"No such file:{filePath}.")

    with open(filePath, "rb") as fr:
        # Undecodable bytes are replaced instead of raising UnicodeDecodeError, so
        # that any non-KenLM binary file is reported through UnsupportedType below.
        t = fr.read(50).decode(errors="replace").strip()
    if t != "mmap lm http://kheafield.com/code format version 5":
        raise UnsupportedType(
            "This is not a KenLM binary model formation.")

    super(KenNGrams, self).__init__(data=b"kenlm", name=name)
    self.__model = kenlm.Model(filePath)
def loadNpyFile(fileName):
    '''
    Load an exkaldi-format .npy file into a NumpyMatrix object.

    The file is expected to hold a sequence of records whose first item is the
    utterance ID and whose second item is the data of that utterance.

    Args:
        <fileName>: path of the .npy file.
    Return:
        A NumpyMatrix object built from the {record[0]: record[1]} dict.
    Raise:
        UnsupportedType if the file cannot be loaded or has another layout.
    '''
    try:
        # NOTE: allow_pickle=True unpickles objects, which can run arbitrary code.
        # Only load files from a trusted source.
        temp = np.load(fileName, allow_pickle=True)
        data = {record[0]: record[1] for record in temp}
    except Exception as e:
        # Catch Exception only, so that KeyboardInterrupt/SystemExit still
        # propagate, and keep the original error as the cause.
        raise UnsupportedType(
            f'Expected "npy" data with exkaldi format but got {fileName}.') from e
    else:
        return NumpyMatrix(data)
def use_cmvn_sliding(feat, windowsSize=None, std=False):
    '''
    Apply sliding-window CMVN with the Kaldi <apply-cmvn-sliding> command.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <windowsSize>: window size passed to --cmn-window. If None, the largest
                       length found in feat.lens[1], rounded up to a multiple of 100, is used.
        <std>: a bool value, passed to --norm-vars.
    Return:
        An exkaldi BytesFeature object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if not isinstance(feat, BytesFeature):
        if type_name(feat) == "NumpyFeature":
            feat = feat.to_bytes()
        else:
            raise UnsupportedType(
                f"Expected <feat> is an exkaldi feature object but got {type_name(feat)}."
            )

    if windowsSize is None:
        longest = max(length for _, length in feat.lens[1])
        windowsSize = math.ceil(longest / 100) * 100
    else:
        assert isinstance(windowsSize,
                          int), "Expected <windowsSize> is an int value."

    # Kaldi expects a lower-case boolean literal on the command line.
    std = 'true' if std == True else 'false'

    cmd = f'apply-cmvn-sliding --cmn-window={windowsSize} --min-cmn-window=100 --norm-vars={std} ark:- ark:-'
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=feat.data)

    failed = (isinstance(cod, int) and cod != 0) or out == b''
    if failed:
        print(err.decode())
        raise KaldiProcessError('Failed to use sliding CMVN.')

    return BytesFeature(out, f"cmvn({feat.name},{windowsSize})", indexTable=None)
def load_lat(target, name="lat"):
    '''
    Load lattice data.

    Args:
        <target>: bytes object, or a file path. The path is expanded with list_files();
                  files ending with .gz are decompressed with gunzip, the others
                  are read as binary files. All contents are concatenated.
        <name>: a string.
    Return:
        An exkaldi Lattice object.
    '''
    if isinstance(target, bytes):
        return Lattice(target, name)

    if not isinstance(target, str):
        raise UnsupportedType(f"Expected bytes object or lattice file but got: {type_name(target)}.")

    chunks = []
    for fileName in list_files(target):
        if fileName.endswith('.gz'):
            cmd = f'gunzip -c {fileName}'
            out, err, _ = run_shell_command(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # Empty output is the only failure signal checked here.
            if out == b'':
                print(err.decode())
                raise WrongDataFormat('Failed to load Lattice.')
        else:
            try:
                with open(fileName, 'rb') as fr:
                    out = fr.read()
            except Exception as e:
                print("Load lattice file defeated. Please make sure it is a lattice file avaliable.")
                raise e
        chunks.append(out)

    try:
        allData = b"".join(chunks)
    except Exception as e:
        raise WrongOperation("Only support binary format lattice file.")

    return Lattice(data=allData, name=name)
def transform_feat(feat, matrixFile):
    '''
    Transform a feature by a transform matrix with the Kaldi <transform-feats> command.
    Typically, LDA, MLLT matrixes.

    Args:
        <feat>: exkaldi feature object (BytesFeature or NumpyFeature).
        <matrixFile>: path of the transform matrix file.
    Return:
        A new exkaldi feature object of the same kind (bytes or numpy) as <feat>.
    '''
    assert isinstance(
        matrixFile, str
    ), f"<transformMatrix> should be a file path but got: {type_name(matrixFile)}."
    if not os.path.isfile(matrixFile):
        raise WrongPath(f"No such file: {matrixFile}.")

    featType = type_name(feat)
    if featType not in ("BytesFeature", "NumpyFeature"):
        raise UnsupportedType(
            f"<feat> should exkaldi feature object but got: {type_name(feat)}."
        )
    # Remember the input kind so the result can be returned in the same form.
    bytesFlag = featType == "BytesFeature"
    if not bytesFlag:
        feat = feat.to_bytes()

    cmd = f'transform-feats {matrixFile} ark:- ark:-'
    out, err, cod = run_shell_command(cmd,
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.PIPE,
                                      inputs=feat.data)
    if cod != 0:
        print(err.decode())
        raise KaldiProcessError("Failed to transform feature.")

    newFeat = BytesFeature(out, name=f"tansform({feat.name})")
    return newFeat if bytesFlag else newFeat.to_numpy()
def flatten(item):
    '''
    Flatten an iterable object.

    Args:
        <item>: iterable objects, string, list, tuple, set or NumPy array.
                A non-iterable object, or a 0-d NumPy array, is wrapped into a one-item list.
    Return:
        a list of flattened items.
    Raise:
        UnsupportedType if a member is not an int, float, str, list, tuple, set or ndarray.
    '''
    if not isinstance(item, Iterable):
        return [
            item,
        ]

    # A 0-d NumPy array passes the Iterable check but raises TypeError when
    # iterated, so it is treated like a single value.
    if getattr(item, "shape", None) == ():
        return [
            item,
        ]

    new = []
    for i in item:
        dtype = type_name(i)
        # python int or float value or Numpy float or int value.
        if dtype.startswith("int") or dtype.startswith("float"):
            new.append(i)
        # python str value: a single character cannot be split any further.
        elif dtype.startswith("str"):
            if len(i) <= 1:
                new.append(i)
            else:
                new.extend(flatten(i))
        # python list,tuple,set object.
        elif dtype in ["list", "tuple", "set"]:
            new.extend(flatten(i))
        # Numpy ndarray object.
        elif dtype == "ndarray":
            if i.shape == ():
                new.append(i)
            else:
                new.extend(flatten(i))
        # Others objects is unsupported.
        else:
            raise UnsupportedType(
                f"Expected list,tuple,set,str or Numpy array object but got {type_name(i)}."
            )

    return new
def load_prob(target, name="prob", useSuffix=None):
    '''
    Load post probability data.

    Args:
        <target>: Python dict object, bytes object, exkaldi probability object,
                  or a file path (.ark file, .scp file, npy file).
        <name>: a string.
        <useSuffix>: a string. When target is file path, it is forwarded to the
                     file reader to specify the file type.
    Return:
        A BytesProbability or NumpyProbability object.
    '''
    assert isinstance(
        name, str) and len(name) > 0, "Name shoud be a string avaliable."

    if isinstance(target, dict):
        loaded = NumpyProbability(target, name)
        loaded.check_format()
        return loaded

    if isinstance(target, bytes):
        loaded = BytesProbability(target, name)
        loaded.check_format()
        return loaded

    if isinstance(target, (NumpyProbability, BytesProbability)):
        # Work on a copy so the caller's object keeps its own name.
        duplicate = copy.deepcopy(target)
        duplicate.rename(name)
        return duplicate

    if isinstance(target, str):
        matrix = __read_data_from_file(target, useSuffix)
        # Wrap the raw matrix into the probability class of the same kind.
        wrapper = BytesProbability if isinstance(matrix, BytesMatrix) else NumpyProbability
        return wrapper(matrix.data, name)

    raise UnsupportedType(
        f"Expected Python dict, bytes object, exkaldi feature object or file path but got{type_name(target)}."
    )
def send_report(self, info):
    '''
    Send information and these will be retained until you do the statistics by using .collect_report().

    Args:
        <info>: a Python dict object including names and their values with int or float type,
                such as {"epoch":epoch,"train_loss":loss,"train_acc":acc}.
                The value can be Python int, float object, Numpy int, float object
                or Numpy ndarray with only one value.
    Raise:
        UnsupportedType if a value is not one of the supported types.
    '''
    assert isinstance(info, dict), "Expected <info> is a Python dict object."

    for name, value in info.items():
        assert isinstance(name, str) and len(
            name
        ) > 0, f"The name of info should be string avaliable but got {type_name(name)}."

        # Values are stored under the lower-cased name, so the float flag has to
        # use the same key (it used to be stored under the original-case name).
        key = name.lower()

        valueDtype = type_name(value)
        if valueDtype.startswith("int"):  # Python int object, Numpy int object
            isFloat = False
        elif valueDtype.startswith("float"):  # Python float object, Numpy float object
            isFloat = True
        elif valueDtype == "ndarray" and value.shape == ():  # Numpy ndarray with only one value
            # dtype kind "f" covers every float width, not only float64.
            isFloat = value.dtype.kind == "f"
        else:
            raise UnsupportedType(
                f"Expected int or float value but got {type_name(value)}.")

        if isFloat:
            self.currentFieldIsFloat[key] = True

        if key not in self.currentField.keys():
            self.currentField[key] = []
        self.currentField[key].append(value)
def nn_align(hmm, prob, trainGraphFile, transitionScale=1.0, acousticScale=0.1,
             selfloopScale=0.1, beam=10, retry_beam=40, name="ali"):
    '''
    Align the neural network acoustic output probability with the Kaldi
    <align-compiled-mapped> command.

    Args:
        <hmm>: model file path, or exkaldi HMM object (BaseHMM, MonophoneHMM, TriphoneHMM).
        <prob>: exkaldi probability object (BytesProbability or NumpyProbability).
        <trainGraphFile>: path of the training graph archive.
        <transitionScale>,<acousticScale>,<selfloopScale>,<beam>,<retry_beam>:
                  values forwarded to the command options of the same meaning.
        <name>: a string used as the name of the returned object.
    Return:
        An exkaldi BytesAlignmentTrans object.
    '''
    probType = type_name(prob)
    if probType == "NumpyProbability":
        prob = prob.to_bytes()
    elif probType != "BytesProbability":
        raise UnsupportedType(f"Expected <prob> is an exkaldi probability object but got: {type_name(prob)}.")

    # The temporary model file is only written when <hmm> is an object.
    with tempfile.NamedTemporaryFile("wb+", suffix=".mdl") as hmmTemp:
        if isinstance(hmm, str):
            assert os.path.isfile(hmm), f"No such file: {hmm}."
            hmmFile = hmm
        else:
            assert type_name(hmm) in ["BaseHMM","MonophoneHMM","TriphoneHMM"], f"<hmm> should be exkaldi HMM object but got: {hmm}."
            hmmTemp.write(hmm.data)
            hmmTemp.seek(0)
            hmmFile = hmmTemp.name

        cmd = " ".join([
            "align-compiled-mapped",
            f"--transition-scale={transitionScale}",
            f"--acoustic-scale={acousticScale}",
            f"--self-loop-scale={selfloopScale}",
            f"--beam={beam}",
            f"--retry-beam={retry_beam}",
            f"{hmmFile}",
            f"ark:{trainGraphFile}",
            "ark:-",
            "ark:-",
        ])

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=prob.data)
        if cod != 0:
            print(err.decode())
            raise KaldiProcessError("Failed to align probability.")

        return BytesAlignmentTrans(out, name=name)
def __read_data_from_file(fileName, useSuffix=None):
    '''
    Read data from file. If the file suffix is unknown, <useSuffix> should be assigned.

    Args:
        <fileName>: a file path string (not a directory). It is expanded with list_files().
        <useSuffix>: None, or a string ending with "ark", "scp" or "npy". It is only used
                     for files whose own suffix is none of these.
    Return:
        A NumpyMatrix object if the data is treated as npy data (<useSuffix> is "npy", or
        <useSuffix> is not given and the first file is an npy file), otherwise a BytesMatrix object.
    '''
    if useSuffix is not None:
        assert isinstance(useSuffix, str), "Expected <useSuffix> is a string."
        useSuffix = useSuffix.strip().lower()[-3:]
    else:
        useSuffix = ""
    assert useSuffix in [
        "", "scp", "ark", "npy"
    ], f'Expected <useSuffix> is "ark", "scp" or "npy" but got "{useSuffix}".'

    if isinstance(fileName, str):
        if os.path.isdir(fileName):
            raise WrongOperation(
                f"Expected file name but got a directory:{fileName}.")
        else:
            allFiles = list_files(fileName)
    else:
        raise UnsupportedType(
            f'Expected <fileName> is file name-like string but got a {type_name(fileName)}.'
        )

    allData_bytes = BytesMatrix()
    allData_numpy = NumpyMatrix()

    def loadNpyFile(fileName):
        # Load one exkaldi-format npy file: a sequence of (utterance ID, data) records.
        try:
            # NOTE: allow_pickle=True unpickles objects, which can run arbitrary code.
            # Only load files from a trusted source.
            temp = np.load(fileName, allow_pickle=True)
            data = {record[0]: record[1] for record in temp}
        except Exception as e:
            # Catch Exception only, so that KeyboardInterrupt/SystemExit still
            # propagate, and keep the original error as the cause.
            raise UnsupportedType(
                f'Expected "npy" data with exkaldi format but got {fileName}.') from e
        else:
            return NumpyMatrix(data)

    def loadArkScpFile(fileName, suffix):
        # Dump an ark or scp file to archive bytes with the Kaldi copy-feats command.
        ExkaldiInfo.vertify_kaldi_existed()

        if suffix == "ark":
            cmd = 'copy-feats ark:'
        else:
            cmd = 'copy-feats scp:'

        cmd += '{} ark:-'.format(fileName)
        out, err, cod = run_shell_command(cmd,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE)
        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Copy feat defeated.')
        else:
            return BytesMatrix(out)

    for fileName in allFiles:
        # The file's own suffix takes priority; <useSuffix> is only a fallback.
        sfx = fileName[-3:].lower()
        if sfx == "npy":
            allData_numpy += loadNpyFile(fileName)
        elif sfx in ["ark", "scp"]:
            allData_bytes += loadArkScpFile(fileName, sfx)
        elif useSuffix == "npy":
            allData_numpy += loadNpyFile(fileName)
        elif useSuffix in ["ark", "scp"]:
            allData_bytes += loadArkScpFile(fileName, useSuffix)
        else:
            raise UnsupportedType(
                'Unknown file suffix. You can assign the <useSuffix> with "scp", "ark" or "npy".'
            )

    # Without an explicit suffix, the first file decides the returned kind.
    if useSuffix == "":
        preferNumpy = allFiles[0][-3:].lower() == "npy"
    else:
        preferNumpy = useSuffix == "npy"

    if preferNumpy:
        result = allData_numpy + allData_bytes.to_numpy()
    else:
        result = allData_bytes + allData_numpy.to_bytes()

    result.check_format()
    return result
def load_ali(target, aliType=None, name="ali", hmm=None):
    '''
    Load alignment data.

    Args:
        <target>: Python dict object, exkaldi alignment object, kaldi alignment file or .npy file.
        <aliType>: None, or one of 'transitionID', 'phoneID', 'pdfID'.
                   It will return different alignment object.
        <name>: a string.
        <hmm>: file path or exkaldi HMM object. Only used when an alignment file
               has to be converted to 'phoneID' or 'pdfID'.
    Return:
        An exkaldi alignment object. If the loaded files produced more than one
        kind of alignment object, a list of them is returned.
    '''
    assert isinstance(
        name, str) and len(name) > 0, "Name shoud be a string avaliable."

    ExkaldiInfo.vertify_kaldi_existed()

    def transform(data, cmd):
        # Run a conversion command and parse its text output ("utt id id ...") into a dict.
        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=data)
        # Either a non-zero return code or an empty output is a failure.
        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to transform alignment.')
        else:
            result = {}
            sp = BytesIO(out)
            for line in sp.readlines():
                fields = line.decode().strip().split()
                if len(fields) == 0:
                    continue
                result[fields[0]] = np.array(fields[1:], dtype=np.int32)
            return result

    def numpy_class(aliType):
        # Map <aliType> to the (result key, class) of the matching numpy alignment.
        if aliType is None:
            return "NumpyAlignment", NumpyAlignment
        elif aliType == "transitionID":
            return "NumpyAlignmentTrans", NumpyAlignmentTrans
        elif aliType == "phoneID":
            return "NumpyAlignmentPhone", NumpyAlignmentPhone
        elif aliType == "pdfID":
            return "NumpyAlignmentPdf", NumpyAlignmentPdf
        else:
            raise WrongOperation(
                f"<aliType> should be None, 'transitionID', 'phoneID' or 'pdfID' but got {aliType}."
            )

    if isinstance(target, dict):
        _, aliClass = numpy_class(aliType)
        result = aliClass(target, name)
        result.check_format()
        return result

    elif type_name(target) in [
            "NumpyAlignment", "NumpyAlignmentTrans", "NumpyAlignmentPhone",
            "NumpyAlignmentPdf", "BytesAlignmentTrans"
    ]:
        result = copy.deepcopy(target)
        result.rename(name)
        return result

    elif isinstance(target, str):

        allFiles = list_files(target)
        results = {
            "NumpyAlignment": NumpyAlignment(),
            "NumpyAlignmentTrans": NumpyAlignmentTrans(),
            "NumpyAlignmentPhone": NumpyAlignmentPhone(),
            "NumpyAlignmentPdf": NumpyAlignmentPdf(),
            "BytesAlignmentTrans": BytesAlignmentTrans(),
        }

        for fileName in allFiles:
            fileName = os.path.abspath(fileName)

            if fileName.endswith(".npy"):
                loaded = __read_data_from_file(fileName, "npy")
                key, aliClass = numpy_class(aliType)
                results[key] += aliClass(loaded.data)
                continue

            if fileName.endswith('.gz'):
                cmd = f'gunzip -c {fileName}'
            else:
                cmd = f'cat {fileName}'

            if aliType is None or aliType == "transitionID":
                out, err, cod = run_shell_command(cmd,
                                                  stdout=subprocess.PIPE,
                                                  stderr=subprocess.PIPE)
                if (isinstance(cod, int) and cod != 0) or out == b'':
                    print(err.decode())
                    raise ShellProcessError(
                        "Failed to get the alignment data from file.")
                results["BytesAlignmentTrans"] += BytesAlignmentTrans(out)
                continue

            # The temp file keeps its own name for the whole block, so that the
            # finally clause closes the file and not a conversion result.
            hmmTemp = tempfile.NamedTemporaryFile("wb+")
            try:
                if type_name(hmm) in ("HMM", "BaseHMM", "MonophoneHMM",
                                      "TriphoneHMM"):
                    hmm.save(hmmTemp)
                    # Rewind (and flush) before the Kaldi command opens the file by name.
                    hmmTemp.seek(0)
                    hmmFileName = hmmTemp.name
                elif isinstance(hmm, str):
                    if not os.path.isfile(hmm):
                        raise WrongPath(f"No such file:{hmm}.")
                    hmmFileName = hmm
                else:
                    raise UnsupportedType(
                        f"<hmm> should be a filePath or exkaldi HMM and its sub-class object. but got {type_name(hmm)}."
                    )

                if aliType == "phoneID":
                    cmd += f" | ali-to-phones --per-frame=true {hmmFileName} ark:- ark,t:-"
                    converted = transform(None, cmd)
                    results["NumpyAlignmentPhone"] += NumpyAlignmentPhone(converted)
                elif aliType == "pdfID":
                    # Append to the cat/gunzip command instead of replacing it.
                    cmd += f" | ali-to-pdf {hmmFileName} ark:- ark,t:-"
                    converted = transform(None, cmd)
                    results["NumpyAlignmentPdf"] += NumpyAlignmentPdf(converted)
                else:
                    raise WrongOperation(
                        f"<aliType> should be None, 'transitionID', 'phoneID' or 'pdfID' but got {aliType}."
                    )
            finally:
                hmmTemp.close()

        # Keep only the kinds that actually received data.
        finalResult = []
        for obj in results.values():
            if not obj.is_void:
                obj.rename(name)
                finalResult.append(obj)

        if len(finalResult) == 0:
            raise WrongOperation(
                "<target> dose not include any data avaliable.")
        elif len(finalResult) == 1:
            finalResult = finalResult[0]

        return finalResult
def record_voice(outFile, seconds=None, dtype="int8", channels=1, rate=16000, chunkFrames=1000):
    '''
    Record voice from microphone and save it to a wav file.

    Args:
        <outFile>: wav file name; ".wav" is appended if the name does not already end with it.
                   If None, nothing is written and the recorded samples are returned instead.
        <seconds>: positive int or float. If None, use ctrl+C to stop recording.
        <dtype>: 'int8','int16' or 'int32'.
        <channels>: channels, 1 or 2.
        <rate>: sample rate.
        <chunkFrames>: the frames every time to read from microphone stream.
    Return:
        the absolute path of out file, or, if <outFile> is None, a 1-d numpy array
        holding the raw recorded samples with the numpy type matching <dtype>.
    '''
    if seconds is not None:
        assert isinstance(
            seconds, (int, float)
        ) and seconds > 0, f'Expected <seconds> is positive int or float value but got {type_name(seconds)}.'
    assert isinstance(rate, int) and rate > 0, f"<rate> shoule be positive int value."
    assert isinstance(
        chunkFrames, int
    ) and chunkFrames > 0, f"<chunkFrames> should be positive int value."
    assert channels in [1, 2], f"Expected <Channels> is 1 or 2 but got {channels}."
    # Validate the target before recording so a bad name does not waste a whole take.
    if outFile is not None:
        assert isinstance(outFile, str) and len(outFile) > 0, f"<outFile> should be a file name."
        if outFile.rstrip()[-4:].lower() != ".wav":
            outFile += ".wav"

    if dtype == "int8":
        width, ft, sampleType = 1, pyaudio.paInt8, np.int8
    elif dtype == "int16":
        width, ft, sampleType = 2, pyaudio.paInt16, np.int16
    elif dtype == "int32":
        width, ft, sampleType = 4, pyaudio.paInt32, np.int32
    else:
        raise UnsupportedType(
            f"<dtype> should be int8, int16 or int32 but got {dtype}.")

    wavData = []
    p = pyaudio.PyAudio()
    try:
        # Opening the stream is inside the outer try so PortAudio is released even if it fails.
        stream = p.open(format=ft, channels=channels, rate=rate, input=True, output=False)
        try:
            if seconds is not None:
                # Count frames as integers: no float drift and no zero-length final read.
                remaining = int(seconds * rate)
                while remaining > 0:
                    frames = min(chunkFrames, remaining)
                    wavData.append(stream.read(frames))
                    remaining -= frames
            else:
                try:
                    while True:
                        wavData.append(stream.read(chunkFrames))
                except KeyboardInterrupt:
                    # ctrl+C is the documented way to stop an open-ended recording.
                    pass
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    rawBytes = b"".join(wavData)

    if outFile is None:
        # Decode with the recorded sample width; copy so the caller gets a writable array.
        return np.frombuffer(rawBytes, dtype=sampleType).copy()

    with wave.open(outFile, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(width)
        wf.setframerate(rate)
        wf.writeframes(rawBytes)

    return os.path.abspath(outFile)
def decompress_feat(feat):
    '''
    Decompress a kaldi compressed feature whose data-type is "CM".

    Args:
        <feat>: an exkaldi bytes feature object.
    Return:
        A new exkaldi bytes feature object holding float ("FM ") matrices, with the same name as <feat>.
    Raise:
        UnsupportedType if a record is binary but not marked "CM ";
        WrongDataFormat if a record does not carry the binary marker.

    This function is a cover of kaldi-io-for-python tools. For more information about it, please access to
    https://github.com/vesis84/kaldi-io-for-python/blob/master/kaldi_io/kaldi_io.py
    '''
    assert isinstance(feat, BytesFeature), "Expected <feat> is a exkaldi bytes feature object."

    def _read_compressed_mat(fd):
        # Global header: value range plus matrix dimensions (16 bytes).
        global_header = np.dtype([('minvalue', 'float32'), ('range', 'float32'),
                                  ('num_rows', 'int32'), ('num_cols', 'int32')])
        # Per-column header: four quantised percentiles (8 bytes per column).
        per_col_header = np.dtype([('percentile_0', 'uint16'), ('percentile_25', 'uint16'),
                                   ('percentile_75', 'uint16'), ('percentile_100', 'uint16')])

        globmin, globrange, rows, cols = np.frombuffer(fd.read(16), dtype=global_header, count=1)[0]
        cols = int(cols)
        rows = int(rows)

        # The data is laid out as [Colheader, ..., Colheader, Data, Data, ...].
        col_headers = np.frombuffer(fd.read(cols * 8), dtype=per_col_header, count=cols)
        # Map the uint16 percentiles back to floats (1.5259e-05 is about 1/65535).
        col_headers = np.array([
            np.array([x for x in y]) * globrange * 1.52590218966964e-05 + globmin
            for y in col_headers
        ], dtype=np.float32)

        # Stored column-major, one byte per value. Use the positional form of reshape:
        # the `newshape=` keyword is deprecated in recent NumPy releases.
        data = np.frombuffer(fd.read(cols * rows), dtype='uint8', count=cols * rows).reshape(cols, rows)

        mat = np.zeros((cols, rows), dtype='float32')
        p0 = col_headers[:, 0].reshape(-1, 1)
        p25 = col_headers[:, 1].reshape(-1, 1)
        p75 = col_headers[:, 2].reshape(-1, 1)
        p100 = col_headers[:, 3].reshape(-1, 1)

        # Piecewise-linear decoding; each byte falls in exactly one of three segments.
        mask_0_64 = (data <= 64)
        mask_193_255 = (data > 192)
        mask_65_192 = (~(mask_0_64 | mask_193_255))
        mat += (p0 + (p25 - p0) / 64. * data) * mask_0_64.astype(np.float32)
        mat += (p25 + (p75 - p25) / 128. * (data - 64)) * mask_65_192.astype(np.float32)
        mat += (p75 + (p100 - p75) / 63. * (data - 192)) * mask_193_255.astype(np.float32)

        return mat.T, rows, cols

    sizeFormat = np.dtype('uint32').char
    newData = []

    with BytesIO(feat.data) as sp:
        while True:
            # Collect the utterance ID up to and including the separating space (or EOF).
            # Decode it in one go so multi-byte UTF-8 IDs are handled.
            header = bytearray()
            while True:
                char = sp.read(1)
                header += char
                if char == b'' or char == b' ':
                    break
            utt = header.decode().strip()
            if utt == '':
                break

            # Compare raw bytes so corrupt markers raise the format errors below
            # rather than a decoding error.
            binarySymbol = sp.read(2)
            if binarySymbol != b'\0B':
                raise WrongDataFormat('Miss right binary symbol.')

            dataType = sp.read(3)
            if dataType != b'CM ':
                raise UnsupportedType("This is not a compressed binary data.")

            matrix, rows, cols = _read_compressed_mat(sp)
            # Re-emit the record as a float matrix: id, marker, "FM ", then each
            # dimension prefixed by a size byte of 4, then the payload.
            newData.append(b''.join([
                bytes(header),
                binarySymbol,
                b'FM ',
                b'\x04', struct.pack(sizeFormat, rows),
                b'\x04', struct.pack(sizeFormat, cols),
                matrix.tobytes(),
            ]))

    return BytesFeature(b''.join(newData), name=feat.name)
def use_cmvn(feat, cmvn, utt2spk=None, std=False):
    '''
    Apply CMVN statistics to feature.

    Args:
        <feat>: exkaldi feature object.
        <cmvn>: exkaldi CMVN statistics object.
        <utt2spk>: utt2spk file path or ScriptTable object.
        <std>: If true, apply std normalization.
    Return:
        A new feature object: a bytes feature if <feat> is a bytes feature,
        a numpy feature if <feat> is a numpy feature.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    # Remember the input flavour now: <feat> is rebound to its bytes form below,
    # so checking its type after the conversion would always report bytes.
    if type_name(feat) == "BytesFeature":
        bytesFlag = True
        feat = feat.sort(by="utt")
    elif type_name(feat) == "NumpyFeature":
        bytesFlag = False
        feat = feat.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi feature but got {type_name(feat)}.")

    if type_name(cmvn) == "BytesCMVNStatistics":
        cmvn = cmvn.sort(by="utt")
    elif type_name(cmvn) == "NumpyCMVNStatistics":
        cmvn = cmvn.sort(by="utt").to_bytes()
    else:
        raise UnsupportedType(
            f"Expected exkaldi CMVN statistics but got {type_name(cmvn)}.")

    with tempfile.NamedTemporaryFile('wb+', suffix='_cmvn.ark') as cmvnTemp, \
            tempfile.NamedTemporaryFile('w+', suffix="_utt2spk", encoding="utf-8") as utt2spkTemp:

        cmvnTemp.write(cmvn.data)
        cmvnTemp.seek(0)  # flush so the external tool sees the whole archive

        # Kaldi options take the form --name=value; a space-separated "true"
        # would be read as an extra positional argument.
        if std is True:
            stdOption = " --norm-vars=true"
        else:
            stdOption = ""

        if utt2spk is None:
            cmd = f'apply-cmvn{stdOption} ark:{cmvnTemp.name} ark:- ark:-'
        else:
            if isinstance(utt2spk, str):
                if not os.path.isfile(utt2spk):
                    raise WrongPath(f"No such file:{utt2spk}.")
                utt2spkSorted = ScriptTable(name="utt2spk").load(utt2spk).sort()
            elif isinstance(utt2spk, ScriptTable):
                utt2spkSorted = utt2spk.sort()
            else:
                raise UnsupportedType(
                    f"<utt2spk> should be a file path or ScriptTable object but got {type_name(utt2spk)}."
                )
            utt2spkSorted.save(utt2spkTemp)
            utt2spkTemp.seek(0)
            cmd = f'apply-cmvn{stdOption} --utt2spk=ark:{utt2spkTemp.name} ark:{cmvnTemp.name} ark:- ark:-'

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=feat.data)

        if (isinstance(cod, int) and cod != 0) or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to apply CMVN statistics.')

        newName = f"cmvn({feat.name},{cmvn.name})"
        newFeat = BytesFeature(out, newName, indexTable=None)
        if bytesFlag:
            return newFeat
        return newFeat.to_numpy()
def __compute_feature(wavFile, kaldiTool, useSuffix=None, name="feat"):
    '''
    Run a kaldi feature extraction command over wav or scp input and merge the results.

    Args:
        <wavFile>: a file path string (expanded through the shell's `ls`, so it may match several files)
                   or a ScriptTable object.
        <kaldiTool>: the kaldi command (with its options) placed in front of "scp,p:... ark:-".
        <useSuffix>: None, or a string ending with "scp" or "wav"; used for files whose own
                     name ends with neither.
        <name>: the name given to the resulting feature.
    Return:
        A bytes feature object accumulated over all input files.
    '''
    if useSuffix is not None:
        assert isinstance(useSuffix, str), "Expected <useSuffix> is a string."
        useSuffix = useSuffix.strip().lower()[-3:]
    else:
        useSuffix = ""
    assert useSuffix in ["", "scp", "wav"], 'Expected <useSuffix> is "scp" or "wav".'

    ExkaldiInfo.vertify_kaldi_existed()

    wavFileTemp = tempfile.NamedTemporaryFile("w+", suffix=".scp", encoding="utf-8")
    try:
        if isinstance(wavFile, str):
            if os.path.isdir(wavFile):
                raise WrongOperation(
                    f'Expected <wavFile> is file path but got a directory:{wavFile}.'
                )
            # `ls` lets the shell expand wildcards in <wavFile>.
            out, err, cod = run_shell_command(f'ls {wavFile}',
                                              stdout=subprocess.PIPE,
                                              stderr=subprocess.PIPE)
            if out == b'':
                raise WrongPath(f"No such file:{wavFile}.")
            allFiles = out.decode().strip().split('\n')
        elif isinstance(wavFile, ScriptTable):
            wavFile.sort().save(wavFileTemp)
            # Rewind (which also flushes) after saving, as the other helpers do with their temp files.
            wavFileTemp.seek(0)
            allFiles = [wavFileTemp.name]
        else:
            raise UnsupportedType(
                f'Expected filename-like string but got a {type_name(wavFile)}.'
            )

        results = []
        for filePath in allFiles:
            filePath = os.path.abspath(filePath)
            suffix = filePath[-3:].lower()
            # The file's own suffix wins; <useSuffix> is only the fallback.
            fileKind = suffix if suffix in ("wav", "scp") else useSuffix

            if fileKind == "wav":
                # Utterance ID = base name without its extension and with inner dots removed.
                # splitext avoids chopping four characters off names that have no extension.
                baseName = os.path.splitext(os.path.basename(filePath))[0]
                uttID = "".join(baseName.split("."))
                cmd = f"echo {uttID} {filePath} | {kaldiTool} scp,p:- ark:-"
            elif fileKind == "scp":
                cmd = f"{kaldiTool} scp,p:{filePath} ark:-"
            else:
                raise UnsupportedType(
                    'Unknown file suffix. You can declare it by making <useSuffix> "wav" or "scp".'
                )

            out, err, cod = run_shell_command(cmd,
                                              stdout=subprocess.PIPE,
                                              stderr=subprocess.PIPE)
            # Check the return code (not the output) for a non-zero exit status.
            if (isinstance(cod, int) and cod != 0) or out == b'':
                print(err.decode())
                raise KaldiProcessError(f'Failed to compute feature:{name}.')
            results.append(BytesFeature(out))
    finally:
        wavFileTemp.close()

    if len(results) == 0:
        raise WrongOperation("No any feature date in file path.")

    result = results[0]
    for other in results[1:]:
        result += other
    result.rename(name)
    return result
def get_1best(self, wordSymbolTable=None, hmm=None, lmwt=1, acwt=1.0, phoneLevel=False):
    '''
    Get 1 best result with text formation.

    Args:
        <wordSymbolTable>: None or file path or ListTable object or LexiconBank object.
                           If None, the one stored in this lattice is used.
        <hmm>: None or file path or HMM object. If None, the one stored in this lattice is used.
        <lmwt>: language model weight, a non-negative int.
        <acwt>: acoustic model weight.
        <phoneLevel>: If True, return phone results.
    Return:
        An exkaldi Transcription object.
    '''
    ExkaldiInfo.vertify_kaldi_existed()

    if self.is_void:
        raise WrongOperation('No any data in lattice.')

    assert isinstance(lmwt, int) and lmwt >= 0, "Expected <lmwt> is a non-negative int number."

    if wordSymbolTable is None:
        assert self.wordSymbolTable is not None, "<wordSymbolTable> is necessary because no wordSymbol table is avaliable."
        wordSymbolTable = self.wordSymbolTable

    if hmm is None:
        assert self.hmm is not None, "<hmm> is necessary because no HMM model is available."
        hmm = self.hmm

    with tempfile.NamedTemporaryFile("wb+", suffix=".mdl") as modelTemp, \
            tempfile.NamedTemporaryFile("w+", suffix="_words.txt", encoding="utf-8") as wordSymbolTemp:

        if isinstance(wordSymbolTable, str):
            assert os.path.isfile(wordSymbolTable), f"No such file: {wordSymbolTable}."
            wordsFile = wordSymbolTable
        elif type_name(wordSymbolTable) == "LexiconBank":
            if phoneLevel:
                wordSymbolTable.dump_dict("phones", wordSymbolTemp)
            else:
                wordSymbolTable.dump_dict("words", wordSymbolTemp)
            # Rewind (which also flushes) so the external tool reads the complete table.
            wordSymbolTemp.seek(0)
            wordsFile = wordSymbolTemp.name
        elif type_name(wordSymbolTable) == "ListTable":
            wordSymbolTable.save(wordSymbolTemp)
            wordSymbolTemp.seek(0)
            wordsFile = wordSymbolTemp.name
        else:
            raise UnsupportedType(f"<wordSymbolTable> should be file name, LexiconBank object or ListTable object but got: {type_name(wordSymbolTable)}.")

        if isinstance(hmm, str):
            assert os.path.isfile(hmm), f"No such file: {hmm}."
            hmmFile = hmm
        elif type_name(hmm) in ["HMM", "MonophoneHMM", "TriphoneHMM"]:
            # The base "HMM" type is accepted too, like the alignment loader in this file.
            hmm.save(modelTemp)
            modelTemp.seek(0)
            hmmFile = modelTemp.name
        else:
            raise UnsupportedType(f"<hmm> should be file name, exkaldi HMM object but got: {type_name(hmm)}.")

        if phoneLevel:
            cmd0 = f'lattice-align-phones --replace-output-symbols=true {hmmFile} ark:- ark:- | '
        else:
            cmd0 = ""

        cmd1 = f"lattice-best-path --lm-scale={lmwt} --acoustic-scale={acwt} --word-symbol-table={wordsFile} --verbose=2 ark:- ark,t:- "
        cmd = cmd0 + cmd1

        out, err, cod = run_shell_command(cmd,
                                          stdin=subprocess.PIPE,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          inputs=self.data)
        if cod != 0 or out == b'':
            print(err.decode())
            raise KaldiProcessError('Failed to get 1-best from lattice.')

        if phoneLevel:
            newName = "1-best-phones"
        else:
            newName = "1-best-words"

        results = Transcription(name=newName)
        # Each output line is "<uttID> <result ...>"; an ID with nothing after it is stored as " ".
        for line in out.decode().strip().split("\n"):
            fields = line.strip().split(maxsplit=1)
            if len(fields) == 0:
                continue
            elif len(fields) == 1:
                results[fields[0]] = " "
            else:
                results[fields[0]] = fields[1]
        return results