def use_fmllr(feat,fmllrMat,utt2spk,outFile=None):
    '''
    Transform features to fMLLR features with the Kaldi "transform-feats" command.

    Share Args:
        Null

    Parallel Args:
        <feat>: exkaldi feature or index table object.
        <fmllrMat>: exkaldi fMLLR transform matrix or index table object.
        <utt2spk>: file name or ListTable object.
        <outFile>: output file name.

    Return:
        exkaldi feature or index table object.
    '''
    feats,fmllrMats,utt2spks,outFiles = check_multiple_resources(feat,fmllrMat,utt2spk,outFile=outFile)

    archiveNames = []
    # <outFiles> takes part in the zip only to bound the number of iterations.
    for _,oneFeat,oneMat,oneMap in zip(outFiles,feats,fmllrMats,utt2spks):
        # the feature and the transform matrix must be of the expected kinds
        declare.is_feature("feat",oneFeat)
        declare.is_fmllr_matrix("fmllrMat",oneMat)
        # utt2spk is mandatory for this command
        declare.is_potential_list_table("utt2spk",oneMap)
        # every generated archive is named after its two inputs
        archiveNames.append(f"fmllr({oneFeat.name},{oneMat.name})")

    cmdPattern = 'transform-feats --utt2spk=ark:{utt2spk} {transMat} {feat} ark:{outFile}'
    resources = {
        "feat":feats,
        "transMat":fmllrMats,
        "utt2spk":utt2spks,
        "outFile":outFiles,
    }

    return run_kaldi_commands_parallel(
        resources,
        cmdPattern,
        analyzeResult=True,
        generateArchive="feat",
        archiveNames=archiveNames,
    )
def compute_cmvn_stats(feat,spk2utt=None,name="cmvn",outFile=None):
    '''
    Compute CMVN statistics with the Kaldi "compute-cmvn-stats" command.

    Share Args:
        Null

    Parallel Args:
        <feat>: exkaldi feature object or index table object.
        <spk2utt>: spk2utt file or exkaldi ListTable object.
        <name>: name of output CMVN object.
        <outFile>: output file name.

    Return:
        exkaldi CMVN statistics or index table object.
    '''
    feats,spk2utts,names,outFiles = check_multiple_resources(feat,spk2utt,name,outFile=outFile)

    for oneFeat,oneMap in zip(feats,spk2utts):
        # verify feature
        declare.is_feature("feat",oneFeat)
        # spk2utt is optional, so it is only verified when supplied
        if oneMap is not None:
            declare.is_potential_list_table("spk2utt",oneMap)

    # Only the first spk2utt decides which command form is used for all jobs.
    if spk2utts[0] is not None:
        cmdPattern = 'compute-cmvn-stats --spk2utt=ark:{spk2utt} {feat} ark:{outFile}'
        resources = {"feat":feats,"spk2utt":spk2utts,"outFile":outFiles}
    else:
        cmdPattern = 'compute-cmvn-stats {feat} ark:{outFile}'
        resources = {"feat":feats,"outFile":outFiles}

    return run_kaldi_commands_parallel(
        resources,
        cmdPattern,
        analyzeResult=True,
        generateArchive="cmvn",
        archiveNames=names,
    )
def utt_to_spk(utts,utt2spk):
    '''
    Accept a list of utterance IDs and return their corresponding speaker IDs.

    Args:
        <utts>: a string or list or tuple of utterance IDs.
        <utt2spk>: utt2spk file or ListTable object.

    Return:
        a sorted list of the distinct speaker IDs.

    Raise:
        WrongOperation: if an utterance ID is missing from the utt2spk map.
        AssertionError: if a speaker ID in the map contains a space.
    '''
    declare.is_classes("utterance IDs",utts,(str,tuple,list))
    if isinstance(utts,str):
        # a single utterance ID is handled as a one-element list
        utts = [utts,]
    else:
        declare.members_are_valid_strings("utterance IDs",utts)

    declare.is_potential_list_table("utt2spk",utt2spk)
    if isinstance(utt2spk,str):
        utt2spk = load_list_table(utt2spk)

    spks = set()
    for utt in utts:
        try:
            spk = utt2spk[utt]
        except KeyError:
            raise WrongOperation(f"Miss utterance ID {utt} in utt2spk map.")
        # Validate the looked-up speaker value (not the utterance key) before it is parsed.
        declare.is_valid_string("The value of utt2spk",spk)
        spktemp = spk.strip().split(maxsplit=1)
        # an utterance maps to exactly one speaker, so no inner space is allowed
        assert len(spktemp) == 1,f"speaker ID in utt2spk has unexpected space: {spk}."
        spks.add(spktemp[0])

    return sorted(spks)
def spk_to_utt(spks,spk2utt):
    '''
    Accept a list of speaker IDs and return their corresponding utterance IDs.

    Args:
        <spks>: a string or list or tuple of speaker IDs.
        <spk2utt>: spk2utt file or ListTable object.

    Return:
        a sorted list of the distinct utterance IDs.
    '''
    declare.is_classes("speaker IDs",spks,(str,tuple,list))
    if isinstance(spks,str):
        # a single speaker ID is handled as a one-element list
        spks = [spks,]
    else:
        declare.members_are_valid_strings("speaker IDs",spks)

    declare.is_potential_list_table("spk2utt",spk2utt)
    if isinstance(spk2utt,str):
        spk2utt = load_list_table(spk2utt)

    collected = set()
    for spk in spks:
        try:
            uttLine = spk2utt[spk]
        except KeyError:
            raise WrongOperation(f"Miss speaker ID {spk} in spk2utt map.")
        declare.is_valid_string("The value of spk2utt",uttLine)
        # one speaker owns a whitespace-separated run of utterance IDs
        collected.update(uttLine.strip().split())

    return sorted(collected)
def utt2spk_to_spk2utt(utt2spk,outFile=None):
    '''
    Transform utt2spk to spk2utt.

    Args:
        <utt2spk>: file name or exkaldi ListTable object.
        <outFile>: file name or None.

    Return:
        the output file name if <outFile> is given, otherwise an exkaldi ListTable object.
    '''
    declare.is_potential_list_table("utt2spk",utt2spk)
    if outFile is not None:
        declare.is_valid_file_name(outFile)

    if isinstance(utt2spk,str):
        utt2spk = load_list_table(utt2spk)

    spk2utt = ListTable(name="spk2utt")
    for utt,spk in utt2spk.items():
        declare.is_valid_string("utterance ID",utt)
        declare.is_valid_string("speaker ID",spk)
        assert utt.count(" ") == 0,f"<utterance ID> is not a continuous string but spaces existed: {utt}."
        assert spk.count(" ") == 0,f"<speaker ID> is not a continuous string but spaces existed: {spk}."

        try:
            known = spk2utt[spk]
        except KeyError:
            # first utterance seen for this speaker
            spk2utt[spk] = utt
        else:
            # later utterances are appended, separated by a single space
            spk2utt[spk] = known + f" {utt}"

    if outFile is not None:
        spk2utt.save(outFile)
        return outFile
    return spk2utt
def use_cmvn(feat,cmvn,utt2spk=None,std=False,outFile=None):
    '''
    Apply CMVN statistics to feature with the Kaldi "apply-cmvn" command.

    Share Args:
        Null

    Parallel Args:
        <feat>: exkaldi feature or index table object.
        <cmvn>: exkaldi CMVN statistics or index object.
        <utt2spk>: file path or ListTable object.
        <std>: If true,apply std normalization.
        <outFile>: out file name.

    Return:
        feature or index table object.
    '''
    feats,cmvns,utt2spks,stds,outFiles = check_multiple_resources(feat,cmvn,utt2spk,std,outFile=outFile)

    archiveNames = []
    # <outFiles> takes part in the zip only to bound the number of iterations.
    for _,oneFeat,oneCmvn,oneMap,oneStd in zip(outFiles,feats,cmvns,utt2spks,stds):
        # verify feature and cmvn
        declare.is_feature("feat",oneFeat)
        declare.is_cmvn("cmvn",oneCmvn)
        # utt2spk is optional, so it is only verified when supplied
        if oneMap is not None:
            declare.is_potential_list_table("utt2spk",oneMap)
        # <std> is handed to the command pattern as a Python bool, without conversion
        declare.is_bool("std",oneStd)
        archiveNames.append( f"cmvn({oneFeat.name},{oneCmvn.name})" )

    # Only the first utt2spk decides which command form is used for all jobs.
    if utt2spks[0] is not None:
        cmdPattern = 'apply-cmvn --norm-vars={std} --utt2spk=ark:{utt2spk} {cmvn} {feat} ark:{outFile}'
        resources = {"feat":feats,"cmvn":cmvns,"utt2spk":utt2spks,"std":stds,"outFile":outFiles}
    else:
        cmdPattern = 'apply-cmvn --norm-vars={std} {cmvn} {feat} ark:{outFile}'
        resources = {"feat":feats,"cmvn":cmvns,"std":stds,"outFile":outFiles}

    return run_kaldi_commands_parallel(
        resources,
        cmdPattern,
        analyzeResult=True,
        generateArchive="feat",
        archiveNames=archiveNames,
    )
def spk2utt_to_utt2spk(spk2utt,outFile=None):
    '''
    Transform spk2utt file to utt2spk file.

    Args:
        <spk2utt>: file name or exkaldi ListTable object.
        <outFile>: file name or None.

    Return:
        the output file name if <outFile> is given, otherwise an exkaldi ListTable object.
    '''
    declare.is_potential_list_table("spk2utt",spk2utt)
    if outFile is not None:
        declare.is_valid_file_name(outFile)

    if isinstance(spk2utt,str):
        spk2utt = load_list_table(spk2utt)

    utt2spk = ListTable(name="utt2spk")
    for spk,uttLine in spk2utt.items():
        declare.is_valid_string("utterance IDs",uttLine)
        declare.is_valid_string("speaker ID",spk)
        assert spk.count(" ") == 0,f"<speaker ID> is not a continuous string but spaces existed: {spk}."

        for utt in uttLine.split():
            try:
                utt2spk[utt]
            except KeyError:
                # not seen yet: record the owner of this utterance
                utt2spk[utt] = spk
                continue
            # an utterance must not be listed under more than one speaker entry
            raise WrongDataFormat(f"utterance ID:{utt} has existed toward multiple speakers.")

    if outFile is not None:
        utt2spk.save(outFile)
        return outFile
    return utt2spk