def err_per_phone(flags_fname, scores_fname, threshold, text_fname, phn_map, misp_trend):
    flags_dict = {k: v for k, v in kio.read_vec_int_ark(flags_fname)}
    text_dict = {k: v for k, v in kio.read_vec_int_ark(text_fname)}
    far = np.zeros((len(phn_map), len(threshold)), dtype=np.int32)
    frr = np.zeros((len(phn_map), len(threshold)), dtype=np.int32)
    cnt_mp = np.zeros(len(phn_map), dtype=np.int32)
    cnt_cp = np.zeros(len(phn_map), dtype=np.int32)
    for utt_id, scores in kio.read_vec_flt_ark(scores_fname):
        flags = flags_dict[utt_id]
        phones = text_dict[utt_id]
        for i in range(len(flags)):
            if flags[i] > 2:
                continue
            phn = phones[i]
            # mp is a boolean mask over the threshold sweep
            mp = misp_fn[misp_trend](scores[i], threshold)
            if flags[i] == 0:
                cnt_cp[phn] += 1
                frr[phn, mp] += 1
            elif flags[i] > 0:
                cnt_mp[phn] += 1
                far[phn, ~mp] += 1
    # np.float was removed in NumPy 1.24; use the builtin float dtype
    cnt_mp = cnt_mp.astype(float)
    cnt_cp = cnt_cp.astype(float)
    cnt_mp[cnt_mp == 0.] = float('nan')
    cnt_cp[cnt_cp == 0.] = float('nan')
    cnt_mp = np.reshape(cnt_mp, (-1, 1))
    cnt_cp = np.reshape(cnt_cp, (-1, 1))
    return (far / cnt_mp), (frr / cnt_cp)
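This function (and the `err` variant further down) indexes `far`/`frr` with the boolean mask returned by `misp_fn[misp_trend](...)`. `misp_fn` itself is not part of these excerpts; a minimal sketch, assuming it maps a trend name to a score-vs-threshold comparison:

# Hypothetical `misp_fn` dispatch table: each entry compares one scalar score
# against a vector of thresholds and returns a boolean mask, one entry per
# operating point.
misp_fn = {
    'falling': lambda score, thresholds: score < thresholds,  # low score => mispronounced
    'rising': lambda score, thresholds: score > thresholds,   # high score => mispronounced
}

# e.g. one score against a 101-point threshold sweep:
# mask = misp_fn['falling'](0.35, np.linspace(0.0, 1.0, 101))  # bool array, length 101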
def get_vad_dict(ipath2vad_ark):
    vad_dict = dict()
    for uttid, vad_vec in kaldi_io.read_vec_int_ark(ipath2vad_ark):
        assert uttid not in vad_dict, \
            "Duplicated utterance %s in %s" % (uttid, ipath2vad_ark)
        vad_dict[uttid] = vad_vec
    return vad_dict
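A possible call site, reading VAD decisions through a Kaldi pipe (the scp path is illustrative):

# Hypothetical usage: the 'ark:...|' pipe form is the same one used by the
# scripts below; kaldi_io runs the command and parses its stdout.
vad = get_vad_dict('ark:copy-vector scp:data/train/vad.scp ark,t:- |')
total_voiced = sum(int(v.sum()) for v in vad.values())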
def main():
    assert len(sys.argv) == 2, "Improper number of arguments."
    ipath2vad_scp = sys.argv[1]
    # for debugging
    # ipath2vad_scp="ark:copy-vector scp:/home/jerry/research/ap17_olr/lsid/_vad/vad_dev_1s.1.scp ark,t:-|"

    # Use a dict rather than a list so that specific utterances can be
    # inspected later, although there is no need for that so far.
    numvoicedframes = dict()
    for uttid, vad_vec in kaldi_io.read_vec_int_ark(ipath2vad_scp):
        assert uttid not in numvoicedframes, \
            "Duplicated utterance %s in %s" % (uttid, ipath2vad_scp)
        numvoicedframes[uttid] = (np.sum(vad_vec), vad_vec.shape[0])

    # draw histogram for voiced frames
    draw_histogram(numvoicedframes, 0,
                   "distribution of number of voiced frames per utterance")
    # draw histogram for all frames
    draw_histogram(numvoicedframes, 1,
                   "distribution of number of frames per utterance")
    # get voiced/whole frames ratio
    print("The nvoiced/nwhole ratio is %.4f" % get_ratio(numvoicedframes))
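`draw_histogram` and `get_ratio` are not shown in these excerpts. A minimal sketch consistent with how they are called, assuming the dict maps uttid to a `(n_voiced, n_total)` tuple and the integer argument selects which element to plot:

import matplotlib.pyplot as plt

def draw_histogram(counts, idx, title):
    # counts: uttid -> (n_voiced, n_total); idx picks the tuple element to plot
    plt.hist([pair[idx] for pair in counts.values()], bins=100)
    plt.xlabel("frames per utterance")
    plt.ylabel("number of utterances")
    plt.title(title)
    plt.show()

def get_ratio(counts):
    # voiced-to-total frame ratio pooled over all utterances
    n_voiced = sum(pair[0] for pair in counts.values())
    n_total = sum(pair[1] for pair in counts.values())
    return float(n_voiced) / n_total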
def load_dataset(fea_scp, fea_opts, lab_folder, lab_opts, left, right):
    fea = {k: m for k, m in kaldi_io.read_mat_ark(
        'ark:copy-feats scp:' + fea_scp + ' ark:- |' + fea_opts)}
    # Note that only the alignments of the loaded features are copied
    lab = {k: v for k, v in kaldi_io.read_vec_int_ark(
        'gunzip -c ' + lab_folder + '/ali*.gz | ' + lab_opts + ' '
        + lab_folder + '/final.mdl ark:- ark:-|') if k in fea}
    # Remove all the features without an alignment
    # (see the log file in alidir, "Did not Succeded")
    fea = {k: v for k, v in fea.items() if k in lab}

    count = 0
    end_snt = 0
    end_index = []
    snt_name = []
    for k in sorted(fea.keys(), key=lambda k: len(fea[k])):
        if count == 0:
            count = 1
            fea_conc = fea[k]
            lab_conc = lab[k]
            end_snt = end_snt + fea[k].shape[0] - left
        else:
            fea_conc = np.concatenate([fea_conc, fea[k]], axis=0)
            lab_conc = np.concatenate([lab_conc, lab[k]], axis=0)
            end_snt = end_snt + fea[k].shape[0]
        end_index.append(end_snt)
        snt_name.append(k)

    end_index[-1] = end_index[-1] - right
    return [snt_name, fea_conc, lab_conc, end_index]
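A hypothetical invocation (paths and options are illustrative, following the usual Kaldi recipe layout):

snt_name, fea, lab, end_index = load_dataset(
    'data/train/feats.scp',                       # feature scp
    ' add-deltas --delta-order=2 ark:- ark:- |',  # extra stages appended to the copy-feats pipe
    'exp/tri3_ali',                               # dir with ali*.gz and final.mdl
    'ali-to-pdf',                                 # per-frame pdf-id labels
    left=5, right=5)                              # context frames trimmed at the edges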
def run(config):
    # Load the nnet model
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetFeedforward(nnet['feature_dim'] * nnet['num_frames'],
                            nnet['num_layers'], nnet['hidden_dim'],
                            nnet['num_classes'])
    model.load_state_dict(nnet['model_state_dict'])

    # Load alignment
    ali_files = [
        os.path.join(config.ali_dir, f) for f in listdir(config.ali_dir)
        if f.startswith('ali.')
    ]
    pdf_ali_dict = {}
    for file in ali_files:
        if config.ali_type == "pdf":
            pdf_ali_file = "ark:ali-to-pdf {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(config.ali_dir, "final.mdl"), file)
        else:
            pdf_ali_file = "ark:ali-to-phones --per-frame {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(config.ali_dir, "final.mdl"), file)
        pdf_ali_dict.update(
            {u: d - 1 for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)})

    # Load feature stuff
    feats_config = pickle.load(open(config.egs_config, 'rb'))

    if feats_config['feat_type']:
        feat_type = feats_config['feat_type'].split(',')[0]
        trans_path = feats_config['feat_type'].split(',')[1]

    if config.override_trans_path is not None:
        trans_path = config.override_trans_path

    if feat_type == "pca":
        cmd = "transform-feats {} scp:{} ark:- |".format(trans_path, config.scp)
    elif feat_type == "cmvn":
        cmd = "apply-cmvn {} scp:{} ark:- |".format(trans_path, config.scp)
    else:
        cmd = config.scp

    if feats_config['concat_feats']:
        context = feats_config['concat_feats'].split(',')
        cmd += " splice-feats --left-context={:s} --right-context={:s} ark:- ark:- |".format(
            context[0], context[1])

    # Get the posterior
    fer_dict = {}
    for utt_id, mat in kaldi_io.read_mat_ark(cmd):
        out = model(Variable(torch.FloatTensor(mat)))
        out = softmax(out[1].data.numpy())
        als = pdf_ali_dict[utt_id]
        preds = np.argmax(out, axis=1)
        err = (float(preds.shape[0]) - float(np.sum(np.equal(preds, als)))) \
            * 100 / float(preds.shape[0])
        fer_dict[utt_id] = err
    return fer_dict
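`softmax` is not defined in this excerpt; a row-wise, numerically stable version consistent with its use on a `(frames, classes)` score matrix would be:

import numpy as np

def softmax(x):
    # subtract the row max before exponentiating to avoid overflow
    e = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e / np.sum(e, axis=1, keepdims=True)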
def err(flags_fname, scores_fname, threshold, misp_trend):
    flags_dict = {k: v for k, v in kio.read_vec_int_ark(flags_fname)}
    far = np.zeros_like(threshold, dtype=np.int32)
    frr = np.zeros_like(threshold, dtype=np.int32)
    cnt_mp = 0
    cnt_cp = 0
    for uttid, scores in kio.read_vec_flt_ark(scores_fname):
        flags = flags_dict[uttid]
        for i in range(len(flags)):
            if flags[i] > 2:
                continue
            mp = misp_fn[misp_trend](scores[i], threshold)
            if flags[i] == 0:
                cnt_cp += 1
                # correct[mp == False] = correct[mp == False] + 1
                frr[mp] += 1
            elif flags[i] > 0:
                cnt_mp += 1
                # correct[mp == True] = correct[mp == True] + 1
                far[~mp] += 1
            else:
                print("Impossible?!")
    return (far / cnt_mp), (frr / cnt_cp)
def read_data(self):
    feat_path = os.path.join(self.recipe_dir, 'data', self.dset, 'feats.scp')
    if self.dset == 'train':
        label_path = os.path.join(self.recipe_dir, 'exp', self.cfg['ali_dir'])
    else:
        label_path = os.path.join(self.recipe_dir, 'exp',
                                  self.cfg['ali_dir'] + '_' + self.dset)

    feat_opts = "apply-cmvn --utt2spk=ark:{0} ark:{1} ark:- ark:- |".format(
        os.path.join(self.recipe_dir, 'data', self.dset, 'utt2spk'),
        os.path.join(self.recipe_dir, 'data', self.dset,
                     self.dset + '_cmvn_speaker.ark'))
    if self.cfg['feature_deltas']:
        feat_opts += " add-deltas --delta-order=2 ark:- ark:- |"
    if self.cfg['feature_context']:
        feat_opts += " splice-feats --left-context={0} --right-context={0} ark:- ark:- |".format(
            str(self.cfg['feature_context']))

    label_opts = 'ali-to-pdf' if self.cfg['task'] == 'classification' \
        else 'ali-to-phones --per-frame'

    feats = {k: m for k, m in kaldi_io.read_mat_ark(
        'ark:copy-feats scp:{} ark:- | {}'.format(feat_path, feat_opts))}
    lab = {k: v for k, v in kaldi_io.read_vec_int_ark(
        'gunzip -c {0}/ali*.gz | {1} {0}/final.mdl ark:- ark:-|'.format(
            label_path, label_opts)) if k in feats}
    feats = {k: v for k, v in feats.items() if k in lab}
    return feats, lab
def read_to_vec(self, type='int'):
    print("run", self.cmd)
    if type == 'int':
        generator = kaldi_io.read_vec_int_ark(self.cmd)
    elif type == 'float':
        generator = kaldi_io.read_vec_flt_ark(self.cmd)
    result = {utt_id: np.array(vec) for utt_id, vec in generator}
    return result
def get_labels(ali_dir, ali_type, config):
    ali_files = []
    all_ali_dirs = ali_dir.split(',')
    for ali_dir in all_ali_dirs:
        ali_files.extend([
            os.path.join(ali_dir, f) for f in listdir(ali_dir)
            if f.startswith('ali.')
        ])
    pdf_ali_dict = {}
    for file in ali_files:
        if ali_type == "pdf":
            pdf_ali_file = "ark:ali-to-pdf {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(ali_dir, "final.mdl"), file)
            if config.notruncpad:
                pdf_ali_dict.update({
                    u + ".pt": torch.FloatTensor(d - 0).long()
                    for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)
                })
            else:
                pdf_ali_dict.update({
                    u + ".pt": F.pad(
                        torch.FloatTensor(d - 0).long(),
                        (0, config.max_seq_len - d.shape[0]))
                    for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)
                })
        else:
            pdf_ali_file = "ark:ali-to-phones --per-frame {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(ali_dir, "final.mdl"), file)
            if config.notruncpad:
                pdf_ali_dict.update({
                    u + ".pt": torch.FloatTensor(d - 1).long()
                    for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)
                })
            else:
                pdf_ali_dict.update({
                    u + ".pt": F.pad(
                        torch.FloatTensor(d - 1).long(),
                        (0, config.max_seq_len - d.shape[0]))
                    for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)
                })
    torch.save(pdf_ali_dict, os.path.join(config.save_dir, 'labels.pkl'))
def __init__(self, input_stream, output_stream, flags_stream,
             phone_map_fname, tmp_dir, batch_size):
    self.phone_map = read_phone_map(phone_map_fname)
    input_it = kio.read_mat_ark(input_stream)
    output_dict = {k: v for k, v in kio.read_ali_ark(output_stream)}
    self.flags_dict = {k: v for k, v in kio.read_vec_int_ark(flags_stream)}
    self.flags = None
    gi.DataLoader.__init__(self, input_it, output_dict, tmp_dir, batch_size)
def testInt32VectorReadWrite(self):
    """ Test read/write for int32 vectors. """
    # read,
    i32_vec = {k: v for k, v in kaldi_io.read_vec_int_ark('tests/data/ali.ark')}        # binary,
    i32_vec2 = {k: v for k, v in kaldi_io.read_vec_int_ark('tests/data/ali_ascii.ark')} # ascii,
    # re-save the data,
    with kaldi_io.open_or_fd('tests/data_re-saved/ali.ark', 'wb') as f:
        for k, v in i32_vec.items():
            kaldi_io.write_vec_int(f, v, k)
    # read and make sure it is the same,
    for k, v in kaldi_io.read_vec_int_ark('tests/data_re-saved/ali.ark'):
        self.assertTrue(np.array_equal(v, i32_vec[k]),
                        msg="int32 vector same after re-saving")
def compute_prior(config):
    ali_files = [
        os.path.join(config.ali_dir, f) for f in listdir(config.ali_dir)
        if f.startswith('ali.')
    ]
    p = np.zeros(config.num_classes)
    for file in ali_files:
        if config.ali_type == "pdf":
            pdf_ali_file = "ark:ali-to-pdf {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(config.ali_dir, "final.mdl"), file)
            for key, ali in kaldi_io.read_vec_int_ark(pdf_ali_file):
                for x in range(config.num_classes):
                    p[x] += len(np.where(ali == x)[0])
        else:
            pdf_ali_file = "ark:ali-to-phones --per-frame {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(config.ali_dir, "final.mdl"), file)
            for key, ali in kaldi_io.read_vec_int_ark(pdf_ali_file):
                for x in range(config.num_classes):
                    p[x] += len(np.where((ali - 1) == x)[0])
    return np.log(p / np.sum(p))
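The returned log-prior is typically used to turn network log-posteriors into scaled log-likelihoods before decoding; a sketch, assuming `posteriors` is a `(frames, num_classes)` matrix:

log_prior = compute_prior(config)        # shape: (num_classes,)
log_post = np.log(posteriors + 1e-20)    # guard against log(0)
log_like = log_post - log_prior          # broadcasts the prior over frames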
def read_kaldi_lab(kaldi_ali, kaldi_lab_opts):
    """Read labels in kaldi format.

    Uses kaldi IO.

    Arguments
    ---------
    kaldi_ali : str
        Path to directory where kaldi alignments are stored.
    kaldi_lab_opts : str
        A string that contains the options for reading the kaldi alignments.

    Returns
    -------
    lab : dict
        A dictionary containing the labels.

    Note
    ----
    This depends on kaldi-io-for-python. Install it separately.
    See: https://github.com/vesis84/kaldi-io-for-python

    Example
    -------
    This example requires kaldi files.
    ```
    lab_folder = '/home/kaldi/egs/TIMIT/s5/exp/dnn4_pretrain-dbn_dnn_ali'
    read_kaldi_lab(lab_folder, 'ali-to-pdf')
    ```
    """
    # EXTRA TOOLS
    try:
        import kaldi_io
    except ImportError:
        raise ImportError("Could not import kaldi_io. Install it to use this.")
    # Reading the Kaldi labels
    lab = {
        k: v
        for k, v in kaldi_io.read_vec_int_ark(
            "gunzip -c " + kaldi_ali + "/ali*.gz | " + kaldi_lab_opts
            + " " + kaldi_ali + "/final.mdl ark:- ark:-|",
        )
    }
    return lab
def run(config, post_dict):
    ali_files = [os.path.join(config.ali_dir, f)
                 for f in listdir(config.ali_dir) if f.endswith('.gz')]
    fer_dict = {}
    for file in ali_files:
        pdf_ali_file = "ark:ali-to-pdf {} ark:'gunzip -c {} |' ark:- |".format(
            os.path.join(config.ali_dir, "final.mdl"), file)
        pdf_ali_dict = {u: d for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)}
        for key in pdf_ali_dict:
            als = pdf_ali_dict[key]
            preds = np.argmax(post_dict[key], axis=1)
            err = (float(preds.shape[0]) - float(np.sum(np.equal(preds, als)))) \
                * 100 / float(preds.shape[0])
            fer_dict[key] = err
    return fer_dict
def __init__(self, phone_label=None, feats=None, transform=None):
    """
    Args:
        phone_label (dict): utt to frame label.
        feats (dict): utt to frame features.
        transform (callable, optional): Optional transform to be applied
            on a sample.
    """
    if Phone_cla_Dataset.class_trans_vector is None:
        Phone_cla_Dataset.class_trans_vector = np.vectorize(
            Phone_cla_Dataset.class_trans)
    if Phone_cla_Dataset.maxClassNum == -1:
        Phone_cla_Dataset.maxClassNum = max(
            list(Data_show.phone2class.values())) + 1
    if phone_label is None or feats is None:
        self.phone_label = {
            u: d for u, d in kaldi_io.read_vec_int_ark("feats/ali.1.ph")
        }
        self.feats = {
            u: d for u, d in kaldi_io.read_mat_scp("feats/feats.scp")
        }
    else:
        self.phone_label = phone_label
        self.feats = feats
    self.feats_list = []
    self.phone_label_list = []
    self.transform = transform
    # iterate over the instance attributes so the default-loaded data is used
    # as well when the arguments were None
    for utt, feat in self.feats.items():
        if utt in self.phone_label:
            self.feats_list.append(feat)
            a = np.zeros(feat.shape[0], int)
            for i in range(a.shape[0]):
                # each label spans three feature frames
                a[i] = self.phone_label[utt][i // 3]
            self.phone_label_list.append(
                Phone_cla_Dataset.class_trans_vector(a))
    self.feats_nd = np.concatenate(tuple(self.feats_list))
    self.phone_label_nd = np.concatenate(tuple(self.phone_label_list))
def get_phoneme_labels(ali_dir):
    ali_files = []
    all_ali_dirs = ali_dir.split(',')
    for ali_dir in all_ali_dirs:
        ali_files.extend([
            os.path.join(ali_dir, f) for f in os.listdir(ali_dir)
            if f.startswith('ali.')
        ])
    pdf_ali_dict = {}
    for file in ali_files:
        pdf_ali_file = "ark:ali-to-phones --per-frame {} ark:'gunzip -c {} |' ark:- |".format(
            os.path.join(ali_dir, "final.mdl"), file)
        pdf_ali_dict.update(
            {u: d for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)})
    return pdf_ali_dict
def get_labels(ali_dir, ali_type):
    ali_files = [
        os.path.join(ali_dir, f) for f in listdir(ali_dir)
        if f.startswith('ali.')
    ]
    pdf_ali_dict = {}
    for file in ali_files:
        if ali_type == "pdf":
            pdf_ali_file = "ark:ali-to-pdf {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(ali_dir, "final.mdl"), file)
        else:
            pdf_ali_file = "ark:ali-to-phones --per-frame {} ark:'gunzip -c {} |' ark:- |".format(
                os.path.join(ali_dir, "final.mdl"), file)
        pdf_ali_dict.update(
            {u: d - 1 for u, d in kaldi_io.read_vec_int_ark(pdf_ali_file)})
    return pdf_ali_dict
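A hypothetical sanity check pairing these labels with features from the matching data directory (paths illustrative):

labels = get_labels('exp/tri3_ali', 'pdf')
feats_pipe = 'ark:copy-feats scp:data/train/feats.scp ark:- |'
for utt, feats in kaldi_io.read_mat_ark(feats_pipe):
    if utt in labels:
        assert feats.shape[0] == len(labels[utt])  # one pdf label per frame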
def read_custom_feats(self, custom_feats_ark):
    feat_path = custom_feats_ark
    if self.dset == 'train':
        label_path = os.path.join(self.recipe_dir, 'exp', self.cfg['ali_dir'])
    else:
        label_path = os.path.join(self.recipe_dir, 'exp',
                                  self.cfg['ali_dir'] + '_' + self.dset)

    feat_opts = ''
    if self.cfg['feature_deltas']:
        feat_opts += " add-deltas --delta-order=2 ark:- ark:- |"
    if self.cfg['feature_context']:
        feat_opts += " splice-feats --left-context={0} --right-context={0} ark:- ark:- |".format(
            str(self.cfg['feature_context']))

    label_opts = 'ali-to-pdf' if self.cfg['task'] == 'classification' \
        else 'ali-to-phones --per-frame'

    feats = {k: m for k, m in kaldi_io.read_mat_ark(
        'ark:copy-feats ark:{} ark:- | {}'.format(feat_path, feat_opts))}
    lab = {k: v for k, v in kaldi_io.read_vec_int_ark(
        'gunzip -c {0}/ali*.gz | {1} {0}/final.mdl ark:- ark:-|'.format(
            label_path, label_opts)) if k in feats}
    feats = {k: v for k, v in feats.items() if k in lab}
    return feats, lab
def testPipeReadWrite(self):
    """ Test read/write for pipes.

    Note: make sure the "os.environ['KALDI_ROOT']" in
    "kaldi_io/kaldi_io.py" is correct.
    """
    # the following line disables 'stderr' forwarding, comment it for DEBUG,
    with open("/dev/null", "w") as sys.stderr:
        # read,
        flt_mat4 = {k: m for k, m in kaldi_io.read_mat_ark(
            'ark:copy-feats ark:tests/data/feats.ark ark:- |')}
        # write to pipe,
        with kaldi_io.open_or_fd(
                'ark:| copy-feats ark:- ark:tests/data_re-saved/mat_pipe.ark',
                'wb') as f:
            for k, m in flt_mat4.items():
                kaldi_io.write_mat(f, m, k)
        # read it again and compare,
        for k, m in kaldi_io.read_mat_ark('tests/data_re-saved/mat_pipe.ark'):
            self.assertTrue(np.array_equal(m, flt_mat4[k]),
                            "flt. matrix same after read/write via pipe")
        # read some other formats from pipe,
        i32_vec3 = {k: v for k, v in kaldi_io.read_vec_int_ark(
            'ark:copy-int-vector ark:tests/data/ali.ark ark:- |')}
        flt_vec4 = {k: v for k, v in kaldi_io.read_vec_flt_ark(
            'ark:copy-vector ark:tests/data/conf.ark ark:- |')}
def save_smoothed_feats(self):
    feat_path = os.path.join(self.recipe_dir, 'data', self.dset, 'feats.scp')
    if self.dset == 'train':
        label_path = os.path.join(self.recipe_dir, 'exp', self.cfg['ali_dir'])
    else:
        label_path = os.path.join(self.recipe_dir, 'exp',
                                  self.cfg['ali_dir'] + '_' + self.dset)

    feat_opts = "apply-cmvn --utt2spk=ark:{0} ark:{1} ark:- ark:- |".format(
        os.path.join(self.recipe_dir, 'data', self.dset, 'utt2spk'),
        os.path.join(self.recipe_dir, 'data', self.dset,
                     self.dset + '_cmvn_speaker.ark'))
    label_opts = 'ali-to-pdf' if self.cfg['task'] == 'classification' \
        else 'ali-to-phones --per-frame'

    feats = {k: m for k, m in kaldi_io.read_mat_ark(
        'ark:copy-feats scp:{} ark:- | {}'.format(feat_path, feat_opts))}
    lab = {k: v for k, v in kaldi_io.read_vec_int_ark(
        'gunzip -c {0}/ali*.gz | {1} {0}/final.mdl ark:- ark:-|'.format(
            label_path, label_opts)) if k in feats}
    feats = {k: v for k, v in feats.items() if k in lab}

    fname = os.path.join(self.recipe_dir, 'data', self.dset, 'smoothed.ark')
    f = kaldi_io.open_or_fd(fname, 'wb')
    for key in tqdm(feats):
        tmp = smooth_acoustic(feats[key])
        kaldi_io.write_mat(f, tmp, key)
    return fname
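`smooth_acoustic` is not part of these excerpts; a minimal sketch, assuming a moving-average filter along the frame axis of a `(frames, dims)` matrix:

import numpy as np

def smooth_acoustic(feats, width=5):
    # hypothetical smoother: per-dimension moving average, shape-preserving
    kernel = np.ones(width) / width
    return np.apply_along_axis(
        lambda col: np.convolve(col, kernel, mode='same'), 0, feats)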
root.ark_path = kwargs.pop('ark_path', None)

cmd_gmm_info = lambda: '%s/src/gmmbin/gmm-info --print-args=false %s | grep pdfs' % (
    root.kaldi_dir, root.mdl_path)
cmd_ali_pdf = lambda: '%s/src/bin/ali-to-pdf --print-args=false %s "ark:gunzip -c %s|" ark,t:-' % (
    root.kaldi_dir, root.mdl_path, root.aligz_path)
show_alignments = lambda: '%s/src/bin/show-alignments --print-args=false %s %s "ark:gunzip -c %s|"' % (
    root.kaldi_dir, root.phones_path, root.mdl_path, root.aligz_path)
show_transitions = lambda: '%s/src/bin/show-transitions --print-args=false %s %s' % (
    root.kaldi_dir, root.phones_path, root.mdl_path)
copy_int_vector = lambda: '%s/src/bin/copy-int-vector --print-args=false "ark:gunzip -c %s|" ark,t:-' % (
    root.kaldi_dir, root.aligz_path)
alignment = lambda: {
    k: v for k, v in kaldi_io.read_vec_int_ark(root.aligz_path)
}

def numpdfs():
    numpdfs = 0
    with subprocess.Popen(cmd_gmm_info(), stdout=subprocess.PIPE,
                          shell=True) as proc:
        numpdfs = int(proc.stdout.readline().decode().strip().split(' ')[-1])
    return numpdfs

def transid2info():
    transid2info = {}
    with subprocess.Popen(show_transitions(), stdout=subprocess.PIPE,
#!/usr/bin/env python

import numpy as np
import kaldi_io

print('testing int32-vector i/o')
i32_vec = {k: v for k, v in kaldi_io.read_vec_int_ark('data/ali.ark')}        # binary,
i32_vec2 = {k: v for k, v in kaldi_io.read_vec_int_ark('data/ali_ascii.ark')} # ascii,
# - store,
with kaldi_io.open_or_fd('data_re-saved/ali.ark', 'wb') as f:
    for k, v in i32_vec.items():
        kaldi_io.write_vec_int(f, v, k)
# - read and compare,
for k, v in kaldi_io.read_vec_int_ark('data_re-saved/ali.ark'):
    assert np.array_equal(v, i32_vec[k])

print('testing float-vector i/o')
flt_vec = {k: v for k, v in kaldi_io.read_vec_flt_scp('data/conf.scp')}        # scp,
flt_vec2 = {k: v for k, v in kaldi_io.read_vec_flt_ark('data/conf.ark')}       # binary-ark,
flt_vec3 = {k: v for k, v in kaldi_io.read_vec_flt_ark('data/conf_ascii.ark')} # ascii-ark,
# - store,
with kaldi_io.open_or_fd('data_re-saved/conf.ark', 'wb') as f:
    for k, v in flt_vec.items():
        kaldi_io.write_vec_flt(f, v, k)
def text_generator(path, name):
    generator = kaldi_io.read_vec_int_ark(path)
    return prepend_generator(generator, name)
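`prepend_generator` is not a kaldi_io function; a plausible helper matching this call would tag each pair yielded by the ark reader with the stream name:

def prepend_generator(generator, name):
    # hypothetical: prefix every (utt_id, vec) pair with a stream label
    for utt_id, vec in generator:
        yield name, utt_id, vec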
parser.add_argument('eval_dir')
args = parser.parse_args()

idx_to_phn_name_file = os.path.join(args.exp_dir, 'phn_sil_to_idx.txt')
phn_to_idx_file = os.path.join(args.exp_dir, 'phn_sil_to_idx.int')
pfeats_name_to_idx_file = os.path.join(args.exp_dir, 'pfeats_name_to_idx.txt')
out_file = os.path.join(args.eval_dir, 'accuracy.txt')
output = open(out_file, 'w')

# Mappings
idx_to_pfeats_name = read_inv_phone_map(pfeats_name_to_idx_file)
idx_to_phn_name = read_inv_phone_map(idx_to_phn_name_file)
pfeats_map = PFeatsMap(phn_to_idx_file, args.lang)

# Inputs
aligns_it = kio.read_vec_int_ark(args.align)
pfeats_it = kio.read_mat_ark(args.pfeats)

# Counters (np.int was removed in NumPy 1.24; use the builtin int dtype)
phn_cnt = np.zeros(pfeats_map.phn_dim(), dtype=int)
phn_correct = np.zeros_like(phn_cnt)
pfeats_correct = np.zeros(pfeats_map.pfeats_dim(), dtype=int)

# Evaluate accuracy
for utt_phones, utt_pfeats_real in zip(aligns_it, pfeats_it):
    for phone, pfeats_real in zip(utt_phones[1], utt_pfeats_real[1]):
        pfeats_real = np.exp(pfeats_real)
        if not pfeats_map.is_phn_valid(phone):
            continue
        pfeats_true = pfeats_map.phn_to_pfeats(phone)
Id2LoglikeAMModel.add_args(parser)
parser.add_argument('ali_dir')
parser.add_argument('--max_pdf_id', type=int, default=None,
                    help="Maximum pdf_id")
args = parser.parse_args()

ali_stretch_model = AliStretchModel.build(args, load_from_cashe=False)
id2ll_model = Id2LoglikeAMModel.build(args, load_from_cashe=False)

logger.info(f"Loading {args.ali_dir}/ali.*.gz")
utt2ali = {
    key: ali
    for key, ali in tqdm(
        kaldi_io.read_vec_int_ark(
            f'ark: gunzip -c {args.ali_dir}/ali_pdf.1.gz|'))
}

i = 0
for key, ali in tqdm(utt2ali.items()):
    i += 1
    ali_stretch_model.add_utts(ali)
logger.info(f"AliStretchModel processed {i} utterances")
ali_stretch_model.compute()
ali_stretch_model.save_to_file()
logger.info(f"Loaded {len(utt2ali)} alis")

logger.info(f"Loading logprobs and train model")
i = 0
for k, m in tqdm(
        kaldi_io.read_mat_ark(f'ark: cat {args.ali_dir}/output.1.ark |'),
        total=len(utt2ali)):
maxscore = float("-inf") targetscore = 0 # pdb.set_trace() # if 'wrong' == wrong_or_correct: # for uttid in wrong_uttids: # print(uttid) # else: # for uttid in correct_uttids: # print(uttid) numvoicedframes_correct = dict() numvoicedframes_wrong = dict() for uttid, vad_vec in kaldi_io.read_vec_int_ark(ipath2vad_scp): assert (uttid not in numvoicedframes_correct ), "Duplicated utterance %s in %s" % (uttid, ipath2vad_scp) assert (uttid not in numvoicedframes_wrong ), "Duplicated utterance %s in %s" % (uttid, ipath2vad_scp) if uttid in correct_uttids: numvoicedframes_correct[uttid] = (np.sum(vad_vec), vad_vec.shape[0]) if uttid in wrong_uttids: numvoicedframes_wrong[uttid] = (np.sum(vad_vec), vad_vec.shape[0]) draw_histogram( numvoicedframes_correct, 0, "distribution of number of voiced frames per utterance(correct utterance)") print("The nvoiced/nwhole ratio is %.4f" % get_ratio(numvoicedframes_correct)) draw_histogram(
def main():
    # assert len(sys.argv) == 2, "Improper number of arguments."
    # ipath2vad_scp = sys.argv[1]
    # for debugging
    # ipath2vad_scp="ark:copy-vector scp:/home/jerry/research/ap17_olr/lsid/_vad/vad_dev_1s.1.scp ark,t:-|"
    ipath2train_vad_scp = "ark:copy-vector scp:/home/jerry/research/ap17_olr/lsid/data/train/vad.scp ark,t:- |"
    ipath2dev1s_vad_scp = "ark:copy-vector scp:/home/jerry/research/ap17_olr/lsid/data/dev_1s/vad.scp ark,t:- |"

    # Use a dict rather than a list so that specific utterances can be
    # inspected later, although there is no need for that so far.
    train_numvoicedframes = dict()
    for uttid, vad_vec in kaldi_io.read_vec_int_ark(ipath2train_vad_scp):
        assert uttid not in train_numvoicedframes, \
            "Duplicated utterance %s in %s" % (uttid, ipath2train_vad_scp)
        train_numvoicedframes[uttid] = (np.sum(vad_vec), vad_vec.shape[0])

    dev1s_numvoicedframes = dict()
    for uttid, vad_vec in kaldi_io.read_vec_int_ark(ipath2dev1s_vad_scp):
        assert uttid not in dev1s_numvoicedframes, \
            "Duplicated utterance %s in %s" % (uttid, ipath2dev1s_vad_scp)
        dev1s_numvoicedframes[uttid] = (np.sum(vad_vec), vad_vec.shape[0])

    ## draw histogram for voiced frames for train
    # draw_histogram(train_numvoicedframes, 0, "distribution of number of voiced frames per utterance")

    ## draw overlaid histograms of voiced frames for train and dev_1s
    idx = 0
    train_nframe_list = [
        train_numvoicedframes[uttid][idx]
        for uttid in train_numvoicedframes.keys()
    ]
    train_num_bins = range(0, 1000)
    dev1s_nframe_list = [
        dev1s_numvoicedframes[uttid][idx]
        for uttid in dev1s_numvoicedframes.keys()
    ]
    dev1s_num_bins = range(0, 1000)
    # num_bins = 100
    title = "distribution of number of frames per utterance"
    plt.hist(train_nframe_list, train_num_bins, facecolor='blue', alpha=0.5)
    plt.hist(dev1s_nframe_list, dev1s_num_bins, facecolor='red', alpha=0.5)
    plt.xlabel("number of frames per utterance")
    plt.ylabel("number of utterances")
    plt.title(title)
    plt.show()

    ## draw histogram for all frames
    # draw_histogram(numvoicedframes, 1, "distribution of number of frames per utterance")
    # pdb.set_trace()

    # get voiced/whole frames ratio for both sets
    print("The train nvoiced/nwhole ratio is %.4f"
          % get_ratio(train_numvoicedframes))
    print("The dev_1s nvoiced/nwhole ratio is %.4f"
          % get_ratio(dev1s_numvoicedframes))
parser.add_argument('exp_dir')
parser.add_argument('eval_dir')
args = parser.parse_args()

os.makedirs(args.eval_dir, exist_ok=True)
model_fname = os.path.join(args.exp_dir, 'final.h5')
phn_map_fname = os.path.join(args.exp_dir, 'phn_sil_to_idx.int')
score_fname = os.path.join(args.eval_dir, 'score.ark')
score_txt_fname = os.path.join(args.eval_dir, 'score.txt')
score_txt = open(score_txt_fname, 'w')

# Load phone map
phn_map = read_phone_map(phn_map_fname)

# Load kaldi files
flags_dict = {k: v for k, v in kio.read_vec_int_ark(args.flags)}
ali_dict = {k: v for k, v in kio.read_ali_ark(args.phones)}
feats_it = kio.read_mat_ark(args.feats)

# Load classifier model
model = MispModel.load(model_fname)

with open(score_fname, 'wb') as f:
    for utt, feats in feats_it:
        # Workaround to deal with missing alignments
        if utt not in ali_dict:
            continue
        print(utt, end=' ', file=score_txt)
        flags = flags_dict[utt]
        ali = ali_dict[utt]
args = parser.parse_args()

# Prepare files
os.makedirs(args.res_dir, exist_ok=True)
feats_fd = sys.stdin.buffer
flags_fname = os.path.join(args.data_dir, 'text_ext_flags')
ali_force_frame_fname = os.path.join(args.cbps_dir, 'force_ali_test/ali_frames.gz')
ali_force_fname = os.path.join(args.cbps_dir, 'force_ali_test/ali_pdf.gz')
score_fname = os.path.join(args.res_dir, 'score.ark')
score_txt_fname = os.path.join(args.res_dir, 'score.txt')
score_txt = open(score_txt_fname, 'w')
cmp_fd = open('test/cmp_gop.txt', 'w')

# Load kaldi files
flags_it = kio.read_vec_int_ark(flags_fname)
ali_force_it = kio.read_ali_ark(ali_force_fname)
ali_force_frm_it = kio.read_ali_ark(ali_force_frame_fname)
feats_it = kio.read_mat_ark(feats_fd)

with open(score_fname, 'wb') as f:
    for flags_t, ali_force_t, ali_force_frm_t, feats_t in zip(
            flags_it, ali_force_it, ali_force_frm_it, feats_it):
        # Unpack each tuple
        utt, flags = flags_t
        _, ali_force = ali_force_t
        _, ali_force_frm = ali_force_frm_t
        _, feats = feats_t
        # Get only features for corresponding states in alignments
        probs_force = hlp.np_pick(feats, ali_force)
def load_dataset(fea_scp, fea_opts, lab_folder, lab_opts, left, right,
                 max_sequence_length, fea_only=False):
    fea = {k: m for k, m in kaldi_io.read_mat_ark(
        'ark:copy-feats scp:' + fea_scp + ' ark:- |' + fea_opts)}
    if not fea_only:
        # Note that only the alignments of the loaded features are copied
        lab = {k: v for k, v in kaldi_io.read_vec_int_ark(
            'gunzip -c ' + lab_folder + '/ali*.gz | ' + lab_opts + ' '
            + lab_folder + '/final.mdl ark:- ark:-|') if k in fea}
        # Remove all the features without an alignment
        # (see the log file in alidir, "Did not Succeded")
        fea = {k: v for k, v in fea.items() if k in lab}

    end_snt = 0
    end_index = []
    snt_name = []
    fea_conc = []
    lab_conc = []

    tmp = 0
    for k in sorted(sorted(fea.keys()), key=lambda k: len(fea[k])):
        #####
        # If the sequence length is above the threshold, we split it, keeping
        # a minimal chunk length of max/4. With max length = 500 the split
        # kicks in only above 500 + (500/4) = 625 remaining frames: sequences
        # up to 625 frames are kept whole, while a 626-frame sequence is split
        # into chunks of 500 and 126.
        if len(fea[k]) > max_sequence_length and max_sequence_length > 0:
            fea_chunked = []
            lab_chunked = []
            for i in range((len(fea[k]) + max_sequence_length - 1)
                           // max_sequence_length):
                if len(fea[k][i * max_sequence_length:]) \
                        > max_sequence_length + (max_sequence_length / 4):
                    fea_chunked.append(
                        fea[k][i * max_sequence_length:(i + 1) * max_sequence_length])
                    if not fea_only:
                        lab_chunked.append(
                            lab[k][i * max_sequence_length:(i + 1) * max_sequence_length])
                    else:
                        lab_chunked.append(np.zeros((
                            fea[k][i * max_sequence_length:(i + 1) * max_sequence_length].shape[0],)))
                else:
                    fea_chunked.append(fea[k][i * max_sequence_length:])
                    if not fea_only:
                        lab_chunked.append(lab[k][i * max_sequence_length:])
                    else:
                        lab_chunked.append(np.zeros((
                            fea[k][i * max_sequence_length:].shape[0],)))
                    break
            for j in range(0, len(fea_chunked)):
                fea_conc.append(fea_chunked[j])
                lab_conc.append(lab_chunked[j])
                snt_name.append(k + '_split' + str(j))
        else:
            fea_conc.append(fea[k])
            if not fea_only:
                lab_conc.append(lab[k])
            else:
                lab_conc.append(np.zeros((fea[k].shape[0],)))
            snt_name.append(k)
        tmp += 1

    fea_zipped = zip(fea_conc, lab_conc)
    fea_sorted = sorted(fea_zipped, key=lambda x: x[0].shape[0])
    fea_conc, lab_conc = zip(*fea_sorted)

    for entry in fea_conc:
        end_snt = end_snt + entry.shape[0]
        end_index.append(end_snt)

    fea_conc = np.concatenate(fea_conc)
    lab_conc = np.concatenate(lab_conc)

    return [snt_name, fea_conc, lab_conc, np.asarray(end_index)]
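A hypothetical call with chunking enabled (paths illustrative); sequences longer than 625 frames are split as described in the comment above:

snt_name, fea, lab, end_index = load_dataset(
    'data/train/feats.scp', '', 'exp/tri3_ali', 'ali-to-pdf',
    left=0, right=0,
    max_sequence_length=500,
    fea_only=False)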