def testFloatVectorReadWrite(self):
    """ Test read/write for float vectors. """
    # read,
    flt_vec = {k: v for k, v in kaldi_io.read_vec_flt_scp('tests/data/conf.scp')}  # scp,
    flt_vec2 = {k: v for k, v in kaldi_io.read_vec_flt_ark('tests/data/conf.ark')}  # binary-ark,
    flt_vec3 = {k: v for k, v in kaldi_io.read_vec_flt_ark('tests/data/conf_ascii.ark')}  # ascii-ark,
    # store,
    with kaldi_io.open_or_fd('tests/data_re-saved/conf.ark', 'wb') as f:
        for k, v in flt_vec.items():
            kaldi_io.write_vec_flt(f, v, k)
    # read and compare,
    for k, v in kaldi_io.read_vec_flt_ark('tests/data_re-saved/conf.ark'):
        self.assertTrue(np.array_equal(v, flt_vec[k]), msg="flt. vector same after re-saving")
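A minimal standalone sketch of the same round trip, outside the test harness (the temporary path is an assumption):

import numpy as np
import kaldi_io

vec = np.random.randn(512).astype(np.float32)
with kaldi_io.open_or_fd('/tmp/example.ark', 'wb') as f:
    kaldi_io.write_vec_flt(f, vec, key='utt1')
for key, v in kaldi_io.read_vec_flt_ark('/tmp/example.ark'):
    assert key == 'utt1'
    assert np.allclose(v, vec)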
def create_session_wise_arks(self):
    session_wise_speech_arks = self.cluster_arks(self.speech_arks)
    modified_ark_components = self.get_modified_ark_components(session_wise_speech_arks)
    for session, components in modified_ark_components:
        with open('iemocap/data/kaldi/modified/' + self.get_modified_file_name(session), 'wb') as file_:
            for key, vector in components:
                kaldi_io.write_vec_flt(file_, np.array(vector), key)
def main(): print("start time:", time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))) model = load_model() ext = ExtractEmbedding(model, "layer4") dir = "/home/work_nfs4_ssd/hzhao/feature/voxceleb1/test/feats.scp" dataset = getTestData(dir) batch_size = 256 test_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False) for i, trainset in enumerate(test_loader): (input_x, target_var) = trainset input_x = input_x.cuda() #print(input_x.size(),type(input_x)) output = ext(input_x) output = output.squeeze() output = output.cpu() #tensor to numpy output = output.detach().numpy() #print(":::",len(target_var)) target_var = np.squeeze(target_var) #tmp="/home/work_nfs/lizhang/node6/kaldi02/kaldi02/egs/voxceleb/v2/tmp" filename = "./test/99_test/enroll/xvector." + str(i) + ".ark" f = kaldi_io.open_or_fd(filename, "wb") for i, uttid in enumerate(target_var): kaldi_io.write_vec_flt(f, output[i], key=uttid) print("end time:", time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))
def dvec_compute(generator, ds_eval, device, num_jobs=20, outfolder='./exp/example_dvecs'):
    # naively compute the embeddings for each window
    all_utts = ds_eval.all_utts
    ds_len = len(all_utts)
    indices = np.arange(ds_len)
    job_split = np.array_split(indices, num_jobs)
    generator.eval().to(device)
    for job_num, job in enumerate(tqdm(job_split)):
        print('Starting job {}'.format(job_num))
        ark_scp_output = 'ark:| copy-vector ark:- ark,scp:{0}/xvector.{1}.ark,{0}/xvector.{1}.scp'.format(
            outfolder, job_num + 1)
        job_utts = all_utts[job]
        job_feats = ds_eval.get_batches(job_utts)
        job_feats = mtd(job_feats, device)
        with torch.no_grad():
            job_embeds = torch.cat([generator(x.unsqueeze(0)) for x in tqdm(job_feats)]).cpu().numpy()
        with kaldi_io.open_or_fd(ark_scp_output, 'wb') as f:
            for xvec, key in zip(job_embeds, job_utts):
                kaldi_io.write_vec_flt(f, xvec, key=key)
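A read-back sketch for the pattern above: the 'ark:| copy-vector ...' pipe writes both an ark and an scp, and kaldi_io.read_vec_flt_scp can iterate the result (the path is an assumption matching the writer's outfolder and job numbering):

import kaldi_io

for key, vec in kaldi_io.read_vec_flt_scp('./exp/example_dvecs/xvector.1.scp'):
    print(key, vec.shape)  # each entry is a 1-D float numpy array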
def write_vec_ark(uid, feats, write_path, set_id):
    """
    :param uid: utterance ids generated by dataset class
    :param feats: features
    :param write_path: output directory
    :param set_id: train or test
    :return:
    """
    file_path = pathlib.Path(write_path)
    if not file_path.exists():
        os.makedirs(str(file_path))
    assert len(uid) == len(feats)
    print('There are %d vectors.' % len(uid))
    ark_file = write_path + '/xvec.{}.ark'.format(set_id)
    scp_file = write_path + '/xvec.{}.scp'.format(set_id)
    # write scp and ark file
    with open(scp_file, 'w') as scp, open(ark_file, 'wb') as ark:
        for i in range(len(uid)):
            vec = feats[i]
            len_vec = len(vec.tobytes())
            key = uid[i]
            kaldi_io.write_vec_flt(ark, vec, key=key)
            # the scp offset points at the binary header that follows 'key ',
            # i.e. the current position minus the vector payload and its 10-byte header
            scp.write(str(uid[i]) + ' ' + str(ark_file) + ':' + str(ark.tell() - len_vec - 10) + '\n')
    print('\nark,scp files are in: {}, {}.'.format(ark_file, scp_file))
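A verification sketch for the offset arithmetic above: the 10 bytes are the binary header write_vec_flt emits after 'key ' ('\0B' marker, 'FV ' or 'DV ' type tag, and the size byte plus int32 dimension). An scp line produced this way can be read back directly with kaldi_io.read_vec_flt (the path is an assumption):

import kaldi_io

# take the first 'key ark_path:offset' line written by write_vec_ark above
with open('xvecs/xvec.test.scp') as scp:
    key, rxfile = scp.readline().split()
vec = kaldi_io.read_vec_flt(rxfile)  # seeks to the offset and parses the header
print(key, vec.shape)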
def compute_plda_score(enr_embeds, test_embeds, plda_dir, all_pair=True, mean=False):
    os.environ["KALDI_ROOT"] = "/host/projects/kaldi"
    if mean:
        enr_keys = ['enr']
        n_uttrs = len(enr_embeds)
        enr_embeds = enr_embeds.mean(0, keepdims=True)
    else:
        enr_keys = ['enr_{}'.format(i) for i in range(len(enr_embeds))]
    test_keys = ['test_{}'.format(i) for i in range(len(test_embeds))]
    keys = enr_keys + test_keys
    embeds = np.concatenate([enr_embeds, test_embeds])

    # write trials
    score_dir = TemporaryDirectory()
    with open("{}/num_utts.ark".format(score_dir.name), "w") as f:
        if mean:
            f.write("enr {}\n".format(n_uttrs))
        else:
            for key in enr_keys:
                f.write("{} {}\n".format(key, 1))
    with open("{}/kaldi_trial".format(score_dir.name), "w") as f:
        if all_pair:
            trial_pairs = itertools.product(enr_keys, test_keys)
        else:
            trial_pairs = zip(enr_keys, test_keys)
        for pair in trial_pairs:
            f.write(" ".join(pair))
            f.write("\n")

    # write feats
    ark_scp_output = ('ark:| copy-vector ark:- '
                      'ark,scp:{output}/feats.ark,{output}/feats.scp'.format(output=score_dir.name))
    with kaldi_io.open_or_fd(ark_scp_output, "wb") as f:
        for key, vec in zip(keys, embeds):
            kaldi_io.write_vec_flt(f, vec.squeeze(), key=str(key))

    # call scoring
    ret = subprocess.call(["./plda_score.sh", plda_dir, score_dir.name])
    if ret != 0:
        print("plda scoring fails")
        raise ValueError

    # read plda scores
    plda_scores = pd.read_csv("{}/plda_scores".format(score_dir.name),
                              delimiter=" ", names=["enroll", "test", "score"]).score
    plda_scores = np.array(plda_scores)
    plda_scores = plda_scores.reshape(len(enr_keys), -1)
    if all_pair:
        assert plda_scores.shape[1] == len(test_embeds)
    score_dir.cleanup()
    return plda_scores
def write_vec(filename, temp_dict):
    try:
        with open(filename, 'wb') as f:
            for key, vec in temp_dict.items():
                kaldi_io.write_vec_flt(f, vec, key)
        return True
    except Exception as e:
        print(str(e))
        return False
def write_vectors(utt_list, csvector_list, data_path):
    size = len(utt_list)
    csvector_dict = {}
    for i in range(size):
        sub_list = utt_list[i]
        sub_vector = csvector_list[i]
        for j in range(len(sub_list)):
            csvector_dict[sub_list[j]] = sub_vector[j]
    with kaldi_io.open_or_fd(data_path, 'wb') as f:
        for k, v in csvector_dict.items():
            kaldi_io.write_vec_flt(f, v, k)
def predict_ark(model, ark_file_path, out_file_path, sess, vec_nom=False):
    # Testing periodically
    out_file_path = out_file_path + '/output.ark'
    out_file = open_or_fd(out_file_path, 'wb')
    for index, (key, feature) in enumerate(read_mat_ark(ark_file_path)):
        test_input = feature
        feed_dict = {model.test_input: test_input}
        output_tensors = [model.embeddings]
        embeddings = sess.run(output_tensors, feed_dict)
        if vec_nom:
            # length-normalize the embedding (np.sum, not the nonexistent np.num)
            embeddings /= np.sqrt(np.sum(np.square(embeddings)))
        write_vec_flt(out_file, embeddings[0][0], key=key)
    out_file.close()
def get_utt_xvector(utt2spk, featscp):
    input_x, target_var = getTestData()
    input_x = input_x.cuda()
    target_y = target_var.cuda()
    ext = ExtractEmbedding(model, "fc")
    output = ext(input_x)
    xvector = output.squeeze(0)  # 2D to 1D
    # tensor to numpy (move to cpu first; detach removes the grad)
    xvector = xvector.cpu().detach().numpy()
    # write the xvector of the utterance
    kaldi_io.write_vec_flt(f, xvector, key=utt)
def predict_scp(model, scp_file_path, out_file_path, sess, vec_nom=False):
    # Testing periodically
    out_file_path = out_file_path + '/test/enroll.ark'
    out_file = open_or_fd(out_file_path, 'wb')
    length = 110000
    with tqdm(total=length) as pbar:
        for index, (key, feature) in enumerate(read_mat_scp(scp_file_path)):
            pbar.update(1)
            test_input = feature
            feed_dict = {model.test_input: test_input}
            output_tensors = [model.embeddings]
            embeddings = sess.run(output_tensors, feed_dict)
            if vec_nom:
                # length-normalize the embedding (np.sum, not the nonexistent np.num)
                embeddings /= np.sqrt(np.sum(np.square(embeddings)))
            write_vec_flt(out_file, embeddings[0][0], key=key)
    out_file.close()
def predict_ark(model, ark_file_path, out_file_path, sess, vec_nom=False):
    # Testing periodically
    out_file_path = out_file_path + '/output.ark'
    out_file = open_or_fd(out_file_path, 'wb')
    input_group = []
    key_group = []
    for index, (key, feature) in enumerate(read_mat_ark(ark_file_path)):
        input_group.append(feature)
        key_group.append(key)
    # run the whole batch through the model at once
    feed_dict = {model.test_input: input_group}
    output_tensors = [model.embeddings]
    embeddings = sess.run(output_tensors, feed_dict)
    for i in range(len(key_group)):
        write_vec_flt(out_file, embeddings[0][i], key=key_group[i])
    out_file.close()
def compute_lda_score(enr_embeds, test_embeds, lda_dir, score_dir="./lda_score"):
    os.environ["KALDI_ROOT"] = "/host/projects/kaldi"
    enr_keys = ['enr{}'.format(i) for i in range(len(enr_embeds))]
    test_keys = ['test_{}'.format(i) for i in range(len(test_embeds))]
    keys = enr_keys + test_keys
    embeds = np.concatenate([enr_embeds, test_embeds])
    if not os.path.isdir(score_dir):
        os.makedirs(score_dir)

    # write trials
    with open("{}/kaldi_trial".format(score_dir), "w") as f:
        trial_pairs = itertools.product(enr_keys, test_keys)
        for pair in trial_pairs:
            f.write(" ".join(pair))
            f.write("\n")

    # write feats
    ark_scp_output = 'ark:| copy-vector ark:- ark,scp:{output}/feats.ark,{output}/feats.scp'.format(
        output=score_dir)
    with kaldi_io.open_or_fd(ark_scp_output, "wb") as f:
        for key, vec in zip(keys, embeds):
            kaldi_io.write_vec_flt(f, vec.squeeze(), key=str(key))

    # call scoring
    ret = subprocess.call("./lda_score.sh {} {}".format(lda_dir, score_dir), shell=True)
    if ret != 0:
        print("lda scoring fails")
        raise ValueError

    # read lda scores
    lda_scores = pd.read_csv("{}/lda_scores".format(score_dir),
                             delimiter=" ", names=["enroll", "test", "score"]).score
    lda_scores = np.array(lda_scores)
    lda_scores = lda_scores.reshape(len(enr_keys), -1)
    assert lda_scores.shape[1] == len(test_embeds)
    return lda_scores
def main():
    args = get_args()
    os.makedirs(args.output_dir, exist_ok=True)
    device = torch.device(f'cuda:{args.gpu_idx - 1}')

    # Load model
    with open(f'{args.model_dir}/feat_dim', 'rt') as f:
        feat_dim = int(f.readline().strip())
    with open(f'{args.model_dir}/num_spks', 'rt') as f:
        num_spks = int(f.readline().strip())
    if not args.attention:
        model = Xvector(feat_dim, num_spks, extract=True)
    else:
        model = Xvector_AttnPooling(feat_dim, num_spks, extract=True)
    model = model.to(device)
    state = torch.load(f'{args.model_dir}/final.pth', map_location=device)
    model.load_state_dict(state)
    model.eval()

    # Extract x-vectors
    for i in range(args.gpu_idx, args.nj + 1, args.num_gpus):
        dataset = ExtractorDataset(f"{args.data_dir}/split{args.nj}/{i}")
        ark = open(f'{args.output_dir}/xvector.{i}.ark', 'wb')
        scp = open(f'{args.output_dir}/xvector.{i}.scp', 'wt')
        desc = f" (xvector.{i}.ark,scp) "
        for feat, uttid in tqdm(dataset, desc=desc, leave=False, ncols=79):
            feat = feat.unsqueeze(0).to(device)
            with torch.no_grad():
                embedding = model(feat).squeeze(0).to('cpu').detach().numpy()
            # write 'uttid ' by hand, remember the offset of the binary header,
            # then let write_vec_flt emit header and data (key omitted)
            ark.write((uttid + ' ').encode("latin1"))
            point = ark.tell()
            write_vec_flt(ark, embedding)
            scp.write(f'{uttid} {args.output_dir}/xvector.{i}.ark:{point}\n')
        ark.close()
        scp.close()
def extract_embeddings_from_model(model_path: str, features: str, output_arkfile: str, **kwargs):
    model_dump = torch.load(model_path, lambda storage, loc: storage)
    config_parameters = model_dump['config']
    model = model_dump['model']
    scaler = model_dump['scaler']
    encoder = model_dump['encoder']
    feature_string = config_parameters['features'] if not features else features
    kaldi_string = parsecopyfeats(feature_string, **config_parameters['feature_args'])
    arkfile = os.path.join(os.path.dirname(model_path), output_arkfile)
    model.to(device).eval()
    with torch.no_grad(), open(arkfile, "wb") as fp:
        for key, feat in kaldi_io.read_mat_ark(kaldi_string):
            feat = scaler.transform(feat)
            # add singleton batch dim
            feat = torch.from_numpy(feat).unsqueeze(0).to(device)
            # forward through model
            output = model.extract(feat)
            embedding = output.squeeze(0).cpu().numpy()
            kaldi_io.write_vec_flt(fp, embedding, key=key)
def main(args):
    assert args.prior_floor > 0.0 and args.prior_floor < 1.0
    prior = np.zeros((args.prior_dim,), dtype=np.int32)
    for path in args.alignment_files:
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                _, rxfile = line.strip().split(None, 1)
                try:
                    ali = kaldi_io.read_vec_int(rxfile)
                except Exception:
                    raise Exception("failed to read int vector {}.".format(rxfile))
                assert ali is not None and isinstance(ali, np.ndarray)
                for id in ali:
                    prior[id] += 1
    prior = np.maximum(prior / float(np.sum(prior)), args.prior_floor)  # normalize and floor
    prior = prior / float(np.sum(prior))  # normalize again
    kaldi_io.write_vec_flt(args.output, prior)
    logger.info("Saved the initial state prior estimate in {}".format(args.output))
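A small check sketch (the output path is a hypothetical stand-in for args.output): the prior above is written without a key, so it can be read back with kaldi_io.read_vec_flt and should sum to approximately 1 after the final normalization.

import numpy as np
import kaldi_io

prior = kaldi_io.read_vec_flt('exp/state_prior.vec')  # hypothetical path
assert np.isclose(prior.sum(), 1.0)
print('min prior (should be near the floor):', prior.min())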
def write_ark(model, args):
    """ Write the extracted embeddings to an ark file, in the same format as i-vectors """
    dataset = KaldiDatasetUtt_ARK(args.feats_ark, args.feats_len)
    data_loader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=1)
    model.eval()
    save_to = args.ark_file
    validate_path(save_to)
    with torch.no_grad(), open(save_to, 'wb') as save_to_file:
        for batch_idx, (data, names) in tqdm(enumerate(data_loader), ncols=100, total=len(data_loader)):
            data = data.transpose(1, 2)
            data = data.to(args.cuda, dtype=torch.double)
            _, embedding_a, embedding_b = model(data)
            vectors = embedding_a.data.cpu().numpy()
            for i in range(len(vectors)):
                kaldi_io.write_vec_flt(save_to_file, vectors[i], names[i])
def plda_write(self, plda):
    with kaldi_io.open_or_fd(plda, 'wb') as f:
        kaldi_io.write_vec_flt(f, self.mean, key='mean')
        kaldi_io.write_vec_flt(f, self.within_var.reshape(-1, 1), key='within_var')
        kaldi_io.write_vec_flt(f, self.between_var.reshape(-1, 1), key='between_var')
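A possible read-side counterpart (a sketch, not the author's code): the three named vectors can be recovered with kaldi_io.read_vec_flt_ark, assuming the covariance matrices were flattened from dim x dim squares as written above.

import kaldi_io

def plda_read(plda):
    # recover 'mean', 'within_var', 'between_var' written by plda_write above
    vecs = {k: v for k, v in kaldi_io.read_vec_flt_ark(plda)}
    mean = vecs['mean']
    dim = mean.shape[0]
    within_var = vecs['within_var'].reshape(dim, dim)
    between_var = vecs['between_var'].reshape(dim, dim)
    return mean, within_var, between_var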
def create_modified_ark(self):
    modified_ark_components = self.get_modified_ark_components()
    for file_name, components in modified_ark_components:
        with open('meld/data/kaldi/modified/' + self.get_modified_file_name(file_name), 'wb') as file_:
            for key, vector in components:
                kaldi_io.write_vec_flt(file_, np.array(vector), key)
    if flag_kaldi:
        if feat_mat.size > 0:
            key = hf[k].replace('.wav', '')
            Features[key] = feat_mat
        else:
            print("Problem with file: {}".format(key))
    else:
        Features.append(feat_mat)
        ID.append(IDs)

# Once the features of all files have been extracted, save them
# TODO: save them within the loop; waiting for all of them to finish will become an issue later
if flag_static == "static":
    if flag_kaldi:
        temp_file = 'temp_static' + file_features[:-4] + '.ark'
        with open(temp_file, 'wb') as f:
            for key in sorted(Features):
                write_vec_flt(f, Features[key], key=key)
        ark_file = file_features.replace('.txt', '') + '.ark'
        scp_file = file_features.replace('.txt', '') + '.scp'
        os.system("copy-vector ark:" + temp_file + " ark,scp:" + ark_file + ',' + scp_file)
        os.remove(temp_file)
    else:
        Features = np.asarray(Features)
        np.savetxt(file_features, Features)
if flag_static == "dynamic":
    if flag_kaldi:
        temp_file = 'temp_dynamic' + file_features[:-4] + '.ark'
        with open(temp_file, 'wb') as f:
            for key in sorted(Features):
                write_mat(f, Features[key], key=key)
content = f.readlines()
content = [x.strip() for x in content]

# speaker-to-utterances mapping
spk2mat = defaultdict(list)
for line in content:
    (key, rxfile) = line.split()
    spk = key.split('-')[0]
    if spk in dev_test_spk.keys():
        uttid = key.split('-')[1] + '_' + key.split('-')[2]
        if uttid not in dev_test_spk[spk]:
            continue
        spk2mat[spk].append(read_vec_flt(rxfile))

# create speaker embeddings
out_file = sys.argv[2]
ark_scp_output = 'ark:| copy-vector ark:- ark,scp:' + out_file + '.ark,' + out_file + '.scp'
with open_or_fd(ark_scp_output, 'wb') as f:
    for spk, mat in spk2mat.items():
        # speaker embedding = mean of the speaker's utterance vectors
        spk_emb = np.mean(mat, axis=0).reshape(-1,)
        write_vec_flt(f, spk_emb, key=spk)
import sys
import numpy as np
import kaldi_io

if __name__ == "__main__":
    if len(sys.argv) < 3:
        # argv[1]: text file with one '<utt_id> <npy_path>' pair per line
        # argv[2]: output prefix for the ark/scp pair
        print('Usage: %s npy_list kaldi_output_prefix' % sys.argv[0])
        exit(1)
    npy_file = sys.argv[1]
    kaldi_file = sys.argv[2]
    mag_ark_scp_output = 'ark:| copy-vector ark:- ark,scp:{0}.ark,{0}.scp'.format(kaldi_file)
    num = 0
    with open(npy_file, 'r', encoding='utf-8') as fread, \
            kaldi_io.open_or_fd(mag_ark_scp_output, 'wb') as f_mag:
        for line in fread:
            utt_id, feat_path = line.strip().split(' ')
            output = np.load(feat_path)
            kaldi_io.write_vec_flt(f_mag, output, key=utt_id)
            if num % 100 == 0:
                print(num)
            num += 1
    print('finish')
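A hypothetical invocation for the converter above (file names are illustrative): given feats.list containing lines like `utt1 /path/utt1.npy`, running `python npy_to_ark.py feats.list exp/xvectors` would produce exp/xvectors.ark and exp/xvectors.scp. Note that `copy-vector` must be on PATH, since the writer pipes through it.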
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('data_name', type=str, help='name of dataset')
    parser.add_argument('feats_file', type=str,
                        help='full path of the data file (x-vector array used as prediction input)')
    parser.add_argument('utts_file', type=str,
                        help='full path of the utterance numpy array corresponding to feats_file')
    parser.add_argument('weights_path', type=str, help='path of model weights as .h5 file')
    parser.add_argument('output_dir', type=str, help='output directory to save embeddings')
    # nargs='?' so the default actually applies when the argument is omitted
    parser.add_argument('save_kaldi_flag', type=int, nargs='?', default=1,
                        help='whether to save the output embeddings in kaldi format')
    args = parser.parse_args()

    # Load data util and config for network modules
    model_config = MODEL_CONFIGS_DICT[args.data_name].ModelConfig()
    if os.path.exists(args.feats_file):
        data = numpy.load(args.feats_file)
    else:
        sys.exit("Input x-vector feature numpy array doesn't exist. Please check the path. Exiting!")

    embeddings1, embeddings2 = get_predictions_test(model_config, args.weights_path, data)

    # Save predictions and embeddings (for later visualization and accuracy computation)
    numpy.save(os.path.join(args.output_dir, "embed1_test"), embeddings1)
    numpy.save(os.path.join(args.output_dir, "embed2_test"), embeddings2)
    print('\nEmbeddings saved at %s \n' % args.output_dir)

    if args.save_kaldi_flag == 1:
        if not os.path.exists(args.utts_file):
            sys.exit("Numpy array containing utterance information doesn't exist. Exiting!")
        # Save embeddings as kaldi ark files (for use in EER evaluations)
        utts = numpy.load(args.utts_file)
        with open(os.path.join(args.output_dir, "e1_test.ark"), 'wb') as o:
            for idx, e in enumerate(embeddings1):
                kaldi_io.write_vec_flt(o, e, key=utts[idx])
        with open(os.path.join(args.output_dir, "e2_test.ark"), 'wb') as o:
            for idx, e in enumerate(embeddings2):
                kaldi_io.write_vec_flt(o, e, key=utts[idx])
def extract(test_loader, model, xvector_dir, ark_num=50000):
    model.eval()
    if not os.path.exists(xvector_dir):
        os.makedirs(xvector_dir)
        print('Creating xvector path: %s' % xvector_dir)

    pbar = tqdm(enumerate(test_loader))
    vectors = []
    uids = []
    for batch_idx, (data, uid) in pbar:
        vec_shape = data.shape
        # fold extra segment dimension into the batch dimension
        if vec_shape[1] != 1:
            data = data.reshape(vec_shape[0] * vec_shape[1], 1, vec_shape[2], vec_shape[3])
        if args.cuda:
            data = data.cuda()
        data = Variable(data)

        # compute output
        _, out = model(data)
        # average the segment embeddings back into one vector per utterance
        if vec_shape[1] != 1:
            out = out.reshape(vec_shape[0], vec_shape[1], out.shape[-1]).mean(dim=1)
        vectors.append(out.squeeze().data.cpu().numpy())
        uids.append(uid[0])
        del data, out

        if batch_idx % args.log_interval == 0:
            pbar.set_description('Extracting: [{}/{} ({:.0f}%)]'.format(
                batch_idx, len(test_loader.dataset), 100. * batch_idx / len(test_loader)))

    assert len(uids) == len(vectors)
    print('There are %d vectors' % len(uids))

    scp_file = xvector_dir + '/xvectors.scp'
    scp = open(scp_file, 'w')
    # write scp and ark files, at most ark_num vectors per ark
    for set_id in range(int(np.ceil(len(uids) / ark_num))):
        ark_file = xvector_dir + '/xvector.{}.ark'.format(set_id)
        with open(ark_file, 'wb') as ark:
            ranges = np.arange(len(uids))[int(set_id * ark_num):int((set_id + 1) * ark_num)]
            for i in ranges:
                vec = vectors[i]
                len_vec = len(vec.tobytes())
                key = uids[i]
                kaldi_io.write_vec_flt(ark, vec, key=key)
                # offset of the binary header: current position minus payload and 10-byte header
                scp.write(str(uids[i]) + ' ' + str(ark_file) + ':' + str(ark.tell() - len_vec - 10) + '\n')
    scp.close()
    print('There are %d vectors. Saved to %s' % (len(uids), xvector_dir))
    torch.cuda.empty_cache()
ood_vec = np.array(ood_vec)
id_vec = []
for key, vec in kaldi_io.read_vec_flt_scp(id_vec_file):
    id_vec.append(vec)
id_vec = np.array(id_vec)
dim = id_vec.shape[1]

# covariance estimates, regularized with the identity
Cs = (1.0 / ood_vec.shape[0]) * np.dot(np.transpose(ood_vec), ood_vec) + np.eye(dim)
Ct = (1.0 / id_vec.shape[0]) * np.dot(np.transpose(id_vec), id_vec) + np.eye(dim)

# compute Cs^(-1/2) (whitening) and Ct^(1/2) (coloring)
ws, vs = np.linalg.eig(Cs)
whitening = np.dot(np.dot(vs, np.diag(ws ** (-0.5))), np.transpose(vs))
wt, vt = np.linalg.eig(Ct)
coloring = np.dot(np.dot(vt, np.diag(wt ** (0.5))), np.transpose(vt))

# transform the out-of-domain vectors
transform_ood_vec = np.dot(np.dot(ood_vec, whitening), coloring)

if not os.path.isdir(transform_dir):
    os.makedirs(transform_dir)
with open(os.path.join(transform_dir, 'xvector_transformed.ark'), 'wb') as f:
    for index, key in enumerate(ood_keys):
        kaldi_io.write_vec_flt(f, transform_ood_vec[index], key=key)
import sys

import numpy as np
import kaldi_io

dataset = sys.argv[1]
rfilename = '/scratch/xli/kaldi/egs/voxceleb_xli/v3_stft/data/voxceleb1_' + dataset + '/feats.scp'
ark_scp_output = ('ark:| copy-vector ark:- ark,scp:data/LPS/' + dataset + '_vad.ark,'
                  'data/voxceleb1_' + dataset + '/vad.scp')

keys = []
vectors = []
for key, mat in kaldi_io.read_mat_scp(rfilename):
    rst = []
    keys.append(key)
    num_frames, feats_dim = mat.shape
    for i in range(num_frames):
        # mark a frame as speech if its summed log-power exceeds the threshold
        if mat[i].sum() > feats_dim * (-60):
            rst.append(1.0)
        else:
            rst.append(0.0)
    vectors.append(np.array(rst))

print('totally %d utts.' % (len(keys)))
with kaldi_io.open_or_fd(ark_scp_output, 'wb') as f:
    for key, vec in zip(keys, vectors):
        kaldi_io.write_vec_flt(f, vec, key=key)
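A read-back sketch for the VAD vectors above (the path assumes the script was run with dataset='test'): each vector holds one 0/1 flag per frame and can serve as a boolean mask over the matching feature matrix.

import kaldi_io

for key, vad in kaldi_io.read_vec_flt_scp('data/voxceleb1_test/vad.scp'):
    mask = vad > 0.5  # frame-level speech mask
    print(key, int(mask.sum()), 'speech frames of', len(mask))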
    try:
        if args.cuda:
            feats = feats.cuda(device)
            model = model.cuda(device)
        emb = model.forward(feats)
    except Exception:
        # fall back to CPU (e.g. on CUDA out-of-memory)
        feats = feats.cpu()
        model = model.cpu()
        emb = model.forward(feats)
    embeddings[utt] = emb.detach().cpu().numpy().squeeze()

print('Storing embeddings in output file')
out_name = args.path_to_data.split('/')[-2]
file_name = args.out_path + out_name + '.ark'
if os.path.isfile(file_name):
    os.remove(file_name)
    print(file_name + ' removed')
with open_or_fd(file_name, 'wb') as f:
    for k, v in embeddings.items():
        write_vec_flt(f, v, k)
print('End of embeddings computation.')
def forward(self, mode, features, utter_idx, labels, records, **kwargs):
    """
    Args:
        features: the features extracted by upstream, already on the device
            assigned by command-line args
        labels: the speaker labels
        records: defaultdict(list); scalars appended here are averaged and
            logged on Tensorboard
        logger: Tensorboard SummaryWriter, given here for logging/debugging
            convenience; please use "self.downstream/your_content_name" as
            the key name when logging customized contents
        global_step: global step in the runner, helpful for Tensorboard logging
    Return:
        loss: the loss to be optimized, should not be detached
    """
    features_pad = pad_sequence(features, batch_first=True)

    if self.modelrc['module'] == "XVector":
        # TDNN layers in XVector reduce the total sequence length by a fixed 14 frames
        attention_mask = [torch.ones((feature.shape[0] - 14)) for feature in features]
    else:
        attention_mask = [torch.ones((feature.shape[0])) for feature in features]

    attention_mask_pad = pad_sequence(attention_mask, batch_first=True)
    attention_mask_pad = (1.0 - attention_mask_pad) * -100000.0

    features_pad = self.connector(features_pad)

    if mode == 'train':
        agg_vec = self.model(features_pad, attention_mask_pad.cuda())
        labels = torch.LongTensor(labels).to(features_pad.device)
        loss = self.objective(agg_vec, labels)
        records['loss'].append(loss.item())
        return loss
    elif mode in ['dev', 'test']:
        agg_vec = self.model.inference(features_pad, attention_mask_pad.cuda())
        agg_vec = agg_vec / (torch.norm(agg_vec, dim=-1).unsqueeze(-1))
        # separate batched data into pair data
        vec1, vec2 = self.separate_data(agg_vec)
        scores = self.score_fn(vec1, vec2).cpu().detach().tolist()
        records['scores'].extend(scores)
        records['labels'].extend(labels)
        return torch.tensor(0)
    elif mode in ['train_plda', 'test_plda'] and is_leader_process():
        agg_vec = self.model.inference(features_pad, attention_mask_pad.cuda())
        agg_vec = agg_vec / (torch.norm(agg_vec, dim=-1).unsqueeze(-1))
        for key, vec in zip(utter_idx, agg_vec):
            vec = vec.view(-1).detach().cpu().numpy()
            kaldi_io.write_vec_flt(self.ark, vec, key=key)
        continue
    print(utt, end=' ', file=score_txt)
    flags = flags_dict[utt]
    ali = ali_dict[utt]
    scores = np.zeros_like(flags, dtype=float)

    # split frames into segments
    segment_idxs = get_segment_idxs(ali)
    feats_segments = np.split(feats, segment_idxs)
    feats_segments = pad_sequences(feats_segments)
    ali_segments = np.split(ali, segment_idxs)
    predicts = model.predict(feats_segments)

    for i in range(len(ali_segments)):
        phone = ali_segments[i][0]
        flag = flags[i]
        predict = predicts[i]
        # mark silence and insertion errors with a NaN value
        if flag > 2:
            scores[i] = float('nan')
        else:
            scores[i] = predict[phn_map[phone]]
        print("%.2f" % scores[i], end=' ', file=score_txt)
    print("", file=score_txt)
    kio.write_vec_flt(f, scores, key=utt)

score_txt.close()
                for i in indices
            ], dtype=h5py.special_dtype(vlen=str))
        elif (out_format == "kaldi") or (out_format == "both"):
            # write the kaldi ark for the subset
            if name_sep == "":
                kaldi_ark_out = data_dir + "/" + sss + suffix + '.ark'
            else:
                kaldi_ark_out = data_dir + "/" + sss.replace("_HZ", "") + suffix + '.ark'
            with open(kaldi_ark_out, 'wb') as f:
                for d, k in zip(data[sub_indices], [physical[i] for i in indices]):
                    kaldi_io.write_vec_flt(f, d, key=k)
        else:
            log.error("ERROR: Wrong out format requested")

        # write the scp for the subset
        if name_sep == "":
            scp_out = data_dir + "/" + sss + suffix + '.scp'
        else:
            scp_out = data_dir + "/" + sss.replace("_HZ", "") + suffix + '.scp'
        log.info("Saving scp to " + scp_out)
        with open(scp_out, 'w') as f:
            for i in sub_indices:
                f.write(physical_2_name[physical[i]] + " " + physical[i] + "\n")