def prepData(self):
    """Load affinity-score matrices (and optional speaker-count matrices)
    plus an optional per-utterance threshold table.

    Side effects: sets self.nmesc_thres_list, self.key_mat_generator_dist_score,
    optionally self.key_mat_generator_dist_score_spkcount and
    self.spt_est_thres_dict.
    """
    self.nmesc_thres_list = []
    # Main affinity scores: Kaldi .scp/.ark pair, or a .txt listing .npy files.
    if self.param.affinity_score_file.split('.')[-1] == "scp":
        print("=== [INFO] .scp file and .ark files were provided")
        self.key_mat_generator_dist_score = list(kaldi_io.read_mat_scp(self.param.affinity_score_file))
    elif self.param.affinity_score_file.split('.')[-1] == "txt":
        print("=== [INFO] .txt file and .npy files were provided")
        self.key_mat_generator_dist_score = self.npy_to_generator()
    if self.param.affinity_score_for_spk_count != 'None':
        if self.param.affinity_score_for_spk_count.split('.')[-1] == "scp":
            print("=== [INFO] Speaker Count .scp file and .ark files were provided")
            self.key_mat_generator_dist_score_spkcount = list(kaldi_io.read_mat_scp(self.param.affinity_score_for_spk_count))
        # BUG FIX: this branch previously inspected affinity_score_file's
        # extension instead of affinity_score_for_spk_count's (copy-paste).
        elif self.param.affinity_score_for_spk_count.split('.')[-1] == "txt":
            print("=== [INFO] Speaker Count .txt file and .npy files were provided")
            self.key_mat_generator_dist_score_spkcount = self.npy_to_generator()
    elif self.param.segment_spkcount_input_path == "None":
        print("=== [INFO] No speaker Count .scp file and .ark files. Using the original mat files")
        self.key_mat_generator_dist_score_spkcount = self.key_mat_generator_dist_score
    if self.param.spt_est_thres in ["EigRatio", "NMESC"]:
        # Threshold estimated automatically; nothing to load.
        pass
    elif self.param.spt_est_thres != "None":
        # Text file: "<utt_id> <threshold>" per line.
        cont_spt_est_thres = modules.read_txt(self.param.spt_est_thres)
        self.spt_est_thres_dict = {x.split()[0]: float(x.split()[1]) for x in cont_spt_est_thres}
def Evaluation():
    """Sweep decision thresholds over the LDA projection of male/female test
    i-vectors and print the threshold that best balances the two error rates."""
    eigen_values, eigen_vectors = LDA()
    # Last generalized eigenvector = projection direction, as a column vector.
    projection = eigen_vectors[:, -1][:, None]

    male_mats = [mat for _, mat in read_mat_scp("exp/ivectors_sre10_test_male/ivector.scp")]
    female_mats = [mat for _, mat in read_mat_scp("exp/ivectors_sre10_test_female/ivector.scp")]
    y_male = np.dot(np.vstack(male_mats), projection)
    y_female = np.dot(np.vstack(female_mats), projection)

    best_diff = 100
    best_threshold = -2
    for threshold in np.arange(-1, 1, 0.001):
        male_err = np.mean(y_male > threshold)
        female_err = np.mean(y_female < threshold)
        #print "Male error rate is", M_error_rate, "and Female error rate is", F_error_rate, "while threshold value is", threshold
        diff = abs(male_err - female_err)
        if diff < best_diff:
            best_diff = diff
            best_threshold = threshold
    print("The optimal threshold for the classifier is", best_threshold,
          "with Error rate difference", best_diff)
def plot():
    """Overlay histograms of the LDA projections of male and female test
    i-vectors; shows the plot and saves it to temp.png."""
    eigen_values, eigen_vectors = LDA()
    direction = eigen_vectors[:, -1][:, None]

    male_mats = [mat for _, mat in read_mat_scp("exp/ivectors_sre10_test_male/ivector.scp")]
    female_mats = [mat for _, mat in read_mat_scp("exp/ivectors_sre10_test_female/ivector.scp")]
    y1 = np.dot(np.vstack(male_mats), direction)
    y2 = np.dot(np.vstack(female_mats), direction)

    fig = plt.figure()
    bins = np.linspace(-2, 2, 250)
    plt.hist(y1, bins, alpha=0.65, label='male')
    plt.hist(y2, bins, alpha=0.65, label='female')
    plt.legend(loc='upper right')
    plt.show()
    fig.savefig('temp.png')
def extract_adv_voiced_feats(grads, vads, ori_feats, sigma):
    """For every trial, add sigma-scaled adversarial gradients onto the voiced
    frames (VAD == 1.0) of the original features.

    Returns (trial_keys, gen_mat): the trial keys from the gradients scp and
    one stacked matrix of perturbed voiced frames per trial.
    """
    grads_data = dict(kaldi_io.read_mat_scp(grads))
    vad_data = dict(kaldi_io.read_vec_flt_scp(vads))
    ori_data = dict(kaldi_io.read_mat_scp(ori_feats))
    num_spoofed = len(grads_data.keys())
    num_vad = len(vad_data.keys())
    num_ori = len(ori_data.keys())
    trial_keys = list(grads_data.keys())
    assert num_vad == num_ori, \
        "Length does not match! (%d %d)" %(num_vad, num_ori)
    gen_mat = []
    for key in trial_keys:
        print('Process %s utts.' % (key))
        grads_mat = grads_data.get(key)
        # NOTE(review): the trial key apparently carries a 26-character prefix
        # before the test utterance id — confirm against the key format.
        testkey = key[26:]
        vad_vec = vad_data.get(testkey)
        ori_mat = ori_data.get(testkey)
        assert vad_vec is not None, 'No vad for %s %s' % (key, testkey)
        assert ori_mat is not None, 'No original feats for %s %s' % (key, testkey)
        voiced_rows = []
        grad_row = 0
        # Gradients are stored for voiced frames only, so they advance with
        # their own counter while the VAD/original features advance per frame.
        for frame in range(len(vad_vec)):
            if vad_vec[frame] == 1.0:
                voiced_rows.append(grads_mat[grad_row] * sigma + ori_mat[frame])
                grad_row += 1
        gen_mat.append(np.stack(voiced_rows, 0))
    return trial_keys, gen_mat
def LDA():
    """
    Train two-class (male/female) LDA on i-vectors read from Kaldi scp files.

    Returns the generalized eigenvalues and eigenvectors of (CovB, CovW),
    i.e. solutions of CovB v = lambda CovW v.

    BUG FIX: the covariance accumulations used np.dot on 1-D vectors, which
    yields a *scalar* inner product; the scatter/covariance definition needs
    the outer product. np.outer (and the vectorized X^T X form) is used now.
    """
    list_male = []
    list_female = []
    # Collect i-vectors per gender.
    for key, mat in read_mat_scp("exp/ivectors_sre_male/ivector.scp"):
        list_male.append(mat)
    for key, mat in read_mat_scp("exp/ivectors_sre_female/ivector.scp"):
        list_female.append(mat)
    arr_male = np.vstack(list_male)
    arr_female = np.vstack(list_female)

    N_male_ivector = arr_male.shape[0]
    N_female_ivector = arr_female.shape[0]
    arr_male_mean = np.mean(arr_male, axis=0)
    arr_female_mean = np.mean(arr_female, axis=0)
    # Global mean = sample-count-weighted average of the class means.
    global_mean = (arr_male_mean * N_male_ivector + arr_female_mean * N_female_ivector) \
        / (N_male_ivector + N_female_ivector)

    # Between-class covariance: average outer product of centered class means.
    CovB = (np.outer(arr_male_mean - global_mean, arr_male_mean - global_mean)
            + np.outer(arr_female_mean - global_mean, arr_female_mean - global_mean)) / 2

    # Within-class covariances, vectorized: (X - mean)^T (X - mean) / N.
    centered_male = arr_male - arr_male_mean
    centered_female = arr_female - arr_female_mean
    CovW1 = np.dot(centered_male.T, centered_male) / N_male_ivector
    CovW2 = np.dot(centered_female.T, centered_female) / N_female_ivector
    CovW = (CovW1 + CovW2) / 2

    # Generalized eigenproblem yields the LDA directions.
    eigen_values, eigen_vectors = scipy.linalg.eig(CovB, CovW)
    return eigen_values, eigen_vectors
# LDA() returns a tuple: (eigen_values, eigen_vectors)
def kaldi2npz(scp_file, npz_file, utt2spk_file):
    '''Load Kaldi scp(ark) features and save frame-level data as an .npz.

    The output archive holds three parallel arrays: per-frame features,
    speaker labels, and utterance ids.
    '''
    print("Loading kaldi scp file...")
    utts = []
    mats = []
    for k, v in kaldi_io.read_mat_scp(scp_file):
        utts.append(k)
        mats.append(v)
    utt2spk = create_utt2spk_map(utt2spk_file)
    feats = []
    spker_label = []
    utt_label = []
    # Flatten utterance matrices into per-frame rows with parallel labels.
    # (zip replaces the old manual idx counter.)
    for utt, mat in zip(utts, mats):
        for frame in mat:
            feats.append(frame)
            spker_label.append(utt2spk[utt])
            utt_label.append(utt)
    out_dir = os.path.dirname(npz_file)
    # BUG FIX: os.makedirs('') raises when npz_file has no directory part;
    # guard on a non-empty dirname and tolerate pre-existing directories.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    np.savez(npz_file, feats=feats, spker_label=spker_label, utt_label=utt_label)
    print("Convert {} to {} ".format(scp_file, npz_file))
def load_ranges_data(utt_to_chunks, minibatch_info, minibatch_size, scp_file_path, fea_dim, logger=None):
    """Pre-allocate per-minibatch arrays and fill them with feature chunks
    read from a Kaldi scp file.

    utt_to_chunks maps utterance key -> iterable of
    (minibatch_index, offset, length, label). minibatch_info[i][1] is the
    chunk length of minibatch i; minibatch_info[i][2] is the running row
    cursor (mutated here). Returns (all_data, labels), both object arrays
    indexed by minibatch.

    BUG FIX: missing utterances were detected with `if key is None`, which is
    never true (key comes from the scp reader); the lookup result `got` is
    what must be tested — otherwise the chunk loop crashes on None.
    """
    num_err, num_done = 0, 0
    if logger is not None:
        logger.info('Start allocating memories for loading training examples ...')
    all_data = np.ndarray(len(minibatch_info), dtype=object)
    labels = np.ndarray(len(minibatch_info), dtype=object)
    for i in range(len(minibatch_info)):
        # One (minibatch_size, chunk_len, fea_dim) buffer per minibatch.
        all_data[i] = np.zeros((minibatch_size, minibatch_info[i][1], fea_dim), dtype=np.float32)
        labels[i] = np.zeros(minibatch_size, dtype=np.int32)
    if logger is not None:
        logger.info('Start loading training examples to the memory ...')
    for key, mat in kaldi_io.read_mat_scp(scp_file_path):
        got = utt_to_chunks.get(key)
        if got is None:
            if logger is not None:
                logger.info("Could not create examples from utterance '%s' "
                            "because it has no entry in the ranges input file." % key)
            num_err += 1
        else:
            num_done += 1
            for minibatch_index, offset, length, label in got:
                info = minibatch_info[minibatch_index]
                mm = mat[offset:offset + length, :]
                dat = all_data[minibatch_index]
                assert dat.shape[1] == mm.shape[0] and dat.shape[2] == mm.shape[1]
                # info[2] is the next free row index in this minibatch.
                dat[info[2], :, :] = mm
                labels[minibatch_index][info[2]] = label
                info[2] += 1
    if logger is not None:
        logger.info('Loading features finished with {0} errors and {1} success from total {2} files.'.
                    format(num_err, num_done, num_err + num_done))
    return all_data, labels
def _file_to_feats(self, file):
    """Compute CMVN-normalized fbank features for a .wav file by shelling out
    to Kaldi command-line tools, then run them through self._model.

    Returns the model's representation of the single utterance.
    """
    assert file.suffix == '.wav'
    # To support CMVN files in the future
    cmvn_spec = None

    def _run_cmd(cmd):
        # Run a shell command, logging and re-raising on failure.
        logging.warn("Running {}".format(cmd))
        try:
            check_call(cmd, shell=True, universal_newlines=True)
        except CalledProcessError as e:
            logging.error("Failed with code {}:".format(e.returncode))
            logging.error(e.output)
            raise e

    with TemporaryDirectory() as temp_dir:
        temp_dir = Path(temp_dir)
        # Create config placeholder
        conf_file = temp_dir / 'fbank.conf'
        conf_file.write_text('--num-mel-bins=80\n')
        # Create SCP placeholder
        input_scp = temp_dir / 'input.scp'
        input_scp.write_text('file-0 {}\n'.format(file))
        # Compute speech features
        feat_ark = temp_dir / "feat.ark"
        feat_scp = temp_dir / "feat.scp"
        cmd = f"compute-fbank-feats --config={conf_file} scp:{input_scp} ark,scp:{feat_ark},{feat_scp}"
        _run_cmd(cmd)
        cmvn_scp = temp_dir / "cmvn.scp"
        if cmvn_spec is not None:
            # If CMVN specifier is provided, we create a dummy scp.
            # BUG FIX: this was a plain string literal, so the scp received the
            # literal text "{cmvn_spec}" instead of the specifier's value.
            cmvn_scp.write_text(f"file-0 {cmvn_spec}\n")
        else:
            # Compute CMVN stats
            cmvn_ark = temp_dir / "cmvn.ark"
            cmd = f"compute-cmvn-stats scp:{feat_scp} ark,scp:{cmvn_ark},{cmvn_scp}"
            _run_cmd(cmd)
        # Apply CMVN
        final_ark = temp_dir / "final.ark"
        final_scp = temp_dir / "final.scp"
        cmd = f"apply-cmvn --norm-vars=true scp:{cmvn_scp} scp:{feat_scp} ark,scp:{final_ark},{final_scp}"
        _run_cmd(cmd)
        with final_scp.open('rb') as fp:
            # Single-utterance scp: take the first (only) matrix.
            feats = [features for _, features in kaldi_io.read_mat_scp(fp)][0]
        # Process data
        feats_new = feats
        reps = self._model(np.array(feats_new))
    return reps
def testMatrixReadWrite(self):
    """ Test read/write for float matrices. """
    # Load the same data in three on-disk formats.
    flt_mat = dict(kaldi_io.read_mat_scp('tests/data/feats_ascii.scp'))  # ascii-scp,
    flt_mat2 = dict(kaldi_io.read_mat_ark('tests/data/feats_ascii.ark'))  # ascii-ark,
    flt_mat3 = dict(kaldi_io.read_mat_ark('tests/data/feats.ark'))  # ascii-ark,
    # Re-save the binary matrices,
    with kaldi_io.open_or_fd('tests/data_re-saved/mat.ark', 'wb') as f:
        for key, mat in flt_mat3.items():
            kaldi_io.write_mat(f, mat, key)
    # Matrices must survive the round trip bit-for-bit,
    for key, mat in kaldi_io.read_mat_ark('tests/data_re-saved/mat.ark'):
        self.assertTrue(np.array_equal(mat, flt_mat3[key]),
                        msg="flt. matrix same after re-saving")
def __init__(self, partition, config):
    """Initialize dataset: load features from the partition's scp, derive
    integer word ids from the label prefixes, and mean-normalize features."""
    self.is_train = (partition == "train")
    self.feature_dim = config.feature_dim
    data_scp = getattr(config, "%sfile" % partition)
    labels, data = zip(*read_mat_scp(data_scp))
    # Each label looks like "<word>_...": the word is the class.
    words = [re.split("_", lab)[0] for lab in labels]
    uwords = np.unique(words)
    word2id = {word: idx for idx, word in enumerate(uwords)}
    ids = [word2id[word] for word in words]
    # Scalar mean over every feature value, used for mean subtraction.
    total = 0.0
    count = 0
    for mat in data:
        total += np.sum(mat)
        count += np.prod(mat.shape)
    self.feature_mean = total / count
    self.data = np.array([mat - self.feature_mean for mat in data])
    self.ids = np.array(ids, dtype=np.int32)
    self.id_counts = Counter(ids)
    self.num_classes = len(self.id_counts)
    self.num_examples = len(self.ids)
def _read(self, file_path: str) -> Iterable[Instance]:  # pylint: disable=arguments-differ
    """Yield instances from a Kaldi feature scp: sort by frame count so each
    shard holds similar lengths, then visit shards in random order."""
    logger.info('Loading data from %s', file_path)
    dropped_instances = 0
    feats: List[Tuple[str, np.ndarray]] = list(kaldi_io.read_mat_scp(file_path))
    feats.sort(key=lambda pair: pair[1].shape[0])
    dataset_size = len(feats)
    shard_starts = list(range(0, dataset_size, self._shard_size))
    random.shuffle(shard_starts)
    for shard_start in shard_starts:
        shard_end = min(shard_start + self._shard_size, dataset_size)
        for pos in range(shard_start, shard_end):
            key, src = feats[pos]
            tgt = self.transcripts[key]
            instance = self.text_to_instance(src, tgt)
            tgt_len = instance.fields['target_tokens'].sequence_length()
            # Skip instances with an empty target transcript.
            if tgt_len < 1:
                dropped_instances += 1
            else:
                yield instance
            # Free the feature matrix and instance promptly.
            del src
            del instance
    if not dropped_instances:
        logger.info("No instances dropped from {}.".format(file_path))
    else:
        logger.warning("Dropped {} instances from {}.".format(
            dropped_instances, file_path))
def load_feats_train(u):
    """Load equal-length random crops for the training utterances indexed by u.

    Relies on module-level train_scp_info, rng_f_tr, min_length and max_length.
    Returns an array of shape (len(u), crop_len, feat_dim).
    """
    start_from_zero = False  # debugging aid: always crop from frame 0
    #n_avl_samp = np.array( [train_scp_info['durations'][uu] for uu in u] )
    n_avl_samp = np.array([train_scp_info['utt2sideInfo'][uu] for uu in u])
    files = [train_scp_info['utt2file'][uu] for uu in u]
    # Longest crop that still fits the shortest utterance; +1 because
    # randint's upper bound is exclusive.
    shortest = np.min(n_avl_samp)
    upper_bound = np.min([shortest + 1, max_length])
    n_sel_samp = rng_f_tr.randint(min_length, upper_bound)
    starts = []
    ends = []
    for i in range(len(files)):
        if start_from_zero:
            starts.append(0)
        else:
            last_possible_start = n_avl_samp[i] - n_sel_samp
            # Uniform start in [0, last_possible_start].
            # NOTE(review): the [0] implies rng_f_tr.randint returns an array
            # here — confirm the rng type.
            starts.append(rng_f_tr.randint(0, last_possible_start + 1)[0])
        ends.append(starts[-1] + n_sel_samp)
    # Kaldi range specifiers select the crop while reading.
    range_specs = ["xxx {}[{}:{},:]".format(files[i], starts[i], ends[i])
                   for i in range(len(files))]
    data = np.stack([pair[1] for pair in kaldi_io.read_mat_scp(range_specs)], axis=0)
    print(data.shape)
    return data
def iter(self):
    """Yield (key, features) pairs: each matrix is normalized, then spliced."""
    for utt, raw in kio.read_mat_scp(self.fn_scp):
        normed = norm_feats(raw, norm_mean=self.norm_mean, norm_var=self.norm_var)
        spliced = splice_feat(normed, self.left, self.right, self.skip)
        yield utt, spliced
def load_kaldi_feats_segm_same_dur(rng, files, min_length, max_length, n_avl_samp, start_from_zero):
    """Read one random segment of a common length from every Kaldi feature file.

    The length is drawn once in [min_length, min(shortest+1, max_length))
    so every utterance can supply it; start_from_zero pins all crops to
    frame 0 (debugging aid). Returns (len(files), seg_len, feat_dim).
    """
    shortest = np.min(n_avl_samp)
    # +1 because randint's upper bound is exclusive.
    upper_bound = np.min([shortest + 1, max_length])
    seg_len = rng.randint(min_length, upper_bound)
    starts = []
    ends = []
    for idx in range(len(files)):
        if start_from_zero:
            starts.append(0)
        else:
            last_start = n_avl_samp[idx] - seg_len
            # Uniform start in [0, last_start].
            # NOTE(review): the trailing [0] implies rng.randint returns an
            # array here — confirm the rng type.
            starts.append(rng.randint(0, last_start + 1)[0])
        ends.append(starts[-1] + seg_len)
    # Kaldi range specifiers select the crop while reading.
    range_specs = ["xxx {}[{}:{},:]".format(files[idx], starts[idx], ends[idx])
                   for idx in range(len(files))]
    segments = [pair[1] for pair in kaldi_io.read_mat_scp(range_specs)]
    return np.stack(segments, axis=0)
def main():
    """Read the feature dimension and vocabulary size from the data files,
    build a Transformer with the CLI hyper-parameters, and save an initial
    (epoch 0) checkpoint to opt.save_model_file."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-read_feats_scp_file', required=True)
    parser.add_argument('-read_vocab_file', required=True)
    parser.add_argument('-max_token_seq_len', type=int, required=True)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-n_head', type=int, default=8)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-d_inner_hid', type=int, default=1024)
    parser.add_argument('-d_k', type=int, default=64)
    parser.add_argument('-d_v', type=int, default=64)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-proj_share_weight', action='store_true')
    parser.add_argument('-embs_share_weight', action='store_true')
    parser.add_argument('-save_model_file', required=True)
    opt = parser.parse_args()
    print('--------------------[PROCEDURE]--------------------')
    print(
        '[PROCEDURE] reading dimension from data file and initialize the model'
    )
    # Peek at the first matrix only: its column count is the input dimension.
    for key, matrix in kaldi_io.read_mat_scp(opt.read_feats_scp_file):
        opt.src_dim = matrix.shape[1]
        break
    print('[INFO] get feature of dimension {} from {}.'.format(
        opt.src_dim, opt.read_feats_scp_file))
    # Vocabulary size comes from the saved word->index mapping.
    word2idx = torch.load(opt.read_vocab_file)
    opt.tgt_vocab_dim = len(word2idx)
    print('[INFO] get label of dimension {} from {}.'.format(
        opt.tgt_vocab_dim, opt.read_vocab_file))
    print('[INFO] model will initialized with add_argument:\n{}.'.format(opt))
    model = Transformer(opt.src_dim,
                        opt.tgt_vocab_dim,
                        opt.max_token_seq_len,
                        n_layers=opt.n_layers,
                        n_head=opt.n_head,
                        d_model=opt.d_model,
                        d_inner_hid=opt.d_inner_hid,
                        d_k=opt.d_k,
                        d_v=opt.d_v,
                        dropout=opt.dropout,
                        proj_share_weight=opt.proj_share_weight,
                        embs_share_weight=opt.embs_share_weight)
    # Checkpoint bundles the model with the options used to build it.
    checkpoint = {'model': model, 'model_options': opt, 'epoch': 0}
    torch.save(checkpoint, opt.save_model_file)
    #can be readed by:
    #checkpoint = torch.load(opt.save_model_file)
    #model = checkpoint['model']
    print('[INFO] initialized model is saved to {}.'.format(
        opt.save_model_file))
def write_kaldi(orig_feat_scp, ark_scp_output, max_len):
    """Write the slice feature matrix to ark_scp_output."""
    with ko.open_or_fd(ark_scp_output, 'wb') as sink:
        for utt, feats in ko.read_mat_scp(orig_feat_scp):
            padded = tensor_cnn_utt(feats, max_len)
            if padded.shape[1] != max_len:
                # Unexpected width: log the shape for debugging.
                print(padded.shape)
            ko.write_mat(sink, padded, key=utt)
def main():
    """Dump every feature matrix from args.inp_feats_scp to '<utt>.npy' in the
    current directory, optionally prefixing utterance ids with args.prefix."""
    args = get_args()
    inp_feats_scp = args.inp_feats_scp
    prefix = args.prefix
    # NOTE: args.out_feats_ark was read into a local but never used — the dead
    # local is removed (the CLI flag itself is untouched).
    for utt, feats in kaldi_io.read_mat_scp(inp_feats_scp):
        if prefix is not None:
            utt = '{p}-{u}'.format(p=prefix, u=utt)
        np.save('{u}.npy'.format(u=utt), feats)
def __init__(self, data_path, uttid_list, class_num):
    """Load every matrix from data_path (Kaldi scp) and keep it in memory,
    alongside the provided utterance-id list and class count."""
    data_dic = dict(kaldi_io.read_mat_scp(data_path))
    self.utt_list = list(data_dic.keys())
    self.data_list = list(data_dic.values())
    self.uttid_list = uttid_list
    self.class_num = class_num
    assert len(uttid_list) == len(self.data_list), "The lengths of uttid_list and data unmatch!"
def construct_tensor(orig_feat_scp, ark_scp_output, truncate_len):
    """Split each utterance tensor into fixed-width chunks of truncate_len
    columns and write each chunk under '<key>-<i>'.

    BUG FIX: the parameter was misspelled 'tuncate_len' while the body used
    'truncate_len', so every call raised NameError; the parameter is now
    spelled as the body expects (no working caller could have existed).
    """
    with ko.open_or_fd(ark_scp_output, 'wb') as f:
        for key, mat in ko.read_mat_scp(orig_feat_scp):
            tensor = tensor_cnn_utt(mat, truncate_len)
            # Number of whole chunks; any remainder columns are dropped.
            repetition = int(tensor.shape[1] / truncate_len)
            for i in range(repetition):
                sub_tensor = tensor[:, i * truncate_len:(i + 1) * truncate_len]
                new_key = key + '-' + str(i)
                ko.write_mat(f, sub_tensor, key=new_key)
def SequenceGenerator(file_name, out_file):
    """Run the (module-level) model over every matrix in a Kaldi scp and write
    one Kaldi text-vector line per utterance to out_file.

    BUG FIX: the output file was opened without a context manager, so it
    leaked if prediction raised; `with` guarantees closure.
    """
    model.eval()
    with open(out_file, 'w') as f:
        with torch.no_grad():
            for lab, x in read_mat_scp(file_name):
                # Wrap the matrix in a batch dimension before prediction.
                y = [x]
                batch = torch.from_numpy(np.array(y, dtype=np.float32)).to(device)
                y_pred = model.predict(batch).cpu().data.numpy().flatten()
                f.write(lab + ' [ ' + ' '.join(map(str, y_pred.tolist())) + ' ]\n')
def plot():
    """Project male/female test i-vectors through the top LDA direction and
    create a figure (plotting itself is not performed in this variant)."""
    eigen_values, eigen_vectors = LDA()
    direction = eigen_vectors[:, -1][:, None]
    male_mats = [mat for _, mat in read_mat_scp("exp/ivectors_sre10_test_male/ivector.scp")]
    female_mats = [mat for _, mat in read_mat_scp("exp/ivectors_sre10_test_female/ivector.scp")]
    y1 = np.dot(np.vstack(male_mats), direction)
    y2 = np.dot(np.vstack(female_mats), direction)
    fig = plt.figure()
def run(config):
    """Compute an M-measure score for every utterance in config.post_scp.

    config.delta_list is a comma-separated string of integer deltas.
    Returns {utterance_key: score}.
    """
    deltas = [int(d) for d in config.delta_list.strip().split(',')]
    scores = {}
    for utt, posteriors in kaldi_io.read_mat_scp(config.post_scp):
        print('Computing m-measure score for utterance {}'.format(utt))
        sys.stdout.flush()  # keep progress visible when stdout is piped
        scores[utt] = compute_mmeasure(posteriors, deltas)
    return scores
def load_cmvn(filename):
    ''' Load the cmvn file. Requires filename. '''
    spk2cmvn = {}
    for spk, stats in kaldi_io.read_mat_scp(filename):
        # Last column of row 0 holds the frame count; the remaining columns
        # are the accumulated sums (row 0) and squared sums (row 1).
        count = stats[0, -1]
        spk2cmvn[spk] = {'mu': stats[0, :-1] / count,
                         'var': stats[1, :-1] / count}
    return spk2cmvn
def __init__(self, dir, transform):
    """Load a Kaldi-style test directory: feats.scp, spk2utt and trials.

    Populates speakers, uid2feat, trials_pair, feat_dim, num_spks, transform.

    NOTE(review): FileExistsError is an odd choice for a *missing* file, but
    it is kept unchanged since callers may catch that exact type.
    """
    feat_scp = dir + '/feats.scp'
    spk2utt = dir + '/spk2utt'
    trials = dir + '/trials'
    if not os.path.exists(feat_scp):
        raise FileExistsError(feat_scp)
    if not os.path.exists(spk2utt):
        raise FileExistsError(spk2utt)
    if not os.path.exists(trials):
        raise FileExistsError(trials)
    # spk2utt line format: "<speaker> <utt1> <utt2> ...".
    dataset = {}
    with open(spk2utt, 'r') as u:
        all_cls = u.readlines()
        for line in all_cls:
            spk_utt = line.split(' ')
            spk_name = spk_utt[0]
            if spk_name not in dataset.keys():
                spk_utt[-1] = spk_utt[-1].rstrip('\n')
                dataset[spk_name] = spk_utt[1:]
    speakers = [spk for spk in dataset.keys()]
    speakers.sort()
    print('==> There are {} speakers in Test Dataset.'.format(
        len(speakers)))
    uid2feat = {}
    for utt_id, feat in kaldi_io.read_mat_scp(feat_scp):
        uid2feat[utt_id] = feat
    print('\tThere are {} utterances in Test Dataset.'.format(
        len(uid2feat)))
    # trials line format: "<utt_a> <utt_b> target|nontarget".
    trials_pair = []
    with open(trials, 'r') as t:
        all_pairs = t.readlines()
        for line in all_pairs:
            pair = line.split(' ')
            # BUG FIX: the old comparison against 'nontarget\n' misclassified
            # a final line lacking a trailing newline; strip before comparing.
            pair_true = pair[2].strip() != 'nontarget'
            trials_pair.append((pair[0], pair[1], pair_true))
    print('\tThere are {} pairs in test Dataset.\n'.format(
        len(trials_pair)))
    self.feat_dim = uid2feat[dataset[speakers[0]][0]].shape[1]
    self.speakers = speakers
    self.uid2feat = uid2feat
    self.trials_pair = trials_pair
    self.num_spks = len(speakers)
    self.transform = transform
def load_mat_ark_from_scp(basedir, scpfile):
    """Read all matrices from an scp whose ark paths are relative to basedir.

    Returns (keys, mat): the list of utterance keys and all matrices stacked
    row-wise.

    BUG FIX: the working directory was changed without a try/finally, so an
    exception while reading left the process chdir'd into basedir; the
    original cwd is now always restored.
    """
    cwd = os.getcwd()
    os.chdir(basedir)  # scp entries are resolved relative to basedir
    try:
        key = list()
        val = list()
        for k, v in kaldi_io.read_mat_scp(scpfile):
            key.append(k)
            val.append(v)
        mat = np.vstack(val)
    finally:
        os.chdir(cwd)
    return key, mat
def main():
    """Convert MFCC features from an input scp to fbanks, writing both a
    compressed Kaldi ark/scp pair and per-utterance .npy dumps under
    ark_check4/."""
    args = get_args()
    inp_feats_scp = args.inp_feats_scp
    out_feats_ark = args.out_feats_ark
    # Pipe through copy-feats to produce a compressed ark + scp pair.
    ark_scp_output = ('ark:| copy-feats --compress=true ark:- '
                      'ark,scp:{p}.ark,{p}.scp').format(p=out_feats_ark)
    with kaldi_io.open_or_fd(ark_scp_output, 'wb') as sink:
        for utt, mfcc_mat in kaldi_io.read_mat_scp(inp_feats_scp):
            fbank = convert_mfcc_to_fbank(mfcc_mat)
            np.save('ark_check4/{u}.npy'.format(u=utt), fbank)
            kaldi_io.write_mat(sink, fbank, key=utt)
def __init__(self, scp_file, utt2label_file, M):
    'Initialization'
    self.M = M
    self.scp_file = scp_file
    self.utt2len = ako.read_key_len(scp_file)
    self.utt2label = ako.read_key_label(utt2label_file)
    self.feat_gen = ko.read_mat_scp(scp_file)  # feature generator
    # Materialize the generator into a key -> matrix mapping.
    self.utt2mat = dict(self.feat_gen)
def main():
    """Train an LstmMfcc model over MFCC features from a Kaldi scp (GPU
    required); prints a prediction sample before and after training."""
    model = LstmMfcc(HIDDEN_DIM, MFCC_DIM, SPK)
    model.cuda() # use gpu
    loss_function = nn.NLLLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    file = '/export/b17/jlai/michael/mfcc/all_mfcc.scp'
    # pre-run score: sample prediction from the untrained model (first utt only)
    for _,mat in read_mat_scp(file):
        tensor = autograd.Variable(torch.from_numpy(mat))
        tag_scores = model(tensor)
        print(tag_scores)
        break
    # train for 6 epoches (70%)
    for epoch in range(6):
        for key,mat in read_mat_scp(file):
            #if key not in key2num:
            #    continue
            tensor = autograd.Variable(torch.from_numpy(mat))
            tensor = tensor.cuda() # use gpu
            # NOTE(review): torch.from_numpy() is called with NO argument and
            # will raise TypeError on the first iteration; the intended label
            # source (see the commented-out key2num lookup above) is missing
            # and must be supplied before this loop can run.
            targets = autograd.Variable(torch.from_numpy())
            targets = targets.cuda()
            model.zero_grad()
            model.hidden = model.init_hidden()
            # forward pass
            tag_scores = model(tensor)
            # backprop
            loss = loss_function(tag_scores, targets)
            loss.backward()
            optimizer.step()
    # post-run score (30%)
    for _,mat in read_mat_scp(file):
        tensor = torch.from_numpy(mat)
        tag_scores = model(tensor)
        print(tag_scores)
        break
def __init__(self, scp_file, utt2label_file):
    'Initialization'
    self.scp_file = scp_file
    self.utt2label = ako.read_key_label(utt2label_file)
    self.feat_gen = ko.read_mat_scp(scp_file)  # feature generator
    # Build frame-level feature and label matrices: each utterance's label
    # is repeated once per frame.
    feat_blocks = []
    label_blocks = []
    for utt, feats in self.feat_gen:
        feat_blocks.append(feats)
        label_blocks.append(np.repeat(self.utt2label[utt], len(feats)))
    self.label_mat = np.hstack(label_blocks)
    self.feat_mat = np.vstack(feat_blocks)
def convert_to_npy(datatype, arkscppath, outputnpyfilepath, file_list_path):
    """Split a Kaldi scp/ark of matrices ('mat') or float vectors ('vec')
    into one stacked .npy file per entry of file_list_path.

    Assumes the key order of the loaded dict follows file_list order.
    NOTE(review): for the very first key, count == 0 both seeds `system`
    AND appends j below, so the first row appears twice — verify against the
    expected output shapes before relying on this.
    """
    if not os.path.isdir(outputnpyfilepath):
        print('Creating directory where npy scores will be saved : {}'.format(
            outputnpyfilepath))
        os.makedirs(outputnpyfilepath)
    else:
        print("xvectors numpy path exists !")
        # exit()
    file_name = os.path.basename(arkscppath)
    ext = os.path.splitext(file_name)[1]
    if datatype == 'mat': #for score files
        if ext == ".scp":
            d = {key: mat for key, mat in kaldi_io.read_mat_scp(arkscppath)}
        else:
            print("File type not correct. scp required.")
    elif datatype == 'vec': #for embeddings
        if ext == ".scp":
            d = {
                key: mat
                for key, mat in kaldi_io.read_vec_flt_scp(arkscppath)
            }
        elif ext == ".ark":
            d = {
                key: mat
                for key, mat in kaldi_io.read_vec_flt_ark(arkscppath)
            }
        else:
            print("File type not correct. scp/ark required.")
    else:
        print("first argument should be mat/vec ")
    file_list = open(file_list_path, 'r').readlines()
    file_count = 0
    for count, (i, j) in enumerate(d.items()):
        if count == 0:
            system = j.reshape(1, -1)
        # if count % 100 == 0:
        #     print("Done with {} files".format(count))
        fn = file_list[file_count].rsplit()[0]
        if fn in i:
            # Key belongs to the current output file: append the row.
            system = np.vstack((system, j))
        else:
            # New output file begins: flush the accumulated rows.
            print('fielname:', fn)
            if not os.path.isfile(outputnpyfilepath + '/' + fn + '.npy'):
                np.save(outputnpyfilepath + '/' + fn + '.npy', system)
            file_count = file_count + 1
            system = j.reshape(1, -1)
    # last file
    print('fielname:', fn)
    if not os.path.isfile(outputnpyfilepath + '/' + fn + '.npy'):
        np.save(outputnpyfilepath + '/' + fn + '.npy', system)
def testMatrixReadWrite(self):
    """ Test read/write for float matrices. """
    # Read the fixtures in their three stored formats.
    flt_mat = dict(kaldi_io.read_mat_scp('tests/data/feats_ascii.scp'))  # ascii-scp,
    flt_mat2 = dict(kaldi_io.read_mat_ark('tests/data/feats_ascii.ark'))  # ascii-ark,
    flt_mat3 = dict(kaldi_io.read_mat_ark('tests/data/feats.ark'))  # ascii-ark,
    # Write the binary matrices back out,
    with kaldi_io.open_or_fd('tests/data_re-saved/mat.ark', 'wb') as out:
        for name, matrix in flt_mat3.items():
            kaldi_io.write_mat(out, matrix, name)
    # ... and verify the round trip preserved every matrix,
    for name, matrix in kaldi_io.read_mat_ark('tests/data_re-saved/mat.ark'):
        self.assertTrue(np.array_equal(matrix, flt_mat3[name]),
                        msg="flt. matrix same after re-saving")