def test_mk_data_dict(self):
    """Check the per-class data dict that PLDA builds from its inputs.

    Two models are built from the same generated data: ``model_1`` from
    shuffled rows with file names, ``model_2`` from rows sorted by file name
    and without file names.  Apart from the stored file names, both data
    dicts must agree, and the stored statistics must match values computed
    directly from the generated data.
    """
    X, Y, fnames = self.gen_data(self.dims, self.n, self.K,
                                 self.shared_cov, self.dist_bw_means)

    # Put the reference data into file-name order.
    sorting_idxs = np.argsort(fnames)
    X, Y, fnames = X[sorting_idxs], Y[sorting_idxs], fnames[sorting_idxs]

    # Deterministic shuffle of the row order for model_1.
    order = np.arange(0, Y.shape[0])
    np.random.seed(0)
    np.random.shuffle(order)

    model_1 = PLDA(X[order], Y[order], fnames[order])
    model_2 = PLDA(X, Y)

    tolerance = 1e-100

    for key in list(model_1.data.keys()):
        entry_1 = model_1.data[key]
        entry_2 = model_2.data[key]

        n_1, n_2 = entry_1['n'], entry_2['n']
        X_1, X_2 = entry_1['X'], entry_2['X']
        mean_1, mean_2 = entry_1['mean'], entry_2['mean']
        cov_1, cov_2 = entry_1['cov'], entry_2['cov']
        fnames_1, fnames_2 = entry_1['fnames'], entry_2['fnames']

        # Assert that data dicts in both models are equal, except fnames.
        self.assertEqual(n_1, n_2)

        X_1 = np.asarray(X_1)[np.argsort(fnames_1)]
        X_2 = np.asarray(X_2)  # X_2 is already sorted.
        self.assert_same(X_1, X_2, tolerance=tolerance)
        self.assert_same(mean_1, mean_2, tolerance=tolerance)
        self.assert_same(cov_1, cov_2, tolerance=tolerance)

        self.assertEqual(entry_1.keys(), entry_2.keys())
        self.assertFalse(np.array_equal(fnames_1, fnames_2))

        # Assert that data dicts are storing the correct values.
        self.assertEqual(n_1, self.n)

        X_subset = X[Y == key, :]
        self.assert_same(mean_1, X_subset.mean(axis=0), tolerance=tolerance)
        self.assert_same(cov_1, np.cov(X_subset.T), tolerance=tolerance)

        # model_2 was built without file names, so it stores None for each.
        truth = [None] * int(self.n)
        self.assert_same(np.asarray(fnames_2), np.asarray(truth))

        # Compare a sorted copy so the model's stored file names are not
        # reordered as a side effect of the test.
        self.assert_same(np.asarray(sorted(fnames_1)),
                         np.asarray(fnames[Y == key]))
def __init__(self, ivecs_dir, norm_list, plda_model_dir):
    """ Class constructor.

        :param ivecs_dir: path to directory with i-vectors
        :type ivecs_dir: str
        :param norm_list: path to list with files relative to directory with i-vectors
                          (None disables loading of normalization i-vectors)
        :type norm_list: str
        :param plda_model_dir: path to directory with models
        :type plda_model_dir: str
    """
    self.ivecs_dir = ivecs_dir
    # Filled in by load_norm_ivecs() once the generator is exhausted.
    self.scale, self.shift, self.model = None, None, None
    self.norm_list = norm_list
    self.plda = PLDA(plda_model_dir)
    if self.norm_list is not None:
        self.norm_ivecs = np.array(list(self.load_norm_ivecs()))
    else:
        # Always define the attribute so later reads do not raise
        # AttributeError when no normalization list was supplied.
        self.norm_ivecs = None
def setUp(self):
    """Create the per-test fixture: generated data and a PLDA model on it.

    The NumPy random generator is seeded with 0 before the data is generated.
    """
    self.dims, self.n, self.K = 5, 100, 5
    self.shared_cov = np.eye(self.dims)
    self.dist_bw_means = 3

    np.random.seed(0)
    samples, labels, names = self.gen_data(
        self.dims, self.n, self.K, self.shared_cov, self.dist_bw_means)

    self.model = PLDA(samples, labels, names)
    self.X = samples
    self.Y = labels
    self.fnames = names
def setUpClass(cls):
    """Create the class-wide fixture: generated data and a PLDA model on it.

    The NumPy random generator is seeded with 0 before the data is generated.
    """
    cls.dims, cls.n, cls.K = 3, 100, 5
    cls.shared_cov = np.eye(cls.dims)
    cls.dist_bw_means = 3

    np.random.seed(0)
    samples, labels, names = cls.gen_data(
        cls.dims, cls.n, cls.K, cls.shared_cov, cls.dist_bw_means)

    cls.model = PLDA(samples, labels, names)
    cls.X = samples
    cls.Y = labels
    cls.fnames = names
def main():
    """Adapt a PLDA model with unlabeled vectors and write the result.

    Command-line arguments (exactly three are required):
        1. <plda>                      model passed to ``PLDA.plda_read``
        2. <adapt-ivector-rspecifier>  vectors read with ``kaldi_io.read_vec``
        3. <plda-adapt>                destination passed to
                                       ``PLDA.plda_trans_write``

    On a wrong argument count the usage line is printed and the process
    exits with status 1.
    """
    if len(sys.argv) != 4:
        print('<plda> <adapt-ivector-rspecifier> <plda-adapt> \n')
        # A usage error must not look like success to the calling script.
        sys.exit(1)

    plda = sys.argv[1]
    train_vecs_adapt = sys.argv[2]
    plda_adapt = sys.argv[3]

    plda_new = PLDA()
    plda_new.plda_read(plda)
    plda_new.get_output()

    # Accumulate statistics over every adaptation vector, each with weight 1.
    aplda_model = PldaUnsupervisedAdaptor()
    for _, vec in kaldi_io.read_vec(train_vecs_adapt):
        aplda_model.add_stats(1, vec)
    aplda_model.update_plda(plda_new)

    plda_new.plda_trans_write(plda_adapt)
def requires(self):
    """Return the upstream tasks and remember their outputs.

    Builds a ``Target2LDA`` task and a ``PLDA`` task from ``self.conf``,
    stores their ``output()`` values on ``self.plda_target`` and
    ``self.plda_model_target``, and returns both tasks as a list.
    """
    upstream = [Target2LDA(self.conf), PLDA(self.conf)]
    self.plda_target = upstream[0].output()
    self.plda_model_target = upstream[1].output()
    return upstream
class Normalization(object):
    """ Speaker normalization S-Norm.

        Also handles some other operations such as calibration and detecting
        the number of speakers.

    """

    def __init__(self, ivecs_dir, norm_list, plda_model_dir):
        """ Class constructor.

            :param ivecs_dir: path to directory with i-vectors
            :type ivecs_dir: str
            :param norm_list: path to list with files relative to directory with i-vectors
                              (None disables loading of normalization i-vectors)
            :type norm_list: str
            :param plda_model_dir: path to directory with models
            :type plda_model_dir: str
        """
        self.ivecs_dir = ivecs_dir
        # Filled in by load_norm_ivecs() once the generator is exhausted.
        self.scale, self.shift, self.model = None, None, None
        self.norm_list = norm_list
        self.plda = PLDA(plda_model_dir)
        if self.norm_list is not None:
            self.norm_ivecs = np.array(list(self.load_norm_ivecs()))
        else:
            self.norm_ivecs = None

    def load_norm_ivecs(self):
        """ Load normalization i-vectors, scale and shift files and also pretrained model.

            This is a generator: the i-vectors are yielded one by one, and
            only after the list file is exhausted are ``self.scale``,
            ``self.shift`` and ``self.model`` loaded from the directory of
            the last listed entry.

            :returns: i-vectors
            :rtype: numpy.array
        """
        line = None
        with open(self.norm_list, 'r') as f:
            for line in f:
                line = line.rstrip()
                loginfo(
                    '[Diarization.load_norm_ivecs] Loading npy file {} ...'.
                    format(line))
                try:
                    yield np.load('{}.npy'.format(
                        os.path.join(self.ivecs_dir, line))).flatten()
                except IOError:
                    logwarning(
                        '[Diarization.load_norm_ivecs] No pickle file found for {}.'
                        .format(line))
        # NOTE: with an empty list file `line` is still None here, so the
        # os.path.dirname() calls below fail.
        self.scale = np.load(
            os.path.join(self.ivecs_dir, os.path.dirname(line), 'scale.npy'))
        self.shift = np.load(
            os.path.join(self.ivecs_dir, os.path.dirname(line), 'shift.npy'))
        try:
            # Pickle data is binary; text mode breaks pickle.load on Python 3.
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only load model.pkl files from trusted locations.
            with open(
                    os.path.join(self.ivecs_dir, os.path.dirname(line),
                                 'model.pkl'), 'rb') as model_file:
                self.model = pickle.load(model_file)
        except IOError:
            logwarning(
                '[Diarization.load_norm_ivecs] No pretrained model found.')

    def s_norm(self, test, enroll):
        """ Run S-Norm on input i-vectors.

            Each raw score is normalized twice - once with the mean/std of
            the test i-vector's scores against the normalization set, once
            with those of the enroll i-vector - and the two are averaged.

            :param test: test i-vectors
            :type test: numpy.array
            :param enroll: enroll i-vectors
            :type enroll: numpy.array
            :returns: scores matrix
            :rtype: numpy.array
        """
        a = self.plda.score(test, self.norm_ivecs,
                            scale=self.scale, shift=self.shift)
        b = self.plda.score(enroll, self.norm_ivecs,
                            scale=self.scale, shift=self.shift)
        c = self.plda.score(enroll, test,
                            scale=self.scale, shift=self.shift)

        # The cohort statistics depend on a single i-vector only, so compute
        # them once per i-vector instead of once per (test, enroll) pair.
        test_stats = [(np.mean(a.T[ii]), np.std(a.T[ii]))
                      for ii in range(test.shape[0])]
        enroll_stats = [(np.mean(b.T[jj]), np.std(b.T[jj]))
                        for jj in range(enroll.shape[0])]

        scores = []
        for ii, (test_mean, test_std) in enumerate(test_stats):
            test_scores = []
            for jj, (enroll_mean, enroll_std) in enumerate(enroll_stats):
                s = c[ii][jj]
                test_scores.append((((s - test_mean) / test_std +
                                     (s - enroll_mean) / enroll_std) / 2))
            scores.append(test_scores)
        return np.array(scores).T

    @staticmethod
    def get_features(scores):
        """ Compute features from input scores.

            :param scores: input scores
            :type scores: list
            :returns: mean, std and median
            :rtype: tuple
        """
        return np.mean(scores), np.std(scores), np.median(scores)
def test_add_datum(self):
    """Check PLDA.add_datum for existing and new classes, with and without fname.

    Four scenarios are exercised on a fresh model each time:
    adding to an existing class with / without a file name, and creating a
    new class with / without a file name.  In every scenario the entries of
    all other classes must be unchanged, and the touched class must hold the
    expected count, mean, covariance and file names.
    """
    tolerance = 1e-100
    old_model = self.model

    def assert_unchanged(new_model, labels, changed_label):
        # Every class other than `changed_label` must match the old model.
        for key in set(labels) - set([changed_label]):
            old_entry = old_model.data[key]
            new_entry = new_model.data[key]
            self.assertEqual(old_entry['n'], new_entry['n'])
            self.assert_same(np.asarray(old_entry['X']),
                             np.asarray(new_entry['X']),
                             tolerance=tolerance)
            self.assert_same(old_entry['mean'], new_entry['mean'],
                             tolerance=tolerance)
            self.assert_same(old_entry['cov'], new_entry['cov'],
                             tolerance=tolerance)
            self.assert_same(old_entry['fnames'], new_entry['fnames'])

    def assert_stats(entry, n_truth, mean_truth, cov_truth):
        # Count, mean and covariance of the class that received the datum.
        self.assertEqual(entry['n'], n_truth)
        self.assert_same(entry['mean'], mean_truth, tolerance=tolerance)
        self.assert_same(entry['cov'], cov_truth)

    def assert_fnames(model_fnames, truth_fnames):
        # Compare sorted copies.  In-place .sort() returns None, so passing
        # its result to assert_same would only compare None with None.
        self.assert_same(np.asarray(sorted(model_fnames)),
                         np.asarray(sorted(truth_fnames)))

    # Test adding to existing class, with fname supplied.
    new_X = np.ones(self.dims)
    existing_Y = list(self.model.data.keys())[-1]
    new_fname = 'new_fname.jpg'

    new_model = PLDA(self.X, self.Y, self.fnames)
    new_model.add_datum(new_X, existing_Y, new_fname)

    assert_unchanged(new_model, self.model.data.keys(), existing_Y)

    new_X_truth = old_model.data[existing_Y]['X'] + [new_X]
    assert_stats(new_model.data[existing_Y],
                 old_model.data[existing_Y]['n'] + 1,
                 np.asarray(new_X_truth).mean(axis=0),
                 np.cov(np.asarray(new_X_truth).T))
    assert_fnames(new_model.data[existing_Y]['fnames'],
                  old_model.data[existing_Y]['fnames'] + [new_fname])

    # Test adding to existing class without supplying fname.
    new_X = np.ones(self.dims)
    existing_Y = list(self.model.data.keys())[-1]

    new_model = PLDA(self.X, self.Y, self.fnames)
    new_model.add_datum(new_X, existing_Y)

    assert_unchanged(new_model, self.model.data.keys(), existing_Y)

    new_X_truth = old_model.data[existing_Y]['X'] + [new_X]
    assert_stats(new_model.data[existing_Y],
                 old_model.data[existing_Y]['n'] + 1,
                 np.asarray(new_X_truth).mean(axis=0),
                 np.cov(np.asarray(new_X_truth).T))
    # The datum added without a file name is stored as None; drop it so the
    # remaining names can be compared with the old model's names.
    new_fnames_model = new_model.data[existing_Y]['fnames']
    new_fnames_model.remove(None)
    assert_fnames(new_fnames_model, old_model.data[existing_Y]['fnames'])

    # Test creating a new class with fname
    new_X = np.ones(self.dims)
    new_Y = 'new_category'
    new_fname = 'new_fname.jpg'

    new_model = PLDA(self.X, self.Y, self.fnames)
    new_model.add_datum(new_X, new_Y, new_fname)

    assert_unchanged(new_model, new_model.data.keys(), new_Y)

    # A single-sample class has no covariance: None is expected.
    assert_stats(new_model.data[new_Y], 1, new_X.copy(), None)
    assert_fnames(new_model.data[new_Y]['fnames'], [new_fname])

    # Test creating a new class without fname.
    new_X = np.ones(self.dims)
    new_Y = 'new_category'

    new_model = PLDA(self.X, self.Y, self.fnames)
    new_model.add_datum(new_X, new_Y)

    assert_unchanged(new_model, new_model.data.keys(), new_Y)

    assert_stats(new_model.data[new_Y], 1, new_X.copy(), None)
    assert_fnames(new_model.data[new_Y]['fnames'], [None])