Ejemplo n.º 1
0
    def test_mk_data_dict(self):
        """Check the per-class data dict against a reordered copy and raw data.

        `model_1` is built from a seeded shuffle of the fname-sorted data with
        fnames supplied; `model_2` is built from the sorted data without
        fnames. For every class key the two models must agree on n, X (once
        model_1's rows are re-sorted by fname), mean, cov and the set of dict
        keys, and the stored values must match statistics computed directly
        from the generated data.
        """
        X, Y, fnames = self.gen_data(self.dims, self.n, self.K,
                                     self.shared_cov, self.dist_bw_means)
        by_fname = np.argsort(fnames)
        X = X[by_fname]
        Y = Y[by_fname]
        fnames = fnames[by_fname]

        # Seeded permutation of the row indices used to scramble model_1's
        # input.
        shuffled = np.arange(0, Y.shape[0])
        np.random.seed(0)
        np.random.shuffle(shuffled)

        model_1 = PLDA(X[shuffled], Y[shuffled], fnames[shuffled])
        model_2 = PLDA(X, Y)

        tolerance = 1e-100
        for key in list(model_1.data.keys()):
            entry_1 = model_1.data[key]
            entry_2 = model_2.data[key]

            n_1 = entry_1['n']
            mean_1 = entry_1['mean']
            cov_1 = entry_1['cov']
            fnames_1 = entry_1['fnames']
            fnames_2 = entry_2['fnames']

            # Part 1: both models hold equal data, apart from fnames.
            self.assertEqual(n_1, entry_2['n'])

            # Undo the shuffle for model_1; model_2's rows are already in
            # fname order.
            rows_1 = np.asarray(entry_1['X'])[np.argsort(fnames_1)]
            rows_2 = np.asarray(entry_2['X'])
            self.assert_same(rows_1, rows_2, tolerance=tolerance)

            self.assert_same(mean_1, entry_2['mean'], tolerance=tolerance)
            self.assert_same(cov_1, entry_2['cov'], tolerance=tolerance)
            self.assertEqual(entry_1.keys(), entry_2.keys())
            self.assertFalse(np.array_equal(fnames_1, fnames_2))

            # Part 2: the stored values match the generated data.
            self.assertEqual(n_1, self.n)

            class_rows = X[Y == key, :]
            self.assert_same(mean_1,
                             class_rows.mean(axis=0),
                             tolerance=tolerance)
            self.assert_same(cov_1, np.cov(class_rows.T), tolerance=tolerance)

            # model_2 was given no fnames, so one None per datum is expected.
            no_fnames = [None] * int(self.n)
            self.assert_same(np.asarray(fnames_2), np.asarray(no_fnames))

            # In-place sort so model_1's names line up with the sorted input.
            fnames_1.sort()
            self.assert_same(np.asarray(fnames_1),
                             np.asarray(fnames[Y == key]))
Ejemplo n.º 2
0
    def __init__(self, ivecs_dir, norm_list, plda_model_dir):
        """ Class constructor.

            Stores the paths, creates the PLDA object and, when a normalization
            list is given, loads the normalization i-vectors.

            :param ivecs_dir: path to directory with i-vectors
            :type ivecs_dir: str
            :param norm_list: path to list with files relative to directory with i-vectors
            :type norm_list: str
            :param plda_model_dir: path to directory with models
            :type plda_model_dir: str
        """
        self.ivecs_dir = ivecs_dir
        self.scale, self.shift, self.model = None, None, None
        self.norm_list = norm_list
        self.plda = PLDA(plda_model_dir)
        if self.norm_list is not None:
            self.norm_ivecs = np.array(list(self.load_norm_ivecs()))
        else:
            # Always define the attribute so later reads do not raise
            # AttributeError when no normalization list was supplied.
            self.norm_ivecs = None
Ejemplo n.º 3
0
    def setUp(self):
        """Create the fixture: generated data and a PLDA model built from it.

        Stores the generation settings, the generated `X`, `Y` and `fnames`,
        and the resulting model as attributes on the test instance.
        """
        self.dims = 5
        self.n = 100
        self.K = 5
        self.dist_bw_means = 3
        self.shared_cov = np.eye(self.dims)

        # Fix the NumPy RNG seed before the data is generated.
        np.random.seed(0)
        generated = self.gen_data(self.dims, self.n, self.K,
                                  self.shared_cov, self.dist_bw_means)
        X, Y, fnames = generated

        self.model = PLDA(X, Y, fnames)
        self.X = X
        self.Y = Y
        self.fnames = fnames
Ejemplo n.º 4
0
    def setUpClass(cls):
        """Create the shared fixture: generated data and a PLDA model.

        Stores the generation settings, the generated `X`, `Y` and `fnames`,
        and the resulting model as attributes on `cls`.
        """
        # NOTE(review): no @classmethod decorator is visible on this snippet;
        # confirm it is present in the enclosing class.
        cls.dims = 3
        cls.n = 100
        cls.K = 5
        cls.dist_bw_means = 3
        cls.shared_cov = np.eye(cls.dims)

        # Fix the NumPy RNG seed before the data is generated.
        np.random.seed(0)
        generated = cls.gen_data(cls.dims, cls.n, cls.K, cls.shared_cov,
                                 cls.dist_bw_means)
        X, Y, fnames = generated

        cls.model = PLDA(X, Y, fnames)
        cls.X = X
        cls.Y = Y
        cls.fnames = fnames
Ejemplo n.º 5
0
def main():
    """Adapt a PLDA model with unlabeled vectors and write the result.

    Expects exactly three command-line arguments:
    ``<plda> <adapt-ivector-rspecifier> <plda-adapt>``. The model is read
    with `plda_read`, every vector from the rspecifier is accumulated into a
    `PldaUnsupervisedAdaptor` with weight 1, the adaptor updates the model,
    and the model is written with `plda_trans_write`.

    :raises SystemExit: with status 1 when the argument count is wrong
    """
    if len(sys.argv) != 4:
        print('<plda> <adapt-ivector-rspecifier> <plda-adapt> \n')
        # Wrong usage is an error: exit non-zero so calling scripts notice
        # (a bare sys.exit() would report success).
        sys.exit(1)

    plda, train_vecs_adapt, plda_adapt = sys.argv[1:4]

    plda_new = PLDA()
    plda_new.plda_read(plda)
    plda_new.get_output()

    aplda_model = PldaUnsupervisedAdaptor()
    for _, vec in kaldi_io.read_vec(train_vecs_adapt):
        aplda_model.add_stats(1, vec)
    aplda_model.update_plda(plda_new)
    plda_new.plda_trans_write(plda_adapt)
Ejemplo n.º 6
0
	def requires(self):
		"""Build the two upstream tasks, remember their outputs, and return them.

		Creates a `Target2LDA` and a `PLDA` task from `self.conf`, stores each
		task's `output()` on the instance, and returns the tasks as a list in
		that order.
		"""
		dependencies = [Target2LDA(self.conf), PLDA(self.conf)]
		target_task, model_task = dependencies
		self.plda_target = target_task.output()
		self.plda_model_target = model_task.output()
		return dependencies
Ejemplo n.º 7
0
class Normalization(object):
    """ Speaker normalization S-Norm. Also handles some related operations, such as
        loading the normalization i-vectors with their scale, shift and model files.

    """
    def __init__(self, ivecs_dir, norm_list, plda_model_dir):
        """ Class constructor.

            :param ivecs_dir: path to directory with i-vectors
            :type ivecs_dir: str
            :param norm_list: path to list with files relative to directory with i-vectors
            :type norm_list: str
            :param plda_model_dir: path to directory with models
            :type plda_model_dir: str
        """
        self.ivecs_dir = ivecs_dir
        self.scale, self.shift, self.model = None, None, None
        self.norm_list = norm_list
        self.plda = PLDA(plda_model_dir)
        if self.norm_list is not None:
            self.norm_ivecs = np.array(list(self.load_norm_ivecs()))
        else:
            self.norm_ivecs = None

    def load_norm_ivecs(self):
        """ Load normalization i-vectors, scale and shift files and also pretrained model.

            This is a generator: each listed ``.npy`` file is yielded as a flattened
            array, and files that cannot be read are skipped with a warning. The
            scale, shift and model are loaded from the directory of the last listed
            entry, and only once the generator has been fully consumed.

            :returns: i-vectors
            :rtype: numpy.array
            :raises ValueError: if the normalization list contains no lines
        """
        line = None
        with open(self.norm_list, 'r') as f:
            for line in f:
                line = line.rstrip()
                loginfo(
                    '[Diarization.load_norm_ivecs] Loading npy file {} ...'.
                    format(line))
                try:
                    yield np.load('{}.npy'.format(
                        os.path.join(self.ivecs_dir, line))).flatten()
                except IOError:
                    logwarning(
                        '[Diarization.load_norm_ivecs] No pickle file found for {}.'
                        .format(line))
        if line is None:
            # Without any entry there is no directory to take scale/shift from;
            # fail clearly instead of passing None to os.path.dirname.
            raise ValueError(
                'Normalization list {} is empty.'.format(self.norm_list))
        stats_dir = os.path.join(self.ivecs_dir, os.path.dirname(line))
        self.scale = np.load(os.path.join(stats_dir, 'scale.npy'))
        self.shift = np.load(os.path.join(stats_dir, 'shift.npy'))
        try:
            # pickle needs a binary file object; text mode fails on Python 3.
            with open(os.path.join(stats_dir, 'model.pkl'), 'rb') as f:
                # NOTE(review): pickle.load executes arbitrary code from the
                # file; only point ivecs_dir at trusted data.
                self.model = pickle.load(f)
        except IOError:
            logwarning(
                '[Diarization.load_norm_ivecs] No pretrained model found.')

    def s_norm(self, test, enroll):
        """ Run S-Norm on input i-vectors.

            Each raw score is normalized twice, once with the mean/std of the test
            i-vector's scores against the normalization i-vectors and once with
            those of the enroll i-vector, and the two values are averaged.

            :param test: test i-vectors
            :type test: numpy.array
            :param enroll: enroll i-vectors
            :type enroll: numpy.array
            :returns: scores matrix
            :rtype: numpy.array
        """
        a = self.plda.score(test,
                            self.norm_ivecs,
                            scale=self.scale,
                            shift=self.shift)
        b = self.plda.score(enroll,
                            self.norm_ivecs,
                            scale=self.scale,
                            shift=self.shift)
        c = self.plda.score(enroll, test, scale=self.scale, shift=self.shift)
        # The enroll statistics do not depend on the test index, so compute
        # them once instead of once per (test, enroll) pair.
        enroll_stats = [(np.mean(b.T[jj]), np.std(b.T[jj]))
                        for jj in range(enroll.shape[0])]
        scores = []
        for ii in range(test.shape[0]):
            test_mean, test_std = np.mean(a.T[ii]), np.std(a.T[ii])
            test_scores = []
            for jj, (enroll_mean, enroll_std) in enumerate(enroll_stats):
                s = c[ii][jj]
                test_scores.append((((s - test_mean) / test_std +
                                     (s - enroll_mean) / enroll_std) / 2))
            scores.append(test_scores)
        return np.array(scores).T

    @staticmethod
    def get_features(scores):
        """ Compute features from input scores.

            :param scores: input scores
            :type scores: list
            :returns: mean, std and median
            :rtype: tuple
        """
        return np.mean(scores), np.std(scores), np.median(scores)
Ejemplo n.º 8
0
    def test_add_datum(self):
        """Check `add_datum` for existing and new classes, with and without fname.

        Each of the four scenarios builds a fresh model from the fixture
        data, adds one all-ones datum, and verifies that every other class is
        identical to the fixture model while the targeted class has the
        expected n, mean, cov and fnames.
        """
        tolerance = 1e-100
        old_model = self.model

        def assert_untouched(model, keys):
            # Every class in `keys` must hold identical data in both models.
            for key in keys:
                before = old_model.data[key]
                after = model.data[key]
                self.assertEqual(before['n'], after['n'])
                self.assert_same(np.asarray(before['X']),
                                 np.asarray(after['X']),
                                 tolerance=tolerance)
                self.assert_same(before['mean'], after['mean'],
                                 tolerance=tolerance)
                self.assert_same(before['cov'], after['cov'],
                                 tolerance=tolerance)
                self.assert_same(before['fnames'], after['fnames'])

        def assert_appended(model, label, datum):
            # The class must equal the fixture's class plus `datum`.
            before = old_model.data[label]
            after = model.data[label]
            X_truth = np.asarray(before['X'] + [datum])
            self.assertEqual(after['n'], before['n'] + 1)
            self.assert_same(after['mean'], X_truth.mean(axis=0),
                             tolerance=tolerance)
            self.assert_same(after['cov'], np.cov(X_truth.T))

        def assert_singleton(model, label, datum):
            # A freshly created class holds one datum and has no covariance.
            after = model.data[label]
            self.assertEqual(after['n'], 1)
            self.assert_same(after['mean'], datum.copy(),
                             tolerance=tolerance)
            self.assert_same(after['cov'], None)

        def assert_fnames(actual, expected):
            # `.sort()` sorts in place and returns None, so comparing its
            # return values would compare None with None and check nothing.
            # Compare sorted copies instead.
            self.assert_same(np.asarray(sorted(actual)),
                             np.asarray(sorted(expected)))

        # Test adding to existing class, with fname supplied.
        new_X = np.ones(self.dims)
        existing_Y = list(self.model.data.keys())[-1]
        new_fname = 'new_fname.jpg'

        new_model = PLDA(self.X, self.Y, self.fnames)
        new_model.add_datum(new_X, existing_Y, new_fname)

        assert_untouched(new_model,
                         set(self.model.data.keys()) - {existing_Y})
        assert_appended(new_model, existing_Y, new_X)
        assert_fnames(new_model.data[existing_Y]['fnames'],
                      old_model.data[existing_Y]['fnames'] + [new_fname])

        # Test adding to existing class without supplying fname.
        new_X = np.ones(self.dims)
        existing_Y = list(self.model.data.keys())[-1]

        new_model = PLDA(self.X, self.Y, self.fnames)
        new_model.add_datum(new_X, existing_Y)

        assert_untouched(new_model,
                         set(self.model.data.keys()) - {existing_Y})
        assert_appended(new_model, existing_Y, new_X)
        # Drop the None placeholder recorded for the unnamed datum; the rest
        # must match the fixture's names.
        new_fnames_model = new_model.data[existing_Y]['fnames']
        new_fnames_model.remove(None)
        assert_fnames(new_fnames_model,
                      old_model.data[existing_Y]['fnames'])

        # Test creating a new class with fname
        new_X = np.ones(self.dims)
        new_Y = 'new_category'
        new_fname = 'new_fname.jpg'

        new_model = PLDA(self.X, self.Y, self.fnames)
        new_model.add_datum(new_X, new_Y, new_fname)

        assert_untouched(new_model, set(new_model.data.keys()) - {new_Y})
        assert_singleton(new_model, new_Y, new_X)
        assert_fnames(new_model.data[new_Y]['fnames'], [new_fname])

        # Test creating a new class without fname.
        new_X = np.ones(self.dims)
        new_Y = 'new_category'

        new_model = PLDA(self.X, self.Y, self.fnames)
        new_model.add_datum(new_X, new_Y)

        assert_untouched(new_model, set(new_model.data.keys()) - {new_Y})
        assert_singleton(new_model, new_Y, new_X)
        assert_fnames(new_model.data[new_Y]['fnames'], [None])