def test_ISVTrainInitialize():
    # Check that the initialization is consistent and using the rng (cf. issue #118)
    eps = 1e-10

    # UBM GMM
    ubm = GMMMachine(2, 3)
    ubm.mean_supervector = UBM_MEAN
    ubm.variance_supervector = UBM_VAR

    ## ISV
    ib = ISVBase(ubm, 2)
    # first round
    rng = bob.core.random.mt19937(0)
    it = ISVTrainer(10)
    # it.rng = rng
    it.initialize(ib, TRAINING_STATS, rng)
    u1 = ib.u
    d1 = ib.d

    # second round
    rng = bob.core.random.mt19937(0)
    # it.rng = rng
    it.initialize(ib, TRAINING_STATS, rng)
    u2 = ib.u
    d2 = ib.d

    assert numpy.allclose(u1, u2, eps)
    assert numpy.allclose(d1, d2, eps)
def test_JFATrainInitialize():
    # Check that the initialization is consistent and using the rng (cf. issue #118)
    eps = 1e-10

    # UBM GMM
    ubm = GMMMachine(2, 3)
    ubm.mean_supervector = UBM_MEAN
    ubm.variance_supervector = UBM_VAR

    ## JFA
    jb = JFABase(ubm, 2, 2)
    # first round
    rng = bob.core.random.mt19937(0)
    jt = JFATrainer()
    # jt.rng = rng
    jt.initialize(jb, TRAINING_STATS, rng)
    u1 = jb.u
    v1 = jb.v
    d1 = jb.d

    # second round
    rng = bob.core.random.mt19937(0)
    jt.initialize(jb, TRAINING_STATS, rng)
    u2 = jb.u
    v2 = jb.v
    d2 = jb.d

    assert numpy.allclose(u1, u2, eps)
    assert numpy.allclose(v1, v2, eps)
    assert numpy.allclose(d1, d2, eps)
def enroll(self, data):
    """Enrolls a GMM using MAP adaptation given a reference's feature vectors

    Returns a GMMMachine tuned from the UBM with MAP on a biometric reference data.
    """
    # if input is a list (or SampleBatch) of 2 dimensional arrays, stack them
    data = check_data_dim(data, expected_ndim=2)

    # Use the array to train a GMM and return it
    logger.info("Enrolling with %d feature vectors", data.shape[0])

    gmm = GMMMachine(
        n_gaussians=self.n_gaussians,
        trainer="map",
        ubm=copy.deepcopy(self),
        convergence_threshold=self.convergence_threshold,
        max_fitting_steps=self.enroll_iterations,
        random_state=self.random_state,
        update_means=self.enroll_update_means,
        update_variances=self.enroll_update_variances,
        update_weights=self.enroll_update_weights,
        mean_var_update_threshold=self.mean_var_update_threshold,
        map_relevance_factor=self.enroll_relevance_factor,
        map_alpha=self.enroll_alpha,
    )
    gmm.fit(data)
    return gmm
def test_enroll():
    # Load the UBM
    ubm = GMMMachine.from_hdf5(
        pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_ubm.hdf5"))
    # Create a GMM object with that UBM
    gmm1 = GMM(
        number_of_gaussians=2,
        enroll_update_means=True,
        enroll_update_variances=True,
    )
    gmm1.ubm = ubm

    # Enroll the biometric reference from random features
    enroll = utils.random_training_set((20, 45), 5, -5.0, 5.0, seed=seed_value)
    biometric_reference = gmm1.enroll(enroll)

    assert not biometric_reference.is_similar_to(biometric_reference.ubm)
    assert isinstance(biometric_reference, GMMMachine)

    reference_file = pkg_resources.resource_filename(
        "bob.bio.gmm.test", "data/gmm_enrolled.hdf5")
    if regenerate_refs:
        gmm1.write_biometric_reference(biometric_reference, reference_file)

    # Compare to pre-generated file
    gmm2 = gmm1.read_biometric_reference(reference_file)
    assert biometric_reference.is_similar_to(gmm2)

    with tempfile.NamedTemporaryFile(prefix="bob_", suffix="_bioref.hdf5") as fd:
        temp_file = fd.name
        gmm1.write_biometric_reference(biometric_reference, temp_file)
        assert GMMMachine.from_hdf5(temp_file, ubm).is_similar_to(gmm2)
def test_JFATrainer_updateYandV():
    # test the JFATrainer for updating Y and V

    v_ref = numpy.array(
        [0.7228, 0.7892, 0.6475, 0.6080, 0.8631, 0.8416,
         1.6512, 1.6068, 0.0500, 0.0101, 0.4325, 0.6719]).reshape((6, 2))

    y1 = numpy.array([0., 0.])
    y2 = numpy.array([0., 0.])
    y3 = numpy.array([0.9630, 1.3868])
    y4 = numpy.array([0.0426, -0.3721])
    y = [y1, y2]

    # call the updateY function
    ubm = GMMMachine(2, 3)
    ubm.mean_supervector = UBM_MEAN
    ubm.variance_supervector = UBM_VAR
    m = JFABase(ubm, 2, 2)
    t = JFATrainer()
    t.initialize(m, TRAINING_STATS)
    m.u = M_u
    m.v = M_v
    m.d = M_d
    t.__X__ = M_x
    t.__Y__ = y
    t.__Z__ = M_z
    t.e_step_v(m, TRAINING_STATS)
    t.m_step_v(m, TRAINING_STATS)

    # Expected results (JFA cookbook, matlab)
    assert equals(t.__Y__[0], y3, 2e-4)
    assert equals(t.__Y__[1], y4, 2e-4)
    assert equals(m.v, v_ref, 2e-4)
def test_JFATrainInitialize():
    # Check that the initialization is consistent and using the rng (cf. issue #118)
    eps = 1e-10

    # UBM GMM
    ubm = GMMMachine(2, 3)
    ubm.means = UBM_MEAN.reshape((2, 3))
    ubm.variances = UBM_VAR.reshape((2, 3))

    # JFA
    it = JFAMachine(2, 2, em_iterations=10, ubm=ubm)
    # first round
    n_classes = it.estimate_number_of_classes(TRAINING_STATS_y)
    it.initialize(TRAINING_STATS_X, TRAINING_STATS_y, n_classes)
    u1 = it.U
    v1 = it.V
    d1 = it.D

    # second round
    it.initialize(TRAINING_STATS_X, TRAINING_STATS_y, n_classes)
    u2 = it.U
    v2 = it.V
    d2 = it.D

    np.testing.assert_allclose(u1, u2, rtol=eps, atol=1e-8)
    np.testing.assert_allclose(v1, v2, rtol=eps, atol=1e-8)
    np.testing.assert_allclose(d1, d2, rtol=eps, atol=1e-8)
def test_JFATrainer_updateZandD():
    # test the JFATrainer for updating Z and D

    d_ref = numpy.array([0.3110, 1.0138, 0.8297, 1.0382, 0.0095, 0.6320])

    z1 = numpy.array([0., 0., 0., 0., 0., 0.])
    z2 = numpy.array([0., 0., 0., 0., 0., 0.])
    z3_ref = numpy.array([0.3256, 1.8633, 0.6480, 0.8085, -0.0432, 0.2885])
    z4_ref = numpy.array([-0.3324, -0.1474, -0.4404, -0.4529, 0.0484, -0.5848])
    z = [z1, z2]

    # call the updateZ function
    ubm = GMMMachine(2, 3)
    ubm.mean_supervector = UBM_MEAN
    ubm.variance_supervector = UBM_VAR
    m = JFABase(ubm, 2, 2)
    t = JFATrainer()
    t.initialize(m, TRAINING_STATS)
    m.u = M_u
    m.v = M_v
    m.d = M_d
    t.__X__ = M_x
    t.__Y__ = M_y
    t.__Z__ = z
    t.e_step_d(m, TRAINING_STATS)
    t.m_step_d(m, TRAINING_STATS)

    # Expected results (JFA cookbook, matlab)
    assert equals(t.__Z__[0], z3_ref, 2e-4)
    assert equals(t.__Z__[1], z4_ref, 2e-4)
    assert equals(m.d, d_ref, 2e-4)
def test_JFATrainer_updateXandU():
    # test the JFATrainer for updating X and U

    u_ref = numpy.array(
        [0.6729, 0.3408, 0.0544, 1.0653, 0.5399, 1.3035,
         2.4995, 0.4385, 0.1292, -0.0576, 1.1962, 0.0117]).reshape((6, 2))

    x1 = numpy.array([0., 0., 0., 0.]).reshape((2, 2))
    x2 = numpy.array([0., 0., 0., 0.]).reshape((2, 2))
    x3 = numpy.array([0.2143, 1.8275, 3.1979, 0.1227]).reshape((2, 2))
    x4 = numpy.array([-1.3861, 0.2359, 5.3326, -0.7914]).reshape((2, 2))
    x = [x1, x2]

    # call the updateX function
    ubm = GMMMachine(2, 3)
    ubm.mean_supervector = UBM_MEAN
    ubm.variance_supervector = UBM_VAR
    m = JFABase(ubm, 2, 2)
    t = JFATrainer()
    t.initialize(m, TRAINING_STATS)
    m.u = M_u
    m.v = M_v
    m.d = M_d
    t.__X__ = x
    t.__Y__ = M_y
    t.__Z__ = M_z
    t.e_step_u(m, TRAINING_STATS)
    t.m_step_u(m, TRAINING_STATS)

    # Expected results (JFA cookbook, matlab)
    assert equals(t.__X__[0], x3, 2e-4)
    assert equals(t.__X__[1], x4, 2e-4)
    assert equals(m.u, u_ref, 2e-4)
def test_ISVBase():
    # Creates a UBM
    weights = numpy.array([0.4, 0.6], 'float64')
    means = numpy.array([[1, 6, 2], [4, 3, 2]], 'float64')
    variances = numpy.array([[1, 2, 1], [2, 1, 2]], 'float64')
    ubm = GMMMachine(2, 3)
    ubm.weights = weights
    ubm.means = means
    ubm.variances = variances

    # Creates an ISVBase
    U = numpy.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]], 'float64')
    d = numpy.array([0, 1, 0, 1, 0, 1], 'float64')
    m = ISVBase(ubm, ru=1)
    _, _, ru = m.shape
    assert ru == 1

    # Checks for correctness
    m.resize(2)
    m.u = U
    m.d = d
    n_gaussians, dim, ru = m.shape
    supervector_length = m.supervector_length
    assert (m.u == U).all()
    assert (m.d == d).all()
    assert n_gaussians == 2
    assert dim == 3
    assert supervector_length == 6
    assert ru == 2

    # Saves and loads
    filename = str(tempfile.mkstemp(".hdf5")[1])
    m.save(bob.io.base.HDF5File(filename, 'w'))
    m_loaded = ISVBase(bob.io.base.HDF5File(filename))
    m_loaded.ubm = ubm
    assert m == m_loaded
    assert (m != m_loaded) is False
    assert m.is_similar_to(m_loaded)

    # Copy constructor
    mc = ISVBase(m)
    assert m == mc

    # Variant
    # mv = ISVBase()
    # Checks for correctness
    # mv.ubm = ubm
    # mv.resize(2)
    # mv.u = U
    # mv.d = d
    # assert (m.u == U).all()
    # assert (m.d == d).all()
    # assert m.dim_c == 2
    # assert m.dim_d == 3
    # assert m.dim_cd == 6
    # assert m.dim_ru == 2

    # Clean-up
    os.unlink(filename)
def test_ml_em():
    # Simple GMM test
    data = np.array([[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8], [7, 9, 7]])
    n_gaussians = 2
    n_features = data.shape[-1]

    machine = GMMMachine(
        n_gaussians,
        update_means=True,
        update_variances=True,
        update_weights=True,
    )
    machine.means = np.repeat([[2], [8]], n_features, 1)
    machine.variances = np.ones_like(machine.means)

    stats = gmm_module.e_step(
        data,
        machine,
    )
    gmm_module.m_step(
        [stats],
        machine,
    )

    expected_means = np.array([[1.5, 1.5, 2.0], [7.0, 8.0, 8.0]])
    np.testing.assert_almost_equal(machine.means, expected_means)
    expected_weights = np.array([2 / 5, 3 / 5])
    np.testing.assert_almost_equal(machine.weights, expected_weights)
    eps = np.finfo(float).eps
    expected_variances = np.array([[1 / 4, 1 / 4, eps], [eps, 2 / 3, 2 / 3]])
    np.testing.assert_almost_equal(machine.variances, expected_variances)
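# Where the expected values above come from: with the initial means at 2 and 8,
# the first two samples are assigned (essentially entirely) to the first Gaussian
# and the remaining three to the second, so one M-step reduces to per-cluster
# sample means, biased variances (floored at eps) and cluster proportions.
# A minimal sketch with plain numpy; the hard-assignment split and the helper
# name are illustrative assumptions, the real E-step uses soft responsibilities.
import numpy as np


def _expected_m_step_by_hand():
    data = np.array(
        [[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8], [7, 9, 7]], dtype=float)
    clusters = [data[:2], data[2:]]  # split implied by the initial means [2, 8]
    eps = np.finfo(float).eps
    means = np.array([c.mean(axis=0) for c in clusters])
    # biased (1/N) per-dimension variances, floored at eps like the trainer does
    variances = np.array([np.maximum(c.var(axis=0), eps) for c in clusters])
    weights = np.array([len(c) / len(data) for c in clusters])
    # means == [[1.5, 1.5, 2.0], [7.0, 8.0, 8.0]], weights == [0.4, 0.6]
    return means, variances, weights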
def test_ISVTrainInitialize():
    # Check that the initialization is consistent and using the rng (cf. issue #118)
    eps = 1e-10

    # UBM GMM
    ubm = GMMMachine(2, 3)
    ubm.means = UBM_MEAN.reshape((2, 3))
    ubm.variances = UBM_VAR.reshape((2, 3))

    # ISV
    it = ISVMachine(2, em_iterations=10, ubm=ubm)
    # it.rng = rng

    n_classes = it.estimate_number_of_classes(TRAINING_STATS_y)
    it.initialize(TRAINING_STATS_X, TRAINING_STATS_y, n_classes)
    u1 = copy.deepcopy(it.U)
    d1 = copy.deepcopy(it.D)

    # second round
    it.initialize(TRAINING_STATS_X, TRAINING_STATS_y, n_classes)
    u2 = it.U
    d2 = it.D

    np.testing.assert_allclose(u1, u2, rtol=eps, atol=1e-8)
    np.testing.assert_allclose(d1, d2, rtol=eps, atol=1e-8)
def enroll(self, data):
    """Enrolls a GMM using MAP adaptation given a reference's feature vectors

    Returns a GMMMachine tuned from the UBM with MAP on a biometric reference data.
    """
    for feature in data:
        self._check_feature(feature)

    # if input is a list (or SampleBatch) of 2 dimensional arrays, stack them
    if data[0].ndim == 2:
        data = np.vstack(data)

    # Use the array to train a GMM and return it
    logger.info("Enrolling with %d feature vectors", data.shape[0])

    gmm = GMMMachine(
        n_gaussians=self.number_of_gaussians,
        trainer="map",
        ubm=copy.deepcopy(self.ubm),
        convergence_threshold=self.training_threshold,
        max_fitting_steps=self.gmm_enroll_iterations,
        random_state=self.rng,
        update_means=self.enroll_update_means,
        update_variances=self.enroll_update_variances,
        update_weights=self.enroll_update_weights,
        mean_var_update_threshold=self.variance_threshold,
        map_relevance_factor=self.enroll_relevance_factor,
        map_alpha=self.enroll_alpha,
    )
    gmm.fit(data)
    return gmm
def _create_ubm_prior(means):
    # Creating a fake prior with 2 gaussians
    prior_gmm = GMMMachine(2)
    prior_gmm.means = means.copy()
    # All nice and round diagonal covariance
    prior_gmm.variances = np.ones((2, 3)) * 0.5
    prior_gmm.weights = np.array([0.3, 0.7])
    return prior_gmm
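# For context, a prior like this is typically plugged into a MAP-adapted
# GMMMachine, mirroring the trainer="map"/ubm=... usage in the MAP tests of this
# file. A minimal illustrative sketch only: the means and data below are made up
# and the helper name is not part of the library.
import numpy as np


def _example_map_with_fake_prior():
    example_means = np.array([[0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])
    prior = _create_ubm_prior(example_means)
    adapted = GMMMachine(
        n_gaussians=2,
        trainer="map",
        ubm=prior,
        update_means=True,
        update_variances=False,
        update_weights=False,
    )
    example_data = np.array(
        [[0.1, -0.2, 0.0], [1.2, 0.9, 1.1], [0.0, 0.1, -0.1]])
    # MAP adaptation pulls the prior means towards the data
    adapted = adapted.fit(example_data)
    return adapted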
def test_ISVTrainAndEnrol():
    # Train and enroll an 'ISVMachine'
    eps = 1e-10
    d_ref = numpy.array(
        [0.39601136, 0.07348469, 0.47712682, 0.44738127, 0.43179856, 0.45086029],
        'float64')
    u_ref = numpy.array(
        [[0.855125642430777, 0.563104284748032],
         [-0.325497865404680, 1.923598985291687],
         [0.511575659503837, 1.964288663083095],
         [9.330165761678115, 1.073623827995043],
         [0.511099245664012, 0.278551249248978],
         [5.065578541930268, 0.509565618051587]], 'float64')
    z_ref = numpy.array(
        [-0.079315777443826, 0.092702428248543, -0.342488761656616,
         -0.059922635809136, 0.133539981073604, 0.213118695516570], 'float64')

    # Calls the train function
    ubm = GMMMachine(2, 3)
    ubm.mean_supervector = UBM_MEAN
    ubm.variance_supervector = UBM_VAR
    mb = ISVBase(ubm, 2)
    t = ISVTrainer(4.)
    t.initialize(mb, TRAINING_STATS)
    mb.u = M_u
    for i in range(10):
        t.e_step(mb, TRAINING_STATS)
        t.m_step(mb)

    assert numpy.allclose(mb.d, d_ref, eps)
    assert numpy.allclose(mb.u, u_ref, eps)

    # Calls the enroll function
    m = ISVMachine(mb)

    Ne = numpy.array([0.1579, 0.9245, 0.1323, 0.2458]).reshape((2, 2))
    Fe = numpy.array(
        [0.1579, 0.1925, 0.3242, 0.1234, 0.2354, 0.2734,
         0.2514, 0.5874, 0.3345, 0.2463, 0.4789, 0.5236]).reshape((6, 2))
    gse1 = GMMStats(2, 3)
    gse1.n = Ne[:, 0]
    gse1.sum_px = Fe[:, 0].reshape(2, 3)
    gse2 = GMMStats(2, 3)
    gse2.n = Ne[:, 1]
    gse2.sum_px = Fe[:, 1].reshape(2, 3)

    gse = [gse1, gse2]
    t.enroll(m, gse, 5)
    assert numpy.allclose(m.z, z_ref, eps)

    # Testing exceptions
    nose.tools.assert_raises(RuntimeError, t.initialize, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.initialize, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.e_step, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.e_step, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.enroll, m, [[1, 2, 2]], 5)
def loadGMM():
    gmm = GMMMachine(2, 2)

    gmm.weights = bob.io.base.load(datafile('gmm.init_weights.hdf5', __name__, path="../data/"))
    gmm.means = bob.io.base.load(datafile('gmm.init_means.hdf5', __name__, path="../data/"))
    gmm.variances = bob.io.base.load(datafile('gmm.init_variances.hdf5', __name__, path="../data/"))
    # gmm.variance_thresholds = numpy.array([[0.001, 0.001], [0.001, 0.001]], 'float64')

    return gmm
def test_gmm_MAP_3():
    # Train a GMMMachine with MAP_GMMTrainer; compares to old reference
    ar = load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5"))

    # Initialize GMMMachine
    n_gaussians = 5
    prior_gmm = GMMMachine(n_gaussians)
    prior_gmm.means = load_array(
        resource_filename("bob.learn.em", "data/meansAfterML.hdf5"))
    prior_gmm.variances = load_array(
        resource_filename("bob.learn.em", "data/variancesAfterML.hdf5"))
    prior_gmm.weights = load_array(
        resource_filename("bob.learn.em", "data/weightsAfterML.hdf5"))

    threshold = 0.001
    prior_gmm.variance_thresholds = threshold

    # Initialize MAP Trainer
    prior = 0.001
    accuracy = 0.00001
    gmm = GMMMachine(
        n_gaussians,
        trainer="map",
        ubm=prior_gmm,
        convergence_threshold=prior,
        max_fitting_steps=1,
        update_means=True,
        update_variances=False,
        update_weights=False,
        mean_var_update_threshold=accuracy,
        map_relevance_factor=None,
    )
    gmm.variance_thresholds = threshold

    # Test results
    # Load torch3vision reference
    meansMAP_ref = load_array(
        resource_filename("bob.learn.em", "data/meansAfterMAP.hdf5"))
    variancesMAP_ref = load_array(
        resource_filename("bob.learn.em", "data/variancesAfterMAP.hdf5"))
    weightsMAP_ref = load_array(
        resource_filename("bob.learn.em", "data/weightsAfterMAP.hdf5"))

    for transform in (to_numpy, to_dask_array):
        ar = transform(ar)
        # Train
        gmm = gmm.fit(ar)

        # Compare to current results
        # Gaps are quite large. This might be explained by the fact that there is no
        # adaptation of a given Gaussian in torch3 when the corresponding responsibilities
        # are below the responsibilities threshold
        np.testing.assert_allclose(gmm.means, meansMAP_ref, atol=2e-1)
        np.testing.assert_allclose(gmm.variances, variancesMAP_ref, atol=1e-4)
        np.testing.assert_allclose(gmm.weights, weightsMAP_ref, atol=1e-4)
def loadGMM():
    gmm = GMMMachine(n_gaussians=2)

    gmm.weights = load_array(
        resource_filename("bob.learn.em", "data/gmm.init_weights.hdf5"))
    gmm.means = load_array(
        resource_filename("bob.learn.em", "data/gmm.init_means.hdf5"))
    gmm.variances = load_array(
        resource_filename("bob.learn.em", "data/gmm.init_variances.hdf5"))

    return gmm
def test_GMMMachine_3():
    # Test a GMMMachine (log-likelihood computation)

    data = bob.io.base.load(datafile('data.hdf5', __name__, path="../data/"))
    gmm = GMMMachine(2, 50)
    gmm.weights = bob.io.base.load(datafile('weights.hdf5', __name__, path="../data/"))
    gmm.means = bob.io.base.load(datafile('means.hdf5', __name__, path="../data/"))
    gmm.variances = bob.io.base.load(datafile('variances.hdf5', __name__, path="../data/"))

    # Compare the log-likelihood with the one obtained using Chris Matlab
    # implementation
    matlab_ll_ref = -2.361583051672024e+02
    assert abs(gmm(data) - matlab_ll_ref) < 1e-10
def test_gmm_MAP_2():
    # Train a GMMMachine with MAP_GMMTrainer and compare with matlab reference

    data = bob.io.base.load(datafile('data.hdf5', __name__, path="../data/"))
    data = data.reshape((1, data.shape[0]))  # make a 2D array out of it
    means = bob.io.base.load(datafile('means.hdf5', __name__, path="../data/"))
    variances = bob.io.base.load(datafile('variances.hdf5', __name__, path="../data/"))
    weights = bob.io.base.load(datafile('weights.hdf5', __name__, path="../data/"))

    gmm = GMMMachine(2, 50)
    gmm.means = means
    gmm.variances = variances
    gmm.weights = weights

    map_adapt = MAP_GMMTrainer(update_means=True, update_variances=False,
                               update_weights=False,
                               mean_var_update_responsibilities_threshold=0.,
                               prior_gmm=gmm, relevance_factor=4.)

    gmm_adapted = GMMMachine(2, 50)
    gmm_adapted.means = means
    gmm_adapted.variances = variances
    gmm_adapted.weights = weights

    # map_adapt.max_iterations = 1
    # map_adapt.train(gmm_adapted, data)
    bob.learn.em.train(map_adapt, gmm_adapted, data, max_iterations=1)

    new_means = bob.io.base.load(datafile('new_adapted_mean.hdf5', __name__, path="../data/"))

    # print new_means[0,:]
    # print gmm_adapted.means[:,0]

    # Compare to matlab reference
    assert equals(new_means[0, :], gmm_adapted.means[:, 0], 1e-4)
    assert equals(new_means[1, :], gmm_adapted.means[:, 1], 1e-4)
def load_model(self, ubm_file):
    """Loads the projector (UBM) from a file."""
    hdf5file = HDF5File(ubm_file, "r")
    logger.debug("Loading model from file '%s'", ubm_file)
    # Read the UBM
    self.ubm = GMMMachine.from_hdf5(hdf5file)
    self.ubm.variance_thresholds = self.variance_threshold
def test_score():
    gmm1 = GMM(number_of_gaussians=2)
    gmm1.load_model(
        pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_ubm.hdf5"))
    biometric_reference = GMMMachine.from_hdf5(
        pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_enrolled.hdf5"),
        ubm=gmm1.ubm,
    )
    probe = GMMStats.from_hdf5(
        pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_projected.hdf5"))
    probe_data = utils.random_array((20, 45), -5.0, 5.0, seed=seed_value)

    reference_score = 0.6509

    numpy.testing.assert_almost_equal(
        gmm1.score(biometric_reference, probe), reference_score, decimal=5)

    multi_refs = gmm1.score_multiple_biometric_references(
        [biometric_reference, biometric_reference, biometric_reference], probe)
    assert multi_refs.shape == (3, 1), multi_refs.shape
    numpy.testing.assert_almost_equal(multi_refs, reference_score, decimal=5)

    # With not projected data
    numpy.testing.assert_almost_equal(
        gmm1.score(biometric_reference, probe_data), reference_score, decimal=5)
def test_training():
    """Tests the generation of the UBM."""
    # Set a small training iteration count
    gmm1 = GMM(
        number_of_gaussians=2,
        kmeans_training_iterations=5,
        ubm_training_iterations=5,
        init_seed=seed_value,
        kmeans_oversampling_factor=2,
    )
    train_data = utils.random_training_set(
        (100, 45), count=5, minimum=-5.0, maximum=5.0)
    train_data = numpy.vstack(train_data)

    # Train the UBM (projector)
    gmm1.fit(train_data)

    # Test saving and loading of projector
    with tempfile.NamedTemporaryFile(prefix="bob_", suffix="_model.hdf5") as fd:
        temp_file = fd.name
        gmm1.save_model(temp_file)

        reference_file = pkg_resources.resource_filename(
            "bob.bio.gmm.test", "data/gmm_ubm.hdf5")
        if regenerate_refs:
            gmm1.save_model(reference_file)

        gmm2 = GMM(number_of_gaussians=2)
        gmm2.load_model(temp_file)
        ubm_reference = GMMMachine.from_hdf5(reference_file)
        assert gmm2.ubm.is_similar_to(ubm_reference)
def test_gmm_ML_1():
    """Trains a GMMMachine with ML_GMMTrainer"""
    ar = load_array(
        resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))
    gmm_ref = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"))

    for transform in (to_numpy, to_dask_array):
        ar = transform(ar)

        gmm = loadGMM()
        # test rng handling
        gmm.convergence_threshold = 0.001
        gmm.update_means = True
        gmm.update_variances = True
        gmm.update_weights = True
        gmm.random_state = np.random.RandomState(seed=12345)
        gmm = gmm.fit(ar)

        gmm = loadGMM()
        gmm.convergence_threshold = 0.001
        gmm.update_means = True
        gmm.update_variances = True
        gmm.update_weights = True
        # Generate reference
        # gmm.save(HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "w"))
        gmm = gmm.fit(ar)

        assert_gmm_equal(gmm, gmm_ref)
def test_likelihood():
    data = np.array([[1, 1, 1], [-1, 0, 0], [0, 0, 1], [2, 2, 2]])
    n_gaussians = 3
    machine = GMMMachine(n_gaussians)
    machine.means = np.repeat([[0], [1], [-1]], 3, 1)
    machine.variances = np.ones_like(machine.means)

    for transform in (to_numpy, to_dask_array):
        data = transform(data)
        log_likelihood = machine.log_likelihood(data)
        expected_ll = np.array([
            -3.6519900964986527,
            -3.83151883210222,
            -3.83151883210222,
            -5.344374066745753,
        ])
        np.testing.assert_almost_equal(log_likelihood, expected_ll)
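# The expected values above are the usual diagonal-covariance mixture
# log-likelihood log p(x) = logsumexp_k [ log w_k + log N(x; mu_k, diag(var_k)) ].
# A minimal reference sketch in plain numpy; the helper name is illustrative,
# and uniform weights (1/3 each) are an assumption implied by the expected values.
import numpy as np


def _reference_log_likelihood(data, means, variances, weights):
    # per-component log of the diagonal Gaussian density, summed over dimensions
    log_norm = -0.5 * np.log(2 * np.pi * variances).sum(axis=1)                 # (K,)
    sq = (((data[:, None, :] - means[None, :, :]) ** 2)
          / variances[None, :, :]).sum(axis=2)                                  # (N, K)
    weighted = np.log(weights)[None, :] + log_norm[None, :] - 0.5 * sq          # (N, K)
    # numerically stable log-sum-exp over the components
    m = weighted.max(axis=1, keepdims=True)
    return (m + np.log(np.exp(weighted - m).sum(axis=1, keepdims=True))).ravel()


# Example: reproduces the expected_ll values of test_likelihood above
# means = np.repeat([[0], [1], [-1]], 3, 1).astype(float)
# lls = _reference_log_likelihood(data.astype(float), means,
#                                 np.ones_like(means), np.full(3, 1 / 3))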
def test_gmm_MAP_1():
    # Train a GMMMachine with MAP_GMMTrainer

    ar = bob.io.base.load(
        datafile('faithful.torch3_f64.hdf5', __name__, path="../data/"))

    # test with rng
    rng = bob.core.random.mt19937(12345)
    gmm = GMMMachine(
        bob.io.base.HDF5File(datafile("gmm_ML.hdf5", __name__, path="../data/")))
    gmmprior = GMMMachine(
        bob.io.base.HDF5File(datafile("gmm_ML.hdf5", __name__, path="../data/")))
    map_gmmtrainer = MAP_GMMTrainer(
        update_means=True, update_variances=False, update_weights=False,
        prior_gmm=gmmprior, relevance_factor=4.)
    bob.learn.em.train(map_gmmtrainer, gmm, ar, rng=rng)

    gmm = GMMMachine(
        bob.io.base.HDF5File(datafile("gmm_ML.hdf5", __name__, path="../data/")))
    gmmprior = GMMMachine(
        bob.io.base.HDF5File(datafile("gmm_ML.hdf5", __name__, path="../data/")))
    map_gmmtrainer = MAP_GMMTrainer(
        update_means=True, update_variances=False, update_weights=False,
        prior_gmm=gmmprior, relevance_factor=4.)

    # map_gmmtrainer.train(gmm, ar)
    bob.learn.em.train(map_gmmtrainer, gmm, ar)

    gmm_ref = GMMMachine(
        bob.io.base.HDF5File(datafile('gmm_MAP.hdf5', __name__, path="../data/")))

    assert (equals(gmm.means, gmm_ref.means, 1e-3)
            and equals(gmm.variances, gmm_ref.variances, 1e-3)
            and equals(gmm.weights, gmm_ref.weights, 1e-3))
def test_gmm_test():
    # Tests a GMMMachine by computing scores against a model and comparing to
    # an old reference

    ar = bob.io.base.load(datafile('dataforMAP.hdf5', __name__, path="../data/"))

    # Initialize GMMMachine
    n_gaussians = 5
    n_inputs = 45
    gmm = GMMMachine(n_gaussians, n_inputs)
    gmm.means = bob.io.base.load(datafile('meansAfterML.hdf5', __name__, path="../data/"))
    gmm.variances = bob.io.base.load(datafile('variancesAfterML.hdf5', __name__, path="../data/"))
    gmm.weights = bob.io.base.load(datafile('weightsAfterML.hdf5', __name__, path="../data/"))

    threshold = 0.001
    gmm.set_variance_thresholds(threshold)

    # Test against the model
    score_mean_ref = -1.50379e+06
    score = 0.
    for v in ar:
        score += gmm(v)
    score /= len(ar)

    # Compare current results to torch3vision
    assert abs(score - score_mean_ref) / score_mean_ref < 1e-4
def test_gmm_test():
    # Tests a GMMMachine by computing scores against a model and comparing to a reference
    ar = load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5"))

    # Initialize GMMMachine
    n_gaussians = 5
    gmm = GMMMachine(n_gaussians)
    gmm.means = load_array(
        resource_filename("bob.learn.em", "data/meansAfterML.hdf5"))
    gmm.variances = load_array(
        resource_filename("bob.learn.em", "data/variancesAfterML.hdf5"))
    gmm.weights = load_array(
        resource_filename("bob.learn.em", "data/weightsAfterML.hdf5"))

    threshold = 0.001
    gmm.variance_thresholds = threshold

    # Test against the model
    score_mean_ref = -1.50379e06

    for transform in (to_numpy, to_dask_array):
        ar = transform(ar)
        score = gmm.log_likelihood(ar).sum()
        score /= len(ar)

        # Compare current results to torch3vision
        assert abs(score - score_mean_ref) / score_mean_ref < 1e-4
def convert_gmm(gmm):
    """
    Converts a rasr_cache.MixtureFile to a bob.learn.em.GMMMachine

    :param gmm: (MixtureFile)
    :return: (GMMMachine)
    """
    ubm = GMMMachine(gmm.nMeans, gmm.dim)
    tmp_m = np.ndarray((gmm.nMeans, gmm.dim))
    tmp_c = np.ndarray((gmm.nCovs, gmm.dim))
    for i in range(gmm.nMeans):
        tmp_m[i, :] = np.array(gmm.getMeanByIdx(i))
        tmp_c[i, :] = np.array(
            gmm.getCovByIdx(0)
        )  # TODO figure out how to generate the same number of covariances as means
    ubm.means = tmp_m
    ubm.variances = tmp_c
    return ubm
def test_gmm_MAP_1():
    # Train a GMMMachine with MAP_GMMTrainer
    ar = load_array(
        resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))

    # test with rng
    gmmprior = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"))
    gmm = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"),
        ubm=gmmprior,
    )
    gmm.update_means = True
    gmm.update_variances = False
    gmm.update_weights = False
    rng = np.random.RandomState(seed=12345)
    gmm.random_state = rng
    gmm = gmm.fit(ar)

    gmmprior = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"))
    gmm = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"),
        ubm=gmmprior,
    )
    gmm.update_means = True
    gmm.update_variances = False
    gmm.update_weights = False

    # Generate reference
    # gmm.save(HDF5File(resource_filename("bob.learn.em", "data/gmm_MAP.hdf5"), "w"))

    gmm_ref = GMMMachine.from_hdf5(
        HDF5File(resource_filename("bob.learn.em", "data/gmm_MAP.hdf5"), "r"))

    for transform in (to_numpy, to_dask_array):
        ar = transform(ar)
        gmm = gmm.fit(ar)

        np.testing.assert_almost_equal(gmm.means, gmm_ref.means, decimal=3)
        np.testing.assert_almost_equal(gmm.variances, gmm_ref.variances, decimal=3)
        np.testing.assert_almost_equal(gmm.weights, gmm_ref.weights, decimal=3)
def test_GMMMachine_4():
    import numpy
    numpy.random.seed(3)  # FIXING A SEED

    # It does not matter that the data is random: averaging the per-sample
    # log-likelihoods (1D arrays, computed in Python) must give the same result
    # as passing the full 2D array (computed in C++).
    data = numpy.random.rand(100, 50)

    gmm = GMMMachine(2, 50)
    gmm.weights = bob.io.base.load(datafile('weights.hdf5', __name__, path="../data/"))
    gmm.means = bob.io.base.load(datafile('means.hdf5', __name__, path="../data/"))
    gmm.variances = bob.io.base.load(datafile('variances.hdf5', __name__, path="../data/"))

    ll = 0
    for i in range(data.shape[0]):
        ll += gmm(data[i, :])
    ll /= data.shape[0]

    assert ll == gmm(data)
def test_gmm_kmeans_plusplus_init():
    n_gaussians = 3
    machine = GMMMachine(
        n_gaussians,
        k_means_trainer=KMeansMachine(n_clusters=n_gaussians, init_method="k-means++"),
    )
    data = np.array(
        [[1.5, 1], [1, 1.5], [-1, 0.5], [-1.5, 0], [2, 2], [2.5, 2.5]])
    for transform in (to_numpy, to_dask_array):
        data = transform(data)
        machine = machine.fit(data)
        expected_means = np.array([[2.25, 2.25], [-1.25, 0.25], [1.25, 1.25]])
        expected_variances = np.array(
            [[1 / 16, 1 / 16], [1 / 16, 1 / 16], [1 / 16, 1 / 16]])
        np.testing.assert_almost_equal(machine.means, expected_means, decimal=3)
        np.testing.assert_almost_equal(machine.variances, expected_variances)
def test_gmm_MAP_3():
    # Train a GMMMachine with MAP_GMMTrainer; compares to old reference

    ar = bob.io.base.load(datafile('dataforMAP.hdf5', __name__, path="../data/"))

    # Initialize GMMMachine
    n_gaussians = 5
    n_inputs = 45
    prior_gmm = GMMMachine(n_gaussians, n_inputs)
    prior_gmm.means = bob.io.base.load(datafile('meansAfterML.hdf5', __name__, path="../data/"))
    prior_gmm.variances = bob.io.base.load(datafile('variancesAfterML.hdf5', __name__, path="../data/"))
    prior_gmm.weights = bob.io.base.load(datafile('weightsAfterML.hdf5', __name__, path="../data/"))

    threshold = 0.001
    prior_gmm.set_variance_thresholds(threshold)

    # Initialize MAP Trainer
    relevance_factor = 0.1
    prior = 0.001
    max_iter_gmm = 1
    accuracy = 0.00001
    map_factor = 0.5
    map_gmmtrainer = MAP_GMMTrainer(
        prior_gmm, alpha=map_factor,
        update_means=True, update_variances=False, update_weights=False,
        mean_var_update_responsibilities_threshold=accuracy)
    # map_gmmtrainer.max_iterations = max_iter_gmm
    # map_gmmtrainer.convergence_threshold = accuracy
    gmm = GMMMachine(n_gaussians, n_inputs)
    gmm.set_variance_thresholds(threshold)

    # Train
    # map_gmmtrainer.train(gmm, ar)
    bob.learn.em.train(map_gmmtrainer, gmm, ar,
                       max_iterations=max_iter_gmm, convergence_threshold=prior)

    # Test results
    # Load torch3vision reference
    meansMAP_ref = bob.io.base.load(datafile('meansAfterMAP.hdf5', __name__, path="../data/"))
    variancesMAP_ref = bob.io.base.load(datafile('variancesAfterMAP.hdf5', __name__, path="../data/"))
    weightsMAP_ref = bob.io.base.load(datafile('weightsAfterMAP.hdf5', __name__, path="../data/"))

    # Compare to current results
    # Gaps are quite large. This might be explained by the fact that there is no
    # adaptation of a given Gaussian in torch3 when the corresponding responsibilities
    # are below the responsibilities threshold
    assert equals(gmm.means, meansMAP_ref, 2e-1)
    assert equals(gmm.variances, variancesMAP_ref, 1e-4)
    assert equals(gmm.weights, weightsMAP_ref, 1e-4)
def test_GMMMachine_2():
    # Test a GMMMachine (statistics)

    arrayset = bob.io.base.load(
        datafile("faithful.torch3_f64.hdf5", __name__, path="../data/"))
    gmm = GMMMachine(2, 2)
    gmm.weights = numpy.array([0.5, 0.5], 'float64')
    gmm.means = numpy.array([[3, 70], [4, 72]], 'float64')
    gmm.variances = numpy.array([[1, 10], [2, 5]], 'float64')
    gmm.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

    stats = GMMStats(2, 2)
    gmm.acc_statistics(arrayset, stats)

    stats_ref = GMMStats(bob.io.base.HDF5File(datafile("stats.hdf5", __name__, path="../data/")))

    assert stats.t == stats_ref.t
    assert numpy.allclose(stats.n, stats_ref.n, atol=1e-10)
    # assert numpy.array_equal(stats.sumPx, stats_ref.sumPx)
    # Note AA: precision error above
    assert numpy.allclose(stats.sum_px, stats_ref.sum_px, atol=1e-10)
    assert numpy.allclose(stats.sum_pxx, stats_ref.sum_pxx, atol=1e-10)
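# For reference, the quantities that GMMStats accumulates above are the standard
# zeroth/first/second-order sufficient statistics of the data under the GMM
# (t = number of frames, n = per-component sums of responsibilities, sum_px and
# sum_pxx = responsibility-weighted sums of x and x**2). A minimal numpy sketch;
# the helper name is illustrative and not part of the library API.
import numpy as np


def _accumulate_stats_by_hand(data, weights, means, variances):
    # per-frame responsibilities of each diagonal-covariance Gaussian
    log_norm = -0.5 * np.log(2 * np.pi * variances).sum(axis=1)                  # (K,)
    sq = (((data[:, None, :] - means[None, :, :]) ** 2)
          / variances[None, :, :]).sum(axis=2)                                   # (N, K)
    log_w_gauss = np.log(weights)[None, :] + log_norm[None, :] - 0.5 * sq
    log_w_gauss -= log_w_gauss.max(axis=1, keepdims=True)
    resp = np.exp(log_w_gauss)
    resp /= resp.sum(axis=1, keepdims=True)                                      # (N, K)

    t = len(data)                    # number of accumulated frames
    n = resp.sum(axis=0)             # zeroth order: sum of responsibilities
    sum_px = resp.T @ data           # first order:  responsibility-weighted sums
    sum_pxx = resp.T @ (data ** 2)   # second order: responsibility-weighted squares
    return t, n, sum_px, sum_pxx


# Example with the toy GMM of test_GMMMachine_2 and made-up 2D frames:
# toy_data = np.array([[3.1, 69.0], [4.2, 73.0], [3.5, 71.0]])
# t, n, sum_px, sum_pxx = _accumulate_stats_by_hand(
#     toy_data, np.array([0.5, 0.5]),
#     np.array([[3., 70.], [4., 72.]]), np.array([[1., 10.], [2., 5.]]))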
def test_gmm_ML_2():
    # Trains a GMMMachine with ML_GMMTrainer; compares to an old reference

    ar = bob.io.base.load(datafile('dataNormalized.hdf5', __name__, path="../data/"))

    # Initialize GMMMachine
    gmm = GMMMachine(5, 45)
    gmm.means = bob.io.base.load(
        datafile('meansAfterKMeans.hdf5', __name__, path="../data/")).astype('float64')
    gmm.variances = bob.io.base.load(
        datafile('variancesAfterKMeans.hdf5', __name__, path="../data/")).astype('float64')
    gmm.weights = numpy.exp(bob.io.base.load(
        datafile('weightsAfterKMeans.hdf5', __name__, path="../data/")).astype('float64'))

    threshold = 0.001
    gmm.set_variance_thresholds(threshold)

    # Initialize ML Trainer
    prior = 0.001
    max_iter_gmm = 25
    accuracy = 0.00001
    ml_gmmtrainer = ML_GMMTrainer(True, True, True, prior)

    # Run ML
    # ml_gmmtrainer.train(gmm, ar)
    bob.learn.em.train(ml_gmmtrainer, gmm, ar,
                       max_iterations=max_iter_gmm, convergence_threshold=accuracy)

    # Test results
    # Load torch3vision reference
    meansML_ref = bob.io.base.load(datafile('meansAfterML.hdf5', __name__, path="../data/"))
    variancesML_ref = bob.io.base.load(datafile('variancesAfterML.hdf5', __name__, path="../data/"))
    weightsML_ref = bob.io.base.load(datafile('weightsAfterML.hdf5', __name__, path="../data/"))

    # Compare to current results
    assert equals(gmm.means, meansML_ref, 3e-3)
    assert equals(gmm.variances, variancesML_ref, 3e-3)
    assert equals(gmm.weights, weightsML_ref, 1e-4)
def test_JFAMachine():
    # Creates a UBM
    weights = numpy.array([0.4, 0.6], 'float64')
    means = numpy.array([[1, 6, 2], [4, 3, 2]], 'float64')
    variances = numpy.array([[1, 2, 1], [2, 1, 2]], 'float64')
    ubm = GMMMachine(2, 3)
    ubm.weights = weights
    ubm.means = means
    ubm.variances = variances

    # Creates a JFABase
    U = numpy.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]], 'float64')
    V = numpy.array([[6, 5], [4, 3], [2, 1], [1, 2], [3, 4], [5, 6]], 'float64')
    d = numpy.array([0, 1, 0, 1, 0, 1], 'float64')
    base = JFABase(ubm, 2, 2)
    base.u = U
    base.v = V
    base.d = d

    # Creates a JFAMachine
    y = numpy.array([1, 2], 'float64')
    z = numpy.array([3, 4, 1, 2, 0, 1], 'float64')
    m = JFAMachine(base)
    m.y = y
    m.z = z
    n_gaussians, dim, ru, rv = m.shape
    supervector_length = m.supervector_length
    assert n_gaussians == 2
    assert dim == 3
    assert supervector_length == 6
    assert ru == 2
    assert rv == 2
    assert (m.y == y).all()
    assert (m.z == z).all()

    # Saves and loads
    filename = str(tempfile.mkstemp(".hdf5")[1])
    m.save(bob.io.base.HDF5File(filename, 'w'))
    m_loaded = JFAMachine(bob.io.base.HDF5File(filename))
    m_loaded.jfa_base = base
    assert m == m_loaded
    assert (m != m_loaded) is False
    assert m.is_similar_to(m_loaded)

    # Copy constructor
    mc = JFAMachine(m)
    assert m == mc

    # Variant
    # mv = JFAMachine()
    # Checks for correctness
    # mv.jfa_base = base
    # m.y = y
    # m.z = z
    # assert m.dim_c == 2
    # assert m.dim_d == 3
    # assert m.dim_cd == 6
    # assert m.dim_ru == 2
    # assert m.dim_rv == 2
    # assert (m.y == y).all()
    # assert (m.z == z).all()

    # Defines GMMStats
    gs = GMMStats(2, 3)
    log_likelihood = -3.
    T = 1
    n = numpy.array([0.4, 0.6], 'float64')
    sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64')
    sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64')
    gs.log_likelihood = log_likelihood
    gs.t = T
    gs.n = n
    gs.sum_px = sumpx
    gs.sum_pxx = sumpxx

    # Forward GMMStats and check estimated value of the x speaker factor
    eps = 1e-10
    x_ref = numpy.array([0.291042849767692, 0.310273618998444], 'float64')
    score_ref = -2.111577181208289
    score = m.log_likelihood(gs)
    assert numpy.allclose(m.x, x_ref, eps)
    assert abs(score_ref - score) < eps

    # x and Ux
    x = numpy.ndarray((2,), numpy.float64)
    m.estimate_x(gs, x)
    n_gaussians, dim, _, _ = m.shape
    x_py = estimate_x(n_gaussians, dim, ubm.mean_supervector,
                      ubm.variance_supervector, U, n, sumpx)
    assert numpy.allclose(x, x_py, eps)

    ux = numpy.ndarray((6,), numpy.float64)
    m.estimate_ux(gs, ux)
    n_gaussians, dim, _, _ = m.shape
    ux_py = estimate_ux(n_gaussians, dim, ubm.mean_supervector,
                        ubm.variance_supervector, U, n, sumpx)
    assert numpy.allclose(ux, ux_py, eps)
    assert numpy.allclose(m.x, x, eps)

    score = m.forward_ux(gs, ux)
    assert abs(score_ref - score) < eps

    # Clean-up
    os.unlink(filename)
def test_JFATrainAndEnrol():
    # Train and enroll a JFAMachine

    # Calls the train function
    ubm = GMMMachine(2, 3)
    ubm.mean_supervector = UBM_MEAN
    ubm.variance_supervector = UBM_VAR
    mb = JFABase(ubm, 2, 2)
    t = JFATrainer()
    t.initialize(mb, TRAINING_STATS)
    mb.u = M_u
    mb.v = M_v
    mb.d = M_d
    bob.learn.em.train_jfa(t, mb, TRAINING_STATS, initialize=False)

    v_ref = numpy.array(
        [[0.245364911936476, 0.978133261775424],
         [0.769646805052223, 0.940070736856596],
         [0.310779202800089, 1.456332053893072],
         [0.184760934399551, 2.265139705602147],
         [0.701987784039800, 0.081632150899400],
         [0.074344030229297, 1.090248340917255]], 'float64')
    u_ref = numpy.array(
        [[0.049424652628448, 0.060480486336896],
         [0.178104127464007, 1.884873813495153],
         [1.204011484266777, 2.281351307871720],
         [7.278512126426286, -0.390966087173334],
         [-0.084424326581145, -0.081725474934414],
         [4.042143689831097, -0.262576386580701]], 'float64')
    d_ref = numpy.array(
        [9.648467e-18, 2.63720683155e-12, 2.11822157653706e-10,
         9.1047243e-17, 1.41163442535567e-10, 3.30581e-19], 'float64')

    eps = 1e-10
    assert numpy.allclose(mb.v, v_ref, eps)
    assert numpy.allclose(mb.u, u_ref, eps)
    assert numpy.allclose(mb.d, d_ref, eps)

    # Calls the enroll function
    m = JFAMachine(mb)

    Ne = numpy.array([0.1579, 0.9245, 0.1323, 0.2458]).reshape((2, 2))
    Fe = numpy.array(
        [0.1579, 0.1925, 0.3242, 0.1234, 0.2354, 0.2734,
         0.2514, 0.5874, 0.3345, 0.2463, 0.4789, 0.5236]).reshape((6, 2))
    gse1 = GMMStats(2, 3)
    gse1.n = Ne[:, 0]
    gse1.sum_px = Fe[:, 0].reshape(2, 3)
    gse2 = GMMStats(2, 3)
    gse2.n = Ne[:, 1]
    gse2.sum_px = Fe[:, 1].reshape(2, 3)

    gse = [gse1, gse2]
    t.enroll(m, gse, 5)

    y_ref = numpy.array([0.555991469319657, 0.002773650670010], 'float64')
    z_ref = numpy.array(
        [8.2228e-20, 3.15216909492e-13, -1.48616735364395e-10,
         1.0625905e-17, 3.7150503117895e-11, 1.71104e-19], 'float64')
    assert numpy.allclose(m.y, y_ref, eps)
    assert numpy.allclose(m.z, z_ref, eps)

    # Testing exceptions
    nose.tools.assert_raises(RuntimeError, t.initialize, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.initialize, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.e_step_u, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.e_step_u, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.m_step_u, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.m_step_u, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.e_step_v, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.e_step_v, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.m_step_v, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.m_step_v, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.e_step_d, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.e_step_d, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.m_step_d, mb, [1, 2, 2])
    nose.tools.assert_raises(RuntimeError, t.m_step_d, mb, [[1, 2, 2]])
    nose.tools.assert_raises(RuntimeError, t.enroll, m, [[1, 2, 2]], 5)
def test_LinearScoring():
    ubm = GMMMachine(2, 2)
    ubm.weights = numpy.array([0.5, 0.5], 'float64')
    ubm.means = numpy.array([[3, 70], [4, 72]], 'float64')
    ubm.variances = numpy.array([[1, 10], [2, 5]], 'float64')
    ubm.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

    model1 = GMMMachine(2, 2)
    model1.weights = numpy.array([0.5, 0.5], 'float64')
    model1.means = numpy.array([[1, 2], [3, 4]], 'float64')
    model1.variances = numpy.array([[9, 10], [11, 12]], 'float64')
    model1.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

    model2 = GMMMachine(2, 2)
    model2.weights = numpy.array([0.5, 0.5], 'float64')
    model2.means = numpy.array([[5, 6], [7, 8]], 'float64')
    model2.variances = numpy.array([[13, 14], [15, 16]], 'float64')
    model2.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64')

    stats1 = GMMStats(2, 2)
    stats1.sum_px = numpy.array([[1, 2], [3, 4]], 'float64')
    stats1.n = numpy.array([1, 2], 'float64')
    stats1.t = 1 + 2

    stats2 = GMMStats(2, 2)
    stats2.sum_px = numpy.array([[5, 6], [7, 8]], 'float64')
    stats2.n = numpy.array([3, 4], 'float64')
    stats2.t = 3 + 4

    stats3 = GMMStats(2, 2)
    stats3.sum_px = numpy.array([[5, 6], [7, 3]], 'float64')
    stats3.n = numpy.array([3, 4], 'float64')
    stats3.t = 3 + 4

    test_channeloffset = [numpy.array([9, 8, 7, 6], 'float64'),
                          numpy.array([5, 4, 3, 2], 'float64'),
                          numpy.array([1, 0, 1, 2], 'float64')]

    # Reference scores (from Idiap internal matlab implementation)
    ref_scores_00 = numpy.array(
        [[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], 'float64')
    ref_scores_01 = numpy.array(
        [[790.9666666666667, 743.9571428571428, 753.6714285714285],
         [738.5666666666667, 695.4428571428572, 704.5857142857144]], 'float64')
    ref_scores_10 = numpy.array(
        [[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], 'float64')
    ref_scores_11 = numpy.array(
        [[871.8333333333332, 776.3000000000001, 770.3571428571427],
         [793.8333333333333, 714.1857142857143, 717.5000000000000]], 'float64')

    # 1/ Use GMMMachines
    # 1/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3])
    assert (abs(scores - ref_scores_00) < 1e-7).all()

    # 1/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], [], True)
    assert (abs(scores - ref_scores_01) < 1e-7).all()
    # scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], (), True)
    # assert (abs(scores - ref_scores_01) < 1e-7).all()
    # scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], None, True)
    # assert (abs(scores - ref_scores_01) < 1e-7).all()

    # 1/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3],
                            test_channeloffset)
    assert (abs(scores - ref_scores_10) < 1e-7).all()

    # 1/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3],
                            test_channeloffset, True)
    assert (abs(scores - ref_scores_11) < 1e-7).all()

    # 2/ Use mean/variance supervectors
    # 2/a/ Without test_channelOffset, without frame-length normalisation
    scores = linear_scoring([model1.mean_supervector, model2.mean_supervector],
                            ubm.mean_supervector, ubm.variance_supervector,
                            [stats1, stats2, stats3])
    assert (abs(scores - ref_scores_00) < 1e-7).all()

    # 2/b/ Without test_channelOffset, with frame-length normalisation
    scores = linear_scoring([model1.mean_supervector, model2.mean_supervector],
                            ubm.mean_supervector, ubm.variance_supervector,
                            [stats1, stats2, stats3], [], True)
    assert (abs(scores - ref_scores_01) < 1e-7).all()

    # 2/c/ With test_channelOffset, without frame-length normalisation
    scores = linear_scoring([model1.mean_supervector, model2.mean_supervector],
                            ubm.mean_supervector, ubm.variance_supervector,
                            [stats1, stats2, stats3], test_channeloffset)
    assert (abs(scores - ref_scores_10) < 1e-7).all()

    # 2/d/ With test_channelOffset, with frame-length normalisation
    scores = linear_scoring([model1.mean_supervector, model2.mean_supervector],
                            ubm.mean_supervector, ubm.variance_supervector,
                            [stats1, stats2, stats3], test_channeloffset, True)
    assert (abs(scores - ref_scores_11) < 1e-7).all()

    # 3/ Using single model/sample
    # 3/a/ without frame-length normalisation
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats1, test_channeloffset[0])
    assert abs(score - ref_scores_10[0, 0]) < 1e-7
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats2, test_channeloffset[1])
    assert abs(score - ref_scores_10[0, 1]) < 1e-7
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats3, test_channeloffset[2])
    assert abs(score - ref_scores_10[0, 2]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats1, test_channeloffset[0])
    assert abs(score - ref_scores_10[1, 0]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats2, test_channeloffset[1])
    assert abs(score - ref_scores_10[1, 1]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats3, test_channeloffset[2])
    assert abs(score - ref_scores_10[1, 2]) < 1e-7

    # 3/b/ with frame-length normalisation
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats1, test_channeloffset[0], True)
    assert abs(score - ref_scores_11[0, 0]) < 1e-7
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats2, test_channeloffset[1], True)
    assert abs(score - ref_scores_11[0, 1]) < 1e-7
    score = linear_scoring(model1.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats3, test_channeloffset[2], True)
    assert abs(score - ref_scores_11[0, 2]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats1, test_channeloffset[0], True)
    assert abs(score - ref_scores_11[1, 0]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats2, test_channeloffset[1], True)
    assert abs(score - ref_scores_11[1, 1]) < 1e-7
    score = linear_scoring(model2.mean_supervector, ubm.mean_supervector,
                           ubm.variance_supervector, stats3, test_channeloffset[2], True)
    assert abs(score - ref_scores_11[1, 2]) < 1e-7
def test_GMMMachine_1():
    # Test a GMMMachine basic features

    weights = numpy.array([0.5, 0.5], 'float64')
    weights2 = numpy.array([0.6, 0.4], 'float64')
    means = numpy.array([[3, 70, 0], [4, 72, 0]], 'float64')
    means2 = numpy.array([[3, 7, 0], [4, 72, 0]], 'float64')
    variances = numpy.array([[1, 10, 1], [2, 5, 2]], 'float64')
    variances2 = numpy.array([[10, 10, 1], [2, 5, 2]], 'float64')
    varianceThresholds = numpy.array([[0, 0, 0], [0, 0, 0]], 'float64')
    varianceThresholds2 = numpy.array([[0.0005, 0.0005, 0.0005], [0, 0, 0]], 'float64')

    # Initializes a GMMMachine
    gmm = GMMMachine(2, 3)
    # Sets the weights, means, variances and varianceThresholds and
    # checks correctness
    gmm.weights = weights
    gmm.means = means
    gmm.variances = variances
    gmm.variance_thresholds = varianceThresholds
    assert gmm.shape == (2, 3)
    assert (gmm.weights == weights).all()
    assert (gmm.means == means).all()
    assert (gmm.variances == variances).all()
    assert (gmm.variance_thresholds == varianceThresholds).all()

    # Checks supervector-like accesses
    assert (gmm.mean_supervector == means.reshape(means.size)).all()
    assert (gmm.variance_supervector == variances.reshape(variances.size)).all()
    newMeans = numpy.array([[3, 70, 2], [4, 72, 2]], 'float64')
    newVariances = numpy.array([[1, 1, 1], [2, 2, 2]], 'float64')

    # Checks particular varianceThresholds-related methods
    varianceThresholds1D = numpy.array([0.3, 1, 0.5], 'float64')
    gmm.set_variance_thresholds(varianceThresholds1D)
    assert (gmm.variance_thresholds[0, :] == varianceThresholds1D).all()
    assert (gmm.variance_thresholds[1, :] == varianceThresholds1D).all()

    gmm.set_variance_thresholds(0.005)
    assert (gmm.variance_thresholds == 0.005).all()

    # Checks Gaussians access
    gmm.means = newMeans
    gmm.variances = newVariances
    assert (gmm.get_gaussian(0).mean == newMeans[0, :]).all()
    assert (gmm.get_gaussian(1).mean == newMeans[1, :]).all()
    assert (gmm.get_gaussian(0).variance == newVariances[0, :]).all()
    assert (gmm.get_gaussian(1).variance == newVariances[1, :]).all()

    # Checks resize
    gmm.resize(4, 5)
    assert gmm.shape == (4, 5)

    # Checks comparison
    gmm2 = GMMMachine(gmm)
    gmm3 = GMMMachine(2, 3)
    gmm3.weights = weights2
    gmm3.means = means
    gmm3.variances = variances
    # gmm3.varianceThresholds = varianceThresholds
    gmm4 = GMMMachine(2, 3)
    gmm4.weights = weights
    gmm4.means = means2
    gmm4.variances = variances
    # gmm4.varianceThresholds = varianceThresholds
    gmm5 = GMMMachine(2, 3)
    gmm5.weights = weights
    gmm5.means = means
    gmm5.variances = variances2
    # gmm5.varianceThresholds = varianceThresholds
    gmm6 = GMMMachine(2, 3)
    gmm6.weights = weights
    gmm6.means = means
    gmm6.variances = variances
    # gmm6.varianceThresholds = varianceThresholds2

    assert gmm == gmm2
    assert (gmm != gmm2) is False
    assert gmm.is_similar_to(gmm2)
    assert gmm != gmm3
    assert (gmm == gmm3) is False
    assert gmm.is_similar_to(gmm3) is False
    assert gmm != gmm4
    assert (gmm == gmm4) is False
    assert gmm.is_similar_to(gmm4) is False
    assert gmm != gmm5
    assert (gmm == gmm5) is False
    assert gmm.is_similar_to(gmm5) is False
    assert gmm != gmm6
    assert (gmm == gmm6) is False
    assert gmm.is_similar_to(gmm6) is False
def test_ISVMachine():
    # Creates a UBM
    weights = numpy.array([0.4, 0.6], 'float64')
    means = numpy.array([[1, 6, 2], [4, 3, 2]], 'float64')
    variances = numpy.array([[1, 2, 1], [2, 1, 2]], 'float64')
    ubm = GMMMachine(2, 3)
    ubm.weights = weights
    ubm.means = means
    ubm.variances = variances

    # Creates an ISVBaseMachine
    U = numpy.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]], 'float64')
    # V = numpy.array([[0], [0], [0], [0], [0], [0]], 'float64')
    d = numpy.array([0, 1, 0, 1, 0, 1], 'float64')
    base = ISVBase(ubm, 2)
    base.u = U
    # base.v = V
    base.d = d

    # Creates a JFAMachine
    z = numpy.array([3, 4, 1, 2, 0, 1], 'float64')
    m = ISVMachine(base)
    m.z = z
    n_gaussians, dim, ru = m.shape
    supervector_length = m.supervector_length
    assert n_gaussians == 2
    assert dim == 3
    assert supervector_length == 6
    assert ru == 2
    assert (m.z == z).all()

    # Saves and loads
    filename = str(tempfile.mkstemp(".hdf5")[1])
    m.save(bob.io.base.HDF5File(filename, 'w'))
    m_loaded = ISVMachine(bob.io.base.HDF5File(filename))
    m_loaded.isv_base = base
    assert m == m_loaded
    assert (m != m_loaded) is False
    assert m.is_similar_to(m_loaded)

    # Copy constructor
    mc = ISVMachine(m)
    assert m == mc

    # Variant
    mv = ISVMachine(base)
    # Checks for correctness
    # mv.isv_base = base
    m.z = z
    n_gaussians, dim, ru = m.shape
    supervector_length = m.supervector_length
    assert n_gaussians == 2
    assert dim == 3
    assert supervector_length == 6
    assert ru == 2
    assert (m.z == z).all()

    # Defines GMMStats
    gs = GMMStats(2, 3)
    log_likelihood = -3.
    T = 1
    n = numpy.array([0.4, 0.6], 'float64')
    sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64')
    sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64')
    gs.log_likelihood = log_likelihood
    gs.t = T
    gs.n = n
    gs.sum_px = sumpx
    gs.sum_pxx = sumpxx

    # Forward GMMStats and check estimated value of the x speaker factor
    eps = 1e-10
    x_ref = numpy.array([0.291042849767692, 0.310273618998444], 'float64')
    score_ref = -3.280498193082100
    score = m(gs)
    assert numpy.allclose(m.x, x_ref, eps)
    assert abs(score_ref - score) < eps

    # Check using alternate forward() method
    supervector_length = m.supervector_length
    Ux = numpy.ndarray(shape=(supervector_length,), dtype=numpy.float64)
    m.estimate_ux(gs, Ux)
    score = m.forward_ux(gs, Ux)
    assert abs(score_ref - score) < eps

    # x and Ux
    x = numpy.ndarray((2,), numpy.float64)
    m.estimate_x(gs, x)
    n_gaussians, dim, _ = m.shape
    x_py = estimate_x(n_gaussians, dim, ubm.mean_supervector,
                      ubm.variance_supervector, U, n, sumpx)
    assert numpy.allclose(x, x_py, eps)

    ux = numpy.ndarray((6,), numpy.float64)
    m.estimate_ux(gs, ux)
    n_gaussians, dim, _ = m.shape
    ux_py = estimate_ux(n_gaussians, dim, ubm.mean_supervector,
                        ubm.variance_supervector, U, n, sumpx)
    assert numpy.allclose(ux, ux_py, eps)
    assert numpy.allclose(m.x, x, eps)

    score = m.forward_ux(gs, ux)
    assert abs(score_ref - score) < eps

    # Clean-up
    os.unlink(filename)