# Assumed imports for this test module; the helpers `load_array`, `to_numpy`
# and `to_dask_array` are defined elsewhere in the test suite.
import numpy as np
from pkg_resources import resource_filename

from bob.learn.em import GMMMachine


def test_gmm_test():
    # Tests a GMMMachine by computing scores against a model and comparing
    # them to a reference.
    ar = load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5"))

    # Initialize the GMMMachine with ML-trained parameters.
    n_gaussians = 5
    gmm = GMMMachine(n_gaussians)
    gmm.means = load_array(
        resource_filename("bob.learn.em", "data/meansAfterML.hdf5")
    )
    gmm.variances = load_array(
        resource_filename("bob.learn.em", "data/variancesAfterML.hdf5")
    )
    gmm.weights = load_array(
        resource_filename("bob.learn.em", "data/weightsAfterML.hdf5")
    )

    threshold = 0.001
    gmm.variance_thresholds = threshold

    # Test against the model.
    score_mean_ref = -1.50379e06
    for transform in (to_numpy, to_dask_array):
        ar = transform(ar)
        score = gmm.log_likelihood(ar).sum()
        score /= len(ar)

        # Compare current results to torch3vision. The reference is negative,
        # so the denominator must be taken in absolute value for the relative
        # error bound to be meaningful.
        assert abs(score - score_mean_ref) / abs(score_mean_ref) < 1e-4
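
# A minimal sketch of the variance flooring configured above, assuming
# `variance_thresholds` clamps each variance dimension to a minimum value.
# The helper below is illustrative only, not part of the bob.learn.em API.
def _floor_variances(variances, threshold):
    # Variances below the floor are raised to it, which keeps the Gaussians
    # numerically stable in dimensions with little data spread.
    return np.maximum(variances, threshold)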

def test_GMMMachine_single_ll_vs_multiple():
    np.random.seed(3)  # fix the seed so the test is deterministic
    # The content of the data does not matter: averaging the per-sample (1D)
    # log-likelihoods must give the same result as the mean of the
    # log-likelihoods computed on the full 2D array at once.
    data = np.random.rand(100, 50)

    gmm = GMMMachine(n_gaussians=2)
    gmm.weights = load_array(
        resource_filename("bob.learn.em", "data/weights.hdf5")
    )
    gmm.means = load_array(resource_filename("bob.learn.em", "data/means.hdf5"))
    gmm.variances = load_array(
        resource_filename("bob.learn.em", "data/variances.hdf5")
    )

    # Accumulate the log-likelihood sample by sample, then average.
    ll = 0
    for i in range(data.shape[0]):
        ll += gmm.log_likelihood(data[i, :])
    ll /= data.shape[0]

    assert np.isclose(ll, gmm.log_likelihood(data).mean())

def test_likelihood():
    data = np.array([[1, 1, 1], [-1, 0, 0], [0, 0, 1], [2, 2, 2]])
    n_gaussians = 3
    machine = GMMMachine(n_gaussians)
    # Means are [0, 0, 0], [1, 1, 1] and [-1, -1, -1]; unit variances and the
    # default uniform weights.
    machine.means = np.repeat([[0], [1], [-1]], 3, 1)
    machine.variances = np.ones_like(machine.means)
    for transform in (to_numpy, to_dask_array):
        data = transform(data)
        log_likelihood = machine.log_likelihood(data)
        expected_ll = np.array(
            [
                -3.6519900964986527,
                -3.83151883210222,
                -3.83151883210222,
                -5.344374066745753,
            ]
        )
        np.testing.assert_almost_equal(log_likelihood, expected_ll)
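
# A minimal, hypothetical reference for the expected values above (not part of
# the bob.learn.em API; assumes scipy is available). The GMM log-likelihood of
# a sample x is log p(x) = log sum_k w_k * N(x; mu_k, diag(v_k)). With uniform
# weights, unit variances and means [0, 0, 0], [1, 1, 1], [-1, -1, -1], this
# reproduces e.g. -3.6519900964986527 for x = [1, 1, 1].
def _reference_log_likelihood(x, means, variances, weights):
    from scipy.stats import multivariate_normal

    per_component = [
        w * multivariate_normal.pdf(x, mean=m, cov=np.diag(v))
        for w, m, v in zip(weights, means, variances)
    ]
    return np.log(np.sum(per_component))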

def test_likelihood_weight():
    data = np.array([[1, 1, 1], [-1, 0, 0], [0, 0, 1], [2, 2, 2]])
    n_gaussians = 3
    machine = GMMMachine(n_gaussians)
    machine.means = np.repeat([[0], [1], [-1]], 3, 1)
    machine.variances = np.ones_like(machine.means)
    machine.weights = [0.6, 0.1, 0.3]
    for transform in (to_numpy, to_dask_array):
        data = transform(data)
        log_likelihood = machine.log_likelihood(data)
        expected_ll = np.array(
            [
                -4.206596356117164,
                -3.492325679996329,
                -3.634745457950943,
                -6.49485678536014,
            ]
        )
        np.testing.assert_almost_equal(log_likelihood, expected_ll)

def test_GMMMachine_ll_computation():
    """Test a GMMMachine (log-likelihood computation)"""
    data = load_array(resource_filename("bob.learn.em", "data/data.hdf5"))
    gmm = GMMMachine(n_gaussians=2)
    gmm.weights = load_array(
        resource_filename("bob.learn.em", "data/weights.hdf5")
    )
    gmm.means = load_array(resource_filename("bob.learn.em", "data/means.hdf5"))
    gmm.variances = load_array(
        resource_filename("bob.learn.em", "data/variances.hdf5")
    )

    # Compare the log-likelihood with the one obtained using Chris's Matlab
    # implementation.
    matlab_ll_ref = -2.361583051672024e02
    np.testing.assert_almost_equal(
        gmm.log_likelihood(data), matlab_ll_ref, decimal=10
    )

def test_likelihood_variance():
    data = np.array([[1, 1, 1], [-1, 0, 0], [0, 0, 1], [2, 2, 2]])
    n_gaussians = 3
    machine = GMMMachine(n_gaussians)
    machine.means = np.repeat([[0], [1], [-1]], 3, 1)
    machine.variances = np.array(
        [
            [1.1, 1.2, 0.8],
            [0.2, 0.4, 0.5],
            [1, 1, 1],
        ]
    )
    for transform in (to_numpy, to_dask_array):
        data = transform(data)
        log_likelihood = machine.log_likelihood(data)
        expected_ll = np.array(
            [
                -2.202846959440514,
                -3.8699524542323793,
                -4.229029034375473,
                -6.940892214952679,
            ]
        )
        np.testing.assert_almost_equal(log_likelihood, expected_ll)