def test_map_transformer():
    post_data = np.array(
        [[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8], [7, 9, 7]]
    )
    test_data = np.array([[1, 1, 1], [1, 1, 2], [8, 9, 9], [8, 8, 8]])
    n_gaussians = 2
    n_features = 3

    # Prior (UBM) with fixed means, unit variances, and uniform weights.
    prior_machine = GMMMachine(n_gaussians)
    prior_machine.means = np.array([[2, 2, 2], [8, 8, 8]])
    prior_machine.variances = np.ones_like(prior_machine.means)
    prior_machine.weights = np.array([0.5, 0.5])

    # Machine to be MAP-adapted from the prior.
    machine = GMMMachine(
        n_gaussians,
        trainer="map",
        ubm=prior_machine,
        update_means=True,
        update_variances=True,
        update_weights=True,
    )

    # Fit and check the results with both numpy and dask inputs.
    for transform in (to_numpy, to_dask_array):
        post_data = transform(post_data)
        machine = machine.fit(post_data)

        expected_means = np.array(
            [[1.83333333, 1.83333333, 2.0], [7.57142857, 8, 8]]
        )
        np.testing.assert_almost_equal(machine.means, expected_means)
        eps = np.finfo(float).eps
        expected_vars = np.array([[eps, eps, eps], [eps, eps, eps]])
        np.testing.assert_almost_equal(machine.variances, expected_vars)
        expected_weights = np.array([0.46226415, 0.53773585])
        np.testing.assert_almost_equal(machine.weights, expected_weights)

        stats = machine.acc_stats(test_data)

        expected_stats = GMMStats(n_gaussians, n_features)
        expected_stats.init_fields(
            log_likelihood=-1.3837590691807108e16,
            t=test_data.shape[0],
            n=np.array([2, 2], dtype=float),
            sum_px=np.array([[2, 2, 3], [16, 17, 17]], dtype=float),
            sum_pxx=np.array([[2, 2, 5], [128, 145, 145]], dtype=float),
        )
        assert stats.is_similar_to(expected_stats)
def test_ml_transformer():
    data = np.array([[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8], [7, 9, 7]])
    test_data = np.array([[1, 1, 1], [1, 1, 2], [8, 9, 9], [8, 8, 8]])
    n_gaussians = 2
    n_features = 3

    # Machine trained with maximum likelihood (the default trainer).
    machine = GMMMachine(
        n_gaussians,
        update_means=True,
        update_variances=True,
        update_weights=True,
    )
    machine.means = np.array([[2, 2, 2], [8, 8, 8]])
    machine.variances = np.ones_like(machine.means)

    # Fit and check the results with both numpy and dask inputs.
    for transform in (to_numpy, to_dask_array):
        data = transform(data)
        machine = machine.fit(data)

        expected_means = np.array([[1.5, 1.5, 2.0], [7.0, 8.0, 8.0]])
        np.testing.assert_almost_equal(machine.means, expected_means)
        expected_weights = np.array([2 / 5, 3 / 5])
        np.testing.assert_almost_equal(machine.weights, expected_weights)
        eps = np.finfo(float).eps
        expected_variances = np.array(
            [[1 / 4, 1 / 4, eps], [eps, 2 / 3, 2 / 3]]
        )
        np.testing.assert_almost_equal(machine.variances, expected_variances)

        stats = machine.acc_stats(test_data)

        expected_stats = GMMStats(n_gaussians, n_features)
        expected_stats.init_fields(
            log_likelihood=-6755399441055685.0,
            t=test_data.shape[0],
            n=np.array([2, 2], dtype=float),
            sum_px=np.array([[2, 2, 3], [16, 17, 17]], dtype=float),
            sum_pxx=np.array([[2, 2, 5], [128, 145, 145]], dtype=float),
        )
        assert stats.is_similar_to(expected_stats)
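# Note: the `to_numpy` and `to_dask_array` helpers used by the transformer
# tests above are not defined in this section; they are presumably provided
# elsewhere in this module. A minimal sketch of what they are assumed to do
# (convert the fit data to a numpy or a dask array) is given below; if the
# module already defines them, this sketch is redundant.
import dask.array as da  # assumed dependency of the sketch below


def to_numpy(*args):
    # Convert each argument to a plain numpy array; unwrap single results.
    result = [np.array(x) for x in args]
    return result[0] if len(result) == 1 else result


def to_dask_array(*args):
    # Convert each argument to a dask array backed by numpy; unwrap single results.
    result = [da.from_array(np.array(x)) for x in args]
    return result[0] if len(result) == 1 else result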
def test_GMMStats():
    # Test a GMMStats
    # Initializes a GMMStats
    n_gaussians = 2
    n_features = 3
    gs = GMMStats(n_gaussians, n_features)
    log_likelihood = -3.0
    T = 57
    n = np.array([4.37, 5.31], "float64")
    sumpx = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], "float64")
    sumpxx = np.array([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]], "float64")
    gs.log_likelihood = log_likelihood
    gs.t = T
    gs.n = n
    gs.sum_px = sumpx
    gs.sum_pxx = sumpxx
    np.testing.assert_equal(gs.log_likelihood, log_likelihood)
    np.testing.assert_equal(gs.t, T)
    np.testing.assert_equal(gs.n, n)
    np.testing.assert_equal(gs.sum_px, sumpx)
    np.testing.assert_equal(gs.sum_pxx, sumpxx)
    np.testing.assert_equal(gs.shape, (n_gaussians, n_features))

    # Saves and reads from file using `from_hdf5`
    filename = str(tempfile.mkstemp(".hdf5")[1])
    gs.save(HDF5File(filename, "w"))
    gs_loaded = GMMStats.from_hdf5(HDF5File(filename, "r"))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)
    assert type(gs_loaded.n_gaussians) is np.int64
    assert type(gs_loaded.n_features) is np.int64
    assert type(gs_loaded.log_likelihood) is np.float64

    # Saves and loads from file using `load`
    filename = str(tempfile.mkstemp(".hdf5")[1])
    gs.save(hdf5=HDF5File(filename, "w"))
    gs_loaded = GMMStats(n_gaussians, n_features)
    gs_loaded.load(HDF5File(filename, "r"))
    assert gs == gs_loaded
    assert (gs != gs_loaded) is False
    assert gs.is_similar_to(gs_loaded)

    # Makes them different
    gs_loaded.t = 58
    assert (gs == gs_loaded) is False
    assert gs != gs_loaded
    assert not gs.is_similar_to(gs_loaded)

    # Accumulates from another GMMStats
    gs2 = GMMStats(n_gaussians, n_features)
    gs2.log_likelihood = log_likelihood
    gs2.t = T
    gs2.n = n.copy()
    gs2.sum_px = sumpx.copy()
    gs2.sum_pxx = sumpxx.copy()
    gs2 += gs
    np.testing.assert_equal(gs2.log_likelihood, 2 * log_likelihood)
    np.testing.assert_equal(gs2.t, 2 * T)
    np.testing.assert_almost_equal(gs2.n, 2 * n, decimal=8)
    np.testing.assert_almost_equal(gs2.sum_px, 2 * sumpx, decimal=8)
    np.testing.assert_almost_equal(gs2.sum_pxx, 2 * sumpxx, decimal=8)

    # Re-init and checks for zeros
    gs_loaded.init_fields()
    np.testing.assert_equal(gs_loaded.log_likelihood, 0)
    np.testing.assert_equal(gs_loaded.t, 0)
    np.testing.assert_equal(gs_loaded.n, np.zeros((n_gaussians,)))
    np.testing.assert_equal(
        gs_loaded.sum_px, np.zeros((n_gaussians, n_features))
    )
    np.testing.assert_equal(
        gs_loaded.sum_pxx, np.zeros((n_gaussians, n_features))
    )

    # Resize and checks size
    assert gs_loaded.shape == (n_gaussians, n_features)
    gs_loaded.resize(4, 5)
    assert gs_loaded.shape == (4, 5)
    assert gs_loaded.sum_px.shape[0] == 4
    assert gs_loaded.sum_px.shape[1] == 5

    # Clean-up
    os.unlink(filename)