def _estimate_log_prob(self, X):
    """Combine the component log-probabilities with the forward-part term.

    The result is the sum of two pieces:

    * ``_estimate_log_gaussian_prob`` evaluated on ``self.y_sub`` with the
      fitted means, Cholesky precisions and covariance type, and
    * ``_estimate_log_gaussian_prob_forward_part`` evaluated on ``X`` and
      ``self.y_sub`` (with ``self.forward_model`` and ``self.noise``),
      repeated ``self.n_components`` times along axis 1 and scaled by
      ``1 / self.n_components``.

    NOTE(review): presumably the forward part has a single column so the
    repeat lines it up with one column per component -- confirm.
    """
    # TODO: Smarter solution than repeating n_components times?
    component_log_prob = _estimate_log_gaussian_prob(
        self.y_sub, self.means_, self.precisions_cholesky_,
        self.covariance_type)
    per_component_share = 1/self.n_components
    forward_log_prob = _estimate_log_gaussian_prob_forward_part(
        X, self.y_sub, self.forward_model, self.noise)
    repeated_forward = np.repeat(
        forward_log_prob, self.n_components, axis=1)
    return component_log_prob + per_component_share*repeated_forward
def obj_spatial_error_sum_and_naturalness_jac(s, data):
    """Jacobian of the combined naturalness and spatial error function.

    The motion primitive model part is computed analytically from the
    Gaussian mixture model; the kinematic error part is approximated
    numerically with ``approx_fprime`` on ``obj_spatial_error_sum``.

    Parameters
    ----------
    s : array
        Point at which the Jacobian is evaluated.
    data : tuple
        ``data[0]`` must offer ``get_gaussian_mixture_model()``,
        ``data[-2]`` is the quality scale and ``data[-1]`` the error scale.
        Note other entries are used for calling obj_error_sum.

    Returns
    -------
    The mixture term times the quality scale plus the numerical kinematic
    term times the error scale.

    NOTE(review): the mixture object is read through both ``covars_`` and
    ``precisions_cholesky_``; confirm the model type exposes both.
    """
    # Pull the pieces of the data tuple that this function needs.
    mixture = data[0].get_gaussian_mixture_model()
    error_scale = data[-1]
    quality_scale = data[-2]

    component_log_probs = np.ravel(_estimate_log_gaussian_prob(
        s, mixture.means_, mixture.precisions_cholesky_, 'full'))

    # Weighted sum over components of density * inv(covariance) * (s - mean).
    weighted_terms = 0
    for idx, weight in enumerate(mixture.weights_):
        scale = np.exp(component_log_probs[idx]) * weight
        weighted_terms += scale * np.dot(
            np.linalg.inv(mixture.covars_[idx]), (s - mixture.means_[idx]))

    mixture_density = np.exp(mixture.score([s])[0])
    log_likelihood_jac = weighted_terms / mixture_density

    # Only the last two entries of data are forwarded here, i.e. the
    # kinematic factor and quality factor are ignored.
    kinematic_jac = approx_fprime(
        s, obj_spatial_error_sum, 1e-7, data[-2:])

    return log_likelihood_jac * quality_scale + kinematic_jac * error_scale
def test_gaussian_mixture_log_probabilities():
    """Check ``_estimate_log_gaussian_prob`` against ``_naive_lmvnpdf_diag``.

    The same diagonal covariances are expressed as 'full', 'diag', 'tied'
    and 'spherical' Cholesky precisions, and for each covariance type the
    estimated log-probabilities must almost equal the naive reference.
    """
    from sklearn.mixture.gaussian_mixture import _estimate_log_gaussian_prob

    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_samples = 500
    n_features = rand_data.n_features
    n_components = rand_data.n_components

    means = rand_data.means
    covars_diag = rng.rand(n_components, n_features)
    X = rng.rand(n_samples, n_features)

    def check(precisions_chol, covariance_type, expected):
        # Estimated log-probabilities must match the naive reference.
        estimated = _estimate_log_gaussian_prob(
            X, means, precisions_chol, covariance_type)
        assert_array_almost_equal(estimated, expected)

    # The 'full' and 'diag' cases share one naive reference.
    reference = _naive_lmvnpdf_diag(X, means, covars_diag)

    # full covariances: one diagonal matrix per component
    check(np.array([np.diag(1. / np.sqrt(row)) for row in covars_diag]),
          'full', reference)

    # diag covariances
    check(1. / np.sqrt(covars_diag), 'diag', reference)

    # tied: a single covariance, averaged over the components
    covars_tied = covars_diag.mean(axis=0)
    reference = _naive_lmvnpdf_diag(X, means, [covars_tied] * n_components)
    check(np.diag(np.sqrt(1. / covars_tied)), 'tied', reference)

    # spherical: one variance per component, averaged over the features
    covars_spherical = covars_diag.mean(axis=1)
    reference = _naive_lmvnpdf_diag(
        X, means, [[k] * n_features for k in covars_spherical])
    check(1. / np.sqrt(covars_spherical), 'spherical', reference)
def test_gaussian_mixture_log_probabilities():
    """Test ``_estimate_log_gaussian_prob`` against ``_naive_lmvnpdf_diag``.

    Random diagonal covariances are converted to Cholesky precisions for
    each of the 'full', 'diag', 'tied' and 'spherical' covariance types;
    every result is compared with the naive diagonal implementation.
    """
    from sklearn.mixture.gaussian_mixture import _estimate_log_gaussian_prob

    # Draw the fixtures; the order of the rng calls is significant.
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_samples = 500
    n_features = rand_data.n_features
    n_components = rand_data.n_components
    means = rand_data.means
    covars_diag = rng.rand(n_components, n_features)
    X = rng.rand(n_samples, n_features)

    expected = _naive_lmvnpdf_diag(X, means, covars_diag)

    # full covariances
    chol_full = np.array([np.diag(1. / np.sqrt(variances))
                          for variances in covars_diag])
    actual = _estimate_log_gaussian_prob(X, means, chol_full, 'full')
    assert_array_almost_equal(actual, expected)

    # diag covariances
    chol_diag = 1. / np.sqrt(covars_diag)
    actual = _estimate_log_gaussian_prob(X, means, chol_diag, 'diag')
    assert_array_almost_equal(actual, expected)

    # tied
    tied_variances = covars_diag.mean(axis=0)
    chol_tied = np.diag(np.sqrt(1. / tied_variances))
    expected = _naive_lmvnpdf_diag(
        X, means, [tied_variances] * n_components)
    actual = _estimate_log_gaussian_prob(X, means, chol_tied, 'tied')
    assert_array_almost_equal(actual, expected)

    # spherical
    spherical_variances = covars_diag.mean(axis=1)
    chol_spherical = 1. / np.sqrt(spherical_variances)
    expected = _naive_lmvnpdf_diag(
        X, means, [[k] * n_features for k in spherical_variances])
    actual = _estimate_log_gaussian_prob(
        X, means, chol_spherical, 'spherical')
    assert_array_almost_equal(actual, expected)
def log_likelihood_jac(s, gmm):
    """Analytic Jacobian term of the mixture likelihood at ``s``.

    For every mixture component the term
    ``exp(log_prob) * weight * inv(covariance) . (s - mean)`` is accumulated,
    and the sum is divided by ``exp(gmm.score(s))``. When that divisor is
    exactly zero an array of ones with the shape of ``s`` is returned
    instead.

    NOTE(review): the model is read through ``means_`` and
    ``precisions_cholesky_`` as well as ``weights``, ``covars`` and
    ``means``; confirm the gmm type really exposes both spellings.
    """
    component_log_probs = np.ravel(_estimate_log_gaussian_prob(
        s, gmm.means_, gmm.precisions_cholesky_, 'full'))

    weighted_sum = 0
    for idx, weight in enumerate(gmm.weights):
        scale = np.exp(component_log_probs[idx]) * weight
        weighted_sum += scale * np.dot(
            np.linalg.inv(gmm.covars[idx]), (s - gmm.means[idx]))

    mixture_density = np.exp(gmm.score(s))
    if mixture_density != 0:
        return weighted_sum / mixture_density
    # Fallback used when the density is exactly zero.
    return np.ones(s.shape)
def test_hand_computation_of_log_prob_vs_sklearn(self):
    """Compare a hand-written spherical log-probability with the reference.

    The squared distance term is computed by hand and plugged into the log
    probability formula, which is then compared to the output of
    ``_estimate_log_gaussian_prob``. As a consequence this test does not
    exercise any of the code in gmm.py; it only gathers evidence about
    what the mahadist computation ought to return.

    For this to work the mask must be entirely shared.
    """
    cov_type = 'spherical'
    rs = np.random.RandomState(10)
    gmm = GaussianMixture(
        n_components=3, num_feat_full=5, num_feat_comp=3,
        num_feat_shared=3, num_samp=4, transform=None, mask=None,
        D_indices=None, covariance_type=cov_type, random_state=rs)
    gmm.fit_sparsifier(X=self.td.X)

    means = rs.rand(gmm.n_components, gmm.num_feat_full)
    covariances = rs.rand(gmm.n_components)
    precisions = _compute_precision_cholesky(covariances, cov_type)

    # The mask has to be shared here, so that every mask row equals mask[0].
    shared_mask_means = means[:, gmm.mask[0]]
    log_prob_true = _estimate_log_gaussian_prob(
        gmm.RHDX, shared_mask_means, precisions, cov_type)

    log_prob_test = np.zeros((gmm.num_samp, gmm.n_components))
    # The constant term does not depend on the sample or the component.
    const_term = gmm.num_feat_comp * np.log(2 * np.pi)
    for row in range(gmm.num_samp):
        for col in range(gmm.n_components):
            variance = covariances[col]
            logdet_term = gmm.num_feat_comp * np.log(variance)
            residual = gmm.RHDX[row] - means[col][gmm.mask[row]]
            mahadist_term = 1/variance * np.linalg.norm(residual)**2
            log_prob_test[row, col] = -.5*(
                const_term + logdet_term + mahadist_term)

    self.assertArrayEqual(log_prob_test, log_prob_true)
def test__estimate_log_prob_resp_spherical_shared_compression(self):
    """Check ``_estimate_log_prob_resp`` against GMSKL, spherical case.

    The model is built with 5 full features and 3 compressed, all shared.
    Log-probabilities, their normalizer and the log-responsibilities are
    compared with values obtained from a hand-populated ``GMSKL`` instance
    and from ``_estimate_log_gaussian_prob``.
    """
    rs = np.random.RandomState(11)
    cov_type = 'spherical'
    gmm = GaussianMixture(
        n_components=3, num_feat_full=5, num_feat_comp=3,
        num_feat_shared=3, num_samp=4, transform=None, mask=None,
        D_indices=None, covariance_type=cov_type, random_state=rs)
    gmm.fit_sparsifier(X=self.td.X)

    means = rs.rand(gmm.n_components, gmm.num_feat_full)
    covariances = rs.rand(gmm.n_components)
    raw_weights = rs.rand(gmm.n_components)
    weights = raw_weights / raw_weights.sum()

    estimates = gmm._estimate_log_prob_resp(
        weights, means, covariances, cov_type)
    log_prob_test, log_resp_test, log_prob_norm_test = estimates

    # Reference values: populate the fitted attributes of GMSKL by hand.
    precisions = _compute_precision_cholesky(covariances, cov_type)
    gmm_skl = GMSKL(n_components=3, covariance_type=cov_type)
    # The mask must be shared so that mask[0] can be applied to all means.
    gmm_skl.means_ = means[:, gmm.mask[0]]
    gmm_skl.precisions_cholesky_ = precisions
    gmm_skl.weights_ = weights
    gmm_skl.covariance_type_ = cov_type
    per_sample_norm, log_resp_true = gmm_skl._estimate_log_prob_resp(
        gmm.RHDX)
    # If anything is bad later, this reduction to the mean seems suspect.
    log_prob_norm_true = per_sample_norm.mean()

    # The log-probabilities themselves come from a separate function.
    log_prob_true = _estimate_log_gaussian_prob(
        gmm.RHDX, gmm_skl.means_, precisions, cov_type)

    self.assertArrayEqual(log_prob_test, log_prob_true)
    self.assertArrayEqual(log_prob_norm_true, log_prob_norm_test)
    self.assertArrayEqual(log_resp_true, log_resp_test)
def test__compute_log_prob_diagonal_no_compression(self):
    """Check ``_compute_log_prob`` without compression, diagonal case.

    With the compressed feature count equal to the full feature count, the
    result of ``gmm._compute_log_prob`` is compared with
    ``_estimate_log_gaussian_prob`` evaluated on the raw data. Written as a
    precursor to the tests that do use compression.
    """
    cov_type = 'diag'
    gmm = GaussianMixture(
        n_components=3, num_feat_full=5, num_feat_comp=5,
        num_feat_shared=5, num_samp=4, transform=None, mask=None,
        D_indices=None, covariance_type=cov_type)
    gmm.fit_sparsifier(X=self.td.X)

    param_shape = (gmm.n_components, gmm.num_feat_comp)
    means = np.random.rand(*param_shape)
    covariances = np.random.rand(*param_shape)

    log_prob_test = gmm._compute_log_prob(means, covariances, cov_type)

    # The reference function takes Cholesky precisions, not covariances.
    precisions = _compute_precision_cholesky(covariances, cov_type)
    log_prob_true = _estimate_log_gaussian_prob(
        self.td.X, means, precisions, cov_type)

    self.assertArrayEqual(log_prob_test, log_prob_true)
def test__estimate_log_prob_resp_diagonal_no_compression(self):
    """Check ``_estimate_log_prob_resp`` against GMSKL, diagonal case.

    No compression is applied (compressed feature count equals the full
    one). Log-probabilities, their normalizer and the log-responsibilities
    are compared with values from a hand-populated ``GMSKL`` instance and
    from ``_estimate_log_gaussian_prob``.
    """
    cov_type = 'diag'
    gmm = GaussianMixture(
        n_components=3, num_feat_full=5, num_feat_comp=5,
        num_feat_shared=5, num_samp=4, transform=None, mask=None,
        D_indices=None, covariance_type=cov_type)
    gmm.fit_sparsifier(X=self.td.X)

    means = np.random.rand(gmm.n_components, gmm.num_feat_comp)
    covariances = np.random.rand(gmm.n_components, gmm.num_feat_comp)
    raw_weights = np.random.rand(gmm.n_components)
    weights = raw_weights / raw_weights.sum()

    estimates = gmm._estimate_log_prob_resp(
        weights, means, covariances, cov_type)
    log_prob_test, log_resp_test, log_prob_norm_test = estimates

    # Reference values: populate the fitted attributes of GMSKL by hand.
    precisions = _compute_precision_cholesky(covariances, cov_type)
    gmm_skl = GMSKL(n_components=3, covariance_type=cov_type)
    gmm_skl.means_ = means
    gmm_skl.precisions_cholesky_ = precisions
    gmm_skl.weights_ = weights
    gmm_skl.covariance_type_ = cov_type
    per_sample_norm, log_resp_true = gmm_skl._estimate_log_prob_resp(
        self.td.X)
    # If anything is bad later, this reduction to the mean seems suspect.
    log_prob_norm_true = per_sample_norm.mean()

    # The log-probabilities themselves come from a separate function.
    log_prob_true = _estimate_log_gaussian_prob(
        self.td.X, means, precisions, cov_type)

    self.assertArrayEqual(log_prob_test, log_prob_true)
    self.assertArrayEqual(log_prob_norm_true, log_prob_norm_test)
    self.assertArrayEqual(log_resp_true, log_resp_test)
def _multivariate_gaussian_prediction(gmm, X):
    """Exponentiate the Gaussian log-probabilities of ``X`` under ``gmm``.

    The log-probabilities come from ``_estimate_log_gaussian_prob`` called
    with the model's ``means_``, ``precisions_cholesky_`` and
    ``covariance_type``.
    """
    log_prob = _estimate_log_gaussian_prob(
        X, gmm.means_, gmm.precisions_cholesky_, gmm.covariance_type)
    return np.exp(log_prob)
def _estimate_log_prob(self, X):
    """Return the Gaussian log-probabilities of ``X`` for this model.

    Delegates to ``_estimate_log_gaussian_prob`` with the instance's
    ``means_``, ``precisions_cholesky_`` and ``covariance_type``.
    """
    gaussian_params = (
        self.means_, self.precisions_cholesky_, self.covariance_type)
    return _estimate_log_gaussian_prob(X, *gaussian_params)