def plot_contours(self, ax, show=False): COV = self.emp_cov COV_slice = EmpiricalCovariance() COV_slice.location_ = np.array([ COV.location_[0], COV.location_[1] ]) COV_slice.covariance_ = np.array([ COV.covariance_[ 0,0 ], COV.covariance_[ 0,1 ], COV.covariance_[ 1,0 ], COV.covariance_[ 1,1 ] ]) COV_slice.covariance_ = COV_slice.covariance_.reshape((2,2)) COV_slice.precision_ = np.array([ COV.precision_[ 0,0 ], COV.precision_[ 0,1 ], COV.precision_[ 1,0 ], COV.precision_[ 1,1 ] ]) COV_slice.precision_ = COV_slice.precision_.reshape((2,2)) # Show contours of the distance functions xx, yy = np.meshgrid( np.linspace(COV_slice.location_[0]-5*math.sqrt(COV_slice.covariance_[0,0]), COV_slice.location_[0]+5*math.sqrt(COV_slice.covariance_[0,0]), 100), np.linspace(COV_slice.location_[1]-5*math.sqrt(COV_slice.covariance_[1,1]), COV_slice.location_[1]+5*math.sqrt(COV_slice.covariance_[1,1]), 100), ) zz = np.c_[xx.ravel(), yy.ravel()] # Empirical fit is not so good. Don't plot this if False: # keep for debugging mahal_emp_cov = COV_slice.mahalanobis(zz) mahal_emp_cov = mahal_emp_cov.reshape(xx.shape) emp_cov_contour = ax.contour(xx, yy, np.sqrt(mahal_emp_cov), levels=[1.,2.,3.,4.,5.], #cmap=plt.cm.PuBu_r, cmap=plt.cm.cool_r, linestyles='dashed') COV = self.rob_cov COV_slice = EmpiricalCovariance() COV_slice.location_ = np.array([ COV.location_[0], COV.location_[1] ]) COV_slice.covariance_ = np.array([ COV.covariance_[ 0,0 ], COV.covariance_[ 0,1 ], COV.covariance_[ 1,0 ], COV.covariance_[ 1,1 ] ]) COV_slice.covariance_ = COV_slice.covariance_.reshape((2,2)) COV_slice.precision_ = np.array([ COV.precision_[ 0,0 ], COV.precision_[ 0,1 ], COV.precision_[ 1,0 ], COV.precision_[ 1,1 ] ]) COV_slice.precision_ = COV_slice.precision_.reshape((2,2)) self.robust_model_XY = COV_slice # robust is better if show: mahal_robust_cov = COV_slice.mahalanobis(zz) mahal_robust_cov = mahal_robust_cov.reshape(xx.shape) robust_contour = ax.contour(xx, yy, np.sqrt(mahal_robust_cov), levels=[1.,2.,3.,4.,5.], #cmap=plt.cm.YlOrBr_r, cmap=plt.cm.spring_r, linestyles='dotted')
def test_suffstat_sk_tied(): # use equation Nk * Sk / N = S_tied rng = np.random.RandomState(0) n_samples, n_features, n_components = 500, 2, 2 resp = rng.rand(n_samples, n_components) resp = resp / resp.sum(axis=1)[:, np.newaxis] X = rng.rand(n_samples, n_features) nk = resp.sum(axis=0) xk = np.dot(resp.T, X) / nk[:, np.newaxis] covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0) covars_pred_full = np.sum(nk[:, np.newaxis, np.newaxis] * covars_pred_full, 0) / n_samples covars_pred_tied = _estimate_gaussian_covariances_tied(resp, X, nk, xk, 0) ecov = EmpiricalCovariance() ecov.covariance_ = covars_pred_full assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='frobenius'), 0) assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='spectral'), 0) # check the precision computation precs_chol_pred = _compute_precision_cholesky(covars_pred_tied, 'tied') precs_pred = np.dot(precs_chol_pred, precs_chol_pred.T) precs_est = linalg.inv(covars_pred_tied) assert_array_almost_equal(precs_est, precs_pred)
def test_suffstat_sk_tied(): # use equation Nk * Sk / N = S_tied rng = np.random.RandomState(0) n_samples, n_features, n_components = 500, 2, 2 resp = rng.rand(n_samples, n_components) resp = resp / resp.sum(axis=1)[:, np.newaxis] X = rng.rand(n_samples, n_features) nk = resp.sum(axis=0) xk = np.dot(resp.T, X) / nk[:, np.newaxis] covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0) covars_pred_full = np.sum(nk[:, np.newaxis, np.newaxis] * covars_pred_full, 0) / n_samples covars_pred_tied = _estimate_gaussian_covariances_tied(resp, X, nk, xk, 0) ecov = EmpiricalCovariance() ecov.covariance_ = covars_pred_full assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='frobenius'), 0) assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='spectral'), 0) # check the precision computation precs_chol_pred = _compute_precision_cholesky(covars_pred_tied, 'tied') precs_pred = np.dot(precs_chol_pred, precs_chol_pred.T) precs_est = linalg.inv(covars_pred_tied) assert_array_almost_equal(precs_est, precs_pred)
def test_suffstat_sk_diag(): # test against 'full' case rng = np.random.RandomState(0) n_samples, n_features, n_components = 500, 2, 2 resp = rng.rand(n_samples, n_components) resp = resp / resp.sum(axis=1)[:, np.newaxis] X = rng.rand(n_samples, n_features) nk = resp.sum(axis=0) xk = np.dot(resp.T, X) / nk[:, np.newaxis] precs_pred_full = _estimate_gaussian_precisions_cholesky_full( resp, X, nk, xk, 0) covars_pred_full = [ linalg.inv(np.dot(precision_chol, precision_chol.T)) for precision_chol in precs_pred_full ] precs_pred_diag = _estimate_gaussian_precisions_cholesky_diag( resp, X, nk, xk, 0) covars_pred_diag = np.array([np.diag(1. / d)**2 for d in precs_pred_diag]) ecov = EmpiricalCovariance() for (cov_full, cov_diag) in zip(covars_pred_full, covars_pred_diag): ecov.covariance_ = np.diag(np.diag(cov_full)) assert_almost_equal(ecov.error_norm(cov_diag, norm='frobenius'), 0) assert_almost_equal(ecov.error_norm(cov_diag, norm='spectral'), 0)
def test_suffstat_sk_diag(): # test against 'full' case rng = np.random.RandomState(0) n_samples, n_features, n_components = 500, 2, 2 resp = rng.rand(n_samples, n_components) resp = resp / resp.sum(axis=1)[:, np.newaxis] X = rng.rand(n_samples, n_features) nk = resp.sum(axis=0) xk = np.dot(resp.T, X) / nk[:, np.newaxis] precs_pred_full = _estimate_gaussian_precisions_cholesky_full(resp, X, nk, xk, 0) covars_pred_full = [linalg.inv(np.dot(precision_chol, precision_chol.T)) for precision_chol in precs_pred_full] precs_pred_diag = _estimate_gaussian_precisions_cholesky_diag(resp, X, nk, xk, 0) covars_pred_diag = np.array([np.diag(1. / d) ** 2 for d in precs_pred_diag]) ecov = EmpiricalCovariance() for (cov_full, cov_diag) in zip(covars_pred_full, covars_pred_diag): ecov.covariance_ = np.diag(np.diag(cov_full)) assert_almost_equal(ecov.error_norm(cov_diag, norm='frobenius'), 0) assert_almost_equal(ecov.error_norm(cov_diag, norm='spectral'), 0)
def test_gaussian_mixture_fit(): # recover the ground truth rng = np.random.RandomState(0) rand_data = RandomData(rng) n_features = rand_data.n_features n_components = rand_data.n_components for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] g = GaussianMixture( n_components=n_components, n_init=20, reg_covar=0, random_state=rng, covariance_type=covar_type, ) g.fit(X) # needs more data to pass the test with rtol=1e-7 assert_allclose(np.sort(g.weights_), np.sort(rand_data.weights), rtol=0.1, atol=1e-2) arg_idx1 = g.means_[:, 0].argsort() arg_idx2 = rand_data.means[:, 0].argsort() assert_allclose(g.means_[arg_idx1], rand_data.means[arg_idx2], rtol=0.1, atol=1e-2) if covar_type == "full": prec_pred = g.precisions_ prec_test = rand_data.precisions["full"] elif covar_type == "tied": prec_pred = np.array([g.precisions_] * n_components) prec_test = np.array([rand_data.precisions["tied"]] * n_components) elif covar_type == "spherical": prec_pred = np.array( [np.eye(n_features) * c for c in g.precisions_]) prec_test = np.array([ np.eye(n_features) * c for c in rand_data.precisions["spherical"] ]) elif covar_type == "diag": prec_pred = np.array([np.diag(d) for d in g.precisions_]) prec_test = np.array( [np.diag(d) for d in rand_data.precisions["diag"]]) arg_idx1 = np.trace(prec_pred, axis1=1, axis2=2).argsort() arg_idx2 = np.trace(prec_test, axis1=1, axis2=2).argsort() for k, h in zip(arg_idx1, arg_idx2): ecov = EmpiricalCovariance() ecov.covariance_ = prec_test[h] # the accuracy depends on the number of data and randomness, rng assert_allclose(ecov.error_norm(prec_pred[k]), 0, atol=0.15)
def test_gaussian_mixture_fit(): # recover the ground truth rng = np.random.RandomState(0) rand_data = RandomData(rng) n_features = rand_data.n_features n_components = rand_data.n_components for covar_type in COVARIANCE_TYPE: X = rand_data.X[covar_type] g = GaussianMixture(n_components=n_components, n_init=20, reg_covar=0, random_state=rng, covariance_type=covar_type) g.fit(X) # needs more data to pass the test with rtol=1e-7 assert_allclose(np.sort(g.weights_), np.sort(rand_data.weights), rtol=0.1, atol=1e-2) arg_idx1 = g.means_[:, 0].argsort() arg_idx2 = rand_data.means[:, 0].argsort() assert_allclose(g.means_[arg_idx1], rand_data.means[arg_idx2], rtol=0.1, atol=1e-2) if covar_type == 'full': prec_pred = g.precisions_ prec_test = rand_data.precisions['full'] elif covar_type == 'tied': prec_pred = np.array([g.precisions_] * n_components) prec_test = np.array([rand_data.precisions['tied']] * n_components) elif covar_type == 'spherical': prec_pred = np.array([np.eye(n_features) * c for c in g.precisions_]) prec_test = np.array([np.eye(n_features) * c for c in rand_data.precisions['spherical']]) elif covar_type == 'diag': prec_pred = np.array([np.diag(d) for d in g.precisions_]) prec_test = np.array([np.diag(d) for d in rand_data.precisions['diag']]) arg_idx1 = np.trace(prec_pred, axis1=1, axis2=2).argsort() arg_idx2 = np.trace(prec_test, axis1=1, axis2=2).argsort() for k, h in zip(arg_idx1, arg_idx2): ecov = EmpiricalCovariance() ecov.covariance_ = prec_test[h] # the accuracy depends on the number of data and randomness, rng assert_allclose(ecov.error_norm(prec_pred[k]), 0, atol=0.1)
def test_suffstat_sk_tied(): # use equation Nk * Sk / N = S_tied rng = np.random.RandomState(0) n_samples, n_features, n_components = 500, 2, 2 resp = rng.rand(n_samples, n_components) resp = resp / resp.sum(axis=1)[:, np.newaxis] X = rng.rand(n_samples, n_features) nk = resp.sum(axis=0) xk = np.dot(resp.T, X) / nk[:, np.newaxis] covars_pred_full = _estimate_gaussian_covariance_full(resp, X, nk, xk, 0) covars_pred_full = np.sum(nk[:, np.newaxis, np.newaxis] * covars_pred_full, 0) / n_samples covars_pred_tied = _estimate_gaussian_covariance_tied(resp, X, nk, xk, 0) ecov = EmpiricalCovariance() ecov.covariance_ = covars_pred_full assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='frobenius'), 0) assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='spectral'), 0)
def test_suffstat_sk_diag(): # test against 'full' case rng = np.random.RandomState(0) n_samples, n_features, n_components = 500, 2, 2 resp = rng.rand(n_samples, n_components) resp = resp / resp.sum(axis=1)[:, np.newaxis] X = rng.rand(n_samples, n_features) nk = resp.sum(axis=0) xk = np.dot(resp.T, X) / nk[:, np.newaxis] covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0) covars_pred_diag = _estimate_gaussian_covariances_diag(resp, X, nk, xk, 0) ecov = EmpiricalCovariance() for (cov_full, cov_diag) in zip(covars_pred_full, covars_pred_diag): ecov.covariance_ = np.diag(np.diag(cov_full)) cov_diag = np.diag(cov_diag) assert_almost_equal(ecov.error_norm(cov_diag, norm='frobenius'), 0) assert_almost_equal(ecov.error_norm(cov_diag, norm='spectral'), 0) # check the precision computation precs_chol_pred = _compute_precision_cholesky(covars_pred_diag, 'diag') assert_almost_equal(covars_pred_diag, 1. / precs_chol_pred**2)
def test_suffstat_sk_diag(): # test against 'full' case rng = np.random.RandomState(0) n_samples, n_features, n_components = 500, 2, 2 resp = rng.rand(n_samples, n_components) resp = resp / resp.sum(axis=1)[:, np.newaxis] X = rng.rand(n_samples, n_features) nk = resp.sum(axis=0) xk = np.dot(resp.T, X) / nk[:, np.newaxis] covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0) covars_pred_diag = _estimate_gaussian_covariances_diag(resp, X, nk, xk, 0) ecov = EmpiricalCovariance() for (cov_full, cov_diag) in zip(covars_pred_full, covars_pred_diag): ecov.covariance_ = np.diag(np.diag(cov_full)) cov_diag = np.diag(cov_diag) assert_almost_equal(ecov.error_norm(cov_diag, norm='frobenius'), 0) assert_almost_equal(ecov.error_norm(cov_diag, norm='spectral'), 0) # check the precision computation precs_chol_pred = _compute_precision_cholesky(covars_pred_diag, 'diag') assert_almost_equal(covars_pred_diag, 1. / precs_chol_pred ** 2)