Esempio n. 1
0
  def plot_contours(self, ax, show=False):
    COV = self.emp_cov
    COV_slice = EmpiricalCovariance()
    COV_slice.location_ = np.array([ COV.location_[0], COV.location_[1] ])
    COV_slice.covariance_ = np.array([ COV.covariance_[ 0,0 ], COV.covariance_[ 0,1 ],
                                       COV.covariance_[ 1,0 ], COV.covariance_[ 1,1 ] ])
    COV_slice.covariance_ = COV_slice.covariance_.reshape((2,2))
    COV_slice.precision_ = np.array([ COV.precision_[ 0,0 ], COV.precision_[ 0,1 ],
                                      COV.precision_[ 1,0 ], COV.precision_[ 1,1 ] ])
    COV_slice.precision_ = COV_slice.precision_.reshape((2,2))

    # Show contours of the distance functions
    xx, yy = np.meshgrid(
          np.linspace(COV_slice.location_[0]-5*math.sqrt(COV_slice.covariance_[0,0]), COV_slice.location_[0]+5*math.sqrt(COV_slice.covariance_[0,0]), 100),
          np.linspace(COV_slice.location_[1]-5*math.sqrt(COV_slice.covariance_[1,1]), COV_slice.location_[1]+5*math.sqrt(COV_slice.covariance_[1,1]), 100),
    )
    zz = np.c_[xx.ravel(), yy.ravel()]

    # Empirical fit is not so good.  Don't plot this
    if False: # keep for debugging
      mahal_emp_cov = COV_slice.mahalanobis(zz)
      mahal_emp_cov = mahal_emp_cov.reshape(xx.shape)
      emp_cov_contour = ax.contour(xx, yy, np.sqrt(mahal_emp_cov),
                                  levels=[1.,2.,3.,4.,5.],
                                  #cmap=plt.cm.PuBu_r,
                                  cmap=plt.cm.cool_r,
                                  linestyles='dashed')

    COV = self.rob_cov
    COV_slice = EmpiricalCovariance()
    COV_slice.location_ = np.array([ COV.location_[0], COV.location_[1] ])
    COV_slice.covariance_ = np.array([ COV.covariance_[ 0,0 ], COV.covariance_[ 0,1 ],
                                       COV.covariance_[ 1,0 ], COV.covariance_[ 1,1 ] ])
    COV_slice.covariance_ = COV_slice.covariance_.reshape((2,2))
    COV_slice.precision_ = np.array([ COV.precision_[ 0,0 ], COV.precision_[ 0,1 ],
                                      COV.precision_[ 1,0 ], COV.precision_[ 1,1 ] ])
    COV_slice.precision_ = COV_slice.precision_.reshape((2,2))
    self.robust_model_XY = COV_slice

    # robust is better
    if show:
      mahal_robust_cov = COV_slice.mahalanobis(zz)
      mahal_robust_cov = mahal_robust_cov.reshape(xx.shape)
      robust_contour = ax.contour(xx, yy, np.sqrt(mahal_robust_cov),
                                 levels=[1.,2.,3.,4.,5.],
                                 #cmap=plt.cm.YlOrBr_r,
                                 cmap=plt.cm.spring_r,
                                 linestyles='dotted')
Esempio n. 2
0
def test_suffstat_sk_tied():
    # use equation Nk * Sk / N = S_tied
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    resp = rng.rand(n_samples, n_components)
    resp = resp / resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]

    covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    covars_pred_full = np.sum(nk[:, np.newaxis, np.newaxis] * covars_pred_full,
                              0) / n_samples

    covars_pred_tied = _estimate_gaussian_covariances_tied(resp, X, nk, xk, 0)

    ecov = EmpiricalCovariance()
    ecov.covariance_ = covars_pred_full
    assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='frobenius'), 0)
    assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_tied, 'tied')
    precs_pred = np.dot(precs_chol_pred, precs_chol_pred.T)
    precs_est = linalg.inv(covars_pred_tied)
    assert_array_almost_equal(precs_est, precs_pred)
def test_suffstat_sk_tied():
    # use equation Nk * Sk / N = S_tied
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    resp = rng.rand(n_samples, n_components)
    resp = resp / resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]

    covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    covars_pred_full = np.sum(nk[:, np.newaxis, np.newaxis] * covars_pred_full,
                              0) / n_samples

    covars_pred_tied = _estimate_gaussian_covariances_tied(resp, X, nk, xk, 0)

    ecov = EmpiricalCovariance()
    ecov.covariance_ = covars_pred_full
    assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='frobenius'), 0)
    assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_tied, 'tied')
    precs_pred = np.dot(precs_chol_pred, precs_chol_pred.T)
    precs_est = linalg.inv(covars_pred_tied)
    assert_array_almost_equal(precs_est, precs_pred)
Esempio n. 4
0
def test_suffstat_sk_diag():
    # test against 'full' case
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    resp = rng.rand(n_samples, n_components)
    resp = resp / resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]
    precs_pred_full = _estimate_gaussian_precisions_cholesky_full(
        resp, X, nk, xk, 0)
    covars_pred_full = [
        linalg.inv(np.dot(precision_chol, precision_chol.T))
        for precision_chol in precs_pred_full
    ]

    precs_pred_diag = _estimate_gaussian_precisions_cholesky_diag(
        resp, X, nk, xk, 0)
    covars_pred_diag = np.array([np.diag(1. / d)**2 for d in precs_pred_diag])

    ecov = EmpiricalCovariance()
    for (cov_full, cov_diag) in zip(covars_pred_full, covars_pred_diag):
        ecov.covariance_ = np.diag(np.diag(cov_full))
        assert_almost_equal(ecov.error_norm(cov_diag, norm='frobenius'), 0)
        assert_almost_equal(ecov.error_norm(cov_diag, norm='spectral'), 0)
def test_suffstat_sk_diag():
    # test against 'full' case
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    resp = rng.rand(n_samples, n_components)
    resp = resp / resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]
    precs_pred_full = _estimate_gaussian_precisions_cholesky_full(resp, X,
                                                                  nk, xk, 0)
    covars_pred_full = [linalg.inv(np.dot(precision_chol, precision_chol.T))
                        for precision_chol in precs_pred_full]

    precs_pred_diag = _estimate_gaussian_precisions_cholesky_diag(resp, X,
                                                                  nk, xk, 0)
    covars_pred_diag = np.array([np.diag(1. / d) ** 2
                                 for d in precs_pred_diag])

    ecov = EmpiricalCovariance()
    for (cov_full, cov_diag) in zip(covars_pred_full, covars_pred_diag):
        ecov.covariance_ = np.diag(np.diag(cov_full))
        assert_almost_equal(ecov.error_norm(cov_diag, norm='frobenius'), 0)
        assert_almost_equal(ecov.error_norm(cov_diag, norm='spectral'), 0)
Esempio n. 6
0
def test_gaussian_mixture_fit():
    # recover the ground truth
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_features = rand_data.n_features
    n_components = rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(
            n_components=n_components,
            n_init=20,
            reg_covar=0,
            random_state=rng,
            covariance_type=covar_type,
        )
        g.fit(X)

        # needs more data to pass the test with rtol=1e-7
        assert_allclose(np.sort(g.weights_),
                        np.sort(rand_data.weights),
                        rtol=0.1,
                        atol=1e-2)

        arg_idx1 = g.means_[:, 0].argsort()
        arg_idx2 = rand_data.means[:, 0].argsort()
        assert_allclose(g.means_[arg_idx1],
                        rand_data.means[arg_idx2],
                        rtol=0.1,
                        atol=1e-2)

        if covar_type == "full":
            prec_pred = g.precisions_
            prec_test = rand_data.precisions["full"]
        elif covar_type == "tied":
            prec_pred = np.array([g.precisions_] * n_components)
            prec_test = np.array([rand_data.precisions["tied"]] * n_components)
        elif covar_type == "spherical":
            prec_pred = np.array(
                [np.eye(n_features) * c for c in g.precisions_])
            prec_test = np.array([
                np.eye(n_features) * c
                for c in rand_data.precisions["spherical"]
            ])
        elif covar_type == "diag":
            prec_pred = np.array([np.diag(d) for d in g.precisions_])
            prec_test = np.array(
                [np.diag(d) for d in rand_data.precisions["diag"]])

        arg_idx1 = np.trace(prec_pred, axis1=1, axis2=2).argsort()
        arg_idx2 = np.trace(prec_test, axis1=1, axis2=2).argsort()
        for k, h in zip(arg_idx1, arg_idx2):
            ecov = EmpiricalCovariance()
            ecov.covariance_ = prec_test[h]
            # the accuracy depends on the number of data and randomness, rng
            assert_allclose(ecov.error_norm(prec_pred[k]), 0, atol=0.15)
def test_gaussian_mixture_fit():
    # recover the ground truth
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    n_features = rand_data.n_features
    n_components = rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        X = rand_data.X[covar_type]
        g = GaussianMixture(n_components=n_components, n_init=20,
                            reg_covar=0, random_state=rng,
                            covariance_type=covar_type)
        g.fit(X)

        # needs more data to pass the test with rtol=1e-7
        assert_allclose(np.sort(g.weights_), np.sort(rand_data.weights),
                        rtol=0.1, atol=1e-2)

        arg_idx1 = g.means_[:, 0].argsort()
        arg_idx2 = rand_data.means[:, 0].argsort()
        assert_allclose(g.means_[arg_idx1], rand_data.means[arg_idx2],
                        rtol=0.1, atol=1e-2)

        if covar_type == 'full':
            prec_pred = g.precisions_
            prec_test = rand_data.precisions['full']
        elif covar_type == 'tied':
            prec_pred = np.array([g.precisions_] * n_components)
            prec_test = np.array([rand_data.precisions['tied']] * n_components)
        elif covar_type == 'spherical':
            prec_pred = np.array([np.eye(n_features) * c
                                 for c in g.precisions_])
            prec_test = np.array([np.eye(n_features) * c for c in
                                 rand_data.precisions['spherical']])
        elif covar_type == 'diag':
            prec_pred = np.array([np.diag(d) for d in g.precisions_])
            prec_test = np.array([np.diag(d) for d in
                                 rand_data.precisions['diag']])

        arg_idx1 = np.trace(prec_pred, axis1=1, axis2=2).argsort()
        arg_idx2 = np.trace(prec_test, axis1=1, axis2=2).argsort()
        for k, h in zip(arg_idx1, arg_idx2):
            ecov = EmpiricalCovariance()
            ecov.covariance_ = prec_test[h]
            # the accuracy depends on the number of data and randomness, rng
            assert_allclose(ecov.error_norm(prec_pred[k]), 0, atol=0.1)
def test_suffstat_sk_tied():
    # use equation Nk * Sk / N = S_tied
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    resp = rng.rand(n_samples, n_components)
    resp = resp / resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]
    covars_pred_full = _estimate_gaussian_covariance_full(resp, X, nk, xk, 0)
    covars_pred_full = np.sum(nk[:, np.newaxis, np.newaxis] * covars_pred_full,
                              0) / n_samples

    covars_pred_tied = _estimate_gaussian_covariance_tied(resp, X, nk, xk, 0)
    ecov = EmpiricalCovariance()
    ecov.covariance_ = covars_pred_full
    assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='frobenius'), 0)
    assert_almost_equal(ecov.error_norm(covars_pred_tied, norm='spectral'), 0)
Esempio n. 9
0
def test_suffstat_sk_diag():
    # test against 'full' case
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    resp = rng.rand(n_samples, n_components)
    resp = resp / resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]
    covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    covars_pred_diag = _estimate_gaussian_covariances_diag(resp, X, nk, xk, 0)

    ecov = EmpiricalCovariance()
    for (cov_full, cov_diag) in zip(covars_pred_full, covars_pred_diag):
        ecov.covariance_ = np.diag(np.diag(cov_full))
        cov_diag = np.diag(cov_diag)
        assert_almost_equal(ecov.error_norm(cov_diag, norm='frobenius'), 0)
        assert_almost_equal(ecov.error_norm(cov_diag, norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_diag, 'diag')
    assert_almost_equal(covars_pred_diag, 1. / precs_chol_pred**2)
def test_suffstat_sk_diag():
    # test against 'full' case
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 500, 2, 2

    resp = rng.rand(n_samples, n_components)
    resp = resp / resp.sum(axis=1)[:, np.newaxis]
    X = rng.rand(n_samples, n_features)
    nk = resp.sum(axis=0)
    xk = np.dot(resp.T, X) / nk[:, np.newaxis]
    covars_pred_full = _estimate_gaussian_covariances_full(resp, X, nk, xk, 0)
    covars_pred_diag = _estimate_gaussian_covariances_diag(resp, X, nk, xk, 0)

    ecov = EmpiricalCovariance()
    for (cov_full, cov_diag) in zip(covars_pred_full, covars_pred_diag):
        ecov.covariance_ = np.diag(np.diag(cov_full))
        cov_diag = np.diag(cov_diag)
        assert_almost_equal(ecov.error_norm(cov_diag, norm='frobenius'), 0)
        assert_almost_equal(ecov.error_norm(cov_diag, norm='spectral'), 0)

    # check the precision computation
    precs_chol_pred = _compute_precision_cholesky(covars_pred_diag, 'diag')
    assert_almost_equal(covars_pred_diag, 1. / precs_chol_pred ** 2)