Ejemplo n.º 1
0
def launch_mcd_on_dataset(n_samples, n_features, n_outliers, tol_loc, tol_cov,
                          tol_support):
    """Fit MinCovDet on contaminated Gaussian data and check its estimates.

    A seeded ``(n_samples, n_features)`` standard-normal sample is drawn and
    ``n_outliers`` randomly chosen rows are shifted by +/-5 along every
    coordinate. The fitted location and covariance are then compared with the
    empirical estimates computed on the untouched rows only.

    Parameters
    ----------
    n_samples : int
        Number of rows in the generated data set.
    n_features : int
        Number of columns in the generated data set.
    n_outliers : int
        Number of rows that get shifted.
    tol_loc : float
        Strict upper bound on the mean squared error between the fitted
        location and the mean of the untouched rows.
    tol_cov : float
        Strict upper bound on the mean squared error between the fitted
        covariance and the empirical covariance of the untouched rows.
    tol_support : int
        Minimum number of samples that must be flagged in the fitted
        support mask.

    Raises
    ------
    AssertionError
        If any of the tolerance checks fails, or if the distances stored on
        the fitted estimator differ from those recomputed on ``data``.
    """
    # One seeded generator drives both the data and the estimator, so the
    # order of the draws below matters for reproducibility.
    rng = np.random.RandomState(0)
    data = rng.randn(n_samples, n_features)

    # Contaminate a random subset of rows: randint(2) - 0.5 is +/-0.5, so
    # each coordinate moves by +/-5.
    contaminated_rows = rng.permutation(n_samples)[:n_outliers]
    half_signs = rng.randint(2, size=(n_outliers, n_features)) - 0.5
    data[contaminated_rows] += 10. * half_signs

    is_inlier = np.ones(n_samples, dtype=bool)
    is_inlier[contaminated_rows] = False
    clean = data[is_inlier]

    # Fit the robust estimator on the full (contaminated) data set.
    estimator = MinCovDet(random_state=rng).fit(data)
    location = estimator.location_
    covariance = estimator.covariance_
    support = estimator.support_

    # The robust estimates should be close to what the clean rows alone give.
    location_mse = np.mean(np.square(clean.mean(axis=0) - location))
    assert location_mse < tol_loc

    covariance_mse = np.mean(np.square(empirical_covariance(clean) - covariance))
    assert covariance_mse < tol_cov

    assert support.sum() >= tol_support

    # Distances stored at fit time must match a fresh computation.
    assert_array_almost_equal(estimator.mahalanobis(data), estimator.dist_)
Ejemplo n.º 2
0
# Show contours of the distance functions.
# Both estimators are evaluated on a 100x100 grid spanning the current axis
# limits; the limits are read once so both grids use the same bounds.
x_min, x_max = plt.xlim()
y_min, y_max = plt.ylim()
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                     np.linspace(y_min, y_max, 100))
# One (x, y) row per grid point, in the layout expected by ``mahalanobis``.
zz = np.c_[xx.ravel(), yy.ravel()]

# NOTE(review): the square root below suggests ``mahalanobis`` returns squared
# distances -- confirm against the estimator's documentation.
mahal_emp_cov = emp_cov.mahalanobis(zz)
mahal_emp_cov = mahal_emp_cov.reshape(xx.shape)
emp_cov_contour = subfig1.contour(xx,
                                  yy,
                                  np.sqrt(mahal_emp_cov),
                                  cmap=plt.cm.PuBu_r,
                                  linestyles='dashed')

mahal_robust_cov = robust_cov.mahalanobis(zz)
mahal_robust_cov = mahal_robust_cov.reshape(xx.shape)
robust_contour = subfig1.contour(xx,
                                 yy,
                                 np.sqrt(mahal_robust_cov),
                                 cmap=plt.cm.YlOrBr_r,
                                 linestyles='dotted')

# ``ContourSet.collections`` was deprecated in matplotlib 3.8 and later
# removed; ``legend_elements()`` returns one proxy artist per contour level.
# Index 1 keeps the same (second) level that was previously used for the
# legend entry.
emp_cov_handles, _ = emp_cov_contour.legend_elements()
robust_handles, _ = robust_contour.legend_elements()

subfig1.legend([
    emp_cov_handles[1], robust_handles[1], inlier_plot,
    outlier_plot
], ['MLE dist', 'robust dist', 'inliers', 'outliers'],
               loc="upper right",
               borderaxespad=0)
# Hide tick marks and labels on both axes.
plt.xticks(())
plt.yticks(())