for j in range(repeat): # generate data X = np.random.randn(n_samples, n_features) # add some outliers outliers_index = np.random.permutation(n_samples)[:n_outliers] outliers_offset = 10. * \ (np.random.randint(2, size=(n_outliers, n_features)) - 0.5) X[outliers_index] += outliers_offset inliers_mask = np.ones(n_samples).astype(bool) inliers_mask[outliers_index] = False # fit a Minimum Covariance Determinant (MCD) robust estimator to data S = MinCovDet().fit(X) # compare raw robust estimates with the true location and covariance err_loc_mcd[i, j] = np.sum(S.location_**2) err_cov_mcd[i, j] = S.error_norm(np.eye(n_features)) # compare estimators learnt from the full data set with true parameters err_loc_emp_full[i, j] = np.sum(X.mean(0)**2) err_cov_emp_full[i, j] = EmpiricalCovariance().fit(X).error_norm( np.eye(n_features)) # compare with an empirical covariance learnt from a pure data set # (i.e. "perfect" MCD) pure_X = X[inliers_mask] pure_location = pure_X.mean(0) pure_emp_cov = EmpiricalCovariance().fit(pure_X) err_loc_emp_pure[i, j] = np.sum(pure_location**2) err_cov_emp_pure[i, j] = pure_emp_cov.error_norm(np.eye(n_features)) # Display results font_prop = matplotlib.font_manager.FontProperties(size=11) pl.subplot(2, 1, 1)
for j in range(repeat): # generate data X = np.random.randn(n_samples, n_features) # add some outliers outliers_index = np.random.permutation(n_samples)[:n_outliers] outliers_offset = 10. * \ (np.random.randint(2, size=(n_outliers, n_features)) - 0.5) X[outliers_index] += outliers_offset inliers_mask = np.ones(n_samples).astype(bool) inliers_mask[outliers_index] = False # fit a Minimum Covariance Determinant (MCD) robust estimator to data S = MinCovDet().fit(X) # compare raw robust estimates with the true location and covariance err_loc_mcd[i, j] = np.sum(S.location_ ** 2) err_cov_mcd[i, j] = S.error_norm(np.eye(n_features)) # compare estimators learnt from the full data set with true parameters err_loc_emp_full[i, j] = np.sum(X.mean(0) ** 2) err_cov_emp_full[i, j] = EmpiricalCovariance().fit(X).error_norm( np.eye(n_features)) # compare with an empirical covariance learnt from a pure data set # (i.e. "perfect" MCD) pure_X = X[inliers_mask] pure_location = pure_X.mean(0) pure_emp_cov = EmpiricalCovariance().fit(pure_X) err_loc_emp_pure[i, j] = np.sum(pure_location ** 2) err_cov_emp_pure[i, j] = pure_emp_cov.error_norm(np.eye(n_features)) # Display results font_prop = matplotlib.font_manager.FontProperties(size=11) pl.subplot(2, 1, 1)