# Draw independent Gaussian train/test samples. There are more features than
# samples here (30 vs 20).
n_features, n_samples = 30, 20
sample_shape = (n_samples, n_features)
X_train = np.random.normal(size=sample_shape)
X_test = np.random.normal(size=sample_shape)

# Color samples: mix the features with one shared random matrix so that the
# train and test sets are transformed identically.
coloring_matrix = np.random.normal(size=(n_features, n_features))
X_train = X_train.dot(coloring_matrix)
X_test = X_test.dot(coloring_matrix)

###############################################################################
# Compute Ledoit-Wolf and Covariances on a grid of shrinkages

from scikits.learn.covariance import LedoitWolf, ShrunkCovariance

# Fit Ledoit-Wolf on the training set and score it on the test set.
lw = LedoitWolf()
lw_fitted = lw.fit(X_train)
loglik_lw = lw_fitted.score(X_test)

# Refit a single ShrunkCovariance estimator once per shrinkage value (30
# log-spaced values from 1e-2 to 1) and keep the negated test score of each.
cov = ShrunkCovariance()
shrinkages = np.logspace(-2, 0, 30)
negative_logliks = [
    -cov.fit(X_train, shrinkage=s).score(X_test)
    for s in shrinkages
]

###############################################################################
# Plot results
pl.loglog(shrinkages, negative_logliks)
pl.xlabel('Shrinkage')
pl.ylabel('Negative log-likelihood')

# Vertical marker at the Ledoit-Wolf shrinkage, drawn from the current lower
# y-limit up to the Ledoit-Wolf negative log-likelihood.
y_floor = pl.ylim()[0]
pl.vlines(
    lw.shrinkage_,
    y_floor,
    -loglik_lw,
    color='g',
    linewidth=3,
    label='Ledoit-Wolf estimate',
)
pl.legend()

pl.show()
n_features, n_samples = 30, 20 X_train = np.random.normal(size=(n_samples, n_features)) X_test = np.random.normal(size=(n_samples, n_features)) # Color samples coloring_matrix = np.random.normal(size=(n_features, n_features)) X_train = np.dot(X_train, coloring_matrix) X_test = np.dot(X_test, coloring_matrix) ############################################################################### # Compute Ledoit-Wolf and Covariances on a grid of shrinkages from scikits.learn.covariance import LedoitWolf, ShrunkCovariance lw = LedoitWolf() loglik_lw = lw.fit(X_train).score(X_test) cov = ShrunkCovariance() shrinkages = np.logspace(-2, 0, 30) negative_logliks = [-cov.fit(X_train, shrinkage=s).score(X_test) \ for s in shrinkages] ############################################################################### # Plot results pl.loglog(shrinkages, negative_logliks) pl.xlabel('Shrinkage') pl.ylabel('Negative log-likelihood') pl.vlines(lw.shrinkage_, pl.ylim()[0], -loglik_lw, color='g',
# Uncolored Gaussian test samples (n_samples, n_features and base_X_train are
# defined earlier in the script, outside this excerpt).
base_X_test = np.random.normal(size=(n_samples, n_features))

# Color samples: the same random mixing matrix is applied to train and test.
coloring_matrix = np.random.normal(size=(n_features, n_features))
X_train = np.dot(base_X_train, coloring_matrix)
X_test = np.dot(base_X_test, coloring_matrix)

###############################################################################
# Compute Ledoit-Wolf and Covariances on a grid of shrinkages

from scikits.learn.covariance import (
    LedoitWolf,
    OAS,
    ShrunkCovariance,
    log_likelihood,
    empirical_covariance,
)

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
lw_fitted = lw.fit(X_train, assume_centered=True)
loglik_lw = lw_fitted.score(X_test, assume_centered=True)

# OAS coefficient estimate
oa = OAS()
oa_fitted = oa.fit(X_train, assume_centered=True)
loglik_oa = oa_fitted.score(X_test, assume_centered=True)

# spanning a range of possible shrinkage coefficient values:
# one fresh ShrunkCovariance per value, 30 log-spaced values from 1e-3 to 1
shrinkages = np.logspace(-3, 0, 30)
negative_logliks = [
    -ShrunkCovariance(shrinkage=s).fit(
        X_train, assume_centered=True
    ).score(X_test, assume_centered=True)
    for s in shrinkages
]

# getting the likelihood under the real model:
# the covariance implied by the coloring step is C^T C
real_cov = coloring_matrix.T.dot(coloring_matrix)
emp_cov = empirical_covariance(X_train)
# Uncolored Gaussian test samples (n_samples, n_features and base_X_train are
# defined earlier in the script, outside this excerpt).
base_X_test = np.random.normal(size=(n_samples, n_features))

# Color samples: the same random mixing matrix is applied to train and test.
coloring_matrix = np.random.normal(size=(n_features, n_features))
X_train = np.dot(base_X_train, coloring_matrix)
X_test = np.dot(base_X_test, coloring_matrix)

###############################################################################
# Compute Ledoit-Wolf and Covariances on a grid of shrinkages

from scikits.learn.covariance import (
    LedoitWolf,
    OAS,
    ShrunkCovariance,
    log_likelihood,
    empirical_covariance,
)

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
lw_fitted = lw.fit(X_train, assume_centered=True)
loglik_lw = lw_fitted.score(X_test, assume_centered=True)

# OAS coefficient estimate
oa = OAS()
oa_fitted = oa.fit(X_train, assume_centered=True)
loglik_oa = oa_fitted.score(X_test, assume_centered=True)

# spanning a range of possible shrinkage coefficient values:
# one fresh ShrunkCovariance per value, 30 log-spaced values from 1e-3 to 1
shrinkages = np.logspace(-3, 0, 30)
negative_logliks = [
    -ShrunkCovariance(shrinkage=s).fit(
        X_train, assume_centered=True
    ).score(X_test, assume_centered=True)
    for s in shrinkages
]

# getting the likelihood under the real model:
# the covariance implied by the coloring step is C^T C
real_cov = coloring_matrix.T.dot(coloring_matrix)
emp_cov = empirical_covariance(X_train)
# Ground-truth covariance: Toeplitz matrix built from powers of r
# (r and n_features are defined earlier in the script, outside this excerpt).
real_cov = toeplitz(r ** np.arange(n_features))
# NOTE(review): if `cholesky` is scipy.linalg.cholesky it returns the upper
# factor U (real_cov = U^T U) by default, in which case multiplying by
# coloring_matrix.T below yields samples with covariance U U^T rather than
# real_cov -- confirm which factor the import in use returns.
coloring_matrix = cholesky(real_cov)

# One row per sample size (6..30), one column per repetition.
n_samples_range = np.arange(6, 31, 1)
repeat = 100
results_shape = (n_samples_range.size, repeat)
lw_mse = np.zeros(results_shape)
oa_mse = np.zeros(results_shape)
lw_shrinkage = np.zeros(results_shape)
oa_shrinkage = np.zeros(results_shape)

for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        # Fresh colored Gaussian sample for every (size, repetition) pair.
        white_noise = np.random.normal(size=(n_samples, n_features))
        X = np.dot(white_noise, coloring_matrix.T)

        # Ledoit-Wolf: record the error against real_cov and the shrinkage.
        lw = LedoitWolf(store_precision=False)
        lw.fit(X, assume_centered=True)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        # OAS: same measurements on the same sample.
        oa = OAS(store_precision=False)
        oa.fit(X, assume_centered=True)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE (mean over repetitions, error bars = standard deviation)
pl.subplot(2, 1, 1)
pl.errorbar(
    n_samples_range,
    lw_mse.mean(axis=1),
    yerr=lw_mse.std(axis=1),
    label='Ledoit-Wolf',
    color='g',
)
# Ground-truth covariance: Toeplitz matrix built from powers of r
# (r and n_features are defined earlier in the script, outside this excerpt).
real_cov = toeplitz(r ** np.arange(n_features))
# NOTE(review): if `cholesky` is scipy.linalg.cholesky it returns the upper
# factor U (real_cov = U^T U) by default, in which case multiplying by
# coloring_matrix.T below yields samples with covariance U U^T rather than
# real_cov -- confirm which factor the import in use returns.
coloring_matrix = cholesky(real_cov)

# One row per sample size (6..30), one column per repetition.
n_samples_range = np.arange(6, 31, 1)
repeat = 100
results_shape = (n_samples_range.size, repeat)
lw_mse = np.zeros(results_shape)
oa_mse = np.zeros(results_shape)
lw_shrinkage = np.zeros(results_shape)
oa_shrinkage = np.zeros(results_shape)

for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        # Fresh colored Gaussian sample for every (size, repetition) pair.
        white_noise = np.random.normal(size=(n_samples, n_features))
        X = np.dot(white_noise, coloring_matrix.T)

        # Ledoit-Wolf: record the error against real_cov and the shrinkage.
        lw = LedoitWolf(store_precision=False)
        lw.fit(X, assume_centered=True)
        lw_mse[i, j] = lw.mse(real_cov)
        lw_shrinkage[i, j] = lw.shrinkage_

        # OAS: same measurements on the same sample.
        oa = OAS(store_precision=False)
        oa.fit(X, assume_centered=True)
        oa_mse[i, j] = oa.mse(real_cov)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE (mean over repetitions, error bars = standard deviation)
pl.subplot(2, 1, 1)
pl.errorbar(
    n_samples_range,
    lw_mse.mean(axis=1),
    yerr=lw_mse.std(axis=1),
    label='Ledoit-Wolf',
    color='g',
)
pl.errorbar(
    n_samples_range,
    oa_mse.mean(axis=1),
    yerr=oa_mse.std(axis=1),
    label='OAS',
    color='r',
)
pl.ylabel("MSE")