def MiniBach_SparcePCA(self,N_component): MiniPCA_calculator = skdecomp.MiniBatchSparsePCA(N_component,batch_size=15,normalize_components=True) self.MiniPCs = MiniPCA_calculator.fit(self.vector_centered) all_MiniPCs = self.MiniPCs.components_ pp.save_variable(all_MiniPCs,save_folder+r'\\MiniPCed_Data.pkl') print('MiniBach Sparce PCA done, generating graphs') self.cell_graph_plot('MINIPCA',all_MiniPCs)
def MiniBatchSparsePCA(self, source): min_max_scaler = preprocessing.MinMaxScaler() data_source = min_max_scaler.fit_transform(source) pca = decomposition.MiniBatchSparsePCA(n_components=2) #没有相关参数------------------------ result = {} result['data'] = pca.fit_transform(data_source) result['params'] = 0 return result
def gen_estimators(): ''' List of the different estimators, whether to center and transpose the problem, and whether the transformer uses the clustering API. ''' rng = RandomState(0) estimators = [ ('Eigenfaces - RandomizedPCA', decomposition.RandomizedPCA(n_components=n_components, whiten=True), True), ('Non-negative components - NMF tol=1e-4', decomposition.NMF(n_components=n_components, init='nndsvda', tol=1e-4, solver='cd'), False), ('Non-negative components - NMF tol=1e-6', decomposition.NMF( n_components=n_components, init='nndsvd', ), False), ('Independent components - FastICA', decomposition.FastICA(n_components=n_components, whiten=True), True), ('Sparse comp. - MiniBatchSparsePCA', decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8, n_iter=100, batch_size=3, random_state=rng), True), ('MiniBatchDictionaryLearning', decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1, n_iter=50, batch_size=3, random_state=rng), True), ('Cluster centers - MiniBatchKMeans', MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20, max_iter=50, random_state=rng), True), ('Factor Analysis components - FA', decomposition.FactorAnalysis(n_components=n_components, max_iter=2), True), ] return estimators
# List of the different estimators, whether to center and transpose the # problem, and whether the transformer uses the clustering API. estimators = [ ('Eigenfaces - PCA using randomized SVD', decomposition.PCA(n_components=n_components, svd_solver='randomized', whiten=True), True), ('Non-negative components - NMF', decomposition.NMF(n_components=n_components, init='nndsvda', tol=5e-3), False), ('Independent components - FastICA', decomposition.FastICA(n_components=n_components, whiten=True), True), ('Sparse comp. - MiniBatchSparsePCA', decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8, n_iter=100, batch_size=3, random_state=rng), True), ('MiniBatchDictionaryLearning', decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1, n_iter=50, batch_size=3, random_state=rng), True), ('Cluster centers - MiniBatchKMeans', MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20, max_iter=50, random_state=rng), True), ('Factor Analysis components - FA',
from sklearn import decomposition import numpy as np import matplotlib.pyplot as plt data = np.loadtxt("../Dataset/data.txt", delimiter=';') target = np.loadtxt("../Dataset/target.txt", delimiter=';') data_pre = np.loadtxt("../Dataset/data_pre.txt", delimiter=';') #sentiment = svm.SVC(kernel="rbf", gamma=0.001, C=1000) sentiment_bay_m = MultinomialNB() sentiment_bay_g = GaussianNB() sentiment_bay_b = BernoulliNB() sentiment_dec = tree.DecisionTreeClassifier() pca = decomposition.MiniBatchSparsePCA(n_components=20) pca.fit_transform(data) accuracy = np.mean(cross_validation.cross_val_score(sentiment, data, target)) print accuracy ''' accuracy_pre = np.mean(cross_validation.cross_val_score(sentiment,data_pre,target)) accuracy_bay_m = np.mean(cross_validation.cross_val_score(sentiment_bay_m,data,target)) accuracy_bay_pre_m = np.mean(cross_validation.cross_val_score(sentiment_bay_m,data_pre,target)) accuracy_bay_g = np.mean(cross_validation.cross_val_score(sentiment_bay_g,data,target)) accuracy_bay_pre_g = np.mean(cross_validation.cross_val_score(sentiment_bay_g,data_pre,target)) accuracy_bay_b = np.mean(cross_validation.cross_val_score(sentiment_bay_b,data,target)) accuracy_bay_pre_b = np.mean(cross_validation.cross_val_score(sentiment_bay_b,data_pre,target)) print accuracy,accuracy_pre,accuracy_bay_m,accuracy_bay_pre_m,accuracy_bay_g,accuracy_bay_pre_g,accuracy_bay_b,accuracy_bay_pre_b '''
def _eval_search_params(params_builder): search_params = {} for p in params_builder['param_set']: search_list = p['sp_list'].strip() if search_list == '': continue param_name = p['sp_name'] if param_name.lower().endswith(NON_SEARCHABLE): print("Warning: `%s` is not eligible for search and was " "omitted!" % param_name) continue if not search_list.startswith(':'): safe_eval = SafeEval(load_scipy=True, load_numpy=True) ev = safe_eval(search_list) search_params[param_name] = ev else: # Have `:` before search list, asks for estimator evaluatio safe_eval_es = SafeEval(load_estimators=True) search_list = search_list[1:].strip() # TODO maybe add regular express check ev = safe_eval_es(search_list) preprocessings = ( preprocessing.StandardScaler(), preprocessing.Binarizer(), preprocessing.MaxAbsScaler(), preprocessing.Normalizer(), preprocessing.MinMaxScaler(), preprocessing.PolynomialFeatures(), preprocessing.RobustScaler(), feature_selection.SelectKBest(), feature_selection.GenericUnivariateSelect(), feature_selection.SelectPercentile(), feature_selection.SelectFpr(), feature_selection.SelectFdr(), feature_selection.SelectFwe(), feature_selection.VarianceThreshold(), decomposition.FactorAnalysis(random_state=0), decomposition.FastICA(random_state=0), decomposition.IncrementalPCA(), decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS), decomposition.LatentDirichletAllocation(random_state=0, n_jobs=N_JOBS), decomposition.MiniBatchDictionaryLearning(random_state=0, n_jobs=N_JOBS), decomposition.MiniBatchSparsePCA(random_state=0, n_jobs=N_JOBS), decomposition.NMF(random_state=0), decomposition.PCA(random_state=0), decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS), decomposition.TruncatedSVD(random_state=0), kernel_approximation.Nystroem(random_state=0), kernel_approximation.RBFSampler(random_state=0), kernel_approximation.AdditiveChi2Sampler(), kernel_approximation.SkewedChi2Sampler(random_state=0), cluster.FeatureAgglomeration(), skrebate.ReliefF(n_jobs=N_JOBS), skrebate.SURF(n_jobs=N_JOBS), skrebate.SURFstar(n_jobs=N_JOBS), skrebate.MultiSURF(n_jobs=N_JOBS), skrebate.MultiSURFstar(n_jobs=N_JOBS), imblearn.under_sampling.ClusterCentroids(random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.CondensedNearestNeighbour( random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.EditedNearestNeighbours(random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.RepeatedEditedNearestNeighbours( random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.InstanceHardnessThreshold( random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.NearMiss(random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.NeighbourhoodCleaningRule( random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.OneSidedSelection(random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.RandomUnderSampler(random_state=0), imblearn.under_sampling.TomekLinks(random_state=0, n_jobs=N_JOBS), imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS), imblearn.over_sampling.RandomOverSampler(random_state=0), imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS), imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS), imblearn.over_sampling.BorderlineSMOTE(random_state=0, n_jobs=N_JOBS), imblearn.over_sampling.SMOTENC(categorical_features=[], random_state=0, n_jobs=N_JOBS), imblearn.combine.SMOTEENN(random_state=0), imblearn.combine.SMOTETomek(random_state=0)) newlist = [] for obj in ev: if obj is None: newlist.append(None) elif obj == 'all_0': newlist.extend(preprocessings[0:35]) elif obj == 'sk_prep_all': # no KernalCenter() newlist.extend(preprocessings[0:7]) elif obj == 'fs_all': newlist.extend(preprocessings[7:14]) elif obj == 'decomp_all': newlist.extend(preprocessings[14:25]) elif obj == 'k_appr_all': newlist.extend(preprocessings[25:29]) elif obj == 'reb_all': newlist.extend(preprocessings[30:35]) elif obj == 'imb_all': newlist.extend(preprocessings[35:54]) elif type(obj) is int and -1 < obj < len(preprocessings): newlist.append(preprocessings[obj]) elif hasattr(obj, 'get_params'): # user uploaded object if 'n_jobs' in obj.get_params(): newlist.append(obj.set_params(n_jobs=N_JOBS)) else: newlist.append(obj) else: sys.exit("Unsupported estimator type: %r" % (obj)) search_params[param_name] = newlist return search_params
def __init__(self, source): min_max_scaler = preprocessing.MinMaxScaler() data_source = min_max_scaler.fit_transform(source) pca = decomposition.MiniBatchSparsePCA(n_components=2) self.return_data = pca.fit_transform(data_source)
('Eigenfaces - RandomizedPCA', decomposition.RandomizedPCA(n_components=n_components, whiten=True), True, False), ('Non-negative components - NMF', decomposition.NMF(n_components=n_components, init='nndsvda', beta=5.0, tol=5e-3, sparseness='components'), False, False), ('Independent components - FastICA', decomposition.FastICA(n_components=n_components, whiten=True, max_iter=10), True, True), ('Sparse comp. - MiniBatchSparsePCA', decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=1e-3, n_iter=100, chunk_size=3, random_state=rng), True, False), ('MiniBatchDictionaryLearning', decomposition.MiniBatchDictionaryLearning(n_atoms=15, alpha=5e-3, n_iter=50, chunk_size=3, random_state=rng), True, False), ('Cluster centers - MiniBatchKMeans', MiniBatchKMeans(k=n_components, tol=1e-3, chunk_size=20, max_iter=50, random_state=rng), True, False) ]
def get_search_params(params_builder): search_params = {} safe_eval = SafeEval(load_scipy=True, load_numpy=True) safe_eval_es = SafeEval(load_estimators=True) for p in params_builder['param_set']: search_p = p['search_param_selector']['search_p'] if search_p.strip() == '': continue param_type = p['search_param_selector']['selected_param_type'] lst = search_p.split(':') assert ( len(lst) == 2 ), "Error, make sure there is one and only one colon in search parameter input." literal = lst[1].strip() param_name = lst[0].strip() if param_name: if param_name.lower() == 'n_jobs': sys.exit("Parameter `%s` is invalid for search." % param_name) elif not param_name.endswith('-'): ev = safe_eval(literal) if param_type == 'final_estimator_p': search_params['estimator__' + param_name] = ev else: search_params['preprocessing_' + param_type[5:6] + '__' + param_name] = ev else: # only for estimator eval, add `-` to the end of param #TODO maybe add regular express check ev = safe_eval_es(literal) for obj in ev: if 'n_jobs' in obj.get_params(): obj.set_params(n_jobs=N_JOBS) if param_type == 'final_estimator_p': search_params['estimator__' + param_name[:-1]] = ev else: search_params['preprocessing_' + param_type[5:6] + '__' + param_name[:-1]] = ev elif param_type != 'final_estimator_p': #TODO regular express check ? ev = safe_eval_es(literal) preprocessors = [ preprocessing.StandardScaler(), preprocessing.Binarizer(), preprocessing.Imputer(), preprocessing.MaxAbsScaler(), preprocessing.Normalizer(), preprocessing.MinMaxScaler(), preprocessing.PolynomialFeatures(), preprocessing.RobustScaler(), feature_selection.SelectKBest(), feature_selection.GenericUnivariateSelect(), feature_selection.SelectPercentile(), feature_selection.SelectFpr(), feature_selection.SelectFdr(), feature_selection.SelectFwe(), feature_selection.VarianceThreshold(), decomposition.FactorAnalysis(random_state=0), decomposition.FastICA(random_state=0), decomposition.IncrementalPCA(), decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS), decomposition.LatentDirichletAllocation(random_state=0, n_jobs=N_JOBS), decomposition.MiniBatchDictionaryLearning(random_state=0, n_jobs=N_JOBS), decomposition.MiniBatchSparsePCA(random_state=0, n_jobs=N_JOBS), decomposition.NMF(random_state=0), decomposition.PCA(random_state=0), decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS), decomposition.TruncatedSVD(random_state=0), kernel_approximation.Nystroem(random_state=0), kernel_approximation.RBFSampler(random_state=0), kernel_approximation.AdditiveChi2Sampler(), kernel_approximation.SkewedChi2Sampler(random_state=0), cluster.FeatureAgglomeration(), skrebate.ReliefF(n_jobs=N_JOBS), skrebate.SURF(n_jobs=N_JOBS), skrebate.SURFstar(n_jobs=N_JOBS), skrebate.MultiSURF(n_jobs=N_JOBS), skrebate.MultiSURFstar(n_jobs=N_JOBS), imblearn.under_sampling.ClusterCentroids(random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.CondensedNearestNeighbour( random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.EditedNearestNeighbours(random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.RepeatedEditedNearestNeighbours( random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.InstanceHardnessThreshold( random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.NearMiss(random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.NeighbourhoodCleaningRule( random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.OneSidedSelection(random_state=0, n_jobs=N_JOBS), imblearn.under_sampling.RandomUnderSampler(random_state=0), imblearn.under_sampling.TomekLinks(random_state=0, n_jobs=N_JOBS), imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS), imblearn.over_sampling.RandomOverSampler(random_state=0), imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS), imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS), imblearn.over_sampling.BorderlineSMOTE(random_state=0, n_jobs=N_JOBS), imblearn.over_sampling.SMOTENC(categorical_features=[], random_state=0, n_jobs=N_JOBS), imblearn.combine.SMOTEENN(random_state=0), imblearn.combine.SMOTETomek(random_state=0) ] newlist = [] for obj in ev: if obj is None: newlist.append(None) elif obj == 'all_0': newlist.extend(preprocessors[0:36]) elif obj == 'sk_prep_all': # no KernalCenter() newlist.extend(preprocessors[0:8]) elif obj == 'fs_all': newlist.extend(preprocessors[8:15]) elif obj == 'decomp_all': newlist.extend(preprocessors[15:26]) elif obj == 'k_appr_all': newlist.extend(preprocessors[26:30]) elif obj == 'reb_all': newlist.extend(preprocessors[31:36]) elif obj == 'imb_all': newlist.extend(preprocessors[36:55]) elif type(obj) is int and -1 < obj < len(preprocessors): newlist.append(preprocessors[obj]) elif hasattr(obj, 'get_params'): # user object if 'n_jobs' in obj.get_params(): newlist.append(obj.set_params(n_jobs=N_JOBS)) else: newlist.append(obj) else: sys.exit("Unsupported preprocessor type: %r" % (obj)) search_params['preprocessing_' + param_type[5:6]] = newlist else: sys.exit("Parameter name of the final estimator can't be skipped!") return search_params
def main(): dataset = fetch_olivetti_faces(shuffle=True, random_state=rng) faces = dataset.data n_samples, n_features = faces.shape # global centering faces_centered = faces - faces.mean(axis=0) # local centering faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1) print("Dataset consists of %d faces" % n_samples) estimators = [ ('Eigenfaces - PCA using randomized SVD', decomposition.PCA(n_components=n_components, svd_solver='randomized', whiten=True), True), ('Non-negative components - NMF', decomposition.NMF(n_components=n_components, init='nndsvda', tol=5e-3), False), ('Independent components - FastICA', decomposition.FastICA(n_components=n_components, whiten=True), True), ('Sparse comp. - MiniBatchSparsePCA', decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8, n_iter=100, batch_size=3, random_state=rng), True), ('MiniBatchDictionaryLearning', decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1, n_iter=50, batch_size=3, random_state=rng), True), ('Cluster centers - MiniBatchKMeans', MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20, max_iter=50, random_state=rng), True), ('Factor Analysis components - FA', decomposition.FactorAnalysis(n_components=n_components, max_iter=2), True), ] plot_gallery("First centered Olivetti faces", faces_centered[:n_components]) for name, estimator, center in estimators: print("Extracting the top %d %s..." % (n_components, name)) t0 = time() data = faces if center: data = faces_centered estimator.fit(data) train_time = (time() - t0) print("done in %0.3fs" % train_time) if hasattr(estimator, 'cluster_centers_'): components_ = estimator.cluster_centers_ else: components_ = estimator.components_ # so we skip it. if (hasattr(estimator, 'noise_variance_') and estimator.noise_variance_.ndim > 0): # Skip the Eigenfaces case plot_gallery("Pixelwise variance", estimator.noise_variance_.reshape(1, -1), n_col=1, n_row=1) plot_gallery('%s - Train time %.1fs' % (name, train_time), components_[:n_components]) plt.show()
def faces_decomposition(): import logging from numpy.random import RandomState #随机数生成器种子,从高斯分布或者其他等分布产生 import matplotlib.pyplot as plt from time import time from sklearn.datasets import fetch_olivetti_faces from sklearn.cluster import MiniBatchKMeans from sklearn import decomposition logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') n_row, n_col = 2, 3 n_components = n_row * n_col image_shape = (64, 64) rng = RandomState(0) #加载数据集 dataset = fetch_olivetti_faces(shuffle=True, random_state=rng) faces = dataset.data n_samples, n_features = faces.shape faces_centered = faces - faces.mean(axis=0) faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1) print("dataset consits of %d faces" % n_samples) #样本个数 def plot_gallery(title, images, n_col=n_col, n_row=n_row): plt.figure(figsize=(2. * n_col, 2.26 * n_row)) plt.suptitle(title, size=16) for i, comp in enumerate(images): plt.subplot(n_row, n_col, i + 1) vmax = max(comp.max(), -comp.min()) plt.imshow(comp.reshape(image_shape), cmap=plt.cm.gray, interpolation='nearest', vmin=-vmax, vmax=vmax) plt.xticks(()) plt.yticks(()) plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.) estimators = [ ('Eigenfaces - PCA using randomized SVD', decomposition.PCA(n_components=n_components, svd_solver='randomized', whiten=True), True), ('Non-negative components - NMF', decomposition.NMF(n_components=n_components, init='nndsvda', tol=5e-3), False), ('Independent components - FastICA', decomposition.FastICA(n_components=n_components, whiten=True), True), ('Sparse comp. - MiniBatchSparsePCA', decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8, n_iter=100, batch_size=3, random_state=rng), True), ('MiniBatchDictionaryLearning', decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1, n_iter=50, batch_size=3, random_state=rng), True), ('Cluster centers - MiniBatchKMeans', MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20, max_iter=50, random_state=rng), True), ('Factor Analysis components - FA', decomposition.FactorAnalysis(n_components=n_components, max_iter=2), True), ] # ############################################################################# # Plot a sample of the input data plot_gallery("First centered Olivetti faces", faces_centered[:n_components]) # ############################################################################# # Do the estimation and plot it for name, estimator, center in estimators: print("Extracting the top %d %s..." % (n_components, name)) t0 = time() data = faces if center: data = faces_centered estimator.fit(data) train_time = (time() - t0) print("done in %0.3fs" % train_time) if hasattr(estimator, 'cluster_centers_'): components_ = estimator.cluster_centers_ else: components_ = estimator.components_ # Plot an image representing the pixelwise variance provided by the # estimator e.g its noise_variance_ attribute. The Eigenfaces estimator, # via the PCA decomposition, also provides a scalar noise_variance_ # (the mean of pixelwise variance) that cannot be displayed as an image # so we skip it. if (hasattr(estimator, 'noise_variance_') and estimator.noise_variance_.ndim > 0): # Skip the Eigenfaces case plot_gallery("Pixelwise variance", estimator.noise_variance_.reshape(1, -1), n_col=1, n_row=1) plot_gallery('%s - Train time %.1fs' % (name, train_time), components_[:n_components]) plt.show()
def plot_faces_decomposition(): # Display progress logs on stdout logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') n_row, n_col = 2, 3 n_components = n_row * n_col image_shape = (64, 64) rng = RandomState(0) # ############################################################################# # Load faces data faces, _ = fetch_olivetti_faces(return_X_y=True, shuffle=True, random_state=rng) n_samples, n_features = faces.shape # global centering faces_centered = faces - faces.mean(axis=0) # local centering faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1) print("Dataset consists of %d faces" % n_samples) def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray): plt.figure(figsize=(2. * n_col, 2.26 * n_row)) plt.suptitle(title, size=16) for i, comp in enumerate(images): plt.subplot(n_row, n_col, i + 1) vmax = max(comp.max(), -comp.min()) plt.imshow(comp.reshape(image_shape), cmap=cmap, interpolation='nearest', vmin=-vmax, vmax=vmax) plt.xticks(()) plt.yticks(()) plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.) # ############################################################################# # List of the different estimators, whether to center and transpose the # problem, and whether the transformer uses the clustering API. estimators = [ ('Eigenfaces - PCA using randomized SVD', decomposition.PCA(n_components=n_components, svd_solver='randomized', whiten=True), True), ('Non-negative components - NMF', decomposition.NMF(n_components=n_components, init='nndsvda', tol=5e-3), False), ('Independent components - FastICA', decomposition.FastICA(n_components=n_components, whiten=True), True), ('Sparse comp. - MiniBatchSparsePCA', decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8, n_iter=100, batch_size=3, random_state=rng), True), ('MiniBatchDictionaryLearning', decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1, n_iter=50, batch_size=3, random_state=rng), True), ('Cluster centers - MiniBatchKMeans', MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20, max_iter=50, random_state=rng), True), ('Factor Analysis components - FA', decomposition.FactorAnalysis(n_components=n_components, max_iter=20), True), ] # ############################################################################# # Plot a sample of the input data plot_gallery("First centered Olivetti faces", faces_centered[:n_components]) # ############################################################################# # Do the estimation and plot it for name, estimator, center in estimators: print("Extracting the top %d %s..." % (n_components, name)) t0 = time() data = faces if center: data = faces_centered estimator.fit(data) train_time = (time() - t0) print("done in %0.3fs" % train_time) if hasattr(estimator, 'cluster_centers_'): components_ = estimator.cluster_centers_ else: components_ = estimator.components_ # Plot an image representing the pixelwise variance provided by the # estimator e.g its noise_variance_ attribute. The Eigenfaces estimator, # via the PCA decomposition, also provides a scalar noise_variance_ # (the mean of pixelwise variance) that cannot be displayed as an image # so we skip it. if (hasattr(estimator, 'noise_variance_') and estimator.noise_variance_.ndim > 0): # Skip the Eigenfaces case plot_gallery("Pixelwise variance", estimator.noise_variance_.reshape(1, -1), n_col=1, n_row=1) plot_gallery('%s - Train time %.1fs' % (name, train_time), components_[:n_components]) plt.show() # ############################################################################# # Various positivity constraints applied to dictionary learning. estimators = [ ('Dictionary learning', decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1, n_iter=50, batch_size=3, random_state=rng), True), ('Dictionary learning - positive dictionary', decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1, n_iter=50, batch_size=3, random_state=rng, positive_dict=True), True), ('Dictionary learning - positive code', decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1, n_iter=50, batch_size=3, fit_algorithm='cd', random_state=rng, positive_code=True), True), ('Dictionary learning - positive dictionary & code', decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1, n_iter=50, batch_size=3, fit_algorithm='cd', random_state=rng, positive_dict=True, positive_code=True), True), ] # ############################################################################# # Plot a sample of the input data plot_gallery("First centered Olivetti faces", faces_centered[:n_components], cmap=plt.cm.RdBu) # ############################################################################# # Do the estimation and plot it for name, estimator, center in estimators: print("Extracting the top %d %s..." % (n_components, name)) t0 = time() data = faces if center: data = faces_centered estimator.fit(data) train_time = (time() - t0) print("done in %0.3fs" % train_time) components_ = estimator.components_ plot_gallery(name, components_[:n_components], cmap=plt.cm.RdBu) plt.show()
ica_estimator.fit(faces_centered) plot_gallery("Independent components - FastICA", ica_estimator.components_[:n_components]) # %% # Sparse components - MiniBatchSparsePCA # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # # Mini-batch sparse PCA (`MiniBatchSparsePCA`) extracts the set of sparse # components that best reconstruct the data. This variant is faster but # less accurate than the similar :py:mod:`sklearn.decomposition.SparsePCA`. # %% batch_pca_estimator = decomposition.MiniBatchSparsePCA( n_components=n_components, alpha=0.1, n_iter=100, batch_size=3, random_state=rng) batch_pca_estimator.fit(faces_centered) plot_gallery( "Sparse components - MiniBatchSparsePCA", batch_pca_estimator.components_[:n_components], ) # %% # Dictionary learning # ^^^^^^^^^^^^^^^^^^^ # # By default, :class:`MiniBatchDictionaryLearning` divides the data into # mini-batches and optimizes in an online manner by cycling over the # mini-batches for the specified number of iterations.