def MiniBach_DictionaryLearning(self, N_component):
    MiniDL_calculator = skdecomp.MiniBatchDictionaryLearning(N_component, batch_size=25)
    self.MiniDLs = MiniDL_calculator.fit(self.vector_centered)
    all_MiniDLs = self.MiniDLs.components_
    pp.save_variable(all_MiniDLs, save_folder + r'\\Dictionary_Learning_Data.pkl')
    print('MiniBatch Dictionary Learning done, generating graphs')
    self.cell_graph_plot('Dictionary_Learning', all_MiniDLs)
def __init__(self, dataFile, outputFile, size):
    data = np.loadtxt(open(dataFile, "rb"), delimiter=",", skiprows=0)
    dictionary = decomposition.MiniBatchDictionaryLearning(
        n_components=size, alpha=1, n_iter=500).fit(data)
    dictionaryData = pickle.dumps(dictionary)
    # pickle.dumps returns bytes, so the file must be opened in binary mode
    f = open(outputFile, "wb")
    f.write(dictionaryData)
    f.close()
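# A minimal sketch (not part of the original snippet) of how the pickled
# dictionary written above might be loaded back and used to sparse-code new
# samples; the file names and the `new_data` array are hypothetical placeholders.
import pickle
import numpy as np

with open("dictionary.pkl", "rb") as f:          # same file written by __init__ above
    dictionary = pickle.load(f)

new_data = np.loadtxt("new_samples.csv", delimiter=",")   # assumed CSV with matching feature count
codes = dictionary.transform(new_data)           # sparse codes, shape (n_samples, n_components)
reconstruction = codes @ dictionary.components_  # approximate reconstruction of the inputs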
def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
    plt.figure(figsize=(4. * n_col, 2.26 * n_row))
    plt.suptitle(title, size=16)
    for i, comp in enumerate(images):
        plt.subplot(n_row, n_col, i + 1)
        vmax = max(comp.max(), -comp.min())
        plt.imshow(comp.reshape(image_shape), cmap=cmap,
                   interpolation='nearest', vmin=-vmax, vmax=vmax)
        plt.xticks(())
        plt.yticks(())
    plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)


estimators = [
    ('Dictionary learning',
     decomposition.MiniBatchDictionaryLearning(n_components=70, alpha=0.1,
                                               n_iter=50, batch_size=2,
                                               random_state=rng),
     True),
    ('Dictionary learning - positive dictionary',
     decomposition.MiniBatchDictionaryLearning(n_components=70, alpha=0.1,
                                               n_iter=50, batch_size=2,
                                               random_state=rng,
                                               positive_dict=True),
     True),
    ('Dictionary learning - positive code',
     decomposition.MiniBatchDictionaryLearning(n_components=70, alpha=0.1,
                                               n_iter=50, batch_size=2,
                                               fit_algorithm='cd',
                                               random_state=rng,
                                               positive_code=True),
     True),
    ('Dictionary learning - positive dictionary & code',
     decomposition.MiniBatchDictionaryLearning(n_components=70, alpha=0.1,
                                               n_iter=50, batch_size=2,
                                               fit_algorithm='cd',
                                               random_state=rng,
                                               positive_dict=True,
                                               positive_code=True),
     True),
]
def gen_estimators():
    '''
    List of the different estimators, whether to center and transpose the
    problem, and whether the transformer uses the clustering API.
    '''
    rng = RandomState(0)
    estimators = [
        ('Eigenfaces - RandomizedPCA',
         decomposition.RandomizedPCA(n_components=n_components, whiten=True),
         True),
        ('Non-negative components - NMF tol=1e-4',
         decomposition.NMF(n_components=n_components, init='nndsvda',
                           tol=1e-4, solver='cd'),
         False),
        ('Non-negative components - NMF tol=1e-6',
         decomposition.NMF(n_components=n_components, init='nndsvd', tol=1e-6),
         False),
        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True),
         True),
        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8,
                                          n_iter=100, batch_size=3,
                                          random_state=rng),
         True),
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng),
         True),
        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20,
                         max_iter=50, random_state=rng),
         True),
        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components, max_iter=2),
         True),
    ]
    return estimators
def create_estimator(self):
    if self.estimator_name == 'K-means':
        n_components = self.param_dict['n_components']
        ninit = self.param_dict['n_init']
        self.estimator = cluster.KMeans(algorithm='full',
                                        n_clusters=n_components,
                                        n_init=ninit,
                                        n_jobs=self.n_jobs)
        self.estimator_param_string = '_n_components_' + str(n_components) + '_'
    elif self.estimator_name == 'EM':
        n_components = self.param_dict['n_components']
        covariance_type = 'full'
        max_iter = self.param_dict['max_iter']
        self.estimator_param_string = '_n_components_' + str(n_components) + '_'
        self.estimator = mixture.GaussianMixture(
            n_components=n_components,
            covariance_type=covariance_type,
            max_iter=max_iter)
    elif self.estimator_name == 'PCA':
        n_components = self.param_dict['n_components']
        self.estimator_param_string = '_n_components_' + str(n_components) + '_'
        self.estimator = decomposition.PCA(n_components=n_components)
    elif self.estimator_name == 'ICA':
        n_components = self.param_dict['n_components']
        self.estimator_param_string = '_n_components_' + str(n_components) + '_'
        self.estimator = decomposition.FastICA(n_components=n_components,
                                               max_iter=1000)
    elif self.estimator_name == 'Random_Projection':
        n_components = self.param_dict['n_components']
        self.estimator_param_string = '_n_components_' + str(n_components) + '_'
        self.estimator = random_projection.GaussianRandomProjection(
            n_components=n_components)
    elif self.estimator_name == 'Dictionary_Learning':
        n_components = self.param_dict['n_components']
        self.estimator_param_string = '_n_components_' + str(n_components) + '_'
        alpha = self.param_dict['alpha']
        self.estimator = decomposition.MiniBatchDictionaryLearning(
            n_components=n_components, alpha=alpha, batch_size=20)
nfeat = 15
rpca = decomposition.RandomizedPCA(n_components=nfeat, whiten=True)
rpca.fit(unlagged_stimuli)
unlagged_stimuli = rpca.transform(unlagged_stimuli)

#%%
# sparse PCA
spca = decomposition.SparsePCA(n_jobs=-1)
spca.fit(unlagged_stimuli)
unlagged_stimuli = spca.transform(unlagged_stimuli)

#%%
# mini-batch dictionary learning
mbdic = decomposition.MiniBatchDictionaryLearning(n_components=50, verbose=True)
mbdic.fit(stimuli_patches)

#%%
# visualize the dictionary atoms
V = mbdic.components_
plt.figure()
for i, comp in enumerate(V):
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patchsize), interpolation='nearest')

#%%
# now construct the code representation for the stimuli
codes = mbdic.transform(stimuli_patches[:sum(patch_stim_lens[:100]), :])
# how are these patches constructed?
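# A minimal sketch (an assumption, not from the original script) showing how
# the patches encoded above can be approximately reconstructed from their
# sparse codes: each patch is a linear combination of the dictionary atoms.
import numpy as np

reconstructed_patches = codes @ V          # shape: (n_patches, patch_height * patch_width)
residual = np.linalg.norm(stimuli_patches[:codes.shape[0], :] - reconstructed_patches)
print('reconstruction residual:', residual)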
def _eval_search_params(params_builder):
    search_params = {}

    for p in params_builder['param_set']:
        search_list = p['sp_list'].strip()
        if search_list == '':
            continue

        param_name = p['sp_name']
        if param_name.lower().endswith(NON_SEARCHABLE):
            print("Warning: `%s` is not eligible for search and was "
                  "omitted!" % param_name)
            continue

        if not search_list.startswith(':'):
            safe_eval = SafeEval(load_scipy=True, load_numpy=True)
            ev = safe_eval(search_list)
            search_params[param_name] = ev
        else:
            # A leading `:` before the search list asks for estimator evaluation
            safe_eval_es = SafeEval(load_estimators=True)
            search_list = search_list[1:].strip()
            # TODO: maybe add a regular expression check
            ev = safe_eval_es(search_list)

            preprocessings = (
                preprocessing.StandardScaler(),
                preprocessing.Binarizer(),
                preprocessing.MaxAbsScaler(),
                preprocessing.Normalizer(),
                preprocessing.MinMaxScaler(),
                preprocessing.PolynomialFeatures(),
                preprocessing.RobustScaler(),
                feature_selection.SelectKBest(),
                feature_selection.GenericUnivariateSelect(),
                feature_selection.SelectPercentile(),
                feature_selection.SelectFpr(),
                feature_selection.SelectFdr(),
                feature_selection.SelectFwe(),
                feature_selection.VarianceThreshold(),
                decomposition.FactorAnalysis(random_state=0),
                decomposition.FastICA(random_state=0),
                decomposition.IncrementalPCA(),
                decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
                decomposition.LatentDirichletAllocation(random_state=0, n_jobs=N_JOBS),
                decomposition.MiniBatchDictionaryLearning(random_state=0, n_jobs=N_JOBS),
                decomposition.MiniBatchSparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.NMF(random_state=0),
                decomposition.PCA(random_state=0),
                decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.TruncatedSVD(random_state=0),
                kernel_approximation.Nystroem(random_state=0),
                kernel_approximation.RBFSampler(random_state=0),
                kernel_approximation.AdditiveChi2Sampler(),
                kernel_approximation.SkewedChi2Sampler(random_state=0),
                cluster.FeatureAgglomeration(),
                skrebate.ReliefF(n_jobs=N_JOBS),
                skrebate.SURF(n_jobs=N_JOBS),
                skrebate.SURFstar(n_jobs=N_JOBS),
                skrebate.MultiSURF(n_jobs=N_JOBS),
                skrebate.MultiSURFstar(n_jobs=N_JOBS),
                imblearn.under_sampling.ClusterCentroids(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.CondensedNearestNeighbour(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.EditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.RepeatedEditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.InstanceHardnessThreshold(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NearMiss(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NeighbourhoodCleaningRule(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.OneSidedSelection(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.RandomUnderSampler(random_state=0),
                imblearn.under_sampling.TomekLinks(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.RandomOverSampler(random_state=0),
                imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.BorderlineSMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SMOTENC(categorical_features=[], random_state=0, n_jobs=N_JOBS),
                imblearn.combine.SMOTEENN(random_state=0),
                imblearn.combine.SMOTETomek(random_state=0))

            newlist = []
            for obj in ev:
                if obj is None:
                    newlist.append(None)
                elif obj == 'all_0':
                    newlist.extend(preprocessings[0:35])
                elif obj == 'sk_prep_all':    # no KernelCenterer()
                    newlist.extend(preprocessings[0:7])
                elif obj == 'fs_all':
                    newlist.extend(preprocessings[7:14])
                elif obj == 'decomp_all':
                    newlist.extend(preprocessings[14:25])
                elif obj == 'k_appr_all':
                    newlist.extend(preprocessings[25:29])
                elif obj == 'reb_all':
                    newlist.extend(preprocessings[30:35])
                elif obj == 'imb_all':
                    newlist.extend(preprocessings[35:54])
                elif type(obj) is int and -1 < obj < len(preprocessings):
                    newlist.append(preprocessings[obj])
                elif hasattr(obj, 'get_params'):    # user-uploaded object
                    if 'n_jobs' in obj.get_params():
                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                    else:
                        newlist.append(obj)
                else:
                    sys.exit("Unsupported estimator type: %r" % (obj))

            search_params[param_name] = newlist

    return search_params
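# A minimal usage sketch (an assumption, not part of the original tool code):
# the dict returned by _eval_search_params is meant to serve as a parameter
# grid for a scikit-learn search object. The `params_builder` structure below
# is a hypothetical example matching the keys the function reads
# ('param_set', 'sp_name', 'sp_list').
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

params_builder = {'param_set': [{'sp_name': 'C', 'sp_list': '[0.1, 1.0, 10.0]'}]}
search_params = _eval_search_params(params_builder)   # -> {'C': [0.1, 1.0, 10.0]}
searcher = GridSearchCV(SVC(), param_grid=search_params, cv=5)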
                                          batch_size=4, n_jobs=-1)
mbsp.fit(X)
# X_transformed = transformer.transform(X)
# X_transformed.shape
# plt.plot(mbsp.components_[0, :]); plt.show()

#%%
X = data_pts_1
from sklearn import decomposition

mbdl = decomposition.MiniBatchDictionaryLearning(n_jobs=-1, n_components=20,
                                                 alpha=0.1, n_iter=200,
                                                 batch_size=5, random_state=0)
mbdl.fit(X)
# plt.plot(mbdl.components_[0, :]); plt.show()

#%%
mbdlp = decomposition.MiniBatchDictionaryLearning(n_jobs=-1, n_components=20,
                                                  alpha=1, n_iter=100,
                                                  batch_size=5,
                                                  positive_dict=True,
                                                  random_state=0)
mbdlp.fit(X)
     # NOTE: this fragment targets an older scikit-learn API; n_atoms, chunk_size,
     # k and NMF's beta/sparseness were later renamed or removed
     # (n_components, batch_size, n_clusters in current releases).
                       beta=5.0, tol=5e-3, sparseness='components'),
     False, False),

    ('Independent components - FastICA',
     decomposition.FastICA(n_components=n_components, whiten=True,
                           max_iter=10),
     True, True),

    ('Sparse comp. - MiniBatchSparsePCA',
     decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=1e-3,
                                      n_iter=100, chunk_size=3,
                                      random_state=rng),
     True, False),

    ('MiniBatchDictionaryLearning',
     decomposition.MiniBatchDictionaryLearning(n_atoms=15, alpha=5e-3,
                                               n_iter=50, chunk_size=3,
                                               random_state=rng),
     True, False),

    ('Cluster centers - MiniBatchKMeans',
     MiniBatchKMeans(k=n_components, tol=1e-3, chunk_size=20, max_iter=50,
                     random_state=rng),
     True, False)
]

###############################################################################
# Plot a sample of the input data
plot_gallery("First centered Olivetti faces", faces_centered[:n_components])
def get_search_params(params_builder):
    search_params = {}
    safe_eval = SafeEval(load_scipy=True, load_numpy=True)
    safe_eval_es = SafeEval(load_estimators=True)

    for p in params_builder['param_set']:
        search_p = p['search_param_selector']['search_p']
        if search_p.strip() == '':
            continue
        param_type = p['search_param_selector']['selected_param_type']

        lst = search_p.split(':')
        assert len(lst) == 2, (
            "Error, make sure there is one and only one colon in search parameter input.")
        literal = lst[1].strip()
        param_name = lst[0].strip()

        if param_name:
            if param_name.lower() == 'n_jobs':
                sys.exit("Parameter `%s` is invalid for search." % param_name)
            elif not param_name.endswith('-'):
                ev = safe_eval(literal)
                if param_type == 'final_estimator_p':
                    search_params['estimator__' + param_name] = ev
                else:
                    search_params['preprocessing_' + param_type[5:6] +
                                  '__' + param_name] = ev
            else:
                # only for estimator eval; `-` is appended to the end of the param
                # TODO: maybe add a regular expression check
                ev = safe_eval_es(literal)
                for obj in ev:
                    if 'n_jobs' in obj.get_params():
                        obj.set_params(n_jobs=N_JOBS)
                if param_type == 'final_estimator_p':
                    search_params['estimator__' + param_name[:-1]] = ev
                else:
                    search_params['preprocessing_' + param_type[5:6] +
                                  '__' + param_name[:-1]] = ev
        elif param_type != 'final_estimator_p':
            # TODO: regular expression check?
            ev = safe_eval_es(literal)
            preprocessors = [
                preprocessing.StandardScaler(),
                preprocessing.Binarizer(),
                preprocessing.Imputer(),
                preprocessing.MaxAbsScaler(),
                preprocessing.Normalizer(),
                preprocessing.MinMaxScaler(),
                preprocessing.PolynomialFeatures(),
                preprocessing.RobustScaler(),
                feature_selection.SelectKBest(),
                feature_selection.GenericUnivariateSelect(),
                feature_selection.SelectPercentile(),
                feature_selection.SelectFpr(),
                feature_selection.SelectFdr(),
                feature_selection.SelectFwe(),
                feature_selection.VarianceThreshold(),
                decomposition.FactorAnalysis(random_state=0),
                decomposition.FastICA(random_state=0),
                decomposition.IncrementalPCA(),
                decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
                decomposition.LatentDirichletAllocation(random_state=0, n_jobs=N_JOBS),
                decomposition.MiniBatchDictionaryLearning(random_state=0, n_jobs=N_JOBS),
                decomposition.MiniBatchSparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.NMF(random_state=0),
                decomposition.PCA(random_state=0),
                decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.TruncatedSVD(random_state=0),
                kernel_approximation.Nystroem(random_state=0),
                kernel_approximation.RBFSampler(random_state=0),
                kernel_approximation.AdditiveChi2Sampler(),
                kernel_approximation.SkewedChi2Sampler(random_state=0),
                cluster.FeatureAgglomeration(),
                skrebate.ReliefF(n_jobs=N_JOBS),
                skrebate.SURF(n_jobs=N_JOBS),
                skrebate.SURFstar(n_jobs=N_JOBS),
                skrebate.MultiSURF(n_jobs=N_JOBS),
                skrebate.MultiSURFstar(n_jobs=N_JOBS),
                imblearn.under_sampling.ClusterCentroids(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.CondensedNearestNeighbour(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.EditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.RepeatedEditedNearestNeighbours(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.InstanceHardnessThreshold(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NearMiss(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NeighbourhoodCleaningRule(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.OneSidedSelection(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.RandomUnderSampler(random_state=0),
                imblearn.under_sampling.TomekLinks(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.RandomOverSampler(random_state=0),
                imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.BorderlineSMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SMOTENC(categorical_features=[], random_state=0, n_jobs=N_JOBS),
                imblearn.combine.SMOTEENN(random_state=0),
                imblearn.combine.SMOTETomek(random_state=0)
            ]

            newlist = []
            for obj in ev:
                if obj is None:
                    newlist.append(None)
                elif obj == 'all_0':
                    newlist.extend(preprocessors[0:36])
                elif obj == 'sk_prep_all':    # no KernelCenterer()
                    newlist.extend(preprocessors[0:8])
                elif obj == 'fs_all':
                    newlist.extend(preprocessors[8:15])
                elif obj == 'decomp_all':
                    newlist.extend(preprocessors[15:26])
                elif obj == 'k_appr_all':
                    newlist.extend(preprocessors[26:30])
                elif obj == 'reb_all':
                    newlist.extend(preprocessors[31:36])
                elif obj == 'imb_all':
                    newlist.extend(preprocessors[36:55])
                elif type(obj) is int and -1 < obj < len(preprocessors):
                    newlist.append(preprocessors[obj])
                elif hasattr(obj, 'get_params'):    # user object
                    if 'n_jobs' in obj.get_params():
                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                    else:
                        newlist.append(obj)
                else:
                    sys.exit("Unsupported preprocessor type: %r" % (obj))

            search_params['preprocessing_' + param_type[5:6]] = newlist
        else:
            sys.exit("Parameter name of the final estimator can't be skipped!")

    return search_params
                       beta=5.0, tol=5e-3, sparseness='components'),
     False),

    ('Independent components - FastICA',
     decomposition.FastICA(n_components=n_components, whiten=True,
                           max_iter=10),
     True),

    ('Sparse comp. - MiniBatchSparsePCA',
     decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8,
                                      n_iter=100, chunk_size=3,
                                      random_state=rng),
     True),

    ('MiniBatchDictionaryLearning',
     decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                               n_iter=50, chunk_size=3,
                                               random_state=rng),
     True),

    ('Cluster centers - MiniBatchKMeans',
     MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20,
                     max_iter=50, random_state=rng),
     True),

    ('Factor Analysis components - FA',
     decomposition.FactorAnalysis(n_components=n_components, max_iter=2),
     True),
]

###############################################################################
# Plot a sample of the input data
def faces_decomposition():
    import logging
    from numpy.random import RandomState  # seed for the random number generator (Gaussian or other distributions)
    import matplotlib.pyplot as plt
    from time import time
    from sklearn.datasets import fetch_olivetti_faces
    from sklearn.cluster import MiniBatchKMeans
    from sklearn import decomposition

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')

    n_row, n_col = 2, 3
    n_components = n_row * n_col
    image_shape = (64, 64)
    rng = RandomState(0)

    # Load the dataset
    dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
    faces = dataset.data
    n_samples, n_features = faces.shape

    faces_centered = faces - faces.mean(axis=0)
    faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)
    print("Dataset consists of %d faces" % n_samples)  # number of samples

    def plot_gallery(title, images, n_col=n_col, n_row=n_row):
        plt.figure(figsize=(2. * n_col, 2.26 * n_row))
        plt.suptitle(title, size=16)
        for i, comp in enumerate(images):
            plt.subplot(n_row, n_col, i + 1)
            vmax = max(comp.max(), -comp.min())
            plt.imshow(comp.reshape(image_shape), cmap=plt.cm.gray,
                       interpolation='nearest', vmin=-vmax, vmax=vmax)
            plt.xticks(())
            plt.yticks(())
        plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

    estimators = [
        ('Eigenfaces - PCA using randomized SVD',
         decomposition.PCA(n_components=n_components, svd_solver='randomized',
                           whiten=True),
         True),
        ('Non-negative components - NMF',
         decomposition.NMF(n_components=n_components, init='nndsvda', tol=5e-3),
         False),
        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True),
         True),
        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8,
                                          n_iter=100, batch_size=3,
                                          random_state=rng),
         True),
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng),
         True),
        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20,
                         max_iter=50, random_state=rng),
         True),
        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components, max_iter=2),
         True),
    ]

    # #########################################################################
    # Plot a sample of the input data
    plot_gallery("First centered Olivetti faces", faces_centered[:n_components])

    # #########################################################################
    # Do the estimation and plot it
    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_

        # Plot an image representing the pixelwise variance provided by the
        # estimator, e.g. its noise_variance_ attribute. The Eigenfaces
        # estimator, via the PCA decomposition, also provides a scalar
        # noise_variance_ (the mean of pixelwise variance) that cannot be
        # displayed as an image, so we skip it.
        if (hasattr(estimator, 'noise_variance_') and
                estimator.noise_variance_.ndim > 0):  # Skip the Eigenfaces case
            plot_gallery("Pixelwise variance",
                         estimator.noise_variance_.reshape(1, -1),
                         n_col=1, n_row=1)
        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()
def prepare_dictionaries(Samples, Filter_specs, Dict_alpha=2,
                         Dict_minibatch_size=5, Dict_epochs=1, Dict_jobs=1,
                         Debug_flag=False):
    """
    Prepare dictionary filters for the convolution layers of fluke_net.

    Parameters:
    Samples ........... A tensor of all the samples used to make the dictionaries.
                        This tensor should be of format:
                        [Num_samples, Channels, Height, Width]
                        The type of these tensors should be 64 bit floats.
    Filter_specs ...... A list stating how many filters must be made and their
                        specifications. See the relevant argument for details.

    Return values:
    Filters_output .... A list of tensors. Each tensor is a set of all the kernels
                        for a layer. The tensors are of the following format:
                        [Num_of_kernels, Channels, Kernel_height, Kernel_width]
    """
    Filters_output = []

    for Layer, (C, K, M, _) in enumerate(Filter_specs):
        if Debug_flag:
            print('Layer ' + str(Layer) + ' Samples size: ' + str(Samples.shape))

        # Extract patches from all the samples.
        # The first unfold returns a view of all slices of size 'Kernel_height',
        # unfolding along the height dimension. The second call handles unfolding
        # along the width dimension with slices of size 'Kernel_width'. The end
        # result is a tensor view of the samples cut into the patches needed for
        # training. Both use a stride of 1.
        # This results in a tensor of the following format:
        # [Num_samples, Channels, Num_height_slices, Num_width_slices, K, K]
        Patches = Samples.unfold(2, K, 1).unfold(3, K, 1).cpu()
        if Debug_flag:
            print('Layer ' + str(Layer) + ' Patches view size: ' + str(Patches.shape))

        # Move the channels dimension to the front and reshape the tensor to the
        # following format: [Channel, Num_patches, Patch_data]
        Patches = Patches.permute(1, 0, 2, 3, 4, 5)
        Patches = Patches.reshape(Patches.shape[0], -1, K**2)
        if Debug_flag:
            print('Layer ' + str(Layer) + ' Patches reshaped size: ' + str(Patches.shape))

        # Fit the dictionary and append the atoms to the list of finished kernels.
        # We must loop through each channel of the Samples to compute the parts of
        # the kernels that will act on that channel.
        Kernels_list = []
        for Channel in range(Patches.shape[0]):
            # NOTE:
            # The sklearn functions take 'array-like' as parameters for fitting.
            # I am just passing in the tensors and it seems to be working fine;
            # I don't think I need to convert these back to numpy ndarrays before use.

            # Initialize a dictionary for the given channel of the samples.
            Dict = skde.MiniBatchDictionaryLearning(
                n_components=C,                  # number of dictionary elements to extract
                alpha=Dict_alpha,                # sparsity-controlling parameter
                n_iter=Dict_epochs,              # number of epochs per partial_fit()
                batch_size=Dict_minibatch_size,
                transform_algorithm='omp',
                n_jobs=Dict_jobs)                # number of parallel jobs to run

            # Fit the dictionary to the current channel's patches.
            # fit() takes an array parameter of the following format:
            # [Num_samples, Num_features]
            Dict.fit(Patches[Channel, :, :])

            # Reshape the atoms (dictionary components) into kernels and append
            # them to our output list. The components_ array is of format:
            # [Num_components, Num_features]
            Kernels_list.append(Dict.components_.reshape((C, K, K, 1)))

        # Concatenate the list of individual kernels into an ndarray.
        Kernels = np.concatenate(Kernels_list, axis=3)

        # Convert the ndarray of kernels into a tensor. Load it using the same
        # datatype and device as the Samples these kernels will convolve.
        Kernels_tensor = torch.tensor(Kernels, dtype=Samples.dtype,
                                      device=Samples.device)
        # Must also reorder so that it follows the NCHW format of tensors.
        Kernels_tensor = Kernels_tensor.permute(0, 3, 1, 2)
        if Debug_flag:
            print('Layer ' + str(Layer) + ' Kernels size: ' + str(Kernels_tensor.shape))

        # Create a feature map by convolving over Samples with the filters we
        # made from them.
        Convolve_out = torch.nn.functional.conv2d(Samples, Kernels_tensor)

        # Normalize the feature map according to the activation function (ReLU).
        Convolve_out = torch.nn.functional.relu(Convolve_out)

        # Include max pooling when specified.
        if not M == 0:
            Convolve_out = torch.nn.functional.max_pool2d(Convolve_out, M)
        Samples = Convolve_out

        # Append the generated filters to the return list.
        Filters_output.append(Kernels_tensor)

    return Filters_output
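# A minimal usage sketch (an assumption, not from the original repository) for
# prepare_dictionaries: random grayscale samples and a single-layer spec of the
# form (num_kernels, kernel_size, max_pool, _) inferred from the loop above.
import torch

Samples = torch.rand(8, 1, 32, 32, dtype=torch.float64)   # [Num_samples, Channels, H, W]
Filter_specs = [(6, 5, 2, None)]                           # C=6 atoms, K=5 kernel, M=2 pooling
Filters = prepare_dictionaries(Samples, Filter_specs, Debug_flag=True)
print(Filters[0].shape)                                    # expected: [6, 1, 5, 5]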
def plot_faces_decomposition():
    # Display progress logs on stdout
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')

    n_row, n_col = 2, 3
    n_components = n_row * n_col
    image_shape = (64, 64)
    rng = RandomState(0)

    # #########################################################################
    # Load faces data
    faces, _ = fetch_olivetti_faces(return_X_y=True, shuffle=True,
                                    random_state=rng)
    n_samples, n_features = faces.shape

    # global centering
    faces_centered = faces - faces.mean(axis=0)
    # local centering
    faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)
    print("Dataset consists of %d faces" % n_samples)

    def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
        plt.figure(figsize=(2. * n_col, 2.26 * n_row))
        plt.suptitle(title, size=16)
        for i, comp in enumerate(images):
            plt.subplot(n_row, n_col, i + 1)
            vmax = max(comp.max(), -comp.min())
            plt.imshow(comp.reshape(image_shape), cmap=cmap,
                       interpolation='nearest', vmin=-vmax, vmax=vmax)
            plt.xticks(())
            plt.yticks(())
        plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

    # #########################################################################
    # List of the different estimators, whether to center and transpose the
    # problem, and whether the transformer uses the clustering API.
    estimators = [
        ('Eigenfaces - PCA using randomized SVD',
         decomposition.PCA(n_components=n_components, svd_solver='randomized',
                           whiten=True),
         True),
        ('Non-negative components - NMF',
         decomposition.NMF(n_components=n_components, init='nndsvda', tol=5e-3),
         False),
        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True),
         True),
        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8,
                                          n_iter=100, batch_size=3,
                                          random_state=rng),
         True),
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng),
         True),
        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20,
                         max_iter=50, random_state=rng),
         True),
        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components, max_iter=20),
         True),
    ]

    # #########################################################################
    # Plot a sample of the input data
    plot_gallery("First centered Olivetti faces", faces_centered[:n_components])

    # #########################################################################
    # Do the estimation and plot it
    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_

        # Plot an image representing the pixelwise variance provided by the
        # estimator, e.g. its noise_variance_ attribute. The Eigenfaces
        # estimator, via the PCA decomposition, also provides a scalar
        # noise_variance_ (the mean of pixelwise variance) that cannot be
        # displayed as an image, so we skip it.
        if (hasattr(estimator, 'noise_variance_') and
                estimator.noise_variance_.ndim > 0):  # Skip the Eigenfaces case
            plot_gallery("Pixelwise variance",
                         estimator.noise_variance_.reshape(1, -1),
                         n_col=1, n_row=1)
        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()

    # #########################################################################
    # Various positivity constraints applied to dictionary learning.
    estimators = [
        ('Dictionary learning',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng),
         True),
        ('Dictionary learning - positive dictionary',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng,
                                                   positive_dict=True),
         True),
        ('Dictionary learning - positive code',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   fit_algorithm='cd',
                                                   random_state=rng,
                                                   positive_code=True),
         True),
        ('Dictionary learning - positive dictionary & code',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   fit_algorithm='cd',
                                                   random_state=rng,
                                                   positive_dict=True,
                                                   positive_code=True),
         True),
    ]

    # #########################################################################
    # Plot a sample of the input data
    plot_gallery("First centered Olivetti faces", faces_centered[:n_components],
                 cmap=plt.cm.RdBu)

    # #########################################################################
    # Do the estimation and plot it
    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        components_ = estimator.components_
        plot_gallery(name, components_[:n_components], cmap=plt.cm.RdBu)

    plt.show()
    plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

# #############################################################################
# List of the different estimators, whether to center and transpose the
# problem, and whether the transformer uses the clustering API.
estimators = [
    ('PCA',
     decomposition.PCA(n_components=n_components),
     True),
    ('FastICA',
     decomposition.FastICA(n_components=n_components),
     True),
    ('MiniBatchDictionaryLearning',
     decomposition.MiniBatchDictionaryLearning(n_components=n_components),
     True)
]

for name, estimator, center in estimators:
    print("Extracting the top %d %s..." % (n_components, name))
    t0 = time()
    data = faces_centered
    trans_data = estimator.fit_transform(data)
    train_time = (time() - t0)
    print("done in %0.3fs" % train_time)
    components_ = estimator.components_
    plot_gallery('%s - Train time %.1fs' % (name, train_time),
                 components_, trans_data)
def main():
    dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
    faces = dataset.data
    n_samples, n_features = faces.shape

    # global centering
    faces_centered = faces - faces.mean(axis=0)
    # local centering
    faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)
    print("Dataset consists of %d faces" % n_samples)

    estimators = [
        ('Eigenfaces - PCA using randomized SVD',
         decomposition.PCA(n_components=n_components, svd_solver='randomized',
                           whiten=True),
         True),
        ('Non-negative components - NMF',
         decomposition.NMF(n_components=n_components, init='nndsvda', tol=5e-3),
         False),
        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True),
         True),
        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8,
                                          n_iter=100, batch_size=3,
                                          random_state=rng),
         True),
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng),
         True),
        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20,
                         max_iter=50, random_state=rng),
         True),
        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components, max_iter=2),
         True),
    ]

    plot_gallery("First centered Olivetti faces", faces_centered[:n_components])

    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_

        # PCA exposes only a scalar noise_variance_ that cannot be shown as an
        # image, so we skip it.
        if (hasattr(estimator, 'noise_variance_') and
                estimator.noise_variance_.ndim > 0):  # Skip the Eigenfaces case
            plot_gallery("Pixelwise variance",
                         estimator.noise_variance_.reshape(1, -1),
                         n_col=1, n_row=1)
        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()
                              random_state=rng)
pca_model.fit(train_data_zeroavg)
train_data_pc = pca_model.transform(train_data_zeroavg)  # compressed PC-space representation of the training data

# -----------------------------------------------------------------------------
# Train a sparse coding model on the MNIST data
dictionary_size = n_pcs * 2   # number of components in the dictionary
alpha = .5                    # sparseness parameter
n_iter = 500                  # number of iterations

# Initialize the MNIST sparse coding model
sparse_model = decomposition.MiniBatchDictionaryLearning(
    n_components=dictionary_size,
    alpha=alpha,
    fit_algorithm='cd',
    n_iter=n_iter,
    random_state=rng)

# Fit the model
sparse_model.fit(train_data_pc)
components = pca_model.inverse_transform(sparse_model.components_)  # get components in pixel space

k_components = 50
inds = sample(range(np.size(components, 0)), k_components)
components_subset = components[inds, :]
plot_components(components_subset, 'coolwarm')
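# A minimal sketch (an assumption, not part of the original script) of how new
# digits could be encoded and reconstructed with the model fit above; the
# `test_data_zeroavg` array is a hypothetical placeholder preprocessed the same
# way as the training data.
test_pc = pca_model.transform(test_data_zeroavg)       # project into PC space
test_codes = sparse_model.transform(test_pc)           # sparse codes over the dictionary
test_recon = pca_model.inverse_transform(test_codes @ sparse_model.components_)  # back to pixel space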
def MainInforExtract(dataset, image_shape):
    """
    :param dataset: A numpy array of shape (n_samples, n_features); the input
                    images must be grayscale.
    :param image_shape: the shape of the input images
    :return: No return
    """
    # Display progress logs on stdout
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')

    n_row, n_col = 1, 1
    n_components = n_row * n_col

    # #########################################################################
    # Load faces data
    n_samples, n_features = dataset.shape

    # global centering
    data_centered = dataset - dataset.mean(axis=0)
    # local centering
    data_centered = data_centered - data_centered.mean(axis=1).reshape(n_samples, -1)
    print("Dataset consists of %d samples" % n_samples)

    def plot_gallery(title, images, n_col=n_col, n_row=n_row):
        plt.figure(figsize=(2. * n_col, 2.26 * n_row))
        plt.suptitle(title, size=16)
        for i, comp in enumerate(images):
            maxV = np.max(comp)
            minV = np.min(comp)
            comp = (comp - minV) / (maxV - minV)
            plt.subplot(n_row, n_col, i + 1)
            plt.imshow(comp.reshape(image_shape))
            plt.xticks(())
            plt.yticks(())
        plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

    # #########################################################################
    # List of the different estimators, whether to center and transpose the
    # problem, and whether the transformer uses the clustering API.
    estimators = [
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=12, alpha=0.12,
                                                   random_state=512,
                                                   batch_size=4, n_iter=3090),
         True),
    ]

    # #########################################################################
    # Do the estimation and plot it
    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = dataset
        if center:
            data = data_centered
        print("Input data shape is {}".format(data.shape))
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_
        print("Components shape {}".format(components_.shape))

        # Plot an image representing the pixelwise variance provided by the
        # estimator, e.g. its noise_variance_ attribute. The Eigenfaces
        # estimator, via the PCA decomposition, also provides a scalar
        # noise_variance_ (the mean of pixelwise variance) that cannot be
        # displayed as an image, so we skip it.
        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()
                       whiten=True),
     True),

    ('Non-negative components - NMF',
     decomposition.NMF(n_components=n_components, init='nndsvda', tol=5e-3),
     False),

    ('Independent components - FastICA',
     decomposition.FastICA(n_components=n_components, whiten=True),
     True),

    ('Sparse comp. - MiniBatchSparsePCA',
     decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8,
                                      n_iter=100, batch_size=3,
                                      random_state=rng),
     True),

    ('MiniBatchDictionaryLearning',
     decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                               n_iter=50, batch_size=3,
                                               random_state=rng),
     True),

    ('Cluster centers - MiniBatchKMeans',
     MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20,
                     max_iter=50, random_state=rng),
     True),

    ('Factor Analysis components - FA',
     decomposition.FactorAnalysis(n_components=n_components, max_iter=20),
     True),
]

# #############################################################################
# Plot a sample of the input data
"Sparse components - MiniBatchSparsePCA", batch_pca_estimator.components_[:n_components], ) # %% # Dictionary learning # ^^^^^^^^^^^^^^^^^^^ # # By default, :class:`MiniBatchDictionaryLearning` divides the data into # mini-batches and optimizes in an online manner by cycling over the # mini-batches for the specified number of iterations. # %% batch_dict_estimator = decomposition.MiniBatchDictionaryLearning( n_components=n_components, alpha=0.1, n_iter=50, batch_size=3, random_state=rng) batch_dict_estimator.fit(faces_centered) plot_gallery("Dictionary learning", batch_dict_estimator.components_[:n_components]) # %% # Cluster centers - MiniBatchKMeans # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # # `MiniBatchKMeans` is computationally efficient and implements on-line # learning with a `partial_fit` method. That is why it could be beneficial # to enhance some time-consuming algorithms with `MiniBatchKMeans`. # %%