def dictionay_learning_MHOF_online(training_samples_num=400):
    from MHOF_Extraction import MHOF_Extraction
    from MHOF_histogram_block import MHOF_histogram_block
    from sklearn.decomposition import MiniBatchDictionaryLearning
    import numpy as np
    import cv2
    import video

    cam = video.create_capture('Crowd-Activity-All.avi')
    height_block_num = 4
    width_block_num = 5
    bin_num = 16
    ret, prev = cam.read()
    ret, img = cam.read()
    flow_H = MHOF_Extraction(prev, img)
    flow_hist_H = MHOF_histogram_block(flow_H, height_block_num, width_block_num, bin_num)
    # the estimator expects a 2-D array of shape (n_samples, n_features)
    flow_hist_H = np.reshape(flow_hist_H, [1, flow_hist_H.size])
    dico = MiniBatchDictionaryLearning(1, alpha=1, n_iter=500)
    dic = dico.fit(flow_hist_H).components_
    for i in range(training_samples_num):
        # advance the frame pair so flow is computed between consecutive frames
        prev = img
        ret, img = cam.read()
        flow_H = MHOF_Extraction(prev, img)
        flow_hist_H = MHOF_histogram_block(flow_H, height_block_num, width_block_num, bin_num)
        flow_hist_H = np.reshape(flow_hist_H, [1, flow_hist_H.size])
        dico = MiniBatchDictionaryLearning(i + 1, alpha=1, n_iter=500, dict_init=dic)
        dic = dico.fit(flow_hist_H).components_
    return dic
def sklearn_check(img, patch_size, dic_size, T=1000):
    patch_shape = (patch_size, patch_size)
    patches = extract_patches_2d(img, patch_shape)
    patches = patches.reshape(patches.shape[0], -1)
    patches = center(patches)
    dl = MiniBatchDictionaryLearning(dic_size, n_iter=T)
    dl.fit(patches)
    D = dl.components_.T
    return D
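# `center` is not defined in the snippet above. A minimal sketch, assuming it is
# meant to standardize each patch dimension (zero mean, unit variance); the real
# project may normalize differently:
import numpy as np

def center(patches, eps=1e-8):
    # hypothetical helper: per-feature standardization of the patch matrix
    patches = patches - np.mean(patches, axis=0)
    patches = patches / (np.std(patches, axis=0) + eps)
    return patches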
def to_sparse(X, dim):
    sparse_dict = MiniBatchDictionaryLearning(dim)
    sparse_dict.fit(X)
    sparse_vectors = sparse_encode(X, sparse_dict.components_)
    for i in sparse_vectors:
        print(i)
    return sparse_vectors
class BOW_sparsecoding(BOW):

    def codebook(self):
        self.mbdl = MiniBatchDictionaryLearning(self.N_codebook)
        self.mbdl.fit(self.raw_features)

    def bow_feature_extract(self, path):
        des = self.raw_feature_extract(path)
        out = sum(sparse_encode(des, self.mbdl.components_))
        out = np.array([out])
        return out
def test_dict_learning_online_verbosity():
    n_components = 5
    # test verbosity
    from cStringIO import StringIO
    import sys

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1)
    dico.fit(X)
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2)
    dico.fit(X)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=1)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=2)
    sys.stdout = old_stdout

    assert_true(dico.components_.shape == (n_components, n_features))
def buildmodel2():
    "Build a paired glasses / no-glasses model"
    modelrec = np.load('cut_rec.npy')
    modelglass = np.load('glassline.npy')[:modelrec.shape[0]]
    linkedmodel = np.empty((modelrec.shape[0], modelrec.shape[1] + modelglass.shape[1]), 'f')
    linkedmodel[:, :modelrec.shape[1]] = modelrec
    linkedmodel[:, modelrec.shape[1]:] = modelglass
    # Train
    from sklearn.decomposition import MiniBatchDictionaryLearning
    learning = MiniBatchDictionaryLearning(500, verbose=True)
    learning.fit(linkedmodel)
    import cPickle
    cPickle.dump(learning, file('sparselinked', 'wb'), -1)
def main(games_path=None):
    if games_path is None:
        games_path = 'specmine/data/go_games/2010-01.pickle.gz'

    with specmine.util.openz(games_path) as games_file:
        games = pickle.load(games_file)

    boards = None  # numpy array of shape (n, 9, 9)
    for game in games:
        if boards is None:
            boards = games[game].grids
        else:
            boards = numpy.vstack((boards, games[game].grids))

    print 'boards shape: ', boards.shape
    boards = boards.reshape((boards.shape[0], -1))
    print 'boards reshaped: ', boards.shape

    print 'Learning the dictionary... '
    t0 = time()
    # n_atoms is the pre-0.13 scikit-learn name for n_components
    dico = MiniBatchDictionaryLearning(n_atoms=100, alpha=1, n_iter=500)
    V = dico.fit(boards).components_
    dt = time() - t0
    print 'done in %.2fs.' % dt

    #pl.figure(figsize=(4.2, 4))
    for i, comp in enumerate(V[:100]):
        pl.subplot(10, 10, i + 1)
        # each atom is a flattened 9x9 board
        pl.imshow(comp.reshape((9, 9)), cmap=pl.cm.gray_r)  # interpolation='nearest'
        pl.xticks(())
        pl.yticks(())
def scskl_dico_learning(list_pickled_array, n_atoms, maxepoch=5, maxiter=100):
    D = None
    for e in range(maxepoch):
        for a in list_pickled_array:
            data = joblib.load(a)
            dico = MiniBatchDictionaryLearning(n_components=n_atoms, n_iter=maxiter, dict_init=D)
            D = dico.fit(data).components_.astype(np.float32)
    return D
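# A minimal usage sketch for the online learner above. The file names are
# hypothetical; each file is assumed to hold a (n_samples, n_features) array
# saved with joblib.dump:
files = ['patches_part0.pkl', 'patches_part1.pkl']
D = scskl_dico_learning(files, n_atoms=64, maxepoch=2, maxiter=50)
print(D.shape)  # (64, n_features): one dictionary atom per row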
def test_dict_learning_online_verbosity():
    n_components = 5
    # test verbosity
    from sklearn.externals.six.moves import cStringIO as StringIO
    import sys

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1,
                                       random_state=0)
    dico.fit(X)
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2,
                                       random_state=0)
    dico.fit(X)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=1,
                         random_state=0)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=2,
                         random_state=0)
    sys.stdout = old_stdout

    assert_true(dico.components_.shape == (n_components, n_features))
def create_dictionaries(n_codewords=20):
    dataset_features = np.load('MSR_Features_hog-hof-skel1360423760.27.dat')
    hogs = []
    hofs = []
    skels = []
    for n in dataset_features.keys():
        hogs += dataset_features[n]['hog']
        hofs += dataset_features[n]['hof']
        skels += [normalize_skeleton(dataset_features[n]['skel_world'])]

    ''' Input should be features[n_samples, n_features] '''
    hogs = np.vstack(hogs)
    hofs = np.vstack(hofs)
    skels = np.vstack(skels)

    hog_dict = MiniBatchDictionaryLearning(n_codewords, n_jobs=-1, verbose=True,
                                           transform_algorithm='lasso_lars')
    hog_dict.fit(hogs)
    hof_dict = MiniBatchDictionaryLearning(n_codewords, n_jobs=-1, verbose=True,
                                           transform_algorithm='lasso_lars')
    hof_dict.fit(hofs)
    skels_dict = MiniBatchDictionaryLearning(n_codewords, n_jobs=-1, verbose=True,
                                             transform_algorithm='lasso_lars')
    skels_dict.fit(skels)

    feature_dictionaries = {'hog': hog_dict, 'hof': hof_dict, 'skel': skels_dict}

    with open('MSR_Dictionaries_hog-hof-skel_%f.dat' % time.time(), 'wb') as outfile:
        pickle.dump(feature_dictionaries, outfile, protocol=pickle.HIGHEST_PROTOCOL)
def test_dict_learning_online_verbosity():
    n_components = 5
    # test verbosity
    from io import StringIO
    import sys

    old_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1,
                                           random_state=0)
        dico.fit(X)
        dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2,
                                           random_state=0)
        dico.fit(X)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=1,
                             random_state=0)
        dict_learning_online(X, n_components=n_components, alpha=1, verbose=2,
                             random_state=0)
    finally:
        sys.stdout = old_stdout

    assert dico.components_.shape == (n_components, n_features)
def learning_sparse_coding(X, components=None):
    """
    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.DictionaryLearning.html
    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.sparse_encode.html
    """
    if components is None:
        print('Learning the dictionary...')
        t0 = time()
        diclearner = MiniBatchDictionaryLearning(n_components=100, verbose=True)
        components = diclearner.fit(X).components_
        np.savetxt('components_of_convfeat.txt', components)
        dt = time() - t0
        print('done in %.2fs.' % dt)

    codes = sparse_encode(X, components)
    np.savetxt('sparse_codes_of_convfeat.txt', codes)
def train_sparse_coding(feature_list, patch_list, dict_size=256, transform_alpha=0.5, n_iter=50):
    """
    Train sparse coding with mini-batches.
    feature_list: list of feature arrays to train on
    patch_list: list of the corresponding result patches
    :return sc_list
    """
    sc_list = []
    i = 0
    for feature, patch in zip(feature_list, patch_list):
        i = i + 1
        '''
        Because of the relative scale of the concatenated values, sparse coding may
        ignore the smaller features; the x10 scaling below should be replaced with a
        proper feature normalization method.
        The stronger the correlation, the more useful every vector is, so training
        takes longer.
        '''
        dico = None
        X = np.concatenate((feature, patch), axis=1)
        if len(X) > 100000:
            np.random.shuffle(X)
            X = X[:90000]
        if len(X) < 5000:
            print "Entering DictionaryLearning mode"
            dico = MiniBatchDictionaryLearning(batch_size=1000,
                                               transform_algorithm='lasso_lars',
                                               fit_algorithm='lars',
                                               transform_n_nonzero_coefs=5,
                                               n_components=len(X)/50,
                                               dict_init=X[:len(X)/50],
                                               n_iter=n_iter,
                                               transform_alpha=transform_alpha,
                                               verbose=10, n_jobs=-1)
        else:
            print "Entering MiniBatchDictionaryLearning mode"
            dico = MiniBatchDictionaryLearning(batch_size=1000,
                                               transform_algorithm='lasso_lars',
                                               fit_algorithm='lars',
                                               transform_n_nonzero_coefs=5,
                                               n_components=len(X)/50,
                                               dict_init=X[:len(X)/50],
                                               n_iter=n_iter,
                                               transform_alpha=transform_alpha,
                                               verbose=10, n_jobs=-1)
        V = dico.fit(X).components_
        sc_list.append(V)
        file_name = "./tmp_file/_tmp_sc_list_new_clsd_raw_%d.pickle" % (i)
        sc_file = open(file_name, 'wb')
        cPickle.dump(sc_list, sc_file, 1)
        sc_file.close()
    return sc_list
# SPARSITY ON IMAGENET

# SHUFFLING
ind = range(len(imagenet_targets))
np.random.shuffle(ind)
imagenet_targets = imagenet_targets[ind]
imagenet_features = imagenet_features[ind, :]

# Dictionary Learning on Source
sparse_components = 200
dict_sparse = MiniBatchDictionaryLearning(alpha=1, n_components=sparse_components,
                                          verbose=3, batch_size=10, n_iter=200)
dict_sparse.fit(imagenet_features)
Ds_0 = dict_sparse.components_
coder = SparseCoder(dictionary=Ds_0)
Rs_0 = coder.transform(imagenet_features)

# classification using sparse features
from sklearn import cross_validation
model = OneVsRestClassifier(LinearSVC(random_state=0))
parameters = {'estimator__C': [0.01, 0.1, 1, 10]}
clf = grid_search.GridSearchCV(model, parameters, score_func=accuracy_score)
scores = cross_validation.cross_val_score(clf, Rs_0, imagenet_targets, cv=10)
patch_size = (m, m)
patches = extract_patches_2d(img1_gray_re, patch_size)
patches = patches.reshape(patches.shape[0], -1)

# remove the mean value and do the normalisation
patches -= np.mean(patches, axis=0)
patches /= np.std(patches, axis=0)
print('done in %.2fs.' % (time() - t0))
print(patches.shape)

# Learn the dictionary from reference patches
print('Learning the dictionary...')
t0 = time()
dico = MiniBatchDictionaryLearning(n_components=200, alpha=0.5, n_iter=400)  # TODO: check with different parameters
V = dico.fit(patches).components_
dt = time() - t0
print('done in %.2fs.' % dt)

# show the learned dictionary as patches
plt.figure(figsize=(6, 6))
for i, comp in enumerate(V[:100]):  # show the first 100 patches
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r, interpolation='nearest')
    plt.xticks(())
    plt.yticks(())
plt.suptitle('Dictionary learned from patches\n' +
             'Train time %.1fs on %d patches' % (dt, len(patches)), fontsize=16)
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

########################################
def test_dict_learning_online_estimator_shapes():
    n_components = 5
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, random_state=0)
    dico.fit(X)
    assert_true(dico.components_.shape == (n_components, n_features))
def learn_basis_from_unlabeled_data(unlabeled_examples, num_components, alpha, max_iter):
    dic = MiniBatchDictionaryLearning(n_components=num_components, alpha=alpha,
                                      n_iter=max_iter)
    return dic.fit(unlabeled_examples).components_
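# A self-contained sketch of the fit-then-encode pattern used throughout these
# snippets, on random stand-in data (sizes are arbitrary), showing what the
# returned basis looks like and how it pairs with sparse_encode:
import numpy as np
from sklearn.decomposition import sparse_encode

X_unlabeled = np.random.randn(500, 64)            # 500 samples, 64 features
basis = learn_basis_from_unlabeled_data(X_unlabeled, num_components=32,
                                        alpha=1.0, max_iter=200)
print(basis.shape)                                # (32, 64): atoms are rows
codes = sparse_encode(X_unlabeled[:10], basis)    # sparse codes for 10 samples
print(codes.shape)                                # (10, 32)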
class SparseApproxSpectrum(object): """class for 2D patch analysis of audio files initialization: patch_size - size of time-frequency 2D patches in spectrogram units (freq,time) [(12,12)] max_samples - if num audio patches exceeds this threshold, randomly sample spectrum [1000000] **omp_args - keyword arguments to OrthogonalMatchingPursuit(...) [None] """ def __init__(self, patch_size=(12,12), max_samples=1000000, **omp_args): self.patch_size = patch_size self.max_samples = max_samples self.omp = OrthogonalMatchingPursuit(**omp_args) self.D = None self.data = None self.components = None self.zscore=False self.log_amplitude=False def _extract_data_patches(self, X, zscore, log_amplitude): "utility method for converting spectrogram data to 2D patches " self.zscore=zscore self.log_amplitude=log_amplitude self.X = X if self.log_amplitude: X = np.log(1+X) data = extract_patches_2d(X, self.patch_size) data = data.reshape(data.shape[0], -1) if len(data)>self.max_samples: data = np.random.permutation(data)[:self.max_samples] print data.shape if self.zscore: self.mn = np.mean(data, axis=0) self.std = np.std(data, axis=0) data -= self.mn data /= self.std self.data = data def make_gabor_field(self, X, zscore=True, log_amplitude=True, thetas=range(4), sigmas=(1,3), frequencies=(0.05, 0.25)) : """Given a spectrogram, prepare 2D patches and Gabor filter bank kernels inputs: X - spectrogram data (frequency x time) zscore - whether to zscore the ensemble of 2D patches [True] log_amplitude - whether to apply log(1+X) scaling of spectrogram data [True] thetas - list of 2D Gabor filter orientations in units of pi/4. [range(4)] sigmas - list of 2D Gabor filter standard deviations in oriented direction [(1,3)] frequencies - list of 2D Gabor filter frequencies [(0.05,0.25)] outputs: self.data - 2D patches of input spectrogram self.D.components_ - Gabor dictionary of thetas x sigmas x frequencies atoms """ self._extract_data_patches(X, zscore, log_amplitude) self.n_components = len(thetas)*len(sigmas)*len(frequencies) self.thetas = thetas self.sigmas = sigmas self.frequencies = frequencies a,b = self.patch_size self.kernels = [] for theta in thetas: theta = theta / 4. * np.pi for sigma in sigmas: for frequency in frequencies: kernel = np.real(gabor_kernel(frequency, theta=theta, sigma_x=sigma, sigma_y=sigma)) c,d = kernel.shape if c<=a: z = np.zeros(self.patch_size) z[(a/2-c/2):(a/2-c/2+c),(b/2-d/2):(b/2-d/2+d)] = kernel else: z = kernel[(c/2-a/2):(c/2-a/2+a),(d/2-b/2):(d/2-b/2+b)] self.kernels.append(z.flatten()) class Bunch: def __init__(self, **kwds): self.__dict__.update(kwds) self.D = Bunch(components_ = np.vstack(self.kernels)) def extract_codes(self, X, n_components=16, zscore=True, log_amplitude=True, **mbl_args): """Given a spectrogram, learn a dictionary of 2D patch atoms from spectrogram data inputs: X - spectrogram data (frequency x time) n_components - how many components to extract [16] zscore - whether to zscore the ensemble of 2D patches [True] log_amplitude - whether to apply log(1+X) scaling of spectrogram data [True] **mbl_args - keyword arguments for MiniBatchDictionaryLearning.fit(...) [None] outputs: self.data - 2D patches of input spectrogram self.D.components_ - dictionary of learned 2D atoms for sparse coding """ self._extract_data_patches(X, zscore, log_amplitude) self.n_components = n_components self.dico = MiniBatchDictionaryLearning(n_components=self.n_components, **mbl_args) print "Dictionary learning from data..." 
self.D = self.dico.fit(self.data) def plot_codes(self, cbar=False, show_axis=False, **kwargs): "plot the learned or generated 2D sparse code dictionary" N = int(np.ceil(np.sqrt(self.n_components))) kwargs.setdefault('cmap', plt.cm.gray_r) kwargs.setdefault('origin','bottom') kwargs.setdefault('interpolation','nearest') for i, comp in enumerate(self.D.components_): plt.subplot(N, N, i+1) plt.imshow(comp.reshape(self.patch_size), **kwargs) if cbar: plt.colorbar() if not show_axis: plt.axis('off') plt.xticks(()) plt.yticks(()) plt.title('%d'%(i)) plt.suptitle('Dictionary of Spectrum Patches\n', fontsize=14) plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) def extract_audio_dir_codes(self, dir_expr='/home/mkc/exp/FMRI/stimuli/Wav6sRamp/*.wav', **mbl_args): """apply dictionary learning to entire directory of audio files (requires LOTS of RAM) inputs: **mbl_args - keyword arguments for MiniBatchDictionaryLearning.fit(...) [None] """ flist=glob.glob(dir_expr) self.X = np.vstack([br.feature_scale(br.LogFrequencySpectrum(f, nbpo=24, nhop=1024).X,normalize=1).T for f in flist]).T self.D = extract_codes(self.X, **mbl_args) def _get_approximation_coefs(self, data, components): """utility function to fit dictionary components to data inputs: data - spectrogram data (frqeuency x time) [None] components - the dictionary components to fit to the data [None] """ w = np.array([self.omp.fit(components.T, d.T).coef_ for d in data]) return w def reconstruct_spectrum(self, w=None, randomize=False): """reconstruct by fitting current 2D dictionary to self.data inputs: w - per-component reconstruction weights [None=calculate weights] randomize - randomly permute components after getting weights [False] returns: self.X_hat - spectral reconstruction of self.data """ data = self.data components = self.D.components_ if w is None: self.w = self._get_approximation_coefs(data, components) w = self.w if randomize: components = np.random.permutation(components) recon = np.dot(w, components) if self.zscore: recon = recon * self.std recon = recon + self.mn recon = recon.reshape(-1, *self.patch_size) self.X_hat = reconstruct_from_patches_2d(recon, self.X.shape) if self.log_amplitude: self.X_hat = np.exp(self.X_hat) - 1.0 # invert log transform def reconstruct_individual_spectra(self, w=None, randomize=False, plotting=False, rectify=True, **kwargs): """fit each dictionary component to self.data inputs: w - per-component reconstruction weights [None=calculate weights] randomize - randomly permute components after getting weights [False] plotting - whether to subplot individual spectrum reconstructions [True] rectify- remove negative ("dark energy") from individual reconstructions [True] **kwargs - keyword arguments for plotting returns: self.X_hat_l - list of indvidual spectrum reconstructions per dictionary atom """ omp_args = {} self.reconstruct_spectrum(w, randomize, **omp_args) w, components = self.w, self.D.components_ self.X_hat_l = [] for i in range(len(self.w.T)): r=np.array((np.matrix(w)[:,i]*np.matrix(components)[i,:])).reshape(-1,*self.patch_size) X_hat = reconstruct_from_patches_2d(r, self.X.shape) if self.log_amplitude: X_hat = np.exp(X_hat) - 1.0 if rectify: # half wave rectification X_hat[X_hat<0] = 0 self.X_hat_l.append(X_hat) if plotting: self.plot_individual_spectra(**kwargs) def plot_individual_spectra(self, **kwargs): "plot individual spectrum reconstructions for self.X_hat_l" if self.X_hat_l is None: return plt.figure() rn = np.ceil(self.n_components**0.5) for k in range(self.n_components): 
plt.subplot(rn,rn,k+1) br.feature_plot(self.X_hat_l[k], nofig=1, **kwargs) plt.title('%d'%(k)) plt.suptitle('Component Reconstructions\n', fontsize=14)
def reconstruct_events(event_folder=None, output_folder=None, dictionary=None, components=100, alpha=1, start=0, end=np.inf, actions=[], random_state=None): if event_folder is None: event_folder = os.path.abspath( '/share/storage/vision/subway/features/') if output_folder is None: output_folder = os.path.abspath( '/share/storage/vision/subway/reconstructed/') events_files = get_events_files(event_folder) event_counter = 0 results = [] # Check which actions occur on each event for pth, events_start, events_end in events_files: data = sio.loadmat(pth) events = data["events"] # Avoid calculating files outside the window of interest if events_end <= start or events_start >= end: print "Skipping %s" % pth continue else: print "Processing %s" % pth intercept = None deviation = None dico = MiniBatchDictionaryLearning(n_components=components, alpha=alpha, n_iter=100) for event in events: ( (x, ), (y, ), (t, ) ), event_cuboids, event_descriptors, event_cuboid_locations, event_adjacency = event if (t >= start - 40) and (t <= end + 40): sort_order = np.argsort(event_descriptors[:, 2]) X = event_descriptors[sort_order, :] if intercept is None: intercept = np.mean(X, axis=0) original = X - intercept if deviation is None: deviation = np.std(original, axis=0) original /= deviation dictionary = dico.fit(original).components_ dico.set_params(transform_algorithm='lars', transform_n_nonzero_coefs=5) code = dico.transform(original) error = (original - np.dot(code, dictionary))**2 results.append(((x, y, t), code, error)) event_counter += 1 result_pth = os.path.join( output_folder, "reconstructed_events_clip-%s-%s.mat" % (start, end)) sio.savemat(result_pth, { 'results': results, 'start': start, 'end': end, 'actions': actions }) print "%s events saved in '%s'" % (event_counter, result_pth) return dictionary
def sp_deepdictionarylearning(s_p_d, i_p_d, dl_lambda1, dl_lambda2):
    sp_patches_data = np.copy(s_p_d)
    images_patches_data = np.copy(i_p_d)
    index = 0  # image index
    sp_mean = np.mean(sp_patches_data, axis=0)  # keep for later
    sp_patches_data -= sp_mean
    dico1 = MiniBatchDictionaryLearning(n_components=144, alpha=dl_lambda1, n_iter=200)
    V1 = dico1.fit(sp_patches_data).components_  # (144, 64)
    print('dictionary1 shape : ', V1.shape)
    transform_algorithms = [(('Orthogonal Matching Pursuit\n7 atoms', 'omp', {
        'transform_n_nonzero_coefs': 7
    }), ('Orthogonal Matching Pursuit\n7 atoms', 'omp', {
        'transform_n_nonzero_coefs': 7
    }))]  # title, transform_algorithm, kwargs
    remove_files('Image_Salt_and_Pepper_DeepDictionaryLearning')
    for layer1, layer2 in transform_algorithms:
        dico1.set_params(transform_algorithm=layer1[1], **layer1[2])
        code1 = dico1.transform(sp_patches_data)
        # activation function
        # code1 = sigmoid(code1)
        # code1 = relu_reverse_2(code1)
        dico2 = MiniBatchDictionaryLearning(n_components=256, alpha=dl_lambda2, n_iter=200)
        V2 = dico2.fit(code1).components_
        print('dictionary2 shape : ', V2.shape)
        dico2.set_params(transform_algorithm=layer2[1], **layer2[2])
        code2 = dico2.transform(code1)
        # inverse activation function
        # patches = np.dot(np.dot(code2, V2), V1)
        patches = np.dot(np.dot(code2, V2), V1)
        patches += sp_mean
        # reshape patches from (62001, 64) back to (62001, 8, 8)
        patches = patches.reshape(len(sp_patches_data), *(8, 8))
        if layer1[1] == 'threshold':
            patches -= patches.min()
            patches /= patches.max()
        # stitch the patches back into an image with reconstruct_from_patches_2d
        reconstruction_image = reconstruct_from_patches_2d(patches, (256, 256))
        # compute the error between the reconstructed image and the original
        psnr_score = psnr(reconstruct_from_patches_2d(
            images_patches_data.reshape(len(images_patches_data), *(8, 8)), (256, 256)),
            reconstruction_image, PIXEL_MAX=1)
        plt.figure()
        plt.imshow(reconstruction_image, cmap='gray')
        plt.title('Coding strategy : ' + layer1[0] + '\npsnr_score : ' + str(psnr_score))
        plt.show()
        # save the denoised reconstruction
        index += 1
        cv2.imwrite(
            'Image_Salt_and_Pepper_DeepDictionaryLearning\\' + 'algorithms_' +
            str(index) + '_psnr_score_' +
            str(round(psnr_score, 2)).replace('.', '__') + '.jpg',
            reconstruction_image * 255)
def sp_single_layer_dictionarylearning(s_p_d, i_p_d, dl_lambda):
    sp_patches_data = np.copy(s_p_d)
    images_patches_data = np.copy(i_p_d)
    index = 0  # image index
    print('Extracting a dictionary from the salt-and-pepper noise image...')
    # train the dictionary on the salt-and-pepper noise patches
    # subtract the mean of each row and divide by the variance (z-score standardization)
    sp_mean = np.mean(sp_patches_data, axis=0)  # keep for later
    sp_patches_data -= sp_mean
    # initialize MiniBatchDictionaryLearning with the given parameters
    dico = MiniBatchDictionaryLearning(n_components=256, alpha=dl_lambda, n_iter=200)
    V = dico.fit(sp_patches_data).components_
    # plot the dictionary atoms in V, explained line by line below
    '''figsize specifies the figure size in inches; one inch corresponds to 80 pixels here'''
    plt.figure(figsize=(8.2, 8))
    # loop over and plot the atoms of dictionary V (n_components is the dictionary size)
    '''enumerate() pairs each element of an iterable (list, tuple, string, ...)
    with its index; it is typically used in for loops.'''
    for i, comp in enumerate(V[:256]):
        plt.subplot(16, 16, i + 1)
        plt.imshow(comp.reshape((8, 8)), cmap=plt.cm.gray_r, interpolation='nearest')
        plt.xticks(())
        plt.yticks(())
    # the six arguments correspond to the six properties named in the comment
    plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)  # left, right, bottom, top, wspace, hspace
    plt.show()
    print('dictionary shape : ', V.shape)
    print('Dictionary learned on %d patches' % (len(sp_patches_data)))
    print('Finished extracting the dictionary from the salt-and-pepper noise image...')

    print('Starting the sparse representation of the salt-and-pepper noise image...')
    # error between the reconstructed images and the originals
    differents = []
    # the different dictionary coding strategies
    transform_algorithms = [('Orthogonal Matching Pursuit\n7 atoms', 'omp', {
        'transform_n_nonzero_coefs': 7
    })]
    # clear the previous files in this folder
    remove_files('Image_Salt_and_Pepper_SingleLayer_DictionaryLearning')
    for title, transform_algorithm, kwargs in transform_algorithms:
        # set the parameters of the second (coding) stage via set_params
        dico.set_params(transform_algorithm=transform_algorithm, **kwargs)
        # transform() encodes the data with the parameters chosen above and stores
        # the result in code. code has one column per atom of V; sparsity means most
        # entries of each row are zero, so each patch is represented with as few
        # dictionary atoms as possible.
        code = dico.transform(sp_patches_data)
        # multiply code by V to obtain the reconstructed patches
        patches = np.dot(code, V)
        # undo the data preprocessing
        patches += sp_mean
        # reshape patches from (62001, 64) back to (62001, 8, 8)
        patches = patches.reshape(len(sp_patches_data), *(8, 8))
        if transform_algorithm == 'threshold':
            patches -= patches.min()
            patches /= patches.max()
        # stitch the patches back into an image with reconstruct_from_patches_2d
        reconstruction_image = reconstruct_from_patches_2d(patches, (256, 256))
        # compute the error between the reconstructed image and the original
        psnr_score = psnr(reconstruct_from_patches_2d(
            images_patches_data.reshape(len(images_patches_data), *(8, 8)),
            (256, 256)), reconstruction_image, PIXEL_MAX=1)
        differents.append(psnr_score)
        plt.figure()
        plt.imshow(reconstruction_image, cmap='gray')
        plt.title('Coding strategy : ' + title + '\npsnr_score : ' + str(psnr_score))
        plt.show()
        # save the denoised reconstruction
        index += 1
        cv2.imwrite(
            'Image_Salt_and_Pepper_SingleLayer_DictionaryLearning\\' +
            'algorithms_' + str(index) + '_psnr_score_' +
            str(round(psnr_score, 2)).replace('.', '__') + '.jpg',
            reconstruction_image * 255)
    print('Finished the sparse representation of the salt-and-pepper noise image...')
class Layer(object): def __init__(self, hierarchy, depth, patch_size, num_features, num_patches, multiplier): """ * depth - hierarchy level (1, 2, 3, etc.) * patch_size - number of pixels representing side of the square patch. like, 8 (8x8 patches) * num_features - how many components to learn * multiplier - num of subpatches we break patch into (0 for the first level). if 3, patch will contant 3x3 subpatches. """ self.hierarchy = hierarchy self.depth = depth self.basement_size = patch_size self.num_features = num_features self.num_patches = num_patches self.multiplier = multiplier self.learning = MiniBatchDictionaryLearning( n_components=num_features, n_iter=3000, transform_algorithm='lasso_lars', transform_alpha=0.5, n_jobs=2) self.ready = False def get_data(self, data, max_patches=None): """ Extracts raw data from patches. """ max_patches = max_patches or self.num_patches if isinstance(data, np.ndarray): # one image patches = extract_patches_2d( data, (self.basement_size, self.basement_size), max_patches=max_patches) else: patches = [] # multiple images for i in xrange(max_patches): idx = np.random.randint(len(data)) # selecting random image dx = dy = self.basement_size if data[idx].shape[0] <= dx or data[idx].shape[1] <= dy: continue x = np.random.randint(data[idx].shape[0] - dx) y = np.random.randint(data[idx].shape[1] - dy) patch = data[idx][x:x + dx, y:y + dy] patches.append(patch.reshape(-1)) patches = np.vstack(patches) patches = patches.reshape(patches.shape[0], self.basement_size, self.basement_size) print 'patches', patches.shape patches = preprocessing.scale(patches) return patches def learn(self, data): data = data.reshape(data.shape[0], -1) self.learning.fit(data) self.ready = True @property def output_size(self): return int(np.sqrt(self.num_features)) @property def input_size(self): if self.depth == 0: return self.basement_size else: prev_layer = self.hierarchy.layers[self.depth - 1] r = prev_layer.output_size * self.multiplier return r return self._input_size @property def features(self): return self.learning.components_ # def get_features(self): # # going from up to down # result = [] # layers = self.hierarchy.layers[: self.depth][::-1] # if self.depth == 0: # return self.features # previous_layer = self.hierarchy.layers[self.depth - 1] # for feature in self.features: # multiplier = self.multiplier # feature = feature.reshape(self.multiplier * previous_layer.output_size, # self.multiplier * previous_layer.output_size,) # for other_layer in layers: # expressed_feature = np.empty((multiplier * other_layer.input_size, # multiplier * other_layer.input_size)) # enc_n = other_layer.output_size # n = other_layer.input_size # for dx in range(multiplier): # for dy in range(multiplier): # encoded_subfeature = feature[dx * enc_n: (dx + 1) * enc_n, # dy * enc_n: (dy + 1) * enc_n] # prev_patch = np.dot(encoded_subfeature.reshape(-1), other_layer.features) # expressed_feature[dx * n: (dx + 1) * n, dy * n: (dy + 1) * n] = prev_patch.reshape(n, n) # feature = expressed_feature # multiplier *= other_layer.multiplier # result.append(expressed_feature.reshape(-1)) # result = np.vstack(result) # return result def get_features(self): # going from down to up. 
these two methods are look like the same if self.depth == 0: return self.features layers = self.hierarchy.layers[1:self.depth + 1] # down --> up features = self.hierarchy.layers[ 0].features # to express upper feature for i, layer in enumerate(layers, start=1): previous_layer = self.hierarchy.layers[i - 1] expressed_features = [] for feature in layer.features: n = previous_layer.output_size m = int(np.sqrt(features.shape[1])) feature = feature.reshape((layer.input_size, layer.input_size)) expressed_feature = np.empty( (layer.multiplier * m, layer.multiplier * m)) for dx in range(layer.multiplier): for dy in range(layer.multiplier): subfeature = feature[dx * n:(dx + 1) * n, dy * n:(dy + 1) * n] # now that's previous_layer's code. replace it with reconstruction expressed_subfeature = np.dot(subfeature.reshape(-1), features) expressed_feature[dx * m:(dx + 1) * m, dy * m:(dy + 1) * m] = expressed_subfeature.reshape( (m, m)) expressed_features.append(expressed_feature.reshape(-1)) features = np.vstack(expressed_features) return features
pca.fit(imagenet_features)
pca_feat = pca.transform(imagenet_features)

# Shuffling
ind = range(len(imagenet_targets))
np.random.shuffle(ind)
imagenet_targets = imagenet_targets[ind]
pca_feat = pca_feat[ind, :]

# Dictionary Learning on Source
dict_sparse = MiniBatchDictionaryLearning(alpha=1, n_components=300, verbose=3,
                                          batch_size=10, n_iter=1000)
dict_sparse.fit(pca_feat)
Ds_0 = dict_sparse.components_

# Dictionary Learning on Target
dict_sparse = DictionaryLearning(alpha=1, n_components=300, max_iter=3, verbose=3)
dict_sparse.fit(features)
Dt_0 = dict_sparse.components_
coder = SparseCoder(dictionary=Dt_0)
Rt_0 = coder.transform(features)

# Target Reconstruction
Xt_1 = np.mat(Rt_0) * np.mat(Ds_0)

dict_sparse = DictionaryLearning(alpha=1,
class Sparsecode(BaseEstimator, TransformerMixin): def __init__(self, patch_file=None, patch_num=10000, patch_size=(16, 16),\ n_components=384, alpha = 1, n_iter=1000, batch_size=200): self.patch_num = patch_num self.patch_size = patch_size self.patch_file = patch_file self.n_components = n_components self.alpha = alpha #sparsity controlling parameter self.n_iter = n_iter self.batch_size = batch_size def fit(self, X=None, y=None): if self.patch_file is None: num = self.patch_num // X.size data = [] for item in X: img = imread(str(item[0])) img = img_as_ubyte(rgb2gray(img)) #img = self.binary(img) # 二值化 tmp = extract_patches_2d(img, self.patch_size, max_patches = num,\ random_state=np.random.RandomState()) data.append(tmp) data = np.vstack(data) data = data.reshape(data.shape[0], -1) data = np.asarray(data, 'float32') else: data = np.load(self.patch_file,'r+') # load npy file, 注意模式,因为后面需要修改 data = np.require(data, dtype=np.float32) # Standardization #logging.info("Pre-processing : Standardization...") #self.standard = StandardScaler() #data = self.standard.fit_transform(data) # whiten #logging.info("Pre-processing : PCA Whiten...") #self.pca = RandomizedPCA(copy=True, whiten=True) #data = self.pca.fit_transform(data) # whiten logging.info("Pre-processing : ZCA Whiten...") self.zca = ZCA() data = self.zca.fit_transform(data) # 0-1 scaling 都可以用preprocessing模块实现 #self.minmax = MinMaxScaler() #data = self.minmax.fit_transform(data) """k-means self.kmeans = MiniBatchKMeans(n_clusters=self.n_components, init='k-means++', \ max_iter=self.n_iter, batch_size=self.batch_size, verbose=1,\ tol=0.0, max_no_improvement=100,\ init_size=None, n_init=3, random_state=np.random.RandomState(0),\ reassignment_ratio=0.0001) logging.info("Sparse coding : Phase 1 - Codebook learning (K-means).") self.kmeans.fit(data) logging.info("Sparse coding : Phase 2 - Define coding method (omp,lars...).") self.coder = SparseCoder(dictionary=self.kmeans.cluster_centers_, transform_n_nonzero_coefs=256, transform_alpha=None, transform_algorithm='lasso_lars', n_jobs = 1) """ #'''genertic logging.info("Sparse coding...") self.coder = MiniBatchDictionaryLearning(n_components=self.n_components, \ alpha=self.alpha, n_iter=self.n_iter, \ batch_size =self.batch_size, verbose=True) self.coder.fit(data) self.coder.transform_algorithm = 'omp' self.coder.transform_alpha = 0.1 # omp情况下,代表重建的误差 #''' return self def transform(self, X): #whiten #X_whiten = self.pca.transform(X) logging.info("Compute the sparse coding of X.") X = np.require(X, dtype=np.float32) #TODO: 是否一定需要先fit,才能transform #X = self.minmax.fit_transform(X) # -mean/std and whiten #X = self.standard.transform(X) #X = self.pca.transform(X) # ZCA X = self.zca.transform(X) # MiniBatchDictionaryLearning # return self.dico.transform(X_whiten) # k-means # TODO: sparse coder method? problem... return self.coder.transform(X) def get_params(self, deep=True): return {"patch_num": self.patch_num, "patch_size":self.patch_size, "alpha":self.alpha, "n_components":self.n_components, "n_iter":self.n_iter, "batch_size":self.batch_size} def set_params(self, **parameters): for parameter, value in parameters.items(): self.__setattr__(parameter, value) return self
def get_dictionary_data(n_comp=20, zero_index=True): unlabeled = util.load_unlabeled_training(flatten=False) height, width = 32, 32 n_images = 10000 patch_size = (8, 8) unlabeled = util.standardize(unlabeled) np.random.shuffle(unlabeled) print('Extracting reference patches...') patches = np.empty((0, 64)) t0 = time() for image in unlabeled[:n_images, :, :]: data = np.array(extract_patches_2d(image, patch_size, max_patches=0.10)) data = data.reshape(data.shape[0], -1) data -= np.mean(data, axis=0) data /= np.std(data, axis=0) + 1e-20 patches = np.concatenate([patches, data]) print('done in %.2fs.' % (time() - t0)) # whiten the patches z = zca.ZCA() z.fit(patches) z.transform(patches) print('Learning the dictionary...') t0 = time() dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1) V = dico.fit(patches).components_ dt = time() - t0 print('done in %.2fs.' % dt) #plt.figure(figsize=(4.2, 4)) #for i, comp in enumerate(V[:100]): # plt.subplot(10, 10, i + 1) # plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r, # interpolation='nearest') # plt.xticks(()) # plt.yticks(()) #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) #plt.show() labeled_data, labels = util.load_labeled_training(flatten=False, zero_index=True) labeled_data = util.standardize(labeled_data) test_data = util.load_all_test(flatten=False) test_data = util.standardize(test_data) #util.render_matrix(test_data, flattened=False) print('Training SVM with the training images...') t0 = time() reconstructed_images = np.empty((0, 64)) multiplied_labels = np.empty((0)) for i in range(len(labeled_data)): image = labeled_data[i, :, :] label = labels[i] data = extract_patches_2d(image, patch_size, max_patches=0.50) data = data.reshape(data.shape[0], -1) data -= np.mean(data, axis=0) data /= np.std(data, axis=0) + 1e-20 code = dico.transform(data) patches = np.dot(code, V) z.transform(patches) reconstructed_images = np.concatenate([reconstructed_images, patches]) extended_labels = np.asarray([label] * len(patches)) multiplied_labels = np.concatenate([multiplied_labels, extended_labels]) print(reconstructed_images.shape, multiplied_labels.shape) svc = SVC() #print('Getting cross-val scores...') #scores = cross_validation.cross_val_score(svc, reconstructed_images, multiplied_labels, cv=10) #print('cross-val scores:', scores) #print('cross-val mean:', np.mean(scores)) #print('cross-val variance:', np.var(scores)) print('done in %.2fs.' % (time() - t0)) svc.fit(reconstructed_images, multiplied_labels) print('Reconstructing the test images...') t0 = time() predictions = [] for i, image in enumerate(test_data): data = extract_patches_2d(image, patch_size, max_patches=0.25) data = data.reshape(data.shape[0], -1) data -= np.mean(data, axis=0) data /= np.std(data, axis=0) + 1e-20 code = dico.transform(data) patches = np.dot(code, V) z.transform(patches) pred = svc.predict(patches) print('Variance in the predictions:', np.var(pred)) predictions.append(mode(pred)) print('done in %.2fs.' % (time() - t0)) predictions += 1 util.write_results(predictions, 'svm_patches_25_percent_20_comp.csv')
def plot_image_denoising(): try: # SciPy >= 0.16 have face in misc from scipy.misc import face face = face(gray=True) except ImportError: face = sp.face(gray=True) # Convert from uint8 representation with values between 0 and 255 to # a floating point representation with values between 0 and 1. face = face / 255. # downsample for higher speed face = face[::4, ::4] + face[1::4, ::4] + face[::4, 1::4] + face[1::4, 1::4] face /= 4.0 height, width = face.shape # Distort the right half of the image print('Distorting image...') distorted = face.copy() distorted[:, width // 2:] += 0.075 * np.random.randn(height, width // 2) # Extract all reference patches from the left half of the image print('Extracting reference patches...') t0 = time() patch_size = (7, 7) data = extract_patches_2d(distorted[:, :width // 2], patch_size) data = data.reshape(data.shape[0], -1) data -= np.mean(data, axis=0) data /= np.std(data, axis=0) print('done in %.2fs.' % (time() - t0)) # ############################################################################# # Learn the dictionary from reference patches print('Learning the dictionary...') t0 = time() dico = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=500) V = dico.fit(data).components_ dt = time() - t0 print('done in %.2fs.' % dt) plt.figure(figsize=(4.2, 4)) for i, comp in enumerate(V[:100]): plt.subplot(10, 10, i + 1) plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r, interpolation='nearest') plt.xticks(()) plt.yticks(()) plt.suptitle('Dictionary learned from face patches\n' + 'Train time %.1fs on %d patches' % (dt, len(data)), fontsize=16) plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) # ############################################################################# # Display the distorted image def show_with_diff(image, reference, title): """Helper function to display denoising""" plt.figure(figsize=(5, 3.3)) plt.subplot(1, 2, 1) plt.title('Image') plt.imshow(image, vmin=0, vmax=1, cmap=plt.cm.gray, interpolation='nearest') plt.xticks(()) plt.yticks(()) plt.subplot(1, 2, 2) difference = image - reference plt.title('Difference (norm: %.2f)' % np.sqrt(np.sum(difference**2))) plt.imshow(difference, vmin=-0.5, vmax=0.5, cmap=plt.cm.PuOr, interpolation='nearest') plt.xticks(()) plt.yticks(()) plt.suptitle(title, size=16) plt.subplots_adjust(0.02, 0.02, 0.98, 0.79, 0.02, 0.2) show_with_diff(distorted, face, 'Distorted image') # ############################################################################# # Extract noisy patches and reconstruct them using the dictionary print('Extracting noisy patches... ') t0 = time() data = extract_patches_2d(distorted[:, width // 2:], patch_size) data = data.reshape(data.shape[0], -1) intercept = np.mean(data, axis=0) data -= intercept print('done in %.2fs.' 
% (time() - t0)) transform_algorithms = [('Orthogonal Matching Pursuit\n1 atom', 'omp', { 'transform_n_nonzero_coefs': 1 }), ('Orthogonal Matching Pursuit\n2 atoms', 'omp', { 'transform_n_nonzero_coefs': 2 }), ('Least-angle regression\n5 atoms', 'lars', { 'transform_n_nonzero_coefs': 5 }), ('Thresholding\n alpha=0.1', 'threshold', { 'transform_alpha': .1 })] reconstructions = {} for title, transform_algorithm, kwargs in transform_algorithms: print(title + '...') reconstructions[title] = face.copy() t0 = time() dico.set_params(transform_algorithm=transform_algorithm, **kwargs) code = dico.transform(data) patches = np.dot(code, V) patches += intercept patches = patches.reshape(len(data), *patch_size) if transform_algorithm == 'threshold': patches -= patches.min() patches /= patches.max() reconstructions[title][:, width // 2:] = reconstruct_from_patches_2d( patches, (height, width // 2)) dt = time() - t0 print('done in %.2fs.' % dt) show_with_diff(reconstructions[title], face, title + ' (time: %.1fs)' % dt) plt.show()
data = list(
    numpy.array(patch, numpy.float32).flatten() for patch in patches)
data = numpy.array(data)
data /= 256.
mean = numpy.mean(data, axis=0)
data -= mean
std = numpy.std(data, axis=0)
data /= std

if (not restart) or steps[restart] <= steps['FIT_MODEL']:
    with Timer("Fitting model ..."):
        # Fit the sparse model using Dictionary Learning.
        cols = ceil(sqrt(N_COMP))
        rows = ceil(N_COMP / float(cols))
        model = MiniBatchDictionaryLearning(n_components=N_COMP, alpha=1)
        fit = model.fit(data)

if (not restart) or steps[restart] <= steps['DISPLAY_BASIS']:
    with Timer("Display components ..."):
        # Display the basis components (aka the dictionary).
        pylab.ion()
        pylab.show()
        display(fit)

if (not restart) or steps[restart] <= steps['COMPUTE_PROJ']:
    with Timer("Compute projection ..."):
        # Project the input patches onto the basis using Orthogonal Matching
        # Pursuit with N_ATOMS atoms.
        model.set_params(transform_algorithm='omp',
                         transform_n_nonzero_coefs=N_ATOMS)
        # the intention is simply this:
        #   code = model.transform(data)
        # but we chunk it up and store it in a sparse matrix for efficiency
        code = []
print('Extracting reference patches...')
t0 = time()
patch_size = (7, 7)
data = extract_patches_2d(distorted[:, :width // 2], patch_size)
data = data.reshape(data.shape[0], -1)
data -= np.mean(data, axis=0)
data /= np.std(data, axis=0)
print('done in %.2fs.' % (time() - t0))

###############################################################################
# Learn the dictionary from reference patches

print('Learning the dictionary...')
t0 = time()
dico = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=500)
V = dico.fit(data).components_
dt = time() - t0
print('done in %.2fs.' % dt)

plt.figure(figsize=(4.2, 4))
for i, comp in enumerate(V[:100]):
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
               interpolation='nearest')
    plt.xticks(())
    plt.yticks(())
plt.suptitle('Dictionary learned from face patches\n' +
             'Train time %.1fs on %d patches' % (dt, len(data)), fontsize=16)
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
def peakmem_fit(self, params):
    estimator = MiniBatchDictionaryLearning(**self.dl_params)
    estimator.fit(self.data)
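# `peakmem_fit` above reads like an asv-style benchmark method; the attributes it
# uses (`dl_params`, `data`) would be prepared in a `setup` method. A minimal,
# hypothetical sketch of such a benchmark class (names and sizes are assumptions):
import numpy as np
from sklearn.decomposition import MiniBatchDictionaryLearning

class DictionaryLearningBench:
    def setup(self, params=None):
        rng = np.random.RandomState(0)
        self.data = rng.randn(1000, 64)  # synthetic stand-in for real patches
        self.dl_params = dict(n_components=32, alpha=1, batch_size=50)

    def peakmem_fit(self, params=None):
        estimator = MiniBatchDictionaryLearning(**self.dl_params)
        estimator.fit(self.data)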
class SparseApproxSpectrum(object): def __init__(self, n_components=49, patch_size=(8,8), max_samples=1000000, **kwargs): self.omp = OrthogonalMatchingPursuit() self.n_components = n_components self.patch_size = patch_size self.max_samples = max_samples self.D = None self.data = None self.components = None self.standardize=False def _extract_data_patches(self, X): self.X = X data = extract_patches_2d(X, self.patch_size) data = data.reshape(data.shape[0], -1) if len(data)>self.max_samples: data = np.random.permutation(data)[:self.max_samples] print(data.shape) if self.standardize: self.mn = np.mean(data, axis=0) self.std = np.std(data, axis=0) data -= self.mn data /= self.std self.data = data def extract_codes(self, X, standardize=False): self.standardize=standardize self._extract_data_patches(X) self.dico = MiniBatchDictionaryLearning(n_components=self.n_components, alpha=1, n_iter=500) print("Dictionary learning from data...") self.D = self.dico.fit(self.data) return self def plot_codes(self, cbar=False, **kwargs): #plt.figure(figsize=(4.2, 4)) N = int(np.ceil(np.sqrt(self.n_components))) kwargs.setdefault('cmap', pl.cm.gray_r) kwargs.setdefault('origin','bottom') kwargs.setdefault('interpolation','nearest') for i, comp in enumerate(self.D.components_): plt.subplot(N, N, i + 1) comp = comp * self.std + self.mn if self.standardize else comp plt.imshow(comp.reshape(self.patch_size), **kwargs) if cbar: plt.colorbar() plt.xticks(()) plt.yticks(()) plt.suptitle('Dictionary learned from spectrum patches\n', fontsize=16) plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) def extract_audio_dir_codes(self, dir_expr='/home/mkc/exp/FMRI/stimuli/Wav6sRamp/*.wav',**kwargs): flist=glob.glob(dir_expr) self.X = np.vstack([feature_scale(LogFrequencySpectrum(f, nbpo=24, nhop=1024).X,normalize=1).T for f in flist]).T self.D = extract_codes(self.X, **kwargs) self.plot_codes(**kwargs) return self def _get_approximation_coefs(self,data, components): w = np.array([self.omp.fit(components.T, d.T).coef_ for d in data]) return w def reconstruct_spectrum(self, w=None, randomize=False): data = self.data components = self.D.components_ if w is None: self.w = self._get_approximation_coefs(data, components) w = self.w if self.standardize: for comp in components: comp = comp * self.std + self.mn if randomize: components = np.random.permutation(components) recon = np.dot(w, components).reshape(-1,self.patch_size[0],self.patch_size[1]) self.X_hat = reconstruct_from_patches_2d(recon, self.X.shape) return self def reconstruct_individual_spectra(self, w=None, randomize=False, plotting=False, **kwargs): self.reconstruct_spectrum(w,randomize) w, components = self.w, self.D.components_ self.X_hat_l = [] for i in range(len(self.w.T)): r=np.array((np.matrix(w)[:,i]*np.matrix(components)[i,:])).reshape(-1,self.patch_size[0],self.patch_size[1]) self.X_hat_l.append(reconstruct_from_patches_2d(r, self.X.shape)) if plotting: plt.figure() for k in range(self.n_components): plt.subplot(self.n_components**0.5,self.n_components**0.5,k+1) feature_plot(self.X_hat_l[k],nofig=1,**kwargs) return self
def get_dictionary_data(n_comp=20, zero_index=False): unlabeled = util.load_unlabeled_training(flatten=False) height, width = 32, 32 n_images = 10000 patch_size = (8, 8) unlabeled = util.standardize(unlabeled) np.random.shuffle(unlabeled) print('Extracting reference patches...') patches = np.empty((0, 64)) t0 = time() for image in unlabeled[:n_images, :, :]: data = np.array(extract_patches_2d(image, patch_size, max_patches=0.01)) data = data.reshape(data.shape[0], -1) data -= np.mean(data, axis=0) data /= np.std(data, axis=0) + 1e-20 patches = np.concatenate([patches, data]) print('done in %.2fs.' % (time() - t0)) # whiten the patches z = zca.ZCA() z.fit(patches) z.transform(patches) print('Learning the dictionary...') t0 = time() dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1) V = dico.fit(patches).components_ dt = time() - t0 print('done in %.2fs.' % dt) #plt.figure(figsize=(4.2, 4)) #for i, comp in enumerate(V[:100]): # plt.subplot(10, 10, i + 1) # plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r, # interpolation='nearest') # plt.xticks(()) # plt.yticks(()) #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) #plt.show() labeled_data, labels = util.load_labeled_training(flatten=False, zero_index=True) labeled_data = util.standardize(labeled_data) test_data = util.load_all_test(flatten=False) test_data = util.standardize(test_data) #util.render_matrix(test_data, flattened=False) print('Reconstructing the training images...') t0 = time() reconstructed_images = np.empty((0, 32, 32)) for i, image in enumerate(labeled_data): data = extract_patches_2d(image, patch_size) data = data.reshape(data.shape[0], -1) data -= np.mean(data, axis=0) data /= np.std(data, axis=0) + 1e-20 code = dico.transform(data) patches = np.dot(code, V) z.transform(patches) patches = patches.reshape(len(data), *patch_size) data = reconstruct_from_patches_2d(patches, (width, height)) data = data.reshape(1, 32, 32) reconstructed_images = np.concatenate([reconstructed_images, data]) print('done in %.2fs.' % (time() - t0)) # flatten n, x, y = reconstructed_images.shape training_images = reconstructed_images.reshape( reconstructed_images.shape[0], reconstructed_images.shape[1] * reconstructed_images.shape[2]) assert training_images.shape == (n, x * y) print('Reconstructing the test images...') t0 = time() reconstructed_test_images = np.empty((0, 32, 32)) for image in test_data: data = extract_patches_2d(image, patch_size) data = data.reshape(data.shape[0], -1) data -= np.mean(data, axis=0) data /= np.std(data, axis=0) + 1e-20 code = dico.transform(data) patches = np.dot(code, V) z.transform(patches) patches = patches.reshape(len(data), *patch_size) data = reconstruct_from_patches_2d(patches, (width, height)) data = data.reshape(1, 32, 32) reconstructed_test_images = np.concatenate( [reconstructed_test_images, data]) print('done in %.2fs.' % (time() - t0)) # flatten n, x, y = reconstructed_test_images.shape test_images = reconstructed_test_images.reshape( reconstructed_test_images.shape[0], reconstructed_test_images.shape[1] * reconstructed_test_images.shape[2]) assert test_images.shape == (n, x * y) return (training_images, labels, test_images)
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
In this script I check how good AR models are for AD.
Created on Wed Jul 25 09:11:44 2018
@author: haroonr
"""
import matplotlib.pyplot as plt
#%%
from sklearn.decomposition import MiniBatchDictionaryLearning

dico = MiniBatchDictionaryLearning(n_components=220, alpha=1, n_iter=100)
train_data = days_obs.loc[datetime.date(2018, 2, 13):datetime.date(2018, 2, 28)]
D = dico.fit(train_data).components_
#%%
fig, ax = plt.subplots(ncols=10, nrows=D.shape[0] // 10)
i = 0
for row in ax:
    for col in row:
        col.plot(D[i])
        i = i + 1
initial_patch_size = patches.shape
patches = patches.reshape(-1, patch_size[0] * patch_size[1])
patches_recto.append(patches)

# Change the size of patches
patches_recto = np.asarray(patches_recto)
patches_recto = patches_recto.reshape(-1, m * m)

# Do the normalisation here
patches_recto -= np.mean(patches_recto, axis=0)  # remove the mean
patches_recto /= np.std(patches_recto, axis=0)   # normalise each patch

dico_recto = MiniBatchDictionaryLearning(
    n_components=100, alpha=0.7, n_iter=400)  # TODO: check with different parameters
V_recto = dico_recto.fit(patches_recto).components_

"""
# plot the dictionary
plt.figure(figsize=(8, 6))
for i, comp in enumerate(V_recto[:100]):
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r, interpolation='nearest')
    plt.xticks(())
    plt.yticks(())
plt.suptitle('Recto dictionary learned from patches')
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
"""

print('Learning the dictionary for verso images...')
patches_verso = []
for pic_set in np.arange(4):
# Group the patches, reshape the patches as simple vectors,
# rescale and center the data.
data = list(
    numpy.array(patch, numpy.float32).flatten() for patch in patches
)
data = numpy.array(data)
data /= 256.
mean = numpy.mean(data, axis=0)
data -= mean
std = numpy.std(data, axis=0)
data /= std

if (not restart) or steps[restart] <= steps['FIT_MODEL']:
    with Timer("Fitting model ..."):
        # Fit the sparse model using Dictionary Learning.
        cols = ceil(sqrt(N_COMP))
        rows = ceil(N_COMP / float(cols))
        model = MiniBatchDictionaryLearning(n_components=N_COMP, alpha=1)
        fit = model.fit(data)

if (not restart) or steps[restart] <= steps['DISPLAY_BASIS']:
    with Timer("Display components ..."):
        # Display the basis components (aka the dictionary).
        pylab.ion()
        pylab.show()
        display(fit)

if (not restart) or steps[restart] <= steps['COMPUTE_PROJ']:
    with Timer("Compute projection ..."):
        # Project the input patches onto the basis using Orthogonal Matching
        # Pursuit with N_ATOMS atoms.
        model.set_params(transform_algorithm='omp',
                         transform_n_nonzero_coefs=N_ATOMS)
        # the intention is simply this:
        #   code = model.transform(data)
        # but we chunk it up and store it in a sparse matrix for efficiency
        code = []
        CHUNK = 1000
# In[37]:

data = extract_patches(img / 255, (8, 8, 3), max_patches=1000)
data = data.reshape(data.shape[0], -1)
mean = np.mean(data, axis=0)
data -= mean
data /= np.std(data, axis=0)

# In[59]:

print('Size of Dictionary: ', data.shape)

# In[38]:

dic = MiniBatchDictionaryLearning(n_components=256, alpha=1, n_iter=500)
v = dic.fit(data).components_

# In[39]:

patch_size = (8, 8, 3)

# In[34]:

plt.figure(figsize=(4.2, 4))
for i, comp in enumerate(v[:100]):
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patch_size) * 255, cmap=plt.cm.gray_r,
               interpolation='nearest')
    plt.xticks(())
    plt.yticks(())
                            delimiter=';')
    sparseCode = np.loadtxt(
        filePath + 'sparseCodeT-MODNoisy_' + fileNameSufix, delimiter=';')
elif title == 'K-HOSVD_javaORMP':
    dictionary = np.loadtxt(filePath + 'dictK-HOSVDNoisy_' + fileNameSufix,
                            delimiter=';')
    sparseCode = np.loadtxt(
        filePath + 'sparseCodeK-HOSVDNoisy_' + fileNameSufix, delimiter=';')
elif title == 'MiniBatchDL_OMP':
    miniBatch = MiniBatchDictionaryLearning(n_components=K, alpha=1, n_iter=noIt)
    dictionary = miniBatch.fit(noisyPatches).components_
    miniBatch.set_params(transform_algorithm=transform_algorithm, **kwargs)
    sparseCode = miniBatch.transform(noisyPatches)

reconstruction = np.dot(sparseCode, dictionary)
reconstruction += noiseMean
reconstruction = reconstruction.reshape(len(noisyPatches), *patch_size)
reconstructions[title][:, width // 2:] = reconstruct_from_patches_2d(
    reconstruction, (height, width // 2))
print_comparison(reconstructions[title], face, title)

results.close()

# ToDo:
# 1 Dictionary learning ---------------------------------------------------------------------------------

# set the parameters
n_components = 50
alpha = 1
batch_size = 200
n_iter = 25
random_state = 2018

# create the estimator instance
miniBatchDictLearning = MiniBatchDictionaryLearning(n_components=n_components,
                                                    alpha=alpha,
                                                    batch_size=batch_size,
                                                    n_iter=n_iter,
                                                    random_state=random_state)

# train the learner
miniBatchDictLearning.fit(X_train.loc[:, :10000])

# apply the learner
X_train_miniBatchDictLearning = miniBatchDictLearning.transform(X_train)

# convert to a DataFrame
X_train_miniBatchDictLearning = pd.DataFrame(
    data=X_train_miniBatchDictLearning, index=train_index)

# show the scatter plot
scatterPlot(X_train_miniBatchDictLearning, y_train,
            "Mini-batch Dictionary Learning")
def dictionary_learning_MHOF(flow_hist_H_400):
    from sklearn.decomposition import MiniBatchDictionaryLearning
    dico = MiniBatchDictionaryLearning(n_components=400, alpha=1, n_iter=500)
    dic = dico.fit(flow_hist_H_400).components_
    # coeffs = dico.transform(flow_hist_H_400)
    return dic
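# A minimal usage sketch for the batch variant above, assuming the MHOF histograms
# have already been stacked into a (400, n_bins) array; the data below is a random
# stand-in, not the real histograms:
import numpy as np

flow_hist_H_400 = np.abs(np.random.randn(400, 320))
dic = dictionary_learning_MHOF(flow_hist_H_400)
print(dic.shape)  # (400, 320): one learned atom per row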
# number of basis atoms
num_basis = 100

# read the image list
imgArray = ImageListFile2Array('patchlist2.txt')

# initialize the dictionary learner
print 'Learning the dictionary... '
t0 = time()
dico = MiniBatchDictionaryLearning(n_components=num_basis, alpha=1.0,
                                   transform_algorithm='lasso_lars',
                                   transform_alpha=1.0, fit_algorithm='lars',
                                   n_iter=500)

# rescale to zero mean and unit variance
M = np.mean(imgArray, axis=0)[np.newaxis, :]
whiteArray = imgArray - M
whiteArray /= np.std(whiteArray, axis=0)

# compute the dictionary
V = dico.fit(whiteArray).components_

# processing time
dt = time() - t0
print 'done in %.2fs.' % dt

# save the dictionary
np.save('Dictionaries2.npy', V)
patches_recto.append(patches1)
patches_recto.append(patches2)

patches_recto = np.reshape(patches_recto, (-1, m * m))
patches_recto -= np.mean(patches_recto, axis=0)  # remove the mean
patches_recto /= np.std(patches_recto, axis=0)   # normalize each patch

dict_components = 100

# recto dictionary
print('Learning the dictionary...')
dico = MiniBatchDictionaryLearning(n_components=dict_components, alpha=1, n_iter=400)
# fitting the recto patches
V_recto = dico.fit(patches_recto).components_

# verso dictionary = flipped recto dictionary
V_verso = np.reshape(V_recto, (dict_components, m, m))
for i in range(dict_components):
    V_verso[i] = np.fliplr(V_verso[i])
V_verso = np.reshape(V_verso, (dict_components, m * m))


def Dic_proj_recto(data, n_coef, alpha):
    """ The dictionary projection method """
    data = patchify(data, patch_size, step)
    data = data.reshape(-1, patch_size[0] * patch_size[1])
    intercept = np.mean(data, axis=0)
    coder = MiniBatchDictionaryLearning(n_components=dict_size,
                                        transform_algorithm='omp',
                                        alpha=threshold,
                                        transform_alpha=threshold,
                                        transform_n_nonzero_coefs=int(dict_size * args.density),
                                        batch_size=batchSize,
                                        n_iter=iteration,
                                        verbose=True)
elif args.method == "lasso":
    coder = MiniBatchDictionaryLearning(n_components=dict_size,
                                        transform_algorithm='lasso_lars',
                                        transform_alpha=threshold,
                                        batch_size=batchSize,
                                        n_iter=iteration,
                                        verbose=True)
elif args.method == "svd":
    coder = TruncatedSVD(n_components=dict_size)
    coder.fit(X_train)
    W_learned = coder.components_
else:
    W_learned = np.load(file(args.dictInput))['arr_0']
    coder = SparseCoder(dictionary=W_learned,
                        transform_n_nonzero_coefs=int(dict_size * args.density))

Y_learned = coder.transform(X_test)
evaluate(X_test, Y_learned, W_learned, iteration, threshold, args.method,
         args.resultFile, args.dictOutput)
em.run()
dlog.close(True)
pprint("Done")

# ### Mini-Batch Dictionary Learning
#
# Alternative, since the EM library gives numerical errors

# In[20]:

from sklearn.decomposition import MiniBatchDictionaryLearning

mbdic = MiniBatchDictionaryLearning(n_components=30, verbose=True)
mbdic.fit(patches_flat)

# ### Visualize the dictionary atoms

# In[21]:

V = mbdic.components_
plt.figure(figsize=(15, 12))
for i, comp in enumerate(V):
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patchsize).T, origin='lower', interpolation='nearest',
               aspect='auto', cmap='viridis')

# ### Reconstruct some data with the dictionary
test_X_img_patches = image.extract_patches_2d(test_X_img, (patch_w, patch_h),
                                              max_patches=n_patches_ea_pic,
                                              random_state=0)
test_X_patches[i * n_patches_ea_pic: (i + 1) * n_patches_ea_pic] = \
    test_X_img_patches.reshape(n_patches_ea_pic, patch_w * patch_h * 3)

print "test_X_patches", test_X_patches.shape

###############################################################################
# Dictionary Learning
n_components = 576

print("\nSparse Coding Dictionary Learning")
# pca = RandomizedPCA(n_components=n_components).fit(train_X)
dl = MiniBatchDictionaryLearning(n_components)
dl.fit(train_X_patches)
print "X_train.shape", train_X.shape
print "Components shape", dl.components_.shape

# components = dl.components().reshape((n_components, n_features))
components = dl.components_

# Visualizing the components as images
component_titles = ["component %d" % i for i in range(components.shape[0])]
plot_gallery("Visualizing top components", components, component_titles,
             patch_w, patch_h, n_row=24, n_col=24)
plt.show()

###############################################################################
# Sparse Encoding
print("\nSparse Encoding")
class BoVWFeature(TransformerMixin): """ Extract BoVW Feature Parameters ---------- codebook_size : int the size of codebook, default:1000 method : str codebook's compute method , value: 'sc','km' """ def __init__(self, codebook_size=512, method='sc'): self.codebook_size = codebook_size self.method = method self.patch_num = 40000 self.patch_size = 8 self.sample = 'random' self.feature = 'raw' # raw, surf, hog def fit(self, X, y=None): # compute the codes print 'Extracting patchs...' patchs = [] num = self.patch_num // X.size for x in X: img = imread(str(x[0])) tmp = extract_patches_2d(img, (self.patch_size,self.patch_size), \ max_patches=num, random_state=np.random.RandomState()) patchs.append(tmp) data = np.vstack(patchs) data = data.reshape(data.shape[0], -1) data -= np.mean(data, axis=0) data = data/np.std(data, axis=0) print 'Learning codebook...' if self.method == 'sc': self.dico = MiniBatchDictionaryLearning(n_components=self.codebook_size, \ alpha=1, n_iter=100, batch_size =100, verbose=True) self.dico.fit(data) elif self.method=='km': # self.dico = MiniBatchKMeans(n_clusters=self.codebook_size) pass return self def transform(self, X): """ Parameters ---------- X : {array-like}, shape = [n_samples, 1] Training vectors, where n_samples is the number of samples and 1 is image path. Returns ------- array-like = [n_samples, features] Class labels predicted by each classifier. """ print 'Extracting feature...' # setting the dictionary self.dico.set_params(transform_algorithm='lars') results = [] for sample in X: img = imread(str(sample[0])) tmp = extract_patches_2d(img, (self.patch_size,self.patch_size), \ max_patches=300, random_state=np.random.RandomState()) data = tmp.reshape(tmp.shape[0], -1) data = data-np.mean(data, axis=0) data = data/np.std(data, axis=0) code = self.dico.transform(data) results.append(code.sum(axis=0)) return np.vstack(results) def get_params(self, deep=True): return {"codebook_size": self.codebook_size}
# In[19]:

# normalize patches so we can learn the dictionary
norm_data = stacked_patches
norm_data -= np.mean(norm_data, axis=0)
norm_data /= np.std(norm_data, axis=0)

# In[20]:

# Learn dictionary
dictionary = MiniBatchDictionaryLearning(n_components=100, alpha=1, batch_size=10, n_iter=500)
V = dictionary.fit(norm_data).components_  # this is the dictionary

# In[21]:

# pre-process noisy file
distorted_patches = extract_patches_2d(distorted, patch_size)
distorted_stacked_patches = distorted_patches.reshape(distorted_patches.shape[0], -1)
# center the data
intercept = np.mean(distorted_stacked_patches, axis=0)
distorted_stacked_patches -= intercept

# In[22]:

# find sparse code of distorted image given the dictionary
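# A minimal sketch of the step announced above (not necessarily the notebook's actual next cell):
# sparse-code the centered noisy patches with the learned dictionary, undo the centering, and
# stitch the patches back into an image. It assumes 'distorted' is a 2-D grayscale image and that
# reconstruct_from_patches_2d is imported alongside extract_patches_2d.
code = dictionary.transform(distorted_stacked_patches)   # sparse codes, one row per patch
patches_hat = np.dot(code, V) + intercept                # de-centered patch estimates
patches_hat = patches_hat.reshape(len(patches_hat), *patch_size)
denoised = reconstruct_from_patches_2d(patches_hat, distorted.shape)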
f'{cfg.save_path}/all_{n_components}_{n_iter}.sklearnmodel' ): dico = pickle.load( open( f'{cfg.save_path}/all_{n_components}_{n_iter}.sklearnmodel', 'rb')) print( f'Use hitted {cfg.save_path}/all_{n_components}_{n_iter}.sklearnmodel' ) hit = True else: dico = DictionaryLearning(n_components=n_components, n_jobs=-3, max_iter=n_iter, verbose=True) dico.fit(images) print(f'{dico.n_iter_} iters') timer.stop(start=' ') n_iter_actual = dico.n_iter_ if cfg.save and not hit: np.save(f'{cfg.save_path}/all_{n_components}_{n_iter_actual}', dico.components_) pickle.dump( dico, open( f'{cfg.save_path}/all_{n_components}_{n_iter_actual}.sklearnmodel', 'wb')) # Calculate the mIOU based on cats for cat_id in cfg.select_cat:
if len(sys.argv) != 4:
    sys.stderr.write('usage: %s data_dir cosim_size dict_size\n' % sys.argv[0])
    sys.exit(1)
data_dir = sys.argv[1]
cosim_size = int(sys.argv[2])
dict_size = int(sys.argv[3])
M = np.memmap(data_dir + '/cosimilarity.dat', dtype='float32', mode='r', shape=(cosim_size, cosim_size))
M = M[:500]
d = MiniBatchDictionaryLearning(dict_size, n_iter=10, batch_size=1000, verbose=True, n_jobs=-1)
d.fit(M)
np.save(data_dir + '/components.bin', d.components_)
component_set = set()
for c in d.components_:
    component_set.add(tuple(c))
ids = open(data_dir + '/dictionary_indexes.txt', 'w')
for (i, row) in enumerate(M):
    if i % 1000 == 0:  # report progress every 1000 rows
        print('checking to see if row %d is a dictionary element' % i)
    if tuple(row) in component_set:
        ids.write(str(i) + '\n')
        component_set.remove(tuple(row))
def test_dict_learning_online_estimator_shapes():
    n_components = 5
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, random_state=0)
    dico.fit(X)
    assert_true(dico.components_.shape == (n_components, n_features))
def get_dictionary_data(n_comp=20, zero_index=False): unlabeled = util.load_unlabeled_training(flatten=False) height, width = 32, 32 n_images = 10000 patch_size = (8, 8) unlabeled = util.standardize(unlabeled) np.random.shuffle(unlabeled) print('Extracting reference patches...') patches = np.empty((0, 64)) t0 = time() for image in unlabeled[:n_images, :, :]: data = np.array(extract_patches_2d(image, patch_size, max_patches=0.01)) data = data.reshape(data.shape[0], -1) data -= np.mean(data, axis=0) data /= np.std(data, axis=0) + 1e-20 patches = np.concatenate([patches, data]) print('done in %.2fs.' % (time() - t0)) # whiten the patches z = zca.ZCA() z.fit(patches) z.transform(patches) print('Learning the dictionary...') t0 = time() dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=1) V = dico.fit(patches).components_ dt = time() - t0 print('done in %.2fs.' % dt) #plt.figure(figsize=(4.2, 4)) #for i, comp in enumerate(V[:100]): # plt.subplot(10, 10, i + 1) # plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r, # interpolation='nearest') # plt.xticks(()) # plt.yticks(()) #plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) #plt.show() labeled_data, labels = util.load_labeled_training(flatten=False, zero_index=True) labeled_data = util.standardize(labeled_data) test_data = util.load_all_test(flatten=False) test_data = util.standardize(test_data) #util.render_matrix(test_data, flattened=False) print('Reconstructing the training images...') t0 = time() reconstructed_images = np.empty((0, 32, 32)) for i, image in enumerate(labeled_data): data = extract_patches_2d(image, patch_size) data = data.reshape(data.shape[0], -1) data -= np.mean(data, axis=0) data /= np.std(data, axis=0) + 1e-20 code = dico.transform(data) patches = np.dot(code, V) z.transform(patches) patches = patches.reshape(len(data), *patch_size) data = reconstruct_from_patches_2d(patches, (width, height)) data = data.reshape(1, 32, 32) reconstructed_images = np.concatenate([reconstructed_images, data]) print('done in %.2fs.' % (time() - t0)) # flatten n, x, y = reconstructed_images.shape training_images = reconstructed_images.reshape(reconstructed_images.shape[0], reconstructed_images.shape[1]*reconstructed_images.shape[2]) assert training_images.shape == (n, x*y) print('Reconstructing the test images...') t0 = time() reconstructed_test_images = np.empty((0, 32, 32)) for image in test_data: data = extract_patches_2d(image, patch_size) data = data.reshape(data.shape[0], -1) data -= np.mean(data, axis=0) data /= np.std(data, axis=0) + 1e-20 code = dico.transform(data) patches = np.dot(code, V) z.transform(patches) patches = patches.reshape(len(data), *patch_size) data = reconstruct_from_patches_2d(patches, (width, height)) data = data.reshape(1, 32, 32) reconstructed_test_images = np.concatenate([reconstructed_test_images, data]) print('done in %.2fs.' % (time() - t0)) # flatten n, x, y = reconstructed_test_images.shape test_images = reconstructed_test_images.reshape(reconstructed_test_images.shape[0], reconstructed_test_images.shape[1]*reconstructed_test_images.shape[2]) assert test_images.shape == (n, x*y) return (training_images, labels, test_images)
n_appliance = 1 args = model_args.loc[n_appliance - 1, :] dataset = args[1] precision = args[2] denoised = args[4] ids = args[7].split(',') datasets_dir = './data/%s.csv' data = dataset_loader(datasets_dir % dataset, ids, precision=precision, denoised=denoised) aggregate = data.WHE.tail(36 * 1440) # train data for dictionary learning train_data = data.tail(1440) model = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=30) model.fit(train_data.T) comp = model.components_ basics = comp.T n_componetss = model.n_components transforms = model.transform(train_data.T) activations = transforms.T reconstruction = np.matmul(basics, activations) print("Dictionary Learning RMSE for appliance is %s" % (mean_squared_error(reconstruction, train_data)**(.5))) ## SparseCoder for appliance model = SparseCoder(dictionary=basics.T, positive_code=True, transform_algorithm='lasso_lars') predicted_activations = model.transform(train_data.T).T print_appliance_wise_errors(predicted_activations, basics, n_componetss)
class SparseApproxSpectrum(object): def __init__(self, n_components=49, patch_size=(8,8), max_samples=1000000, **kwargs): self.omp = OrthogonalMatchingPursuit() self.n_components = n_components self.patch_size = patch_size self.max_samples = max_samples self.D = None self.data = None self.components = None self.standardize=False def _extract_data_patches(self, X): self.X = X data = extract_patches_2d(X, self.patch_size) data = data.reshape(data.shape[0], -1) if len(data)>self.max_samples: data = np.random.permutation(data)[:self.max_samples] print data.shape if self.standardize: self.mn = np.mean(data, axis=0) self.std = np.std(data, axis=0) data -= self.mn data /= self.std self.data = data def extract_codes(self, X, standardize=False): self.standardize=standardize self._extract_data_patches(X) self.dico = MiniBatchDictionaryLearning(n_components=self.n_components, alpha=1, n_iter=500) print "Dictionary learning from data..." self.D = self.dico.fit(self.data) return self def plot_codes(self, cbar=False, **kwargs): #plt.figure(figsize=(4.2, 4)) N = int(np.ceil(np.sqrt(self.n_components))) kwargs.setdefault('cmap', pl.cm.gray_r) kwargs.setdefault('origin','bottom') kwargs.setdefault('interpolation','nearest') for i, comp in enumerate(self.D.components_): plt.subplot(N, N, i + 1) comp = comp * self.std + self.mn if self.standardize else comp plt.imshow(comp.reshape(self.patch_size), **kwargs) if cbar: plt.colorbar() plt.xticks(()) plt.yticks(()) plt.suptitle('Dictionary learned from spectrum patches\n', fontsize=16) plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) def extract_audio_dir_codes(self, dir_expr='/home/mkc/exp/FMRI/stimuli/Wav6sRamp/*.wav',**kwargs): flist=glob.glob(dir_expr) self.X = np.vstack([feature_scale(LogFrequencySpectrum(f, nbpo=24, nhop=1024).X,normalize=1).T for f in flist]).T self.D = extract_codes(self.X, **kwargs) self.plot_codes(**kwargs) return self def _get_approximation_coefs(self,data, components): w = np.array([self.omp.fit(components.T, d.T).coef_ for d in data]) return w def reconstruct_spectrum(self, w=None, randomize=False): data = self.data components = self.D.components_ if w is None: self.w = self._get_approximation_coefs(data, components) w = self.w if self.standardize: for comp in components: comp = comp * self.std + self.mn if randomize: components = np.random.permutation(components) recon = np.dot(w, components).reshape(-1,self.patch_size[0],self.patch_size[1]) self.X_hat = reconstruct_from_patches_2d(recon, self.X.shape) return self def reconstruct_individual_spectra(self, w=None, randomize=False, plotting=False, **kwargs): self.reconstruct_spectrum(w,randomize) w, components = self.w, self.D.components_ self.X_hat_l = [] for i in range(len(self.w.T)): r=np.array((np.matrix(w)[:,i]*np.matrix(components)[i,:])).reshape(-1,self.patch_size[0],self.patch_size[1]) self.X_hat_l.append(reconstruct_from_patches_2d(r, self.X.shape)) if plotting: plt.figure() for k in range(self.n_components): plt.subplot(self.n_components**0.5,self.n_components**0.5,k+1) feature_plot(self.X_hat_l[k],nofig=1,**kwargs) return self
# Mini-batch dictionary learning
from sklearn.decomposition import MiniBatchDictionaryLearning

n_components = 28
alpha = 1
batch_size = 200
n_iter = 10
random_state = 2018

miniBatchDictLearning = MiniBatchDictionaryLearning(n_components=n_components, alpha=alpha,
                                                    batch_size=batch_size, n_iter=n_iter,
                                                    random_state=random_state)
miniBatchDictLearning.fit(X_train)
X_train_miniBatchDictLearning = miniBatchDictLearning.fit_transform(X_train)
X_train_miniBatchDictLearning = pd.DataFrame(data=X_train_miniBatchDictLearning, index=X_train.index)

scatterPlot(X_train_miniBatchDictLearning, y_train, "Mini-batch Dictionary Learning")

# In[57]:

X_train_miniBatchDictLearning_inverse = np.array(X_train_miniBatchDictLearning).dot(miniBatchDictLearning.components_)
X_train_miniBatchDictLearning_inverse = pd.DataFrame(data=X_train_miniBatchDictLearning_inverse, index=X_train.index)
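# Hedged sketch (purely illustrative, not part of the original notebook): a quick check of how
# much of X_train the 28-atom representation retains, by comparing the inverse transform above
# with the original features.
import numpy as np

sq_err = np.sum((np.array(X_train) - np.array(X_train_miniBatchDictLearning_inverse)) ** 2, axis=1)
print('mean per-sample squared reconstruction error: %.4f' % sq_err.mean())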
class SparseApproxSpectrum(object): """class for 2D patch analysis of audio files initialization: patch_size - size of time-frequency 2D patches in spectrogram units (freq,time) [(12,12)] max_samples - if num audio patches exceeds this threshold, randomly sample spectrum [1000000] **omp_args - keyword arguments to OrthogonalMatchingPursuit(...) [None] """ def __init__(self, patch_size=(12, 12), max_samples=1000000, **omp_args): self.patch_size = patch_size self.max_samples = max_samples self.omp = OrthogonalMatchingPursuit(**omp_args) self.D = None self.data = None self.components = None self.zscore = False self.log_amplitude = False def _extract_data_patches(self, X, zscore, log_amplitude): "utility method for converting spectrogram data to 2D patches " self.zscore = zscore self.log_amplitude = log_amplitude self.X = X if self.log_amplitude: X = np.log(1 + X) data = extract_patches_2d(X, self.patch_size) data = data.reshape(data.shape[0], -1) if len(data) > self.max_samples: data = np.random.permutation(data)[:self.max_samples] print data.shape if self.zscore: self.mn = np.mean(data, axis=0) self.std = np.std(data, axis=0) data -= self.mn data /= self.std self.data = data def make_gabor_field(self, X, zscore=True, log_amplitude=True, thetas=range(4), sigmas=(1, 3), frequencies=(0.05, 0.25)): """Given a spectrogram, prepare 2D patches and Gabor filter bank kernels inputs: X - spectrogram data (frequency x time) zscore - whether to zscore the ensemble of 2D patches [True] log_amplitude - whether to apply log(1+X) scaling of spectrogram data [True] thetas - list of 2D Gabor filter orientations in units of pi/4. [range(4)] sigmas - list of 2D Gabor filter standard deviations in oriented direction [(1,3)] frequencies - list of 2D Gabor filter frequencies [(0.05,0.25)] outputs: self.data - 2D patches of input spectrogram self.D.components_ - Gabor dictionary of thetas x sigmas x frequencies atoms """ self._extract_data_patches(X, zscore, log_amplitude) self.n_components = len(thetas) * len(sigmas) * len(frequencies) self.thetas = thetas self.sigmas = sigmas self.frequencies = frequencies a, b = self.patch_size self.kernels = [] for theta in thetas: theta = theta / 4. * np.pi for sigma in sigmas: for frequency in frequencies: kernel = np.real( gabor_kernel(frequency, theta=theta, sigma_x=sigma, sigma_y=sigma)) c, d = kernel.shape if c <= a: z = np.zeros(self.patch_size) z[(a / 2 - c / 2):(a / 2 - c / 2 + c), (b / 2 - d / 2):(b / 2 - d / 2 + d)] = kernel else: z = kernel[(c / 2 - a / 2):(c / 2 - a / 2 + a), (d / 2 - b / 2):(d / 2 - b / 2 + b)] self.kernels.append(z.flatten()) class Bunch: def __init__(self, **kwds): self.__dict__.update(kwds) self.D = Bunch(components_=np.vstack(self.kernels)) def extract_codes(self, X, n_components=16, zscore=True, log_amplitude=True, **mbl_args): """Given a spectrogram, learn a dictionary of 2D patch atoms from spectrogram data inputs: X - spectrogram data (frequency x time) n_components - how many components to extract [16] zscore - whether to zscore the ensemble of 2D patches [True] log_amplitude - whether to apply log(1+X) scaling of spectrogram data [True] **mbl_args - keyword arguments for MiniBatchDictionaryLearning.fit(...) 
[None] outputs: self.data - 2D patches of input spectrogram self.D.components_ - dictionary of learned 2D atoms for sparse coding """ self._extract_data_patches(X, zscore, log_amplitude) self.n_components = n_components self.dico = MiniBatchDictionaryLearning(n_components=self.n_components, **mbl_args) print "Dictionary learning from data..." self.D = self.dico.fit(self.data) def plot_codes(self, cbar=False, show_axis=False, **kwargs): "plot the learned or generated 2D sparse code dictionary" N = int(np.ceil(np.sqrt(self.n_components))) kwargs.setdefault('cmap', plt.cm.gray_r) kwargs.setdefault('origin', 'bottom') kwargs.setdefault('interpolation', 'nearest') for i, comp in enumerate(self.D.components_): plt.subplot(N, N, i + 1) plt.imshow(comp.reshape(self.patch_size), **kwargs) if cbar: plt.colorbar() if not show_axis: plt.axis('off') plt.xticks(()) plt.yticks(()) plt.title('%d' % (i)) plt.suptitle('Dictionary of Spectrum Patches\n', fontsize=14) plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23) def extract_audio_dir_codes( self, dir_expr='/home/mkc/exp/FMRI/stimuli/Wav6sRamp/*.wav', **mbl_args): """apply dictionary learning to entire directory of audio files (requires LOTS of RAM) inputs: **mbl_args - keyword arguments for MiniBatchDictionaryLearning.fit(...) [None] """ flist = glob.glob(dir_expr) self.X = np.vstack([ br.feature_scale(br.LogFrequencySpectrum(f, nbpo=24, nhop=1024).X, normalize=1).T for f in flist ]).T self.D = extract_codes(self.X, **mbl_args) def _get_approximation_coefs(self, data, components): """utility function to fit dictionary components to data inputs: data - spectrogram data (frqeuency x time) [None] components - the dictionary components to fit to the data [None] """ w = np.array([self.omp.fit(components.T, d.T).coef_ for d in data]) return w def reconstruct_spectrum(self, w=None, randomize=False): """reconstruct by fitting current 2D dictionary to self.data inputs: w - per-component reconstruction weights [None=calculate weights] randomize - randomly permute components after getting weights [False] returns: self.X_hat - spectral reconstruction of self.data """ data = self.data components = self.D.components_ if w is None: self.w = self._get_approximation_coefs(data, components) w = self.w if randomize: components = np.random.permutation(components) recon = np.dot(w, components) if self.zscore: recon = recon * self.std recon = recon + self.mn recon = recon.reshape(-1, *self.patch_size) self.X_hat = reconstruct_from_patches_2d(recon, self.X.shape) if self.log_amplitude: self.X_hat = np.exp(self.X_hat) - 1.0 # invert log transform def reconstruct_individual_spectra(self, w=None, randomize=False, plotting=False, rectify=True, **kwargs): """fit each dictionary component to self.data inputs: w - per-component reconstruction weights [None=calculate weights] randomize - randomly permute components after getting weights [False] plotting - whether to subplot individual spectrum reconstructions [True] rectify- remove negative ("dark energy") from individual reconstructions [True] **kwargs - keyword arguments for plotting returns: self.X_hat_l - list of indvidual spectrum reconstructions per dictionary atom """ omp_args = {} self.reconstruct_spectrum(w, randomize, **omp_args) w, components = self.w, self.D.components_ self.X_hat_l = [] for i in range(len(self.w.T)): r = np.array( (np.matrix(w)[:, i] * np.matrix(components)[i, :])).reshape( -1, *self.patch_size) X_hat = reconstruct_from_patches_2d(r, self.X.shape) if self.log_amplitude: X_hat = np.exp(X_hat) - 1.0 
if rectify: # half wave rectification X_hat[X_hat < 0] = 0 self.X_hat_l.append(X_hat) if plotting: self.plot_individual_spectra(**kwargs) def plot_individual_spectra(self, **kwargs): "plot individual spectrum reconstructions for self.X_hat_l" if self.X_hat_l is None: return plt.figure() rn = np.ceil(self.n_components**0.5) for k in range(self.n_components): plt.subplot(rn, rn, k + 1) br.feature_plot(self.X_hat_l[k], nofig=1, **kwargs) plt.title('%d' % (k)) plt.suptitle('Component Reconstructions\n', fontsize=14)
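# Hedged usage sketch for the class above: learn a small dictionary of spectrogram patches from a
# random stand-in matrix and reconstruct it. The random data and the alpha/n_iter values are
# purely illustrative; extra keyword arguments to extract_codes are forwarded to
# MiniBatchDictionaryLearning.
import numpy as np

S = np.abs(np.random.randn(96, 240))                        # stand-in spectrogram (freq x time)
sas = SparseApproxSpectrum(patch_size=(12, 12))
sas.extract_codes(S, n_components=16, alpha=1, n_iter=100)  # learns sas.D.components_
sas.reconstruct_spectrum()                                   # per-patch OMP fit, fills sas.X_hat
print sas.X_hat.shape                                        # same shape as the input spectrogram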
class Layer(object): def __init__(self, hierarchy, depth, patch_size, num_features, num_patches, multiplier): """ * depth - hierarchy level (1, 2, 3, etc.) * patch_size - number of pixels representing side of the square patch. like, 8 (8x8 patches) * num_features - how many components to learn * multiplier - num of subpatches we break patch into (0 for the first level). if 3, patch will contant 3x3 subpatches. """ self.hierarchy = hierarchy self.depth = depth self.basement_size = patch_size self.num_features = num_features self.num_patches = num_patches self.multiplier = multiplier self.learning = MiniBatchDictionaryLearning( n_components=num_features, n_iter=3000, transform_algorithm='lasso_lars', transform_alpha=0.5, n_jobs=2) self.ready = False def get_data(self, data, max_patches=None): """ Extracts raw data from patches. """ max_patches = max_patches or self.num_patches if isinstance(data, np.ndarray): # one image patches = extract_patches_2d( data, (self.basement_size, self.basement_size), max_patches=max_patches) else: patches = [] # multiple images for i in xrange(max_patches): idx = np.random.randint(len(data)) # selecting random image dx = dy = self.basement_size if data[idx].shape[0] <= dx or data[idx].shape[1] <= dy: continue x = np.random.randint(data[idx].shape[0] - dx) y = np.random.randint(data[idx].shape[1] - dy) patch = data[idx][x: x + dx, y: y + dy] patches.append(patch.reshape(-1)) patches = np.vstack(patches) patches = patches.reshape(patches.shape[0], self.basement_size, self.basement_size) print 'patches', patches.shape patches = preprocessing.scale(patches) return patches def learn(self, data): data = data.reshape(data.shape[0], -1) self.learning.fit(data) self.ready = True @property def output_size(self): return int(np.sqrt(self.num_features)) @property def input_size(self): if self.depth == 0: return self.basement_size else: prev_layer = self.hierarchy.layers[self.depth - 1] r = prev_layer.output_size * self.multiplier return r return self._input_size @property def features(self): return self.learning.components_ # def get_features(self): # # going from up to down # result = [] # layers = self.hierarchy.layers[: self.depth][::-1] # if self.depth == 0: # return self.features # previous_layer = self.hierarchy.layers[self.depth - 1] # for feature in self.features: # multiplier = self.multiplier # feature = feature.reshape(self.multiplier * previous_layer.output_size, # self.multiplier * previous_layer.output_size,) # for other_layer in layers: # expressed_feature = np.empty((multiplier * other_layer.input_size, # multiplier * other_layer.input_size)) # enc_n = other_layer.output_size # n = other_layer.input_size # for dx in range(multiplier): # for dy in range(multiplier): # encoded_subfeature = feature[dx * enc_n: (dx + 1) * enc_n, # dy * enc_n: (dy + 1) * enc_n] # prev_patch = np.dot(encoded_subfeature.reshape(-1), other_layer.features) # expressed_feature[dx * n: (dx + 1) * n, dy * n: (dy + 1) * n] = prev_patch.reshape(n, n) # feature = expressed_feature # multiplier *= other_layer.multiplier # result.append(expressed_feature.reshape(-1)) # result = np.vstack(result) # return result def get_features(self): # going from down to up. 
        # these two methods should produce the same result
        if self.depth == 0:
            return self.features
        layers = self.hierarchy.layers[1: self.depth + 1]  # down --> up
        features = self.hierarchy.layers[0].features  # used to express the upper feature
        for i, layer in enumerate(layers, start=1):
            previous_layer = self.hierarchy.layers[i - 1]
            expressed_features = []
            for feature in layer.features:
                n = previous_layer.output_size
                m = int(np.sqrt(features.shape[1]))
                feature = feature.reshape((layer.input_size, layer.input_size))
                expressed_feature = np.empty((layer.multiplier * m, layer.multiplier * m))
                for dx in range(layer.multiplier):
                    for dy in range(layer.multiplier):
                        subfeature = feature[dx * n: (dx + 1) * n, dy * n: (dy + 1) * n]
                        # that's the previous layer's code; replace it with its reconstruction
                        expressed_subfeature = np.dot(subfeature.reshape(-1), features)
                        expressed_feature[dx * m: (dx + 1) * m, dy * m: (dy + 1) * m] = expressed_subfeature.reshape((m, m))
                expressed_features.append(expressed_feature.reshape(-1))
            features = np.vstack(expressed_features)
        return features
elif title == 'T-MOD javaORMP (Sparsity: 5)':
    continue
elif title == 'K-HOSVD javaORMP (Sparsity: 5)':
    dictionary = np.loadtxt(filePath + 'dictK-HOSVDNoisy_L=46500_K=100_noIt=50_solver=javaORMP_tnz=5.csv', delimiter=';')
    sparseCode = np.loadtxt(filePath + 'sparseCodeK-HOSVDNoisy_L=46500_K=100_noIt=50_solver=javaORMP_tnz=5.csv', delimiter=';')
else:
    miniBatch = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=50)
    dictionary = miniBatch.fit(refPatches).components_
    miniBatch.set_params(transform_algorithm=transform_algorithm, **kwargs)
    sparseCode = miniBatch.transform(noisyPatches)
recPatches = np.dot(sparseCode, dictionary)
recPatches += noiseMean
recPatches = recPatches.reshape(len(noisyPatches), *patch_size)
if transform_algorithm == 'threshold':
    recPatches -= recPatches.min()
    recPatches /= recPatches.max()

# Plot dictionaries
# plt.figure(figsize=(4.2, 4))
# for i, comp in enumerate(dictionary[:100]):
#     plt.subplot(10, 10, i + 1)
#     plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r, interpolation='nearest')
def main(argv=None):
    if argv is None:
        argv = sys.argv

    parser = OptionParser(add_help_option=False)
    parser.add_option("-i", dest="imgJSON")
    parser.add_option("-c", dest="cfgJSON")
    parser.add_option("-a", dest="outAtomFile")
    parser.add_option("-d", dest="outDiffFile")
    parser.add_option("-x", dest="outImagFile")
    parser.add_option("-s", dest="imSlice", type="int")
    parser.add_option("-r", dest="imScale", type="float")
    parser.add_option("-D", dest="dictSiz", type="int", default=5)
    parser.add_option("-k", dest="nearest", type="int", default=5)
    parser.add_option("-h", dest="doHelp", action="store_true", default=False)
    options, _ = parser.parse_args()

    if options.doHelp:
        usage()
        sys.exit(-1)

    imgJSON = options.imgJSON
    cfgJSON = options.cfgJSON
    outAtomFile = options.outAtomFile
    outImagFile = options.outImagFile
    outDiffFile = options.outDiffFile
    imSlice = options.imSlice
    dictSiz = options.dictSiz
    imScale = options.imScale
    nearest = options.nearest

    imData = json.load(open(imgJSON))
    helper = regtools.regtools(cfgJSON)

    groupSet = set()
    groupMap = dict()
    groupLab = []

    # generate numeric labels for each image
    for entry in imData["Data"]:
        groupSet.add(entry["Group"])
    for cnt, group in enumerate(groupSet):
        groupMap[group] = cnt
    for entry in imData["Data"]:
        groupLab.append(groupMap[entry["Group"]])

    imgFiles = [str(e["Source"]) for e in imData["Data"]]

    dataList = []
    for i, imFile in enumerate(imgFiles):
        im0 = sitk.ReadImage(imFile)
        im1 = pbmutils.imResize(im0, imScale)
        imSz = sitk.GetArrayFromImage(im1).shape
        helper.infoMsg("Image size : (%d,%d,%d)" % imSz)
        if imSlice is not None:
            sl0 = pbmutils.imSlice(im1, [0, 0, imSlice])
            dataList.append(sl0.ravel())
        else:
            dataList.append(sitk.GetArrayFromImage(im1).ravel())
        helper.infoMsg("Done with image %d!" % i)

    # write raw image data
    if outImagFile is not None:
        tfid = open(outImagFile, 'w')
        np.reshape(np.asmatrix(dataList).T, -1).astype('float32').tofile(tfid)
        tfid.close()

    # build difference images
    diffIm = pbmutils.groupDiff(np.asmatrix(dataList).T, groupLab, nearest)
    helper.infoMsg("Difference image matrix (%d x %d)" % diffIm.shape)

    # write raw difference data
    if outDiffFile is not None:
        outFid = open(outDiffFile, 'w')
        np.reshape(diffIm, -1).ravel().astype('float32').tofile(outFid)
        outFid.close()

    # create the dictionary learner and run (alpha=1)
    lrnObj = MiniBatchDictionaryLearning(dictSiz, 1, verbose=True)
    lrnRes = lrnObj.fit(np.asmatrix(diffIm).T).components_

    # write dictionary atoms
    if outAtomFile is not None:
        outFid = open(outAtomFile, 'w')
        np.reshape(lrnRes.T, -1).ravel().astype('float32').tofile(outFid)
        outFid.close()
# Data pre-processing: Normalization # row_sums = train_X.sum(axis=1).astype(float) # train_X = np.true_divide(train_X, row_sums[:, np.newaxis]) # # row_sums = test_X.sum(axis=1).astype(float) # test_X = np.true_divide(test_X, row_sums[:, np.newaxis]) ############################################################################### # Dictionary Learning n_components = 100 print("\nSparse Coding Dictionary Learning") # pca = RandomizedPCA(n_components=n_components).fit(train_X) dl = MiniBatchDictionaryLearning(n_components, batch_size=50, n_jobs=4, verbose=2) dl.fit(train_X) print "X_train.shape", train_X.shape print "Components shape", dl.components_.shape # components = dl.components().reshape((n_components, n_features)) components = dl.components_ # Visualizing the components as images component_titles = ["%d" % i for i in range(components.shape[0])] plot_gallery("Visualizing top components", components, w, h, n_row=n_components / 10, n_col=10) plt.show() ############################################################################### # Sparse Encoding print("\nSparse Encoding")
print('Extracting reference patches...')
t0 = time()
patch_size = (7, 7)
data = extract_patches_2d(distorted[:, :height // 2], patch_size)
data = data.reshape(data.shape[0], -1)
data -= np.mean(data, axis=0)
data /= np.std(data, axis=0)
print('done in %.2fs.' % (time() - t0))

###############################################################################
# Learn the dictionary from reference patches

print('Learning the dictionary...')
t0 = time()
dico = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=500)
V = dico.fit(data).components_
dt = time() - t0
print('done in %.2fs.' % dt)

plt.figure(figsize=(4.2, 4))
for i, comp in enumerate(V[:100]):
    plt.subplot(10, 10, i + 1)
    plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r, interpolation='nearest')
    plt.xticks(())
    plt.yticks(())
plt.suptitle('Dictionary learned from Lena patches\n' +
             'Train time %.1fs on %d patches' % (dt, len(data)), fontsize=16)
plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)
def imageDenoisingTest01():
    from time import time
    import matplotlib.pyplot as plt
    import numpy as np
    from scipy.misc import lena
    from sklearn.decomposition import MiniBatchDictionaryLearning
    from sklearn.feature_extraction.image import extract_patches_2d
    from sklearn.feature_extraction.image import reconstruct_from_patches_2d

    # Load image and extract patches
    lena = lena() / 256.0
    lena = lena[::2, ::2] + lena[1::2, ::2] + lena[::2, 1::2] + lena[1::2, 1::2]
    lena /= 4.0
    height, width = lena.shape

    # Distort the right half of the image
    print "distorting image"
    distorted = lena.copy()
    distorted[:, height//2:] += 0.075 * np.random.randn(width, height // 2)
    #plt.imshow(distorted[:, :height//2], cmap = plt.cm.gray, interpolation = "nearest")
    #plt.show()

    print "Extracting reference patches"
    # extract patches from the left half of the distorted image
    t0 = time()
    patch_size = (7, 7)
    data = extract_patches_2d(distorted[:, :height//2], patch_size)  # data is a 30500 x 7 x 7 array
    #print data
    #print len(data)
    #print len(data[0][0])
    #plt.imshow(data[0], cmap = plt.cm.gray, interpolation = "nearest")
    #plt.show()
    #print distorted[:, height//2:].shape  # one half is 256 x 128
    # flatten each patch into a 1-D vector, then normalize
    data = data.reshape(data.shape[0], -1)
    data -= np.mean(data, axis = 0)
    data /= np.std(data, axis = 0)
    print 'done in ' + str(time() - t0)

    # Learn the dictionary from reference patches
    print "Learning the dictionary"
    t0 = time()
    # this step learns the dictionary from the patches
    # create a model
    dico = MiniBatchDictionaryLearning(n_components = 100, alpha = 1, n_iter = 5000)
    print data.shape  # data is a 30500 x 49 matrix
    V = dico.fit(data).components_
    print V.shape  # V is a 100 x 49 matrix
    dt = time() - t0
    print "done in %.2fs." % dt

    plt.figure(figsize = (4.2, 4))
    for i, comp in enumerate(V[:100]):
        plt.subplot(10, 10, i + 1)
        plt.imshow(comp.reshape(patch_size), cmap = plt.cm.gray_r, interpolation = "nearest")
        plt.xticks(())
        plt.yticks(())
    plt.suptitle("Dictionary learned from lena patches\n" + "Train time %.1fs on %d patches" % (dt, len(data)), fontsize = 16)
    plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def show_with_diff(image, reference, title):
        plt.figure(figsize = (5, 3.3))
        plt.subplot(1, 2, 1)
        plt.title('Image')
        plt.imshow(image, vmin = 0, vmax = 1, cmap = plt.cm.gray, interpolation = "nearest")
        plt.xticks(())
        plt.yticks(())
        plt.subplot(1, 2, 2)
        difference = image - reference
        plt.title("difference (norm: %.2f)" % np.sqrt(np.sum(difference ** 2)))
        plt.imshow(difference, vmin = -0.5, vmax = 0.5, cmap = plt.cm.PuOr, interpolation = "nearest")
        plt.xticks(())
        plt.yticks(())
        plt.suptitle(title, size = 16)
        plt.subplots_adjust(0.02, 0.02, 0.98, 0.79, 0.02, 0.02)

    show_with_diff(distorted, lena, "Distorted Image")
    #plt.show()

    # Extract noisy patches and reconstruct them using the dictionary
    # extract patches from the right half
    print('Extracting noisy patches...')
    t0 = time()
    data = extract_patches_2d(distorted[:, height//2:], patch_size)
    data = data.reshape(data.shape[0], -1)
    intercept = np.mean(data, axis = 0)
    data -= intercept
    print "done in %.2fs. " % (time() - t0)

    transform_algorithms = [('Orthogonal Matching Pursuit\n1 atom', 'omp', {'transform_n_nonzero_coefs': 1}),
                            ('Orthogonal Matching Pursuit\n2 atoms', 'omp', {'transform_n_nonzero_coefs': 2}),
                            ('Least-angle regression\n5 atoms', 'lars', {'transform_n_nonzero_coefs': 5}),
                            ('Thresholding\n alpha = 0.1', 'threshold', {'transform_alpha': 0.1})]

    reconstructions = {}
    for title, transform_algorithm, kwargs in transform_algorithms:
        print title + "..."
        reconstructions[title] = lena.copy()
        t0 = time()
        dico.set_params(transform_algorithm = transform_algorithm, **kwargs)
        code = dico.transform(data)  # use the previously trained model to obtain the representation coefficients -- code
        patches = np.dot(code, V)
        if transform_algorithm == "threshold":
            patches -= patches.min()
            patches /= patches.max()
        patches += intercept
        patches = patches.reshape(len(data), *patch_size)
        if transform_algorithm == "threshold":
            patches -= patches.min()
            patches /= patches.max()
        reconstructions[title][:, height // 2:] = reconstruct_from_patches_2d(patches, (width, height // 2))
        dt = time() - t0
        print "done in %.2fs." % dt
        show_with_diff(reconstructions[title], lena, title + ' (time: %.1fs)' % dt)
    plt.show()