def test_dict_learning_online_positivity(transform_algorithm,
                                         positive_code,
                                         positive_dict):
    rng = np.random.RandomState(0)
    n_components = 8

    dico = MiniBatchDictionaryLearning(
        n_components, transform_algorithm=transform_algorithm, random_state=0,
        positive_code=positive_code, positive_dict=positive_dict).fit(X)
    code = dico.transform(X)
    if positive_dict:
        assert (dico.components_ >= 0).all()
    else:
        assert (dico.components_ < 0).any()
    if positive_code:
        assert (code >= 0).all()
    else:
        assert (code < 0).any()

    code, dictionary = dict_learning_online(X, n_components=n_components,
                                            alpha=1, random_state=rng,
                                            positive_dict=positive_dict,
                                            positive_code=positive_code)
    if positive_dict:
        assert (dictionary >= 0).all()
    else:
        assert (dictionary < 0).any()
    if positive_code:
        assert (code >= 0).all()
    else:
        assert (code < 0).any()
def test_dict_learning_online_readonly_initialization():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)
    V.setflags(write=False)
    MiniBatchDictionaryLearning(n_components, n_iter=1, dict_init=V,
                                random_state=0, shuffle=False).fit(X)
def learn_sparse_components3(shapes, n_components, lmbda, batch_size,
                             transform_n_nonzero_coefs, fit_algorithm,
                             n_iter=5000):
    """Learn sparse components from a dataset of shapes."""
    n_shapes = len(shapes)

    # Learn sparse components and predict coefficients for the dataset
    dl = MiniBatchDictionaryLearning(
        n_components=n_components, alpha=lmbda, batch_size=batch_size,
        n_iter=n_iter, transform_n_nonzero_coefs=transform_n_nonzero_coefs,
        verbose=1, fit_algorithm=fit_algorithm, transform_algorithm='lasso_cd',
        positive_code=True)
    dl.coefficients = dl.fit_transform(shapes)

    # Compute frequency of activations and argsort (but do not apply the
    # argsort, as we would also need to sort the coefficients and all inner
    # stats of the sklearn object)
    dl.frequencies = np.count_nonzero(dl.coefficients.T, axis=1) / n_shapes
    dl.argsort_freqs = np.argsort(-dl.frequencies)
    return dl
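
# A minimal usage sketch for learn_sparse_components3 (illustrative only: the
# synthetic `shapes` matrix and every parameter value below are assumptions,
# not taken from the original code; 'cd' is chosen for fit_algorithm because
# the positive_code constraint set inside the helper requires it).
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    shapes = np.abs(rng.randn(200, 64))  # 200 flattened shapes, 64 features each
    dl = learn_sparse_components3(shapes, n_components=16, lmbda=1.0,
                                  batch_size=10, transform_n_nonzero_coefs=3,
                                  fit_algorithm='cd', n_iter=200)
    # Atoms reordered by how often they are activated across the dataset
    print(dl.components_[dl.argsort_freqs].shape)  # (16, 64)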
def test_dict_learning_online_estimator_shapes():
    n_components = 5
    dico = MiniBatchDictionaryLearning(
        n_components, batch_size=4, max_iter=5, random_state=0
    )
    dico.fit(X)
    assert dico.components_.shape == (n_components, n_features)
def fit(self, X, y=None):
    # compute the codes
    print('Extracting patches...')
    patches = []
    num = self.patch_num // X.size
    for x in X:
        img = imread(str(x[0]))
        tmp = extract_patches_2d(img, (self.patch_size, self.patch_size),
                                 max_patches=num,
                                 random_state=np.random.RandomState())
        patches.append(tmp)
    data = np.vstack(patches)
    data = data.reshape(data.shape[0], -1)
    data -= np.mean(data, axis=0)
    data = data / np.std(data, axis=0)

    print('Learning codebook...')
    if self.method == 'sc':
        self.dico = MiniBatchDictionaryLearning(
            n_components=self.codebook_size, alpha=1, n_iter=100,
            batch_size=100, verbose=True)
        self.dico.fit(data)
    elif self.method == 'km':
        # self.dico = MiniBatchKMeans(n_clusters=self.codebook_size)
        pass
    return self
def dict_learn(y, n_components=16, n_iter=n_iter):
    n = np.shape(y)[0]
    dico = MiniBatchDictionaryLearning(
        n_components=n_components, n_iter=n_iter, alpha=0.5,
        fit_algorithm=fit_algorithm, transform_algorithm=transform_algorithm,
        transform_n_nonzero_coefs=transform_n_nonzero_coefs)
    v = dico.fit(y).components_
    A, B = dico.inner_stats_
    x = dico.transform(y)

    # Collect the nonzero coefficients and their (row, column) positions.
    # Materialize the zip as a list so it is not exhausted by the loop below
    # before being returned.
    a = []
    nz = np.nonzero(x)
    index = list(zip(nz[0], nz[1]))
    for i in index:
        a.append(x[i[0], i[1]])

    res = np.dot(x, v)
    error = np.mean(np.linalg.norm(y - res, axis=1)) / 10.0
    return v, index, a, error
def test_dict_learning_online_overcomplete():
    n_components = 12
    dico = MiniBatchDictionaryLearning(n_components, batch_size=4,
                                       max_iter=5, random_state=0).fit(X)
    assert dico.components_.shape == (n_components, n_features)
def main(games_path=None):
    if games_path is None:
        games_path = 'specmine/data/go_games/2010-01.pickle.gz'

    with specmine.util.openz(games_path) as games_file:
        games = pickle.load(games_file)

    boards = None  # numpy array, n x 9 x 9
    for game in games:
        if boards is None:
            boards = games[game].grids
        else:
            boards = numpy.vstack((boards, games[game].grids))

    print('boards shape: ', boards.shape)
    boards = boards.reshape((boards.shape[0], -1))
    print('boards reshaped: ', boards.shape)

    print('Learning the dictionary... ')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=500)
    V = dico.fit(boards).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    # pl.figure(figsize=(4.2, 4))
    for i, comp in enumerate(V[:100]):
        pl.subplot(10, 10, i + 1)
        # each atom is a flattened board; restore the 9x9 grid for display
        pl.imshow(comp.reshape((9, 9)), cmap=pl.cm.gray_r)  # interpolation='nearest'
        pl.xticks(())
        pl.yticks(())
def generar_diccionario(s, number_atoms, number_samples, length):
    print('Generating the dictionary...')
    # trim the signals to 2048 samples
    s = cut_signals(s, number_samples)
    # build the matrix A with number_of_patches rows and length columns
    A = extract_patches_1D(s, length, number_atoms, number_samples)
    # normalize the patches
    for i in range(0, number_atoms):
        A[i] -= np.mean(A[i])
        A[i] /= np.std(A[i])
    # train the dictionary
    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=number_atoms,
                                       alpha=1.2 / np.sqrt(number_atoms),
                                       n_iter=1000, batch_size=4,
                                       fit_algorithm='lars',
                                       transform_algorithm='lasso_lars')
    V = dico.fit(A).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)
    # save the dictionary so it can be loaded directly later
    np.save("DICT", V)
    return (A, V)
def learn_dictionary(X, n_filters, filter_size, n_sample=1000,
                     n_sample_patches=0, **kwargs):
    """
    Learn a dictionary of n_filters atoms from n_sample images from X.
    """
    n_channels = X.shape[1]

    # subsample n_sample images randomly
    rand_idx = np.random.choice(len(X), n_sample, replace=False)

    # extract patches
    patch_size = (filter_size, filter_size)
    patches = PatchExtractor(patch_size).transform(
        X[rand_idx, ...].reshape(n_sample, X.shape[2], X.shape[3], X.shape[1]))
    patches = patches.reshape(patches.shape[0], -1)
    patches -= np.mean(patches, axis=0)
    patches /= np.std(patches, axis=0)

    if n_sample_patches > 0 and (n_sample_patches < len(patches)):
        np.random.shuffle(patches)
        patches = patches[:n_sample_patches, ...]

    # learn dictionary
    print('Learning dictionary for weight initialization...')
    dico = MiniBatchDictionaryLearning(n_components=n_filters, alpha=1,
                                       n_iter=1000, batch_size=10,
                                       shuffle=True, verbose=True, **kwargs)
    W = dico.fit(patches).components_
    W = W.reshape(n_filters, n_channels, filter_size, filter_size)

    print('Dictionary learned.')
    return W.astype(np.float32)
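
# Illustrative call of the learn_dictionary above (assumptions: X is an
# (N, C, H, W) image batch, as the reshape inside the function implies, and
# single-channel data is used here so the layout juggling is harmless; the
# random input is invented).
if __name__ == "__main__":
    X_imgs = np.random.rand(40, 1, 16, 16).astype(np.float32)
    W = learn_dictionary(X_imgs, n_filters=8, filter_size=5, n_sample=20,
                         n_sample_patches=2000)
    print(W.shape)  # (8, 1, 5, 5)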
def test_dict_learning_online_initialization():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)
    dico = MiniBatchDictionaryLearning(n_components, n_iter=0, dict_init=V,
                                       random_state=0).fit(X)
    assert_array_equal(dico.components_, V)
def learn_dictionary(self, appliance_main, app_name):
    if appliance_main.size % self.shape != 0:
        extra_values = self.shape - appliance_main.size % self.shape
        appliance_main = list(
            appliance_main.values.flatten()) + [0] * extra_values

    appliance_main = np.array(appliance_main).reshape((-1, self.shape)).T
    self.power[app_name] = appliance_main

    if app_name not in self.dictionaries:
        print("Training First dictionary for ", app_name)
        model = MiniBatchDictionaryLearning(
            n_components=self.n_components, positive_code=True,
            positive_dict=True, transform_algorithm='lasso_lars',
            alpha=self.sparsity_coef)
    else:
        print("Re-training dictionary for ", app_name)
        model = self.dictionaries[app_name]

    model.fit(appliance_main.T)
    reconstruction = np.matmul(model.components_.T,
                               model.transform(appliance_main.T).T)
    print("RMSE reconstruction for appliance %s is %s" %
          (app_name,
           mean_squared_error(reconstruction, appliance_main) ** 0.5))
    self.dictionaries[app_name] = model
def cluster_sk_mini_batch_dictionary_learning(content):
    """Run MiniBatchDictionaryLearning on the request payload."""
    _config = MiniBatchDictionaryLearning(
        n_components=content['n_components'],
        alpha=content['alpha'],
        n_iter=content['n_iter'],
        fit_algorithm=content['fit_algorithm'],
        n_jobs=1,
        batch_size=content['batch_size'],
        shuffle=content['shuffle'],
        dict_init=None,
        transform_algorithm=content['transform_algorithm'],
        transform_n_nonzero_coefs=None,
        transform_alpha=None,
        verbose=False,
        split_sign=content['split_sign'],
        random_state=None)
    _result = _config.fit_transform(content['data'])
    # ignore_nan requires the simplejson-compatible dumps used by this service
    return httpWrapper(json.dumps(
        {
            'result': _result.tolist(),
            'components': _config.components_.tolist(),
            'iter': _config.n_iter_
        }, ignore_nan=True))
def test_minibatch_dictionary_learning_warns_and_ignore_n_iter():
    """Check that a warning is always raised when `n_iter` is set, even
    though it is ignored when `max_iter` is set.
    """
    warn_msg = "'n_iter' is deprecated in version 1.1"
    with pytest.warns(FutureWarning, match=warn_msg):
        model = MiniBatchDictionaryLearning(batch_size=256, n_iter=2,
                                            max_iter=2).fit(X)
    assert model.n_iter_ == 2
def test_batch_size_default_value_future_warning():
    # Check that a FutureWarning is raised if batch_size is left to its
    # default value.
    # FIXME: remove in 1.3
    msg = "The default value of batch_size will change"
    with pytest.warns(FutureWarning, match=msg):
        dict_learning_online(X, n_components=2, random_state=0)
    with pytest.warns(FutureWarning, match=msg):
        MiniBatchDictionaryLearning(n_components=2, random_state=0).fit(X)
def init_core_model(self):
    if hasattr(self, 'model'):
        del self.model
        import gc
        gc.collect()
    self.model = MiniBatchDictionaryLearning(
        n_components=self.n_components, alpha=self.alpha, n_jobs=16,
        n_iter=self.n_iter, batch_size=self.batch_size,
        fit_algorithm='lars', transform_algorithm='omp', verbose=True)
def test_dict_learning_online_partial_fit():
    # this test was not actually passing before!
    raise SkipTest

    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    dico1 = MiniBatchDictionaryLearning(n_components, n_iter=10, batch_size=1,
                                        shuffle=False, dict_init=V,
                                        random_state=0).fit(X)
    dico2 = MiniBatchDictionaryLearning(n_components, n_iter=1, dict_init=V,
                                        random_state=0)
    for ii, sample in enumerate(X):
        dico2.partial_fit(sample, iter_offset=ii * dico2.n_iter)
        # if ii == 1: break
    assert not np.all(sparse_encode(X, dico1.components_, alpha=100) == 0)
    assert_array_equal(dico1.components_, dico2.components_)
def extract_codes(self, X, standardize=False):
    self.standardize = standardize
    self._extract_data_patches(X)
    self.dico = MiniBatchDictionaryLearning(n_components=self.n_components,
                                            alpha=1, n_iter=500)
    print("Dictionary learning from data...")
    self.D = self.dico.fit(self.data)
    return self
def learn_dictionary(patch_size, step, plot_dictionary=False, *args):
    """
    Normalize the patches, learn a dictionary on them and optionally plot it.

    Parameters
    ----------
    patch_size: (int, int), the size of the patches to be extracted from the
        images
    step: int, the step of the moving patches; overlap of patches =
        patch_size - step
    plot_dictionary: boolean, False by default, plots the dictionary if True

    Returns
    -------
    dico: a dictionary (a set of atoms) that can best be used to represent
        data using a sparse code
    V: array, [n_components, n_features], the components of the fitted data
    """
    argCount = len(args)
    assert argCount > 0, 'no image to extract the patches from'
    global initial_patch_size, all_patches

    print(f'Extracting reference patches from {argCount} images...')
    t0 = time()
    for image in args:
        patches = patchify(image, patch_size, step)
        initial_patch_size = patches.shape
        patches = patches.reshape(-1, patch_size[0] * patch_size[1])
        all_patches.append(patches)
    dt = time() - t0
    print('done in %.2fs.' % dt)

    all_patches = np.reshape(all_patches, (-1, patch_size[0] * patch_size[1]))
    all_patches -= np.mean(all_patches, axis=0)  # remove the mean
    all_patches /= np.std(all_patches, axis=0)   # normalize each patch

    print('Learning the dictionary...')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=100, alpha=1, n_iter=400)
    V = dico.fit(all_patches).components_
    dt = time() - t0
    print('done in %.2fs.' % dt)

    if plot_dictionary:
        # plot the dictionary atoms
        plt.figure(figsize=(4.2, 4))
        for i, comp in enumerate(V[:100]):
            plt.subplot(10, 10, i + 1)
            plt.imshow(comp.reshape(patch_size), cmap=plt.cm.gray_r,
                       interpolation='nearest')
            plt.xticks(())
            plt.yticks(())
        plt.suptitle('Dictionary learned from patches')
        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    return dico, V
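
# Usage sketch for the patch-based learn_dictionary above (hypothetical
# inputs; assumes the module-level `all_patches = []` list that the
# function's `global` statement refers to, and the `patchify` library).
if __name__ == "__main__":
    all_patches = []
    img_a = np.random.rand(64, 64)
    img_b = np.random.rand(64, 64)
    dico, V = learn_dictionary((8, 8), 4, False, img_a, img_b)
    print(V.shape)  # (100, 64): 100 atoms over 8x8 patches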
def test_dict_learning_iter_offset():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)
    dict1 = MiniBatchDictionaryLearning(n_components, n_iter=10,
                                        dict_init=V, random_state=0,
                                        shuffle=False)
    dict2 = MiniBatchDictionaryLearning(n_components, n_iter=10,
                                        dict_init=V, random_state=0,
                                        shuffle=False)
    dict1.fit(X)
    for sample in X:
        dict2.partial_fit(sample[np.newaxis, :])
    assert dict1.iter_offset_ == dict2.iter_offset_
def test_dict_learning_online_partial_fit():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    dict1 = MiniBatchDictionaryLearning(n_components, n_iter=10 * len(X),
                                        batch_size=1, alpha=1, shuffle=False,
                                        dict_init=V, random_state=0).fit(X)
    dict2 = MiniBatchDictionaryLearning(n_components, alpha=1, n_iter=1,
                                        dict_init=V, random_state=0)
    for i in range(10):
        for sample in X:
            dict2.partial_fit(sample[np.newaxis, :])

    assert not np.all(sparse_encode(X, dict1.components_, alpha=1) == 0)
    assert_array_almost_equal(dict1.components_, dict2.components_, decimal=2)
def test_dict_learning_online_verbosity():
    n_components = 5
    # test verbosity
    from io import StringIO
    import sys

    old_stdout = sys.stdout
    sys.stdout = StringIO()
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1,
                                       random_state=0)
    dico.fit(X)
    dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2,
                                       random_state=0)
    dico.fit(X)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=1,
                         random_state=0)
    dict_learning_online(X, n_components=n_components, alpha=1, verbose=2,
                         random_state=0)
    sys.stdout = old_stdout

    assert dico.components_.shape == (n_components, n_features)
def scskl_dico_learning(list_pickled_array, n_atoms, maxepoch=5, maxiter=100):
    D = None
    for e in range(maxepoch):
        for a in list_pickled_array:
            data = joblib.load(a)
            # warm-start each chunk from the dictionary learned so far
            dico = MiniBatchDictionaryLearning(n_components=n_atoms,
                                               n_iter=maxiter, dict_init=D)
            D = dico.fit(data).components_.astype(np.float32)
    return D
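
# Usage sketch for scskl_dico_learning (file names invented; assumes each
# pickled file holds a 2D (n_samples, n_features) array saved with
# joblib.dump).
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    for name in ("chunk0.jbl", "chunk1.jbl"):
        joblib.dump(rng.randn(100, 20), name)
    D = scskl_dico_learning(["chunk0.jbl", "chunk1.jbl"], n_atoms=8,
                            maxepoch=2)
    print(D.shape)  # (8, 20)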
def test_minibatch_dict_learning_partial_fit_iter_offset_deprecated():
    # check the deprecation warning of iter_offset in partial_fit
    # FIXME: remove in 1.3
    depr_msg = (
        "'iter_offset' is deprecated in version 1.1 and will be removed in "
        "version 1.3"
    )
    est = MiniBatchDictionaryLearning(n_components=2, batch_size=4,
                                      random_state=0)
    with pytest.warns(FutureWarning, match=depr_msg):
        est.partial_fit(X, iter_offset=0)
def to_sparse(X, dim):
    sparse_dict = MiniBatchDictionaryLearning(dim)
    sparse_dict.fit(X)
    sparse_vectors = sparse_encode(X, sparse_dict.components_)
    for i in sparse_vectors:
        print(i)
    return sparse_vectors
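
# Usage sketch for to_sparse with invented data: learn a 5-atom dictionary
# from a (20, 10) matrix and encode it.
if __name__ == "__main__":
    X_demo = np.random.rand(20, 10)
    vecs = to_sparse(X_demo, dim=5)
    print(vecs.shape)  # (20, 5): one 5-dimensional sparse code per row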
def getOverallDict():
    classes = pickle.load(open(path_classes, "rb"))
    class_names = pickle.load(open(path_class_names, "rb"))
    i = 0
    X = np.zeros((0, patch_size))
    dictionary = MiniBatchDictionaryLearning(n_components=35, alpha=20,
                                             n_iter=700, n_jobs=3)
    for label in classes:
        class_name = class_names[label]
        X_class = []
        if class_name in ["artifacts", "artifacts_edge"]:
            continue
        for img_id in classes[label]:
            img = imread(path_train_cropped + "/" + img_id + ".jpg") / 255
            (h, w) = img.shape
            # pad images that are smaller than the patch size
            if h < patch_dim[0]:
                temp = np.ones((patch_dim[0], w))
                temp[:h, :] = img
                img = temp
                h = patch_dim[0]
            if w < patch_dim[1]:
                temp = np.ones((h, patch_dim[1]))
                temp[:, :w] = img
                img = temp
            img_patches = extract_patches_2d(img, patch_dim)
            img_patches = img_patches.reshape(img_patches.shape[0], -1)
            # drop patches that are almost entirely white background
            img_patches = img_patches[~np.all(img_patches > 0.98, axis=1)]
            X_class += list(img_patches)
            i += 1
        label += 1
        if len(X_class) > 33000:
            X_class = sample(X_class, 30000)
        print(str(label) + " --- " + str(len(X_class)), flush=True)
        X_class = np.array(X_class)
        X = np.concatenate([X, X_class], axis=0)
    print(X.shape)
    pickle.dump(X, open("../data/temp.dat", "wb"))
    dictionary.fit(X)
    return dictionary
def ksvd(noisy_data):
    print('Updating Dictionary')
    t0 = time()
    dico = MiniBatchDictionaryLearning(n_components=n_comp, alpha=2,
                                       n_iter=n_iter)  # dict_init=D
    V = dico.fit(noisy_data).components_
    # report the elapsed time after fitting, so it covers the learning step
    print('done in %.2fs.' % (time() - t0))
    return V, dico
def loadDico(components_, transform_algorithm='lars',
             kwargs={'transform_n_nonzero_coefs': 5}):
    n_components = components_.shape[0]
    dico = MiniBatchDictionaryLearning(n_components=n_components, alpha=1,
                                       n_iter=500)
    dico.set_params(transform_algorithm=transform_algorithm, **kwargs)
    dico.components_ = components_
    dico.n_components_ = n_components
    return dico
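
# Usage sketch for loadDico: rebuild a transform-only estimator from
# previously saved components (the "DICT.npy" file name mirrors the np.save
# call in generar_diccionario above but is an assumption here, as is the OMP
# setting).
if __name__ == "__main__":
    components = np.load("DICT.npy")
    dico = loadDico(components, transform_algorithm='omp',
                    kwargs={'transform_n_nonzero_coefs': 3})
    codes = dico.transform(np.random.rand(5, components.shape[1]))
    print(codes.shape)  # (5, n_components)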
def test_minibatch_dictionary_learning_lars(positive_dict):
    n_components = 8

    dico = MiniBatchDictionaryLearning(
        n_components, transform_algorithm="lars", random_state=0,
        positive_dict=positive_dict, fit_algorithm='cd').fit(X)

    if positive_dict:
        assert (dico.components_ >= 0).all()
    else:
        assert (dico.components_ < 0).any()
def fit_and_save(n_iter, alpha, data):
    dico = MiniBatchDictionaryLearning(
        n_components=500, alpha=alpha, n_iter=n_iter, n_jobs=4
    )  # 500 took 7.5 mins, 5000 should be an hour, 10000 took 10 mins
    t = time.perf_counter()  # time.clock() was removed in Python 3.8
    print('fitting...', n_iter, alpha)
    V = dico.fit(data)
    print('took', time.perf_counter() - t, 'sec')
    np.save(
        'bases/bases_iters=' + str(n_iter) + '_alpha=' + str(alpha) + '.npy',
        V.components_)
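
# Usage sketch for fit_and_save (synthetic data; assumes the `bases/` output
# directory used by the function, which we create here if missing).
if __name__ == "__main__":
    import os
    os.makedirs('bases', exist_ok=True)
    demo = np.random.rand(2000, 64)
    fit_and_save(n_iter=100, alpha=1.0, data=demo)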