def create_dictionary_dl(lmbd, K=100, N=10000, dir_mnist='save_exp/mnist'):
    import os.path as osp
    fname = osp.join(dir_mnist, "D_mnist_K{}_lmbd{}.npy".format(K, lmbd))
    if osp.exists(fname):
        D = np.load(fname)
    else:
        from sklearn.decomposition import DictionaryLearning
        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
        im = mnist.train.next_batch(N)[0]
        im = im.reshape(N, 28, 28)
        im = [imresize(a, (17, 17), interp='bilinear', mode='L') - .5
              for a in im]
        X = np.array(im).reshape(N, -1)
        print(X.shape)
        dl = DictionaryLearning(K, alpha=lmbd * N, fit_algorithm='cd',
                                n_jobs=-1, verbose=1)
        dl.fit(X)
        D = dl.components_.reshape(K, -1)
        np.save(fname, D)
    return D
def __init__(self, num_components=10, catalog_name='unknown', alpha=0.001,
             transform_alpha=0.01, max_iter=2000, tol=1e-9, n_jobs=1,
             verbose=True, random_state=None):
    self._decomposition = 'Sparse Coding'
    self._num_components = num_components
    self._catalog_name = catalog_name
    self._alpha = alpha
    self._transform_alpha = transform_alpha
    self._n_jobs = n_jobs
    self._random_state = random_state

    self._DL = DictionaryLearning(n_components=self._num_components,
                                  alpha=self._alpha,
                                  transform_alpha=self._transform_alpha,
                                  n_jobs=self._n_jobs,
                                  verbose=verbose,
                                  random_state=self._random_state)
def test_dict_learning_lassocd_readonly_data():
    n_components = 12
    with TempMemmap(X) as X_read_only:
        dico = DictionaryLearning(n_components, transform_algorithm='lasso_cd',
                                  transform_alpha=0.001, random_state=0,
                                  n_jobs=-1)
        code = dico.fit(X_read_only).transform(X_read_only)
        assert_array_almost_equal(np.dot(code, dico.components_), X_read_only,
                                  decimal=2)
def learn_dictionary(data, n_components):
    model = DictionaryLearning(n_components=n_components, alpha=1.0,
                               max_iter=200)
    model = model.fit(data.T)
    dictionary = model.components_.T
    return dictionary
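# Hedged usage sketch for learn_dictionary above. The synthetic data, its
# shape, and the 8-atom dictionary size are assumptions for illustration only.
import numpy as np

rng = np.random.RandomState(0)
data = rng.randn(16, 200)                    # 16 features x 200 samples
dictionary = learn_dictionary(data, n_components=8)
print(dictionary.shape)                      # (16, 8): one atom per column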
def _get_stain_matrix(self, input_image: np.ndarray) -> np.ndarray:
    """Compute the 2x3 stain matrix with the method from the paper

    Args:
        input_image (np.array): Image to extract the stains from

    Returns:
        np.array: Extracted stains
    """
    mask = self._notwhite_mask(input_image, threshold=self.thres).reshape((-1,))
    optical_density = self._rgb_to_od(input_image).reshape((-1, 3))
    optical_density = optical_density[mask]
    n_features = optical_density.T.shape[1]
    dict_learner = DictionaryLearning(
        n_components=2,
        alpha=self.lambda_s,
        max_iter=10,
        fit_algorithm="lars",
        transform_algorithm="lasso_lars",
        transform_n_nonzero_coefs=n_features,
        random_state=0,
        positive_dict=True,
    )
    dictionary = dict_learner.fit_transform(optical_density.T).T
    if dictionary[0, 0] < dictionary[1, 0]:
        dictionary = dictionary[[1, 0], :]
    dictionary = self._normalize_rows(dictionary)
    return dictionary
def __init__(self, model_filename=None):
    if model_filename is not None:
        self.load_model(model_filename)
    else:
        # default model params
        self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS['n_components']
        self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
        self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
        self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS['random_state']
        self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
        self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

    # initialize Dictionary Learning object with default params and weights
    self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                     alpha=1,
                                     max_iter=self.max_iter,
                                     tol=1e-08,
                                     fit_algorithm='lars',
                                     transform_algorithm='omp',
                                     transform_n_nonzero_coefs=None,
                                     transform_alpha=None,
                                     n_jobs=1,
                                     code_init=self.code_init,
                                     dict_init=self.dict_init,
                                     verbose=False,
                                     split_sign=False,
                                     random_state=self.random_state)
def _dictionarylearning(Y, n, k, iter_=100000):
    dct = DictionaryLearning(n_components=n, transform_algorithm='lars',
                             transform_n_nonzero_coefs=k, max_iter=iter_)
    dct.fit(Y.T)
    A_new = dct.components_
    return A_new.T
def test_dict_learning_split():
    n_atoms = 5
    dico = DictionaryLearning(n_atoms, transform_algorithm='threshold')
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)
    assert_array_equal(split_code[:, :n_atoms] - split_code[:, n_atoms:], code)
def learn_dictionary(patches, n_c=512, a=1, n_i=100, n_j=3, es=5,
                     fit_algorithm='lars'):
    dic = DictionaryLearning(n_components=n_c, alpha=a, max_iter=n_i,
                             n_jobs=n_j, fit_algorithm=fit_algorithm)
    print("Start learning dictionary: n_c: " + str(n_c) + ", alpha: " + str(a)
          + ", n_i: " + str(n_i) + ", es: " + str(es) + ", n_j: " + str(n_j))
    v2 = dic.fit(patches).components_
    d2 = v2.reshape(n_c, es, es, es)  # e.g. 512x5x5x5
    return d2
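# Hedged usage sketch for the patch-based learn_dictionary above; the random
# 5x5x5 patches and the reduced atom count are illustrative assumptions.
import numpy as np

rng = np.random.RandomState(0)
patches = rng.randn(200, 5 * 5 * 5)          # 200 flattened 5x5x5 patches
atoms = learn_dictionary(patches, n_c=16, a=1, n_i=10, n_j=1, es=5)
print(atoms.shape)                           # (16, 5, 5, 5)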
def test_dict_learning_shapes():
    n_components = 5
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert dico.components_.shape == (n_components, n_features)

    n_components = 1
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert dico.components_.shape == (n_components, n_features)
    assert dico.transform(X).shape == (X.shape[0], n_components)
def test_dict_learning_shapes():
    n_components = 5
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert_equal(dico.components_.shape, (n_components, n_features))

    n_components = 1
    dico = DictionaryLearning(n_components, random_state=0).fit(X)
    assert_equal(dico.components_.shape, (n_components, n_features))
    assert_equal(dico.transform(X).shape, (X.shape[0], n_components))
def _fit(self, feature_values, n_components):
    # reshape to two dimensional vector (list of 1d features)
    feature_vector = feature_values.reshape(feature_values.shape[0], -1)
    dictionary = DictionaryLearning(n_components=n_components, alpha=1,
                                    max_iter=500, n_jobs=self.threads)
    codebook = dictionary.fit(feature_vector)
    return codebook
def dictionaryLearningMethod(inputVec, length):
    # length = len(vectors)
    dic = DictionaryLearning(n_components=length, alpha=0.05)
    dictionary = dic.fit(inputVec).components_
    sparseCoder = dic.fit_transform(inputVec)
    # sparseCoder = dic.fit(vectors).components_
    # dictionary = dic.fit_transform(vectors)
    return dictionary, sparseCoder
def dictionaryLearningMethod(vectors):
    # length = len(vectors)
    length = 5
    dic = DictionaryLearning(n_components=length, alpha=1)
    dictionary = dic.fit(vectors).components_
    sparseCoder = dic.fit_transform(vectors)
    # sparseCoder = dic.fit(vectors).components_
    # dictionary = dic.fit_transform(vectors)
    return dictionary.T, sparseCoder.T
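# Hedged usage sketch for dictionaryLearningMethod above; the random input
# matrix is an assumption chosen only to exercise the fixed 5-atom setting.
import numpy as np

vectors = np.random.RandomState(0).randn(20, 8)   # 20 samples x 8 features
D, codes = dictionaryLearningMethod(vectors)
print(D.shape, codes.shape)                       # (8, 5) and (5, 20)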
def test_dict_learning_lars_code_positivity():
    n_components = 5
    dico = DictionaryLearning(
        n_components, transform_algorithm="lars", random_state=0,
        positive_code=True, fit_algorithm="cd").fit(X)

    err_msg = "Positive constraint not supported for '{}' coding method."
    err_msg = err_msg.format("lars")
    with pytest.raises(ValueError, match=err_msg):
        dico.transform(X)
def trainLowDict(buffer):
    print('Learning the dictionary...')
    t0 = time()
    dico = DictionaryLearning(n_components=100, alpha=1, max_iter=100,
                              verbose=1)
    V = dico.fit(buffer).components_
    E = dico.error_
    dt = time() - t0
    print('done in %.2fs.' % dt)
    return V, E
def test_dict_learning_split():
    n_components = 5
    dico = DictionaryLearning(n_components, transform_algorithm='threshold',
                              random_state=0)
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)
    assert_array_almost_equal(split_code[:, :n_components] -
                              split_code[:, n_components:], code)
def sparse_code(X_mat, n_comps=12, alpha=1,
                out_dir=oj(config.DIR_INTERIM, 'dictionaries')):
    print('sparse coding...')
    d = DictionaryLearning(n_components=n_comps, alpha=alpha, random_state=42)
    d.fit(X_mat)
    pkl.dump(d, open(oj(out_dir, f'sc_{n_comps}_alpha={alpha}.pkl'), 'wb'))
def test_dict_learning_split():
    n_components = 5
    dico = DictionaryLearning(n_components, transform_algorithm='threshold',
                              random_state=0)
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)
    assert_array_equal(split_code[:, :n_components] -
                       split_code[:, n_components:], code)
def test_dict_learning_reconstruction():
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
def sparse_coding(dimension, input_x, alpha, iteration, tolerance):
    # dl = DictionaryLearning(dimension)
    dl = DictionaryLearning(dimension, alpha=alpha, max_iter=iteration,
                            tol=tolerance)
    dl.fit(input_x)
    # np.set_printoptions(precision=3, suppress=True)
    # print code
    # print dl.components_
    print("error:", dl.error_[-1])
    return dl
def test_dict_learning_nonzero_coefs():
    n_components = 4
    dico = DictionaryLearning(n_components, transform_algorithm='lars',
                              transform_n_nonzero_coefs=3, random_state=0)
    code = dico.fit(X).transform(X[np.newaxis, 1])
    assert_true(len(np.flatnonzero(code)) == 3)

    dico.set_params(transform_algorithm='omp')
    code = dico.transform(X[np.newaxis, 1])
    assert_equal(len(np.flatnonzero(code)), 3)
def test_dict_learning_reconstruction_parallel():
    # regression test that parallel reconstruction works with n_jobs=-1
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0, n_jobs=-1)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
def _estimate_linear_combination(self, imgs_vec, params):
    estimator = DictionaryLearning(n_components=params.get('nb_labels'),
                                   max_iter=params.get('max_iter'),
                                   fit_algorithm='lars',
                                   transform_algorithm='omp',
                                   split_sign=False,
                                   tol=params.get('tol'),
                                   n_jobs=1)
    fit_result = estimator.fit_transform(imgs_vec)
    components = estimator.components_
    return estimator, components, fit_result
def dictionary_learn():
    x = [[1, -2, 3, 4, 5.],
         [3, 4, -5, 6, 7],
         [1, 7, 2, -6, 2],
         [3, 8, 6, 2, -8]]
    print(x)
    dct = DictionaryLearning(n_components=5)
    dct.fit(x)
    print(dct.components_)
    print(dct.transform(x))
    pass
def test_DictionaryLearning(n_components):
    '''
    Test the usage of DictionaryLearning
    :return: None
    '''
    X = [[1, 2, 3, 4, 5],
         [6, 7, 8, 9, 10],
         [10, 9, 8, 7, 6],
         [5, 4, 3, 2, 1]]
    print("before transform:", X)
    dct = DictionaryLearning(n_components=n_components)
    dct.fit(X)
    print("components is :", dct.components_)
    print("after transform:", dct.transform(X))
def test_DictionaryLearning():
    from sklearn.decomposition import DictionaryLearning
    x = [[1, 2, 3, 4, 5],
         [6, 7, 8, 9, 10],
         [10, 9, 8, 7, 6],
         [5, 4, 3, 2, 1]]
    print("before transform:", x)
    dct = DictionaryLearning(n_components=3)
    dct.fit(x)
    print("components is :", dct.components_)
    print("after transform:", dct.transform(x))
def DL(self):
    dictLearn = DictionaryLearning(n_components=self.n_components, alpha=1,
                                   transform_algorithm='omp',
                                   transform_n_nonzero_coefs=20)
    self.dictionary = []
    gamma = []
    # print self.cluster[0]
    # print self.cluster
    for i in range(self.n_clusters):
        dictObject = dictLearn.fit(self.cluster[i])
        self.dictionary.append(dictObject.components_)
        gamma.append(dictObject.transform(self.cluster[i]))
class _DictionaryLearningImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
def test_dict_learning_lassocd_readonly_data():
    n_components = 12
    with TempMemmap(X) as X_read_only:
        dico = DictionaryLearning(
            n_components,
            transform_algorithm="lasso_cd",
            transform_alpha=0.001,
            random_state=0,
            n_jobs=4,
        )
        with ignore_warnings(category=ConvergenceWarning):
            code = dico.fit(X_read_only).transform(X_read_only)
            assert_array_almost_equal(np.dot(code, dico.components_),
                                      X_read_only, decimal=2)
def Dictionary_learning(X):
    # input: X (rows are features, columns are samples)
    # Outputs: U (columns are projection directions) --> note: X = UV
    n_dimensions = X.shape[0]
    n_samples = X.shape[1]
    k = min(n_dimensions, n_samples)
    # model = DictionaryLearning(n_components=k, tol=1e-20, max_iter=int(1e4))
    model = DictionaryLearning(n_components=k)
    model.fit(X)
    V = model.components_
    # X = UV --> X: d*n, U: d*k, V: k*n --> XV' = UVV' --> U = XV'(VV')^{-1}
    U = X.dot(V.T).dot(np.linalg.inv(V.dot(V.T)))
    # print(X.shape, U.shape, V.shape, k)
    return U, k
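# Hedged usage sketch for Dictionary_learning above; the random feature-by-
# sample matrix and its dimensions are illustrative assumptions only.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(8, 50)                  # 8 features x 50 samples, so k = 8
U, k = Dictionary_learning(X)
print(U.shape, k)                     # (8, 8) projection directions, k = 8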
def get_dim_reds_scikit(pct_features):
    n_components = max(int(pct_features * num_features), 1)
    return [
        LinearDiscriminantAnalysis(n_components=n_components),
        TruncatedSVD(n_components=n_components),
        # SparseCoder(n_components=n_components),
        DictionaryLearning(n_components=n_components),
        FactorAnalysis(n_components=n_components),
        SparsePCA(n_components=n_components),
        NMF(n_components=n_components),
        PCA(n_components=n_components),
        RandomizedPCA(n_components=n_components),
        KernelPCA(kernel="linear", n_components=n_components),
        KernelPCA(kernel="poly", n_components=n_components),
        KernelPCA(kernel="rbf", n_components=n_components),
        KernelPCA(kernel="sigmoid", n_components=n_components),
        KernelPCA(kernel="cosine", n_components=n_components),
        Isomap(n_components=n_components),
        LocallyLinearEmbedding(n_components=n_components, eigen_solver='auto',
                               method='standard'),
        LocallyLinearEmbedding(n_neighbors=n_components,
                               n_components=n_components,
                               eigen_solver='auto', method='modified'),
        LocallyLinearEmbedding(n_neighbors=n_components,
                               n_components=n_components,
                               eigen_solver='auto', method='ltsa'),
        SpectralEmbedding(n_components=n_components)
    ]
def test_dictionary_learning_dtype_match(
    data_type,
    expected_type,
    fit_algorithm,
    transform_algorithm,
):
    # Verify preserving dtype for fit and transform in dictionary learning class
    dict_learner = DictionaryLearning(
        n_components=8,
        fit_algorithm=fit_algorithm,
        transform_algorithm=transform_algorithm,
        random_state=0,
    )
    dict_learner.fit(X.astype(data_type))
    assert dict_learner.components_.dtype == expected_type
    assert dict_learner.transform(X.astype(data_type)).dtype == expected_type
def _dictionarylearning(Y, N, k, iter_=100000):
    """
    Perform dictionary learning on the given data set.
    --------------------------------------------------
    Input:
        Y: Measurement matrix of size M x L
        N: Number of sources
        k: Number of active sources

    Output:
        D: A dictionary matrix
    """
    dct = DictionaryLearning(n_components=N, transform_algorithm='lars',
                             transform_n_nonzero_coefs=k, max_iter=iter_)
    dct.fit(Y.T)
    D = dct.components_
    return D.T
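# Hedged usage sketch for _dictionarylearning above; the synthetic measurement
# matrix and the small source count are assumptions for illustration.
import numpy as np

rng = np.random.RandomState(0)
Y = rng.randn(8, 100)                  # M = 8 measurements, L = 100 snapshots
D = _dictionarylearning(Y, N=4, k=2, iter_=50)
print(D.shape)                         # (8, 4): one column per learned source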
def test_dict_learning_positivity(transform_algorithm, positive_code,
                                  positive_dict):
    n_components = 5
    dico = DictionaryLearning(
        n_components, transform_algorithm=transform_algorithm, random_state=0,
        positive_code=positive_code, positive_dict=positive_dict).fit(X)
    code = dico.transform(X)
    if positive_dict:
        assert_true((dico.components_ >= 0).all())
    else:
        assert_true((dico.components_ < 0).any())
    if positive_code:
        assert_true((code >= 0).all())
    else:
        assert_true((code < 0).any())
def generateOptSparseDictionary(images, patch_size, num_samples, num_features):
    video_patches, _ = generateVideoPatches(patch_size, images)
    samples = samplePatches(num_samples, video_patches)
    alg = DictionaryLearning(n_components=num_features)
    # Squeeze sample patches to be array
    alg.fit(samples.reshape(np.shape(samples)[0], np.shape(samples)[1] ** 2))
    features = alg.components_
    filter_size = np.shape(samples)[1]
    features = (features.T / np.linalg.norm(features, axis=1).T).T
    # features = (features.T - np.mean(features, axis=1).T).T
    features = features.reshape(features.shape[0], filter_size, filter_size)
    return features
def get_dic_per_cluster(clust_q, data_cluster, dataq, i, out_q=None,
                        kerPCA=False):
    if out_q is not None:
        name = mpc.current_process().name
        print(name, 'Starting')
    else:
        print('Starting estimation of dic %i...' % i)
    # parse the feature vectors for each cluster
    for q in clust_q:
        data_cluster = np.vstack((data_cluster, dataq[q]))
    # remove useless first line
    data_cluster = data_cluster[1:, :]
    # learn the sparse code for that cluster
    if kerPCA is False:
        dict_learn = DictionaryLearning(n_jobs=10)
        dict_learn.fit(data_cluster)
    else:
        print('Doing kernel PCA...')
        print(data_cluster.shape)
        dict_learn = KernelPCA(kernel="rbf", gamma=10, n_components=3)
        # dict_learn = PCA(n_components=10)
        dict_learn.fit(data_cluster)
    if out_q is not None:
        res = {}
        res[i] = dict_learn
        out_q.put(res)
        print(name, 'Exiting')
    else:
        print('Finished.')
        return dict_learn  # dict(i = dict_learn)
class SparseCoding:

    DEFAULT_MODEL_PARAMS = {
        'n_components': 10,
        'n_features': 64,
        'max_iter': 5,
        'random_state': 1,
        'dict_init': None,
        'code_init': None
    }

    def __init__(self, model_filename=None):
        if model_filename is not None:
            self.load_model(model_filename)
        else:
            # default model params
            self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS['n_components']
            self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
            self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
            self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS['random_state']
            self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
            self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

        # initialize Dictionary Learning object with default params and weights
        self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                         alpha=1,
                                         max_iter=self.max_iter,
                                         tol=1e-08,
                                         fit_algorithm='lars',
                                         transform_algorithm='omp',
                                         transform_n_nonzero_coefs=None,
                                         transform_alpha=None,
                                         n_jobs=1,
                                         code_init=self.code_init,
                                         dict_init=self.dict_init,
                                         verbose=False,
                                         split_sign=False,
                                         random_state=self.random_state)

    def save_model(self, filename):
        # save DL object to file, compress is also to prevent multiple model files.
        joblib.dump(self.DL_obj, filename, compress=3)

    def load_model(self, filename):
        # load DL Object from file
        self.DL_obj = joblib.load(filename)
        # set certain model params as class attributes. Get values from DL Obj.get_params() or use default values.
        DL_params = self.DL_obj.get_params()
        for param in SparseCoding.DEFAULT_MODEL_PARAMS:
            if param in DL_params:
                setattr(self, param, DL_params[param])
            else:
                setattr(self, param, SparseCoding.DEFAULT_MODEL_PARAMS[param])

    def learn_dictionary(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape((whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, \
            "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim
        # learn dictionary
        self.DL_obj.fit(whitened_patches)

    def get_dictionary(self):
        try:
            return self.DL_obj.components_
        except AttributeError:
            raise AttributeError("Feature extraction dictionary has not yet been learnt for this model. "
                                 + "Train the feature extraction model at least once to prevent this error.")

    def get_sparse_features(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape((whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, \
            "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim
        try:
            sparse_code = self.DL_obj.transform(whitened_patches)
        except NotFittedError:
            raise NotFittedError("Feature extraction dictionary has not yet been learnt for this model, "
                                 + "therefore Sparse Codes cannot be extracted. Train the feature extraction model "
                                 + "at least once to prevent this error.")
        return sparse_code

    def get_sign_split_features(self, sparse_features):
        n_samples, n_components = sparse_features.shape
        sign_split_features = np.empty((n_samples, 2 * n_components))
        sign_split_features[:, :n_components] = np.maximum(sparse_features, 0)
        sign_split_features[:, n_components:] = -np.minimum(sparse_features, 0)
        return sign_split_features

    def get_pooled_features(self, input_feature_map, filter_size=(19, 19)):
        # assuming square filters and images
        filter_side = filter_size[0]

        # reshaping incoming features from 2d to 3d i.e. (3249,20) to (57,57,20)
        input_feature_map_shape = input_feature_map.shape
        if input_feature_map.ndim == 2:
            input_feature_map_side = int(np.sqrt(input_feature_map.shape[0]))
            input_feature_map = input_feature_map.reshape(
                (input_feature_map_side, input_feature_map_side, input_feature_map_shape[-1]))
        assert input_feature_map.ndim == 3, \
            "Input features dimension is %d instead of 3" % input_feature_map.ndim

        # get windows (57,57,20) to (3,3,1,19,19,20)
        input_feature_map_windows = view_as_windows(
            input_feature_map,
            window_shape=(filter_size[0], filter_size[1], input_feature_map.shape[-1]),
            step=filter_size[0])

        # reshape windows (3,3,1,19,19,20) to (3**2, 19**2, 20) == (9, 361, 20)
        input_feature_map_windows = input_feature_map_windows.reshape(
            (input_feature_map_windows.shape[0] ** 2,
             filter_size[0] ** 2,
             input_feature_map.shape[-1]))

        # calculate norms (9, 361, 20) to (9,361)
        input_feature_map_window_norms = np.linalg.norm(input_feature_map_windows,
                                                        ord=2, axis=-1)

        # calculate indexes of max norms per window (9,361) to (9,1). One max index per window.
        max_norm_indexes = np.argmax(input_feature_map_window_norms, axis=-1)

        # max pooled features are the features that have max norm indexes (9, 361, 20) to (9,20). One max index per window.
        pooled_features = input_feature_map_windows[
            np.arange(input_feature_map_windows.shape[0]), max_norm_indexes]

        # return pooled feature map
        return pooled_features

    # Combined Pipeline
    def get_pooled_features_from_whitened_patches(self, whitened_patches):
        sparse_features = self.get_sparse_features(whitened_patches)
        sign_split_features = self.get_sign_split_features(sparse_features)
        pooled_features = self.get_pooled_features(sign_split_features)
        return pooled_features
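# Hedged end-to-end sketch for the SparseCoding class above; the random
# "whitened patches" and their 8x8 size are assumptions for illustration.
import numpy as np

rng = np.random.RandomState(0)
whitened_patches = rng.randn(100, 8, 8)        # 100 patches of 8x8 pixels

sc = SparseCoding()
sc.learn_dictionary(whitened_patches)          # fits the 10-atom dictionary
codes = sc.get_sparse_features(whitened_patches)
split = sc.get_sign_split_features(codes)
print(codes.shape, split.shape)                # (100, 10) and (100, 20)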
# sklearn utilities
from sklearn.decomposition import DictionaryLearning
from sklearn.preprocessing import normalize


def interface():
    args = argparse.ArgumentParser()
    # Required
    args.add_argument('-i', '--data-matrix', help='Input data matrix',
                      required=True)
    # Optional
    args.add_argument('-d', '--dict-file', help='Dictionary encoder file (.pkl)',
                      default='dict.pkl')
    args.add_argument('-n', '--num-atoms', help='Desired dictionary size',
                      default=1000, type=int)
    args.add_argument('-a', '--alpha', help='Alpha (sparsity enforcement)',
                      default=1.0, type=float)
    args = args.parse_args()
    return args


if __name__ == "__main__":
    args = interface()

    # Load and preprocess the data
    sample_ids, matrix = parse_otu_matrix(args.data_matrix)
    matrix = normalize(matrix)

    # Learn a dictionary
    dict_transformer = DictionaryLearning(n_components=args.num_atoms,
                                          alpha=args.alpha)
    dict_transformer.fit(matrix)

    # Save dictionary to file
    save_object_to_file(dict_transformer, args.dict_file)
from sklearn.decomposition import NMF

nmfHOG = NMF(n_components=components)
nmfHOF = NMF(n_components=components)

nmfHOG.fit(np.array([x['hog'] for x in features]).T)
nmfHOF.fit(np.array([x['hof'] for x in features]).T)

hogComponents = nmfHOG.components_.T
hofComponents = nmfHOF.components_.T

return hogComponents, hofComponents


if 0:
    from sklearn.decomposition import DictionaryLearning
    dicHOG = DictionaryLearning(25)
    dicHOG.fit(hogs)


def displayComponents(components):
    sides = ceil(np.sqrt(len(components)))
    for i in range(len(components)):
        subplot(sides, sides, i + 1)
        imshow(hog2image(components[i], imageSize=[24, 24], orientations=4))

    sides = ceil(np.sqrt(components.shape[1]))
    for i in range(components.shape[1]):
        subplot(sides, sides, i + 1)
        imshow(hog2image(components[:, i], imageSize=[24, 24], orientations=4))
class SC(object):
    """
    Wrapper for sklearn package. Performs sparse coding

    Sparse Coding, or Dictionary Learning has 5 methods:
       - fit(waveforms)
            update class instance with Sparse Coding fit
       - fit_transform()
            do what fit() does, but additionally return the projection onto new basis space
       - inverse_transform(A)
            inverses the decomposition, returns waveforms for an input A, using Z^\dagger
       - get_basis()
            returns the basis vectors Z^\dagger
       - get_params()
            returns metadata used for fits.
    """

    def __init__(self, num_components=10, catalog_name='unknown', alpha=0.001,
                 transform_alpha=0.01, max_iter=2000, tol=1e-9, n_jobs=1,
                 verbose=True, random_state=None):
        self._decomposition = 'Sparse Coding'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._alpha = alpha
        self._transform_alpha = transform_alpha
        self._n_jobs = n_jobs
        self._random_state = random_state

        self._DL = DictionaryLearning(n_components=self._num_components,
                                      alpha=self._alpha,
                                      transform_alpha=self._transform_alpha,
                                      n_jobs=self._n_jobs,
                                      verbose=verbose,
                                      random_state=self._random_state)

    def fit(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._DL.fit(self._waveforms)

    def fit_transform(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._DL.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self, A):
        # convert basis back to waveforms using fit
        new_waveforms = self._DL.inverse_transform(A)
        return new_waveforms

    def get_params(self):
        # TODO know what catalog was used! (include waveform metadata)
        params = self._DL.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decompositon'] = self._decomposition
        return params

    def get_basis(self):
        """ Return the SPCA basis vectors (Z^\dagger)"""
        return self._DL.components_
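# Hedged usage sketch for the SC wrapper above; the random "waveforms" and the
# five-component choice are assumptions, not values from any real catalog.
import numpy as np

rng = np.random.RandomState(0)
waveforms = rng.randn(50, 200)          # 50 waveforms, 200 samples each

sc = SC(num_components=5, catalog_name='toy', verbose=False)
A = sc.fit_transform(waveforms)         # sparse coefficients, shape (50, 5)
Z = sc.get_basis()                      # learned basis vectors, shape (5, 200)
print(A.shape, Z.shape)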
audio = data["mfccs"] image = np.zeros((1, 75 * 50)) for i in xrange(video.shape[2]): if i + 1 < video.shape[2]: image = np.vstack( (image, np.abs((video[:, :, i].reshape((1, 75 * 50)) - video[:, :, i + 1].reshape((1, 75 * 50))))) ) idx = np.random.shuffle([i for i in xrange(image[1:].shape[0])]) image = image[idx][0] image = (image - np.min(image, axis=0)) / (np.max(image, axis=0) + 0.01) audio = audio.T[idx, :][0] print image.shape, audio.shape fusion = np.hstack((image, audio)) # sparse code video_learner = DictionaryLearning(n_components=784, alpha=0.5, max_iter=50, fit_algorithm="cd", verbose=1) audio_learner = DictionaryLearning(n_components=10, alpha=0.5, max_iter=50, fit_algorithm="cd", verbose=1) fusion_learner = DictionaryLearning(n_components=784, alpha=0.5, max_iter=50, fit_algorithm="cd", verbose=1) video_learner.fit(image) """ # build model face_rbm = RBM(n_components=100, verbose=2, batch_size=20, n_iter=10) audio_rbm = RBM(n_components=100, verbose=2, batch_size=20, n_iter=10) # fit model face_rbm.fit(image) audio_rbm.fit(audio) print face_rbm.components_.shape, audio_rbm.components_.shape
new_U.dot(new_S)
# array([-2.20719466, -3.16170819, -4.11622173])

tsvd = TruncatedSVD(2)
tsvd.fit(iris_data)
tsvd.transform(iris_data)

# One advantage of TruncatedSVD over PCA is that TruncatedSVD can operate on
# sparse matrices while PCA cannot

# Decomposition to classify with DictionaryLearning
from sklearn.decomposition import DictionaryLearning
dl = DictionaryLearning(3)
transformed = dl.fit_transform(iris_data[::2])
transformed[:5]
# array([[ 0.        ,  6.34476574,  0.        ],
#        [ 0.        ,  5.83576461,  0.        ],
#        [ 0.        ,  6.32038375,  0.        ],
#        [ 0.        ,  5.89318572,  0.        ],
#        [ 0.        ,  5.45222715,  0.        ]])

# Next, let's fit (not fit_transform) the testing set:
transformed = dl.transform(iris_data[1::2])

# Putting it all together with Pipelines
# Let's briefly load the iris dataset and seed it with some missing values:
pca.fit(mov)
#%%
import cv2

comps = np.reshape(pca.components_, [n_comps, 30, 30])
for count, comp in enumerate(comps):
    pl.subplot(4, 4, count + 1)
    blur = cv2.GaussianBlur(comp.astype(np.float32), (5, 5), 0)
    blur = np.array(blur / np.max(blur) * 255, dtype=np.uint8)
    ret3, th3 = cv2.threshold(blur, 0, 255,
                              cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    pl.imshow((th3 * comp).T)
#%%
n_comps = 3
dl = DictionaryLearning(n_comps, alpha=1, verbose=True)
comps = dl.fit_transform(Yr.T)
comps = np.reshape(comps, [30, 30, n_comps]).transpose([2, 0, 1])
for count, comp in enumerate(comps):
    pl.subplot(4, 4, count + 1)
    pl.imshow(comp)
#%%
N_ICA_COMPS = 8
ica = FastICA(N_ICA_COMPS, max_iter=10000, tol=10e-8)
ica.fit(pca.components_)
#%
comps = np.reshape(ica.components_, [N_ICA_COMPS, 30, 30])
for count, comp in enumerate(comps):
    idx = np.argmax(np.abs(comp))
    comp = comp * np.sign(comp.flatten()[idx])
    pl.subplot(4, 4, count + 1)