def get_dic_per_cluster(clust_q, data_cluster, dataq, i, out_q=None, kerPCA=False):
    if out_q is not None:
        name = mpc.current_process().name
        print(name, 'Starting')
    else:
        print('Starting estimation of dic %i...' % i)
    # parse the feature vectors for each cluster
    for q in clust_q:
        data_cluster = np.vstack((data_cluster, dataq[q]))
    # remove useless first line
    data_cluster = data_cluster[1:, :]
    # learn the sparse code for that cluster
    if kerPCA is False:
        dict_learn = DictionaryLearning(n_jobs=10)
        dict_learn.fit(data_cluster)
    else:
        print('Doing kernel PCA...')
        print(data_cluster.shape)
        dict_learn = KernelPCA(kernel="rbf", gamma=10, n_components=3)
        # dict_learn = PCA(n_components=10)
        dict_learn.fit(data_cluster)
    if out_q is not None:
        res = {}
        res[i] = dict_learn
        out_q.put(res)
        print(name, 'Exiting')
    else:
        print('Finished.')
    return dict_learn  # dict(i = dict_learn)
def sparse_coding(dimension, input_x, alpha, iteration, tolerance):
    # dl = DictionaryLearning(dimension)
    dl = DictionaryLearning(n_components=dimension, alpha=alpha,
                            max_iter=iteration, tol=tolerance)
    dl.fit(input_x)
    # np.set_printoptions(precision=3, suppress=True)
    # print code
    # print dl.components_
    print("error:", dl.error_[-1])
    return dl
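# A hypothetical call to sparse_coding above (shapes and parameter values are
# illustrative assumptions, not taken from the original source).
import numpy as np

X_demo = np.random.rand(100, 30)                      # 100 samples, 30 features
dl_model = sparse_coding(dimension=20, input_x=X_demo, alpha=1.0,
                         iteration=200, tolerance=1e-6)
demo_codes = dl_model.transform(X_demo)               # sparse codes, shape (100, 20)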
def test_dict_learning_lassocd_readonly_data():
    n_components = 12
    with TempMemmap(X) as X_read_only:
        dico = DictionaryLearning(n_components, transform_algorithm='lasso_cd',
                                  transform_alpha=0.001, random_state=0, n_jobs=-1)
        code = dico.fit(X_read_only).transform(X_read_only)
        assert_array_almost_equal(np.dot(code, dico.components_), X_read_only,
                                  decimal=2)
def test_dict_learning_split():
    n_atoms = 5
    dico = DictionaryLearning(n_atoms, transform_algorithm='threshold')
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_equal(split_code[:, :n_atoms] - split_code[:, n_atoms:], code)
def trainLowDict(buffer):
    print('Learning the dictionary...')
    t0 = time()
    dico = DictionaryLearning(n_components=100, alpha=1, max_iter=100, verbose=1)

    V = dico.fit(buffer).components_
    E = dico.error_
    dt = time() - t0
    print('done in %.2fs.' % dt)
    return V, E
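# A minimal usage sketch for trainLowDict above (the data shape is an assumption):
# learn a 100-atom dictionary from random 8x8 patches flattened to 64-vectors.
import numpy as np

demo_patches = np.random.rand(500, 64)
V, E = trainLowDict(demo_patches)   # V: atoms of shape (100, 64), E: error per iteration
print(V.shape, len(E))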
def test_dict_learning_split():
    n_components = 5
    dico = DictionaryLearning(n_components, transform_algorithm='threshold',
                              random_state=0)
    code = dico.fit(X).transform(X)
    dico.split_sign = True
    split_code = dico.transform(X)

    assert_array_equal(split_code[:, :n_components] - split_code[:, n_components:],
                       code)
def test_dict_learning_reconstruction():
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
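# The identity exercised by the test above: DictionaryLearning factorises X into a
# sparse code matrix U and a dictionary V so that np.dot(U, V) approximates X.
# A self-contained sketch on synthetic data (sizes and alphas are assumptions):
import numpy as np
from sklearn.decomposition import DictionaryLearning

rng = np.random.RandomState(0)
X_toy = rng.randn(20, 8)
dico_toy = DictionaryLearning(n_components=12, transform_algorithm='lasso_lars',
                              transform_alpha=0.001, random_state=0)
U = dico_toy.fit_transform(X_toy)
print(np.abs(np.dot(U, dico_toy.components_) - X_toy).max())   # typically close to 0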
def test_dict_learning_nonzero_coefs():
    n_components = 4
    dico = DictionaryLearning(n_components, transform_algorithm='lars',
                              transform_n_nonzero_coefs=3, random_state=0)
    code = dico.fit(X).transform(X[np.newaxis, 1])
    assert_true(len(np.flatnonzero(code)) == 3)

    dico.set_params(transform_algorithm='omp')
    code = dico.transform(X[np.newaxis, 1])
    assert_equal(len(np.flatnonzero(code)), 3)
def test_dict_learning_reconstruction_parallel():
    # regression test that parallel reconstruction works with n_jobs=-1
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0, n_jobs=-1)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
def create_dictionary_dl(lmbd, K=100, N=10000, dir_mnist='save_exp/mnist'):
    import os.path as osp
    fname = osp.join(dir_mnist, "D_mnist_K{}_lmbd{}.npy".format(K, lmbd))
    if osp.exists(fname):
        D = np.load(fname)
    else:
        from sklearn.decomposition import DictionaryLearning
        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
        im = mnist.train.next_batch(N)[0]
        im = im.reshape(N, 28, 28)
        im = [imresize(a, (17, 17), interp='bilinear', mode='L') - .5
              for a in im]
        X = np.array(im).reshape(N, -1)
        print(X.shape)
        dl = DictionaryLearning(K, alpha=lmbd*N, fit_algorithm='cd',
                                n_jobs=-1, verbose=1)
        dl.fit(X)
        D = dl.components_.reshape(K, -1)
        np.save(fname, D)
    return D
def test_dict_learning_nonzero_coefs():
    n_components = 4
    dico = DictionaryLearning(
        n_components,
        transform_algorithm="lars",
        transform_n_nonzero_coefs=3,
        random_state=0,
    )
    code = dico.fit(X).transform(X[np.newaxis, 1])
    assert len(np.flatnonzero(code)) == 3

    dico.set_params(transform_algorithm="omp")
    code = dico.transform(X[np.newaxis, 1])
    assert len(np.flatnonzero(code)) == 3
def test_dict_learning_reconstruction_parallel():
    # regression test that parallel reconstruction works with n_jobs>1
    n_components = 12
    dico = DictionaryLearning(n_components, transform_algorithm='omp',
                              transform_alpha=0.001, random_state=0, n_jobs=4)
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)

    dico.set_params(transform_algorithm='lasso_lars')
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
def create_dictionary_dl(lmbd, K=100, N=10000, dir_mnist='save_exp/mnist'):
    import os.path as osp
    fname = osp.join(dir_mnist, "D_mnist_K{}_lmbd{}.npy".format(K, lmbd))
    if osp.exists(fname):
        D = np.load(fname)
    else:
        from sklearn.decomposition import DictionaryLearning
        mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
        im = mnist.train.next_batch(N)[0]
        im = im.reshape(N, 28, 28)
        # im = [imresize(a, (17, 17), interp='bilinear', mode='L')-.5
        #       for a in im]
        X = np.array(im).reshape(N, -1)
        # model = ResNet50(weights='imagenet', include_top=False)
        # X = feat_extract(model, im)
        print(X.shape)
        dl = DictionaryLearning(K, alpha=lmbd*N, fit_algorithm='cd',
                                n_jobs=-1, verbose=1)
        dl.fit(X)
        D = dl.components_.reshape(K, -1)
        np.save(fname, D)
    return D
def test_dict_learning_lassocd_readonly_data():
    n_components = 12
    with TempMemmap(X) as X_read_only:
        dico = DictionaryLearning(
            n_components,
            transform_algorithm="lasso_cd",
            transform_alpha=0.001,
            random_state=0,
            n_jobs=4,
        )
        with ignore_warnings(category=ConvergenceWarning):
            code = dico.fit(X_read_only).transform(X_read_only)
        assert_array_almost_equal(np.dot(code, dico.components_), X_read_only,
                                  decimal=2)
def peakmem_fit(self, params):
    estimator = DictionaryLearning(**self.dl_params)
    estimator.fit(self.data)
func_filename = source + str('bold.nii.gz')
fmri_masked = nifti_masker.fit_transform(func_filename)
fmri_masked = fmri_masked[condition_mask]
fmri_masked = fmri_masked[:, np.all(fmri_masked != 0, axis=0)]

# DEFINING features and targets
features = fmri_masked
targets = target_int

# Dictionary Learning on Target
dict_sparse = DictionaryLearning(alpha=1, n_components=sparse_components,
                                 max_iter=3, verbose=3)
dict_sparse.fit(features)
Dt_0 = dict_sparse.components_
coder = SparseCoder(dictionary=Dt_0)
Rt_0 = coder.transform(features)

target_folder = 'C:\\Users\\Pouya\\Documents\\MATLAB\\transfer\\' + str(
    subjects[subs]) + '_brain_sparse.mat'
sio.savemat(target_folder, {'Rt_0': Rt_0, 'targets': targets})

##
target_folder = 'C:\\Users\\Pouya\\Documents\\MATLAB\\transfer\\imagenet_fc7_pca200.mat'
sio.savemat(
    target_folder, {
        'imagenet_feat': imagenet_features,
        'pca_feat': pca_feat,
        'imagenet_targets': imagenet_targets
class SparseCoding:

    DEFAULT_MODEL_PARAMS = {
        'n_components': 10,
        'n_features': 64,
        'max_iter': 5,
        'random_state': 1,
        'dict_init': None,
        'code_init': None
    }

    def __init__(self, model_filename=None):
        if model_filename is not None:
            self.load_model(model_filename)
        else:
            # default model params
            self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS['n_components']
            self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
            self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
            self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS['random_state']
            self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
            self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

            # initialize Dictionary Learning object with default params and weights
            self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                             alpha=1,
                                             max_iter=self.max_iter,
                                             tol=1e-08,
                                             fit_algorithm='lars',
                                             transform_algorithm='omp',
                                             transform_n_nonzero_coefs=None,
                                             transform_alpha=None,
                                             n_jobs=1,
                                             code_init=self.code_init,
                                             dict_init=self.dict_init,
                                             verbose=False,
                                             split_sign=False,
                                             random_state=self.random_state)

    def save_model(self, filename):
        # save DL object to file, compress is also to prevent multiple model files.
        joblib.dump(self.DL_obj, filename, compress=3)

    def load_model(self, filename):
        # load DL Object from file
        self.DL_obj = joblib.load(filename)

        # set certain model params as class attributes. Get values from
        # DL_obj.get_params() or use default values.
        DL_params = self.DL_obj.get_params()
        for param in SparseCoding.DEFAULT_MODEL_PARAMS:
            if param in DL_params:
                setattr(self, param, DL_params[param])
            else:
                setattr(self, param, SparseCoding.DEFAULT_MODEL_PARAMS[param])

    def learn_dictionary(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape(
                (whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, \
            "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim

        # learn dictionary
        self.DL_obj.fit(whitened_patches)

    def get_dictionary(self):
        try:
            return self.DL_obj.components_
        except AttributeError:
            raise AttributeError("Feature extraction dictionary has not yet been learnt for this model. "
                                 + "Train the feature extraction model at least once to prevent this error.")

    def get_sparse_features(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape(
                (whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, \
            "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim
        try:
            sparse_code = self.DL_obj.transform(whitened_patches)
        except NotFittedError:
            raise NotFittedError("Feature extraction dictionary has not yet been learnt for this model, "
                                 + "therefore Sparse Codes cannot be extracted. Train the feature extraction model "
                                 + "at least once to prevent this error.")
        return sparse_code

    def get_sign_split_features(self, sparse_features):
        n_samples, n_components = sparse_features.shape
        sign_split_features = np.empty((n_samples, 2 * n_components))
        sign_split_features[:, :n_components] = np.maximum(sparse_features, 0)
        sign_split_features[:, n_components:] = -np.minimum(sparse_features, 0)
        return sign_split_features

    def get_pooled_features(self, input_feature_map, filter_size=(19, 19)):
        # assuming square filters and images
        filter_side = filter_size[0]

        # reshaping incoming features from 2d to 3d i.e. (3249,20) to (57,57,20)
        input_feature_map_shape = input_feature_map.shape
        if input_feature_map.ndim == 2:
            input_feature_map_side = int(np.sqrt(input_feature_map.shape[0]))
            input_feature_map = input_feature_map.reshape(
                (input_feature_map_side, input_feature_map_side,
                 input_feature_map_shape[-1]))
        assert input_feature_map.ndim == 3, \
            "Input features dimension is %d instead of 3" % input_feature_map.ndim

        # get windows (57,57,20) to (3,3,1,19,19,20)
        input_feature_map_windows = view_as_windows(
            input_feature_map,
            window_shape=(filter_size[0], filter_size[1],
                          input_feature_map.shape[-1]),
            step=filter_size[0])

        # reshape windows (3,3,1,19,19,20) to (3**2, 19**2, 20) == (9, 361, 20)
        input_feature_map_windows = input_feature_map_windows.reshape(
            (input_feature_map_windows.shape[0]**2, filter_size[0]**2,
             input_feature_map.shape[-1]))

        # calculate norms (9, 361, 20) to (9,361)
        input_feature_map_window_norms = np.linalg.norm(
            input_feature_map_windows, ord=2, axis=-1)

        # calculate indexes of max norms per window (9,361) to (9,1). One max index per window.
        max_norm_indexes = np.argmax(input_feature_map_window_norms, axis=-1)

        # max pooled features are the features that have max norm indexes (9, 361, 20) to (9,20). One per window.
        pooled_features = input_feature_map_windows[
            np.arange(input_feature_map_windows.shape[0]), max_norm_indexes]

        # return pooled feature map
        return pooled_features

    # Combined Pipeline
    def get_pooled_features_from_whitened_patches(self, whitened_patches):
        sparse_features = self.get_sparse_features(whitened_patches)
        sign_split_features = self.get_sign_split_features(sparse_features)
        pooled_features = self.get_pooled_features(sign_split_features)
        return pooled_features
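# A hypothetical end-to-end use of the SparseCoding class above (patch count and
# shape are assumptions; get_pooled_features expects a square feature map and is
# therefore skipped here):
import numpy as np

sc = SparseCoding()                          # default 10-component, 64-feature model
demo_patches = np.random.rand(200, 8, 8)     # 200 whitened 8x8 patches
sc.learn_dictionary(demo_patches)            # fits the wrapped DictionaryLearning
sparse_codes = sc.get_sparse_features(demo_patches)        # shape (200, 10)
split_codes = sc.get_sign_split_features(sparse_codes)     # shape (200, 20)
print(sc.get_dictionary().shape, split_codes.shape)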
# sklearn utilities
from sklearn.decomposition import DictionaryLearning
from sklearn.preprocessing import normalize


def interface():
    args = argparse.ArgumentParser()

    # Required
    args.add_argument('-i', '--data-matrix', help='Input data matrix',
                      required=True)

    # Optional
    args.add_argument('-d', '--dict-file', help='Dictionary encoder file (.pkl)',
                      default='dict.pkl')
    args.add_argument('-n', '--num-atoms', help='Desired dictionary size',
                      default=1000, type=int)
    args.add_argument('-a', '--alpha', help='Alpha (sparsity enforcement)',
                      default=1.0, type=float)
    args = args.parse_args()
    return args


if __name__ == "__main__":
    args = interface()

    # Load and preprocess the data
    sample_ids, matrix = parse_otu_matrix(args.data_matrix)
    matrix = normalize(matrix)

    # Learn a dictionary
    dict_transformer = DictionaryLearning(n_components=args.num_atoms,
                                          alpha=args.alpha)
    dict_transformer.fit(matrix)

    # Save dictionary to file
    save_object_to_file(dict_transformer, args.dict_file)
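# Hypothetical invocation of the script above (the script name and input file are
# assumptions; parse_otu_matrix and save_object_to_file come from the original project):
#
#   python learn_dictionary.py -i otu_matrix.tsv -d dict.pkl -n 1000 -a 1.0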
from sklearn.decomposition import SparsePCA
from sklearn.decomposition import MiniBatchSparsePCA
from sklearn.decomposition import MiniBatchDictionaryLearning

##
source_folder = 'C:\\Users\\Pouya\\Documents\\MATLAB\\DECAF\\Analysis\\Movie_Genre_adaptation\\feats.mat'
dict = sio.loadmat(source_folder)
features = dict['features']
MovieFeatures = dict['MovieFeatures']

# Source Domain
dict_sparse = DictionaryLearning(alpha=1, n_components=4, max_iter=1000, verbose=3)
dict_sparse.fit(MovieFeatures)
Ds_0 = dict_sparse.components_
coder = SparseCoder(dictionary=Ds_0)
Rs_0 = coder.transform(MovieFeatures)

# Target Domain
dict_feat = [None] * 30
for subs in range(30):
    print(subs)
    feat = features[0, subs]
    # dict_sparse = DictionaryLearning(alpha=0.1, n_components=105, max_iter=10,
    #                                  transform_n_nonzero_coefs=105, verbose=3)
    # dict_sparse = SparsePCA(n_components=105, max_iter=3)
    # dict_sparse = MiniBatchDictionaryLearning(alpha=1, n_components=105,
    #                                           batch_size=10, n_iter=100)
    # dict_sparse.fit(feat)
    if i + 1 < video.shape[2]:
        image = np.vstack(
            (image, np.abs((video[:, :, i].reshape((1, 75 * 50))
                            - video[:, :, i + 1].reshape((1, 75 * 50))))))

# shuffle rows consistently for image and audio
# (np.random.shuffle works in place and returns None, so build the index list first)
idx = [i for i in range(image[1:].shape[0])]
np.random.shuffle(idx)
image = image[1:][idx]
image = (image - np.min(image, axis=0)) / (np.max(image, axis=0) + 0.01)
audio = audio.T[idx, :]
print(image.shape, audio.shape)
fusion = np.hstack((image, audio))

# sparse code
video_learner = DictionaryLearning(n_components=784, alpha=0.5, max_iter=50,
                                   fit_algorithm="cd", verbose=1)
audio_learner = DictionaryLearning(n_components=10, alpha=0.5, max_iter=50,
                                   fit_algorithm="cd", verbose=1)
fusion_learner = DictionaryLearning(n_components=784, alpha=0.5, max_iter=50,
                                    fit_algorithm="cd", verbose=1)
video_learner.fit(image)

"""
# build model
face_rbm = RBM(n_components=100, verbose=2, batch_size=20, n_iter=10)
audio_rbm = RBM(n_components=100, verbose=2, batch_size=20, n_iter=10)

# fit model
face_rbm.fit(image)
audio_rbm.fit(audio)

print face_rbm.components_.shape, audio_rbm.components_.shape

hidden = np.hstack((face_rbm.components_, audio_rbm.components_))
print hidden.shape

fusion_rbm = RBM(n_components=100, verbose=2, batch_size=20, n_iter=10)
    nmfHOG = NMF(n_components=components)
    nmfHOF = NMF(n_components=components)

    nmfHOG.fit(np.array([x['hog'] for x in features]).T)
    nmfHOF.fit(np.array([x['hof'] for x in features]).T)

    hogComponents = nmfHOG.components_.T
    hofComponents = nmfHOF.components_.T

    return hogComponents, hofComponents

if 0:
    from sklearn.decomposition import DictionaryLearning
    dicHOG = DictionaryLearning(25)
    dicHOG.fit(hogs)


def displayComponents(components):
    sides = ceil(np.sqrt(len(components)))
    for i in range(len(components)):
        subplot(sides, sides, i + 1)
        imshow(hog2image(components[i], imageSize=[24, 24], orientations=4))

    sides = ceil(np.sqrt(components.shape[1]))
    for i in range(components.shape[1]):
        subplot(sides, sides, i + 1)
        imshow(hog2image(components[:, i], imageSize=[24, 24], orientations=4))
x_train = x_train.reshape(-1, img_rows * img_cols * channels)  # each image as a vector
np.random.shuffle(x_train)
print(x_train.shape)

# dictionary file name
if use_fashion:
    file_name = 'dictionary_fashion_mnist_undercomplete'
else:
    file_name = 'dictionary_mnist_overcomplete'

# check if dictionary exists
if not path.exists(file_name):
    d = DictionaryLearning(n_components=2 * 784, max_iter=20)  # train dictionary
    d.fit(x_train[1:10000, :])
    dictionary = d.components_
    print(dictionary.shape)
    with open(file_name, 'wb') as output:
        pickle.dump(d, output, pickle.HIGHEST_PROTOCOL)
    print("created new dictionary")
else:
    with open(file_name, 'rb') as input:
        d = pickle.load(input)
    print("loaded dictionary")

sparse_dict = np.transpose(d.components_)

print("analyse pursuit")
num_images_to_pursuit = 10
    # Use pretrained model
    dico = pickle.load(
        open(f'{cfg.save_path}/all_{n_components}_{n_iter}.sklearnmodel', 'rb'))
    print(f'Use hitted {cfg.save_path}/all_{n_components}_{n_iter}.sklearnmodel')
    hit = True
else:
    # Train a new model
    dico = DictionaryLearning(n_components=n_components, n_jobs=-24,
                              max_iter=n_iter, verbose=True)
    dico.fit(images)
    n_iter_actual = dico.n_iter_
    print(f'{n_iter_actual} iters')
    timer.stop(start=' ')

    # Save the model
    if cfg.save:
        np.save(f'{cfg.save_path}/all_{n_components}_{n_iter_actual}',
                dico.components_)
        pickle.dump(
            dico,
            open(f'{cfg.save_path}/all_{n_components}_{n_iter_actual}.sklearnmodel',
                 'wb'))
# For pickling
def save_object(obj, filename):
    with open(filename, 'wb') as output:  # Overwrites any existing file.
        pk.dump(obj, output, pk.HIGHEST_PROTOCOL)


# Load sparse data
sparse_fit1 = pk.load(open("sparse_fit1.pkl", 'rb'))
sparse_fit2 = pk.load(open("sparse_fit2.pkl", 'rb'))
sparse_fit3 = pk.load(open("sparse_fit3.pkl", 'rb'))
sparse_fit4 = pk.load(open("sparse_fit4.pkl", 'rb'))

sparse_fit1 = np.concatenate((sparse_fit1, sparse_fit2))
print(sparse_fit1.shape)
sparse_fit2 = np.concatenate((sparse_fit3, sparse_fit4))
print(sparse_fit2.shape)
sparse_fit = np.concatenate((sparse_fit1, sparse_fit2))
print(sparse_fit.shape)

X = sparse_fit[:59478, :]
print(X.shape)

# Uses the dictionary learning class to transform the data
atoms = DictionaryLearning(n_components=100, alpha=1, max_iter=1000, tol=1e-8,
                           fit_algorithm='lars', transform_algorithm='lasso_lars')

# fit and transform data
atoms.fit(X)
print(atoms.components_.shape)

# Pickle atoms
save_object(atoms, 'atoms.pkl')
class SC(object):
    """
    Wrapper for sklearn package.  Performs sparse coding

    Sparse Coding, or Dictionary Learning has 5 methods:
       - fit(waveforms)
       update class instance with Sparse Coding fit

       - fit_transform()
       do what fit() does, but additionally return the projection onto new basis space

       - inverse_transform(A)
       inverses the decomposition, returns waveforms for an input A, using Z^\dagger

       - get_basis()
       returns the basis vectors Z^\dagger

       - get_params()
       returns metadata used for fits.
    """

    def __init__(self, num_components=10, catalog_name='unknown', alpha=0.001,
                 transform_alpha=0.01, max_iter=2000, tol=1e-9, n_jobs=1,
                 verbose=True, random_state=None):
        self._decomposition = 'Sparse Coding'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._alpha = alpha
        self._transform_alpha = transform_alpha
        self._n_jobs = n_jobs
        self._random_state = random_state

        self._DL = DictionaryLearning(n_components=self._num_components,
                                      alpha=self._alpha,
                                      transform_alpha=self._transform_alpha,
                                      n_jobs=self._n_jobs,
                                      verbose=verbose,
                                      random_state=self._random_state)

    def fit(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._DL.fit(self._waveforms)

    def fit_transform(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._DL.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self, A):
        # convert basis back to waveforms using fit
        new_waveforms = self._DL.inverse_transform(A)
        return new_waveforms

    def get_params(self):
        # TODO know what catalog was used! (include waveform metadata)
        params = self._DL.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decompositon'] = self._decomposition
        return params

    def get_basis(self):
        """ Return the SPCA basis vectors (Z^\dagger)"""
        return self._DL.components_
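# A short usage sketch for the SC wrapper above (waveform count and length are
# illustrative assumptions):
import numpy as np

sc = SC(num_components=5, verbose=False)
waveforms = np.random.rand(50, 128)          # 50 waveforms, 128 samples each
A = sc.fit_transform(waveforms)              # projections onto the atoms, shape (50, 5)
Z = sc.get_basis()                           # dictionary atoms, shape (5, 128)
print(A.shape, Z.shape, sc.get_params()['num_components'])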
class SparseCoding:

    DEFAULT_MODEL_PARAMS = {
        'n_components': 10,
        'n_features': 64,
        'max_iter': 5,
        'random_state': 1,
        'dict_init': None,
        'code_init': None
    }

    def __init__(self, model_filename=None):
        if model_filename is not None:
            self.load_model(model_filename)
        else:
            # default model params
            self.n_components = SparseCoding.DEFAULT_MODEL_PARAMS['n_components']
            self.n_features = SparseCoding.DEFAULT_MODEL_PARAMS['n_features']
            self.max_iter = SparseCoding.DEFAULT_MODEL_PARAMS['max_iter']
            self.random_state = SparseCoding.DEFAULT_MODEL_PARAMS['random_state']
            self.dict_init = SparseCoding.DEFAULT_MODEL_PARAMS['dict_init']
            self.code_init = SparseCoding.DEFAULT_MODEL_PARAMS['code_init']

            # initialize Dictionary Learning object with default params and weights
            self.DL_obj = DictionaryLearning(n_components=self.n_components,
                                             alpha=1,
                                             max_iter=self.max_iter,
                                             tol=1e-08,
                                             fit_algorithm='lars',
                                             transform_algorithm='omp',
                                             transform_n_nonzero_coefs=None,
                                             transform_alpha=None,
                                             n_jobs=1,
                                             code_init=self.code_init,
                                             dict_init=self.dict_init,
                                             verbose=False,
                                             split_sign=False,
                                             random_state=self.random_state)

    def save_model(self, filename):
        # save DL object to file, compress is also to prevent multiple model files.
        joblib.dump(self.DL_obj, filename, compress=3)

    def load_model(self, filename):
        # load DL Object from file
        self.DL_obj = joblib.load(filename)

        # set certain model params as class attributes. Get values from
        # DL_obj.get_params() or use default values.
        DL_params = self.DL_obj.get_params()
        for param in SparseCoding.DEFAULT_MODEL_PARAMS:
            if param in DL_params:
                setattr(self, param, DL_params[param])
            else:
                setattr(self, param, SparseCoding.DEFAULT_MODEL_PARAMS[param])

    def learn_dictionary(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape(
                (whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, \
            "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim

        # learn dictionary
        self.DL_obj.fit(whitened_patches)

    def get_dictionary(self):
        try:
            return self.DL_obj.components_
        except AttributeError:
            raise AttributeError("Feature extraction dictionary has not yet been learnt for this model. "
                                 + "Train the feature extraction model at least once to prevent this error.")

    def get_sparse_features(self, whitened_patches):
        # assert correct dimensionality of input data
        if whitened_patches.ndim == 3:
            whitened_patches = whitened_patches.reshape(
                (whitened_patches.shape[0], -1))
        assert whitened_patches.ndim == 2, \
            "Whitened patches ndim is %d instead of 2" % whitened_patches.ndim
        try:
            sparse_code = self.DL_obj.transform(whitened_patches)
        except NotFittedError:
            raise NotFittedError("Feature extraction dictionary has not yet been learnt for this model, "
                                 + "therefore Sparse Codes cannot be extracted. Train the feature extraction model "
                                 + "at least once to prevent this error.")
        return sparse_code

    def get_sign_split_features(self, sparse_features):
        n_samples, n_components = sparse_features.shape
        sign_split_features = np.empty((n_samples, 2 * n_components))
        sign_split_features[:, :n_components] = np.maximum(sparse_features, 0)
        sign_split_features[:, n_components:] = -np.minimum(sparse_features, 0)
        return sign_split_features

    def get_pooled_features(self, input_feature_map, filter_size=(19, 19)):
        # assuming square filters and images
        filter_side = filter_size[0]

        # reshaping incoming features from 2d to 3d i.e. (3249,20) to (57,57,20)
        input_feature_map_shape = input_feature_map.shape
        if input_feature_map.ndim == 2:
            input_feature_map_side = int(np.sqrt(input_feature_map.shape[0]))
            input_feature_map = input_feature_map.reshape(
                (input_feature_map_side, input_feature_map_side,
                 input_feature_map_shape[-1]))
        assert input_feature_map.ndim == 3, \
            "Input features dimension is %d instead of 3" % input_feature_map.ndim

        # get windows (57,57,20) to (3,3,1,19,19,20)
        input_feature_map_windows = view_as_windows(
            input_feature_map,
            window_shape=(filter_size[0], filter_size[1],
                          input_feature_map.shape[-1]),
            step=filter_size[0])

        # reshape windows (3,3,1,19,19,20) to (3**2, 19**2, 20) == (9, 361, 20)
        input_feature_map_windows = input_feature_map_windows.reshape(
            (input_feature_map_windows.shape[0]**2, filter_size[0]**2,
             input_feature_map.shape[-1]))

        # calculate norms (9, 361, 20) to (9,361)
        input_feature_map_window_norms = np.linalg.norm(
            input_feature_map_windows, ord=2, axis=-1)

        # calculate indexes of max norms per window (9,361) to (9,1). One max index per window.
        max_norm_indexes = np.argmax(input_feature_map_window_norms, axis=-1)

        # max pooled features are the features that have max norm indexes (9, 361, 20) to (9,20). One per window.
        pooled_features = input_feature_map_windows[
            np.arange(input_feature_map_windows.shape[0]), max_norm_indexes]

        # return pooled feature map
        return pooled_features

    # Combined Pipeline
    def get_pooled_features_from_whitened_patches(self, whitened_patches):
        sparse_features = self.get_sparse_features(whitened_patches)
        sign_split_features = self.get_sign_split_features(sparse_features)
        pooled_features = self.get_pooled_features(sign_split_features)
        return pooled_features
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline


def test_Pipeline(data):
    X_train, X_test, y_train, y_test = data
    steps = [('Linear_SVM', LinearSVC(C=1, penalty='l1', dual=False))]
    pipeline = Pipeline(steps)
    pipeline.fit(X_train, y_train)
    print('name steps : \n', pipeline.named_steps)
    print('Pipeline score : \n', pipeline.score(X_test, y_test))


data = load_digits()
X = data.data
y = data.target
test_Pipeline(model_selection.train_test_split(X, y, test_size=0.25, stratify=y))

# dictionary learning
from sklearn.decomposition import DictionaryLearning

X = [[1, 2, 3, 4, 5],
     [6, 7, 8, 9, 10],
     [10, 9, 8, 7, 6],
     [5, 4, 3, 2, 1]]
dct = DictionaryLearning(n_components=3)
dct.fit(X)
dct.transform(X)
class SparseCoding(object):

    def __init__(self, n, transform_algorithm='lars'):
        self.n = n
        self.net = DictionaryLearning(n_components=n, alpha=0.8, max_iter=1000)
        self.net.set_params(transform_algorithm=transform_algorithm)

    def plot_B(self, B):
        plt.figure(figsize=(4.2, 4))
        for i, comp in enumerate(B[:self.n]):
            plt.subplot(10, 10, i + 1)
            plt.imshow(comp, cmap=plt.cm.gray_r, interpolation='nearest')
            plt.xticks(())
            plt.yticks(())
        plt.suptitle('Dictionary learned from time series\n' +
                     'Train time %.1fs on %d patches' % (dt, len(data)),
                     fontsize=16)
        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def _init(self):
        a = np.random.random((self.n, self.m))
        b = np.random.random((self.T, self.n))
        b /= sum(b)
        return a, b

    def init_weights(self, X_mat):
        B, A, recon = [], [], []
        for app in X_mat:
            data = X_mat[app].reshape(1, -1)
            B_i = self.net.fit(data).components_
            A_i = self.net.transform(data)
            X_hat = np.dot(A_i, B_i)
            B.append(B_i)
            A.append(A_i)
            recon.append(X_hat)
            print("MSE Error: ", np.mean((data - X_hat)**2))
        return A, B, recon

    def DiscriminativeDisaggregation(self, appliances, B, A):
        x = np.array([appliances[app] for app in appliances])
        x = x.T
        A_star = np.vstack(A)
        B_cat = np.hstack(B)
        change = 1
        t = 0
        print(A_star.shape)
        print(B_cat.shape)
        while t <= self.steps and self.epsilon <= change:
            B_cat_p = B_cat
            acts = self.F(x, B_cat, A=A_star)
            B_cat = (B_cat - self.alpha *
                     ((x - B_cat.dot(acts)).dot(acts.T) -
                      (x - B_cat.dot(A_star)).dot(A_star.T)))
            B_cat = self._pos_constraint(B_cat)
            B_cat /= sum(B_cat)
            t += 1
            change = np.linalg.norm(B_cat - B_cat_p)
            print("Change is {} and step is {} ".format(change, t))
        return B_cat

    def F(self, x, B, x_train=None, A=None, rp_tep=False, rp_gl=False):
        B = np.asarray(B)
        A = np.asarray(A)
        coder = SparseCoder(dictionary=B.T, transform_alpha=self.rp,
                            transform_algorithm='lasso_cd')
        comps, acts = librosa.decompose.decompose(x, transformer=coder)
        acts = self._pos_constraint(acts)
        return acts

    def predict(self, A, B):
        print(A.shape)
        print(B.shape)
        return B.dot(A)
    return feat


def create_dictionary_dl(lmbd, d=2, m=100, n=20, N=10000, case=0,
                         dir_mnist='/home/dujw/darse/save_exp/synthetic'):
    import os.path as osp
    fname = osp.join(dir_mnist, "D_synthetic_d{}_m{},n{},case{},lmbd{}.npy"
                     .format(d, m, n, case, lmbd))
    if osp.exists(fname):
        D = np.load(fname)
    else:
        from sklearn.decomposition import DictionaryLearning
        aa = SyntheticProblemGenerator(d=d, m=m, n=n)
        X = aa.get_batch(N)[0]
        K = m
        dl = DictionaryLearning(K, alpha=lmbd*N, fit_algorithm='cd',
                                n_jobs=-1, verbose=1)
        dl.fit(X)
        D = dl.components_.reshape(K, -1)
        np.save(fname, D)
    return D


class synthetic_generate(object):
    def __init__(self, d, N=100000, m=100, n=20, case=0,
                 save_exp="/home/dujw/darse/save_exp/synthetic"):
        self.N = N
        self.case = case
        self.m = m
        self.n = n
        self.d = d
        self.save_exp = save_exp
        self.phi = self.getPhi()
        self.x, self.y = self.getXY()

    def getXY(self):
        N = self.N
class DictionaryLearningMethod(BaseMethod):
    """Implement the dict learning method of the paper using sklearn."""

    def __init__(self, width=24, stride=12, n_components=10, alpha=1,
                 verbose=1, random_state=0, n_jobs=4, max_iter=1):
        self.width = width
        self.stride = stride
        self.n_components = n_components
        self.alpha = alpha
        self.verbose = verbose
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.max_iter = max_iter

        self.estimator = DictionaryLearning(
            n_components=n_components,
            alpha=alpha,
            verbose=verbose,
            random_state=random_state,
            n_jobs=n_jobs,
            max_iter=max_iter,
        )

    @staticmethod
    def window_split(X, s, w):
        """From a signal, create an array of overlapping windows."""
        X = np.array(X).reshape(-1, 1)
        if w > X.shape[0]:
            raise ValueError(f'Window width bigger than signal size ({w}>{X.shape[0]}).')
        n_h = X.shape[0]
        c = int((n_h - w)/s + 1)
        Xs = []
        for k in range(c):
            i = w + k*s
            x = X[i-w:i]
            Xs.append(x)
        return np.concatenate(Xs, axis=1)

    @staticmethod
    def window_merge(X_h, s):
        """From array of overlapping windows, reconstruct the original signal.

        Parameters:
        -----------
        X_h : np.array of shape (w, c)
            Array of overlapping windows.
        s : int
            Stride

        Returns:
        --------
        X : np.array of shape

        """
        w, c = X_h.shape
        W = np.zeros((c, w + s*(c-1)))
        for i in range(c):
            W[i, i*s:i*s+w] = X_h[:, i]
        N = np.sum(W != 0, axis=0)
        x_hat = np.divide(np.sum(W, axis=0), N)
        return x_hat

    def fit(self, X, y=None):
        X_h = self.window_split(X, self.stride, self.width)
        self.estimator.fit(X_h.T)

    def transform_codes(self, X):
        X_h = self.window_split(X, self.stride, self.width)
        X_pred_codes = self.estimator.transform(X_h.T).T
        return X_pred_codes

    def codes_to_signal(self, X_codes):
        D = self.estimator.components_.T
        X_h = D@X_codes
        X = self.window_merge(X_h, self.stride)
        return X

    def transform(self, X):
        X_pred_codes = self.transform_codes(X)
        X_pred = self.codes_to_signal(X_pred_codes)
        return X_pred

    def get_atoms(self):
        return self.estimator.components_.T
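# A minimal round-trip sketch for DictionaryLearningMethod above (signal length,
# window width/stride and component count are illustrative assumptions; it assumes
# BaseMethod from the original module is importable):
import numpy as np

signal = np.sin(np.linspace(0, 20 * np.pi, 480)) + 0.05 * np.random.randn(480)
method = DictionaryLearningMethod(width=24, stride=12, n_components=5, max_iter=5)
method.fit(signal)                           # learns atoms on overlapping windows
reconstruction = method.transform(signal)    # codes -> signal via window_merge
print(reconstruction.shape, float(np.mean((signal - reconstruction) ** 2)))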
                        n_components=n_components)
D = D_fixed

n_nonzero = 3
alpha = None
algo = 'omp'
color_1 = 'red'
title = algo.upper()

di = DictionaryLearning(n_components=n_components, fit_algorithm='cd',
                        transform_algorithm='lasso_cd',
                        positive_code=True, positive_dict=True)
di.fit(comp_matrix)
d = di.transform(comp_matrix)

coder_1 = SparseCoder(dictionary=d.T, transform_n_nonzero_coefs=n_nonzero,
                      transform_alpha=alpha, transform_algorithm=algo)
comps, acts = librosa.decompose.decompose(comp_matrix, transformer=coder_1)

plt.plot(comp_matrix[0, :], color='black', lw=2, linestyle='--',
         label='Original signal',
class SC(object):
    """
    Wrapper for sklearn package.  Performs sparse coding

    Sparse Coding, or Dictionary Learning has 5 methods:
       - fit(waveforms)
       update class instance with Sparse Coding fit

       - fit_transform()
       do what fit() does, but additionally return the projection onto new basis space

       - inverse_transform(A)
       inverses the decomposition, returns waveforms for an input A, using Z^\dagger

       - get_basis()
       returns the basis vectors Z^\dagger

       - get_params()
       returns metadata used for fits.
    """

    def __init__(self, num_components=10, catalog_name='unknown', alpha=0.001,
                 transform_alpha=0.01, max_iter=2000, tol=1e-9, n_jobs=1,
                 verbose=True, random_state=None):
        self._decomposition = 'Sparse Coding'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._alpha = alpha
        self._transform_alpha = transform_alpha
        self._n_jobs = n_jobs
        self._random_state = random_state

        self._DL = DictionaryLearning(n_components=self._num_components,
                                      alpha=self._alpha,
                                      transform_alpha=self._transform_alpha,
                                      n_jobs=self._n_jobs,
                                      verbose=verbose,
                                      random_state=self._random_state)

    def fit(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._DL.fit(self._waveforms)

    def fit_transform(self, waveforms):
        # TODO make sure there are more columns than rows (transpose if not)
        # normalize waveforms
        self._waveforms = waveforms
        self._A = self._DL.fit_transform(self._waveforms)
        return self._A

    def inverse_transform(self, A):
        # convert basis back to waveforms using fit
        new_waveforms = self._DL.inverse_transform(A)
        return new_waveforms

    def get_params(self):
        # TODO know what catalog was used! (include waveform metadata)
        params = self._DL.get_params()
        params['num_components'] = params.pop('n_components')
        params['Decompositon'] = self._decomposition
        return params

    def get_basis(self):
        """ Return the SPCA basis vectors (Z^\dagger)"""
        return self._DL.components_
Ymean = Ynoisy.mean(axis=0)
Ynoisy = Ynoisy - numpy.tile(Ymean, [Ynoisy.shape[0], 1])

# Select sample patches for training
ch = numpy.random.permutation(Ynoisy.shape[1])[:N]
Y = Ynoisy[:, ch].T
print(Y.shape)

# Training dictionary
from sklearn.decomposition import DictionaryLearning
dico = DictionaryLearning(n, transform_algorithm='omp', alpha=s,
                          random_state=0, verbose=False)
dico.fit(Y)

# Testing the validity of the sparse representation
Xt = dico.transform(Y)
print(Xt.shape)
numpy.testing.assert_array_almost_equal(numpy.dot(Xt, dico.components_), Y,
                                        decimal=1)

# Generating sparse representation for entire image
Xc = dico.transform(Ynoisy.T)
print(Xc.T.shape)

# D * X
A = numpy.dot(Xc, dico.components_).T

# Inverse centering, image restoration and output
    nmfHOG = NMF(n_components=components)
    nmfHOF = NMF(n_components=components)

    nmfHOG.fit(np.array([x['hog'] for x in features]).T)
    nmfHOF.fit(np.array([x['hof'] for x in features]).T)

    hogComponents = nmfHOG.components_.T
    hofComponents = nmfHOF.components_.T

    return hogComponents, hofComponents

if 0:
    from sklearn.decomposition import DictionaryLearning
    dicHOG = DictionaryLearning(25)
    dicHOG.fit(hogs)


def displayComponents(components):
    sides = ceil(np.sqrt(len(components)))
    for i in range(len(components)):
        subplot(sides, sides, i + 1)
        imshow(hog2image(components[i], imageSize=[24, 24], orientations=4))

    sides = ceil(np.sqrt(components.shape[1]))
    for i in range(components.shape[1]):
        subplot(sides, sides, i + 1)
        imshow(hog2image(components[:, i], imageSize=[24, 24], orientations=4))
def main():
    start = time.time()

    args = sys.argv
    target = args[1]
    sub = args[2]
    threshold = args[3]
    dimention = int(args[4])
    shift = int(args[5])
    sample = int(args[6])

    print('target : {}'.format(target))
    print('subject : {}'.format(sub))
    print('shifted by {} sec'.format(shift))

    # load brain activity data
    with open('../data/Brain/' + target + '/' + sub + '_train_reduced_'
              + threshold + '.pickle', 'rb') as f:
        brain_data = pickle.load(f)

    # load semantic representation data
    with open('../data/srm/' + target + '_srm300_train.pickle', 'rb') as f:
        semantic_data = pickle.load(f)

    # get the semantic representation matrix with the time shift applied
    brain_data, semantic_data = get_time_shift_data(brain_data, semantic_data,
                                                    target, sub, shift)
    print('brain sample : {}'.format(len(brain_data)))
    print('semantic_data : {}'.format(len(semantic_data)))

    # build the combined matrix by concatenating the two
    brainw2vdata = np.c_[brain_data, semantic_data]
    brainw2vdata = np.array(brainw2vdata)
    brainw2vdata = brainw2vdata[::sample]
    print("dimensions:")
    print(brainw2vdata.shape)

    # dictionary learning
    dict_model = DictionaryLearning(n_components=dimention, alpha=1.0,
                                    transform_algorithm='lasso_lars',
                                    transform_alpha=1.0, fit_algorithm='lars',
                                    verbose=True)
    dict_model.fit(brainw2vdata)

    # dictionary
    Dict = dict_model.components_
    print("dictionary:")
    print(Dict.shape)

    # coefficients
    coef = dict_model.transform(brainw2vdata)
    print("coefficients:")
    print(coef.shape)

    # save the dictionary
    f = open("../data/Dict/" + target + "/Dict_" + sub + "_pred" + threshold
             + "_base" + str(dimention) + "_sec" + str(shift) + "_sample"
             + str(sample) + ".pickle", "wb")
    pickle.dump(Dict, f)
    f.close()

    # save the coefficients
    f = open("../data/Dict/" + target + "/Coef_" + sub + "_pred" + threshold
             + "_base" + str(dimention) + "_sec" + str(shift) + "_sample"
             + str(sample) + ".pickle", "wb")
    pickle.dump(coef, f)
    f.close()

    # print elapsed time
    elapsed_time = time.time() - start
    print(("elapsed_time:{0}".format(elapsed_time)) + "[sec]")
from matplotlib import pyplot as plt
import pandas as pd


def decode_image(image):
    decoded_image = np.empty((256, 256))
    for i in range(1024):
        r = i % 32
        c = i // 32
        decoded_image[r * 8:r * 8 + 8, c * 8:c * 8 + 8] = \
            image[i].reshape([8, 8], order="F")
    return decoded_image


if __name__ == "__main__":
    MatPatchedImage = scipy.io.loadmat(
        r"C:\Users\ktmks\Documents\Dic_ler1\ver1_02\mono\PatchData.mat")
    PatchData = np.array(MatPatchedImage["PatchData"]).T

    decoded_image = decode_image(PatchData[:1024, :])

    dico = DictionaryLearning(
        n_components=128,
        transform_n_nonzero_coefs=8,
        verbose=True,
        max_iter=1000,
    )
    Dict = dico.fit(PatchData)

    print("Hello")
def test_dict_learning_unknown_fit_algorithm():
    n_components = 5
    dico = DictionaryLearning(n_components, fit_algorithm='<unknown>')
    with pytest.raises(ValueError):
        dico.fit(X)
import pandas as pd
import dill

N_COMPONENTS = 500
TRANSFORM_N_NONZERO_COEFS = 10
VERBOSE = True
MAX_ITER = 10

MatBrainImage = scipy.io.loadmat(
    r"C:\Users\ktmks\Documents\research\tmp_results\for_python_data\brain_f_data.mat")
label = MatBrainImage["label"]
Y = MatBrainImage["data"]

dic = DictionaryLearning(n_components=N_COMPONENTS,
                         transform_n_nonzero_coefs=TRANSFORM_N_NONZERO_COEFS,
                         verbose=VERBOSE,
                         max_iter=MAX_ITER)
dic.fit(Y)
D = dic.components_
X = dic.transform(Y)
Y_ = np.dot(X, D)

filepath = r"C:\Users\ktmks\Documents\research\Python\Brain_DL" + "\\"
filename = "res_" + "AtomN-" + str(N_COMPONENTS) \
           + "_SparseDegree-" + str(TRANSFORM_N_NONZERO_COEFS) \
           + "_MaxIter-" + str(MAX_ITER)
save_filename = filepath + filename + ".pkl"
dill.dump_session(save_filename)

scipy.io.savemat(filename + ".mat", {"D": D, "X": X, "Y_": Y_, "label": label})