def test_sparse_encode_input():
    n_components = 100
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    Xf = check_array(X, order='F')
    for algo in ('lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'):
        a = sparse_encode(X, V, algorithm=algo)
        b = sparse_encode(Xf, V, algorithm=algo)
        assert_array_almost_equal(a, b)
def test_with_sparse_code(components=np.loadtxt('components_of_convfeat.txt')):
    (X_train, y_train), (X_test, y_test) = util.load_feat_vec()
    X_train_codes = np.loadtxt('sparse_codes_of_convfeat.txt')
    clf = LogisticRegression(penalty='l1', multi_class='ovr')
    clf.fit(X_train_codes, y_train)
    X_test_codes = sparse_encode(X_test, components)
    print("mean accuracy", clf.score(X_test_codes, y_test))
def test_sparse_encode_error():
    n_components = 12
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    code = sparse_encode(X, V, alpha=0.001)
    assert_true(not np.all(code == 0))
    assert_less(np.sqrt(np.sum((np.dot(code, V) - X) ** 2)), 0.1)
def run(dimension, raw_data_dir, out_dir):
    with open('{}/filename.list'.format(raw_data_dir), 'r') as fp:
        filenames = fp.read().splitlines()
    sensor_data = list()
    for filename in filenames:
        path = '{}/{}'.format(raw_data_dir, filename)
        with Timer('open {} with ALL sensors'.format(filename)):
            # data = np.genfromtxt(path, usecols=range(1, 49))
            data = np.genfromtxt(path,
                                 usecols=[1, 4, 13, 16, 18, 26, 31, 32, 37, 38,
                                          39, 40, 9, 11, 22, 23, 41, 10, 12,
                                          24, 25, 29, 30, 42, 43, 44],
                                 delimiter=',').tolist()
        print("# of data:", len(data))
        sensor_data.extend(data)
    with Timer('Sparse Coding...'):
        print("# of ALL data as a whole:", len(sensor_data))
        dl = sparse_coding(dimension, sensor_data, out_dir, 1, 10000, 0.00001)
    with open('{}/atoms'.format(out_dir), "w") as op:
        for component in dl.components_:
            line = ', '.join(str(e) for e in component)
            op.write(line + '\n')
    # `input_x` was undefined here; encoding the collected sensor data instead
    code = sparse_encode(sensor_data, dl.components_)
    with open('{}/codes'.format(out_dir), "w") as op:
        for coefficient in code:
            line = ', '.join(str(e) for e in coefficient)
            op.write(line + '\n')
def test_sparse_encode_shapes():
    n_components = 12
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    for algo in ('lasso_lars', 'lasso_cd', 'lars', 'omp', 'threshold'):
        code = sparse_encode(X, V, algorithm=algo)
        assert_equal(code.shape, (n_samples, n_components))
def test_sparse_encode_positivity(positive):
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    for algo in ('lasso_lars', 'lasso_cd', 'lars', 'threshold'):
        code = sparse_encode(X, V, algorithm=algo, positive=positive)
        if positive:
            assert_true((code >= 0).all())
        else:
            assert_true((code < 0).any())
    try:
        sparse_encode(X, V, algorithm='omp', positive=positive)
    except ValueError:
        if not positive:
            raise
def test_sparse_encode_shapes_omp():
    rng = np.random.RandomState(0)
    algorithms = ['omp', 'lasso_lars', 'lasso_cd', 'lars', 'threshold']
    for n_components, n_samples in itertools.product([1, 5], [1, 9]):
        X_ = rng.randn(n_samples, n_features)
        dictionary = rng.randn(n_components, n_features)
        for algorithm, n_jobs in itertools.product(algorithms, [1, 3]):
            code = sparse_encode(X_, dictionary, algorithm=algorithm,
                                 n_jobs=n_jobs)
            assert_equal(code.shape, (n_samples, n_components))
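# Standalone shape-convention sketch (assumed data, not from the tests above):
# sparse_encode(X, D) takes X of shape (n_samples, n_features) and a dictionary
# D of shape (n_components, n_features), and returns codes of shape
# (n_samples, n_components), which is what the tests above check.
import numpy as np
from sklearn.decomposition import sparse_encode

rng = np.random.RandomState(0)
X_demo = rng.randn(9, 8)   # 9 samples, 8 features
D_demo = rng.randn(5, 8)   # 5 atoms, 8 features
code = sparse_encode(X_demo, D_demo, algorithm='omp', n_nonzero_coefs=2)
assert code.shape == (9, 5)
assert np.count_nonzero(code[0]) <= 2  # OMP keeps at most n_nonzero_coefs atoms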
def to_sparse(X, dim):
    sparse_dict = MiniBatchDictionaryLearning(dim)
    sparse_dict.fit(X)
    sparse_vectors = sparse_encode(X, sparse_dict.components_)
    for i in sparse_vectors:
        print(i)
    return sparse_vectors
def predict(self, imgs, neuron_idx=None, penalty_lambda=None, algorithm=None):
    """Get neuron response to images.

    Parameters
    ----------
    imgs

    Returns
    -------
    """
    imgs_array = make_2d_input_matrix(imgs)
    if neuron_idx is None:
        dict_to_use = self.w
    else:
        dict_to_use = self.w[neuron_idx:(neuron_idx + 1), :]

    if penalty_lambda is None:
        _lambda = self._lambda
    else:
        _lambda = penalty_lambda
    assert np.isscalar(_lambda)

    if algorithm is None:
        _algorithm = self.algorithm
    else:
        _algorithm = algorithm

    # Let's call the sparse encoder to do it!
    # No scaling at all!
    # Having /nsample in the objective function is exactly the same as solving
    # each problem separately. The underlying function called is ElasticNet,
    # and that function fits each column of y separately. Each column of y is
    # one stimulus, because imgs_array and dict_to_use are transposed when
    # passed to ElasticNet; that is, y = imgs_array.T.
    #
    # There is also a subtle detail in the code where alpha is divided by the
    # number of pixels in each stimulus. I haven't figured that out, but it
    # seems to be simply a detail of using ElasticNet for this task.
    if _algorithm in ['lasso_lars', 'lasso_cd']:
        response = sparse_encode(imgs_array, dict_to_use, alpha=_lambda,
                                 algorithm=_algorithm, max_iter=10000)
    else:
        assert _algorithm == 'spams'
        # print(imgs_array.dtype, dict_to_use.dtype, _lambda.shape)
        response = lasso(np.asfortranarray(imgs_array.T),
                         D=np.asfortranarray(dict_to_use.T),
                         lambda1=_lambda, mode=2)
        response = response.T.toarray()  # because lasso returns a sparse matrix

    # This can be used for debugging, for comparison with SPAMS.
    # Notice that these are per-sample costs.
    self.last_cost_recon = 0.5 * np.sum(
        (imgs_array - np.dot(response, dict_to_use)) ** 2, axis=1)
    self.last_cost_sparsity = _lambda * np.abs(response).sum(axis=1)
    assert self.last_cost_sparsity.shape == (imgs_array.shape[0],)
    assert self.last_cost_recon.shape == (imgs_array.shape[0],)
    self.last_cost = np.mean(self.last_cost_recon + self.last_cost_sparsity)
    return response
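# A minimal standalone sketch (assumed shapes and names, not part of the class
# above) checking the claim in predict()'s comments: sparse_encode with
# algorithm='lasso_cd' solves each sample as an independent Lasso problem,
# with the penalty divided by n_features ("number of pixels") internally.
import numpy as np
from sklearn.decomposition import sparse_encode
from sklearn.linear_model import Lasso

rng = np.random.RandomState(0)
D = rng.randn(8, 30)   # dictionary: n_components x n_features
Y = rng.randn(5, 30)   # five "stimuli", one per row
alpha = 0.1

codes = sparse_encode(Y, D, algorithm='lasso_cd', alpha=alpha)

# Equivalent per-sample Lasso: design matrix D.T, penalty alpha / n_features.
lasso = Lasso(alpha=alpha / D.shape[1], fit_intercept=False, max_iter=10000)
manual = np.vstack([lasso.fit(D.T, y).coef_ for y in Y])
print(np.max(np.abs(codes - manual)))  # should be ~0 up to solver tolerance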
def test_sparse_encode(self):
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)
    _, dictionary, _ = decomposition.dict_learning(iris.data, 2, 1,
                                                   random_state=self.random_state)
    result = df.decomposition.sparse_encode(dictionary)
    expected = decomposition.sparse_encode(iris.data, dictionary)
    self.assertIsInstance(result, pdml.ModelFrame)
    tm.assert_index_equal(result.index, df.data.index)
    self.assert_numpy_array_almost_equal(result.values, expected)
def sparse_coding(n_atom, input_x, out_dir):
    dictionary = get_dictionary(n_atom, input_x)
    code = sparse_encode(input_x, dictionary)
    np.set_printoptions(precision=3, suppress=True)
    # print(code)
    # print(dictionary)
    with open('{}/atoms'.format(out_dir), "w") as op:
        for component in dictionary:
            line = ', '.join(str(round(e, 3)) for e in component)
            op.write(line + '\n')
    with open('{}/codes'.format(out_dir), "w") as op:
        for coefficient in code:
            line = ', '.join(str(round(e, 3)) for e in coefficient)
            op.write(line + '\n')
    return code
def learning_sparse_coding(X, components=None):
    """
    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.DictionaryLearning.html
    http://scikit-learn.org/stable/modules/generated/sklearn.decomposition.sparse_encode.html
    """
    if components is None:
        print('Learning the dictionary...')
        t0 = time()
        diclearner = MiniBatchDictionaryLearning(n_components=100, verbose=True)
        components = diclearner.fit(X).components_
        np.savetxt('components_of_convfeat.txt', components)
        dt = time() - t0
        print('done in %.2fs.' % dt)
    codes = sparse_encode(X, components)
    np.savetxt('sparse_codes_of_convfeat.txt', codes)
def test_dict_learning_online_partial_fit():
    # this test was not actually passing before!
    raise SkipTest
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    dico1 = MiniBatchDictionaryLearning(n_components, n_iter=10, batch_size=1,
                                        shuffle=False, dict_init=V,
                                        random_state=0).fit(X)
    dico2 = MiniBatchDictionaryLearning(n_components, n_iter=1, dict_init=V,
                                        random_state=0)
    for ii, sample in enumerate(X):
        dico2.partial_fit(sample, iter_offset=ii * dico2.n_iter)
        # if ii == 1: break
    assert_true(not np.all(sparse_encode(X, dico1.components_, alpha=100) == 0))
    assert_array_equal(dico1.components_, dico2.components_)
def test_dict_learning_online_partial_fit():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V ** 2, axis=1)[:, np.newaxis]
    dict1 = MiniBatchDictionaryLearning(n_components, n_iter=10 * len(X),
                                        batch_size=1, alpha=1, shuffle=False,
                                        dict_init=V, random_state=0).fit(X)
    dict2 = MiniBatchDictionaryLearning(n_components, alpha=1, n_iter=1,
                                        dict_init=V, random_state=0)
    for i in range(10):
        for sample in X:
            dict2.partial_fit(sample[np.newaxis, :])
    assert not np.all(sparse_encode(X, dict1.components_, alpha=1) == 0)
    assert_array_almost_equal(dict1.components_, dict2.components_, decimal=2)
def gabor_encode(self):
    patches = extract_patches_2d(self.img, (self.patch_size, self.patch_size))
    patches = patches.reshape(patches.shape[0], -1)
    # code = sparse_encode(patches, self.kernels, algorithm='threshold', alpha=1)
    code = sparse_encode(patches, self.kernels, algorithm='lars',
                         n_nonzero_coefs=2)
    idx = np.std(code, axis=1) > 0.3
    selected_patches = patches  # [idx]
    selected_code = code  # [idx]
    min_code, max_code = np.min(selected_code), np.max(selected_code)
    # print(selected_patches)
    c = 1  # subplot indices start at 1
    s = 21
    for i in range(selected_code.shape[0]):
        print(i)
        plt.subplot(s, s * 2, c)
        plt.xticks(())
        plt.gca().set_ylim([min_code, max_code])
        plt.yticks(())
        plt.plot(selected_code[i])
        c += 1
        plt.subplot(s, s * 2, c)
        plt.xticks(())
        plt.yticks(())
        plt.imshow(selected_patches[i].reshape(self.patch_size, self.patch_size),
                   cmap='gray', interpolation='none')
        c += 1
    plt.show()
    orientations = np.argmax(code, axis=1)
    activations = np.std(code, axis=1)
    orientations[activations < self.activation_threshold] = self.zero_value
    # blank_batches = np.ones((patches.shape[0], self.patch_size, self.patch_size)) * orientations[:, None, None]
    # recon = reconstruct_from_patches_2d(blank_batches, (self.img_height, self.img_width))
    # return recon
    return orientations.reshape(self.map_height, self.map_width)
def FindTopSCV(k, dic, Fout2, prompt):
    sh = (Fout2.shape[0], Fout2.shape[1], k, 3)
    cplist = np.zeros(sh)
    if prompt == 'SP':
        for j in range(Fout2.shape[1]):
            for i in range(Fout2.shape[0]):
                y = np.reshape(Fout2[i, j], (Fout2.shape[2], 1))
                if not y.any():  # all-zero vector (was `y.all() == 0`)
                    p = np.zeros(k)
                    p[0] = 1
                    lc = np.zeros((k, 2))
                    lc[0, :] = [20, 20]
                else:
                    try:
                        x_hat = CSRec_SP(k, dic, y)
                        (p, lc) = prob(k, x_hat, Fout2.shape[0])
                    except Exception:
                        p = np.zeros(k)
                        p[0] = 1
                        lc = np.zeros((k, 2))
                        lc[0, :] = [20, 20]
                cplist[i, j, :, 0] = p
                cplist[i, j, :, 1:3] = lc
                print(i, j)
    elif prompt == 'OMP':
        for j in range(Fout2.shape[1]):
            for i in range(Fout2.shape[0]):
                y = np.reshape(Fout2[i, j], (Fout2.shape[2], 1))
                # X = code * dic
                y = np.reshape(Fout2,
                               (Fout2.shape[0] * Fout2.shape[1], Fout2.shape[2]))
                # algorithm='omp' added; without it n_nonzero_coefs is ignored
                x_hat = sparse_encode(X=y, dictionary=dic.transpose(),
                                      algorithm='omp', n_nonzero_coefs=k)
                x_hat = x_hat.transpose()
                (p, lc) = prob(k, x_hat, Fout2.shape[0])
                cplist[i, j, :, 0] = p
                cplist[i, j, :, 1:3] = lc
                print(i, j)
    return cplist
def fft_handler(*args):
    global current_note_fft
    print(len(current_note_fft))
    fft = args[1].split()
    fft = np.array([float(i) for i in fft])
    n = normalize_vector(fft.reshape(1, -1))[0]
    if n is None:
        return
    current_note_fft += [n]
    if len(current_note_fft) == 10:
        s = sparse_encode(n.reshape(1, -1), data_per_fret, algorithm='lars',
                          n_nonzero_coefs=NONZERO_COEFS)
        s = s[0]
        a = np.argsort(s)
        coeffs = [s[i] for i in a[-NONZERO_COEFS:]]
        coeffs = normalize_vector(np.array(coeffs))[0]
        pitches = [guitar_notes[i] for i in a[-NONZERO_COEFS:]]
        print(pitches)
        print(coeffs)
        d = get_relevant_pitches(pitches, coeffs)
        sendMIDI_out(d)
print "X_train.shape", train_X.shape print "Components shape", dl.components_.shape # components = dl.components().reshape((n_components, n_features)) components = dl.components_ # Visualizing the components as images component_titles = ["component %d" % i for i in range(components.shape[0])] plot_gallery("Visualizing top components", components, component_titles, w, h, n_row=n_components / 10, n_col=10) plt.show() ############################################################################### # Sparse Encoding print("\nSparse Encoding") train_X_pca = np.zeros((len(train_X), n_components)) train_X_pca = sparse_encode(train_X[0:10], components, alpha=10, algorithm='omp') np.set_printoptions(precision=3, suppress=True) print train_X_pca # for i in range(len(train_X)): # train_X_pca[i] = dl.transform(train_X[i]) test_X_pca = np.zeros((len(test_X), n_components)) test_X_pca = sparse_encode(test_X[0:10], components, alpha=10, algorithm='omp') # for i in range(len(test_X)): # test_X_pca[i] = dl.transform(test_X[i]) print "train_X_pca.shape", train_X_pca.shape ############################################################################### # Visualize reconstructed images reconstructed_X = np.zeros((20, n_features))
plt.plot(trajectory['x'], trajectory['y'])
trajectory['x'] = []
trajectory['y'] = []
plt.show()

alpha_schedule = [.2 / 5000., .5 / 5000., 1. / 5000., 2. / 5000., 5. / 5000.]
assert num_trajectories == len(trajectories)
for j, alpha in enumerate(alpha_schedule):
    print('j =', j, '; alpha =', alpha)
    from sklearn.decomposition import sparse_encode
    print('running SC', j)
    HS = sparse_encode(model.W.get_value(), X.T, alpha=alpha,
                       algorithm='lasso_cd').T
    assert HS.shape == (5000, 1600)
    print('done encoding')
    HS = np.abs(HS)
    if np.any(np.isnan(HS)):
        print('has nans')
    if np.any(np.isinf(HS)):
        print('has infs')
    print('HS shape', HS.shape)
    print('HS subtensor shape', HS[0:num_trajectories].shape)
    act_prob = (HS[:, 0:num_trajectories] > .01).mean(axis=0)
def learn_representation_for_labeled_data(labeled_examples, dictionary, max_iter):
    return sparse_encode(labeled_examples, dictionary, max_iter=max_iter)
def test_sparse_encode(self):
    """Test that sparse encoding via ADMM behaves like sklearn's sparse_encode.

    After testing, we found that the order of the factors is reversed.
    Here is the problem that sparse_encode tries to solve:

        C* = argmin_C 0.5 ||X - C D||_2^2 + gamma * ||C||_1

    and here is the one that lasso_admm tries to solve:

        C* = argmin_C 0.5 ||X - D C||_2^2 + gamma * ||C||_1

    where D is the dictionary. The best way to compare them is to transpose
    EVERYTHING: X = C D and X^T = D^T C^T.
    """
    from sklearn.decomposition import sparse_encode

    alpha = 1
    n_components = 6
    X = np.array([[1.76405235, 0.40015721, 0.97873798, 2.2408932,
                   1.86755799, -0.97727788, 0.95008842, -0.15135721],
                  [-0.10321885, 0.4105985, 0.14404357, 1.45427351,
                   0.76103773, 0.12167502, 0.44386323, 0.33367433],
                  [1.49407907, -0.20515826, 0.3130677, -0.85409574,
                   -2.55298982, 0.6536186, 0.8644362, -0.74216502],
                  [2.26975462, -1.45436567, 0.04575852, -0.18718385,
                   1.53277921, 1.46935877, 0.15494743, 0.37816252],
                  [-0.88778575, -1.98079647, -0.34791215, 0.15634897,
                   1.23029068, 1.20237985, -0.38732682, -0.30230275],
                  [-1.04855297, -1.42001794, -1.70627019, 1.9507754,
                   -0.50965218, -0.4380743, -1.25279536, 0.77749036],
                  [-1.61389785, -0.21274028, -0.89546656, 0.3869025,
                   -0.51080514, -1.18063218, -0.02818223, 0.42833187],
                  [0.06651722, 0.3024719, -0.63432209, -0.36274117,
                   -0.67246045, -0.35955316, -0.81314628, -1.7262826],
                  [0.17742614, -0.40178094, -1.63019835, 0.46278226,
                   -0.90729836, 0.0519454, 0.72909056, 0.12898291],
                  [1.13940068, -1.23482582, 0.40234164, -0.68481009,
                   -0.87079715, -0.57884966, -0.31155253, 0.05616534]])

    # start with sensible defaults
    dictionary = init_dictionary(X, n_components=n_components)
    code_sklearn = sparse_encode(X, dictionary, alpha=alpha)
    code_admm_T, costs = lasso_admm(X.T, dictionary.T, gamma=alpha)
    code_admm = code_admm_T.T

    # Compare the costs of the two solutions.
    cost_sklearn = lasso_cost(X.T, dictionary.T, code_sklearn.T, alpha)
    cost_admm = lasso_cost(X.T, dictionary.T, code_admm.T, alpha)

    # Make sure ADMM agrees with sparse_encode.
    np.testing.assert_array_almost_equal(code_admm, code_sklearn)
    np.testing.assert_almost_equal(cost_admm, cost_sklearn)
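# Sketch of the transposition identity used in the docstring above: if
# X = C D (sparse_encode's convention, rows as samples), then X.T = D.T C.T,
# which is lasso_admm's convention, so the two solvers can be compared by
# transposing everything. Names here are standalone assumptions.
import numpy as np

rng = np.random.RandomState(0)
C = rng.randn(10, 6)   # codes, one row per sample
D = rng.randn(6, 8)    # dictionary, one row per atom
X = C.dot(D)
assert np.allclose(X.T, D.T.dot(C.T))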
def bow_feature_extract(self, path):
    des = self.raw_feature_extract(path)
    out = sum(sparse_encode(des, self.mbdl.components_))
    out = np.array([out])
    return out
def transform(self, sample):
    return sparse_encode(sample.T, self.dictionary.T, algorithm='omp',
                         n_nonzero_coefs=self.n_nonzero).T
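# Shape sketch for the transform above, assuming a column-major convention:
# `sample` is n_features x n_samples and `self.dictionary` is
# n_features x n_atoms. sparse_encode expects rows as samples, hence the
# transposes on the way in and out. Names here are standalone assumptions.
import numpy as np
from sklearn.decomposition import sparse_encode

rng = np.random.RandomState(0)
sample = rng.randn(16, 4)      # n_features x n_samples
dictionary = rng.randn(16, 8)  # n_features x n_atoms
code = sparse_encode(sample.T, dictionary.T, algorithm='omp',
                     n_nonzero_coefs=3).T
assert code.shape == (8, 4)    # one n_atoms-long code per input column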
fmri_masked = fmri_masked[:, np.all(fmri_masked != 0, axis=0)]

# DEFINING features and targets
features = fmri_masked
targets = target_int

# Dictionary Learning on Target
sparse_components = 200
dict_sparse = DictionaryLearning(alpha=1, n_components=sparse_components,
                                 max_iter=3, verbose=3)
dict_sparse.fit(features)
Dt_0 = dict_sparse.components_
Rt_0 = sparse_encode(features, dictionary=Dt_0)

# Dictionary Learning on Source iter 2
sparse_components = 300
dict_sparse = MiniBatchDictionaryLearning(alpha=1,
                                          n_components=sparse_components,
                                          verbose=3, batch_size=10, n_iter=200)
dict_sparse.fit(Rs_0)
Ds_1 = dict_sparse.components_
# Rs_1 = sparse_encode(Rs_0, dictionary=Ds_1)
Rt_1 = sparse_encode(Rt_0, dictionary=Ds_1)

run_range = range(12)
feat_range = range(756)
print "X_train.shape", train_X.shape print "Components shape", dl.components_.shape # components = dl.components().reshape((n_components, n_features)) components = dl.components_ # Visualizing the components as images component_titles = ["%d" % i for i in range(components.shape[0])] plot_gallery("Visualizing top components", components, w, h, n_row=n_components / 10, n_col=10) plt.show() ############################################################################### # Sparse Encoding print("\nSparse Encoding") train_X_sc = np.zeros((10, n_components)) train_X_sc = sparse_encode(train_X, components, algorithm='lars') np.set_printoptions(precision=1, suppress=False, linewidth=800) test_X_sc = np.zeros((len(test_X), n_components)) test_X_sc = sparse_encode(test_X, components, algorithm='lars') print "train_X_sc.shape", train_X_sc.shape ############################################################################### # Visualize reconstructed images reconstructed_X = np.zeros((20, n_features)) reconstructed_X_idx = np.random.choice(np.arange(len(reconstructed_X)), size=10, replace=False) reconstructed_X[reconstructed_X_idx] = train_X[reconstructed_X_idx] reconstructed_X[reconstructed_X_idx] = np.dot(train_X_sc[reconstructed_X_idx], components) print "reconstructed_X.shape", reconstructed_X.shape
a = sd.getData(256, return_decoded=True)
axmin = np.min(a[0]) - 0.1
axmax = np.max(a[0]) + 0.1
for i in range(10):
    ax1.plot(np.linspace(start=0, stop=sd.num_features, num=sd.num_features),
             a[0][a[1] == 0][i].reshape(-1))
    ax1.set_ylim([axmin, axmax])
print(a[0][a[1] == 1].shape)
for i in range(10):
    ax2.plot(np.linspace(start=0, stop=sd.num_features, num=sd.num_features),
             a[0][a[1] == 1][i].reshape(-1))
    ax2.set_ylim([axmin, axmax])
plt.show()

###
recoded = sparse_encode(X=decoded, dictionary=dictionary, n_nonzero_coefs=20,
                        alpha=0.001)
print("recoded.shape", recoded.shape)
for i in range(num_datapoints):
    print("code/recode:")
    for k in range(num_codewords):
        print(round(codes[i, k], 3), " ", round(recoded[i, k], 3))
###
# def generate_codes(length):
# scipy.io.savemat('/home/jonny2/PycharmProjects/ML-algorithms/Projects/GWAS-SparseCoding/psychiatric.mat',
#                  mdict={'tr_dat': X_train, 'tt_dat': X_test, 'trls': y_train, 'ttls': y_test})

###############################################################################
# Sparse Representation
n_components = 25

# dl = DictionaryLearning(n_components, max_iter=15, n_jobs=4, verbose=2)
dl = KSVDSparseCoding(n_components, max_iter=5, verbose=1, approx=True)
dl.fit(X_s)
eigenfaces = dl.components_.T

print("Projecting the input data on the learned dictionary bases")
X_train_pca = sparse_encode(X_train, eigenfaces, algorithm='lasso_lars')
X_test_pca = sparse_encode(X_test, eigenfaces, algorithm='lasso_lars')
print("X_train_pca.shape", X_train_pca.shape)
print("X_test_pca.shape", X_test_pca.shape)

###############################################################################
# Train a SVM classification model
print("Fitting the classifier to the training set")
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]}
clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
clf = clf.fit(X_train_pca, y_train)
print("Best estimator found by grid search:")
print(clf.best_estimator_)
import numpy as np
from sklearn.decomposition import sparse_encode

# Note: sparse_encode requires X and the dictionary to share the same
# n_features (second axis); as written, the shapes below do not match
# (1600 vs 5000), so this looks like a minimal bug-reproduction snippet.
HS = sparse_encode(np.random.randn(108, 1600),
                   np.random.randn(108, 5000),
                   alpha=1. / 5000.,
                   algorithm='lasso_lars').T
def plot_reconstruction_detail(shape, dictionary, n_components, scaled=False,
                               algorithm='omp', sorted=True, show_points=False,
                               show_error=True, figsize=None):
    # Compute reconstruction for given shape
    xy = len(shape) // 2
    if algorithm in ('omp', 'lars'):
        coefs = sparse_encode(shape[np.newaxis, :], dictionary,
                              algorithm=algorithm,
                              n_nonzero_coefs=n_components)
        recons = np.dot(coefs, dictionary)[0]
    elif algorithm == 'pca':
        if not isinstance(dictionary, PCA):
            raise ValueError('Must pass PCA object for PCA algorithm.')
        pca = dictionary
        dictionary = pca.components_
        X = shape[np.newaxis, :] - pca.mean_
        coefs = np.dot(X, dictionary[:n_components].T)
        recons = np.dot(coefs, dictionary[:n_components])[0] + pca.mean_
    error = np.sum((shape - recons) ** 2)

    # Prepare plotting
    if figsize is None:
        figsize = (5 * n_components, 8)
    fig = plt.figure(figsize=figsize)
    markers = {'recons': '-o' if show_points else '-',
               'shapes': '--x' if show_points else '--'}
    xlim = 1.1 * shape[:xy].min(), 1.1 * shape[:xy].max()
    ylim = 1.1 * shape[xy:].min(), 1.1 * shape[xy:].max()

    # # Plot the reconstruction along the initial shape
    # plt.subplot(2, n_components + 1, 1)
    # plt.plot(shape[:xy], shape[xy:], markers['shapes'], c='C0', lw=1.0)
    # plt.plot(recons[:xy], recons[xy:], markers['recons'], c='C1', lw=1.5)
    # plt.xlim(xlim); plt.ylim(ylim)
    # plt.tick_params(axis='both', bottom=False, labelbottom=False,
    #                 left=False, labelleft=False)
    # plt.title('Error = {:.4f}'.format(error), fontsize=18)

    # Plot the components sorted by coefficient values,
    # as well as the cumulative sum
    argsort = np.argsort(-np.abs(coefs[0])) if sorted else np.arange(n_components)
    assert len(np.where(coefs != 0)[0]) == n_components
    cumsum = np.zeros_like(shape)
    if algorithm == 'pca':
        cumsum += pca.mean_
    for i in range(n_components):
        coef = coefs[0][argsort][i]
        comp = dictionary[argsort][i]
        prevsum = cumsum
        cumsum = cumsum + coef * comp
        error = np.sum((shape - cumsum) ** 2)
        if scaled:
            comp = coef * comp
        # if algorithm == 'pca':
        #     comp += pca.mean_
        # plt.subplot(2, n_components + 1, 2 + i)
        # plt.plot(comp[:xy], comp[xy:], markers['recons'],
        #          c='C{}'.format((i + 2) % 10), lw=1.5)
        # plt.xlim(xlim); plt.ylim(ylim)
        # plt.tick_params(axis='both', bottom=False, labelbottom=False,
        #                 left=False, labelleft=False)
        # plt.title('{:.4f}'.format(coef), fontsize=18)

        plt.subplot(2, n_components, 1 + i)
        plt.plot(comp[:xy], comp[xy:], markers['recons'],
                 c='C{}'.format((i + 2) % 10), lw=1.5)
        # close the contour loop
        plt.plot(np.array([comp[:xy][0], comp[:xy][-1]]),
                 np.array([comp[xy:][0], comp[xy:][-1]]),
                 markers['recons'], c='C{}'.format((i + 2) % 10), lw=1.5)
        plt.xlim(xlim); plt.ylim(ylim)
        plt.tick_params(axis='both', bottom=False, labelbottom=False,
                        left=False, labelleft=False)
        plt.title('Coefficient = {:.2f}'.format(coef), fontsize=36)

        # plt.subplot(2, n_components + 1, (n_components + 3) + i)
        # plt.plot(prevsum[:xy], prevsum[xy:], markers['shapes'], lw=1.0)
        # plt.plot(cumsum[:xy], cumsum[xy:], markers['recons'], lw=1.5)
        # plt.xlim(xlim); plt.ylim(ylim)
        # plt.tick_params(axis='both', bottom=False, labelbottom=False,
        #                 left=False, labelleft=False)

        plt.subplot(2, n_components, n_components + 1 + i)
        plt.plot(shape[:xy], shape[xy:], markers['shapes'], c='C0', lw=1.0)
        # close the contour loop
        plt.plot(np.array([shape[:xy][0], shape[:xy][-1]]),
                 np.array([shape[xy:][0], shape[xy:][-1]]),
                 markers['shapes'], c='C0', lw=1.0)
        plt.plot(cumsum[:xy], cumsum[xy:], markers['recons'], c='C1', lw=1.5)
        plt.plot(np.array([cumsum[:xy][0], cumsum[:xy][-1]]),
                 np.array([cumsum[xy:][0], cumsum[xy:][-1]]),
                 markers['recons'], c='C1', lw=1.5)
        plt.xlim(xlim); plt.ylim(ylim)
        plt.tick_params(axis='both', bottom=False, labelbottom=False,
                        left=False, labelleft=False)
        plt.title('Error = {:.2f}'.format(error), fontsize=36)

    return fig
def active_support_elastic_net(X, y, alpha, tau=1.0, algorithm='spams',
                               support_init='knn', support_size=100,
                               maxiter=40):
    """An active-support-based algorithm for solving the elastic net
    optimization problem

        min_c tau ||c||_1 + (1-tau)/2 ||c||_2^2 + alpha / 2 ||y - c X||_2^2.

    Parameters
    -----------
    X : array-like, shape (n_samples, n_features)
    y : array-like, shape (1, n_features)
    alpha : float
    tau : float, default 1.0
    algorithm : string, default ``spams``
        Algorithm for solving the subproblems. Either lasso_lars or lasso_cd
        or spams (installation of the spams package is required).
        Note: ``lasso_lars`` and ``lasso_cd`` only support tau = 1.
    support_init : string, default ``knn``
        Determines how the active support is initialized.
        It can be either ``knn`` or ``L2``.
    support_size : int, default 100
        Determines the size of the working set. A small support_size decreases
        the runtime per iteration while increasing the number of iterations.
    maxiter : int, default 40
        Termination condition for the active support update.

    Returns
    -------
    c : shape (n_samples,)
        The optimal solution to the optimization problem.
    """
    n_samples = X.shape[0]

    if n_samples <= support_size:
        # Skip the active support search for small-scale data; with the full
        # support, the following iteration converges in one step.
        supp = np.arange(n_samples, dtype=int)
    else:
        if support_init == 'L2':
            L2sol = np.linalg.solve(
                np.identity(y.shape[1]) * alpha + np.dot(X.T, X), y.T)
            c0 = np.dot(X, L2sol)[:, 0]
            supp = np.argpartition(-np.abs(c0), support_size)[0:support_size]
        elif support_init == 'knn':
            supp = np.argpartition(-np.abs(np.dot(y, X.T)[0]),
                                   support_size)[0:support_size]

    curr_obj = float("inf")
    for _ in range(maxiter):
        Xs = X[supp, :]
        if algorithm == 'spams':
            cs = spams.lasso(np.asfortranarray(y.T), D=np.asfortranarray(Xs.T),
                             lambda1=tau * alpha, lambda2=(1.0 - tau) * alpha)
            cs = np.asarray(cs.todense()).T
        else:
            cs = sparse_encode(y, Xs, algorithm=algorithm, alpha=alpha)

        delta = (y - np.dot(cs, Xs)) / alpha
        obj = tau * np.sum(np.abs(cs[0])) \
            + (1.0 - tau) / 2.0 * np.sum(np.power(cs[0], 2.0)) \
            + alpha / 2.0 * np.sum(np.power(delta, 2.0))
        if curr_obj - obj < 1.0e-10 * curr_obj:
            break
        curr_obj = obj

        coherence = np.abs(np.dot(delta, X.T))[0]
        coherence[supp] = 0
        addedsupp = np.nonzero(coherence > tau + 1.0e-10)[0]
        if addedsupp.size == 0:  # converged
            break

        # Find the set of nonzero entries of cs.
        activesupp = supp[np.abs(cs[0]) > 1.0e-10]
        if activesupp.size > 0.8 * support_size:
            # support_size is too small and needs to be increased
            support_size = min(
                [round(max([activesupp.size, support_size]) * 1.1), n_samples])

        if addedsupp.size + activesupp.size > support_size:
            ord = np.argpartition(-coherence[addedsupp],
                                  support_size - activesupp.size
                                  )[0:support_size - activesupp.size]
            addedsupp = addedsupp[ord]

        supp = np.concatenate([activesupp, addedsupp])

    c = np.zeros(n_samples)
    c[supp] = cs
    return c
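# A minimal usage sketch for active_support_elastic_net, with assumed data
# (the function above must be in scope). The rows of X act as the dictionary
# and y is a single query sample; 'lasso_lars' avoids the spams dependency
# (and requires tau = 1.0).
import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.randn(300, 40)                       # n_samples x n_features
y_demo = X_demo[:1, :] + 0.01 * rng.randn(1, 40)  # 1 x n_features query
c = active_support_elastic_net(X_demo, y_demo, alpha=0.1, tau=1.0,
                               algorithm='lasso_lars', support_size=50)
print(c.shape)  # (300,): one coefficient per row of X_demo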
# coefficient = sparse_encode(result,
#                             self.dictionary,
#                             algorithm="omp",
#                             n_nonzero_coefs=None,
#                             alpha=None)
# return coefficient

with open("basisShapesC64L0.1", "rb") as file:
    dictionary = sio.loadmat(file)['component']
with open("sorted_shapes-32.mat", "rb") as file:
    # load once; calling loadmat twice on the same handle would read from EOF
    mat = sio.loadmat(file)
shapes = mat['shapes']
targets = mat['target']
targets = targets.reshape((1700, 1))
print(targets.shape)

coefficients = sparse_encode(shapes, dictionary, algorithm="omp",
                             n_nonzero_coefs=None, alpha=None)
a = {"coefficients": coefficients, "targets": targets}
sio.savemat("coefficients.mat", a)

# for i in np.count_nonzero(coefficients, 1):
#     print(i)
#
# recons = np.dot(coefficients, dictionary)
# errors = np.sum((shapes - recons) ** 2, axis=1)
# print(sum(errors) / len(errors))
def encode(X, dictionary):
    """Sparse coding."""
    return decomp.sparse_encode(X, dictionary)
print(image.shape)
abundance = abundance_map((.5, .33333, .25, .2), 1, (75, 75))
data = np.reshape(image, (image.shape[0] * image.shape[1], image.shape[2]))
print(data.shape)
dictionary, keys = convert_library(library)
print(dictionary.shape)

imputer_data = Imputer()
imputer_dict = Imputer()
imputer_data.fit(data)
imputer_dict.fit(dictionary)
data = imputer_data.transform(data)
dictionary = imputer_dict.transform(dictionary)

sparse = sparse_encode(data, dictionary, algorithm='lasso_cd', max_iter=1000,
                       n_nonzero_coefs=20, alpha=2)
print(sparse.shape)
# output = np.reshape(sparse, (75, 75, 224))

numbers = []
for n in names:
    iter = 0
    for keys in sorted(library.keys()):
        iter += 1
        if n == keys:
            numbers.append(iter)
print(numbers)
used = np.zeros((data.shape[0], 0))
if 'SC' in args['dimReductionType']:
    ####################################
    #           Sparse Coding          #
    ####################################
    print('Sparse Coding:')
    # normalize every column respectively
    from sklearn.preprocessing import MinMaxScaler
    normalizer = MinMaxScaler()  # feature range (0, 1)
    dataArray_normalized = normalizer.fit_transform(dataArray)
    print('normalized data:')
    print(dataArray_normalized)

    # reduce to the specified dimension
    from learnDic import sparse_coding
    from sklearn.decomposition import sparse_encode
    dl = sparse_coding(reducedDimension, dataArray_normalized, 0.2, 1000, 0.0001)
    code = sparse_encode(dataArray_normalized, dl.components_)
    data_reduced = code
    print('Reduced data:')
    print(data_reduced)
    print('Dictionary:')
    print(dl.components_)
    print('iteration:', dl.n_iter_)

elif 'PCA' in args['dimReductionType']:
    ####################################
    #   Principal Component Analysis   #
    ####################################
    from matplotlib.mlab import PCA as mlabPCA
    print('PCA:')
    myPCA = mlabPCA(dataArray)
    # reduce to the specified dimension
    data_reduced = myPCA.Y[:, 0:reducedDimension]
    print('Raw data:')
def elastic_net_subspace_clustering(X, gamma=50.0, gamma_nz=True, tau=1.0,
                                    algorithm='lasso_lars', active_support=True,
                                    active_support_params=None, n_nonzero=50):
    if algorithm in ('lasso_lars', 'lasso_cd') and tau < 1.0 - 1.0e-10:
        warnings.warn('algorithm {} cannot handle tau smaller than 1. '
                      'Using tau = 1'.format(algorithm))
        tau = 1.0

    if active_support is True and active_support_params is None:
        active_support_params = {}

    n_samples = X.shape[0]
    rows = np.zeros(n_samples * n_nonzero)
    cols = np.zeros(n_samples * n_nonzero)
    vals = np.zeros(n_samples * n_nonzero)
    curr_pos = 0

    for i in progressbar.progressbar(range(n_samples)):
        y = X[i, :].copy().reshape(1, -1)
        X[i, :] = 0

        if algorithm in ('lasso_lars', 'lasso_cd', 'spams'):
            if gamma_nz is True:
                coh = np.delete(np.absolute(np.dot(X, y.T)), i)
                alpha0 = np.amax(coh) / tau  # value for which the solution is zero
                alpha = alpha0 / gamma
            else:
                alpha = 1.0 / gamma

            if active_support is True:
                c = active_support_elastic_net(X, y, alpha, tau, algorithm,
                                               **active_support_params)
            else:
                if algorithm == 'spams':
                    c = spams.lasso(np.asfortranarray(y.T),
                                    D=np.asfortranarray(X.T),
                                    lambda1=tau * alpha,
                                    lambda2=(1.0 - tau) * alpha)
                    c = np.asarray(c.todense()).T[0]
                else:
                    c = sparse_encode(y, X, algorithm=algorithm, alpha=alpha)[0]
        else:
            warnings.warn("algorithm {} not found".format(algorithm))

        index = np.flatnonzero(c)
        if index.size > n_nonzero:
            # warnings.warn("The number of nonzero entries in sparse subspace clustering exceeds n_nonzero")
            index = index[np.argsort(-np.absolute(c[index]))[0:n_nonzero]]
        rows[curr_pos:curr_pos + len(index)] = i
        cols[curr_pos:curr_pos + len(index)] = index
        vals[curr_pos:curr_pos + len(index)] = c[index]
        curr_pos += len(index)

        X[i, :] = y

    # affinity = sparse.csr_matrix((vals, (rows, cols)), shape=(n_samples, n_samples)) \
    #     + sparse.csr_matrix((vals, (cols, rows)), shape=(n_samples, n_samples))
    return sparse.csr_matrix((vals, (rows, cols)), shape=(n_samples, n_samples))
print "X_train.shape", train_X.shape print "Components shape", dl.components_.shape # components = dl.components().reshape((n_components, n_features)) components = dl.components_ # Visualizing the components as images component_titles = ["component %d" % i for i in range(components.shape[0])] plot_gallery("Visualizing top components", components, component_titles, patch_w, patch_h, n_row=24, n_col=24) plt.show() ############################################################################### # Sparse Encoding print("\nSparse Encoding") train_X_pca = np.zeros((len(train_X_patches), n_components)) train_X_pca = sparse_encode(train_X_patches, components, algorithm='omp') np.set_printoptions(precision=3, suppress=True) print train_X_pca # for i in range(len(train_X)): # train_X_pca[i] = dl.transform(train_X[i]) test_X_pca = np.zeros((len(test_X), n_components)) test_X_pca = sparse_encode(test_X_patches, components, algorithm='omp') # for i in range(len(test_X)): # test_X_pca[i] = dl.transform(test_X[i]) print "train_X_pca.shape", train_X_pca.shape ############################################################################### # Visualize reconstructed images reconstructed_X = np.zeros((20, n_features))
def elastic_net_subspace_clustering(X, gamma=50.0, gamma_nz=True, tau=1.0,
                                    algorithm='lasso_lars', active_support=True,
                                    active_support_params=None, n_nonzero=50):
    """Elastic net subspace clustering (EnSC) [1].

    Compute the self-representation matrix C by solving the following
    optimization problem:

        min_{c_j} tau ||c_j||_1 + (1-tau)/2 ||c_j||_2^2
                  + alpha / 2 ||x_j - c_j X||_2^2   s.t.  c_jj = 0,

    where c_j and x_j are the j-th rows of C and X, respectively.

    Parameter ``algorithm`` specifies the algorithm for solving the
    optimization problem. ``lasso_lars`` and ``lasso_cd`` are algorithms
    implemented in sklearn; ``spams`` refers to the same algorithm as
    ``lasso_lars`` but is implemented in the spams package available at
    http://spams-devel.gforge.inria.fr/ (installation required). In principle,
    all three algorithms give the same result. For large-scale data (e.g. with
    > 5000 data points), use any of these algorithms in conjunction with
    ``active_support=True``. It adopts an efficient active support strategy
    that solves the optimization problem by breaking it into a sequence of
    small-scale optimization problems, as described in [1].

    If tau = 1.0, the method reduces to sparse subspace clustering with basis
    pursuit (SSC-BP) [2]. If tau = 0.0, the method reduces to least squares
    regression (LSR) [3]. Note: ``lasso_lars`` and ``lasso_cd`` only support
    tau = 1.

    Parameters
    -----------
    X : array-like, shape (n_samples, n_features)
        Input data to be clustered.
    gamma : float
    gamma_nz : boolean, default True
        gamma and gamma_nz together determine the parameter alpha. When
        ``gamma_nz = False``, alpha = gamma. When ``gamma_nz = True``,
        alpha = gamma * alpha0, where alpha0 is the largest number such that
        the solution to the optimization problem with alpha = alpha0 is the
        zero vector (see Proposition 1 in [1]). Therefore, when
        ``gamma_nz = True``, gamma should be a value greater than 1.0.
        A good choice is typically in the range [5, 500].
    tau : float, default 1.0
        Parameter for the elastic net penalty term. When tau = 1.0, the
        method reduces to sparse subspace clustering with basis pursuit
        (SSC-BP) [2]. When tau = 0.0, the method reduces to least squares
        regression (LSR) [3].
    algorithm : string, default ``lasso_lars``
        Algorithm for computing the representation. Either lasso_lars or
        lasso_cd or spams (installation of the spams package is required).
        Note: ``lasso_lars`` and ``lasso_cd`` only support tau = 1.
    n_nonzero : int, default 50
        An upper bound on the number of nonzero entries of each representation
        vector. If there are more than n_nonzero nonzero entries, only the top
        n_nonzero entries with the largest absolute values are kept.
    active_support : boolean, default True
        Set to True to use the active support algorithm in [1] for solving
        the optimization problem. This should significantly reduce the running
        time when n_samples is large.
    active_support_params : dictionary of string to any, optional
        Parameters (keyword arguments) and values for the active support
        algorithm. It may be used to set the parameters ``support_init``,
        ``support_size`` and ``maxiter``; see ``active_support_elastic_net``
        for details. Example: active_support_params={'support_size': 50,
        'maxiter': 100}. Ignored when ``active_support=False``.

    Returns
    -------
    representation_matrix_ : csr matrix, shape (n_samples, n_samples)
        The self-representation matrix.

    References
    -----------
    [1] C. You, C.-G. Li, D. Robinson, R. Vidal, Oracle Based Active Set
        Algorithm for Scalable Elastic Net Subspace Clustering, CVPR 2016
    [2] E. Elhamifar, R. Vidal, Sparse Subspace Clustering: Algorithm, Theory,
        and Applications, TPAMI 2013
    [3] C. Lu, et al., Robust and Efficient Subspace Segmentation via Least
        Squares Regression, ECCV 2012
    """
    if algorithm in ('lasso_lars', 'lasso_cd') and tau < 1.0 - 1.0e-10:
        warnings.warn('algorithm {} cannot handle tau smaller than 1. '
                      'Using tau = 1'.format(algorithm))
        tau = 1.0

    if active_support is True and active_support_params is None:
        active_support_params = {}

    n_samples = X.shape[0]
    rows = np.zeros(n_samples * n_nonzero)
    cols = np.zeros(n_samples * n_nonzero)
    vals = np.zeros(n_samples * n_nonzero)
    curr_pos = 0

    # for i in progressbar.progressbar(range(n_samples)):
    for i in range(n_samples):
        # if i % 1000 == 999:
        #     print('SSC: sparse coding finished {i} in {n_samples}'.format(i=i, n_samples=n_samples))
        y = X[i, :].copy().reshape(1, -1)
        X[i, :] = 0

        if algorithm in ('lasso_lars', 'lasso_cd', 'spams'):
            if gamma_nz is True:
                coh = np.delete(np.absolute(np.dot(X, y.T)), i)
                alpha0 = np.amax(coh) / tau  # value for which the solution is zero
                alpha = alpha0 / gamma
            else:
                alpha = 1.0 / gamma

            if active_support is True:
                c = active_support_elastic_net(X, y, alpha, tau, algorithm,
                                               **active_support_params)
            else:
                if algorithm == 'spams':
                    c = spams.lasso(np.asfortranarray(y.T),
                                    D=np.asfortranarray(X.T),
                                    lambda1=tau * alpha,
                                    lambda2=(1.0 - tau) * alpha)
                    c = np.asarray(c.todense()).T[0]
                else:
                    c = sparse_encode(y, X, algorithm=algorithm, alpha=alpha)[0]
        else:
            warnings.warn("algorithm {} not found".format(algorithm))

        index = np.flatnonzero(c)
        if index.size > n_nonzero:
            # warnings.warn("The number of nonzero entries in sparse subspace clustering exceeds n_nonzero")
            index = index[np.argsort(-np.absolute(c[index]))[0:n_nonzero]]
        rows[curr_pos:curr_pos + len(index)] = i
        cols[curr_pos:curr_pos + len(index)] = index
        vals[curr_pos:curr_pos + len(index)] = c[index]
        curr_pos += len(index)

        X[i, :] = y

    # affinity = sparse.csr_matrix((vals, (rows, cols)), shape=(n_samples, n_samples)) \
    #     + sparse.csr_matrix((vals, (cols, rows)), shape=(n_samples, n_samples))
    return sparse.csr_matrix((vals, (rows, cols)), shape=(n_samples, n_samples))
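# A minimal usage sketch with assumed data (the function above must be in
# scope) showing one common way to consume the self-representation matrix:
# symmetrize |C| into an affinity and feed it to spectral clustering. The
# spectral step is a typical choice, not part of the function above.
import numpy as np
from sklearn.cluster import SpectralClustering

rng = np.random.RandomState(0)
# 50 points from each of two 2-dimensional subspaces of R^20
basis1, basis2 = rng.randn(2, 20), rng.randn(2, 20)
X_demo = np.vstack([rng.randn(50, 2).dot(basis1),
                    rng.randn(50, 2).dot(basis2)])

C = elastic_net_subspace_clustering(X_demo, gamma=50.0, n_nonzero=10)
affinity = 0.5 * (abs(C) + abs(C.T)).toarray()
labels = SpectralClustering(n_clusters=2,
                            affinity='precomputed').fit_predict(affinity)
print(labels)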
def elastic_net_subspace_clustering(X, gamma=50.0, gamma_nz=True, tau=1.0,
                                    algorithm='lasso_lars', active_support=True,
                                    active_support_params=None, n_nonzero=50):
    """Elastic net subspace clustering (EnSC) [1].

    References
    -----------
    [1] C. You, C.-G. Li, D. Robinson, R. Vidal, Oracle Based Active Set
        Algorithm for Scalable Elastic Net Subspace Clustering, CVPR 2016
    [2] E. Elhamifar, R. Vidal, Sparse Subspace Clustering: Algorithm, Theory,
        and Applications, TPAMI 2013
    [3] C. Lu, et al., Robust and Efficient Subspace Segmentation via Least
        Squares Regression, ECCV 2012
    """
    if algorithm in ('lasso_lars', 'lasso_cd') and tau < 1.0 - 1.0e-10:
        warnings.warn(
            f'algorithm {algorithm} cannot handle tau smaller than 1. Using tau = 1')
        tau = 1.0

    if active_support is True and active_support_params is None:
        active_support_params = {}

    n_samples = X.shape[0]
    rows = np.zeros(n_samples * n_nonzero)
    cols = np.zeros(n_samples * n_nonzero)
    vals = np.zeros(n_samples * n_nonzero)
    curr_pos = 0

    for i in range(n_samples):
        y = X[i, :].copy().reshape(1, -1)
        X[i, :] = 0

        if algorithm in ('lasso_lars', 'lasso_cd', 'spams'):
            if gamma_nz is True:
                coh = np.delete(np.absolute(np.dot(X, y.T)), i)
                alpha0 = np.amax(coh) / tau  # value for which the solution is zero
                alpha = alpha0 / gamma
            else:
                alpha = 1.0 / gamma

            if active_support is True:
                c = active_support_elastic_net(X, y, alpha, tau, algorithm,
                                               **active_support_params)
            else:
                if algorithm == 'spams':
                    c = spams.lasso(np.asfortranarray(y.T),
                                    D=np.asfortranarray(X.T),
                                    lambda1=tau * alpha,
                                    lambda2=(1.0 - tau) * alpha)
                    c = np.asarray(c.todense()).T[0]
                else:
                    c = sparse_encode(y, X, algorithm=algorithm, alpha=alpha)[0]
        else:
            warnings.warn("algorithm {} not found".format(algorithm))

        index = np.flatnonzero(c)
        if index.size > n_nonzero:
            index = index[np.argsort(-np.absolute(c[index]))[0:n_nonzero]]
        rows[curr_pos:curr_pos + len(index)] = i
        cols[curr_pos:curr_pos + len(index)] = index
        vals[curr_pos:curr_pos + len(index)] = c[index]
        curr_pos += len(index)

        X[i, :] = y

    return sparse.csr_matrix((vals, (rows, cols)), shape=(n_samples, n_samples))
def test_unknown_method():
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    with pytest.raises(ValueError):
        sparse_encode(X, V, algorithm="<unknown>")