def _stack_possibly_ragged(items):
    """Stack *items* into an array; use a 1-D object array when shapes differ."""
    if len({np.shape(item) for item in items}) <= 1:
        return np.array(items)
    stacked = np.empty(len(items), dtype=object)
    for position, item in enumerate(items):
        stacked[position] = item
    return stacked


def omp(data, label, ll, ul, step, weight, state):
    """Pick features with OMP, score a linear SVR on them with 10-fold CV.

    For every sparsity level in ``np.linspace(ll, ul, step).astype(int)`` the
    data are split with a shuffled 10-fold ``KFold``; in each fold an
    ``OrthogonalMatchingPursuit`` selects features on the training part and a
    linear ``SVR`` is fit on the selected columns and scored on the test part.
    The weighted mean R^2 per level is plotted and the best level is reported.

    Parameters
    ----------
    data, label : arrays indexed by the fold index arrays (rows = samples).
    ll, ul, step : bounds and number of sparsity levels to try.
    weight : weights passed to ``np.average`` over the 10 folds.
    state : ``random_state`` of the ``KFold`` splitter.

    Returns
    -------
    pred, true : per-fold predictions / targets of the best level.
    r2, mse : per-fold scores of the best level.
    feature : feature indices selected in the best fold of the best level.
    """
    kf = KFold(n_splits=10, shuffle=True, random_state=state)
    X = data
    y = label
    r2 = []
    mse = []
    pred = []
    true = []
    ilist = []
    feature = []
    pbar = tnrange(step * 10, desc='loop')
    for i in np.linspace(ll, ul, step).astype(int):
        r2_single = []
        mse_single = []
        pred_single = []
        true_single = []
        feature_single = []
        for train_index, test_index in kf.split(X):
            y_train, y_test = y[train_index], y[test_index]
            X_train_tmp, X_test_tmp = X[train_index], X[test_index]
            clf = OrthogonalMatchingPursuit(n_nonzero_coefs=i, normalize=False)
            clf.fit(X_train_tmp, np.ravel(y_train))
            # Keep every feature OMP selected: a negative coefficient is
            # still a selected feature, so test for non-zero, not positive.
            feature_index = np.flatnonzero(clf.coef_)
            X_train = X_train_tmp[:, feature_index]
            X_test = X_test_tmp[:, feature_index]
            svr = svm.SVR(kernel='linear')
            svr.fit(X_train, np.ravel(y_train))
            y_test_pred = svr.predict(X_test)
            feature_single.append(feature_index)
            pred_single.append(y_test_pred)
            true_single.append(np.ravel(y_test))
            r2_single.append(r2_score(y_test, y_test_pred))
            mse_single.append(mean_squared_error(y_test, y_test_pred))
            pbar.update(1)
        r2.append(r2_single)
        mse.append(mse_single)
        pred.append(pred_single)
        true.append(true_single)
        feature.append(feature_single)
        ilist.append(i)
    r2 = np.array(r2)
    r2_mean = np.average(r2, axis=1, weights=weight)
    pbar.close()

    plt.figure()
    plt.plot(np.linspace(ll, ul, step), r2_mean)
    plt.xlabel('$non-zero coefficients$')
    plt.ylabel('$R^2$')

    best = int(np.argmax(r2_mean))
    # Index the Python lists first: fold sizes (and hence prediction
    # lengths) may differ, and np.array() on ragged nested lists raises on
    # current NumPy.
    pred = _stack_possibly_ragged(pred[best])
    true = _stack_possibly_ragged(true[best])
    r2 = r2[best]
    mse = np.array(mse[best])
    best_fold = int(np.argmax(r2))
    feature = np.asarray(feature[best][best_fold])
    a = ilist[best]
    print('max r2_score=', np.max(r2_mean), ', number of non-zero coefs=', a)
    print('number of selected features:', len(feature))
    return pred, true, r2, mse, feature
def OMP(problem, **kwargs):
    r"""Select the data series that explain the goal via Orthogonal Matching Pursuit.

    Parameters
    ----------
    problem : object
        Must expose ``problem.data`` (an iterable of mappings holding
        ``['data']['values']``) and ``problem.goal['data']['values']``.
    kwargs : dictionary
        kwargs['choose'] must be a positive integer
        (used as ``n_nonzero_coefs``).
        kwargs['coef_tolerance'] must be a nonnegative float
        (coefficients with a larger magnitude count as selected).

    Returns
    -------
    tuple
        ``(optimum, maximum)``: the entries of ``problem.data`` whose
        coefficient magnitude exceeds the tolerance, and the estimator's
        ``score`` on the fitted data.
    """
    series = numpy.array([datum['data']['values'] for datum in problem.data])
    regressors = series.T  # one column per data series

    model = OrthogonalMatchingPursuit(n_nonzero_coefs=kwargs['choose'])
    model.fit(regressors, problem.goal['data']['values'])

    optimum = []
    for datum, coefficient in zip(problem.data, model.coef_):
        if abs(coefficient) > kwargs['coef_tolerance']:
            optimum.append(datum)

    maximum = model.score(regressors, problem.goal['data']['values'])
    return (optimum, maximum)
def restore_cs1_signal(non_zero_features, sdm_signal, transformation,
                       error_handler=print) -> np.ndarray:
    """Recover a binary signal from ``sdm_signal`` with Orthogonal Matching Pursuit.

    Parameters
    ----------
    non_zero_features : sequence
        ``len(non_zero_features[0])`` is used as the number of non-zero
        coefficients; it must not be zero.
    sdm_signal : iterable
        Target passed to the OMP fit.
    transformation : array with a ``shape`` attribute
        Design matrix; the result has ``transformation.shape[1]`` entries.
    error_handler : callable, optional
        Called with the exception if anything fails; ``print`` is used when
        the given handler is not callable.

    Returns
    -------
    np.ndarray or None
        Array whose non-zero OMP coefficients are replaced by 1 (all zeros
        when ``sdm_signal`` contains only zeros); ``None`` after an error
        has been reported to the handler.
    """
    try:
        len_non_zero_features = len(non_zero_features[0])
        if len_non_zero_features == 0:
            raise ValueError("No features in array")

        if set(sdm_signal) == {0}:
            # Nothing to fit; return an ndarray (not a list) so both branches
            # honour the annotated return type.
            return np.zeros(transformation.shape[1])

        omp = OrthogonalMatchingPursuit(
            n_nonzero_coefs=len_non_zero_features)
        omp.fit(transformation, sdm_signal)
        cs1_signal = omp.coef_
        cs1_signal[cs1_signal != 0] = 1
        return cs1_signal
    except Exception as error:  # deliberate best-effort: report, don't raise
        if callable(error_handler):
            error_handler(error)
        else:
            print(error)
    return None
def _omp(*, train, test, x_predict=None, metrics, n_nonzero_coefs=None,
         tol=None, fit_intercept=True, normalize=True, precompute='auto'):
    """Fit an OrthogonalMatchingPursuit model and score it on the test split.

    For more info visit :
    https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.OrthogonalMatchingPursuit.html#sklearn.linear_model.OrthogonalMatchingPursuit

    Parameters
    ----------
    train, test : pairs ``(X, y)``; index 0 is the features, index 1 the target.
    x_predict : optional features to predict after fitting.
    metrics : one of ``'mse'``, ``'rmse'``, ``'mae'``.
    n_nonzero_coefs, tol, fit_intercept, normalize, precompute :
        forwarded unchanged to ``OrthogonalMatchingPursuit``.

    Returns
    -------
    tuple
        ``(model_name, accuracy, y_predict)``; ``y_predict`` is ``None``
        when ``x_predict`` is ``None``.

    Raises
    ------
    ValueError
        If ``metrics`` is not one of the supported names.
    """
    # Fail fast: previously an unknown metric surfaced only after fitting,
    # as an UnboundLocalError on ``accuracy``.
    if metrics not in ('mse', 'rmse', 'mae'):
        raise ValueError(
            "metrics must be 'mse', 'rmse' or 'mae'; got %r" % (metrics,))

    model = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs,
                                      tol=tol,
                                      fit_intercept=fit_intercept,
                                      normalize=normalize,
                                      precompute=precompute)
    model.fit(train[0], train[1])
    model_name = 'OrthogonalMatchingPursuit'
    y_hat = model.predict(test[0])

    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    else:
        accuracy = _mae(test[1], y_hat)

    if x_predict is None:
        return (model_name, accuracy, None)
    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
def Linear_Regression(R_data):
    """Regress the first security on the factor columns with OMP.

    ``R_data`` is an n x m table with n observations and m columns; each
    column is the time series for one ticker name. The factor columns are
    taken from the module-level ``factors`` and the response is the column
    named by ``securities[0]``.

    Returns the fitted ``OrthogonalMatchingPursuit`` estimator.
    """
    factor_columns = [R_data[name] for name in factors]
    response = R_data[securities[0]]
    design = pd.concat(factor_columns, axis=1)

    # Allow as many non-zero coefficients as there are factors.
    model = OrthogonalMatchingPursuit(n_nonzero_coefs=len(factors),
                                      fit_intercept=True)
    model.fit(design, response)
    return model
def fit_predict_omp(self, X, y=None):
    """Cluster the rows of ``X`` via sparse self-expression and spectral clustering.

    ``X`` is mapped through ``NRP_ELM(self.n_hidden, sparse=False)``; every
    mapped row is then regressed with OMP on all the other rows, the
    coefficients fill one column of an ``n x n`` matrix, and the symmetrised
    absolute matrix is stored in ``self.affinity_matrix`` and clustered into
    ``self.n_clusters`` groups. ``y`` is unused.

    Returns the ``labels_`` of the fitted ``SpectralClustering``.
    """
    n_sample = X.shape[0]
    hidden = NRP_ELM(self.n_hidden, sparse=False).fit(X).predict(X)
    coef_matrix = np.zeros((n_sample, n_sample))
    budget = int(n_sample * 0.5)

    # Express each sample as a sparse combination of the remaining ones.
    for col in range(n_sample):
        target = hidden[col]
        others = np.delete(hidden, col, axis=0)
        # NOTE(review): tol is given together with n_nonzero_coefs;
        # scikit-learn documents that tol overrides n_nonzero_coefs --
        # confirm that 1e20 is intended.
        solver = OrthogonalMatchingPursuit(n_nonzero_coefs=budget, tol=1e20)
        solver.fit(others.transpose(), target)
        # Scale the column by its largest absolute coefficient.
        scaled = solver.coef_ / np.max(np.abs(solver.coef_))
        # Re-insert around the diagonal entry that was left out.
        coef_matrix[:col, col] = scaled[:col]
        coef_matrix[col + 1:, col] = scaled[col:]

    # Symmetric, non-negative affinity graph.
    magnitude = np.abs(coef_matrix)
    self.affinity_matrix = 0.5 * (magnitude + magnitude.T)

    clusterer = SpectralClustering(n_clusters=self.n_clusters,
                                   affinity='precomputed')
    clusterer.fit(self.affinity_matrix)
    return clusterer.labels_
def omp0(y, A, normalize=False, tol=1.0e-6, verbose=False):
    r"""Solve ``y ~ A x`` with Orthogonal Matching Pursuit and return ``x``.

    Arguments
    ---------------------
        y -- target passed to the estimator's ``fit``
        A -- design matrix passed to the estimator's ``fit``

    Keyword Arguments
    ---------------------
        normalize {boolean} -- forwarded to ``OrthogonalMatchingPursuit``
            (default: {False})
        tol {float} -- forwarded to ``OrthogonalMatchingPursuit``
            (default: {1.0e-6})
        verbose {boolean} -- print progress banners (default: {False})
    """
    def _say(message):
        if verbose:
            print(message)

    _say("================in omp================")
    _say("===Do OMP...")

    solver = OrthogonalMatchingPursuit(normalize=normalize, tol=tol)
    solver.fit(A, y)
    solution = solver.coef_

    _say("===Done!")
    return solution
def plot_omp():
    """Plot a sparse signal and three OMP reconstructions of it.

    The four stacked panels show the true sparse code, the recovery from
    noise-free measurements, the recovery from noisy measurements, and the
    noisy recovery with the sparsity chosen by cross-validation.
    """
    n_components, n_features = 512, 100
    n_nonzero_coefs = 17

    def draw_panel(row, title, weights):
        # One stem plot of the non-zero entries of ``weights``.
        support, = weights.nonzero()
        plt.subplot(4, 1, row)
        plt.xlim(0, 512)
        plt.title(title)
        plt.stem(support, weights[support], use_line_collection=True)

    # Generate the data: y = Xw with |w|_0 = n_nonzero_coefs.
    y, X, w = make_sparse_coded_signal(n_samples=1,
                                       n_components=n_components,
                                       n_features=n_features,
                                       n_nonzero_coefs=n_nonzero_coefs,
                                       random_state=0)

    # Distort the clean signal.
    y_noisy = y + 0.05 * np.random.randn(len(y))

    plt.figure(figsize=(7, 7))
    draw_panel(1, "Sparse signal", w)

    fixed = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    fixed.fit(X, y)
    draw_panel(2, "Recovered signal from noise-free measurements", fixed.coef_)

    fixed.fit(X, y_noisy)
    draw_panel(3, "Recovered signal from noisy measurements", fixed.coef_)

    # Number of non-zeros chosen by cross-validation.
    tuned = OrthogonalMatchingPursuitCV()
    tuned.fit(X, y_noisy)
    draw_panel(4, "Recovered signal from noisy measurements with CV",
               tuned.coef_)

    plt.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)
    plt.suptitle('Sparse signal recovery with Orthogonal Matching Pursuit',
                 fontsize=16)
    plt.show()
def test_estimator():
    """Check coef_/intercept_ shapes and sparsity for 1-D and 2-D targets."""
    first_target = y[:, 0]
    estimator = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)

    # Default parameters: single target, then all targets.
    default_cases = (
        (first_target, (n_features, ), (), n_nonzero_coefs),
        (y, (n_targets, n_features), (n_targets, ),
         n_targets * n_nonzero_coefs),
    )
    for target, coef_shape, intercept_shape, budget in default_cases:
        estimator.fit(X, target)
        assert estimator.coef_.shape == coef_shape
        assert estimator.intercept_.shape == intercept_shape
        assert np.count_nonzero(estimator.coef_) <= budget

    # The first row of the multi-target fit must match a single-target fit
    # done with normalize=False.
    coef_normalized = estimator.coef_[0].copy()
    estimator.set_params(fit_intercept=True, normalize=False)
    estimator.fit(X, first_target)
    assert_array_almost_equal(coef_normalized, estimator.coef_)

    # Without an intercept the intercept_ attribute compares equal to 0.
    estimator.set_params(fit_intercept=False, normalize=False)
    estimator.fit(X, first_target)
    assert np.count_nonzero(estimator.coef_) <= n_nonzero_coefs
    assert estimator.coef_.shape == (n_features, )
    assert estimator.intercept_ == 0

    estimator.fit(X, y)
    assert estimator.coef_.shape == (n_targets, n_features)
    assert estimator.intercept_ == 0
    assert np.count_nonzero(estimator.coef_) <= n_targets * n_nonzero_coefs
def test_estimator():
    """Verify estimator output shapes and sparsity with and without intercept."""
    y_single = y[:, 0]
    multi_budget = n_targets * n_nonzero_coefs
    model = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)

    # Single target, default parameters.
    model.fit(X, y_single)
    assert_equal(model.coef_.shape, (n_features,))
    assert_equal(model.intercept_.shape, ())
    assert np.count_nonzero(model.coef_) <= n_nonzero_coefs

    # All targets, default parameters.
    model.fit(X, y)
    assert_equal(model.coef_.shape, (n_targets, n_features))
    assert_equal(model.intercept_.shape, (n_targets,))
    assert np.count_nonzero(model.coef_) <= multi_budget

    # Compare against a refit with normalize switched off.
    reference_coef = model.coef_[0].copy()
    model.set_params(fit_intercept=True, normalize=False)
    model.fit(X, y_single)
    assert_array_almost_equal(reference_coef, model.coef_)

    # No intercept: single target.
    model.set_params(fit_intercept=False, normalize=False)
    model.fit(X, y_single)
    assert np.count_nonzero(model.coef_) <= n_nonzero_coefs
    assert_equal(model.coef_.shape, (n_features,))
    assert_equal(model.intercept_, 0)

    # No intercept: all targets.
    model.fit(X, y)
    assert_equal(model.coef_.shape, (n_targets, n_features))
    assert_equal(model.intercept_, 0)
    assert np.count_nonzero(model.coef_) <= multi_budget
def Linear_Regression(R_data):
    """Fit an OMP regression of ``securities[0]`` on every entry of ``factors``.

    The R_data is an n x m matrix with n observations and m columns; each
    column is the time series for one ticker name. ``factors`` and
    ``securities`` are read from module scope.

    Returns the fitted estimator.
    """
    # Gather the explanatory series in the order given by ``factors``.
    explanatory = []
    for factor_name in factors:
        explanatory.append(R_data[factor_name])

    dependent = R_data[securities[0]]
    regressors = pd.concat(explanatory, axis=1)

    estimator = OrthogonalMatchingPursuit(
        n_nonzero_coefs=len(factors),
        fit_intercept=True,
    )
    estimator.fit(regressors, dependent)
    return estimator
def csper(t, y, fmin=None, fmax=None, nfreqs=5000, nsines=4, polyorder=2,
          sig=5):
    """Sparse periodogram: fit sines, cosines and a polynomial trend with OMP.

    Parameters
    ----------
    t, y : arrays of sample times and values (NaN-aware statistics are used
        for the defaults and for centring ``y``).
    fmin, fmax : frequency grid limits; default to ``1 / (time span)`` and
        ``2 / (median sampling step)``.
    nfreqs : number of grid frequencies.
    nsines : number of non-zero harmonic coefficients to allow.
    polyorder : number of polynomial columns (powers ``polyorder`` .. 1).
    sig : width passed to ``gaussian_filter1d`` for the smoothed amplitude.

    Returns
    -------
    dict with keys ``'Frequencies'``, ``'Raw_Amplitudes'`` (all sine then
    cosine coefficients), ``'Polynomial'``, ``'Reconstruction'`` (design
    matrix times coefficients) and ``'Amplitude'`` (smoothed quadrature sum).
    """
    trange = np.nanmax(t) - np.nanmin(t)
    dt = np.abs(np.nanmedian(t - np.roll(t, -1)))
    nt = np.size(t)

    # make defaults
    if fmin is None:
        fmin = 1. / trange
    if fmax is None:
        fmax = 2. / dt

    freqs = np.linspace(fmin, fmax, nfreqs)

    # Column layout: [sines | cosines | polynomial terms].
    nharm = 2 * nfreqs
    X = np.zeros((nt, nharm + polyorder))
    phase = np.outer(t, freqs)
    X[:, :nfreqs] = np.sin(phase)
    X[:, nfreqs:nharm] = np.cos(phase)

    # Polynomial columns go strictly after the harmonics. Indexing them as
    # ``-j`` put the j == 0 term in column 0 (clobbering the first sine) and
    # left one trailing column empty.
    for j in range(polyorder):
        X[:, nharm + j] = t ** (polyorder - j)

    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=nsines + polyorder)
    omp.fit(X, y - np.nanmedian(y))
    coef = omp.coef_

    # Slice by explicit position so polyorder == 0 works
    # (``coef[:-0]`` would be empty).
    harmonic = coef[:nharm]
    polynomial = coef[nharm:]

    idx_r, = harmonic.nonzero()
    sines = freqs[idx_r[idx_r < nfreqs]]
    # ``>=``: index nfreqs is the first cosine column.
    cosines = freqs[idx_r[idx_r >= nfreqs] - nfreqs]

    # %-formatting prints the same text on Python 2 and Python 3.
    print('Sine components: %s' % (sines,))
    print('Cosine components: %s' % (cosines,))

    amp_raw = np.sqrt(harmonic[:nfreqs] ** 2. + harmonic[nfreqs:] ** 2)
    amp = gaussian_filter1d(amp_raw, sig)

    recon = np.dot(X, coef)

    output = {
        'Frequencies': freqs,
        'Raw_Amplitudes': harmonic,
        'Polynomial': polynomial,
        'Reconstruction': recon,
        'Amplitude': amp
    }
    return output
class image_data_repo:
    """Keep image feature vectors and test new images against them with OMP."""

    def __init__(self, name='image_', image_feature_dict=None,
                 reconsitution_element_nums=6, error_limit=0.1):
        """Store a private copy of the features and build the OMP estimator.

        ``image_feature_dict`` maps a key (used as a file name by
        ``update``) to a feature vector; ``None`` means an empty repository.
        """
        self.name = name
        # Copy so the repository never aliases the caller's dictionary.
        self.image_feature_dict = (
            {} if image_feature_dict is None else image_feature_dict.copy())
        self.reconsitution_element_nums = reconsitution_element_nums
        self.error_limit = error_limit
        self.omp = OrthogonalMatchingPursuit(
            n_nonzero_coefs=reconsitution_element_nums)

    def image_nums(self):
        """Return the number of stored images."""
        # len() instead of np.size(keys()): on Python 3 a dict view is
        # wrapped as a 0-d object array, so np.size always reported 1.
        return len(self.image_feature_dict)

    def add_element(self, keys, values):
        """Insert or replace the feature vector stored under ``keys``."""
        self.image_feature_dict[keys] = values

    def use_image(self, image_feature):
        """Return ``(keep, err)`` for a candidate feature vector.

        ``err`` is ``1 - score`` of an OMP fit of ``image_feature`` on the
        stored vectors; ``keep`` is ``False`` when ``err`` is below
        ``self.error_limit`` and ``True`` otherwise.
        """
        # list(...) is required on Python 3, where values() is a view.
        data = np.array(list(self.image_feature_dict.values())).T
        self.omp.fit(data, image_feature)
        err = 1 - self.omp.score(data, image_feature)
        if err < self.error_limit:
            return False, err
        return True, err

    def update(self):
        """Drop one stored image and move its file from ~/caffe to ~/rubbish."""
        names = list(self.image_feature_dict.keys())
        data = np.array([self.image_feature_dict[key] for key in names])
        # NOTE(review): np.amax reduces the product to a scalar, so the
        # argmax below is always 0 and the first stored image is removed.
        # A per-image similarity was presumably intended -- confirm before
        # changing the selection rule.
        similar_coef = np.amax(np.dot(data.T, data))
        filename = names[np.argmax(similar_coef)]
        self.image_feature_dict.pop(filename)
        # NOTE(review): the file name is interpolated into a shell command;
        # only safe while keys are trusted.
        os.system('cp ~/caffe/{} ~/rubbish/'.format(filename))
        os.system('rm -f ~/caffe/{}'.format(filename))
        return
def omp_batch_recon(signal, ind, target_pts, n_nonzero_coefs=20,
                    transform='dct', retCoefs=False):
    """
    Reconstruct a downsampled signal with batch Orthogonal Matching Pursuit.

    The rows ``ind`` of an inverse DCT/DST basis of size ``target_pts`` form
    the sensing matrix; OMP finds sparse transform coefficients that fit
    ``signal``, and their inverse transform (plus the mean of ``signal``,
    then passed through ``utils.normalize``) is the reconstruction.

    Parameters
    ----------
    signal : list
        the downsampled signal to reconstruct
    ind : list
        positions of the downsampled points in the full-length signal
    target_pts : integer
        number of points of the reconstructed signal
    n_nonzero_coefs : integer
        number of non-zero transform coefficients to allow
    transform : 'dct' or 'dst'
        'dst' selects the sine transform; any other value uses the cosine
        transform
    retCoefs : boolean
        if True, also return the transform coefficients

    Returns
    -------
    x : list
        the reconstructed signal
    coef : list
        the transform coefficients (only when ``retCoefs`` is True)
    """
    inverse_transform = spfft.idst if transform == 'dst' else spfft.idct

    # Full inverse-transform basis, restricted to the sampled rows.
    basis = inverse_transform(np.identity(target_pts), axis=0)
    sensing = basis[ind]

    solver = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    solver.fit(sensing, signal)
    coef = solver.coef_

    x = inverse_transform(coef, axis=0) + np.mean(signal)
    x = utils.normalize(x)

    return (x, coef) if retCoefs else x
def orthogonal_matching_pursuit(A, y, sparsity_level, **kwargs):
    """
    Timed wrapper around the ``OrthogonalMatchingPursuit`` estimator.

    Fits ``y`` on ``A`` with at most ``sparsity_level`` non-zero
    coefficients (extra keyword arguments are accepted but ignored) and
    returns ``(coefs, elapsed_time, support)``, where ``elapsed_time`` covers
    construction plus fitting and ``support`` holds the indices of the
    non-zero coefficients.
    """
    tic = timer()
    solver = OrthogonalMatchingPursuit(n_nonzero_coefs=sparsity_level)
    solver.fit(A, y)
    elapsed_time = timer() - tic

    coefs = solver.coef_
    return coefs, elapsed_time, coefs.nonzero()[0]
def test_omp_cv():
    """The CV estimator must recover the true sparsity and match a plain refit."""
    target = y[:, 0]
    expected_coef = gamma[:, 0]

    cv_model = OrthogonalMatchingPursuitCV(normalize=True,
                                           fit_intercept=False,
                                           max_iter=10,
                                           cv=5)
    cv_model.fit(X, target)
    assert_equal(cv_model.n_nonzero_coefs_, n_nonzero_coefs)
    assert_array_almost_equal(cv_model.coef_, expected_coef)

    # A plain estimator given the selected sparsity yields the same code.
    refit = OrthogonalMatchingPursuit(
        normalize=True,
        fit_intercept=False,
        n_nonzero_coefs=cv_model.n_nonzero_coefs_)
    refit.fit(X, target)
    assert_array_almost_equal(cv_model.coef_, refit.coef_)
def GetNeighborDims(data, paras):
    """Regress every column of ``data`` on all other columns with OMP.

    Parameters
    ----------
    data : 2-D array, shape ``(ndata, ndim)``.
    paras : mapping; ``paras["kND"]`` is the number of non-zero coefficients
        allowed per regression.

    Returns
    -------
    w : float32 array, shape ``(ndim - 1, ndim)``; column ``kk`` holds the
        coefficients of column ``kk`` expressed in the remaining columns.
    """
    _, ndim = data.shape
    kND = paras["kND"]
    objOMP = OMP(n_nonzero_coefs=kND)
    # Builtin bool: the ``bool`` alias on the NumPy module was removed in
    # NumPy 1.24.
    idxDict = npy.ones(ndim, dtype=bool)
    w = npy.zeros((ndim - 1, ndim), dtype=npy.float32)
    for kk in range(ndim):
        # Select every column except the one being predicted.
        idxDict.fill(True)
        idxDict[kk] = False
        objOMP.fit(data[:, idxDict], data[:, kk])
        w[:, kk] = objOMP.coef_.astype(npy.float32)
    return w
def test_omp_reaches_least_squares():
    """With all features allowed, OMP must agree with ordinary least squares."""
    # Small simple data: a sanity check, since OMP is allowed to stop early.
    n_samples, n_features, n_targets = 10, 8, 3
    rng = check_random_state(0)
    design = rng.randn(n_samples, n_features)
    targets = rng.randn(n_samples, n_targets)

    sparse_model = OrthogonalMatchingPursuit(n_nonzero_coefs=n_features)
    dense_model = LinearRegression()
    sparse_model.fit(design, targets)
    dense_model.fit(design, targets)

    assert_array_almost_equal(sparse_model.coef_, dense_model.coef_)
def test_omp_reaches_least_squares():
    """OMP allowed to use every feature should reproduce the least-squares fit."""
    # Use small simple data; it's a sanity check but OMP can stop early.
    shape_X = (10, 8)
    shape_Y = (shape_X[0], 3)
    random_source = check_random_state(0)
    X = random_source.randn(*shape_X)
    Y = random_source.randn(*shape_Y)

    fitted_omp = OrthogonalMatchingPursuit(n_nonzero_coefs=shape_X[1])
    fitted_lstsq = LinearRegression()
    fitted_omp.fit(X, Y)
    fitted_lstsq.fit(X, Y)

    assert_array_almost_equal(fitted_omp.coef_, fitted_lstsq.coef_)
def _estimate_X(self, Y, A):
    """Return the transposed OMP coefficients of ``Y`` on the column-normalised ``A``.

    The sparsity is ``self.num_of_NZ`` or, when that is ``None``, 10% of the
    number of columns of ``A`` rounded up. Note that ``A`` is modified in
    place: every column is divided by its norm (floored at 1e-20).
    """
    sparsity = self.num_of_NZ
    if sparsity is None:
        sparsity = np.ceil(0.1 * A.shape[1])
    solver = OrthogonalMatchingPursuit(n_nonzero_coefs=int(sparsity))

    # Unit-normalise the columns; the floor avoids dividing by zero.
    for j in range(A.shape[1]):
        scale = max(np.linalg.norm(A[:, j]), 1e-20)
        A[:, j] /= scale

    solver.fit(A, Y)
    return solver.coef_.T
def classify_OMP(train, test):
    """Fit OMP on vector-encoded training labels and predict for the test inputs.

    ``train`` and ``test`` are ``(inputs, labels)`` pairs. Each training
    label is encoded with ``tovec(label, n_classes)``, where ``n_classes`` is
    the number of distinct training labels. The test labels are not used.

    Returns the estimator's predictions for the test inputs.
    """
    from sklearn.linear_model import OrthogonalMatchingPursuit as OMP

    train_inputs, train_labels = train
    n_classes = np.unique(train_labels).shape[0]
    encoded = [tovec(label, n_classes) for label in train_labels]

    model = OMP()
    model.fit(train_inputs, encoded)

    test_inputs, _ = test
    return model.predict(test_inputs)
def constrained_binary_solve(w, psi, fit_intercept=True, normalize=True,
                             precompute="auto"):
    """Fit ``w`` on ``psi`` with OMP (``tol=0``) and return the coefficients.

    ``w`` must be one-dimensional; ``fit_intercept``, ``normalize`` and
    ``precompute`` are forwarded to ``OrthogonalMatchingPursuit``.

    Raises ``ValueError`` when ``w`` is not one-dimensional.
    """
    if ndim(w) == 1:
        solver = OrthogonalMatchingPursuit(
            tol=0,
            fit_intercept=fit_intercept,
            normalize=normalize,
            precompute=precompute,
        )
        solver.fit(psi, w)
        return solver.coef_

    raise ValueError(
        f"w must be a 1D vector; received a vector of dimension {ndim(w)}"
    )
def fit_model_14(self, toWrite=False):
    """Fit OMP on every split in ``self.cv_data`` and print its log loss.

    Each element of ``self.cv_data`` unpacks to
    ``(X_train, X_test, Y_train, Y_test)``. When ``toWrite`` is true the
    estimator (as left by the last fit) is pickled to ``model14/model.pkl``.
    """
    model = OrthogonalMatchingPursuit()

    for data in self.cv_data:
        X_train, X_test, Y_train, Y_test = data
        model.fit(X_train, Y_train)
        pred = model.predict(X_test)
        print("Model 14 score %f" % (logloss(Y_test, pred),))

    if toWrite:
        # pickle needs a binary file; the context manager guarantees the
        # handle is closed even if dumping fails.
        with open('model14/model.pkl', 'wb') as f2:
            pickle.dump(model, f2)
def _solver_OMP(A, b, K):
    """
    Find a K-sparse solution to Ax = b with Orthogonal Matching Pursuit.

    @param A  Matrix passed to the estimator as the design matrix.
    @param b  Right-hand side passed as the target.
    @param K  Sparsity of the solution.
    @return   The fitted coefficient vector.
    """
    from sklearn.linear_model import OrthogonalMatchingPursuit as OMP

    return OMP(n_nonzero_coefs=K).fit(A, b).coef_
def test_estimator():
    """Check shapes, sparsity and the warnings raised when Gram/Xy are passed."""
    single = y[:, 0]
    estimator = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)

    estimator.fit(X, single)
    assert_equal(estimator.coef_.shape, (n_features, ))
    assert_equal(estimator.intercept_.shape, ())
    assert_true(count_nonzero(estimator.coef_) <= n_nonzero_coefs)

    estimator.fit(X, y)
    assert_equal(estimator.coef_.shape, (n_targets, n_features))
    assert_equal(estimator.intercept_.shape, (n_targets, ))
    assert_true(count_nonzero(estimator.coef_) <= n_targets * n_nonzero_coefs)

    estimator.set_params(fit_intercept=False, normalize=False)

    # Each fit that receives Gram and Xy is expected to emit two warnings.
    gram_cases = (
        (single, Xy[:, 0], (n_features, ), n_nonzero_coefs),
        (y, Xy, (n_targets, n_features), n_targets * n_nonzero_coefs),
    )
    for target, xy_arg, coef_shape, budget in gram_cases:
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter('always')
            estimator.fit(X, target, Gram=G, Xy=xy_arg)
            assert_equal(estimator.coef_.shape, coef_shape)
            assert_equal(estimator.intercept_, 0)
            assert_true(count_nonzero(estimator.coef_) <= budget)
            assert_true(len(caught) == 2)
def cross_OMP(D1, D2, Y1, Y2):
    '''
    Sparse-code both data matrices against both dictionaries.

    Input:
        D1: Dictionary of Train     [256 * n_components]
        D2: Dictionary of Clapping  [256 * n_components]
        Y1: Data matrix of a SINGLE sample [256 * n]
        Y2: Data matrix of a SINGLE sample [256 * n]
            (the original note labelled both Y1 and Y2 as "Train" --
            presumably Y2 is the Clapping sample; TODO confirm)
    Output:
        X1, X2: Concatenated feature vectors [2 * n_components * n];
            X1 stacks the codes of Y1 and Y2 under D1, X2 those under D2.

    ** n = 861 **
    '''
    def sparse_codes(dictionary, signals):
        # 5-sparse OMP code of ``signals`` over ``dictionary``.
        coder = OrthogonalMatchingPursuit(n_nonzero_coefs=5)
        coder.fit(dictionary, signals)
        return coder.coef_

    X1 = np.hstack((sparse_codes(D1, Y1), sparse_codes(D1, Y2))).T
    X2 = np.hstack((sparse_codes(D2, Y1), sparse_codes(D2, Y2))).T
    return X1, X2
class _OrthogonalMatchingPursuitImpl:
    """Thin adapter that forwards ``fit``/``predict`` to a wrapped ``Op`` model."""

    def __init__(self, **hyperparams):
        """Remember the hyperparameters and build the wrapped model from them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model (``y`` is passed only when given); return self."""
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's prediction for ``X``."""
        return self._wrapped_model.predict(X)
def test_omp_cv():
    """Cross-validated OMP recovers the sparsity level and the known code."""
    # FIXME: This test is unstable on Travis, see issue #3190 for more detail.
    check_skip_travis()

    first_target = y[:, 0]
    true_code = gamma[:, 0]

    searcher = OrthogonalMatchingPursuitCV(normalize=True,
                                           fit_intercept=False,
                                           max_iter=10, cv=5)
    searcher.fit(X, first_target)
    assert_equal(searcher.n_nonzero_coefs_, n_nonzero_coefs)
    assert_array_almost_equal(searcher.coef_, true_code)

    # Refitting with the selected sparsity must give the same coefficients.
    plain = OrthogonalMatchingPursuit(
        normalize=True, fit_intercept=False,
        n_nonzero_coefs=searcher.n_nonzero_coefs_)
    plain.fit(X, first_target)
    assert_array_almost_equal(searcher.coef_, plain.coef_)
def solve_preconditioned_orthogonal_matching_pursuit(basis_matrix_func,
                                                     samples, values,
                                                     precond_func, tol=1e-8):
    """Solve a row-weighted regression with OMP and return a column of coefficients.

    ``basis_matrix_func(samples)`` builds the basis matrix and
    ``precond_func(basis_matrix, samples)`` returns one weight per row; both
    the basis matrix and ``values`` are scaled row-wise by those weights
    before fitting (no intercept, stopping tolerance ``tol``). The number of
    non-zero terms is printed.
    """
    from sklearn.linear_model import OrthogonalMatchingPursuit

    unweighted = basis_matrix_func(samples)
    weights = precond_func(unweighted, samples)
    row_scale = weights[:, np.newaxis]

    design = unweighted * row_scale
    rhs = values * row_scale

    solver = OrthogonalMatchingPursuit(tol=tol, fit_intercept=False)
    solver.fit(design, rhs)
    coef = solver.coef_
    print('nnz_terms', np.count_nonzero(coef))
    return coef[:, np.newaxis]
def omp_2D(dictionary, samples, n_nonzero_coefs, params=[]):
    """2D Orthogonal Matching Pursuit over a separable (Kronecker) dictionary.

    Parameters
    ----------
    dictionary : pair ``(D0, D1)`` of 2-D arrays; ``D0`` acts on the rows
        and ``D1`` on the columns of each sample.
    samples : 3-D array, shape ``(n_samples, n_rows, n_cols)``.
    n_nonzero_coefs : number of non-zero coefficients per sample.
    params : unused; kept for interface compatibility.

    Returns
    -------
    codes : array, shape ``(n_samples, D0.shape[1], D1.shape[1])``.
    err : squared Frobenius norm of the reconstruction residual.
    """
    n_samples, n_rows, n_cols = (samples.shape[0], samples.shape[1],
                                 samples.shape[2])
    n_atoms_0 = dictionary[0].shape[1]
    n_atoms_1 = dictionary[1].shape[1]

    ompfun = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs,
                                       fit_intercept=False,
                                       normalize=False,
                                       precompute=True)

    # Column-major vectorisation of every sample, one sample per column.
    samples_vec = np.zeros((n_rows * n_cols, n_samples))
    for i in range(n_samples):
        samples_vec[:, i] = samples[i, :, :].T.reshape(n_rows * n_cols)

    dictionary_vec = np.kron(dictionary[1], dictionary[0])

    # For a single sample the estimator returns a 1-D coef_; promote it so
    # the code matrix always has one column per sample.
    codes_vec = np.atleast_2d(
        ompfun.fit(dictionary_vec, samples_vec).coef_).T

    err = np.linalg.norm(samples_vec - dictionary_vec @ codes_vec, 'fro')**2

    codes = np.zeros((n_samples, n_atoms_0, n_atoms_1))
    for i in range(n_samples):
        # Undo the column-major vectorisation: reshape to (n_atoms_1,
        # n_atoms_0) and transpose. The dimensions were swapped before, which
        # only worked when both dictionaries had the same number of atoms.
        codes[i, :, :] = codes_vec[:, i].reshape((n_atoms_1, n_atoms_0)).T
    return codes, err
def CS_(real2, imag2):
    """Recover integer samples from compressed real/imaginary measurements.

    Two OMP fits against the module-level matrix ``Q`` give the real and
    imaginary coefficient vectors. Each is extended to a mirrored spectrum
    (entry 0, entries 1..M-2, entry M-1, then entries 1..M-2 reversed, with
    the mirrored imaginary part negated). The first ``N`` complex values are
    inverse-FFT'd and the real part is truncated to ``int``.

    Returns a list of Python ints.
    """
    gc.collect()

    real_solver = OrthogonalMatchingPursuit()
    real_solver.fit(Q, real2)
    real_coef = real_solver.coef_

    imag_solver = OrthogonalMatchingPursuit()
    imag_solver.fit(Q, imag2)
    imag_coef = imag_solver.coef_

    def flat_join(*pieces):
        # Same result as chaining np.append: ravel everything, then join.
        return np.concatenate([np.ravel(piece) for piece in pieces])

    real_mid = real_coef[1:M - 1]
    imag_mid = imag_coef[1:M - 1]
    real_spectrum = flat_join([], real_coef[0], real_mid,
                              real_coef[M - 1], real_mid[::-1])
    imag_spectrum = flat_join([], imag_coef[0], imag_mid,
                              imag_coef[M - 1], imag_mid[::-1] * -1)

    spectrum = np.array(
        [complex(real_spectrum[k], imag_spectrum[k]) for k in range(N)],
        dtype=complex)

    samples = np.array(np.fft.ifft(spectrum).real).tolist()
    return [int(value) for value in samples]
def fit_predict_omp(self, X, y=None):
    """Select one representative column of ``X`` per cluster.

    Every column of ``X`` is regressed with OMP on all other columns, giving
    a self-expression matrix. Its rows are grouped with k-means into
    ``self.n_clusters`` clusters, and for each cluster the row closest to the
    cluster mean identifies the selected column. ``y`` is unused.

    Returns ``X`` restricted to the selected columns (their indices are
    printed).
    """
    bands = X.transpose()
    n_sample = bands.shape[0]
    coef_matrix = np.zeros((n_sample, n_sample))
    budget = int(n_sample * 0.5)

    # Sparse self-expressive representation, one column per band.
    for col in range(n_sample):
        target = bands[col]
        others = np.delete(bands, col, axis=0)
        # NOTE(review): tol is given together with n_nonzero_coefs;
        # scikit-learn documents that tol overrides n_nonzero_coefs --
        # confirm that 1e20 is intended.
        solver = OrthogonalMatchingPursuit(n_nonzero_coefs=budget, tol=1e20)
        solver.fit(others.transpose(), target)
        # Scale by the largest absolute coefficient.
        scaled = solver.coef_ / np.max(np.abs(solver.coef_))
        coef_matrix[:col, col] = scaled[:col]
        coef_matrix[col + 1:, col] = scaled[col:]

    # K-means clustering of the rows of the coefficient matrix.
    label = KMeans(n_clusters=self.n_clusters, max_iter=500).fit(
        coef_matrix).labels_

    band_index = []
    for cluster in np.unique(label):
        members = coef_matrix[label == cluster]
        centre = members.mean(axis=0)
        distances = pairwise_distances(
            members, centre.reshape((1, members.shape[1]))).flatten()
        closest_row = members[np.argmin(distances), :]
        # Map the chosen row back to its position in the full matrix.
        matches = np.flatnonzero(np.all(coef_matrix == closest_row, axis=1))
        band_index.append(matches[0])

    BandData = X[:, band_index]
    print('selected band:', band_index)
    return BandData
def SparseDeconvolution(x, y, p, rtype='omp'):
    """Sparse deconvolution of ``y`` by the pulse ``x``.

    ``x`` is scaled by its largest-magnitude sample and then to unit norm;
    ``y`` is scaled by the same sample. A Toeplitz convolution matrix built
    from ``x`` is fit to the zero-padded ``y`` with a sparse regressor.

    Parameters
    ----------
    x, y : 1-D arrays (pulse and measured signal).
    p : regulariser. With ``rtype='omp'`` an integer is the number of
        non-zero coefficients and a value below 1.0 is the residual
        tolerance; with ``rtype='lasso'`` it is ``alpha``.
    rtype : ``'omp'`` or ``'lasso'``.

    Returns
    -------
    r, h : the padded signal minus the fitted intercept, and the fitted
        coefficients, both cut to ``len(y)`` samples starting at
        ``int(len(x) / 2) - 1``.

    Raises
    ------
    ValueError
        If ``rtype``/``p`` do not select a model.
    """
    from numbers import Integral
    from numpy import zeros, hstack
    from scipy.linalg import toeplitz, norm
    from sklearn.linear_model import OrthogonalMatchingPursuit, Lasso

    xm = x[abs(x).argmax()]
    x = (x.copy()) / xm
    x = x / norm(x)
    y = (y.copy()) / xm

    Nx = len(x)
    Ny = len(y)
    X = toeplitz(hstack((x, zeros(Nx + Ny - 2))), r=zeros(Ny + Nx - 1))
    Y = hstack((zeros(Nx - 1), y, zeros(Nx - 1)))

    # Integral also accepts NumPy integer scalars; bool is excluded because
    # it was never treated as a coefficient count.
    p_is_count = isinstance(p, Integral) and not isinstance(p, bool)
    if rtype == 'omp' and p_is_count:
        model = OrthogonalMatchingPursuit(n_nonzero_coefs=p, normalize=True)
    elif rtype == 'omp' and p < 1.0:
        model = OrthogonalMatchingPursuit(tol=p, normalize=True)
    elif rtype == 'lasso':
        model = Lasso(alpha=p)
    else:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError(
            "unsupported combination rtype=%r, p=%r" % (rtype, p))

    model.fit(X, Y)
    h = model.coef_
    b = model.intercept_
    r = Y - b

    start = int(len(x) / 2) - 1
    stop = start + len(y)
    return r[start:stop], h[start:stop]
def ICA_decompose(image, block_size, basis, tol):
    """
    Uses Orthogonal Matching Pursuit to decompose an image to a given tolerance.

    Takes as input a single image, a block size, a basis, and a tolerance.
    The image is split with ``blockDecompose`` and the blocks are flattened
    with ``vectorizeBlocks``; the transposed basis is the design matrix and
    the transposed block vectors are the targets.

    Returns the estimator's ``coef_``, ``intercept_`` and ``n_iter_``.
    """
    from sklearn.linear_model import OrthogonalMatchingPursuit
    from blocks import vectorizeBlocks, blockDecompose
    import numpy as np

    omp = OrthogonalMatchingPursuit(tol=tol, normalize=True)
    blocks = blockDecompose(image, block_size)
    vectorized_blocks = vectorizeBlocks(blocks, block_size)

    # Plain 2-D ndarray instead of np.matrix: recent scikit-learn releases
    # reject np.matrix inputs, and atleast_2d gives the same 2-D layout.
    design = np.atleast_2d(np.asarray(basis)).T
    omp.fit(design, np.transpose(vectorized_blocks))
    return omp.coef_, omp.intercept_, omp.n_iter_
def update(self, x, t):
    """
    Add one observation and refit the skill's success model.

    The pair is ignored when an identical ``(x, t)`` is already stored.
    Once more than two samples with at least two distinct targets exist,
    OMP (tolerance ``self.omp_tol``) picks the relevant input dimensions
    and, if at least two positive samples remain, a Gaussian mixture is fit
    to their (slightly jittered) trimmed inputs.

    :param x: input
    :param t: target (bool)
    :return: None
    """
    # Don't add duplicates.
    already_stored = any(
        seen_t == t and np.all(seen_x == x)
        for seen_x, seen_t in zip(self.all_x, self.all_t)
    )
    if already_stored:
        return

    # Update skill dataset.
    self.all_x.append(x)
    self.all_t.append(t)

    # Can't learn from too few elements.
    sample_count = len(self.all_x)
    distinct_targets = np.unique(self.all_t).shape[0]
    if sample_count <= 2 or distinct_targets < 2:
        return

    # Apply OMP to find the dimensions that matter.
    inputs = np.array(self.all_x)
    targets = np.array(self.all_t).reshape(-1, 1)
    selector = OrthogonalMatchingPursuit(tol=self.omp_tol)
    selector.fit(inputs, targets)

    # Trim data to the used dimensions and the positive samples.
    self.used_dims, = selector.coef_.nonzero()
    trimmed = inputs[:, self.used_dims]
    positives = trimmed[np.where(targets)[0], :]
    if positives.shape[0] < 2 or positives.shape[1] <= 0:
        return

    # Train GMM.
    self.gmm = GaussianMixture(
        n_components=min(len(self.used_dims), self.n_gmm_components))
    # Add a bit of noise to avoid duplicate points.
    jittered = positives + 0.01 * np.random.normal(size=positives.shape)
    self.gmm.fit(jittered)
    self.fitted = True
    self.gmm_samples_dim, self.gmm_samples_values = \
        self.__generate_gmm_samples(20)
def SparseDeconvolution(x, y, p, rtype='omp'):
    """Sparse deconvolution of ``y`` by the pulse ``x`` (unsliced outputs).

    ``x`` is scaled by its largest-magnitude sample and then to unit norm;
    ``y`` is scaled by the same sample. A Toeplitz convolution matrix built
    from ``x`` is fit to the zero-padded ``y`` with a sparse regressor.

    Parameters
    ----------
    x, y : 1-D arrays (pulse and measured signal).
    p : regulariser. With ``rtype='omp'`` an integer is the number of
        non-zero coefficients and a value below 1.0 is the residual
        tolerance; with ``rtype='lasso'`` it is ``alpha``.
    rtype : ``'omp'`` or ``'lasso'``.

    Returns
    -------
    (Y - b, X, h) : the padded signal minus the fitted intercept, the
        Toeplitz design matrix, and the fitted coefficients.

    Raises
    ------
    ValueError
        If ``rtype``/``p`` do not select a model.
    """
    from numbers import Integral
    from numpy import zeros, hstack
    from scipy.linalg import toeplitz, norm
    from sklearn.linear_model import OrthogonalMatchingPursuit, Lasso

    xm = x[abs(x).argmax()]
    x = (x.copy()) / xm
    x = x / norm(x)
    y = (y.copy()) / xm

    Nx = len(x)
    Ny = len(y)
    X = toeplitz(hstack((x, zeros(Nx + Ny - 2))), r=zeros(Ny + Nx - 1))
    Y = hstack((zeros(Nx - 1), y, zeros(Nx - 1)))

    # Integral also accepts NumPy integer scalars; bool is excluded because
    # it was never treated as a coefficient count.
    p_is_count = isinstance(p, Integral) and not isinstance(p, bool)
    if rtype == 'omp' and p_is_count:
        model = OrthogonalMatchingPursuit(n_nonzero_coefs=p, normalize=False)
    elif rtype == 'omp' and p < 1.0:
        model = OrthogonalMatchingPursuit(tol=p, normalize=False)
    elif rtype == 'lasso':
        model = Lasso(alpha=p)
    else:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError(
            "unsupported combination rtype=%r, p=%r" % (rtype, p))

    model.fit(X, Y)
    h = model.coef_
    b = model.intercept_
    return Y - b, X, h
def CSSK(h, const=5.0, noise=0.0000001):
    """Compressed-sensing replacement of the Fourier transform of a 1D array.

    The signal is expressed in a scaled sine dictionary with
    ``int(const * len(h))`` atoms, and Orthogonal Matching Pursuit selects
    atoms until the residual tolerance ``noise`` is met. (An alternative
    formulation would need CVXPY; this implementation does not use it.)

    h     = sampled time signal
    const = scalar multiple of the length of h giving the number of atoms;
            larger values give greater resolution albeit with increased cost
    noise = tolerance passed to the estimator as ``tol``
    returns:
    g     = coefficient vector of h over the sine dictionary
    """
    h = np.asarray(h, dtype=float)
    Nt = len(h)
    Nw = int(const * Nt)
    t = np.arange(Nt)
    w = np.arange(Nw)
    # Builtin float: the ``float`` alias on the NumPy module was removed in
    # NumPy 1.24.
    scale = float(Nw)
    F = (1 / scale) * np.sin(2.0 * np.pi * np.outer(t, w) / scale)

    omp_cv = OrthogonalMatchingPursuit(tol=noise)
    omp_cv.fit(F, h)
    g = omp_cv.coef_
    return g
def test_estimator():
    """Check shapes and sparsity; passing Gram/Xy must raise a DeprecationWarning."""
    single = y[:, 0]
    multi_budget = n_targets * n_nonzero_coefs
    model = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)

    # Default parameters.
    model.fit(X, single)
    assert_equal(model.coef_.shape, (n_features,))
    assert_equal(model.intercept_.shape, ())
    assert_true(count_nonzero(model.coef_) <= n_nonzero_coefs)

    model.fit(X, y)
    assert_equal(model.coef_.shape, (n_targets, n_features))
    assert_equal(model.intercept_.shape, (n_targets,))
    assert_true(count_nonzero(model.coef_) <= multi_budget)

    # No intercept, no normalisation, precomputed Gram and Xy.
    model.set_params(fit_intercept=False, normalize=False)

    assert_warns(DeprecationWarning, model.fit, X, single,
                 Gram=G, Xy=Xy[:, 0])
    assert_equal(model.coef_.shape, (n_features,))
    assert_equal(model.intercept_, 0)
    assert_true(count_nonzero(model.coef_) <= n_nonzero_coefs)

    assert_warns(DeprecationWarning, model.fit, X, y, Gram=G, Xy=Xy)
    assert_equal(model.coef_.shape, (n_targets, n_features))
    assert_equal(model.intercept_, 0)
    assert_true(count_nonzero(model.coef_) <= multi_budget)
def test_estimator_shapes():
    """Output shapes and sparsity must hold with and without Gram/Xy."""
    estimator = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)

    # (target, extra fit keywords) for the single- and multi-target fits,
    # first without and then with the precomputed Gram matrix and Xy.
    single_fits = ((y[:, 0], {}), (y[:, 0], {'Gram': G, 'Xy': Xy[:, 0]}))
    multi_fits = ((y, {}), (y, {'Gram': G, 'Xy': Xy}))

    for (one_target, one_kw), (all_targets, all_kw) in zip(single_fits,
                                                           multi_fits):
        estimator.fit(X, one_target, **one_kw)
        assert_equal(estimator.coef_.shape, (n_features,))
        assert_equal(estimator.intercept_.shape, ())
        assert_true(count_nonzero(estimator.coef_) <= n_nonzero_coefs)

        estimator.fit(X, all_targets, **all_kw)
        assert_equal(estimator.coef_.shape, (n_targets, n_features))
        assert_equal(estimator.intercept_.shape, (n_targets,))
        assert_true(
            count_nonzero(estimator.coef_) <= n_targets * n_nonzero_coefs)
def test_estimator():
    """Shapes, intercepts and sparsity for default and intercept-free fits."""
    single = y[:, 0]
    single_shape = (n_features,)
    multi_shape = (n_targets, n_features)
    multi_budget = n_targets * n_nonzero_coefs

    estimator = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)

    # Default parameters: intercept_ has one entry per target.
    estimator.fit(X, single)
    assert_equal(estimator.coef_.shape, single_shape)
    assert_equal(estimator.intercept_.shape, ())
    assert_true(np.count_nonzero(estimator.coef_) <= n_nonzero_coefs)

    estimator.fit(X, y)
    assert_equal(estimator.coef_.shape, multi_shape)
    assert_equal(estimator.intercept_.shape, (n_targets,))
    assert_true(np.count_nonzero(estimator.coef_) <= multi_budget)

    # Without intercept the intercept_ attribute compares equal to 0.
    estimator.set_params(fit_intercept=False, normalize=False)
    for target, coef_shape, budget in ((single, single_shape, n_nonzero_coefs),
                                       (y, multi_shape, multi_budget)):
        estimator.fit(X, target)
        assert_equal(estimator.coef_.shape, coef_shape)
        assert_equal(estimator.intercept_, 0)
        assert_true(np.count_nonzero(estimator.coef_) <= budget)
def orthogonal_matching_pursuit(y, D):
    """Fit a default ``OrthogonalMatchingPursuit`` of ``y`` on ``D``; return the estimator."""
    # fit() returns the estimator itself, so the calls can be chained.
    return OrthogonalMatchingPursuit().fit(D, y)
clf = SVC(kernel='linear', C=1.) feature_selection = SelectKBest(f_classif, k=50) anova_svc = Pipeline([('anova', feature_selection), ('svc', clf)]) anova_svc.fit(X_train, y_train[i, :]) pipelines.append(anova_svc) """ """ f_classif 100 + Ridge """ from sklearn.linear_model import OrthogonalMatchingPursuit as OMP clf = OMP(n_nonzero_coefs=20) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) """ clf.fit(X_train, y_train_tall.T) y_pred_tall = clf.predict(X_test) clf.fit(X_train, y_train_large.T) y_pred_large = clf.predict(X_test) clf.fit(X_train, y_train_big.T) y_pred_big = clf.predict(X_test) """
def test_scaling_with_gram():
    """Check when fitting with a precomputed Gram argument emits warnings.

    With both ``fit_intercept`` and ``normalize`` off, no warning is
    expected. With either one on, every ``fit`` call that passes ``Gram``
    is expected to add exactly one recorded warning.
    """
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        # Use only 1 nonzero coef to be faster and to avoid warnings
        plain = OrthogonalMatchingPursuit(
            n_nonzero_coefs=1, fit_intercept=False, normalize=False)
        with_intercept = OrthogonalMatchingPursuit(
            n_nonzero_coefs=1, fit_intercept=True, normalize=False)
        with_normalize = OrthogonalMatchingPursuit(
            n_nonzero_coefs=1, fit_intercept=False, normalize=True)

        plain.fit(X, y, Gram=G)
        plain.fit(X, y, Gram=G, Xy=Xy)
        assert_true(len(caught) == 0)

        expected = 0
        for estimator in (with_intercept, with_normalize):
            for extra in ({}, {'Xy': Xy}):
                estimator.fit(X, y, Gram=G, **extra)
                expected += 1
                assert_true(len(caught) == expected)
class SparseApproxSpectrum(object):
    """Dictionary learning and sparse reconstruction of 2D spectrogram patches.

    Typical use is ``extract_codes(X)`` to learn a patch dictionary,
    ``plot_codes()`` to inspect it, then ``reconstruct_spectrum()`` or
    ``reconstruct_individual_spectra()`` to rebuild the input from it.

    Parameters
    ----------
    n_components : int
        Number of dictionary atoms to learn.
    patch_size : tuple of int
        Shape of each 2D patch cut from the input.
    max_samples : int
        Patches are randomly subsampled down to this count when exceeded.
    **kwargs
        Accepted for call compatibility; not used by this class.
    """

    def __init__(self, n_components=49, patch_size=(8, 8),
                 max_samples=1000000, **kwargs):
        self.omp = OrthogonalMatchingPursuit()
        self.n_components = n_components
        self.patch_size = patch_size
        self.max_samples = max_samples
        self.D = None
        self.data = None
        self.components = None
        self.standardize = False
        # Set by the reconstruction methods; initialised so that reading
        # them early gives None instead of an AttributeError.
        self.w = None
        self.X_hat_l = None

    def _unstandardize(self, values):
        """Undo the per-feature standardisation applied to the patches.

        Returns ``values * self.std + self.mn`` when ``self.standardize``
        is set, otherwise returns ``values`` unchanged.
        """
        if not self.standardize:
            return values
        return values * self.std + self.mn

    def _grid_side(self):
        """Return the side of the smallest square subplot grid for all atoms."""
        return int(np.ceil(np.sqrt(self.n_components)))

    def _extract_data_patches(self, X):
        """Cut ``X`` into flattened patches and store them in ``self.data``.

        When ``self.standardize`` is set, the per-feature mean and standard
        deviation are stored in ``self.mn`` / ``self.std`` and removed.
        """
        self.X = X
        data = extract_patches_2d(X, self.patch_size)
        data = data.reshape(data.shape[0], -1)
        if len(data) > self.max_samples:
            data = np.random.permutation(data)[:self.max_samples]
        # Single parenthesised argument: same output on Python 2 and 3.
        print(data.shape)
        if self.standardize:
            self.mn = np.mean(data, axis=0)
            self.std = np.std(data, axis=0)
            data -= self.mn
            data /= self.std
        self.data = data

    def extract_codes(self, X, standardize=False):
        """Learn a patch dictionary from ``X``; the fitted model goes in ``self.D``.

        Returns ``self`` so calls can be chained.
        """
        self.standardize = standardize
        self._extract_data_patches(X)
        self.dico = MiniBatchDictionaryLearning(
            n_components=self.n_components, alpha=1, n_iter=500)
        print("Dictionary learning from data...")
        self.D = self.dico.fit(self.data)
        return self

    def plot_codes(self, cbar=False, **kwargs):
        """Show every dictionary atom as an image in a square subplot grid.

        ``kwargs`` are forwarded to ``plt.imshow``; defaults are supplied
        for ``cmap``, ``origin`` and ``interpolation``.
        """
        side = self._grid_side()
        # The rest of the class draws through ``plt``, so the colour map is
        # taken from it as well rather than from a separate ``pl`` alias.
        kwargs.setdefault('cmap', plt.cm.gray_r)
        # matplotlib accepts only 'upper' or 'lower' here; 'lower' puts
        # row 0 at the bottom, which is what the old 'bottom' value meant.
        kwargs.setdefault('origin', 'lower')
        kwargs.setdefault('interpolation', 'nearest')
        for i, comp in enumerate(self.D.components_):
            plt.subplot(side, side, i + 1)
            comp = self._unstandardize(comp)
            plt.imshow(comp.reshape(self.patch_size), **kwargs)
            if cbar:
                plt.colorbar()
            plt.xticks(())
            plt.yticks(())
        plt.suptitle('Dictionary learned from spectrum patches\n',
                     fontsize=16)
        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def extract_audio_dir_codes(
            self, dir_expr='/home/mkc/exp/FMRI/stimuli/Wav6sRamp/*.wav',
            **kwargs):
        """Learn and plot a dictionary from every audio file matching ``dir_expr``.

        A ``standardize`` keyword, if given, is passed to ``extract_codes``.
        All remaining keywords are forwarded to ``plot_codes``.
        Returns ``self``.
        """
        flist = glob.glob(dir_expr)
        self.X = np.vstack([
            feature_scale(
                LogFrequencySpectrum(f, nbpo=24, nhop=1024).X,
                normalize=1).T
            for f in flist]).T
        # extract_codes is a method and stores the fitted dictionary in
        # self.D itself; its return value (self) must not replace self.D.
        standardize = kwargs.pop('standardize', False)
        self.extract_codes(self.X, standardize=standardize)
        self.plot_codes(**kwargs)
        return self

    def _get_approximation_coefs(self, data, components):
        """Fit ``self.omp`` to each row of ``data`` using ``components`` as atoms.

        Returns an array holding one coefficient vector per row of ``data``.
        """
        w = np.array([self.omp.fit(components.T, d.T).coef_ for d in data])
        return w

    def reconstruct_spectrum(self, w=None, randomize=False):
        """Rebuild the input from the dictionary; the result goes in ``self.X_hat``.

        Parameters
        ----------
        w : array-like or None
            Per-patch atom weights; computed from ``self.data`` when None.
        randomize : bool
            Randomly permute the atoms after the weights are known.

        Returns ``self``.
        """
        components = self.D.components_
        if w is None:
            w = self._get_approximation_coefs(self.data, components)
        # Record the weights in both cases; reconstruct_individual_spectra
        # reads self.w after calling this method.
        self.w = w
        if randomize:
            components = np.random.permutation(components)
        # The patches were standardised before learning, so the scaling is
        # undone on the reconstructed patches to return to input units.
        recon = self._unstandardize(np.dot(w, components))
        recon = recon.reshape(-1, self.patch_size[0], self.patch_size[1])
        self.X_hat = reconstruct_from_patches_2d(recon, self.X.shape)
        return self

    def reconstruct_individual_spectra(self, w=None, randomize=False,
                                       plotting=False, **kwargs):
        """Rebuild one spectrum per dictionary atom into ``self.X_hat_l``.

        ``kwargs`` are forwarded to ``feature_plot`` when ``plotting`` is set.
        Returns ``self``.
        """
        self.reconstruct_spectrum(w, randomize)
        weights = np.asarray(self.w)
        components = self.D.components_
        self.X_hat_l = []
        for i in range(weights.shape[1]):
            # Contribution of atom i alone: outer product of its weights
            # over all patches with the atom itself.
            r = np.outer(weights[:, i], components[i]).reshape(
                -1, self.patch_size[0], self.patch_size[1])
            self.X_hat_l.append(reconstruct_from_patches_2d(r, self.X.shape))
        if plotting:
            plt.figure()
            side = self._grid_side()  # subplot needs integer grid sizes
            for k in range(self.n_components):
                plt.subplot(side, side, k + 1)
                feature_plot(self.X_hat_l[k], nofig=1, **kwargs)
        return self
def sparse_encode(self, X, dictionary, n_nonzero_coefs=None, verbose=0):
    """Sparse-code ``X`` against ``dictionary`` with Orthogonal Matching Pursuit.

    ``dictionary`` is used as the design matrix and ``X.T`` as the targets.
    The fitted coefficients are returned transposed. ``verbose`` is
    accepted but not used.
    """
    pursuit = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    pursuit.fit(dictionary, X.T)
    return pursuit.coef_.T
########################## y_noisy = y + 0.05 * np.random.randn(len(y)) # plot the sparse signal ######################## pl.figure(figsize=(7, 7)) pl.subplot(4, 1, 1) pl.xlim(0, 512) pl.title("Sparse signal") pl.stem(idx, w[idx]) # plot the noise-free reconstruction #################################### omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs) omp.fit(X, y) coef = omp.coef_ idx_r, = coef.nonzero() pl.subplot(4, 1, 2) pl.xlim(0, 512) pl.title("Recovered signal from noise-free measurements") pl.stem(idx_r, coef[idx_r]) # plot the noisy reconstruction ############################### omp.fit(X, y_noisy) coef = omp.coef_ idx_r, = coef.nonzero() pl.subplot(4, 1, 3) pl.xlim(0, 512) pl.title("Recovered signal from noisy measurements")
class SparseApproxSpectrum(object):
    """class for 2D patch analysis of audio files

    initialization:
        patch_size - size of time-frequency 2D patches in spectrogram units
                     (freq,time) [(12,12)]
        max_samples - if num audio patches exceeds this threshold, randomly
                      sample spectrum [1000000]
        **omp_args - keyword arguments to OrthogonalMatchingPursuit(...) [None]
    """

    def __init__(self, patch_size=(12, 12), max_samples=1000000, **omp_args):
        self.patch_size = patch_size
        self.max_samples = max_samples
        self.omp = OrthogonalMatchingPursuit(**omp_args)
        self.D = None
        self.data = None
        self.components = None
        self.zscore = False
        self.log_amplitude = False
        # Set by the reconstruction methods; initialised so that
        # plot_individual_spectra can test X_hat_l before any reconstruction.
        self.w = None
        self.X_hat_l = None

    @staticmethod
    def _fit_kernel_to_patch(kernel, patch_size):
        """Centre ``kernel`` in a zero array of shape ``patch_size``.

        Each axis is handled on its own: a kernel shorter than the patch is
        zero-padded around its centre, a longer one is cropped around its
        centre. Offsets use floor division so slice bounds stay integers.
        """
        fitted = np.zeros(patch_size)
        src, dst = [], []
        for k_len, p_len in zip(kernel.shape, patch_size):
            if k_len <= p_len:
                start = p_len // 2 - k_len // 2
                src.append(slice(0, k_len))
                dst.append(slice(start, start + k_len))
            else:
                start = k_len // 2 - p_len // 2
                src.append(slice(start, start + p_len))
                dst.append(slice(0, p_len))
        fitted[tuple(dst)] = kernel[tuple(src)]
        return fitted

    def _extract_data_patches(self, X, zscore, log_amplitude):
        "utility method for converting spectrogram data to 2D patches "
        self.zscore = zscore
        self.log_amplitude = log_amplitude
        self.X = X
        if self.log_amplitude:
            X = np.log(1 + X)
        data = extract_patches_2d(X, self.patch_size)
        data = data.reshape(data.shape[0], -1)
        if len(data) > self.max_samples:
            data = np.random.permutation(data)[:self.max_samples]
        # Single parenthesised argument: same output on Python 2 and 3.
        print(data.shape)
        if self.zscore:
            self.mn = np.mean(data, axis=0)
            self.std = np.std(data, axis=0)
            data -= self.mn
            data /= self.std
        self.data = data

    def make_gabor_field(self, X, zscore=True, log_amplitude=True,
                         thetas=range(4), sigmas=(1, 3),
                         frequencies=(0.05, 0.25)):
        """Given a spectrogram, prepare 2D patches and Gabor filter bank kernels

        inputs:
            X - spectrogram data (frequency x time)
            zscore - whether to zscore the ensemble of 2D patches [True]
            log_amplitude - whether to apply log(1+X) scaling of spectrogram
                            data [True]
            thetas - list of 2D Gabor filter orientations in units of pi/4.
                     [range(4)]
            sigmas - list of 2D Gabor filter standard deviations in oriented
                     direction [(1,3)]
            frequencies - list of 2D Gabor filter frequencies [(0.05,0.25)]
        outputs:
            self.data - 2D patches of input spectrogram
            self.D.components_ - Gabor dictionary of
                                 thetas x sigmas x frequencies atoms
        """
        self._extract_data_patches(X, zscore, log_amplitude)
        self.n_components = len(thetas) * len(sigmas) * len(frequencies)
        self.thetas = thetas
        self.sigmas = sigmas
        self.frequencies = frequencies
        self.kernels = []
        for theta in thetas:
            theta = theta / 4. * np.pi
            for sigma in sigmas:
                for frequency in frequencies:
                    kernel = np.real(gabor_kernel(
                        frequency, theta=theta,
                        sigma_x=sigma, sigma_y=sigma))
                    # Pad or crop so every atom has exactly patch_size
                    # entries, whatever size the generated kernel has.
                    z = self._fit_kernel_to_patch(kernel, self.patch_size)
                    self.kernels.append(z.flatten())

        class Bunch:
            # Minimal attribute container exposing ``components_`` in the
            # same way as the fitted dictionary from extract_codes.
            def __init__(self, **kwds):
                self.__dict__.update(kwds)

        self.D = Bunch(components_=np.vstack(self.kernels))

    def extract_codes(self, X, n_components=16, zscore=True,
                      log_amplitude=True, **mbl_args):
        """Given a spectrogram, learn a dictionary of 2D patch atoms from spectrogram data

        inputs:
            X - spectrogram data (frequency x time)
            n_components - how many components to extract [16]
            zscore - whether to zscore the ensemble of 2D patches [True]
            log_amplitude - whether to apply log(1+X) scaling of spectrogram
                            data [True]
            **mbl_args - keyword arguments passed to the
                         MiniBatchDictionaryLearning(...) constructor [None]
        outputs:
            self.data - 2D patches of input spectrogram
            self.D.components_ - dictionary of learned 2D atoms for sparse
                                 coding
        """
        self._extract_data_patches(X, zscore, log_amplitude)
        self.n_components = n_components
        self.dico = MiniBatchDictionaryLearning(
            n_components=self.n_components, **mbl_args)
        print("Dictionary learning from data...")
        self.D = self.dico.fit(self.data)

    def plot_codes(self, cbar=False, show_axis=False, **kwargs):
        "plot the learned or generated 2D sparse code dictionary"
        N = int(np.ceil(np.sqrt(self.n_components)))
        kwargs.setdefault('cmap', plt.cm.gray_r)
        # matplotlib accepts only 'upper' or 'lower' here; 'lower' puts
        # row 0 at the bottom, which is what the old 'bottom' value meant.
        kwargs.setdefault('origin', 'lower')
        kwargs.setdefault('interpolation', 'nearest')
        for i, comp in enumerate(self.D.components_):
            plt.subplot(N, N, i + 1)
            plt.imshow(comp.reshape(self.patch_size), **kwargs)
            if cbar:
                plt.colorbar()
            if not show_axis:
                plt.axis('off')
            # NOTE(review): ticks are cleared for every subplot, so
            # show_axis only controls the axes frame - confirm this matches
            # the intended layout.
            plt.xticks(())
            plt.yticks(())
            plt.title('%d' % (i))
        plt.suptitle('Dictionary of Spectrum Patches\n', fontsize=14)
        plt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)

    def extract_audio_dir_codes(
            self, dir_expr='/home/mkc/exp/FMRI/stimuli/Wav6sRamp/*.wav',
            **mbl_args):
        """apply dictionary learning to entire directory of audio files (requires LOTS of RAM)

        inputs:
            **mbl_args - keyword arguments forwarded to extract_codes(...)
                         [None]
        """
        flist = glob.glob(dir_expr)
        self.X = np.vstack([
            br.feature_scale(
                br.LogFrequencySpectrum(f, nbpo=24, nhop=1024).X,
                normalize=1).T
            for f in flist]).T
        # extract_codes is a method that stores the fitted dictionary in
        # self.D and returns None, so its result must not be assigned.
        self.extract_codes(self.X, **mbl_args)

    def _get_approximation_coefs(self, data, components):
        """utility function to fit dictionary components to data

        inputs:
            data - flattened 2D patches, one per row
            components - the dictionary components to fit to the data
        returns:
            array with one coefficient vector per row of data
        """
        w = np.array([self.omp.fit(components.T, d.T).coef_ for d in data])
        return w

    def reconstruct_spectrum(self, w=None, randomize=False):
        """reconstruct by fitting current 2D dictionary to self.data

        inputs:
            w - per-component reconstruction weights
                [None=calculate weights]
            randomize - randomly permute components after getting weights
                        [False]
        returns:
            self.X_hat - spectral reconstruction of self.data
        """
        components = self.D.components_
        if w is None:
            w = self._get_approximation_coefs(self.data, components)
        # Record the weights in both cases; reconstruct_individual_spectra
        # reads self.w after calling this method.
        self.w = w
        if randomize:
            components = np.random.permutation(components)
        recon = np.dot(w, components)
        if self.zscore:
            # undo the per-feature z-scoring applied to the patches
            recon = recon * self.std
            recon = recon + self.mn
        recon = recon.reshape(-1, *self.patch_size)
        self.X_hat = reconstruct_from_patches_2d(recon, self.X.shape)
        if self.log_amplitude:
            self.X_hat = np.exp(self.X_hat) - 1.0  # invert log transform

    def reconstruct_individual_spectra(self, w=None, randomize=False,
                                       plotting=False, rectify=True,
                                       **kwargs):
        """fit each dictionary component to self.data

        inputs:
            w - per-component reconstruction weights
                [None=calculate weights]
            randomize - randomly permute components after getting weights
                        [False]
            plotting - whether to subplot individual spectrum
                       reconstructions [False]
            rectify - remove negative ("dark energy") from individual
                      reconstructions [True]
            **kwargs - keyword arguments for plotting
        returns:
            self.X_hat_l - list of individual spectrum reconstructions per
                           dictionary atom
        """
        self.reconstruct_spectrum(w, randomize)
        weights = np.asarray(self.w)
        components = self.D.components_
        self.X_hat_l = []
        for i in range(weights.shape[1]):
            # Contribution of atom i alone: outer product of its weights
            # over all patches with the atom itself.
            r = np.outer(weights[:, i], components[i]).reshape(
                -1, *self.patch_size)
            X_hat = reconstruct_from_patches_2d(r, self.X.shape)
            if self.log_amplitude:
                X_hat = np.exp(X_hat) - 1.0
            if rectify:  # half wave rectification
                X_hat[X_hat < 0] = 0
            self.X_hat_l.append(X_hat)
        if plotting:
            self.plot_individual_spectra(**kwargs)

    def plot_individual_spectra(self, **kwargs):
        "plot individual spectrum reconstructions for self.X_hat_l"
        # getattr covers instances built without __init__.
        if getattr(self, 'X_hat_l', None) is None:
            return
        plt.figure()
        rn = int(np.ceil(self.n_components ** 0.5))  # subplot needs ints
        for k in range(self.n_components):
            plt.subplot(rn, rn, k + 1)
            br.feature_plot(self.X_hat_l[k], nofig=1, **kwargs)
            plt.title('%d' % (k))
        plt.suptitle('Component Reconstructions\n', fontsize=14)
def sparse_encode(self, n_nonzero_coefs):
    """Sparse-code the current residual against ``self.dictionary``.

    Fits Orthogonal Matching Pursuit with ``self.dictionary`` as the design
    matrix and ``self.X_residual.T`` as the targets, then returns the
    fitted coefficients transposed.
    """
    pursuit = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    pursuit.fit(self.dictionary, self.X_residual.T)
    return pursuit.coef_.T
def sparse_code(Y, D, X=None):
    """Sparse-coding stage: fit each column of ``Y`` against the dictionary ``D``.

    Parameters
    ----------
    Y : ndarray or matrix
        Signals, one per column.
    D : ndarray or matrix
        Dictionary, one atom per column.
    X : ndarray or matrix of floats, optional
        Coefficient container with one row per atom and one column per
        signal. It is filled in place; a new matrix is allocated when
        omitted.

    Returns
    -------
    X with column ``k`` holding the ``OMP`` coefficients for ``Y[:, k]``.
    """
    signals = np.asarray(Y)
    atoms = np.asarray(D)
    if X is None:
        X = np.asmatrix(np.empty((atoms.shape[1], signals.shape[1])))
    # Plain-ndarray view of X: writes go through to X, and column
    # assignment behaves the same for ndarray and matrix containers.
    codes = np.asarray(X)
    for k in range(codes.shape[1]):
        omp = OMP()
        omp.fit(atoms, signals[:, k])
        codes[:, k] = np.ravel(omp.coef_)
    return X


def form_omega(x, N):
    """Build the selector matrix for the non-zero entries of ``x``.

    ``N`` is the number of columns of ``Y``. The result is an
    ``N x len(support)`` matrix with a single 1 per column, at the row of
    the corresponding non-zero entry of ``x``. Right-multiplying by it
    keeps only the signals that use this atom, which is what keeps the
    update sparse.
    """
    flat = np.ravel(np.asarray(x))
    support = np.flatnonzero(np.abs(flat) > 0)
    omega = np.zeros((N, support.size))
    omega[support, np.arange(support.size)] = 1
    return np.asmatrix(omega)


def update_dictionary(Y, D, X):
    """Update the dictionary ``D`` and the matrix ``X`` (phase 2).

    For each atom ``k``, the residual of ``Y`` without that atom's
    contribution is restricted to the signals that use the atom. A rank-1
    SVD of the restricted residual then replaces the atom and its non-zero
    coefficients. Atoms that no signal uses are left unchanged.

    ``D`` and ``X`` must be float ndarrays or matrices; they are modified
    in place and also returned as ``(D, X)``.
    """
    signals = np.asarray(Y, dtype=float)
    # ndarray views: writes go through to the caller's D and X.
    atoms = np.asarray(D)
    codes = np.asarray(X)
    n_signals = signals.shape[1]
    for k in range(atoms.shape[1]):
        omega_k = np.asarray(form_omega(codes[k, :], n_signals))
        if omega_k.shape[1] == 0:
            continue  # atom unused: there is no residual to fit
        # E_k = Y - sum_{j != k} d_j x_j, computed by adding atom k's own
        # contribution back onto the full residual.
        E_k = (signals - np.dot(atoms, codes)
               + np.outer(atoms[:, k], codes[k, :]))
        # Restrict to the supporting signals so the update stays sparse.
        E_kr = np.dot(E_k, omega_k)
        U, sig, Vt = np.linalg.svd(E_kr, full_matrices=False)
        atoms[:, k] = U[:, 0]
        # omega_k.T scatters the restricted coefficients back to their
        # original columns and leaves every other entry at zero.
        codes[k, :] = np.dot(sig[0] * Vt[0, :], omega_k.T)
    return D, X


def main():
    pass


if __name__ == '__main__':
    main()