def OMP_cv(problem, **kwargs):
    r"""Pick entries of ``problem.data`` with cross-validated OMP.

    Each entry's ``['data']['values']`` becomes one feature column; the
    regression target is ``problem.goal['data']['values']``.

    Requirements
    ------------
    kwargs['choose'] must be a positive integer
        (forwarded as ``max_iter`` to ``OrthogonalMatchingPursuitCV``)
    kwargs['coef_tolerance'] must be a nonnegative float
        (entries whose absolute coefficient exceeds it are kept)

    Returns
    -------
    output : tuple
        (optimum, maximum) where ``optimum`` is the list of retained
        ``problem.data`` entries and ``maximum`` is the fitted model's
        ``score`` on the same data it was fitted on.
    """
    # Rows are data entries, so transpose to get one column per entry.
    features = numpy.array(
        [entry['data']['values'] for entry in problem.data]
    ).T

    model = OrthogonalMatchingPursuitCV(max_iter=kwargs['choose'])
    target = problem.goal['data']['values']
    model.fit(features, target)

    optimum = [
        entry
        for entry, weight in zip(problem.data, model.coef_)
        if abs(weight) > kwargs['coef_tolerance']
    ]
    maximum = model.score(features, target)
    return (optimum, maximum)
def plot_omp():
    """Plot a sparse signal and three OMP reconstructions of it.

    Four stacked stem plots are drawn: the true sparse code, the recovery
    from clean measurements, the recovery from noisy measurements, and the
    noisy recovery with the number of non-zeros chosen by cross-validation.
    """
    n_components, n_features = 512, 100
    n_nonzero_coefs = 17

    def draw_panel(row, heading, weights):
        # Stem-plot only the non-zero entries of `weights` in panel `row`.
        (support,) = weights.nonzero()
        plt.subplot(4, 1, row)
        plt.xlim(0, 512)
        plt.title(heading)
        plt.stem(support, weights[support], use_line_collection=True)

    # Generate the data: y = Xw with |w|_0 = n_nonzero_coefs.
    y, X, w = make_sparse_coded_signal(
        n_samples=1,
        n_components=n_components,
        n_features=n_features,
        n_nonzero_coefs=n_nonzero_coefs,
        random_state=0,
    )

    # Distort the clean signal with additive Gaussian noise.
    y_noisy = y + 0.05 * np.random.randn(len(y))

    plt.figure(figsize=(7, 7))
    draw_panel(1, "Sparse signal", w)

    # Fixed sparsity, clean measurements.
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    omp.fit(X, y)
    draw_panel(2, "Recovered signal from noise-free measurements", omp.coef_)

    # Fixed sparsity, noisy measurements (same estimator, refitted).
    omp.fit(X, y_noisy)
    draw_panel(3, "Recovered signal from noisy measurements", omp.coef_)

    # Sparsity selected by cross-validation, noisy measurements.
    omp_cv = OrthogonalMatchingPursuitCV()
    omp_cv.fit(X, y_noisy)
    draw_panel(
        4, "Recovered signal from noisy measurements with CV", omp_cv.coef_
    )

    plt.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)
    plt.suptitle('Sparse signal recovery with Orthogonal Matching Pursuit',
                 fontsize=16)
    plt.show()
def test_omp_cv():
    """Cross-validated OMP must recover the expected sparsity and coefficients.

    Uses the module-level fixtures ``X``, ``y``, ``gamma`` and
    ``n_nonzero_coefs``; only the first target column is exercised.
    """
    target = y[:, 0]
    expected_coef = gamma[:, 0]
    shared_options = dict(normalize=True, fit_intercept=False)

    cv_model = OrthogonalMatchingPursuitCV(max_iter=10, cv=5,
                                           **shared_options)
    cv_model.fit(X, target)
    assert_equal(cv_model.n_nonzero_coefs_, n_nonzero_coefs)
    assert_array_almost_equal(cv_model.coef_, expected_coef)

    # A plain OMP given the CV-selected sparsity must agree with the CV fit.
    plain_model = OrthogonalMatchingPursuit(
        n_nonzero_coefs=cv_model.n_nonzero_coefs_, **shared_options)
    plain_model.fit(X, target)
    assert_array_almost_equal(cv_model.coef_, plain_model.coef_)
def test_omp_cv():
    """Cross-validated OMP must recover the expected sparsity and coefficients.

    Uses the module-level fixtures ``X``, ``y``, ``gamma`` and
    ``n_nonzero_coefs``; only the first target column is exercised.
    """
    # FIXME: This test is unstable on Travis, see issue #3190 for more detail.
    check_skip_travis()

    target = y[:, 0]
    expected_coef = gamma[:, 0]
    shared_options = dict(normalize=True, fit_intercept=False)

    cv_model = OrthogonalMatchingPursuitCV(max_iter=10, cv=5,
                                           **shared_options)
    cv_model.fit(X, target)
    assert_equal(cv_model.n_nonzero_coefs_, n_nonzero_coefs)
    assert_array_almost_equal(cv_model.coef_, expected_coef)

    # A plain OMP given the CV-selected sparsity must agree with the CV fit.
    plain_model = OrthogonalMatchingPursuit(
        n_nonzero_coefs=cv_model.n_nonzero_coefs_, **shared_options)
    plain_model.fit(X, target)
    assert_array_almost_equal(cv_model.coef_, plain_model.coef_)
class _OrthogonalMatchingPursuitCVImpl:
    """Thin adapter that forwards ``fit`` and ``predict`` to an ``Op`` instance."""

    def __init__(self, **hyperparams):
        """Keep the hyperparameters and build the wrapped ``Op`` model from them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is only forwarded when it is not ``None``.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        predictions = self._wrapped_model.predict(X)
        return predictions
def predict(self):
    """
    Fit OrthogonalMatchingPursuitCV on the training split and score it.

    A fresh scikit-learn (https://scikit-learn.org)
    ``OrthogonalMatchingPursuitCV`` with default settings is fitted on
    ``self.X_train`` / ``self.y_train``. Its predictions for ``self.X_test``
    are compared with ``self.y_test`` by ``OneHotPredictor.get_accuracy``.

    The number of non-zero coefficients is left to the estimator's own
    cross-validation; no fixed sparsity is passed in.

    Returns:
        The value returned by ``OneHotPredictor.get_accuracy``, which is
        also stored on ``self.acc``.
    """
    algorithm = OrthogonalMatchingPursuitCV()
    algorithm.fit(self.X_train, self.y_train)

    # get_accuracy is handed a plain list rather than the estimator's array.
    y_pred = list(algorithm.predict(self.X_test))
    self.acc = OneHotPredictor.get_accuracy(y_pred, self.y_test)
    return self.acc
def _ompcv(*, train, test, x_predict=None, metrics, copy=True,
           fit_intercept=True, normalize=True, max_iter=None, cv=None,
           n_jobs=None, verbose=False):
    """Fit OrthogonalMatchingPursuitCV, score it, and optionally predict.

    For more info visit :
    https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.OrthogonalMatchingPursuitCV.html#sklearn.linear_model.OrthogonalMatchingPursuitCV

    Args:
        train: Indexable pair; ``train[0]`` are the features and
            ``train[1]`` the targets passed to ``fit``.
        test: Indexable pair; ``test[0]`` is predicted on and ``test[1]``
            is compared against the prediction.
        x_predict: Optional extra features to predict on.
        metrics: One of ``'mse'``, ``'rmse'`` or ``'mae'``; selects which
            of ``_mse`` / ``_rmse`` / ``_mae`` scores the test prediction.
        copy, fit_intercept, normalize, max_iter, cv, n_jobs, verbose:
            Forwarded unchanged to ``OrthogonalMatchingPursuitCV``.

    Returns:
        Tuple ``(model_name, accuracy, y_predict)`` where ``y_predict`` is
        ``None`` when ``x_predict`` is ``None``.

    Raises:
        ValueError: If ``metrics`` is not one of the supported names.
    """
    # Validate first: an unknown metric used to surface only after the whole
    # cross-validated fit, as an UnboundLocalError on `accuracy`.
    if metrics not in ('mse', 'rmse', 'mae'):
        raise ValueError(
            "metrics must be one of 'mse', 'rmse' or 'mae', got %r"
            % (metrics,)
        )

    model = OrthogonalMatchingPursuitCV(fit_intercept=fit_intercept,
                                        copy=copy,
                                        normalize=normalize,
                                        max_iter=max_iter,
                                        cv=cv,
                                        n_jobs=n_jobs,
                                        verbose=verbose)
    model.fit(train[0], train[1])
    model_name = 'OrthogonalMatchingPursuitCV'

    y_hat = model.predict(test[0])
    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    else:
        accuracy = _mae(test[1], y_hat)

    if x_predict is None:
        return (model_name, accuracy, None)
    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
def get_feature_coefficients(self, norm_prior=1):
    """
    Get feature coefficients using linear regression.

    Linear models penalized with the L1 norm have sparse solutions: many of
    their estimated coefficients are zero.

    Args:
        norm_prior: Which model to fit on ``self.X_df`` / ``self.y_df``.
            ``0`` uses OrthogonalMatchingPursuitCV, ``1`` (default) uses
            Lasso and ``2`` uses Ridge. For Lasso and Ridge, ``alpha`` is
            chosen by a 3-fold grid search over 30 log-spaced values.
            Any other value prints "invalid norm!".

    Returns:
        The fitted model's ``coef_``, or ``None`` for an invalid
        ``norm_prior``. The same value is stored on ``self.coef_``.
    """
    n_folds = 3
    coefficient_value = None

    if norm_prior == 0:  # L0-norm
        model = OrthogonalMatchingPursuitCV()
        model.fit(self.X_df.values, self.y_df.values)
        coefficient_value = model.coef_
    elif norm_prior in (1, 2):  # L1-norm (Lasso) or L2-norm (Ridge)
        if norm_prior == 1:
            estimator = Lasso(random_state=0)
        else:
            estimator = Ridge(random_state=0)
        tuned_parameters = [{'alpha': np.logspace(-4, -0.5, 30)}]
        # refit must stay enabled: GridSearchCV only exposes
        # best_estimator_ when it refits the winner on the full data.
        # With refit=False the attribute access below raised AttributeError.
        gridsearch = GridSearchCV(estimator, tuned_parameters, cv=n_folds,
                                  refit=True)
        gridsearch.fit(self.X_df.values, self.y_df.values)
        coefficient_value = gridsearch.best_estimator_.coef_
    else:
        print("invalid norm!")

    self.coef_ = coefficient_value
    return coefficient_value
def _draw_stem_panel(row, heading, support, weights):
    """Stem-plot ``weights[support]`` in panel ``row`` of the 4-row figure."""
    pl.subplot(4, 1, row)
    pl.xlim(0, 512)
    pl.title(heading)
    pl.stem(support, weights[support])


# Panel 2: noise-free recovery. `coef` and `idx_r` are computed earlier in
# the script, outside this excerpt.
_draw_stem_panel(2, "Recovered signal from noise-free measurements",
                 idx_r, coef)

# Panel 3: refit the same estimator on the noisy measurements.
omp.fit(X, y_noisy)
coef = omp.coef_
(idx_r,) = coef.nonzero()
_draw_stem_panel(3, "Recovered signal from noisy measurements", idx_r, coef)

# Panel 4: noisy recovery with the number of non-zeros set by CV.
omp_cv = OrthogonalMatchingPursuitCV()
omp_cv.fit(X, y_noisy)
coef = omp_cv.coef_
(idx_r,) = coef.nonzero()
_draw_stem_panel(4, "Recovered signal from noisy measurements with CV",
                 idx_r, coef)

pl.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)
pl.suptitle('Sparse signal recovery with Orthogonal Matching Pursuit',
            fontsize=16)
pl.show()
# Constraint: every x_i is fixed to 1.
# (xrange is the Python 2 builtin; this script is written for Python 2.)
for i in xrange(n_features):
    x[i] = 1.0

# Right-hand side: A x plus uniform noise scaled to [0, 0.1).
b = np.dot(A, x) + 0.1 * np.random.rand(n_samples)

# Auxiliary quantities; none of them is used by the fit below.
G = np.dot(A_.T, A_)
b_head = np.zeros((n_samples,))
A_head = np.eye(n_samples, n_features)

# Recover a sparse solution of A x = b with cross-validated OMP.
omp = OrthogonalMatchingPursuitCV()
omp.fit(A, b)
coef = omp.coef_
(idx_r,) = coef.nonzero()

# Single-argument parenthesised prints emit the same text on Python 2 and 3.
print("the index of the nonzero x_i")
print(idx_r)
print("the nonzeros solution of the AX = b")
print(coef[idx_r])
print("\n")
print("all the solutions")
print(coef)
print("\n")
def RunMP(aligned_data_root_path, output_path):
    """Exploratory matrix-profile analysis of aligned Fitbit streams, with plots.

    Three analyses are gated by hard-coded boolean switches at the top of the
    body; as written only the first one is enabled:

    1. Individual k-motifs: take motifs from the stumpy matrix profile of
       each stream, build a dictionary of shifted copies of them, fit
       OrthogonalMatchingPursuitCV against it and plot the stream together
       with its reconstruction.
    2. Anchored chains / semantic segmentation via ``stumpy.allc`` and
       ``stumpy.fluss``.
    3. Multi-dimensional matrix profile via ``stumpy.mstump``.

    Args:
        aligned_data_root_path: Passed unchanged to ``LoadAlignedTILESData``.
        output_path: Not referenced anywhere in this function body.

    Returns:
        None. Results are only shown through matplotlib.
    """
    # Hard-coded switches selecting which analyses run.
    do_compute_individual_k_motifs = True
    do_compute_anchored_chains = False
    do_compute_semantic_segmentation = False
    do_compute_multimodal_mp = False

    # Subsequence length (in samples) used for every matrix profile below.
    window_size = 1300
    #window_size = 1500

    # presumably a mapping pid -> {'fitbit': DataFrame, ...}; verify against
    # LoadAlignedTILESData
    data_dict = LoadAlignedTILESData(aligned_data_root_path)

    #plt.ion()
    pids = list(data_dict.keys())[0:1]  # only the first participant
    streams = ['HeartRatePPG', 'StepCount']

    # Compute motifs from the individual MP using a greedy method
    if do_compute_individual_k_motifs:
        num_motifs = 2
        for pid in pids:
            fitbit_df = data_dict[pid]['fitbit']
            fitbit_df = fitbit_df.iloc[0:10000, :]  # HACK
            for stream in streams:
                exclusion_signal = fitbit_df[stream].copy()

                # Keep a NaN'd version for MP and interpolated one for OMP
                #nan_replace_value = -1000000
                #fitbit_df[stream][np.isnan(fitbit_df[stream])] = nan_replace_value
                #fitbit_df_smooth = fitbit_df[stream].interpolate(method='linear', axis=0, inplace=False)
                #fitbit_df_smooth = fitbit_df[stream].copy()
                fitbit_df_smooth = exclusion_signal.copy()
                if np.isnan(fitbit_df_smooth[0] ):  # Fill NaNs at the beginning and end
                    # Back-fill the leading NaN run with the first valid value.
                    idx = 0
                    while np.isnan(fitbit_df_smooth[idx]):
                        idx += 1
                    fitbit_df_smooth[0:idx] = fitbit_df_smooth[idx]
                if np.isnan(fitbit_df_smooth[fitbit_df_smooth.shape[0] - 1]):
                    # Forward-fill the trailing NaN run with the last valid value.
                    idx = fitbit_df_smooth.shape[0] - 1
                    while np.isnan(fitbit_df_smooth[idx]):
                        idx -= 1
                    fitbit_df_smooth[idx:] = fitbit_df_smooth[idx]

                # Use Matrix Profile methods to learn a motif dictionary
                # NOTE(review): nothing in this loop appears to modify
                # exclusion_signal, so every pass computes the same matrix
                # profile; the TODOs below suggest the exclusion step is
                # still to be written.
                motifs = []
                while len(motifs) < num_motifs:
                    #fitbit_mp = stumpy.stump(fitbit_df[stream], m=window_size) # TODO - use the exclusion_signal
                    fitbit_mp = stumpy.stump(
                        exclusion_signal,
                        m=window_size)  # TODO - use the exclusion_signal
                    # Candidate start indices, ordered by ascending values of
                    # matrix-profile column 0.
                    fitbit_mp_argsort = np.array(fitbit_mp[:, 0]).argsort()
                    for motif_idx in range(len(fitbit_mp_argsort)):
                        stream_motif_idx = fitbit_mp_argsort[motif_idx]
                        num_nan = np.sum(
                            np.isnan(exclusion_signal.
                                     values[stream_motif_idx:stream_motif_idx +
                                            window_size]))

                        # Avoid finding bad motifs
                        if num_nan >= 5.0 * window_size / 6.0:
                            continue

                        if stream == 'HeartRatePPG':
                            pass
                        break

                    # If no candidate passed the NaN check, motif_idx is left
                    # at the last index tried.
                    motif_left_idx = fitbit_mp_argsort[motif_idx]
                    motif = fitbit_df_smooth[motif_left_idx:motif_left_idx +
                                             window_size]
                    motif[motif == 0] = 1e-12  # OMP requires non-zeros in the support
                    motifs.append(motif)
                    plt.plot(range(motif_left_idx, motif_left_idx + window_size),
                             motifs[-1],
                             'g-',
                             linewidth=5)

                # Build a redundant dictionary from the motifs
                # One row per (motif, shift) pair: the motif values placed at
                # columns [shift, shift + window_size).
                num_repetitions = len(fitbit_df_smooth) - window_size
                dictionary_mat = csr_matrix(
                    (len(motifs) * num_repetitions, len(fitbit_df_smooth)))
                for motif_idx in range(len(motifs)):
                    motif_values = motifs[motif_idx].values
                    for repeat_idx in range(num_repetitions):
                        # SLOW: TODO - find better way of generating this matrix. Maybe I can change the sparse encoding directly and just push extra zeros in front of the motif sequence? Better yet, why not abandon the matrix representation and just use a list of motifs and their starting index in the signal
                        dictionary_mat[motif_idx * num_repetitions + repeat_idx,
                                       repeat_idx:repeat_idx +
                                       window_size] = motif_values

                # Reconstruct the signal using the motif dictionary
                # TODO : Write my own OMP with exclusion of each atom's support. Gram mat?
                # TODO : Use L1 optimization (Lasso)?
                #omp = OrthogonalMatchingPursuit(n_nonzero_coefs=2, fit_intercept=False)
                omp = OrthogonalMatchingPursuitCV(fit_intercept=False)
                omp.fit(dictionary_mat.T, fitbit_df_smooth)
                intercept = omp.intercept_
                coef = omp.coef_
                idx_r = coef.nonzero()  # not used further in this function
                num_nonzero = omp.n_nonzero_coefs_

                # NOTE: a hand-written greedy pursuit used to be sketched here
                # as commented-out code (max_nonzero = 20,
                # skip_nan_percent = 0.1). For each added coefficient it
                # scanned every dictionary row, located the row's non-zero
                # support, skipped supports where more than skip_nan_percent
                # of exclusion_signal was NaN, and kept the row with the
                # smallest squared residual (NaNs treated as zero). The chosen
                # row got coefficient 1 and its support in exclusion_signal
                # was set to inf. It stopped with "Avoiding overfitting..."
                # once the previous reconstruction error was below the new one.

                # Rebuild the fitted signal from the selected dictionary rows only.
                coef_nonzero = (coef != 0).reshape(-1, )
                dictionary_mat_reduced = dictionary_mat[coef_nonzero, :]
                coef_reduced = coef[coef_nonzero]
                fit_signal = np.matmul(dictionary_mat_reduced.T.toarray(),
                                       coef_reduced) + intercept

                # Blue: original stream (NaNs included); red dashed: reconstruction.
                plt.plot(range(fitbit_df[stream].shape[0]), fitbit_df[stream],
                         'b-')
                #plt.plot(range(fitbit_df_smooth.shape[0]), fitbit_df_smooth, 'k-')
                plt.plot(range(fitbit_df[stream].shape[0]), fit_signal, 'r--')
                plt.title('OMP (%d coefs) + MP Motifs (%d motifs)' %
                          (num_nonzero, num_motifs))
                plt.xlabel('Time')
                plt.ylabel(stream)
                plt.show()
                # NOTE(review): this return ends the whole function after the
                # first plot, so pdb.set_trace() below is unreachable. The
                # nesting level of these two statements was reconstructed
                # from flattened source; confirm the intended placement.
                return
                pdb.set_trace()

    # Compute individual matrix profiles (stump)
    if do_compute_anchored_chains or do_compute_semantic_segmentation:
        for pid in pids:
            fitbit_df = data_dict[pid]['fitbit']
            for stream in streams:
                fitbit_mp = stumpy.stump(fitbit_df[stream], m=window_size)

                if do_compute_anchored_chains:
                    # Columns 2 and 3 of the stump output are handed to
                    # stumpy.allc as the left / right index arrays.
                    left_mp_idx = fitbit_mp[:, 2]
                    right_mp_idx = fitbit_mp[:, 3]
                    #atsc_idx = 10
                    #anchored_chain = stumpy.atsc(left_mp_idx, right_mp_idx, atsc_idx)
                    all_chain_set, unanchored_chain = stumpy.allc(
                        left_mp_idx, right_mp_idx)

                if do_compute_semantic_segmentation:
                    subseq_len = window_size
                    correct_arc_curve, regime_locations = stumpy.fluss(
                        fitbit_mp[:, 1],
                        L=subseq_len,
                        n_regimes=2,
                        excl_factor=5)

                # Find the first motif with nearly no NaN values in the stream signal
                fitbit_mp_argsort = np.array(fitbit_mp[:, 0]).argsort()
                for motif_idx in range(len(fitbit_mp_argsort)):
                    stream_motif_idx = fitbit_mp_argsort[motif_idx]
                    num_nan = np.sum(
                        np.isnan(fitbit_df[stream].
                                 values[stream_motif_idx:stream_motif_idx +
                                        window_size]))

                    # Avoid finding bad motifs
                    if num_nan >= 5.0 * window_size / 6.0:
                        continue

                    if stream == 'HeartRatePPG':
                        pass
                        # Check for flat heart rate
                        #nan_like_value = 70
                        #num_valid = np.count_nonzero((fitbit_df[stream] - nan_like_value)[stream_motif_idx:stream_motif_idx+window_size])
                        #if num_valid < window_size - 2:
                        #    continue

                        # Check for linear heart rate over time
                        #residual_threshold = window_size*(4.0**2)
                        #p, res, rank, sing_vals, rcond = np.polyfit(range(window_size), fitbit_df[stream][stream_motif_idx:stream_motif_idx+window_size], deg=1, full=True)
                        #if res < residual_threshold:
                        #    continue
                    break

                # Top panel: raw stream with two candidate windows shaded;
                # second panel: matrix profile; optional third: arc curve.
                num_subplots = 3 if do_compute_semantic_segmentation else 2
                fig, axs = plt.subplots(num_subplots,
                                        sharex=True,
                                        gridspec_kw={'hspace': 0})
                plt.suptitle('Matrix Profile, %s, PID: %s' % (stream, pid),
                             fontsize='30')
                axs[0].plot(fitbit_df[stream].values)
                rect = plt.Rectangle((fitbit_mp_argsort[motif_idx], 0),
                                     window_size,
                                     2000,
                                     facecolor='lightgrey')
                axs[0].add_patch(rect)
                # The second shaded window is simply the next entry in the
                # sorted order, not a looked-up nearest neighbour.
                rect = plt.Rectangle((fitbit_mp_argsort[motif_idx + 1], 0),
                                     window_size,
                                     2000,
                                     facecolor='lightgrey')
                axs[0].add_patch(rect)
                axs[0].set_ylabel(stream, fontsize='20')
                axs[1].plot(fitbit_mp[:, 0])
                axs[1].axvline(x=fitbit_mp_argsort[motif_idx],
                               linestyle="dashed")
                axs[1].axvline(x=fitbit_mp_argsort[motif_idx + 1],
                               linestyle="dashed")
                axs[1].set_ylabel('Matrix Profile', fontsize='20')

                if do_compute_anchored_chains:
                    # Overlay each subsequence of the unanchored chain.
                    for i in range(unanchored_chain.shape[0]):
                        y = fitbit_df[stream].iloc[
                            unanchored_chain[i]:unanchored_chain[i] +
                            window_size]
                        x = y.index.values
                        axs[0].plot(x, y, linewidth=3)

                if do_compute_semantic_segmentation:
                    axs[2].plot(range(correct_arc_curve.shape[0]),
                                correct_arc_curve,
                                color='C1')
                    axs[0].axvline(x=regime_locations[0], linestyle="dashed")
                    axs[2].axvline(x=regime_locations[0], linestyle="dashed")

                plt.show()

    # Compute multi-dimensional matrix profiles (mstump)
    if do_compute_multimodal_mp:
        for pid in pids:
            fitbit_df = data_dict[pid]['fitbit']
            data = fitbit_df.loc[:, streams].values
            mp, mp_indices = stumpy.mstump(data.T, m=window_size)
            #print("Stumpy's mstump function does not handle NaN values. Skipping multi-dimensional MP")
            #break

            # TODO - This code is copied from above. Fix and finish it once mstump supports NaN
            # NOTE(review): `fitbit_mp` and `stream` are never assigned in
            # this branch, and `mp` / `mp_indices` are never read. The code
            # below only works with values left over from an earlier branch.
            # Find the first motif with nearly no NaN values in the stream signal
            fitbit_mp_argsort = np.array(fitbit_mp[:, 0]).argsort()
            for motif_idx in range(len(fitbit_mp_argsort)):
                stream_motif_idx = fitbit_mp_argsort[motif_idx]
                num_nan = np.sum(
                    np.isnan(fitbit_df[stream].
                             values[stream_motif_idx:stream_motif_idx +
                                    window_size]))

                # Avoid finding bad motifs
                if num_nan >= 2:
                    continue

                if stream == 'HeartRatePPG':
                    # Check for flat heart rate
                    nan_like_value = 70
                    num_valid = np.count_nonzero(
                        (fitbit_df[stream] -
                         nan_like_value)[stream_motif_idx:stream_motif_idx +
                                         window_size])
                    if num_valid < window_size - 2:
                        continue

                    # Check for linear heart rate over time
                    residual_threshold = window_size * (4.0**2)
                    p, res, rank, sing_vals, rcond = np.polyfit(
                        range(window_size),
                        fitbit_df[stream][stream_motif_idx:stream_motif_idx +
                                          window_size],
                        deg=1,
                        full=True)
                    if res < residual_threshold:
                        continue
                break

            fig, axs = plt.subplots(2, sharex=True, gridspec_kw={'hspace': 0})
            plt.suptitle('Matrix Profile, %s, PID: %s' % (stream, pid),
                         fontsize='30')
            axs[0].plot(fitbit_df[stream].values)
            rect = plt.Rectangle((fitbit_mp_argsort[motif_idx], 0),
                                 window_size,
                                 2000,
                                 facecolor='lightgrey')
            axs[0].add_patch(rect)
            rect = plt.Rectangle((fitbit_mp_argsort[motif_idx + 1], 0),
                                 window_size,
                                 2000,
                                 facecolor='lightgrey')
            axs[0].add_patch(rect)
            axs[0].set_ylabel(stream, fontsize='20')
            axs[1].plot(fitbit_mp[:, 0])
            axs[1].axvline(x=fitbit_mp_argsort[motif_idx], linestyle="dashed")
            axs[1].axvline(x=fitbit_mp_argsort[motif_idx + 1],
                           linestyle="dashed")
            axs[1].set_ylabel('Matrix Profile', fontsize='20')
            plt.show()

    # Final empty figure; only reached when the first analysis is disabled,
    # given the return inside it.
    plt.ioff()
    plt.figure()
    plt.plot()
    plt.title('Dummy plot')
    plt.show()
    return
def OMP(Xtrain, Ytrain, OMP_options=None, *args, **kwargs):
    """Fit and return an ``OrthogonalMatchingPursuitCV`` model.

    Args:
        Xtrain: Training features, passed to ``fit``.
        Ytrain: Training targets, passed to ``fit``.
        OMP_options: Optional mapping of keyword arguments for the
            ``OrthogonalMatchingPursuitCV`` constructor. ``None`` (the
            default) means "no options", same as an empty dict.
        *args, **kwargs: Accepted but ignored.

    Returns:
        The fitted ``OrthogonalMatchingPursuitCV`` instance.
    """
    # None replaces the former shared mutable `{}` default and also makes an
    # explicit OMP_options=None work instead of failing on `**None`.
    options = {} if OMP_options is None else OMP_options
    OMPModel = OrthogonalMatchingPursuitCV(**options)
    OMPModel.fit(Xtrain, Ytrain)
    return OMPModel
def getActionFromSparseRecovery(self, Qtable,SparseRecFlag,w,trainingOn):
    """Choose an action for ``self.state``, filling unknown Q-values from a sparse linear model.

    When ``SparseRecFlag == 0`` a weight vector is first learned with
    Orthogonal Matching Pursuit from every ``Qtable`` entry that is
    negative; otherwise the ``w`` passed in is used as-is. If the best
    Q-value in row ``self.state`` is exactly 0, the zero entries of that row
    are overwritten in place with ``np.dot(Psi, w)`` and the best action is
    chosen again.

    Args:
        Qtable: 2-D array indexed as ``Qtable[state, action]``; modified
            in place.
        SparseRecFlag: ``0`` triggers re-estimation of ``w``.
        w: Weight vector used for prediction when it is not re-estimated.
        trainingOn: If truthy, ties between best actions are broken at
            random; otherwise the first best action is taken.

    Returns:
        list: ``[w, ActionpInd]`` - the weight vector used and the chosen
        action index.

    NOTE(review): block nesting was reconstructed from flattened source;
    confirm against the original file. This is Python 2 code (print
    statements).
    """
    # Sparsity level for the plain OMP fit; also used as max_iter for the CV fit.
    n_nonzero_coefs =31
    if SparseRecFlag==0:
        # Training samples: every (state, action) cell with a negative Q-value.
        Train_index = np.where(Qtable<0)
        R=Qtable[Train_index]
        #Phi = []
        for i in range(len(Train_index[0])):
            State_array = self.stateNum2Info(Train_index[0][i])
            Action_arr = self.ActionNum2Info(Train_index[1][i])
            '''[self.TTNV, self.speed, self.AdjFreeLane, self.RLane, self.Turn, self.Lane, self.Location, self.DTAC, self.SlowerTNV, self.FasterTNV, self.DistanceToCrossCar, self.IntersectionOpen, 1]#self.Priority]'''
            # Build feature row Psi: state entries 0-1 and 7-10 are copied
            # as-is, entries 2-6 and 11 and the three action components go
            # through convertToOneHot.
            Psi= np.array([])
            Psi = np.concatenate((Psi,State_array[0:2]))
            for j in range(2,7):
                Psi = np.concatenate((Psi,self.convertToOneHot(np.array([State_array[j],agent_optionCounts[j]-1]))[0]))
            Psi = np.concatenate((Psi, State_array[7:11]))
            Psi = np.concatenate((Psi, self.convertToOneHot(np.array([State_array[11],agent_optionCounts[11]-1]))[0]))
            for k in range(3):
                Psi = np.concatenate((Psi,self.convertToOneHot(np.array([Action_arr[k],2]))[0]))
            # Stack the rows into the design matrix Phi.
            if i==0 :
                Phi = Psi
            else:
                Phi = np.vstack((Phi,Psi))
        # NOTE(review): Phi is unbound here if no Qtable entry is negative.
        omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
        omp.fit(Phi, R)
        w = omp.coef_
        idx_r, = w.nonzero()
        # The cross-validated fit is computed too, but w1 / idx_r_cv are not
        # used afterwards; only the plain OMP weights `w` are.
        omp_cv = OrthogonalMatchingPursuitCV(copy=True, fit_intercept=True, normalize=True, max_iter=n_nonzero_coefs, cv=None, n_jobs=1, verbose=False)
        omp_cv.fit(Phi, R)
        w1 = omp_cv.coef_
        idx_r_cv, = w1.nonzero()
        # return w
    #use to predict the other columns
    # All actions sharing the maximum Q-value of the current state.
    ActionpArr = np.ravel(np.where(Qtable[self.state,:]==np.amax(Qtable[self.state,:])))
    assert(len(ActionpArr) >= 1)
    if trainingOn:
        ActionpInd = np.random.choice(ActionpArr)
    else:
        ActionpInd = ActionpArr[0]
    # A best value of exactly 0 triggers filling the row from the linear model.
    if Qtable[self.state, ActionpInd]== 0 :
        for i in np.ravel(np.where(Qtable[self.state,:]==0)):
            State_array = np.array(self.stateNum2Info(int(self.state)))
            # Column index i is shifted by 0, 9 or 18 depending on
            # self.Location before being decoded by ActionNum2Info.
            if self.Location ==0 or self.Location==2:
                action = i
            elif self.Location ==1:
                action = i + 9
            else:
                action = i + 18
            Action_arr = self.ActionNum2Info(action)
            '''[self.TTNV, self.speed, self.AdjFreeLane, self.RLane, self.Turn, self.Lane, self.Location, self.DTAC, self.SlowerTNV, self.FasterTNV, self.DistanceToCrossCar, self.IntersectionOpen, 1]#self.Priority]'''
            # Same feature layout as in the training branch above.
            Psi= np.array([])
            Psi = np.concatenate((Psi,State_array[0:2]))
            for j in range(2,7):
                Psi = np.concatenate((Psi,self.convertToOneHot(np.array([State_array[j],agent_optionCounts[j]-1]).astype(int))[0]))
            Psi = np.concatenate((Psi, State_array[7:11]))
            Psi = np.concatenate((Psi, self.convertToOneHot(np.array([State_array[11],agent_optionCounts[11]-1]).astype(int))[0]))
            for k in range(3):
                Psi = np.concatenate((Psi,self.convertToOneHot(np.array([Action_arr[k],2]))[0]))
            # Debug output of the weights and the feature row.
            print w
            print Psi
            # Predicted Q-value; note the intercept of the fit is not added.
            Qtable[self.state , i ]= np.dot(Psi,w)
        # Re-select the best action now that the row has been filled in.
        ActionpArr = np.ravel(np.where(Qtable[self.state,:]==np.amax(Qtable[self.state,:])))
        assert(len(ActionpArr) >= 1)
        if trainingOn:
            ActionpInd = np.random.choice(ActionpArr)
        else:
            ActionpInd = ActionpArr[0]
        # Still exactly 0 after prediction: flag the state as not trained.
        if Qtable[self.state, ActionpInd]== 0:
            self.NotTrainedFlag = True
    return [w , ActionpInd]
# Python 2 script excerpt. `omp`, the train/test splits, `X`, `Y` and the
# `xss` helper are defined earlier in the script, outside this excerpt.

# Report test-set metrics for the `omp` model.
test_Y_pred = omp.predict(test_X)
print "测试集得分:", omp.score(test_X, test_Y)
print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
print "测试集R2:", r2_score(test_Y, test_Y_pred)

# Sum-of-squares breakdown on the full data set, as returned by xss.
tss, rss, ess, r2 = xss(Y, omp.predict(X))
print "TSS(Total Sum of Squares): ", tss
print "RSS(Residual Sum of Squares): ", rss
print "ESS(Explained Sum of Squares): ", ess
print "R^2: ", r2

print "\n**********测试OrthogonalMatchingPursuitCV类**********"
ompCV = OrthogonalMatchingPursuitCV(cv=5)
# Fit the training set
ompCV.fit(train_X, train_Y.values.ravel())
# Print the best n_nonzero_coefs value
print "最好的n_nonzero_coefs值: ", ompCV.n_nonzero_coefs_
# Print the model coefficients
print "系数:", ompCV.coef_
print "截距:", ompCV.intercept_
print '训练集R2: ', r2_score(train_Y, ompCV.predict(train_X))

# For linear regression models, performance on the test set measured by the
# Mean Squared Error (MSE) or the Root Mean Squared Error (RMSE) is generally
# used to judge how good the model is.
test_Y_pred = ompCV.predict(test_X)
print "测试集得分:", ompCV.score(test_X, test_Y)
print "测试集MSE:", mean_squared_error(test_Y, test_Y_pred)
print "测试集RMSE:", np.sqrt(mean_squared_error(test_Y, test_Y_pred))
print "测试集R2:", r2_score(test_Y, test_Y_pred)
def _stem_panel(row, heading, support, weights):
    """Stem-plot ``weights[support]`` in panel ``row`` of the 4-row figure."""
    plt.subplot(4, 1, row)
    plt.xlim(0, 512)
    plt.title(heading)
    plt.stem(support, weights[support])


# Panel 2: noise-free recovery. `coef` comes from a fit performed earlier in
# the script, outside this excerpt.
(idx_r,) = coef.nonzero()
_stem_panel(2, "Recovered signal from noise-free measurements", idx_r, coef)

# Panel 3: refit the same estimator on the noisy measurements.
omp.fit(X, y_noisy)
coef = omp.coef_
(idx_r,) = coef.nonzero()
_stem_panel(3, "Recovered signal from noisy measurements", idx_r, coef)

# Panel 4: noisy recovery with the number of non-zeros set by CV.
omp_cv = OrthogonalMatchingPursuitCV()
omp_cv.fit(X, y_noisy)
coef = omp_cv.coef_
(idx_r,) = coef.nonzero()
_stem_panel(4, "Recovered signal from noisy measurements with CV",
            idx_r, coef)

plt.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)
plt.suptitle('Sparse signal recovery with Orthogonal Matching Pursuit',
             fontsize=16)
plt.show()
def _stem_panel(row, heading, support, weights):
    """Stem-plot ``weights[support]`` in panel ``row`` of the 4-row figure."""
    plt.subplot(4, 1, row)
    plt.xlim(0, 512)
    plt.title(heading)
    plt.stem(support, weights[support])


# Panel 2: fixed sparsity, clean measurements.
opm = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
opm.fit(X, y)
coef = opm.coef_
(idx_r,) = coef.nonzero()
_stem_panel(2, "Recovered signal from noise-free measurements", idx_r, coef)

# Panel 3: same estimator refitted on the noisy measurements.
opm.fit(X, y_noise)
coef = opm.coef_
(idx_r,) = coef.nonzero()
_stem_panel(3, "Recovered signal from noisy measurements", idx_r, coef)

# Panel 4: sparsity selected by cross-validation, noisy measurements.
opm_cv = OrthogonalMatchingPursuitCV()
opm_cv.fit(X, y_noise)
coef = opm_cv.coef_
(idx_r,) = coef.nonzero()
_stem_panel(4, "Recovered signal from noisy measurements with CV",
            idx_r, coef)

plt.subplots_adjust(0.06, 0.04, 0.94, 0.90, 0.20, 0.38)
plt.suptitle('Sparse signal recovery with Orthogonal Matching Pursuit',
             fontsize=16)
plt.show()