def make_lasso_cv():
    lasso = Lasso(random_state=0)
    # X = StandardScaler().fit_transform(all_training_data)
    X = all_training_data
    y = train_labels
    scores = list()
    scores_std = list()
    n_folds = 3
    # `alphas` is assumed to be defined at module level.
    for alpha in alphas:
        lasso.alpha = alpha
        this_scores = cross_val_score(lasso, X, y, cv=n_folds, n_jobs=1)
        scores.append(np.mean(this_scores))
        scores_std.append(np.std(this_scores))

    # Refit on the best-scoring alpha. (The original refit on whatever alpha
    # the loop ended with, which was almost certainly a bug.)
    clf = Lasso(random_state=0)
    clf.alpha = alphas[int(np.argmax(scores))]
    clf.fit(X, y)
    print(clf.coef_)
    make_prediction(clf, all_training_data, train_labels)

    scores, scores_std = np.array(scores), np.array(scores_std)
    plt.figure().set_size_inches(8, 6)
    plt.semilogx(alphas, scores)

    # Plot error lines showing +/- std. errors of the scores.
    std_error = scores_std / np.sqrt(n_folds)
    plt.semilogx(alphas, scores + std_error, 'b--')
    plt.semilogx(alphas, scores - std_error, 'b--')

    # alpha=0.2 controls the translucency of the fill color.
    plt.fill_between(alphas, scores + std_error, scores - std_error, alpha=0.2)
    plt.ylabel('CV score +/- std error')
    plt.xlabel('alpha')
    plt.axhline(np.max(scores), linestyle='--', color='.5')
    print(scores.argmax(axis=0))
    plt.xlim([alphas[0], alphas[-1]])

    # TRAIN STATISTICS
    # [ 5.78095079e-08  0.00000000e+00 -6.66919151e-10  0.00000000e+00
    #  -1.46713844e-04]
    # R2 score: 0.000554848134686
    # Mean square error for model: 0.000379071509183
    # TEST STATISTICS
    # [ 8.66518676e-08  0.00000000e+00 -5.82401268e-10  0.00000000e+00
    #  -1.04665831e-04]
    # R2 score: -0.00600409234752
    # Mean square error for model: 0.000360113552396
    plt.show()
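# scikit-learn can automate the per-alpha loop above. A minimal, self-contained
# sketch using LassoCV on synthetic data (the data here is a stand-in, not the
# all_training_data used above):
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LassoCV

X, y = make_regression(n_samples=200, n_features=10, noise=1.0, random_state=0)
alphas = np.logspace(-4, -0.5, 30)

# LassoCV runs the cross-validation sweep internally and refits on the best alpha.
model = LassoCV(alphas=alphas, cv=3, random_state=0).fit(X, y)
print("best alpha:", model.alpha_)
print("coefficients:", model.coef_)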
def compare(X, y, ridge_alpha, lasso_alpha, k, plot):
    # Note: the parameter was originally misspelled `ringe_alpha`.
    kf = KFold(n_splits=10)
    kf.get_n_splits(X)
    knn_errors = []
    ridge_errors = []
    lasso_errors = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        pred_y = knn.predict(X_test)
        knn_errors.append(mean_squared_error(y_true=y_test, y_pred=pred_y))

        lasso = Lasso(normalize=True)
        lasso.alpha = lasso_alpha
        lasso.fit(X_train, y_train)
        pred_y = lasso.predict(X_test)
        lasso_errors.append(mean_squared_error(y_true=y_test, y_pred=pred_y))

        ridge = Ridge(normalize=True)
        ridge.alpha = ridge_alpha
        ridge.fit(X_train, y_train)
        pred_y = ridge.predict(X_test)
        ridge_errors.append(mean_squared_error(y_true=y_test, y_pred=pred_y))

    if plot:
        plt.plot([0, 1, 2],
                 [np.mean(knn_errors), np.mean(ridge_errors), np.mean(lasso_errors)],
                 'ro')
        plt.title("Comparison")
        # Label fixed to match the plotted order (was "ridge - 2, lasso - 3").
        plt.xlabel('models (knn - 0, ridge - 1, lasso - 2)')
        plt.ylabel('MSE')
        # plt.xscale('log')
        plt.show()
    return np.mean(knn_errors), np.mean(ridge_errors), np.mean(lasso_errors)
def process_optimized_lasso(data):
    c_alpha = 0.001
    step = 0.01
    max_alpha = 20
    min_mean_sqr_error = 10000000
    max_r2_score = 0
    global optimized_lasso_alpha
    while c_alpha <= max_alpha:
        model = Lasso()
        model.alpha = c_alpha
        model.fit(data["X_train"], data["y_train"])
        predicted_values = model.predict(data["X_test"])
        mean_sqr_error = mean_squared_error(data["y_test"], predicted_values)
        r2_score_calc = r2_score(data["y_test"], predicted_values)
        # Note: comparing on abs(r2) lets a strongly *negative* R2 win;
        # comparing on r2_score_calc directly is probably what was intended.
        if max_r2_score < abs(r2_score_calc):
            min_mean_sqr_error = mean_sqr_error
            max_r2_score = r2_score_calc
            optimized_lasso_alpha = c_alpha
        c_alpha = c_alpha + step
    return {
        "name": "LASSO",
        "data": {
            "alpha": optimized_lasso_alpha
        },
        "mean_sqr_err": min_mean_sqr_error,
        "r2_score": max_r2_score
    }
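# The `data` argument above (and in process_optimized_lasso_step2 below) is a
# plain dict of train/test splits. A hypothetical sketch of how such a dict
# might be built, using the diabetes dataset purely as a stand-in and assuming
# the usual sklearn imports (Lasso, mean_squared_error, r2_score) are in scope:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)
data = {"X_train": X_tr, "y_train": y_tr, "X_test": X_te, "y_test": y_te}

result = process_optimized_lasso(data)  # also sets the global optimized_lasso_alpha
print(result["data"]["alpha"], result["r2_score"])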
def RunLinearRegression(X, Y, X_test):
    from sklearn.linear_model import LinearRegression, Lasso
    # model = LinearRegression()
    alphas = np.logspace(-4, -1, 6)
    # Note: the original passed the whole alpha array to set_params, which is
    # invalid; alpha is set per candidate value in the loop below instead.
    model = Lasso()
    scores = [
        model.set_params(alpha=alpha).fit(X, Y).score(X, Y) for alpha in alphas
    ]
    best_alpha = alphas[scores.index(max(scores))]
    model.alpha = best_alpha
    model.fit(X, Y)
    print("Training score: ", model.score(X, Y) * 100, "%")
    runCrossValidation(model, X, Y, X_test)
    Y_test = model.predict(X_test)
    return Y_test, 'linear_regression'
def process_optimized_lasso_step2(data):
    model = Lasso()
    model.alpha = optimized_lasso_alpha
    model.fit(data["X_train"], data["y_train"])
    predicted_values = model.predict(data["X_test"])
    mean_sqr_error = mean_squared_error(data["y_test"], predicted_values)
    r2_score_calc = r2_score(data["y_test"], predicted_values)
    return {
        "name": "LASSO",
        "data": {
            "alpha": optimized_lasso_alpha
        },
        "mean_sqr_err": mean_sqr_error,
        "r2_score": r2_score_calc
    }
def graphGen(n_folds):
    start_time = time.time()
    dataaa = datasets.load_boston()  # Boston house prices dataset
    # (load_boston was removed in scikit-learn 1.2; use the diabetes data below
    # on newer versions)
    # OR USE THIS FOR DIABETES DATA
    # dataaa = datasets.load_diabetes()
    X = dataaa.data[:150]
    y = dataaa.target[:150]
    lasso = Lasso(random_state=0)
    allAlphas = np.logspace(-4, -0.5, 30)
    scores = list()
    stdrdScores = list()
    for alpha in allAlphas:
        lasso.alpha = alpha
        this_scores = cross_val_score(lasso, X, y, cv=n_folds, n_jobs=1)
        scores.append(np.mean(this_scores))
        stdrdScores.append(np.std(this_scores))
    scores, stdrdScores = np.array(scores), np.array(stdrdScores)
    plt.figure().set_size_inches(8, 6)
    plt.semilogx(allAlphas, scores)
    standarderror = stdrdScores / np.sqrt(n_folds)
    plt.semilogx(allAlphas, scores + standarderror, 'b--')
    plt.semilogx(allAlphas, scores - standarderror, 'b--')
    plt.fill_between(allAlphas, scores + standarderror, scores - standarderror,
                     alpha=0.2, color='red')
    plt.ylabel("Score +/- standard error (ideally near 0, not deviating as alpha grows)")
    plt.xlabel('alpha')
    plt.axhline(np.max(scores), linestyle='--', color='.8')
    plt.xlim([allAlphas[0], allAlphas[-1]])
    plt.savefig(str(n_folds) + ".png")
    # str() added: concatenating a float to a string raised TypeError in the original.
    print(str(n_folds) + " takes " + str(time.time() - start_time))
def calc_lasso(X, y, alphas, plot):
    kf = KFold(n_splits=10)
    kf.get_n_splits(X)
    mses = []
    for alpha in alphas:
        errors = []
        for train_index, test_index in kf.split(X):
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]
            lasso = Lasso(normalize=True)
            lasso.alpha = alpha
            lasso.fit(X_train, y_train)
            pred_y = lasso.predict(X_test)
            errors.append(mean_squared_error(y_true=y_test, y_pred=pred_y))
        mses.append(np.mean(errors))
    if plot:
        plt.plot(alphas, mses, 'ro')
        plt.title("MSE for different alpha levels for Lasso Regression")
        plt.xlabel('alpha')
        plt.ylabel('MSE')
        plt.xscale('log')
        plt.show()
    return mses
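# A hypothetical call for the function above, assuming X and y are pandas
# objects (the function indexes with .iloc) and the imports used above are in
# scope; the alpha grid here is illustrative:
import numpy as np

alphas = np.logspace(-4, 0, 20)
mses = calc_lasso(X, y, alphas, plot=True)
print("best alpha:", alphas[int(np.argmin(mses))])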
poly = PolynomialFeatures(degree=10)
modified_xTrain = poly.fit_transform(xTrain)
# transform (not fit_transform) on the test set, so it reuses the train-fitted expansion
modified_xTest = poly.transform(xTest)

train_err = []
test_err = []
lamda_vals = []
reg_weights = []
reg = Lasso(normalize=True)
for i in range(1, 11, 1):
    lamda = i * 0.01
    lamda_vals.append(lamda)
    reg.alpha = lamda  # , tol=0.00000001, max_iter=10000)
    reg.fit(modified_xTrain, yTrain)
    reg_weights.append(reg.coef_)
    train_error = math.sqrt(mean_squared_error(yTrain, reg.predict(modified_xTrain)))
    test_error = math.sqrt(mean_squared_error(yTest, reg.predict(modified_xTest)))
    # print('For lamda=', lamda, ': train_error=', train_error)
    # print('For lamda=', lamda, ': test_error=', test_error)
    train_err.append(train_error)
    test_err.append(test_error)

# In[46]:

plt.title('Lasso Regression: RMSE vs Lambda')
plt.xlabel('Lambda')
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.linear_model import Lasso
from sklearn.model_selection import cross_val_score

diabetes = datasets.load_diabetes()
X = diabetes.data[:150]
y = diabetes.target[:150]

lasso = Lasso(random_state=0)
alphas = np.logspace(-4, -0.5, 30)
scores = list()
scores_std = list()
n_folds = 3
for alpha in alphas:
    lasso.alpha = alpha
    this_scores = cross_val_score(lasso, X, y, cv=n_folds, n_jobs=1)
    scores.append(np.mean(this_scores))
    scores_std.append(np.std(this_scores))
scores, scores_std = np.array(scores), np.array(scores_std)

# the plot follows
plt.figure().set_size_inches(8, 6)
plt.semilogx(alphas, scores)
std_error = scores_std / np.sqrt(n_folds)
plt.semilogx(alphas, scores + std_error, 'b--')
plt.semilogx(alphas, scores - std_error, 'b--')
def channel_selection(inputs, module, sparsity=0.5, method='greedy'):
    """
    Selects the most important input channels of the current module, i.e. the
    input channels that can reproduce the original output most closely.
    :param inputs: torch.Tensor, input features map
    :param module: torch.nn.module, layer
    :param sparsity: float, 0 ~ 1 how many prune channel of output of this layer
    :param method: str, how to select the channel
    :return: list of int, indices of channel to be selected and pruned
    """
    num_channel = inputs.size(1)  # number of channels
    num_pruned = int(math.floor(num_channel * sparsity))  # channels to prune for the given sparsity

    if method == 'greedy':
        indices_pruned = []
        while len(indices_pruned) < num_pruned:
            min_diff = 1e10
            min_idx = 0
            for idx in range(num_channel):
                if idx in indices_pruned:
                    continue
                indices_try = indices_pruned + [idx]
                inputs_try = torch.zeros_like(inputs)
                inputs_try[:, indices_try, ...] = inputs[:, indices_try, ...]
                output_try = module(inputs_try)
                output_try_norm = output_try.norm(2)
                if output_try_norm < min_diff:
                    min_diff = output_try_norm
                    min_idx = idx
            indices_pruned.append(min_idx)
        indices_stayed = list(
            set([i for i in range(num_channel)]) - set(indices_pruned))
    elif method == 'lasso':
        y = module(inputs)
        if module.bias is not None:  # bias.shape = [N]
            bias_size = [1] * y.dim()  # bias_size: [1, 1, 1, 1]
            bias_size[1] = -1  # [1, -1, 1, 1]
            bias = module.bias.view(bias_size)  # bias.view([1, -1, 1, 1]) = [1, N, 1, 1]
            y -= bias  # subtract the bias from the output feature (y - b)
        else:
            bias = 0.
        y = y.view(-1).data.cpu().numpy()  # flatten all of outputs
        y_channel_spread = []
        for i in range(num_channel):
            x_channel_i = torch.zeros_like(inputs)
            x_channel_i[:, i, ...] = inputs[:, i, ...]
            y_channel_i = module(x_channel_i) - bias
            y_channel_spread.append(y_channel_i.data.view(-1, 1))
        y_channel_spread = torch.cat(y_channel_spread, dim=1).cpu()

        alpha = 1e-7
        solver = Lasso(alpha=alpha, warm_start=True, selection='random',
                       random_state=0)

        # Gradually increase alpha until the desired number of channels is pruned.
        alpha_l, alpha_r = 0, alpha
        num_pruned_try = 0
        while num_pruned_try < num_pruned:
            alpha_r *= 2
            solver.alpha = alpha_r
            solver.fit(y_channel_spread, y)
            num_pruned_try = sum(solver.coef_ == 0)

        # Once an alpha that prunes enough channels is found, bisect the
        # interval to home in on a more precise alpha.
        num_pruned_max = int(num_pruned * 1.1)
        while True:
            alpha = (alpha_l + alpha_r) / 2
            solver.alpha = alpha
            solver.fit(y_channel_spread, y)
            num_pruned_try = sum(solver.coef_ == 0)
            if num_pruned_try > num_pruned_max:
                alpha_r = alpha
            elif num_pruned_try < num_pruned:
                alpha_l = alpha
            else:
                break

        # Finally, convert the lasso coefficients to indices.
        indices_stayed = np.where(solver.coef_ != 0)[0].tolist()
        indices_pruned = np.where(solver.coef_ == 0)[0].tolist()
    else:
        raise NotImplementedError

    return indices_stayed, indices_pruned  # return the indices of the selected channels
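# A hypothetical smoke test for the function above, with a made-up Conv2d and
# random input (layer sizes are arbitrary); the greedy path is used here since
# it always terminates, and the snippet's own imports (math, numpy, sklearn)
# are assumed to be in scope:
import torch
import torch.nn as nn

conv = nn.Conv2d(8, 16, kernel_size=3, padding=1)
inputs = torch.randn(4, 8, 16, 16)  # N, C, H, W with 8 input channels

with torch.no_grad():
    stayed, pruned = channel_selection(inputs, conv, sparsity=0.5, method='greedy')
print("kept:", stayed, "pruned:", pruned)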
plt.show()

# LASSO
from sklearn.linear_model import Lasso

alpha_space = np.logspace(-4, 0.5, 5)
R2_test = []
R2_train = []
alpha_serie = []
lasso = Lasso(normalize=True)
for alpha in alpha_space:
    lasso.alpha = alpha
    lasso.fit(X_train_std, y_train_std)
    y_train_pred = lasso.predict(X_train_std)
    y_test_pred = lasso.predict(X_test_std)
    print('alpha = %.4f' % alpha)
    print('\tIntercept:\t%.3f' % lasso.intercept_)
    # print('%.3f' % lasso.intercept_)
    for i in range(13):
        # print('%.3f' % (lasso.coef_[i]))
        print('\tSlope #%.0f:\t%.3f' % (i + 1, lasso.coef_[i]))
    # The last argument was truncated in the original; completed to match the
    # format string (train MSE, test MSE, train R^2, test R^2).
    print('\tMSE train: %.3f, test: %.3f \tR^2 train: %.3f, test: %.3f' %
          (mean_squared_error(y_train_std, y_train_pred),
           mean_squared_error(y_test_std, y_test_pred),
           r2_score(y_train_std, y_train_pred),
           r2_score(y_test_std, y_test_pred)))
"""Looking for best alpha parameter for Lasso estimator."""
from sklearn import datasets
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV, train_test_split
import pandas as pd
import numpy as np

diabetes = datasets.load_diabetes()
X = pd.DataFrame(diabetes.data, columns=diabetes.feature_names)
y = pd.Series(diabetes.target)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

lasso = Lasso()
gscv = GridSearchCV(lasso, {'alpha': np.logspace(-4, -0.5, 30)})
gscv.fit(X_train, y_train)
gscv_score = gscv.cv_results_['mean_test_score']
best_alpha = gscv.best_params_['alpha']
print('best alpha value is {}'.format(best_alpha))

lasso.alpha = best_alpha
lasso.fit(X_train, y_train)
lasso_score = lasso.score(X_test, y_test)
print('best lasso score is {}'.format(lasso_score))
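# Since GridSearchCV refits the best estimator by default (refit=True), the
# manual refit above can also be replaced with the fitted best_estimator_; an
# equivalent shortcut using the same gscv object:
best_lasso = gscv.best_estimator_  # already refitted on X_train with the best alpha
print('best lasso score is {}'.format(best_lasso.score(X_test, y_test)))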
def channel_selection(inputs, layer, sparsity, method="lasso",
                      data_format="channels_last"):
    """
    Selects which of the given layer's output channels to prune. This function
    picks the channels to prune in the current layer (and the filters that
    produce them) and returns their indices. To select channels, the next
    layer's input (X) is compared against the next layer's output (Y).
    :param sparsity: float, 0 ~ 1 how many prune channel of output of this layer
    :param inputs: Tensor, input features map for next layer (corresponding to output of this layer)
    :param layer: module of the next layer (conv)
    :param method: str, how to select the channel
    :return: list of int, indices of filters to be pruned
    """
    # number of channels; next_inputs -> NHWC
    num_channel = inputs.shape[-1] if data_format == "channels_last" else inputs.shape[1]
    num_pruned = int(math.floor(num_channel * sparsity))  # channels to prune for the given sparsity

    # lasso-based channel selection
    if method == "lasso":
        if layer.use_bias:
            bias = layer.get_weights()[1].reshape((1, 1, 1, -1))
        else:
            bias = np.zeros((1, 1, 1, -1))
        outputs = layer(inputs).numpy()
        outputs = outputs - bias
        y = np.reshape(outputs, -1)
        x = []
        for i in range(num_channel):
            inputs_channel_i = np.zeros_like(inputs)
            inputs_channel_i[:, :, :, i] = inputs[:, :, :, i]
            outputs_channel_i = layer(inputs_channel_i).numpy()
            outputs_channel_i = outputs_channel_i - bias
            x.append(np.reshape(outputs_channel_i, -1))
        x = np.stack(x, axis=1)
        x = x[np.nonzero(y)]
        y = y[np.nonzero(y)]

        alpha = 1e-7
        solver = Lasso(alpha=alpha, warm_start=True, selection='random',
                       random_state=0)

        # Gradually increase alpha until the desired number of channels is pruned.
        alpha_l, alpha_r = 0, alpha
        num_pruned_try = 0
        while num_pruned_try < num_pruned:
            alpha_r *= 2
            solver.alpha = alpha_r
            solver.fit(x, y)
            num_pruned_try = sum(solver.coef_ == 0)

        # Once an alpha that prunes enough channels is found, bisect the
        # interval to home in on a more precise alpha.
        num_pruned_max = int(num_pruned * 1.1)
        while True:
            alpha = (alpha_l + alpha_r) / 2
            solver.alpha = alpha
            solver.fit(x, y)
            num_pruned_try = sum(solver.coef_ == 0)
            if num_pruned_try > num_pruned_max:
                alpha_r = alpha
            elif num_pruned_try < num_pruned:
                alpha_l = alpha
            else:
                break

        # Finally, convert the lasso coefficients to indices.
        indices_stayed = np.where(solver.coef_ != 0)[0].tolist()
        indices_pruned = np.where(solver.coef_ == 0)[0].tolist()

    # greedy channel selection
    elif method == "greedy":
        channels_norm = []
        for i in range(num_channel):
            inputs_channel_i = np.zeros_like(inputs)
            inputs_channel_i[:, :, :, i] = inputs[:, :, :, i]
            outputs_channel_i = layer(inputs_channel_i).numpy()
            outputs_channel_i_norm = np.linalg.norm(outputs_channel_i)
            channels_norm.append(outputs_channel_i_norm)
        indices_pruned = np.argsort(channels_norm)[:num_pruned]
        mask = np.ones(num_channel, bool)  # np.bool is removed in newer numpy
        mask[indices_pruned] = 0
        indices_stayed = np.arange(num_channel)[mask].tolist()
    else:
        raise NotImplementedError

    return indices_pruned, indices_stayed  # return the indices of the selected channels
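# A hypothetical smoke test for the TensorFlow variant above; the layer and
# input sizes are made up, the greedy path is used because it needs no alpha
# search, and the snippet's own imports (math, numpy, sklearn) are assumed:
import numpy as np
import tensorflow as tf

layer = tf.keras.layers.Conv2D(16, 3, padding="same", use_bias=True)
inputs = np.random.randn(4, 8, 8, 8).astype("float32")  # NHWC, 8 channels
_ = layer(inputs)  # build the layer so get_weights() returns the bias

pruned, stayed = channel_selection(inputs, layer, sparsity=0.5, method="greedy")
print("pruned:", pruned, "kept:", stayed)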
def channel_selection(sparsity, output_feature, fn_next_output_feature,
                      method='greedy'):
    """
    select channel to prune with a given metric
    :param sparsity: float, pruning sparsity
    :param output_feature: torch.(cuda.)Tensor, output feature map of the layer being pruned
    :param fn_next_output_feature: function, function to calculate the next output feature map
    :param method: str
        'greedy': select one contributed to the smallest next feature after another
        'lasso': select pruned channels by lasso regression
        'random': randomly select
    :return: list of int, indices of filters to be pruned
    """
    num_channel = output_feature.size(1)
    num_pruned = int(math.floor(num_channel * sparsity))

    if method == 'greedy':
        indices_pruned = []
        while len(indices_pruned) < num_pruned:
            min_diff = 1e10
            min_idx = 0
            for idx in range(num_channel):
                if idx in indices_pruned:
                    continue
                indices_try = indices_pruned + [idx]
                output_feature_try = torch.zeros_like(output_feature)
                output_feature_try[:, indices_try, ...] = output_feature[:, indices_try, ...]
                output_feature_try = fn_next_output_feature(output_feature_try)
                output_feature_try_norm = output_feature_try.norm(2)
                if output_feature_try_norm < min_diff:
                    min_diff = output_feature_try_norm
                    min_idx = idx
            indices_pruned.append(min_idx)
    elif method == 'lasso':
        next_output_feature = fn_next_output_feature(output_feature)
        num_el = next_output_feature.numel()
        next_output_feature = next_output_feature.data.view(num_el).cpu()
        next_output_feature_divided = []
        for idx in range(num_channel):
            output_feature_try = torch.zeros_like(output_feature)
            output_feature_try[:, idx, ...] = output_feature[:, idx, ...]
            output_feature_try = fn_next_output_feature(output_feature_try)
            next_output_feature_divided.append(
                output_feature_try.data.view(num_el, 1))
        next_output_feature_divided = torch.cat(next_output_feature_divided,
                                                dim=1).cpu()
        alpha = 5e-5
        solver = Lasso(alpha=alpha, warm_start=True, selection='random')

        # first, try to find an alpha that provides enough pruned channels
        alpha_l, alpha_r = 0, alpha
        num_pruned_try = 0
        while num_pruned_try < num_pruned:
            alpha_r *= 2
            solver.alpha = alpha_r
            solver.fit(next_output_feature_divided, next_output_feature)
            num_pruned_try = sum(solver.coef_ == 0)

        # then, narrow down alpha to get closer to the desired number of pruned channels
        num_pruned_max = int(num_pruned * num_pruned_tolerate_coeff)
        while True:
            alpha = (alpha_l + alpha_r) / 2
            solver.alpha = alpha
            solver.fit(next_output_feature_divided, next_output_feature)
            num_pruned_try = sum(solver.coef_ == 0)
            if num_pruned_try > num_pruned_max:
                alpha_r = alpha
            elif num_pruned_try < num_pruned:
                alpha_l = alpha
            else:
                break

        # finally, convert lasso coeff to indices: zero coefficients mark pruned
        # channels (the original took coef_.nonzero(), i.e. the *kept* channels,
        # which contradicts the function's return value)
        indices_pruned = (solver.coef_ == 0).nonzero()[0].tolist()
    elif method == 'random':
        indices_pruned = random.sample(range(num_channel), num_pruned)
    else:
        raise NotImplementedError

    return indices_pruned
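# The same grow-then-bisect alpha search recurs in several of these snippets.
# A distilled, standalone sketch of the pattern on synthetic data (the function
# name and its arguments are illustrative, not from the original code), with an
# iteration cap added since an exact coefficient count may be unreachable:
import numpy as np
from sklearn.linear_model import Lasso

def find_alpha_for_sparsity(X, y, target_pruned, tol=1, max_iter=50):
    solver = Lasso(alpha=1e-4, warm_start=True)
    alpha_l, alpha_r = 0.0, 1e-4
    # Phase 1: double alpha until at least target_pruned coefficients are zero.
    while True:
        solver.alpha = alpha_r
        solver.fit(X, y)
        if np.sum(solver.coef_ == 0) >= target_pruned:
            break
        alpha_r *= 2
    # Phase 2: bisect toward the target count, within a tolerance.
    for _ in range(max_iter):
        alpha = (alpha_l + alpha_r) / 2
        solver.alpha = alpha
        solver.fit(X, y)
        pruned = int(np.sum(solver.coef_ == 0))
        if pruned > target_pruned + tol:
            alpha_r = alpha
        elif pruned < target_pruned:
            alpha_l = alpha
        else:
            break
    return alpha, np.where(solver.coef_ == 0)[0]

# Example: drive roughly 10 of 30 synthetic coefficients to zero.
rng = np.random.default_rng(0)
X = rng.standard_normal((200, 30))
y = X[:, :5].sum(axis=1) + 0.1 * rng.standard_normal(200)
alpha, pruned_idx = find_alpha_for_sparsity(X, y, target_pruned=10)
print(alpha, len(pruned_idx))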
# Fragment: the next block runs inside a loop over candidate Ridge alphas (`val`).
beta, prediRidge = ridge_regression(x_train, x_test, y_train, val)
mseRidge = mse(prediRidge, y_test)
if mseRidge <= MSE_R:
    alphaR = val
    Beta_R = beta
    pred_Ridge = prediRidge
    MSE_R = mseRidge

# Print results
print('\n--Ridge regression--')
print('The best value for alpha = ', alphaR)
print(tabulate(all_values(pred_Ridge, y_test)))

# --------------Lasso Regression-------------
from sklearn.linear_model import Lasso  # Using sklearn as I did in the 1st project

alphasL = np.logspace(-4, 5, 10)
regr = Lasso()
scores = [
    regr.set_params(alpha=alpha).fit(x_train, y_train).score(x_test, y_test)
    for alpha in alphasL
]
best_alpha = alphasL[scores.index(max(scores))]
regr.alpha = best_alpha
regr.fit(x_train, y_train)
pred_Lasso = regr.predict(x_test)

# Print results
print('\n--Lasso regression--')
print('The best value for alpha = ', best_alpha)
print(tabulate(all_values(pred_Lasso, y_test)))
def oppgave_6(o=15, seed=4, test=True):
    # Load the terrain
    terrain = imread("{}SRTM_data_Norway_1.tif".format(image_path))

    # Show the terrain
    plt.figure()
    plt.title('Terrain Norway 1, Original')
    plt.imshow(terrain, cmap='gray')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.show()

    # Pick out a small square to analyze if test is set to True
    if test:
        square_size = 100
        x_shift = np.random.randint(0, 1801 - square_size)
        y_shift = np.random.randint(0, 3601 - square_size)
        terrain = terrain[y_shift:y_shift + square_size,
                          x_shift:x_shift + square_size]
        plt.figure()
        plt.title('Terrain part 1, Original {} pt box'.format(square_size))
        plt.imshow(terrain, cmap='gray')
        plt.xlabel('X')
        plt.ylabel('Y')
        plt.show()
    else:
        # Use settings determined by analysing small squares for analysis on
        # the entire dataset. Attempting to rebuild the image from a model
        # based on evenly spaced datapoints.

        # Set model parameters
        order = 15
        # Ridge parameter
        lmd = 0.0001
        # Lasso parameter
        alph = 0.0001
        # Set the coarseness of the sample grid
        coarseness = 5

        x_dimension_original = len(terrain[0, :])
        y_dimension_original = len(terrain[:, 0])
        x_dimension = x_dimension_original // coarseness
        y_dimension = y_dimension_original // coarseness
        terrain_points = np.zeros((y_dimension, x_dimension))
        for x_axis in range(x_dimension):
            for y_axis in range(y_dimension):
                terrain_points[y_axis, x_axis] = terrain[y_axis * coarseness,
                                                         x_axis * coarseness]

        # Create mesh grid for training data, selected points
        x = np.linspace(0, 1, x_dimension)
        y = np.linspace(0, 1, y_dimension)
        x_grid, y_grid = np.meshgrid(x, y)

        # Create mesh grid for original data
        x_original = np.linspace(0, 1, x_dimension_original)
        y_original = np.linspace(0, 1, y_dimension_original)
        x_grid_original, y_grid_original = np.meshgrid(x_original, y_original)

        # Flatten grids
        data = np.ravel(terrain_points)
        data_original = np.ravel(terrain)
        x = np.ravel(x_grid)
        y = np.ravel(y_grid)
        x_original = np.ravel(x_grid_original)
        y_original = np.ravel(y_grid_original)

        # Create a scaler to normalize data
        scaler = MinMaxScaler()
        print("Running time: {} seconds".format(time() - t0))
        scaler.fit(data.reshape(-1, 1))

        # Normalizing training data
        normalized_data = scaler.transform(data.reshape(-1, 1))
        normalized_data = normalized_data[:, 0]

        # Normalizing original data (not used?)
        normalized_data_original = scaler.transform(data_original.reshape(-1, 1))
        normalized_data_original = normalized_data_original[:, 0]

        # Initiate instances of the regressors
        linear_regression = LinearRegression()
        ridge_regression = Ridge(solver="svd", alpha=lmd)
        lasso_regression = Lasso(alpha=alph)
        print("Running time: {} seconds".format(time() - t0))

        # Create training matrix
        A = design_matrix(order, x, y)
        # Remove intercept
        A = A[:, 1:]
        print("Running time: {} seconds".format(time() - t0))

        # Create prediction matrix
        X_test = design_matrix(order, x_original, y_original)
        X_test = X_test[:, 1:]
        print("Running time: {} seconds".format(time() - t0))

        # Make prediction using OLS model
        linear_regression.fit(A, normalized_data)
        rebuilt = linear_regression.predict(X_test)
        print("OLS MSE: ", MSE(normalized_data_original, rebuilt))
        rebuilt = scaler.inverse_transform(rebuilt.reshape(-1, 1))
        rebuilt = np.reshape(rebuilt, y_grid_original.shape)
        fig_rebuild = plt.figure(figsize=(9, 5))
        ax1 = fig_rebuild.add_subplot(131)
        ax2 = fig_rebuild.add_subplot(132)
        ax3 = fig_rebuild.add_subplot(133)
        plt.title('Terrain Norway 1, rebuild')
        ax1.imshow(rebuilt, cmap='gray')
        plt.xlabel('X')
        plt.ylabel('Y')

        # Make prediction using Ridge model
        ridge_regression.fit(A, normalized_data)
        rebuilt = ridge_regression.predict(X_test)
        print("Ridge MSE: ", MSE(normalized_data_original, rebuilt))
        rebuilt = scaler.inverse_transform(rebuilt.reshape(-1, 1))
        print("Running time: {} seconds".format(time() - t0))
        rebuilt = np.reshape(rebuilt, y_grid_original.shape)
        ax2.imshow(rebuilt, cmap='gray')

        # Make prediction using LASSO model
        lasso_regression.fit(A, normalized_data)
        rebuilt = lasso_regression.predict(X_test)
        print("LASSO MSE: ", MSE(normalized_data_original, rebuilt))
        rebuilt = scaler.inverse_transform(rebuilt.reshape(-1, 1))
        print("Running time: {} seconds".format(time() - t0))
        rebuilt = np.reshape(rebuilt, y_grid_original.shape)
        ax3.imshow(rebuilt, cmap='gray')
        fig_rebuild.savefig("{}TerrainRebuilOrder{}P4.png".format(plots_path, order))
        return ()

    # Get dimensions of the data set and make a grid to base the model on
    y_dimension = len(terrain[:, 0])
    x_dimension = len(terrain[0, :])
    x = np.linspace(0, 1, x_dimension)
    y = np.linspace(0, 1, y_dimension)
    x_grid, y_grid = np.meshgrid(x, y)

    # Flatten grid
    data = np.ravel(terrain)
    x = np.ravel(x_grid)
    y = np.ravel(y_grid)

    # Set random seed
    np.random.seed(seed)

    # Create a scaler to normalize data
    scaler = MinMaxScaler()
    scaler.fit(data.reshape(-1, 1))
    normalized_data = scaler.transform(data.reshape(-1, 1))
    normalized_data = normalized_data[:, 0]

    # Create an instance of sklearn's KFold class to split data for k-fold CV
    splits = 5
    kfold = KFold(n_splits=splits, shuffle=True)

    # Sets a range of polynomial orders to fit to the data
    polynomial_order = np.arange(o) + 1

    # ---------OLS------------------------------
    # ------------------------------------------
    # Solve using OLS
    linear_regression = LinearRegression()
    dta = list()
    for order in polynomial_order:
        print("Using polynomial order {}".format(order))
        # Creating design matrix
        A = design_matrix(order, x, y)
        mse_test = np.zeros(splits)
        mse_train = np.zeros(splits)
        counter = 0
        # Initiating k-fold CV
        for train_index, test_index in kfold.split(normalized_data):
            print("Calculating fold {} of {}".format(counter + 1, splits))
            X_train, X_test = A[train_index], A[test_index]
            y_train, y_test = normalized_data[train_index], normalized_data[test_index]
            # Using current polynomial order and fold to solve using OLS
            linear_regression.fit(X_train, y_train)
            ytilde = linear_regression.predict(X_train)
            ypredict = linear_regression.predict(X_test)
            # Get MSE metric for training and testing data
            mse_test[counter] = MSE(y_test, ypredict)
            mse_train[counter] = MSE(y_train, ytilde)
            counter = counter + 1
            print(counter)
            print("Running time: {} seconds".format(time() - t0))
        dta.append(["{}".format(order), mse_test.mean(), mse_train.mean()])

    '''
    rebuilt = linear_regression.predict(A)
    rebuilt = scaler.inverse_transform(rebuilt.reshape(-1, 1))
    rebuilt = np.reshape(rebuilt, y_grid.shape)
    plt.figure()
    plt.title('Terrain Norway 1, rebuild')
    plt.imshow(rebuilt, cmap='gray')
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.show()
    '''

    df = pd.DataFrame(dta, columns=["Polynomial", "MSE test set", "MSE training set"])
    plt.figure()
    fig1 = plt.figure(figsize=(8, 4))
    ax1 = fig1.add_subplot(111)
    ax1.set_position([0.1, 0.1, 0.6, 0.8])
    ax1.set_xlabel("Polynomial order")
    ax1.set_ylabel("Training MSE")
    fig2 = plt.figure(figsize=(8, 4))
    ax2 = fig2.add_subplot(111)
    ax2.set_position([0.1, 0.1, 0.6, 0.8])
    ax2.set_xlabel("Polynomial order")
    ax2.set_ylabel("Testing MSE")
    ax1.plot(df["Polynomial"], df["MSE training set"], label="Training OLS")
    ax2.plot(df["Polynomial"], df["MSE test set"], label="Test OLS")
    fig1.legend(bbox_to_anchor=(0.71, 0.5), loc="center left", borderaxespad=0)
    fig2.legend(bbox_to_anchor=(0.71, 0.5), loc="center left", borderaxespad=0)
    fig1.savefig("{}TerrainOLStrainSeed{}.png".format(plots_path, seed))
    fig2.savefig("{}TerrainOLStestSeed{}.png".format(plots_path, seed))

    # ---------RIDGE----------------------------
    # ------------------------------------------
    # Creates a dictionary to store dataframes for each Ridge parameter
    dataframe_dic = dict()
    ridge_regression = Ridge(solver="svd")
    # Set a range of shrinkage factors for the Ridge regression
    lambdas = np.logspace(-5, -1, 10)
    for lmd in lambdas:
        print("Calculating Ridge, lambda: {}".format(lmd))
        # Creates a list to store the results of each iteration in
        dta = list()
        for order in polynomial_order:
            print("Using polynomial order {}".format(order))
            # Creating design matrix
            A = design_matrix(order, x, y)
            # Removing intercept
            A = A[:, 1:]
            lambda_mse_test = np.zeros(splits)
            lambda_mse_train = np.zeros(splits)
            counter = 0
            # Initiating k-fold CV
            for train_index, test_index in kfold.split(normalized_data):
                X_train, X_test = A[train_index], A[test_index]
                y_train, y_test = normalized_data[train_index], normalized_data[test_index]
                # Using current lambda and polynomial order, solve using Ridge
                ridge_regression.alpha = lmd
                ridge_regression.fit(X_train, y_train)
                # Estimate testing and training data
                ypredict = ridge_regression.predict(X_test)
                ytilde = ridge_regression.predict(X_train)
                # Get MSE metric for training and testing data
                lambda_mse_test[counter] = MSE(y_test, ypredict)
                lambda_mse_train[counter] = MSE(y_train, ytilde)
                print("Calculating fold {} of {}".format(counter + 1, splits))
                counter = counter + 1
            print("Running time: {} seconds".format(time() - t0))
            dta.append(["{}".format(order), lambda_mse_test.mean(),
                        lambda_mse_train.mean()])

        '''
        rebuilt = ridge_regression.predict(A)
        rebuilt = scaler.inverse_transform(rebuilt.reshape(-1, 1))
        rebuilt = np.reshape(rebuilt, y_grid.shape)
        plt.figure()
        plt.title('Terrain Norway 1, rebuild')
        plt.imshow(rebuilt, cmap='gray')
        plt.xlabel('X')
        plt.ylabel('Y')
        plt.show()
        '''

        df = pd.DataFrame(dta, columns=["Polynomial", "MSE test set", "MSE training set"])
        dataframe_dic[lmd] = df

    cmap = plt.get_cmap('jet_r')
    plt.figure()
    fig1 = plt.figure(figsize=(8, 4))
    ax1 = fig1.add_subplot(111)
    ax1.set_position([0.1, 0.1, 0.6, 0.8])
    ax1.set_xlabel("Polynomial order")
    ax1.set_ylabel("Training MSE")
    fig2 = plt.figure(figsize=(8, 4))
    ax2 = fig2.add_subplot(111)
    ax2.set_position([0.1, 0.1, 0.6, 0.8])
    ax2.set_xlabel("Polynomial order")
    ax2.set_ylabel("Testing MSE")
    n = 0
    for df in dataframe_dic:
        ax1.plot(dataframe_dic[df]["Polynomial"],
                 dataframe_dic[df]["MSE training set"],
                 color=cmap(float(n) / len(lambdas)),
                 label="Alpha=%10.2E" % (df))
        ax2.plot(dataframe_dic[df]["Polynomial"],
                 dataframe_dic[df]["MSE test set"],
                 color=cmap(float(n) / len(lambdas)),
                 label="Alpha=%10.2E" % (df))
        n = n + 1
    fig1.legend(bbox_to_anchor=(0.71, 0.5), loc="center left", borderaxespad=0)
    fig2.legend(bbox_to_anchor=(0.71, 0.5), loc="center left", borderaxespad=0)
    fig1.savefig("{}TerrainRidgetrainSeed{}.png".format(plots_path, seed))
    fig2.savefig("{}TerrainRidgetestSeed{}.png".format(plots_path, seed))

    # ---------LASSO----------------------------
    # ------------------------------------------
    # Create an instance of the Lasso class from sklearn
    lasso_regression = Lasso()
    # Set a range of shrinkage factors for the LASSO regression
    alphas = np.logspace(-5, -2, 10)
    dataframe_dic = dict()
    for alph in alphas:
        print("Calculating LASSO, alpha: {}".format(alph))
        # Creates a list to store the results of each iteration in
        dta = list()
        for order in polynomial_order:
            print("Using polynomial order {}".format(order))
            # Creating design matrix
            A = design_matrix(order, x, y)
            # Removing intercept
            A = A[:, 1:]
            alpha_mse_test = np.zeros(splits)
            alpha_mse_train = np.zeros(splits)
            counter = 0
            # Initiating k-fold CV
            for train_index, test_index in kfold.split(normalized_data):
                X_train, X_test = A[train_index], A[test_index]
                y_train, y_test = normalized_data[train_index], normalized_data[test_index]
                # Using current alpha and polynomial order, solve using Lasso
                lasso_regression.alpha = alph
                lasso_regression.fit(X_train, y_train)
                # Estimate testing and training data
                ypredict = lasso_regression.predict(X_test)
                ytilde = lasso_regression.predict(X_train)
                # Get MSE metric for training and testing data
                alpha_mse_test[counter] = MSE(y_test, ypredict)
                alpha_mse_train[counter] = MSE(y_train, ytilde)
                print("Calculating fold {} of {}".format(counter + 1, splits))
                counter = counter + 1
            print("Running time: {} seconds".format(time() - t0))
            dta.append(["{}".format(order), alpha_mse_test.mean(),
                        alpha_mse_train.mean()])
        df = pd.DataFrame(dta, columns=["Polynomial", "MSE test set", "MSE training set"])
        dataframe_dic[alph] = df

    cmap = plt.get_cmap('jet_r')
    plt.figure()
    fig1 = plt.figure(figsize=(8, 4))
    ax1 = fig1.add_subplot(111)
    ax1.set_position([0.1, 0.1, 0.6, 0.8])
    ax1.set_xlabel("Polynomial order")
    ax1.set_ylabel("Training MSE")
    fig2 = plt.figure(figsize=(8, 4))
    ax2 = fig2.add_subplot(111)
    ax2.set_position([0.1, 0.1, 0.6, 0.8])
    ax2.set_xlabel("Polynomial order")
    ax2.set_ylabel("Testing MSE")
    n = 0
    for df in dataframe_dic:
        ax1.plot(dataframe_dic[df]["Polynomial"],
                 dataframe_dic[df]["MSE training set"],
                 color=cmap(float(n) / len(alphas)),
                 label="Alpha=%10.2E" % (df))
        ax2.plot(dataframe_dic[df]["Polynomial"],
                 dataframe_dic[df]["MSE test set"],
                 color=cmap(float(n) / len(alphas)),
                 label="Alpha=%10.2E" % (df))
        n = n + 1
    fig1.legend(bbox_to_anchor=(0.71, 0.5), loc="center left", borderaxespad=0)
    fig2.legend(bbox_to_anchor=(0.71, 0.5), loc="center left", borderaxespad=0)
    fig1.savefig("{}TerrainLASSOtrainSeed{}.png".format(plots_path, seed))
    fig2.savefig("{}TerrainLASSOtestSeed{}.png".format(plots_path, seed))
def channel_selection(inputs, module, sparsity=0.5, method='greedy'):
    """
    Selects the most important input channels of the current module, i.e. the
    input channels that can reproduce the original output most closely.
    :param inputs: torch.Tensor, input features map
    :param module: torch.nn.module, layer
    :param sparsity: float, 0 ~ 1 how many prune channel of output of this layer
    :param method: str, how to select the channel
    :return: list of int, indices of channel to be selected and pruned
    """
    num_channel = inputs.size(1)  # number of channels
    num_pruned = int(math.ceil(num_channel * sparsity))  # channels to prune for the given sparsity
    num_stayed = num_channel - num_pruned
    print('num_pruned', num_pruned)

    if method == 'greedy':
        indices_pruned = []
        while len(indices_pruned) < num_pruned:
            min_diff = 1e10
            min_idx = 0
            for idx in range(num_channel):
                if idx in indices_pruned:
                    continue
                indices_try = indices_pruned + [idx]
                inputs_try = torch.zeros_like(inputs)
                inputs_try[:, indices_try, ...] = inputs[:, indices_try, ...]
                output_try = module(inputs_try)
                output_try_norm = output_try.norm(2)
                if output_try_norm < min_diff:
                    min_diff = output_try_norm
                    min_idx = idx
            indices_pruned.append(min_idx)
        print('indices_pruned !!! ', indices_pruned)
        indices_stayed = list(
            set([i for i in range(num_channel)]) - set(indices_pruned))
    elif method == 'greedy_GM':
        indices_stayed = []
        while len(indices_stayed) < num_stayed:
            max_farthest_channel_norm = 1e-10
            farthest_channel_idx = 0
            for idx in range(num_channel):
                if idx in indices_stayed:
                    continue
                indices_try = indices_stayed + [idx]
                inputs_try = torch.zeros_like(inputs)
                inputs_try[:, indices_try, ...] = inputs[:, indices_try, ...]
                output_try = module(inputs_try).view(
                    num_channel, -1).cpu().detach().numpy()
                similar_matrix = distance.cdist(output_try, output_try, 'euclidean')
                similar_sum = np.sum(np.abs(similar_matrix), axis=0)
                similar_large_index = similar_sum.argsort()[-1]
                farthest_channel_norm = np.linalg.norm(
                    similar_sum[similar_large_index])
                if max_farthest_channel_norm < farthest_channel_norm:
                    max_farthest_channel_norm = farthest_channel_norm
                    farthest_channel_idx = idx
            print(farthest_channel_idx)
            indices_stayed.append(farthest_channel_idx)
        print('indices_stayed !!! ', indices_stayed)
        indices_pruned = list(
            set([i for i in range(num_channel)]) - set(indices_stayed))
    elif method == 'lasso':
        y = module(inputs)
        if module.bias is not None:  # bias.shape = [N]
            bias_size = [1] * y.dim()  # bias_size: [1, 1, 1, 1]
            bias_size[1] = -1  # [1, -1, 1, 1]
            bias = module.bias.view(bias_size)  # bias.view([1, -1, 1, 1]) = [1, N, 1, 1]
            y -= bias  # subtract the bias from the output feature (y - b)
        else:
            bias = 0.
        y = y.view(-1).data.cpu().numpy()  # flatten all of outputs
        y_channel_spread = []
        for i in range(num_channel):
            x_channel_i = torch.zeros_like(inputs)
            x_channel_i[:, i, ...] = inputs[:, i, ...]
            y_channel_i = module(x_channel_i) - bias
            y_channel_spread.append(y_channel_i.data.view(-1, 1))
        y_channel_spread = torch.cat(y_channel_spread, dim=1).cpu()

        alpha = 1e-7
        solver = Lasso(alpha=alpha, warm_start=True, selection='random',
                       random_state=0)
        # choice_idx = np.random.choice(y_channel_spread.size()[0], 2000, replace=False)
        # selected_y_channel_spread = y_channel_spread[choice_idx, :]
        # new_output = y[choice_idx]
        #
        # del y_channel_spread, y

        # Gradually increase alpha until the desired number of channels is pruned.
        alpha_l, alpha_r = 0, alpha
        num_pruned_try = 0
        while num_pruned_try < num_pruned:
            alpha_r *= 2
            solver.alpha = alpha_r
            # solver.fit(selected_y_channel_spread, new_output)
            solver.fit(y_channel_spread, y)
            num_pruned_try = sum(solver.coef_ == 0)

        # Once an alpha that prunes enough channels is found, bisect the
        # interval to home in on a more precise alpha.
        num_pruned_max = int(num_pruned)
        while True:
            alpha = (alpha_l + alpha_r) / 2
            solver.alpha = alpha
            # solver.fit(selected_y_channel_spread, new_output)
            solver.fit(y_channel_spread, y)
            num_pruned_try = sum(solver.coef_ == 0)
            if num_pruned_try > num_pruned_max:
                alpha_r = alpha
            elif num_pruned_try < num_pruned:
                alpha_l = alpha
            else:
                break

        # Finally, convert the lasso coefficients to indices.
        indices_stayed = np.where(solver.coef_ != 0)[0].tolist()
        indices_pruned = np.where(solver.coef_ == 0)[0].tolist()
    else:
        raise NotImplementedError

    inputs = inputs.cuda()
    module = module.cuda()

    return indices_stayed, indices_pruned  # return the indices of the selected channels
def fit(self, y, fill_to_max=True, verbose=False):
    """fill_to_max: whether or not to return `max_non_zero_entry` items,
    i.e. whether to include trailing zeros if the # of non-zero entries is
    less than `max_non_zero_entry`.
    """
    y = np.r_[y, 1]
    alpha_coefficient = 2.0 * self.bases.shape[0]
    min_alpha, max_alpha = 1e-4, 1000
    self.alpha_t = 15.0
    shrinkage_factor, expand_factor = 0.9, 1.5
    clf = Lasso(alpha=self.alpha_t / alpha_coefficient)

    # Check if max_alpha is large enough (for debug purposes)
    # clf.alpha = max_alpha / alpha_coefficient
    # clf.fit(self.bases, y)
    # x = clf.coef_
    # num_non_zero_entry = np.count_nonzero(x)
    # if num_non_zero_entry > self.max_non_zero_entry:
    #     print("max_alpha = {0}, clf.alpha = {1} too small!".format(max_alpha, clf.alpha))

    while True:
        clf.alpha = self.alpha_t / alpha_coefficient
        clf.fit(self.bases, y)
        x = clf.coef_
        num_non_zero_entry = np.count_nonzero(x)
        if num_non_zero_entry > self.max_non_zero_entry:
            # too many non-zero entries => current alpha too small
            min_alpha = self.alpha_t
            new_alpha_t = self.alpha_t * expand_factor
            if new_alpha_t > max_alpha:
                self.alpha_t = (new_alpha_t + max_alpha) / 2.0
            else:
                self.alpha_t = new_alpha_t
        else:
            # too few non-zero entries => current alpha too large
            max_alpha = self.alpha_t
            self.alpha_t *= shrinkage_factor
            if self.alpha_t < min_alpha:
                break

    if verbose:
        lf, l1, l2 = self.loss(x, y)
        print(lf, l1, l2)
        print("Alpha range: {0} {1}".format(min_alpha, max_alpha))
        print("# of non-zero entries: {0}".format(np.count_nonzero(x)))

    # indices and values are all 1-D np.ndarrays
    indices = np.nonzero(x)[0]
    values = x[indices]
    if fill_to_max:
        num_base = self.bases.shape[1]
        random_basis_to_append = dict()
        i = 0
        while i + num_non_zero_entry < self.max_non_zero_entry:
            r = random.randint(0, num_base - 1)
            # print(r, indices)
            if (r not in indices) and (r not in random_basis_to_append):
                random_basis_to_append[r] = True
                i += 1
        indices = np.r_[
            indices, np.array([key for key in random_basis_to_append.keys()])]
        values = np.r_[values,
                       np.zeros(self.max_non_zero_entry - num_non_zero_entry)]
    return indices, values
def oppgave_5(o=5, level_of_noise=0, seed=1):
    # Setting the number of datapoints, the amount of noise in the Franke
    # function, and the order of the polynomial used as a model.
    number_of_datapoints = 40
    np.random.seed(seed)

    # Making the input and output vectors of the dataset
    x, y = make_data(number_of_datapoints)
    z, noise = franke_function(x, y, level_of_noise, seed)

    # Flattening matrices for easier handling
    xDim1 = np.ravel(x)
    yDim1 = np.ravel(y)
    x = xDim1
    y = yDim1

    # Franke's function without noise
    true = np.ravel(z)
    # Franke's function with noise
    noisy = true + np.ravel(noise)

    # Create an instance of the Lasso regressor
    lasso_regression = Lasso()

    # Create an instance of sklearn's KFold class to split data for k-fold CV
    splits = 5
    kfold = KFold(n_splits=splits, shuffle=True)

    # Sets a range of polynomial orders to fit to the data
    polynomial_order = np.arange(o) + 1

    # Set a range of shrinkage factors for the LASSO regression
    alphas = np.logspace(-5, -2, 10)

    # Creates a dictionary to store dataframes for each LASSO parameter
    dataframe_dic = dict()
    for alph in alphas:
        print("Calculating LASSO, alpha: {}".format(alph))
        # Creates a list to store the results of each iteration in
        dta = list()
        for order in polynomial_order:
            print("Using polynomial order {}".format(order))
            # Creating design matrix
            A = design_matrix(order, x, y)
            # Remove intercept
            A = A[:, 1:]
            alpha_mse_test = np.zeros(splits)
            alpha_mse_train = np.zeros(splits)
            counter = 0
            # Initiating k-fold CV
            for train_index, test_index in kfold.split(noisy):
                print("Calculating fold {} of {}".format(counter + 1, splits))
                X_train, X_test = A[train_index], A[test_index]
                y_train = noisy[train_index]
                y_train_true, y_test_true = true[train_index], true[test_index]
                # Using current alpha and polynomial order, solve using Lasso
                lasso_regression.alpha = alph
                lasso_regression.fit(X_train, y_train)
                # Estimate testing and training data
                ypredict = lasso_regression.predict(X_test)
                ytilde = lasso_regression.predict(X_train)
                # Get MSE metric for training and testing data
                alpha_mse_test[counter] = MSE(y_test_true, ypredict)
                alpha_mse_train[counter] = MSE(y_train_true, ytilde)
                counter = counter + 1
            print("Running time: {} seconds".format(time() - t0))
            dta.append(["{}".format(order), alpha_mse_test.mean(),
                        alpha_mse_train.mean()])
        df = pd.DataFrame(dta, columns=["Polynomial", "MSE test set", "MSE training set"])
        dataframe_dic[alph] = df

    cmap = plt.get_cmap('jet_r')
    plt.figure()
    fig1 = plt.figure(figsize=(8, 4))
    ax1 = fig1.add_subplot(111)
    ax1.set_position([0.1, 0.1, 0.6, 0.8])
    ax1.set_xlabel("Polynomial order")
    ax1.set_ylabel("Training MSE")
    fig2 = plt.figure(figsize=(8, 4))
    ax2 = fig2.add_subplot(111)
    ax2.set_position([0.1, 0.1, 0.6, 0.8])
    ax2.set_xlabel("Polynomial order")
    ax2.set_ylabel("Testing MSE")
    n = 0
    for df in dataframe_dic:
        ax1.plot(dataframe_dic[df]["Polynomial"],
                 dataframe_dic[df]["MSE training set"],
                 color=cmap(float(n) / len(alphas)),
                 label="Alpha=%10.2E" % (df))
        ax2.plot(dataframe_dic[df]["Polynomial"],
                 dataframe_dic[df]["MSE test set"],
                 color=cmap(float(n) / len(alphas)),
                 label="Alpha=%10.2E" % (df))
        print("alpha:", df)
        print(dataframe_dic[df])
        n = n + 1
    fig1.legend(bbox_to_anchor=(0.71, 0.5), loc="center left", borderaxespad=0)
    fig2.legend(bbox_to_anchor=(0.71, 0.5), loc="center left", borderaxespad=0)

    if level_of_noise < 0.21:
        lvl = "low"
    elif level_of_noise < 0.41:
        lvl = "med"
    else:
        lvl = "high"
    fig1.savefig("{}Oppg4LATrainSeed{}{}.png".format(plots_path, seed, lvl))
    fig2.savefig("{}Oppg4LATestSeed{}{}.png".format(plots_path, seed, lvl))
def channel_select(sparsity, output_feature, fn_next_input_feature, next_module,
                   method='greedy', p=2):
    """
    Select some unimportant channels from output_feature such that the Lp norm
    of fn_next_input_feature(output_feature_try) is minimized.
    The transformation from next(_conv)_output_feature to next2_input_feature
    is effectively fixed, so instead of comparing the input of conv layer i+2,
    we compare the output of conv layer i+1.
    """
    original_num = output_feature.size(1)
    pruned_num = int(math.floor(original_num * sparsity))  # round down

    if method == 'greedy':
        # ThiNet: A Filter Level Pruning Method for Deep Neural Network Compression
        indices_pruned = []
        while len(indices_pruned) < pruned_num:
            min_diff = 1e10
            min_idx = 0
            for idx in range(original_num):
                if idx in indices_pruned:
                    continue
                indices_try = indices_pruned + [idx]
                output_feature_try = torch.zeros_like(output_feature)
                output_feature_try[:, indices_try, ...] = output_feature[:, indices_try, ...]
                next_output_feature_try = next_module(
                    fn_next_input_feature(output_feature_try))
                next_output_feature_try_norm = next_output_feature_try.norm(p)
                if next_output_feature_try_norm < min_diff:
                    min_diff = next_output_feature_try_norm
                    min_idx = idx
            indices_pruned.append(min_idx)
    elif method == 'lasso':
        # Channel Pruning for Accelerating Very Deep Neural Networks
        # FIXME: fails to converge... still unresolved
        next_output_feature = next_module(fn_next_input_feature(output_feature))
        num_el = next_output_feature.numel()
        next_output_feature = next_output_feature.data.view(num_el).cpu()
        next_output_feature_divided = []
        for idx in range(original_num):
            # take each channel on its own, with all other channels zeroed out
            output_feature_try = torch.zeros_like(output_feature)
            output_feature_try[:, idx, ...] = output_feature[:, idx, ...]
            next_output_feature_try = next_module(
                fn_next_input_feature(output_feature_try))
            next_output_feature_divided.append(
                next_output_feature_try.data.view(num_el, 1))
        next_output_feature_divided = torch.cat(next_output_feature_divided,
                                                dim=1).cpu()

        # import matplotlib.pyplot as plt  # visualization
        # X = next_output_feature_divided[:, 1:2]
        # y = next_output_feature
        # model = Lasso(alpha=0.000001, warm_start=True, selection='random', tol=4000)
        # model.fit(X, y)
        # predicted = model.predict(X)
        # # scatter plot: X on the horizontal axis, y on the vertical axis
        # plt.scatter(X, y, marker='x')
        # plt.plot(X, predicted, c='r')
        # # label the x and y axes
        # plt.xlabel("next_output_feature_divided[:, 0:1]")
        # plt.ylabel("next_output_feature")
        # # save the figure
        # plt.savefig('Lasso1.png')

        # first, try to find an alpha that provides enough pruned channels
        alpha_try = 5e-5
        pruned_num_try = 0
        solver = Lasso(alpha=alpha_try, warm_start=True, selection='random')
        while pruned_num_try < pruned_num:
            alpha_try *= 2
            solver.alpha = alpha_try
            solver.fit(next_output_feature_divided, next_output_feature)
            pruned_num_try = sum(solver.coef_ == 0)
            print("lasso_alpha = {}, pruned_num_try = {}".format(
                alpha_try, pruned_num_try))

        # then, narrow down alpha to get closer to the desired number of pruned channels
        alpha_min = 0
        alpha_max = alpha_try
        pruned_num_tolerate_coeff = 1.1  # tolerance band (dead zone)
        pruned_num_max = int(pruned_num * pruned_num_tolerate_coeff)
        while True:
            alpha = (alpha_min + alpha_max) / 2
            solver.alpha = alpha
            solver.fit(next_output_feature_divided, next_output_feature)
            pruned_num_try = sum(solver.coef_ == 0)
            if pruned_num_try > pruned_num_max:
                alpha_max = alpha
            elif pruned_num_try < pruned_num:
                alpha_min = alpha
            else:
                print("lasso_alpha = {}".format(alpha))
                break

        # finally, convert lasso coeff to indices: zero coefficients mark pruned
        # channels (the original took coef_.nonzero(), i.e. the *kept* channels,
        # which contradicts the function's return value)
        indices_pruned = (solver.coef_ == 0).nonzero()[0].tolist()
    elif method == 'random':
        indices_pruned = random.sample(range(original_num), pruned_num)
    else:
        raise NotImplementedError

    return indices_pruned
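# A hypothetical call for the selector above, with made-up modules: prune half
# of a conv layer's 16 output channels, judging candidates by the next conv's
# response through a ReLU. The snippet's own imports (math, random, sklearn)
# are assumed to be in scope.
import torch
import torch.nn as nn
import torch.nn.functional as F

feat = torch.randn(2, 16, 8, 8)  # output feature map of the layer being pruned
next_conv = nn.Conv2d(16, 32, kernel_size=3, padding=1)

with torch.no_grad():
    idx = channel_select(0.5, feat, fn_next_input_feature=F.relu,
                         next_module=next_conv, method='greedy')
print("channels to prune:", idx)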
# reg1 (plain linear regression, the "Ordinary" line below) and reg2 (Ridge)
# are assumed to be defined earlier.
reg3 = Lasso(alpha=1)
reg1.fit(trainX, trainy)
reg1.coef_
reg2.fit(trainX, trainy)
reg2.coef_
reg3.fit(trainX, trainy)
reg3.coef_

alphas = np.logspace(-3, 3, 30)  # generate 30 values
linear_r2 = reg1.score(validX, validy)
result = pd.DataFrame(index=alphas, columns=['Ridge', 'Lasso'])
for alpha in alphas:
    reg2.alpha = alpha
    reg3.alpha = alpha
    reg2.fit(trainX, trainy)
    result.loc[alpha, 'Ridge'] = reg2.score(validX, validy)
    reg3.fit(trainX, trainy)
    result.loc[alpha, 'Lasso'] = reg3.score(validX, validy)

plt.plot(np.log(alphas), result['Ridge'], label="Ridge")
plt.plot(np.log(alphas), result['Lasso'], label="Lasso")
plt.hlines(linear_r2, np.log(alphas[0]), np.log(alphas[-1]), ls=':', color="k",
           label='Ordinary')
plt.legend()
"Cross Validation Score after cross validation: ", cross_val_score(model_rf_cv, X_val, y_val, cv=10, scoring='accuracy').mean()) acc_forest_cv = round(metrics.accuracy_score(y_test, pred_rf_cv), 4) print('Random Forest Accuracy after cross validation= ', acc_forest_cv) """# **Lasso Model**""" alpha_space = np.logspace(-4, 0, 50) model_scores = [] lasso_model = Lasso(normalize=True) for alpha in alpha_space: # Specify the alpha value to use lasso_model.alpha = alpha # Perform 10-fold CV lasso_cv_scores = cross_val_score(lasso_model, X, y, cv=10) # Append the mean of lasso_cv_scores to model_scores = [] model_scores.append(np.mean(lasso_cv_scores)) print(model_scores) # best alpha index for lasso print(np.argmax(model_scores)) #plus in this index to the list of alphas #best alpha to use print(alpha_space[0])