def LarsRegressorGS(X_train, X_test, y_train, y_test):
    reg = Lars()
    grid_values = {
        'n_nonzero_coefs': list(range(100, 500, 100)),
    }
    grid_reg = GridSearchCV(
        reg,
        param_grid=grid_values,
        scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2'],
        refit='r2',
        n_jobs=-1,
        cv=2,
        verbose=100)
    grid_reg.fit(X_train, y_train)
    reg = grid_reg.best_estimator_
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)
    best_params: dict = grid_reg.best_params_
    saveBestParams(nameOfModel="LarsRegressorGS", best_params=best_params)
    logSave(nameOfModel="LarsRegressorGS", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
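# A minimal, hypothetical driver for LarsRegressorGS above (not part of the
# original source). It assumes Lars and GridSearchCV are imported from
# sklearn, and that the project helpers printMetrics, getMetrics,
# saveBestParams and logSave are defined elsewhere.
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X_all, y_all = make_regression(n_samples=300, n_features=20, noise=3.0,
                               random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X_all, y_all, test_size=0.25,
                                          random_state=0)
LarsRegressorGS(X_tr, X_te, y_tr, y_te)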
def runLarsRegressor(self):
    lm = Lars(fit_intercept=True, normalize=True)
    print("Lars Regressor\n")
    lm.fit(self.m_X_train, self.m_y_train)
    predictY = lm.predict(self.m_X_test)
    score = lm.score(self.m_X_test, self.m_y_test)
    predictTrainY = lm.predict(self.m_X_train)
    self.displayPredictPlot(predictY)
    self.displayResidualPlot(predictY, predictTrainY)
    self.dispalyModelResult(lm, predictY, score)
def LarsRegressor(X_train, X_test, y_train, y_test):
    reg = Lars()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)
    printMetrics(y_true=y_test, y_pred=y_pred)
    val_metrics = getMetrics(y_true=y_test, y_pred=y_pred)
    y_pred = reg.predict(X=X_train)
    metrics = getMetrics(y_true=y_train, y_pred=y_pred)
    printMetrics(y_true=y_train, y_pred=y_pred)
    logSave(nameOfModel="LarsRegressor", reg=reg, metrics=metrics,
            val_metrics=val_metrics)
class _LarsImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
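# A minimal usage sketch for the wrapper above (not from the original
# source); it assumes Op is bound to sklearn.linear_model.Lars, a binding
# this snippet does not show.
from sklearn.datasets import make_regression
from sklearn.linear_model import Lars as Op

X_demo, y_demo = make_regression(n_samples=50, n_features=10, noise=1.0,
                                 random_state=0)
impl = _LarsImpl(n_nonzero_coefs=5)  # hyperparams are forwarded to Op
impl.fit(X_demo, y_demo)
print(impl.predict(X_demo)[:3])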
# LARS Regression
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Lars

# load the diabetes dataset
dataset = datasets.load_diabetes()
# fit a LARS model to the data
model = Lars()
model.fit(dataset.data, dataset.target)
print(model)
# make predictions
expected = dataset.target
predicted = model.predict(dataset.data)
# summarize the fit of the model
mse = np.mean((predicted - expected)**2)
print(mse)
print(model.score(dataset.data, dataset.target))
# - Hold out half of the data and train LARS models on it
# (assumes: import numpy as np; from sklearn.linear_model import Lars, LarsCV;
#  reg_data / reg_target defined earlier, e.g. via make_regression)

# Variable definitions
# --- number of training samples
train_n = 100

# Create an instance and fit it
# --- target of 12 nonzero coefficients
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])

# Create an instance and fit it
# --- target of 500 nonzero coefficients (the default)
lars_500 = Lars(n_nonzero_coefs=500)
lars_500.fit(reg_data[:train_n], reg_target[:train_n])

# Mean squared error on the held-out half
np.mean(
    np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))

# 3 LARS as feature selection ---------------------------------------------------------------------
# Create an instance
lcv = LarsCV()
# Fit
lcv.fit(reg_data, reg_target)
# Number of nonzero coefficients
np.sum(lcv.coef_ != 0)
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Author: Quan Pan <*****@*****.**>
# License: MIT License
# Create: 2016-12-02

from sklearn.linear_model import Lars

# X = [[0., 0.], [1., 1.], [10., 10.]]
X = [[0.0], [1.0], [10.0]]
y = [0.0, 1.0, 10.0]
# x_pred = [[5., 5.], [-10., -10.]]
x_pred = [[5.], [-10.]]

clf = Lars(n_nonzero_coefs=1)
clf.fit(X, y)
print(clf.coef_)
y_pred = clf.predict(x_pred)
print(y_pred)
## Ridge regression
ridge = Ridge(alpha=0.8)
ridge.fit(train_X, train_y)
predictions = ridge.predict(test_X)
print('MAE is ', mean_absolute_error(np.expm1(predictions), np.expm1(test_y)))

## Lasso regression
lasso = Lasso(alpha=0.9)
lasso.fit(train_X, train_y)
predictions = lasso.predict(test_X)
print('MAE is ', mean_absolute_error(np.expm1(predictions), np.expm1(test_y)))

## Least-angle regression
lars = Lars(n_nonzero_coefs=100)
lars.fit(train_X, train_y)
predictions = lars.predict(test_X)
print('MAE is ', mean_absolute_error(np.expm1(predictions), np.expm1(test_y)))

## Linear regression
lr = LinearRegression()
lr.fit(train_X, train_y)
predictions = lr.predict(test_X)
print('MAE is ', mean_absolute_error(np.expm1(predictions), np.expm1(test_y)))

## Decision-tree regression
dtr = DecisionTreeRegressor(criterion='mae', max_depth=5, min_samples_split=4,
                            max_features='sqrt', min_samples_leaf=2)
dtr.fit(train_X, train_y)
    # (tail of the preceding loop accumulating squared errors)
    sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
rank_result['Elastic_pca'] = sumsum / float(result_row)
rs_score['Elastic_pca'] = r2_score(y_test, y)

ElasticModel = ElasticNetCV()
ElasticModel.fit(X_train_std, y_train)
y = ElasticModel.predict(X_test_std)
[result_row] = y.shape
sumsum = 0
# print(y)
for i in range(result_row):
    sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
rank_result['Elastic_std'] = sumsum / float(result_row)
rs_score['Elastic_std'] = r2_score(y_test, y)

LarsModel = Lars()
LarsModel.fit(X_train_pca, y_train)
y = LarsModel.predict(X_test_pca)
[result_row] = y.shape
sumsum = 0
# print(y)
for i in range(result_row):
    sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
rank_result['Lars_pca'] = sumsum / float(result_row)
rs_score['Lars_pca'] = r2_score(y_test, y)

LarsModel = Lars()
LarsModel.fit(X_train_std, y_train)
y = LarsModel.predict(X_test_std)
[result_row] = y.shape
sumsum = 0
# print(y)
for i in range(result_row):
    sumsum = sumsum + (y[i] - y_test[i]) * (y[i] - y_test[i])
import numpy as np
from sklearn.datasets import make_regression

reg_data, reg_target = make_regression(n_samples=200, n_features=500,
                                       n_informative=10, noise=2)

from sklearn.linear_model import Lars
lars = Lars(n_nonzero_coefs=10)
lars.fit(reg_data, reg_target)
print(np.sum(lars.coef_ != 0))  # 10

train_n = 100
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])
lars_500 = Lars()  # n_nonzero_coefs is 500 by default
lars_500.fit(reg_data[:train_n], reg_target[:train_n])

# Now, to see how well each model fits the held-out data, do the following:
np.mean(np.power(reg_target[train_n:] - lars_12.predict(reg_data[train_n:]), 2))
# 31.527714163321001
np.mean(np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))
# 9.6198147535136237e+30

from sklearn.linear_model import LarsCV
lcv = LarsCV()
lcv.fit(reg_data, reg_target)
print(np.sum(lcv.coef_ != 0))  # 44

# Using linear methods for classification - logistic regression
from sklearn.datasets import make_classification
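# A short sketch (not from the original source): the full LARS coefficient
# path behind the sparsity comparison above can be inspected directly with
# sklearn.linear_model.lars_path. Data names here are illustrative.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import lars_path

X_path, y_path = make_regression(n_samples=200, n_features=50,
                                 n_informative=10, noise=2, random_state=0)
# method='lar' traces the plain LARS path; each column of coefs is the
# coefficient vector after one more feature enters the active set.
alphas, active, coefs = lars_path(X_path, y_path, method='lar')
print(coefs.shape)  # (n_features, n_steps): one column per step
print(active[:10])  # order in which the first features were added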
print('MAE:', mean_absolute_error(testing_labels, preds), '\n')

# PCA + Elastic Net
elasticnet = ElasticNet(l1_ratio=0.5)
elasticnet.fit(reduced_training_features, training_labels)
preds = elasticnet.predict(reduced_testing_features)
score = elasticnet.score(reduced_testing_features, testing_labels)
print('PCA + ElasticNet Results:')
print('R2 score:', score)
print('MAE:', mean_absolute_error(testing_labels, preds))

# Least-Angle Regression (LARS)
from sklearn.linear_model import Lars
lars = Lars()
lars.fit(training_features, training_labels)
preds = lars.predict(testing_features)
score = lars.score(testing_features, testing_labels)
print('LARS Results:')
print('R2 score:', score)
print('MAE:', mean_absolute_error(testing_labels, preds), '\n')

# PCA + LARS
lars = Lars()
lars.fit(reduced_training_features, training_labels)
preds = lars.predict(reduced_testing_features)
score = lars.score(reduced_testing_features, testing_labels)
print('PCA + LARS Results:')
print('R2 score:', score)
print('MAE:', mean_absolute_error(testing_labels, preds))

# Orthogonal Matching Pursuit
    # ('ppru', 'ppr_submission_user.csv', 'ppr_fitted_user.csv'),
    # ('pprg', 'ppr_submission_global.csv', 'ppr_fitted_global.csv'),
]

fitted = pd.DataFrame(index=review_data.index)
submission = pd.DataFrame(index=review_data_final.index)
for name, sub_name, fit_name in blend_inputs:
    f_df = pd.read_csv(os.path.join('..', fit_name))
    f_df.index = review_data.index
    fitted[name] = f_df['stars']
    s_df = pd.read_csv(os.path.join('..', sub_name))
    s_df.index = review_data_final.index
    submission[name] = s_df['stars']

gbr = GradientBoostingRegressor(max_depth=3, verbose=2)
gbr.fit(fitted, review_data['stars'])
pred = gbr.predict(submission)
pd.DataFrame({'review_id': submission.index,
              'stars': np.maximum(1, np.minimum(5, pred))}).to_csv(
    '../gbr_submission.csv', index=False)

lar = Lars(fit_intercept=True, verbose=2, normalize=True, fit_path=True)
lar.fit(fitted, review_data['stars'])
pred = lar.predict(submission)
pd.DataFrame({'review_id': submission.index,
              'stars': np.maximum(1, np.minimum(5, pred))}).to_csv(
    '../lar_submission.csv', index=False)

ridge = Ridge()
ridge.fit(fitted, review_data['stars'])
pred = ridge.predict(submission)
pd.DataFrame({'review_id': submission.index,
              'stars': np.maximum(1, np.minimum(5, pred))}).to_csv(
    '../ridge_submission.csv', index=False)

## TODO: blend based on size of rating neighborhood
can be quickly adapted into a lasso
Drawback:
    the model is built by iterating on the residuals, so it is sensitive to noise
'''
rg = Lars(fit_intercept=True, verbose=False, normalize=True, precompute='auto',
          n_nonzero_coefs=500, eps=2.2204460492503131e-16, copy_X=True,
          fit_path=True, positive=False)
rg.fit(X_train, Y_train)
Y_pre = rg.predict(X_test)
rg.score(X_test, Y_test)
rg.coef_
rg.intercept_
'''
fit_intercept    whether to fit an intercept
verbose          verbosity level
normalize        whether to normalize the regressors
precompute       whether to use a precomputed Gram matrix to speed up computation
n_nonzero_coefs  target number of nonzero coefficients
eps              machine-precision regularization used when computing the Cholesky diagonal factors
copy_X           whether to copy X (if False, X may be overwritten)
fit_path         if True, the full coefficient path is stored in coef_path_
positive         restrict the coefficients to be positive
'''
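# A small sketch of what fit_path=True provides, per the parameter notes
# above (not from the original source; data comes from make_regression, not
# the original X_train/Y_train): the whole LARS coefficient path is stored
# in coef_path_, one column per step.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Lars

Xp, yp = make_regression(n_samples=100, n_features=20, n_informative=5,
                         noise=1.0, random_state=0)
path_model = Lars(fit_path=True, n_nonzero_coefs=5)
path_model.fit(Xp, yp)
print(path_model.coef_path_.shape)     # (n_features, n_steps)
print(np.sum(path_model.coef_ != 0))   # at most 5 nonzero coefficients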
x_test = ml.loc[test_index]
y_test = ml_outs.loc[test_index]

# Scale
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Implement model (each assignment below overrides the previous choice;
# only the last one, LinearRegression, is actually fitted)
linreg = Lars()          # better
linreg = LarsCV()        # better still
linreg = LassoLarsCV()   # about the same
linreg = LinearRegression()
linreg.fit(x_train, y_train)
predictions = linreg.predict(x_test)

# Plot predictions and y_test
plt.figure()
plt.plot(predictions, label='Predictions')
plt.plot(pd.Series(predictions).rolling(5).mean(), label='rolling predictions')
plt.plot(y_test.values, label='Shifted currencies (y_test values)', color='grey')
plt.plot(cu.loc[test_index, currency].values, label='UNSHIFTED')
plt.legend()
plt.show()

# Print score and summary
score = linreg.score(x_test, y_test)
def train_error_data(n, J, x, y, train_size, nb_features, my_alphas):
    '''
    Parameters
    ----------
    n : number of repetitions.
    J : maximum sparsity level.
    x : data.
    y : desired output.
    train_size : number of training points.
    nb_features : number of features.
    my_alphas : array of candidate values for alpha.

    Returns
    -------
    A plot of the training MSE as a function of sparsity for the Lasso,
    OMP and Lars methods.
    '''
    # initialisation
    vec = np.zeros(train_size * J).reshape(train_size, J)
    res = np.zeros(n * J).reshape(n, J)
    somme = np.zeros(J)
    vec2 = np.zeros(train_size * J).reshape(train_size, J)
    res2 = np.zeros(n * J).reshape(n, J)
    somme2 = np.zeros(J)
    vec3 = np.zeros(train_size * J).reshape(train_size, J)
    res3 = np.zeros(n * J).reshape(n, J)
    somme3 = np.zeros(J)
    axes = np.arange(1, 11)  # x-axis for the plot (assumes J == 10)

    # Average training squared error: n iterations, sparsity 1 to J
    for i in range(n):
        X_train, X_test, y_train, y_test = train_test_split(
            x, y, train_size=train_size)
        for j in range(J):
            alpha_coef = alpha(X_train, train_size=train_size,
                               nb_features=nb_features, my_alphas=my_alphas)
            reg2 = Lasso(alpha=alpha_coef[j]).fit(X_train, y_train)
            reg = OrthogonalMatchingPursuit(n_nonzero_coefs=j + 1).fit(X_train, y_train)
            reg3 = Lars(n_nonzero_coefs=j + 1).fit(X_train, y_train)
            vec[:, j] = (y_train - reg.predict(X_train))**2
            res[i, j] = sum(vec[:, j]) / train_size
            vec2[:, j] = (y_train - (reg2.predict(X_train)))**2
            res2[i, j] = sum(vec2[:, j]) / train_size
            vec3[:, j] = (y_train - reg3.predict(X_train))**2
            res3[i, j] = sum(vec3[:, j]) / train_size

    for j in range(J):
        for i in range(n):
            somme[j] = somme[j] + res[i, j]
            somme2[j] = somme2[j] + res2[i, j]
            somme3[j] = somme3[j] + res3[i, j]

    # plot the results
    plt.plot(axes, somme / n, label='OMP')
    plt.plot(axes, somme2 / n, label='Lasso')
    plt.plot(axes, somme3 / n, label='Lars')
    plt.xlabel('sparsity')
    plt.ylabel('train error')
    plt.title('Performance comparison on simulation data')
    plt.legend()
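# A hypothetical invocation of train_error_data (not from the original
# source). It assumes the alpha() helper called inside the function, plus
# the Lasso/OrthogonalMatchingPursuit/Lars and train_test_split imports,
# are defined elsewhere in the module, as the function body implies.
import numpy as np
from sklearn.datasets import make_regression

x_demo, y_demo = make_regression(n_samples=60, n_features=30,
                                 n_informative=10, noise=1.0, random_state=0)
train_error_data(n=5, J=10, x=x_demo, y=y_demo, train_size=40,
                 nb_features=30, my_alphas=np.logspace(-3, 1, 10))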
class LarsClass:
    """
    Name : Lars
    Attribute : None
    Method : predict, predict_by_cv, save_model
    """

    def __init__(self):
        # algorithm name
        self._name = 'lars'
        # base path
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
        # suppress warning messages
        warnings.filterwarnings('ignore')
        # load the raw data
        data = pd.read_csv(self._f_path + "/regression/resource/regression_sample.csv",
                           sep=",", encoding="utf-8")
        # masks splitting the data into training and test years
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)
        # training data
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # test data
        self._x_test, self._y_test = self.preprocessing(data[self._y])
        # declare the model
        self._model = Lars(normalize=False)
        # train the model
        self._model.fit(self._x_train, self._y_train)

    # data preprocessing
    def preprocessing(self, data):
        # features
        x = []
        # labels
        y = []
        # window size (7 days)
        base_interval = 7
        # temperature
        temps = list(data["temperature"])
        for i in range(len(temps)):
            if i < base_interval:
                continue
            y.append(temps[i])
            xa = []
            for p in range(base_interval):
                d = i + p - base_interval
                xa.append(temps[d])
            x.append(xa)
        return x, y

    # plain prediction
    def predict(self, save_img=False, show_chart=False):
        # predict
        y_pred = self._model.predict(self._x_test)
        # score
        score = r2_score(self._y_test, y_pred)
        # report
        if hasattr(self._model, 'coef_') and hasattr(self._model, 'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')
        print(f'Score = {score}')
        # optionally save the chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)
        # predicted values & score
        return [list(y_pred), score]

    # prediction with cross-validation
    def predict_by_cv(self):
        # for regression algorithms, implement cross-validation to fit the actual project
        return False

    # prediction with GridSearchCV
    def predict_by_gs(self):
        pass

    # save or refresh the model
    def save_model(self, renew=False):
        # save the model
        if not renew:
            # first save
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')
        else:
            # replace the existing model
            if os.path.isfile(self._f_path + f'/model/{self._name}_rg.pkl'):
                os.rename(
                    self._f_path + f'/model/{self._name}_rg.pkl',
                    self._f_path + f'/model/{str(self._name) + str(time.time())}_rg.pkl')
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')

    # save the regression chart
    def save_chart_image(self, data, show_chart):
        # figure size
        plt.figure(figsize=(15, 10), dpi=100)
        # actual values
        plt.plot(self._y_test, c='r')
        # predicted values
        plt.plot(data, c='b')
        # save as an image
        plt.savefig('./chart_images/tenki-kion-lr.png')
        # show the chart (optional)
        if show_chart:
            plt.show()

    def __del__(self):
        del self._x_train, self._x_test, self._y_train, self._y_test, self._x, self._y, self._model
# LarsCV: fit_intercept, verbose, normalize, cv
from sklearn.linear_model import LarsCV, Lars
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt

X, y = make_regression(n_samples=200, noise=4.0, random_state=0)
reg = LarsCV(cv=5).fit(X, y)
reg.score(X, y)
reg.alpha_  # regularization parameter chosen by cross-validation
pred = reg.predict(X[:, ])

plt.scatter(X[:, 0], y, color='black')
plt.scatter(X[:, 0], pred, color='red')
plt.show()

reg2 = Lars().fit(X, y)
reg2.score(X, y)
reg2.alphas_  # plain Lars exposes the full path via alphas_; it has no alpha_
pred = reg2.predict(X[:, ])

#%% LassoLars: alpha, fit_intercept, normalize
# LassoLarsCV: alpha, fit_intercept, normalize, cv
from sklearn import linear_model
reg = linear_model.LassoLars(alpha=0.01)
reg.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])
print(reg.coef_)
reg2 = linear_model.LassoLarsCV()
reg2.fit([[-1, 1], [0, 0], [1, 1]], [-1, 0, -1])
# LARS Regression
# The Least Angle Regression (LARS) method is a computationally efficient
# algorithm for fitting a regression model. It is useful for high-dimensional
# data and is commonly used in conjunction with regularization (such as LASSO).
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Lars

# load the diabetes dataset
dataset = datasets.load_diabetes()
# fit a LARS model to the data
model = Lars()
model.fit(dataset.data, dataset.target)
print(model)
# make predictions
expected = dataset.target
predicted = model.predict(dataset.data)
# summarize the fit of the model
mse = np.mean((predicted - expected)**2)
print(mse)
print(model.score(dataset.data, dataset.target))
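# Since the comments above mention LARS being combined with LASSO-style
# regularization, here is a brief sketch (not from the original source) of
# the LASSO variant, LassoLars, on the same diabetes data; alpha=0.1 is an
# arbitrary illustrative value.
from sklearn.linear_model import LassoLars

lasso_lars = LassoLars(alpha=0.1)
lasso_lars.fit(dataset.data, dataset.target)
# The L1 penalty zeroes out some coefficients, unlike plain LARS above.
print(np.sum(lasso_lars.coef_ != 0), 'nonzero coefficients')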
def Lar_regr(features, labels):
    from sklearn.linear_model import Lars
    model = Lars()
    model.fit(features, labels)
    pred = model.predict(features)
    AsGraph(labels, pred)
# print(std(x_scaled))
from sklearn.linear_model import Lars
from sklearn.model_selection import train_test_split

lars = Lars(fit_intercept=False, normalize=False, n_nonzero_coefs=100,
            verbose=True)
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y_scaled,
                                                    test_size=0.2,
                                                    random_state=42)
lars.fit(x_train, y_train)
# print(x_test[0])
lars.predict(x_test)[0]
lars.score(x_test, y_test)
lars.get_params()

beta = generate_random_points(n=10, p=10)
scaler.fit(beta)
scaler.fit_transform(beta)
beta = scaler.fit_transform(beta)
epsilons = generate_random_points(n=100, p=10)
# print(epsilons)

y = [[] for _ in range(10)]
for k in range(10):
    y[k] = np.matmul(beta, np.asarray(x[k])) + epsilons[k]