from sklearn.linear_model import LarsCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import RepeatedKFold


def Lars_regression(self, X_train, y_train, X_test, y_test):
    # Cross-validated LARS: 10-fold CV, repeated 10 times
    my_cv = RepeatedKFold(n_splits=10, n_repeats=10, random_state=42)
    best_model = LarsCV(cv=my_cv, n_jobs=-1)
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return best_model, mse, mae, r2
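# A minimal usage sketch for the method above, assuming it is attached to a
# host class (the class name `LarsRunner` and the synthetic data are
# illustrative, not from the original source):
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

class LarsRunner:
    Lars_regression = Lars_regression  # reuse the module-level function as a method

X, y = make_regression(n_samples=300, n_features=20, noise=0.5, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
model, mse, mae, r2 = LarsRunner().Lars_regression(X_train, y_train, X_test, y_test)
print(f"MSE={mse:.3f}  MAE={mae:.3f}  R^2={r2:.3f}")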
class _LarsCVImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
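# `Op` is not defined in this snippet; for the wrapper to run standalone it must
# be bound to the underlying scikit-learn estimator. A minimal sketch, assuming
# (from the class name) that Op is sklearn.linear_model.LarsCV:
from sklearn.datasets import make_regression
from sklearn.linear_model import LarsCV as Op  # assumed binding

X, y = make_regression(n_samples=200, n_features=10, noise=1.0, random_state=0)
impl = _LarsCVImpl(cv=3)  # hyperparameters are forwarded to LarsCV
impl.fit(X, y)
print(impl.predict(X[:5]))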
def _larscv(*, train, test, x_predict=None, metrics, fit_intercept=True,
            verbose=False, max_iter=500, normalize=True, precompute='auto',
            cv=None, max_n_alphas=1000, n_jobs=None,
            eps=2.220446049250313e-16, copy_X=True):
    """For more info visit :
    https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LarsCV.html#sklearn.linear_model.LarsCV
    """
    model = LarsCV(fit_intercept=fit_intercept, verbose=verbose, max_iter=max_iter,
                   normalize=normalize, precompute=precompute, cv=cv,
                   max_n_alphas=max_n_alphas, n_jobs=n_jobs, eps=eps, copy_X=copy_X)
    model.fit(train[0], train[1])
    model_name = 'LarsCV'
    y_hat = model.predict(test[0])
    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    elif metrics == 'mae':
        accuracy = _mae(test[1], y_hat)
    if x_predict is None:
        return (model_name, accuracy, None)
    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
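# A usage sketch for _larscv, assuming the module's _mse/_rmse/_mae helpers are
# in scope with the usual (y_true, y_pred) signature (the synthetic data below
# is illustrative):
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=500, n_features=15, noise=2.0, random_state=1)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=1)
name, accuracy, y_predict = _larscv(train=(X_tr, y_tr), test=(X_te, y_te),
                                    x_predict=X_te, metrics='mse', cv=5)
print(name, accuracy)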
import numpy as np
from sklearn.linear_model import LarsCV
from sklearn.model_selection import train_test_split


def ResultsLARS(DataSet, Y):
    X_train, X_test, y_train, y_test = train_test_split(DataSet, Y, train_size=0.75)
    LAR_cv = LarsCV(normalize=True)
    LAR_model = LAR_cv.fit(X_train, y_train)
    LAR_prediction = LAR_model.predict(X_test)
    LAR_mae = np.mean(np.abs(y_test - LAR_prediction))
    LAR_coefs = dict(
        zip(['Intercept'] + DataSet.columns.tolist(),
            np.round(np.concatenate((LAR_model.intercept_, LAR_model.coef_), axis=None), 3)))
    print('Least Angle Regression MAE: {}'.format(np.round(LAR_mae, 3)))
    print('Least Angle Regression coefficients: {}'.format(LAR_coefs))
    del LAR_coefs['Intercept']
    DictionaryPlot(LAR_coefs, 'Least Angle Regression')
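# DictionaryPlot is not defined in this snippet; a minimal sketch of what such a
# helper might look like, inferred from its call site (a {feature: coefficient}
# dict plus a title); the bar-chart rendering is an assumption:
import matplotlib.pyplot as plt

def DictionaryPlot(coef_dict, title):
    # Assumed helper: bar chart of coefficients keyed by feature name.
    plt.figure(figsize=(10, 5))
    plt.bar(list(coef_dict.keys()), list(coef_dict.values()))
    plt.title(title)
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.show()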
import os
import time
import warnings

import joblib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LarsCV
from sklearn.metrics import r2_score


class LarsCvClass:
    """
    Name : LarsCV
    Attribute : None
    Method : predict, predict_by_cv, save_model
    """

    def __init__(self):
        # Algorithm name
        self._name = 'larscv'
        # Base path
        self._f_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))
        # Suppress warning messages
        warnings.filterwarnings('ignore')
        # Load the raw data
        data = pd.read_csv(self._f_path + "/regression/resource/regression_sample.csv", sep=",", encoding="utf-8")
        # Masks for the train / test split
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)
        # Training data
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Test data
        self._x_test, self._y_test = self.preprocessing(data[self._y])
        # Declare the model
        self._model = LarsCV(normalize=False)
        # Train the model
        self._model.fit(self._x_train, self._y_train)

    # Data preprocessing
    def preprocessing(self, data):
        # Features
        x = []
        # Labels
        y = []
        # Window size (7 days)
        base_interval = 7
        # Temperatures
        temps = list(data["temperature"])
        for i in range(len(temps)):
            if i < base_interval:
                continue
            y.append(temps[i])
            xa = []
            for p in range(base_interval):
                d = i + p - base_interval
                xa.append(temps[d])
            x.append(xa)
        return x, y

    # Standard prediction
    def predict(self, save_img=False, show_chart=False):
        # Predict
        y_pred = self._model.predict(self._x_test)
        # Score
        score = r2_score(self._y_test, y_pred)
        # Report
        if hasattr(self._model, 'coef_') and hasattr(self._model, 'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')
        print(f'Score = {score}')
        # Optionally save the chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)
        # Predictions & score
        return [list(y_pred), score]

    # CV prediction (cross validation)
    def predict_by_cv(self):
        # For regression algorithms, implement cross validation to suit the actual project.
        return False

    # GridSearchCV prediction
    def predict_by_gs(self):
        pass

    # Save or refresh the model
    def save_model(self, renew=False):
        if not renew:
            # First save
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')
        else:
            # Replace the existing model, archiving the old file with a timestamp
            if os.path.isfile(self._f_path + f'/model/{self._name}_rg.pkl'):
                os.rename(self._f_path + f'/model/{self._name}_rg.pkl',
                          self._f_path + f'/model/{str(self._name) + str(time.time())}_rg.pkl')
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')

    # Save the regression chart
    def save_chart_image(self, data, show_chart):
        # Figure size
        plt.figure(figsize=(15, 10), dpi=100)
        # Actual values
        plt.plot(self._y_test, c='r')
        # Predicted values
        plt.plot(data, c='b')
        # Save as an image
        plt.savefig('./chart_images/tenki-kion-lr.png')
        # Show the chart (optional)
        if show_chart:
            plt.show()

    def __del__(self):
        del self._x_train, self._x_test, self._y_train, self._y_test, self._x, self._y, self._model
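# A usage sketch for the class above; assumes the CSV resource and the model/
# and chart_images/ directories exist at the paths the class expects:
if __name__ == '__main__':
    regressor = LarsCvClass()  # loads the data and fits LarsCV on construction
    predictions, score = regressor.predict(save_img=False, show_chart=False)
    print(f'R^2 on the year >= 2018 split: {score:.4f}')
    regressor.save_model(renew=False)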
np.concatenate(
            (elastic_net_model.intercept_, elastic_net_model.coef_),
            axis=None), 3)))
print('Elastic Net MSE: {}'.format(np.round(elastic_net_mae, 3)))
print('Elastic Net coefficients:', elastic_net_coefs)

##############################################################################
###################### LEAST ANGLE REGRESSION ################################
##############################################################################
print("##############################################################################")
print("LEAST ANGLE REGRESSION")
LAR_cv = LarsCV(normalize=True)
LAR_model = LAR_cv.fit(X_train, y_train)
LAR_prediction = LAR_model.predict(X_test)
LAR_mse = mean_squared_error(y_test, LAR_prediction)
LAR_coefs = dict(
    zip(['Intercept'] + data.columns.tolist()[:-1],
        np.round(np.concatenate((LAR_model.intercept_, LAR_model.coef_), axis=None), 3)))
print('Least Angle Regression MSE: {}'.format(np.round(LAR_mse, 3)))
print('Least Angle Regression coefficients:', LAR_coefs)

##############################################################################
################## PRINCIPAL COMPONENTS REGRESSION ###########################
##############################################################################
print(
# - Hold out half of the data and train LARS models

# Variable definition
# --- number of training samples
train_n = 100

# Instantiate and fit
# --- limit the number of non-zero coefficients to 12
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])

# Instantiate and fit
# --- allow 500 non-zero coefficients (the default)
lars_500 = Lars(n_nonzero_coefs=500)
lars_500.fit(reg_data[:train_n], reg_target[:train_n])

# Mean squared error
np.mean(
    np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))

# 3 LARS as feature selection ---------------------------------------------------------------------

# Instantiate
lcv = LarsCV()

# Fit
lcv.fit(reg_data, reg_target)

# Number of non-zero coefficients
np.sum(lcv.coef_ != 0)
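# Unlike the fixed n_nonzero_coefs above, LarsCV picks the stopping point by
# cross-validation. A short self-contained sketch (the synthetic sparse dataset
# is illustrative, not the reg_data used above):
from sklearn.datasets import make_regression
from sklearn.linear_model import LarsCV

# Sparse ground truth: only 10 of 200 features are informative.
data, target = make_regression(n_samples=200, n_features=200,
                               n_informative=10, noise=2.0, random_state=0)
lcv_demo = LarsCV(cv=5)
lcv_demo.fit(data, target)
print('selected alpha:', lcv_demo.alpha_)
print('non-zero coefficients:', (lcv_demo.coef_ != 0).sum())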
# Print support and ranking
print(feat_selector.support_)  # False means the feature can be eliminated
print(feat_selector.ranking_)  # Ranking (1 = selected)
print(X.columns)

################## Use LarsCV for hyperparameter optimization (wrapper)
# LARS starts with all coefficients at zero and increases the coefficient of the
# variable most correlated with the residual. As soon as some other variable is
# as correlated with the residual as the current one, it joins the active set,
# and the coefficients then move in the joint least-squares direction of the
# active variables; this repeats until convergence. It doubles as a feature
# selection method because some coefficients end up exactly zero.

# Instantiate
lars_mod = LarsCV(cv=5, normalize=False)

# Fit
feat_selector = lars_mod.fit(X, y)

# Print r-squared score and estimated alpha
print(lars_mod.score(X, y))
print(lars_mod.alpha_)

################# Using a RandomForestRegressor for feature selection (tree-based methods)
# Feature importance is computed by permutation: fit the trees, randomly permute
# (shuffle) one feature variable, rerun the observations through the trees, and
# measure the increase in error. The % increase gives the feature importance.
# https://link.springer.com/article/10.1023/A:1010933404324

# Instantiate
rf_mod = RandomForestRegressor(max_depth=2, random_state=123, n_estimators=100,
                               oob_score=True)
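# Because LARS zeroes out coefficients, the fitted lars_mod doubles as a feature
# selector. A sketch of extracting the selected columns (assumes X is the
# DataFrame used above):
import numpy as np

selected_mask = lars_mod.coef_ != 0  # True where LarsCV kept the feature
selected_features = np.asarray(X.columns)[selected_mask]
print('kept', selected_mask.sum(), 'of', selected_mask.size, 'features:')
print(selected_features)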
def fit_lars_cv(self, X, y, n_fold=10):
    from sklearn.linear_model import LarsCV
    lars_cv = LarsCV(cv=n_fold)
    lars_cv.fit(X, y)
    return lars_cv
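# A quick usage sketch; since `self` is unused, the method can be exercised
# directly for illustration (the synthetic data is not from the original source):
from sklearn.datasets import make_regression

X, y = make_regression(n_samples=150, n_features=8, noise=1.0, random_state=0)
model = fit_lars_cv(None, X, y, n_fold=5)
print(model.alpha_, model.score(X, y))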
#10
train_n = 100
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])
lars_500 = Lars()  # n_nonzero_coefs is 500 by default
lars_500.fit(reg_data[:train_n], reg_target[:train_n])

# Now, to see how well each model fits the held-out data, do the following:
np.mean(np.power(reg_target[train_n:] - lars_12.predict(reg_data[train_n:]), 2))
# 31.527714163321001
np.mean(np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))
# 9.6198147535136237e+30

from sklearn.linear_model import LarsCV
lcv = LarsCV()
lcv.fit(reg_data, reg_target)
print(np.sum(lcv.coef_ != 0))
# 44

# Using linear methods for classification: logistic regression
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=4)
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
X_train = X[:-200]
X_test = X[-200:]
#####################################################################
#               (High Dimensional) Linear Regression                #
#####################################################################

#####################################################################
##                          Scikit Learn                           ##
#####################################################################
lasso_model = LassoCV()
lasso_model.fit(x_train_values, y_train_values)
lasso_model_predictions = lasso_model.predict(x_test_values)
generate_submission_file(lasso_model_predictions, test_data["Id"],
                         "../results/" + user + "_LassoCV.csv")

lars_model = LarsCV()
lars_model.fit(x_train_values, y_train_values)
lars_model_predictions = lars_model.predict(x_test_values)
generate_submission_file(lars_model_predictions, test_data["Id"],
                         "../results/" + user + "_LarsCV.csv")

lassolars_model = LassoLarsCV()
lassolars_model.fit(x_train_values, y_train_values)
lassolars_model_predictions = lassolars_model.predict(x_test_values)
generate_submission_file(lassolars_model_predictions, test_data["Id"],
                         "../results/" + user + "_LassoLarsCV.csv")

en_model = ElasticNetCV()
en_model.fit(x_train_values, y_train_values)
en_model_predictions = en_model.predict(x_test_values)
generate_submission_file(en_model_predictions, test_data["Id"],
                         "../results/" + user + "_ElasticNetCV.csv")
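# generate_submission_file is defined elsewhere in this project; a minimal
# sketch of what it presumably does, inferred from the call sites (the
# prediction column name is a placeholder assumption):
import pandas as pd

def generate_submission_file(predictions, ids, path):
    # Pair each test Id with its prediction and write a CSV submission file.
    submission = pd.DataFrame({"Id": ids, "Prediction": predictions})
    submission.to_csv(path, index=False)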