def _larscv(*, train, test, x_predict=None, metrics, fit_intercept=True,
            verbose=False, max_iter=500, normalize=True, precompute='auto',
            cv=None, max_n_alphas=1000, n_jobs=None,
            eps=2.220446049250313e-16, copy_X=True):
    """Fit a ``LarsCV`` model, score it on the test split, optionally predict.

    For more info visit:
    https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LarsCV.html#sklearn.linear_model.LarsCV

    Args:
        train: Indexable pair; ``train[0]`` and ``train[1]`` are passed to
            ``model.fit`` as features and targets.
        test: Indexable pair; ``test[0]`` is predicted on and ``test[1]`` is
            compared against the prediction.
        x_predict: Optional extra inputs to predict on after scoring.
        metrics: Which error to report: ``'mse'``, ``'rmse'`` or ``'mae'``.
        fit_intercept, verbose, max_iter, normalize, precompute, cv,
        max_n_alphas, n_jobs, eps, copy_X: Forwarded unchanged to the
            ``LarsCV`` constructor.

    Returns:
        ``(model_name, accuracy, y_predict)`` where ``model_name`` is
        ``'LarsCV'``, ``accuracy`` is the selected error on the test split and
        ``y_predict`` is ``None`` when ``x_predict`` is ``None``.

    Raises:
        ValueError: If ``metrics`` is not one of the supported names.
    """
    # Validate before the (potentially expensive) cross-validated fit; an
    # unknown metric previously surfaced only afterwards, as an
    # UnboundLocalError on ``accuracy``.
    if metrics not in ('mse', 'rmse', 'mae'):
        raise ValueError(
            f"unsupported metrics {metrics!r}; expected 'mse', 'rmse' or 'mae'"
        )

    # NOTE(review): scikit-learn deprecated ``normalize`` in 1.0 and removed
    # it in 1.2 -- confirm the pinned version still accepts this keyword.
    model = LarsCV(
        fit_intercept=fit_intercept,
        verbose=verbose,
        max_iter=max_iter,
        normalize=normalize,
        precompute=precompute,
        cv=cv,
        max_n_alphas=max_n_alphas,
        n_jobs=n_jobs,
        eps=eps,
        copy_X=copy_X,
    )
    model.fit(train[0], train[1])
    model_name = 'LarsCV'

    y_hat = model.predict(test[0])
    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    else:  # 'mae' -- guaranteed by the validation above
        accuracy = _mae(test[1], y_hat)

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
def Lars_regression(self, X_train, y_train, X_test, y_test):
    """Fit ``LarsCV`` with repeated k-fold CV and score it on the test split.

    The estimator is cross-validated with
    ``RepeatedKFold(n_splits=10, n_repeats=10, random_state=42)`` and
    ``n_jobs=-1``.

    Args:
        X_train: Training inputs passed to ``fit``.
        y_train: Training targets passed to ``fit``.
        X_test: Inputs to predict on.
        y_test: Reference targets the predictions are scored against.

    Returns:
        Tuple ``(best_model, mse, mae, r2)`` -- note that MSE comes before
        MAE in the returned tuple.
    """
    best_model = LarsCV(
        cv=RepeatedKFold(n_splits=10, n_repeats=10, random_state=42),
        n_jobs=-1,
    )
    best_model.fit(X_train, y_train)

    predictions = best_model.predict(X_test)
    # Scores are computed in the order MAE, MSE, R^2.
    mae, mse, r2 = (
        score(y_test, predictions)
        for score in (mean_absolute_error, mean_squared_error, r2_score)
    )
    return best_model, mse, mae, r2
class _LarsCVImpl:
    """Delegate that builds an ``Op`` estimator from keyword hyperparameters.

    ``fit`` and ``predict`` are forwarded to the wrapped ``Op`` instance.
    """

    def __init__(self, **hyperparams):
        """Store the hyperparameters and construct the wrapped ``Op`` with them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded positionally only when it is not ``None``;
        otherwise the wrapped ``fit`` is called with ``X`` alone.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's prediction for ``X``."""
        estimator = self._wrapped_model
        return estimator.predict(X)
class LarsCvClass:
    """LarsCV regression over a sliding window of a temperature series.

    On construction the sample CSV is loaded, split by the ``year`` column
    (``<= 2017`` for training, ``>= 2018`` for testing), turned into
    windowed samples by :meth:`preprocessing`, and a ``LarsCV`` model is
    fitted on the training part.

    Methods: ``preprocessing``, ``predict``, ``predict_by_cv``,
    ``predict_by_gs``, ``save_model``, ``save_chart_image``.
    """

    # Instance attributes released by ``__del__``.
    _MANAGED_ATTRS = (
        '_x_train', '_x_test', '_y_train', '_y_test', '_x', '_y', '_model',
    )

    def __init__(self):
        """Load the sample data, build train/test sets and fit the model."""
        # Algorithm name; also used in the saved model's file name.
        self._name = 'larscv'
        # Base path: the parent of the directory that contains this file.
        self._f_path = os.path.abspath(
            os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir)
        )

        # Silence warnings. NOTE: this filter is process-wide, not scoped to
        # this instance.
        warnings.filterwarnings('ignore')

        # Load the raw data.
        data = pd.read_csv(
            self._f_path + "/regression/resource/regression_sample.csv",
            sep=",",
            encoding="utf-8",
        )

        # Row selectors for the train / test split (despite the names, these
        # are year-based masks, not features / labels).
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # Training samples.
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Test samples.
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # Declare the model.
        # NOTE(review): scikit-learn removed ``normalize`` in 1.2 -- confirm
        # the pinned version still accepts it.
        self._model = LarsCV(normalize=False)
        # Train the model.
        self._model.fit(self._x_train, self._y_train)

    def preprocessing(self, data, base_interval=7):
        """Turn the ``temperature`` series into sliding-window samples.

        Each label is one temperature value and its features are the
        ``base_interval`` values immediately before it, oldest first.

        Args:
            data: Mapping-like object indexable by ``"temperature"``.
            base_interval: Non-negative window length; defaults to 7, the
                value that was previously hard-coded.

        Returns:
            ``(x, y)``: a list of windows and the list of matching labels.
            Both are empty when the series has at most ``base_interval``
            values.
        """
        temps = list(data["temperature"])
        x = [temps[i - base_interval:i] for i in range(base_interval, len(temps))]
        y = temps[base_interval:]
        return x, y

    def predict(self, save_img=False, show_chart=False):
        """Predict on the test set, print a short report and return the result.

        Args:
            save_img: When true, also save a chart via ``save_chart_image``.
            show_chart: Forwarded to ``save_chart_image``; only used when
                ``save_img`` is true.

        Returns:
            ``[predictions_as_list, r2_score]``.
        """
        # Prediction.
        y_pred = self._model.predict(self._x_test)
        # R^2 score against the test labels.
        score = r2_score(self._y_test, y_pred)

        # Report.
        if hasattr(self._model, 'coef_') and hasattr(self._model, 'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')
        print(f'Score = {score}')

        # Optionally save the chart image.
        if save_img:
            self.save_chart_image(y_pred, show_chart)

        # Predicted values and score.
        return [list(y_pred), score]

    def predict_by_cv(self):
        """Placeholder for cross-validated prediction; always returns ``False``.

        Cross validation for regression is expected to be implemented to fit
        the actual project.
        """
        return False

    def predict_by_gs(self):
        """Placeholder for GridSearchCV prediction; does nothing."""
        pass

    def save_model(self, renew=False):
        """Persist the fitted model with ``joblib``.

        Args:
            renew: When true and a saved model already exists, the existing
                file is first renamed with the current timestamp in its name
                so it is kept rather than overwritten.
        """
        model_path = self._f_path + f'/model/{self._name}_rg.pkl'
        if renew and os.path.isfile(model_path):
            # Keep the previous model under a timestamped name.
            os.rename(
                model_path,
                self._f_path + f'/model/{str(self._name) + str(time.time())}_rg.pkl',
            )
        joblib.dump(self._model, model_path)

    def save_chart_image(self, data, show_chart):
        """Plot test labels (red) against ``data`` (blue) and save the chart.

        Args:
            data: Predicted values to plot.
            show_chart: When true, also display the chart after saving.
        """
        # NOTE: the output path is relative to the current working directory,
        # unlike the model and CSV paths, which are based on ``self._f_path``.
        fig = plt.figure(figsize=(15, 10), dpi=100)
        try:
            # Labels.
            plt.plot(self._y_test, c='r')
            # Predicted values.
            plt.plot(data, c='b')
            # Save as an image.
            plt.savefig('./chart_images/tenki-kion-lr.png')
            # Show the chart (optional).
            if show_chart:
                plt.show()
        finally:
            # Close the figure so repeated calls do not accumulate open figures.
            plt.close(fig)

    def __del__(self):
        """Release the data and model references held by the instance."""
        # Tolerate missing attributes: ``__init__`` may have failed partway
        # (e.g. the CSV could not be read) before all of them were set.
        for attr in self._MANAGED_ATTRS:
            self.__dict__.pop(attr, None)
# #####################################################################
# (High Dimensional) Linear Regression
# #####################################################################

#####################################################################
## Scikit Learn                                                    ##
#####################################################################
# Each estimator below is built with its default settings, fitted on the
# training values and used to predict on the test values. The predictions
# are then handed to ``generate_submission_file`` together with the test
# ids and a per-user output path under ``../results/``.

# LassoCV
lasso_model = LassoCV().fit(x_train_values, y_train_values)
lasso_model_predictions = lasso_model.predict(x_test_values)
generate_submission_file(
    lasso_model_predictions,
    test_data["Id"],
    "../results/" + user + "_LassoCV.csv",
)

# LarsCV
lars_model = LarsCV().fit(x_train_values, y_train_values)
lars_model_predictions = lars_model.predict(x_test_values)
generate_submission_file(
    lars_model_predictions,
    test_data["Id"],
    "../results/" + user + "_LarsCV.csv",
)

# LassoLarsCV
lassolars_model = LassoLarsCV().fit(x_train_values, y_train_values)
lassolars_model_predictions = lassolars_model.predict(x_test_values)
generate_submission_file(
    lassolars_model_predictions,
    test_data["Id"],
    "../results/" + user + "_LassoLarsCV.csv",
)

# ElasticNetCV
en_model = ElasticNetCV().fit(x_train_values, y_train_values)
en_model_predictions = en_model.predict(x_test_values)
generate_submission_file(
    en_model_predictions,
    test_data["Id"],
    "../results/" + user + "_ElasticNetCV.csv",
)