Example No. 1
def Lars_regression(self, X_train, y_train, X_test, y_test):
    # Select the LARS regularization path via repeated 10-fold cross-validation
    my_cv = RepeatedKFold(n_splits=10, n_repeats=10, random_state=42)
    best_model = LarsCV(cv=my_cv, n_jobs=-1)
    best_model.fit(X_train, y_train)

    # Score the fitted model on the held-out test set
    y_pred = best_model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    return best_model, mse, mae, r2
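A minimal way to exercise the same pattern outside the class, on synthetic data; the make_regression dataset and the 75/25 split below are illustrative assumptions, not part of the original snippet:

from sklearn.datasets import make_regression
from sklearn.linear_model import LarsCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import RepeatedKFold, train_test_split

# Hypothetical data, only to show the call pattern
X, y = make_regression(n_samples=400, n_features=20, noise=10.0, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.75, random_state=42)

my_cv = RepeatedKFold(n_splits=10, n_repeats=10, random_state=42)
model = LarsCV(cv=my_cv, n_jobs=-1).fit(X_train, y_train)
y_pred = model.predict(X_test)
print(mean_squared_error(y_test, y_pred), mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred))

Note that RepeatedKFold with 10 repeats re-fits the LARS path 100 times, so on large data a plain cv=10 may be the more practical choice.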
Example No. 2
class _LarsCVImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
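Op is not defined in this snippet; in wrappers of this shape it usually refers to the underlying scikit-learn estimator class. A sketch of how the wrapper might be wired up and used under that assumption (the alias, data, and hyperparameters below are illustrative):

from sklearn.datasets import make_regression
from sklearn.linear_model import LarsCV

Op = LarsCV  # assumption: the wrapped operator is scikit-learn's LarsCV

X, y = make_regression(n_samples=200, n_features=10, noise=5.0, random_state=0)
impl = _LarsCVImpl(cv=5, max_n_alphas=1000)  # hyperparameters are forwarded to Op
impl.fit(X, y)
print(impl.predict(X)[:5])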
Example No. 3
def _larscv(*,
            train,
            test,
            x_predict=None,
            metrics,
            fit_intercept=True,
            verbose=False,
            max_iter=500,
            normalize=True,
            precompute='auto',
            cv=None,
            max_n_alphas=1000,
            n_jobs=None,
            eps=2.220446049250313e-16,
            copy_X=True):
    """For more info visit : 
        https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LarsCV.html#sklearn.linear_model.LarsCV
    """

    model = LarsCV(fit_intercept=fit_intercept,
                   verbose=verbose,
                   max_iter=max_iter,
                   normalize=normalize,
                   precompute=precompute,
                   cv=cv,
                   max_n_alphas=max_n_alphas,
                   n_jobs=n_jobs,
                   eps=eps,
                   copy_X=copy_X)
    model.fit(train[0], train[1])
    model_name = 'LarsCV'
    y_hat = model.predict(test[0])

    if metrics == 'mse':
        accuracy = _mse(test[1], y_hat)
    elif metrics == 'rmse':
        accuracy = _rmse(test[1], y_hat)
    elif metrics == 'mae':
        accuracy = _mae(test[1], y_hat)
    else:
        raise ValueError("metrics must be one of 'mse', 'rmse' or 'mae'")

    if x_predict is None:
        return (model_name, accuracy, None)

    y_predict = model.predict(x_predict)
    return (model_name, accuracy, y_predict)
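The helpers _mse, _rmse, and _mae are not shown in the snippet; a plausible minimal version, plus a call on synthetic data, might look like the sketch below (the data, the 5-fold cv, and the helper definitions are assumptions). Note that _larscv passes normalize to LarsCV, which only works on scikit-learn releases older than 1.2, where that parameter was removed.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Hypothetical metric helpers matching the names used in _larscv
def _mse(y_true, y_pred):
    return float(np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2))

def _rmse(y_true, y_pred):
    return float(np.sqrt(_mse(y_true, y_pred)))

def _mae(y_true, y_pred):
    return float(np.mean(np.abs(np.asarray(y_true) - np.asarray(y_pred))))

X, y = make_regression(n_samples=300, n_features=15, noise=5.0, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
name, score, preds = _larscv(train=(X_tr, y_tr), test=(X_te, y_te),
                             x_predict=X_te, metrics='rmse', cv=5)
print(name, score)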
Example No. 4
def ResultsLARS(DataSet, Y):
    X_train, X_test, y_train, y_test = train_test_split(DataSet,
                                                        Y,
                                                        train_size=0.75)
    LAR_cv = LarsCV(normalize=True)
    LAR_model = LAR_cv.fit(X_train, y_train)
    LAR_prediction = LAR_model.predict(X_test)
    LAR_mae = np.mean(np.abs(y_test - LAR_prediction))
    LAR_coefs = dict(
        zip(['Intercept'] + DataSet.columns.tolist(),
            np.round(
                np.concatenate((LAR_model.intercept_, LAR_model.coef_),
                               axis=None), 3)))
    print('Least Angle Regression MAE: {}'.format(np.round(LAR_mae, 3)))
    print('Least Angle Regression coefficients:{}'.format(LAR_coefs))
    del LAR_coefs['Intercept']
    DictionaryPlot(LAR_coefs, 'Least Angle Regression')
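DictionaryPlot is not defined in the snippet; a minimal sketch of what such a coefficient bar-chart helper could look like (the figure size and styling are assumptions):

import matplotlib.pyplot as plt

def DictionaryPlot(coef_dict, title):
    # Hypothetical helper: bar chart of coefficient name -> value
    plt.figure(figsize=(10, 5))
    plt.bar(list(coef_dict.keys()), list(coef_dict.values()))
    plt.xticks(rotation=90)
    plt.title(title)
    plt.tight_layout()
    plt.show()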
Example No. 5
class LarsCvClass:
    """
    Name      : LarsCV
    Attribute : None
    Method    : predict, predict_by_cv, save_model
    """

    def __init__(self):
        # Algorithm name
        self._name = 'larscv'

        # Base path
        self._f_path = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir))

        # Suppress warning messages
        warnings.filterwarnings('ignore')

        # Load the raw data
        data = pd.read_csv(self._f_path + "/regression/resource/regression_sample.csv", sep=",", encoding="utf-8")

        # Split into training and test data by year
        self._x = (data["year"] <= 2017)
        self._y = (data["year"] >= 2018)

        # Build the training data
        self._x_train, self._y_train = self.preprocessing(data[self._x])
        # Build the test data
        self._x_test, self._y_test = self.preprocessing(data[self._y])

        # Declare the model
        self._model = LarsCV(normalize=False)

        # Train the model
        self._model.fit(self._x_train, self._y_train)

    # Data preprocessing
    def preprocessing(self, data):
        # Features
        x = []
        # Labels
        y = []
        # Window size (7 days)
        base_interval = 7
        # Temperatures
        temps = list(data["temperature"])

        for i in range(len(temps)):
            if i < base_interval:
                continue
            y.append(temps[i])

            xa = []

            for p in range(base_interval):
                d = i + p - base_interval
                xa.append(temps[d])
            x.append(xa)
        return x, y

    # Standard prediction
    def predict(self, save_img=False, show_chart=False):
        # Predict
        y_pred = self._model.predict(self._x_test)

        # Score
        score = r2_score(self._y_test, y_pred)

        # Report the fitted coefficients
        if hasattr(self._model, 'coef_') and hasattr(self._model, 'intercept_'):
            print(f'Coef = {self._model.coef_}')
            print(f'intercept = {self._model.intercept_}')

        print(f'Score = {score}')

        # Optionally save the chart image
        if save_img:
            self.save_chart_image(y_pred, show_chart)

        # Predicted values & score
        return [list(y_pred), score]

    # CV prediction (cross-validation)
    def predict_by_cv(self):
        # For regression, implement cross-validation to fit the needs of the actual project
        return False

    # GridSearchCV prediction
    def predict_by_gs(self):
        pass

    # Save or refresh the model
    def save_model(self, renew=False):
        # Save the model
        if not renew:
            # First save
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')
        else:
            # Replace the existing model
            if os.path.isfile(self._f_path + f'/model/{self._name}_rg.pkl'):
                os.rename(self._f_path + f'/model/{self._name}_rg.pkl',
                          self._f_path + f'/model/{str(self._name) + str(time.time())}_rg.pkl')
            joblib.dump(self._model, self._f_path + f'/model/{self._name}_rg.pkl')

    # Save the regression chart
    def save_chart_image(self, data, show_chart):
        # Figure size
        plt.figure(figsize=(15, 10), dpi=100)

        # Ground-truth values
        plt.plot(self._y_test, c='r')

        # Predicted values
        plt.plot(data, c='b')

        # Save as an image
        plt.savefig('./chart_images/tenki-kion-lr.png')

        # Show the chart (optional)
        if show_chart:
            plt.show()

    def __del__(self):
        del self._x_train, self._x_test, self._y_train, self._y_test, self._x, self._y, self._model
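A hedged usage sketch for the class above; it assumes the regression_sample.csv file and the model/ and chart_images/ directories referenced in the code actually exist at those relative paths:

# Instantiate: loads the CSV, builds the 7-day temperature windows and fits LarsCV
regressor = LarsCvClass()

# Predict on the held-out years and print the coefficients and R^2 score
predictions, score = regressor.predict(save_img=False, show_chart=False)

# Persist the fitted model (model/larscv_rg.pkl)
regressor.save_model(renew=False)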
Example No. 6
elastic_net_coefs = dict(
    zip(['Intercept'] + data.columns.tolist()[:-1],
        np.round(
            np.concatenate(
                (elastic_net_model.intercept_, elastic_net_model.coef_),
                axis=None), 3)))

print('Elastic Net MSE: {}'.format(np.round(elastic_net_mae, 3)))
print('Elastic Net coefficients:', elastic_net_coefs)

##############################################################################
###################### LEAST ANGLE REGRESSION ################################
##############################################################################
print(
    "##############################################################################"
)
print("LEAST ANGLE REGRESSION")
LAR_cv = LarsCV(normalize=True)
LAR_model = LAR_cv.fit(X_train, y_train)
LAR_prediction = LAR_model.predict(X_test)
LAR_mse = mean_squared_error(y_test, LAR_prediction)
LAR_coefs = dict(
    zip(['Intercept'] + data.columns.tolist()[:-1],
        np.round(
            np.concatenate((LAR_model.intercept_, LAR_model.coef_), axis=None),
            3)))

print('Least Angle Regression MSE: {}'.format(np.round(LAR_mse, 3)))
print('Least Angle Regression coefficients:', LAR_coefs)

Example No. 7
# - Hold out half of the data and train LARS models

# Variable definitions
# --- number of training samples
train_n = 100

# Instantiate and fit
# --- limit the number of non-zero coefficients to 12
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])

# Instantiate and fit
# --- allow up to 500 non-zero coefficients (the default)
lars_500 = Lars(n_nonzero_coefs=500)
lars_500.fit(reg_data[:train_n], reg_target[:train_n])

# Mean squared error
np.mean(
    np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))

# 3 LARS as feature selection ---------------------------------------------------------------------

# Instantiate
lcv = LarsCV()

# Fit
lcv.fit(reg_data, reg_target)

# Non-zero coefficients
np.sum(lcv.coef_ != 0)
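Because LarsCV drives many coefficients exactly to zero, the fitted coef_ vector can be used directly as a column mask for feature selection. A short sketch following the variable names above, assuming reg_data is a 2-D NumPy array:

import numpy as np

# Keep only the columns whose LARS coefficient is non-zero
mask = lcv.coef_ != 0
reg_data_selected = reg_data[:, mask]
print(reg_data_selected.shape)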
Example No. 8
# Print support and ranking from the feature selector fitted earlier (before this excerpt)
print(feat_selector.support_)  # False means the feature can be eliminated
print(feat_selector.ranking_)  # ranking of each feature
print(X.columns)

################## Use LarsCV for hyperparameter optimization (wrapper)
# LARS starts with one variable and increases its coefficient; once the residual is as
# correlated with some other variable as with the one already in the model, that variable is
# added and the coefficients move together in the joint least-squares direction (found by
# fitting just those variables), and the process iterates.
# This doubles as a feature selection method because at the end some coefficients are exactly 0.

# Instantiate
lars_mod = LarsCV(cv=5, normalize=False)

# Fit
feat_selector = lars_mod.fit(X, y)

# Print r-squared score and estimated alpha
print(lars_mod.score(X, y))
print(lars_mod.alpha_)

################# Using a RandomForestRegressor for feature selection (tree-based methods)
# How permutation feature importance is calculated: build the trees, then take one feature,
# permute it randomly (shuffle) and rerun the observations through the trees. Measure the error;
# the percentage increase in error caused by the shuffle gives that feature's importance.
# https://link.springer.com/article/10.1023/A:1010933404324

# Instantiate
rf_mod = RandomForestRegressor(max_depth=2, random_state=123,
                               n_estimators=100, oob_score=True)
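The snippet stops at the instantiation. To obtain the importances described in the comment above, the forest still has to be fitted; note that scikit-learn's feature_importances_ attribute is the impurity-based measure, while the shuffle-and-rescore scheme the comment describes corresponds to sklearn.inspection.permutation_importance. A sketch, assuming X and y are the same DataFrame and target used earlier:

from sklearn.inspection import permutation_importance

rf_mod.fit(X, y)

# Impurity-based importances (what the fitted forest exposes directly)
print(dict(zip(X.columns, rf_mod.feature_importances_)))

# Permutation importances, matching the shuffle-and-rescore idea described above
perm = permutation_importance(rf_mod, X, y, n_repeats=10, random_state=123)
print(dict(zip(X.columns, perm.importances_mean)))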
Example No. 9
def fit_lars_cv(self, X, y, n_fold=10):
    # Fit LarsCV with n_fold-fold cross-validation and return the fitted estimator
    from sklearn.linear_model import LarsCV
    lars_cv = LarsCV(cv=n_fold)
    lars_cv.fit(X, y)
    return lars_cv
Example No. 10
#10

train_n = 100
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])
lars_500 = Lars() # it's 500 by default
lars_500.fit(reg_data[:train_n], reg_target[:train_n]);
#Now, to see how well each model fits the held-out data, do the following:
np.mean(np.power(reg_target[train_n:] - lars_12.predict(reg_data[train_n:]), 2))
#31.527714163321001
np.mean(np.power(reg_target[train_n:] - lars_500.predict(reg_data[train_n:]), 2))
#9.6198147535136237e+30

from sklearn.linear_model import LarsCV
lcv = LarsCV()
lcv.fit(reg_data, reg_target)

print(np.sum(lcv.coef_ != 0))
#44


#Using linear methods for classification - logistic regression

from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=4)

from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()

X_train = X[:-200]
X_test = X[-200:]
Example No. 11
#####################################################################
# (High Dimensional) Linear Regression                              #
#####################################################################

#####################################################################
## Scikit Learn                                                    ##
#####################################################################

# Requires: from sklearn.linear_model import LassoCV, LarsCV, LassoLarsCV, ElasticNetCV
lasso_model = LassoCV()
lasso_model.fit(x_train_values, y_train_values)
lasso_model_predictions = lasso_model.predict(x_test_values)
generate_submission_file(lasso_model_predictions, test_data["Id"],
                         "../results/" + user + "_LassoCV.csv")

lars_model = LarsCV()
lars_model.fit(x_train_values, y_train_values)
lars_model_predictions = lars_model.predict(x_test_values)
generate_submission_file(lars_model_predictions, test_data["Id"],
                         "../results/" + user + "_LarsCV.csv")

lassolars_model = LassoLarsCV()
lassolars_model.fit(x_train_values, y_train_values)
lassolars_model_predictions = lassolars_model.predict(x_test_values)
generate_submission_file(lassolars_model_predictions, test_data["Id"],
                         "../results/" + user + "_LassoLarsCV.csv")

en_model = ElasticNetCV()
en_model.fit(x_train_values, y_train_values)
en_model_predictions = en_model.predict(x_test_values)
generate_submission_file(en_model_predictions, test_data["Id"],
                         "../results/" + user + "_ElasticNetCV.csv")