Beispiel #1
0
def save_elastic_net_regression():
    """Grid-search the ElasticNet alpha on the module-level ``df`` and write
    the best model's summary to ./results/elasticNetRegression.txt.

    Relies on module globals: ``df``, ``ElasticNet``, ``np`` and the helper
    ``get_formatted_data_frame_from_predictions``.
    """
    features = df.columns.tolist()
    # Drop column 10 first, then column 0 (deleting index 0 first would
    # shift the later index).
    del features[10]
    del features[0]
    X = df[features]
    y = df["class"]

    best_alpha = 0.1
    best_model = ElasticNet(random_state=0, alpha=best_alpha)
    best_model.fit(X, y)
    best_score = best_model.score(X, y, sample_weight=None)

    # Sweep alpha over 0.1 .. 99.9 in steps of 0.1, keeping the best R^2.
    for step in range(1, 1000):
        alpha = 0.1 * step
        model = ElasticNet(random_state=0, alpha=alpha)
        model.fit(X, y)
        # Compute the score once per iteration (the original computed it
        # twice when the new score was an improvement).
        score = model.score(X, y, sample_weight=None)
        if score > best_score:
            best_alpha = alpha
            best_score = score

    # Refit with the winning alpha and write the report.
    model = ElasticNet(random_state=0, alpha=best_alpha)
    model.fit(X, y)
    params = np.round(np.append(model.intercept_, model.coef_), 4)
    predictions = model.predict(X)
    myDF3 = get_formatted_data_frame_from_predictions(X, y, predictions,
                                                      params, features)
    # Context manager guarantees the handle is closed even on error
    # (the original leaked the open file object).
    with open("./results/elasticNetRegression.txt", "w") as f:
        f.write("Alpha  = " + str(best_alpha) + "\n\n")
        f.write("R-squared  = " + str(model.score(X, y, sample_weight=None)) +
                "\n\n")
        f.write(str(myDF3))
Beispiel #2
0
def linear_regression1():
    """Fit an ElasticNet (alpha=1, l1_ratio=1) on the diabetes dataset and
    print its coefficients, intercept, train/test R^2 scores and the number
    of non-zero features."""
    # Diabetes dataset (load once instead of twice)
    diabetes = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(
        diabetes.data, diabetes.target, random_state=8)
    elastic_net = ElasticNet(alpha=1, l1_ratio=1,
                             max_iter=100000).fit(X_train, y_train)
    print('--------------')
    print('elastic_net.coef_:{}'.format(elastic_net.coef_[:]))
    print('elastic_net.intercept_:{}'.format(elastic_net.intercept_))
    print('----------')
    print('训练数据集得分:{:.2f}'.format(elastic_net.score(X_train, y_train)))
    print('测试数据集得分:{:.2f}'.format(elastic_net.score(X_test, y_test)))
    print('弹性网回归使用的特征数:{}'.format(np.sum(elastic_net.coef_ != 0)))
Beispiel #3
0
    def elastic_net_model(self, X_train, y_train, X_test, y_test):
        """Fit an ElasticNet (alpha=.253) on the training split and print
        train/test R^2 scores, MSE and r2_score metrics."""
        regressor = ElasticNet(alpha=.253)
        regressor.fit(X_train, y_train)

        train_pred = regressor.predict(X_train)
        test_pred = regressor.predict(X_test)

        # Scoring the model
        print(regressor.score(X_train, y_train))
        print(regressor.score(X_test, y_test))
        print('MSE train: %.6f, MSE test: %.6f' % (mean_squared_error(
            y_train, train_pred), mean_squared_error(y_test, test_pred)))
        print('R^2 train: %.6f, R^2 test: %.6f' %
              (r2_score(y_train, train_pred), r2_score(y_test, test_pred)))
def enet_granger_causality_test(X_t, y_t, top_df, max_iter=10000000):
    """
    Return the cv-parameters tested across the whole data
    :param X_t:
    :param y_t:
    :param top_df:
    :return: res_df, test_betas
    """
    n_rows = len(top_df)
    test_errs = np.zeros(n_rows)
    scores = np.zeros(n_rows)
    dfs = np.zeros(n_rows)
    test_coefs = np.zeros((n_rows, X_t.shape[1]))

    for idx in range(n_rows):
        row = top_df.iloc[idx]
        # l1_ratio comes from the "alpha" column, sklearn's alpha from
        # "lambda.min" (glmnet-style naming in top_df).
        enet = ElasticNet(l1_ratio=row["alpha"], alpha=row["lambda.min"],
                          max_iter=max_iter)
        enet.fit(X_t, y_t)
        residual = y_t - enet.predict(X_t)
        test_errs[idx] = np.average(residual ** 2)
        scores[idx] = enet.score(X_t, y_t)
        test_coefs[idx] = enet.coef_
        # degrees of freedom = number of non-zero coefficients
        dfs[idx] = np.count_nonzero(enet.coef_)

    top_df["test_err"] = test_errs
    top_df["score"] = scores
    top_df["df"] = dfs

    return top_df, test_coefs
Beispiel #5
0
def EN(data=data, city='all', label="label_activity_density"):
    """Sweep a list of alphas for an ElasticNet on the (optionally
    city-filtered) data and return the best result.

    Fix: the alphas list contained 0.0000001 twice, causing a redundant
    fit and a duplicate printed line; the duplicate is removed (the
    returned best result is unchanged).

    :param data: source DataFrame (default is the module-level ``data``).
    :param city: 'all' or a substring matched against "city_district".
    :param label: name of the target column.
    :return: (alpha, r2, mse, rmse) tuple for the best-scoring alpha.
    """
    if city == 'all':
        data2 = data.copy()
    else:
        data2 = data[data["city_district"].str.contains(city)].copy()

    target = data2[["city_district", label]]
    features = data2[features_columns]  # features_columns is a module global

    X = features.values
    y = target[label].values

    alphas = [0.0000001, 0.000001, 0.00001,
              0.0001, 0.001, 0.01,
              0.03, 0.05, 0.07, 0.1]

    best_res = (0, 0, 0, 0)

    for a in alphas:
        model = ElasticNet(alpha=a).fit(X, y)
        score = model.score(X, y)
        pred_y = model.predict(X)
        mse = mean_squared_error(y, pred_y)
        print("Alpha:{0:.5f}, R2:{1:.2f}, MSE:{2:.2f}, RMSE:{3:.2f}".format(
            a, score, mse, np.sqrt(mse)))
        if score > best_res[1]:
            best_res = (a, score, mse, np.sqrt(mse))

    return best_res
Beispiel #6
0
    def elasticRegression(self):
        """Fit an ElasticNet (alpha=0.0) on self.train, record the training
        score, predict on self.erTest, persist the predictions to CSV and
        store MSE/MAE plus model attributes on the instance."""
        er = ElasticNet(alpha=0.0)
        train_X = self.train[:, :-1]
        train_y = self.train[:, -1:]

        er.fit(train_X, train_y)
        self.erScore = er.score(train_X, train_y)

        # Predict on the feature columns of the held-out array.
        preds = er.predict(self.erTest[:, :-1])

        self.erTest = pd.DataFrame(self.erTest)
        self.erTest['er_Amount'] = preds
        self.erTest.to_csv('Elastic_Net_Apply.csv')

        yTrue = self.erTest.loc[:, 9]
        yPred = self.erTest.loc[:, 'er_Amount']
        self.erMse = sklearn.metrics.mean_squared_error(yTrue, yPred)
        self.erMae = sklearn.metrics.mean_absolute_error(yTrue, yPred)

        self.erCoef = er.coef_
        self.erNIter = er.n_iter_
        self.er = er
Beispiel #7
0
def elastic_net(problem, **kwargs):
    r"""Fit an Elastic Net to the problem's data series and select the
    influential ones.

    Parameters
    ----------
    problem : type
        Description
        kwargs['elastic_net_reg_coef'] must be a nonnegative float.  This is
        the multiplier for the penalty term
        kwargs['elastic_net_ratio'] must be between 0 and 1
        kwargs['coef_tolerance'] must be a nonnegative float

    Returns
    -------
    output : tuple
        (optimum, maximum)

    """
    observations = numpy.array(
        [datum['data']['values'] for datum in problem.data])
    goal_values = problem.goal['data']['values']
    model = ElasticNet(alpha=kwargs['elastic_net_reg_coef'],
                       l1_ratio=kwargs['elastic_net_ratio'])
    model.fit(observations.T, goal_values)
    # Keep only the data series whose coefficient magnitude exceeds the
    # caller-supplied tolerance.
    optimum = []
    for index, coefficient in enumerate(model.coef_):
        if abs(coefficient) > kwargs['coef_tolerance']:
            optimum.append(problem.data[index])
    maximum = model.score(observations.T, goal_values)
    return (optimum, maximum)
Beispiel #8
0
    def LinearRegression_Elastic_Net(self, X_train, X_test, Y_train, Y_test,
                                     list_of_columns, colslist, alpha_val):
        """Fit an ElasticNet on the selected feature columns, print its
        coefficients and test score, then delegate error reporting to
        self.FindErrors."""
        print('---------------------------------------------')
        print('LinearRegression Elastic Net')
        # Restrict both splits to the requested feature columns.
        x_train = X_train[list_of_columns]
        x_test = X_test[list_of_columns]
        y_train, y_test = Y_train, Y_test

        model = ElasticNet(fit_intercept=True, alpha=alpha_val)
        model.fit(x_train, y_train)
        Y_pred = model.predict(x_test)
        print('---------------------------------------------')
        print('Coeff :', model.coef_)
        print('Intercept', model.intercept_)
        print('LScore', model.score(x_test, y_test))

        print('---------------------------------------------')
        print('Evaluation of Test Data')
        y_test_pred = model.predict(x_test)
        # Model Evaluation
        self.FindErrors(x_test, y_test, y_test_pred,
                        'Linear Regressor Elastic Net', colslist, alpha_val)
def test_ElasticNet_alpha_beta(*data):
    """Sweep an ElasticNet over a grid of alpha (regularisation strength,
    log-spaced 1..100) and beta (l1_ratio, 0.01..1) values and draw the
    test-set R^2 scores as a 3-D surface.

    :param data: (train_x, test_x, train_y, test_y) tuple.
    """
    train_x, test_x, train_y, test_y = data
    alphas = np.logspace(0, 2)   # 50 values by default
    betas = np.linspace(0.01, 1)  # 50 values by default
    scores_ElasticNet = []
    # Scores are appended alpha-major (outer loop over alphas).
    for alpha in alphas:
        for beta in betas:
            regr_ElasticNet = ElasticNet(alpha=alpha, l1_ratio=beta)
            regr_ElasticNet.fit(train_x, train_y)
            scores_ElasticNet.append(regr_ElasticNet.score(test_x, test_y))

    # Plotting
    # NOTE(review): meshgrid yields shape (len(betas), len(alphas)) while the
    # scores were collected alpha-major; the reshape only works because both
    # grids have 50 points and may transpose the surface — verify axes.
    alphas1, betas1 = np.meshgrid(alphas, betas)
    scores = np.array(scores_ElasticNet).reshape(alphas1.shape)

    fig = plt.figure()
    ax = Axes3D(fig)
    surf = ax.plot_surface(alphas1,
                           betas1,
                           scores,
                           rstride=1,
                           cstride=1,
                           cmap=cm.jet,
                           antialiased=False)
    fig.colorbar(surf)
    ax.set_xlabel(r'$\alpha$', fontproperties=myfont)
    ax.set_ylabel(r'$\beta$', fontproperties=myfont)
    ax.set_zlabel(r'score', fontproperties=myfont)
    ax.set_title('ElasticNet回归', fontproperties=myfont)
    plt.show()
    return
def enet_granger_causality_test(X_t, y_t, top_df, max_iter=10000000):
    """
    Return the cv-parameters tested across the whole data
    :param X_t:
    :param y_t:
    :param top_df:
    :return: res_df, test_betas
    """
    err_list, score_list, df_list = [], [], []
    test_coefs = np.zeros((len(top_df), X_t.shape[1]))

    for i in range(len(top_df)):
        params = top_df.iloc[i]
        # "alpha" holds the l1_ratio and "lambda.min" the sklearn alpha
        # (glmnet-style column names in top_df).
        enet = ElasticNet(l1_ratio=params["alpha"],
                          alpha=params["lambda.min"], max_iter=max_iter)
        enet.fit(X_t, y_t)
        err_list.append(np.average((y_t - enet.predict(X_t)) ** 2))
        score_list.append(enet.score(X_t, y_t))
        test_coefs[i] = enet.coef_
        # degrees of freedom: count of non-zero coefficients
        df_list.append(len(np.where(enet.coef_)[0]))

    top_df["test_err"] = np.array(err_list, dtype=float)
    top_df["score"] = np.array(score_list, dtype=float)
    top_df["df"] = np.array(df_list, dtype=float)

    return top_df, test_coefs
Beispiel #11
0
def enet(a):
    """Fit an ElasticNet with alpha ``a`` on the global training data, print
    its training score and write predictions for X_test to elastic.csv."""
    print ("Doing elastic net")
    model = ElasticNet(alpha=a)
    model.fit(base_X, base_Y)
    print ("Score = %f" % model.score(base_X, base_Y))
    write_to_file("elastic.csv", model.predict(X_test))
def test_Ridge_lasso_alpha(*data):
    """Compare Ridge, Lasso and ElasticNet test-set scores over a log-spaced
    alpha grid and plot all three curves on a log-x axis."""
    train_x, test_x, train_y, test_y = data
    alphas = np.logspace(0, 3, num=10)

    def _fit_and_score(model):
        # Helper: fit on the train split, score on the test split.
        model.fit(train_x, train_y)
        return model.score(test_x, test_y)

    scores_Ridge = [_fit_and_score(Ridge(alpha=a)) for a in alphas]
    scores_lasso = [_fit_and_score(Lasso(alpha=a)) for a in alphas]
    scores_ElasticNet = [_fit_and_score(ElasticNet(alpha=a)) for a in alphas]

    ax3 = plt_helper('ax3',
                     'alph参数与回归性能',
                     xlabel=r'$\alpha$取值',
                     ylabel='归模型的预测性能')
    ax3.plot(alphas, scores_Ridge, label='岭回归')
    ax3.plot(alphas, scores_lasso, label='Lasso回归')
    ax3.plot(alphas, scores_ElasticNet, label='ElasticNet回归')
    ax3.legend(loc='best', prop=myfont)
    ax3.set_xscale('log')

    return
Beispiel #13
0
def perform_elastinet_regression(df_X, df_Y, test_X, test_Y):
    """Fit an ElasticNet (alpha=0.1, l1_ratio=0.7) on the training split and
    report the R^2 on the held-out split.

    :param df_X: training features
    :param df_Y: training targets
    :param test_X: test features
    :param test_Y: test targets
    :return: dict with the model name and the rounded test r2_score
    """
    clf = ElasticNet(alpha=0.1, l1_ratio=0.7)
    clf.fit(df_X, df_Y)
    pred_Y = clf.predict(test_X)
    r2_score_rr = round(r2_score(test_Y, pred_Y), 3)
    # Dead code removed: the original computed a training-set "accuracy"
    # (clf.score on the training data) that was never used or returned.
    return {'model': 'ElasticNet', 'r2_score': r2_score_rr}
Beispiel #14
0
def _elastic_net_regularization(X_train, X_test, y_train, y_test):
    """Fit an ElasticNet and return (test R^2 score, test MSE).

    Bug fix: the original passed ``l1_ratio=2``, which is outside the valid
    [0, 1] range and is rejected by modern scikit-learn; 1.0 (a pure L1 /
    Lasso penalty) is the nearest valid setting.
    """
    ela = ElasticNet(alpha=1.0, l1_ratio=1.0)
    ela.fit(X_train, y_train)
    y_pred = ela.predict(X_test)
    score = ela.score(X_test, y_test)
    err = mean_squared_error(y_test, y_pred)

    return score, err
Beispiel #15
0
def ElasticModel(X, Y):
    """Fit an ElasticNet (alpha=0.05, l1_ratio=0.5) on (X, Y) and print the
    training MSE and R^2.

    Fix: the ``normalize`` keyword was deprecated in scikit-learn 1.0 and
    removed in 1.2; it was set to False (the default) here, so dropping it
    preserves behaviour while keeping the call valid on current releases.
    """
    elastic = ElasticNet(alpha=0.05, l1_ratio=0.5)
    elastic.fit(X, Y)
    y_predict_elastic = elastic.predict(X)
    # calculating mse
    print('For our Elastic model, are values are:')
    print('Training MSE:', mean_squared_error(Y, y_predict_elastic))
    print('Training r^2:', elastic.score(X, Y))
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    """Check that sparse and dense ElasticNet fits agree on a non-trivial
    synthetic dataset: both converge (dual gap ~ 0), both score > 0.85, and
    their coefficients/intercepts match to 5 decimal places."""
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative,
                            positive=positive)

    # Second half trains, first half tests.
    X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
    y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]

    # Identical hyper-parameters for both the sparse and the dense fit.
    enet_kwargs = dict(alpha=alpha, l1_ratio=0.8,
                       fit_intercept=fit_intercept, max_iter=max_iter,
                       tol=1e-7, positive=positive, warm_start=True)

    s_clf = ElasticNet(**enet_kwargs)
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = ElasticNet(**enet_kwargs)
    d_clf.fit(X_train.toarray(), y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative
def test_model(X_test, y_test, l1_ratio=.5, alpha=.5, X_train=None, y_train=None):
    """Train an ElasticNet on the training data with the specified parameters
    and return its R^2 score on the test data.

    :raises ValueError: if X_train or y_train is omitted — the None defaults
        would otherwise crash inside ElasticNet.fit with an opaque error.
    """
    if X_train is None or y_train is None:
        raise ValueError("X_train and y_train are required to fit the model")
    reg = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=50000)
    reg.fit(X_train, y_train)
    score = reg.score(X_test, y_test)

    return score
Beispiel #18
0
def Elastic():
    """Fit a default ElasticNet on the global train split (x1, y1), evaluate
    on (x2, y2) and hand the results to plotgraph.

    Bug fix: ``score()`` expects (X, y); the original called
    ``model.score(y2, y_pred)``, treating the targets as features, which is
    meaningless (and crashes when y2 is 1-D).
    """
    global x1, x2, y1, y2, dict1
    model = ElasticNet()
    name = "Elastic Net"
    model.fit(x1, y1)
    y_pred = model.predict(x2)
    error = mean_squared_error(y2, y_pred)
    score = model.score(x2, y2)
    plotgraph(y_pred, name, error, score)
def enet_train(alpha, l1_ratio, x, y):
    """Return the mean 3-fold cross-validated R^2 (negative folds clamped to
    0) for an ElasticNet with the given alpha and l1_ratio.

    Fixes: removed the unreachable ``return clf.score(x, y)`` that followed
    the first return, and dropped the ``normalize`` keyword (removed in
    scikit-learn 1.2; it was False, the default, so behaviour is unchanged).
    """
    clf = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, max_iter=1000)
    cval = cross_val_score(clf, x, y, scoring='r2', cv=3)
    # Clamp negative R^2 folds to 0 so they don't drag the mean below zero.
    cval[np.where(cval < 0)[0]] = 0
    return cval.mean()
    def elastic_net(X_train_scaled, X_test_scaled, y_train_scaled,
                    y_test_scaled):
        """Fit an ElasticNet (alpha=.01) on the scaled training data and
        return (test predictions, test MSE, test R^2)."""
        from sklearn.linear_model import ElasticNet
        model = ElasticNet(alpha=.01)
        model.fit(X_train_scaled, y_train_scaled)
        predictions = model.predict(X_test_scaled)
        mse = mean_squared_error(y_test_scaled, predictions)
        r2 = model.score(X_test_scaled, y_test_scaled)

        return predictions, mse, r2
Beispiel #21
0
def enet(a):
    """Fit an ElasticNet (alpha=a) on the global base data, print the
    training score, then predict on X_test2.csv and save the predictions
    to elastic.csv."""
    print ("Doing elastic net")
    model = ElasticNet(alpha=a)
    model.fit(base_X, base_Y)
    print ("Score = %f" % model.score(base_X, base_Y))
    test_features = pd.read_csv("X_test2.csv").values
    write_to_file("elastic.csv", model.predict(test_features))
 def model_el_net(args, y):
     """Fit an ElasticNet (alpha=0.1, l1_ratio=0.7) on (args, y) and return
     the training R^2, the estimator's params, and [intercept] followed by
     the coefficients."""
     enet = ElasticNet(alpha=0.1, l1_ratio=0.7)
     enet.fit(args, y)
     train_r2 = enet.score(args, y)
     hyper_params = enet.get_params()
     # Intercept first, then every fitted coefficient.
     weights = [enet.intercept_, *enet.coef_]
     return train_r2, hyper_params, weights
Beispiel #23
0
def linlasso(alpha):
    """Plot train/test R^2 of ElasticNet fits for integer alphas 0..alpha-1
    (with l1_ratio = alpha/10) on a small synthetic dataset.

    Fix: the original built X and y via list comprehensions used purely for
    their ``append`` side effects (an anti-pattern that also leaves two
    throwaway lists of None); replaced with real comprehensions.
    """
    x = np.linspace(0, np.pi, 100)
    z = np.random.random(100)
    X = [[x[i] ** 2, np.exp(np.sin(z[i]))] for i in range(len(x))]
    y = [[0.1 * z[i] + (x[i])] for i in range(len(x))]
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    train_scores = []
    test_scores = []
    for i in range(alpha):
        # L1 & L2 Regularization (hyperparameter): as alpha increases model underfits
        lr = ElasticNet(alpha=float(i), l1_ratio=float(i) / 10,
                        max_iter=1e5).fit(X_train, y_train)
        train_scores.append(lr.score(X_train, y_train))
        test_scores.append(lr.score(X_test, y_test))
    I = [i for i in range(alpha)]
    plt.plot(I, train_scores, 'r', I, test_scores, 'k')
    plt.show()
Beispiel #24
0
def analyze_ElasticNet(l1_ratio, concrete_train_X, concrete_test_X,
                       concrete_train_y, concrete_test_y):
    """Fit an ElasticNet with the given l1_ratio on the concrete training
    data and package test predictions and score via make_analyze_response."""
    regressor = ElasticNet(l1_ratio=l1_ratio)
    regressor.fit(concrete_train_X, concrete_train_y)

    predictions = regressor.predict(concrete_test_X)
    test_score = regressor.score(concrete_test_X, concrete_test_y)
    label = "ElasticNet L1_RATIO_%s" % (l1_ratio)
    return make_analyze_response(label, concrete_test_X, concrete_test_y,
                                 predictions, test_score)
Beispiel #25
0
def train(training_pandas_data, test_pandas_data, label_col, feat_cols, alpha,
          l1_ratio, training_data_path, test_data_path):
    """Train an ElasticNet on the training DataFrame, report RMSE and R^2 on
    both splits, log the metrics to mlflow and save the fitted model."""
    print("train:         " + training_data_path)
    print("test:          " + test_data_path)
    print("alpha:        ", alpha)
    print("l1-ratio:     ", l1_ratio)
    print("label-col:     " + label_col)
    for col in feat_cols:
        print("feat-cols:     " + col)

    # Pull label vectors and feature matrices out of both DataFrames.
    train_y = training_pandas_data[label_col].values
    train_X = training_pandas_data[feat_cols].values
    test_y = test_pandas_data[label_col].values
    test_X = test_pandas_data[feat_cols].values

    # Fit a linear Elastic Net model.
    en = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    en.fit(train_X, train_y)

    # Evaluate: RMSE on the test split, R^2 on both splits.
    test_rmse = mean_squared_error(test_y, en.predict(test_X)) ** 0.5
    r2_score_training = en.score(train_X, train_y)
    r2_score_test = en.score(test_X, test_y)
    print("Test RMSE:", test_rmse)
    print("Training set score:", r2_score_training)
    print("Test set score:", r2_score_test)

    # Log the metrics and persist the model as an mlflow artifact.
    mlflow.log_metric("Test RMSE", test_rmse)
    mlflow.log_metric("Train R2", r2_score_training)
    mlflow.log_metric("Test R2", r2_score_test)
    sklearn.log_model(en, "model")

    run_id = mlflow.active_run().info.run_uuid
    print("Run with id %s finished" % run_id)
def ElasticNetPrediction(X_train, y_train, X_test, y_test):
    """Fit a default ElasticNet, chart its test predictions and return
    (test R^2 score, test RMSE).

    Fix: removed a bare ``elasticnet_score`` expression statement that was a
    no-op.
    """
    elasticnet = ElasticNet()
    elasticnet.fit(X_train, y_train)
    elasticnet_score = elasticnet.score(X_test, y_test)
    elasticnet_pred = elasticnet.predict(X_test)
    # The mean squared error
    elasticnetRMSE = sqrt(mean_squared_error(y_test, elasticnet_pred))
    print("Root mean squared error: %.2f" % elasticnetRMSE)
    print('R-squared elasticnet: %.2f' % r2_score(y_test, elasticnet_pred))
    chart_regression(elasticnet_pred, y_test, 'ElasticNetPrediction')
    return elasticnet_score, elasticnetRMSE
Beispiel #27
0
    def runElasticNetRegressor(self):
        """Fit an ElasticNet (with intercept and input normalisation) on the
        instance's train split, predict on the test split, and render the
        prediction/residual plots plus the model summary.

        NOTE(review): ``normalize=True`` was deprecated in scikit-learn 1.0
        and removed in 1.2 — on modern releases this constructor raises a
        TypeError.  Reproducing the old behaviour needs a StandardScaler
        pipeline; flagged rather than changed here because that alters the
        fitted coefficients.
        """
        lm = ElasticNet(fit_intercept=True, normalize=True)

        print("ElasticNet Regressor\n")
        # fit() returns the same estimator, so `reg` aliases `lm`.
        reg = lm.fit(self.m_X_train, self.m_y_train)
        predictY = lm.predict(self.m_X_test)
        score = lm.score(self.m_X_test, self.m_y_test)
        predictTraingY = lm.predict(self.m_X_train)

        self.displayPredictPlot(predictY)
        self.displayResidualPlot(predictY, predictTraingY)
        self.dispalyModelResult(lm, predictY, score)
class boroReg:
    """Wrap an ElasticNet plus X/y preprocessing pipelines for one borough's
    rows, with grid-search helpers and inverse-transformed predictions."""

    def __init__(self, X, y, idx, pipe_X, pipe_y):
        # shift to fix 1 indexing using np broadcasting
        self.X = X[idx, :]
        self.y = y[idx, :]
        self._gridSearch = None
        self.pipeline_X = pipe_X
        self.pipeline_y = pipe_y
        self._searchSpace = None
        self._params = None
        self.lm = ElasticNet()

    def __imputeVals(self, in_df):
        # Delegate to the module-level imputation helper.
        return imputeVals(in_df)

    def gridSearch(self, params, cv=5, njobs=-1, verbose=50):
        """Run GridSearchCV (neg-MSE scoring) over `params` on this
        borough's data."""
        self._searchSpace = params
        self._gridSearch = GridSearchCV(self.lm,
                                        params,
                                        cv=cv,
                                        scoring="neg_mean_squared_error",
                                        n_jobs=njobs,
                                        verbose=verbose)
        self._gridSearch.fit(self.X, self.y)

    def getBestParams(self):
        """Best parameter set found; ValueError if gridSearch wasn't run."""
        if self._gridSearch is None:
            raise ValueError()
        return self._gridSearch.best_params_

    def getBestScore(self):
        """Best CV score found; ValueError if gridSearch wasn't run."""
        if self._gridSearch is None:
            raise ValueError()
        return self._gridSearch.best_score_

    def fitModel(self, params):
        """Refit the ElasticNet with an explicit parameter set."""
        self._params = params
        self.lm.set_params(**params)
        self.lm.fit(self.X, self.y)

    def __invert(self, y):
        # Undo the y pipeline, then the log transform applied upstream.
        return np.exp(self.pipeline_y.inverse_transform(y))

    def getTrainScore(self):
        """R^2 of the fitted model on this borough's training data."""
        return self.lm.score(self.X, self.y)

    def predict(self, test_X):
        """Impute + transform raw features, predict, and return predictions
        on the original (exponentiated) scale."""
        piped_X = self.pipeline_X.transform(self.__imputeVals(test_X))
        return self.__invert(self.lm.predict(piped_X))
Beispiel #29
0
def train(training_pandas_data, test_pandas_data, label_col, feat_cols, alpha,
          l1_ratio, training_data_path, test_data_path):
    """Train an ElasticNet, print and log the train/test R^2 scores to
    mlflow, and save the fitted model as an artifact.

    Fix: removed a dead ``r2_score_test = 0`` assignment that was
    immediately overwritten.
    """
    print("training-data-path:    " + training_data_path)
    print("test-data-path:        " + test_data_path)
    print("alpha:        ", alpha)
    print("l1-ratio:     ", l1_ratio)
    print("label-col:     " + label_col)
    for col in feat_cols:
        print("feat-cols:     " + col)

    # Split data into training labels and testing labels.
    trainingLabels = training_pandas_data[label_col].values
    trainingFeatures = training_pandas_data[feat_cols].values

    testLabels = test_pandas_data[label_col].values
    testFeatures = test_pandas_data[feat_cols].values

    # We will use a linear Elastic Net model.
    en = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)

    # Here we train the model.
    en.fit(trainingFeatures, trainingLabels)

    # Calculating the score of the model.
    r2_score_training = en.score(trainingFeatures, trainingLabels)
    r2_score_test = en.score(testFeatures, testLabels)
    print("Training set score:", r2_score_training)
    print("Test set score:", r2_score_test)

    # Logging the r2 score for both sets.
    mlflow.log_metric("R2 score for training set", r2_score_training)
    mlflow.log_metric("R2 score for test set", r2_score_test)

    # Saving the model as an artifact.
    sklearn.log_model(en, "model")

    run_id = mlflow.tracking.active_run().info.run_uuid
    print("Run with id %s finished" % run_id)
def compare_elastic_high_dimension_coef_():
    """Use score curves to show how ElasticNet (combining Lasso and Ridge)
    behaves on a fixed high-dimensional dataset for different alpha values,
    comparing against plain Lasso, plain Ridge, and an ElasticNet with a
    fixed l1_ratio."""
    # Takes a long time to run
    X_train, X_test, y_train, y_test = load_train_test_extended_boston()

    # Log-spaced alphas from 10^-5 up to (but not including) 10^0.
    alpha_range = [pow(10, (alpha / 10)) for alpha in range(-50, 0, 3)]
    lasso_train_score, lasso_test_score = [], []
    ridge_train_score, ridge_test_score = [], []
    elastic_train_score, elastic_test_score = [], []
    fix_elastic_train_score, fix_elastic_test_score = [], []
    for alpha in alpha_range:
        lasso = Lasso(alpha=alpha, max_iter=100000).fit(X_train, y_train)
        lasso_train_score.append(lasso.score(X_train, y_train))
        lasso_test_score.append(lasso.score(X_test, y_test))

        ridge = Ridge(alpha=alpha).fit(X_train, y_train)
        ridge_train_score.append(ridge.score(X_train, y_train))
        ridge_test_score.append(ridge.score(X_test, y_test))

        # ElasticNet whose l1_ratio tracks alpha itself.
        elastic = ElasticNet(alpha=alpha, l1_ratio=alpha).fit(X_train, y_train)
        elastic_train_score.append(elastic.score(X_train, y_train))
        elastic_test_score.append(elastic.score(X_test, y_test))

        # Fix the best L1 regularisation ratio and vary only alpha.
        elastic = ElasticNet(alpha=alpha, l1_ratio=0.005).fit(X_train, y_train)
        fix_elastic_train_score.append(elastic.score(X_train, y_train))
        fix_elastic_test_score.append(elastic.score(X_test, y_test))

    plt.plot(alpha_range, lasso_train_score, label='lasso 训练集得分')
    plt.plot(alpha_range, ridge_train_score, label='ridge 训练集得分')
    plt.plot(alpha_range, elastic_train_score, label='elastic 训练集得分')
    plt.plot(alpha_range, fix_elastic_train_score, label='fix elastic 训练集得分')
    plt.plot(alpha_range, lasso_test_score, label='lasso 测试集得分')
    plt.plot(alpha_range, ridge_test_score, label='ridge 测试集得分')
    plt.plot(alpha_range, elastic_test_score, label='elastic 测试集得分')
    plt.plot(alpha_range, fix_elastic_test_score, label='fix elastic 测试集得分')
    plt.legend(ncol=4, loc=(0, 1))
    plt.xlabel("alpha")
    plt.ylabel("score")
    plt.suptitle("不同alpha值的四种回归的系数曲线图")
Beispiel #31
0
def explore_coefficients(dataset, alphas):
    """For each alpha, fit Ridge, Lasso and ElasticNet on a train split of
    `dataset` and collect each model's coefficients plus (train, test)
    scores."""
    if dataset not in g_datasets:
        raise ValueError(f'{dataset} dataset is not available here.')

    data = DataHelper(dataset)
    X_train, X_test, y_train, y_test = do_split(data.X,
                                                data.y,
                                                ratio=0.2,
                                                seed=42)

    coeffs = {'Ridge': [], 'Lasso': [], 'ElasticNet': []}
    scores = {'Ridge': [], 'Lasso': [], 'ElasticNet': []}

    for alpha in alphas:
        # Build the three regressors for this alpha, then fit/record each.
        models = {
            'Ridge': Ridge(alpha=alpha, fit_intercept=False),
            'Lasso': Lasso(alpha=alpha, fit_intercept=False),
            'ElasticNet': ElasticNet(alpha=alpha, l1_ratio=0.5),
        }
        for name, model in models.items():
            model.fit(X_train, y_train)
            coeffs[name].append(model.coef_)
            scores[name].append((model.score(X_train, y_train),
                                 model.score(X_test, y_test)))

    return coeffs, scores
Beispiel #32
0
# print ('final score: ' + str(np.mean(final_score)))

# mean_coef = final_coef/10
# mean_intercept = np.mean(final_intercept)


# lin_reg.coef_ = mean_coef
# lin_reg.intercept_ = mean_intercept


# LABEL NEW ERROR TICKET LABELS ON EXISTING DATA

# Predict on the held-out features with the already-fitted model.
# NOTE(review): despite the name, the score line below labels `lin_reg` an
# Elastic Net — presumably fitted earlier in the file; verify upstream.
y_new = lin_reg.predict(X_test)

# print(lin_reg.coef_)
print('final score for linear Elastic Net: ' + str(lin_reg.score(X_test, y_test)))


# DEFINITION FOR ERROR TICKET

# Flag rows whose actual value fell below (1 - rate) of the prediction.
rate = 0.7
lower = y_new * (1-rate)
# result[i] is True when y_test[i] < lower[i]; kept as an (n, 1) column.
result = np.expand_dims((np.squeeze(y_test) - lower) < 0, axis=1)


# results = np.array(results)
# plt.scatter(X_test, y_test, color='black', s=1)
# plt.plot(X_test, y_new, color='blue')
# plt.ylabel('price (USD)')
# plt.xlabel('week score')
# plt.xticks()
Beispiel #33
0
def fit_linear_model(X, y, results, keys,
                     alpha = np.logspace(-5,2,50),
                     l1_ratio = np.array([.1, .5, .7, .9, .95, .99, 1]),
                     num_cv = 5,
                     verbose = False,
                     intercept_scaling = 10,
                     plot_results = False,
                     labels = None
                     ):
    """Select Elastic Net hyper-parameters via ENCV (when a grid is given),
    then estimate accuracy with cross-validation (KFold, or leave-one-label-
    out when `labels` is given) or a single full fit when num_cv <= 1.

    Fixes: ``labels == None`` replaced with ``labels is None`` (the equality
    form triggers element-wise comparison when labels is a numpy array);
    Python 2 print statements rewritten as single-argument calls that are
    valid in both Python 2 and 3.

    Returns (clf, R2, score, coef, prob, kf, group_keys) when num_cv > 1,
    else (clf, R2, score, coef, prob, group_keys).
    """
    X = pp.scale(X)
    clf = []
    R2 = []
    coef = []
    prob = []
    score = []
    group_keys = []
    # Use a 10-fold inner CV for hyper-parameter search when no outer CV.
    num_cv2 = num_cv if num_cv > 1 else 10
    # Find best alpha and lambda
    if (np.size(alpha) > 1) or (np.size(l1_ratio) > 1):
        print("Determining best values for L1 ratio and alpha...")
        clf_temp = ENCV(
                    l1_ratio = l1_ratio,
                    alphas = alpha,
                    cv = num_cv2,
                    fit_intercept = False,
                    verbose = verbose
                    )
        clf_temp.fit(X, y)
        best_alpha = clf_temp.alpha_
        best_l1_ratio = clf_temp.l1_ratio_
        print("Best L1 ratio: " + str(best_l1_ratio) + ", best alpha: " + str(best_alpha))
    else:
        best_alpha = alpha
        best_l1_ratio = l1_ratio
    # Now do cross-validation to estimate accuracy
    if num_cv > 1:
        if labels is None:
            kf = KFold(n = len(y), n_folds = num_cv)
        else:
            kf = LOLO(labels)
        for train, test in kf:
            X_train, X_test = X[train], X[test]
            y_train, y_test = y[train], y[test]
            results_test, keys_test = results[test], keys[test]
            clf_temp2 = EN(
                            l1_ratio = best_l1_ratio,
                            alpha = best_alpha,
                            fit_intercept = False)
            clf_temp2.fit(X_train, y_train)
            pred = clf_temp2.predict(X_test)
            clf.append(clf_temp2)
            R2.append(clf_temp2.score(X_test, y_test))
            coef.append(clf_temp2.coef_)
            prob.append(diff_to_prob(pred))
            score.append(lossFx(results_test, pred))
            group_keys.append(keys_test)
        return clf, R2, score, coef, prob, kf, group_keys
    else:
        clf_temp2 = EN(
                l1_ratio = best_l1_ratio,
                alpha = best_alpha,
                fit_intercept = False)
        clf_temp2.fit(X, y)
        pred = clf_temp2.predict(X)
        clf = clf_temp2
        R2 = clf_temp2.score(X, y)
        coef = clf_temp2.coef_
        prob = diff_to_prob(pred)
        score = lossFx(results, pred)
        group_keys = keys
        return clf, R2, score, coef, prob, group_keys
# ElasticNet Regression
import numpy as np
from sklearn import datasets
from sklearn.linear_model import ElasticNet
# load the diabetes datasets
dataset = datasets.load_diabetes()
# fit a model to the data
model = ElasticNet(alpha=0.1)
model.fit(dataset.data, dataset.target)
print(model)
# make predictions
expected = dataset.target
predicted = model.predict(dataset.data)
# summarize the fit of the model: mean squared error and R^2, both measured
# on the training data itself (no held-out split in this example)
mse = np.mean((predicted-expected)**2)
print(mse)
print(model.score(dataset.data, dataset.target))
Beispiel #35
0
# Candidate regressors for comparison; alphas are hand-picked.
net = ElasticNet(alpha=1.5)
lasso = Lasso(alpha=5)
ridge = Ridge(alpha=3)
lr = LinearRegression()
dtr = DecisionTreeRegressor(max_depth=17)
# Bagging ensemble built on the ElasticNet base estimator.
bagger = BaggingRegressor(net, verbose = 1)

# X_model / y are presumably defined earlier in the file — TODO confirm.
X_train, X_test, y_train, y_test = train_test_split(X_model, y)

dtr.fit(X_train,y_train)
dtr.score(X_test, y_test)  # NOTE(review): score value is discarded
pred = dtr.predict(X_test)
plt.scatter(y_test, (pred*0.8)-y_test)

net.fit(X_train, y_train)
net.score(X_test, y_test)  # NOTE(review): score value is discarded
preds = net.predict(X_test)
# Residual-style scatter of (prediction - actual) against the actuals.
plt.scatter(y_test, (preds) - y_test, alpha = 0.7)

# 12-fold cross-validation of the ElasticNet on scaled features.
scores = cross_val_score(net, scale(X_model), y, cv=12)
scores.mean()

# Second experiment: predict compilation_3 from the first three compilations.
X2 = pivoted[['compilation_0', 'compilation_1', 'compilation_2']]
y2 = pivoted.compilation_3

X_train, X_test, y_train, y_test = train_test_split(X2, y2, test_size=0.2)

lr.fit(X_train, y_train)
lr.score(X_test, y_test)  # NOTE(review): score value is discarded
pivoted.head()
mapped_pivot = pd.read_csv('pivot_catcherr.csv')