Example #1
 def test_model_poisson_regressor(self):
     X, y = make_regression(n_features=5,
                            n_samples=100,
                            n_targets=1,
                            random_state=42,
                            n_informative=3)
     y = numpy.abs(y)
     y = y / y.max() + 1e-5
     model = linear_model.PoissonRegressor().fit(X, y)
     model_onnx = convert_sklearn(
         model,
         "linear regression",
         [("input", FloatTensorType([None, X.shape[1]]))],
         target_opset=TARGET_OPSET)
     self.check_model(model_onnx, X.astype(numpy.float32))
     dump_data_and_model(X.astype(numpy.float32),
                         model,
                         model_onnx,
                         basename="SklearnPoissonRegressor-Dec4")
     model_onnx = convert_sklearn(
         model,
         "linear regression",
         [("input", DoubleTensorType([None, X.shape[1]]))],
         target_opset=TARGET_OPSET)
     dump_data_and_model(X.astype(numpy.float64),
                         model,
                         model_onnx,
                         basename="SklearnPoissonRegressor64")
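A quick way to sanity-check the converted model outside the test harness is to execute it with onnxruntime (a sketch, assuming onnxruntime is installed; model_onnx here is the DoubleTensorType model from the second convert_sklearn call, so the input is fed as float64 under the declared name "input"):

import onnxruntime as rt

sess = rt.InferenceSession(model_onnx.SerializeToString(),
                           providers=["CPUExecutionProvider"])
onnx_pred = sess.run(None, {"input": X.astype(numpy.float64)})[0]
numpy.testing.assert_allclose(model.predict(X), onnx_pred.ravel(), rtol=1e-5)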
Example #2
 def fit_glm(self, tol=1e-12, pretest=False):  # added on D5
     clf = linear_model.PoissonRegressor(fit_intercept=False,
                                         tol=tol,
                                         verbose=3,
                                         alpha=0)
     clf.fit(spr.diags(1 / self.d_a) @ self.C(),
             self.μhat_a,
             sample_weight=self.d_a)
     return clf.coef_[0:-self.nbk], clf.coef_[-self.nbk:]
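The same pattern in isolation: an unpenalized Poisson GLM fit with per-observation weights. The design matrix and weights below are synthetic stand-ins for spr.diags(1 / self.d_a) @ self.C() and self.d_a (a sketch, not the original data):

import numpy as np
from sklearn import linear_model

rng = np.random.default_rng(0)
A = rng.normal(size=(200, 4))        # stands in for the rescaled constraint matrix
d = rng.uniform(0.5, 2.0, size=200)  # stands in for the weights d_a
y = rng.poisson(d * np.exp(A @ np.array([0.3, -0.2, 0.1, 0.05]))) / d

glm = linear_model.PoissonRegressor(fit_intercept=False, alpha=0,
                                    tol=1e-12, max_iter=1000)
glm.fit(A, y, sample_weight=d)
print(glm.coef_)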
Example #3
    def solveGLM(self, σ, tol=1e-9):  # added on D3
        ptm = time()
        muhat_a = (self.n_x.reshape((self.nbx, -1)) @ self.m_y.reshape(
            (-1, self.nby))).flatten() / self.n_x.sum()
        ot_as_glm = linear_model.PoissonRegressor(fit_intercept=False,
                                                  tol=tol,
                                                  verbose=3,
                                                  alpha=0)
        ot_as_glm.fit(-self.M_z_a().T,
                      muhat_a * np.exp(-self.Φ_a / σ),
                      sample_weight=np.exp(self.Φ_a / σ))

        p = σ * ot_as_glm.coef_
        u_x, v_y = p[:self.nbx] - p[0], p[self.nbx:] + p[0]
        μ_x_y = np.exp((self.Φ_a.reshape((self.nbx, -1)) - u_x.reshape(
            (-1, 1)) - v_y.reshape((1, -1))) / σ)
        valobs = self.Φ_a.dot(μ_x_y.flatten())
        valtot = valobs - σ * sum_xlogx(μ_x_y)
        taken = time() - ptm
        return μ_x_y, u_x, v_y, valobs, valtot, None, taken, 'GLM'
Example #4
def generate_model(pred_vars,
                   log_transform=True,
                   one_hot_week=False,
                   method="lm"):
    """
    Generate the model for transforming and predicting.
    ...
    """
    assert method in ['lm',
                      'poisson'], "method must be one of 'lm' or 'poisson'"
    if log_transform:
        ft = preprocessing.FunctionTransformer(np.log)
    else:
        ft = preprocessing.FunctionTransformer()

    if one_hot_week:
        model_prep = compose.ColumnTransformer(
            [("onehot_categorical", preprocessing.OneHotEncoder(),
              ["week_num"]), ("num_scaler", ft, pred_vars)],
            remainder="drop",
        )
    else:
        model_prep = compose.ColumnTransformer(
            [("num_scaler", ft, pred_vars + ['ca_prop'])],
            remainder="drop",
        )
    if method == 'lm':
        pipe = pipeline.Pipeline([("preprocessor", model_prep),
                                  ("regressor",
                                   linear_model.LinearRegression())])
    elif method == 'poisson':
        pipe = pipeline.Pipeline([
            ("preprocessor", model_prep),
            ("regressor",
             linear_model.PoissonRegressor(alpha=1e-12, max_iter=10000))
        ])
    return pipe
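A hypothetical usage sketch; the frame below invents columns ('week_num', 'temp', and the 'ca_prop' column the non-one-hot branch expects) purely for illustration:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({
    "week_num": rng.integers(1, 53, 120),
    "temp": rng.uniform(-5.0, 30.0, 120),
    "ca_prop": rng.uniform(0.1, 0.9, 120),
})
counts = rng.poisson(5, size=120)

# log_transform=False keeps the identity transformer, so negative
# temperatures are safe here
pipe = generate_model(["temp"], log_transform=False, method="poisson")
pipe.fit(df, counts)
print(pipe.predict(df.head()))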
Example #5
        regression(linear_model.ElasticNetCV(random_state=RANDOM_SEED)),
        regression(linear_model.GammaRegressor()),
        regression(linear_model.HuberRegressor()),
        regression(linear_model.Lars()),
        regression(linear_model.LarsCV()),
        regression(linear_model.Lasso(random_state=RANDOM_SEED)),
        regression(linear_model.LassoCV(random_state=RANDOM_SEED)),
        regression(linear_model.LassoLars()),
        regression(linear_model.LassoLarsCV()),
        regression(linear_model.LassoLarsIC()),
        regression(linear_model.LinearRegression()),
        regression(linear_model.OrthogonalMatchingPursuit()),
        regression(linear_model.OrthogonalMatchingPursuitCV()),
        regression(
            linear_model.PassiveAggressiveRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.PoissonRegressor()),
        regression(
            linear_model.RANSACRegressor(
                base_estimator=tree.ExtraTreeRegressor(**TREE_PARAMS),
                random_state=RANDOM_SEED)),
        regression(linear_model.Ridge(random_state=RANDOM_SEED)),
        regression(linear_model.RidgeCV()),
        regression(linear_model.SGDRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.TheilSenRegressor(random_state=RANDOM_SEED)),
        regression(linear_model.TweedieRegressor(power=0.0)),
        regression(linear_model.TweedieRegressor(power=1.0)),
        regression(linear_model.TweedieRegressor(power=1.5)),
        regression(linear_model.TweedieRegressor(power=2.0)),
        regression(linear_model.TweedieRegressor(power=3.0)),

        # Statsmodels Linear Regression
Example #6
import matplotlib.pyplot as plt

from sklearn import linear_model
from sklearn.model_selection import train_test_split

from helper import prepare_data

df = prepare_data()

y = df["Berri1"]

X = df[[
    "day", "month", "day_of_week", "Mean Temp (°C)", "Total Precip (mm)",
    "Snow on Grnd (cm)", "Min Temp (°C)", "Max Temp (°C)"
]]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

clf = linear_model.PoissonRegressor(max_iter=200)

clf.fit(X_train, y_train)

print(clf.score(X_test, y_test))

result = clf.predict(X)

plt.plot(list(y.index), y, label="true")
plt.plot(list(y.index), result, label="predicted")
plt.legend()
plt.show()
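Note that PoissonRegressor.score reports D², the fraction of Poisson deviance explained, not an R². To also report the raw deviance on the held-out set (a short sketch reusing the variables above):

from sklearn.metrics import mean_poisson_deviance

print(mean_poisson_deviance(y_test, clf.predict(X_test)))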
Example #7
# The coefficients
# print('Coefficients: \n', regr.coef_)
# The mean squared error
# print('Mean squared error: %.2f'
#       % mean_squared_error(y_test, y_pred))
# The coefficient of determination: 1 is perfect prediction
# print('Coefficient of determination: %.2f'
#       % r2_score(y_test, y_pred))

regr = linear_model.PoissonRegressor()  # baseline model, default penalty (alpha = 1.0)
scores_length_no_reg = cross_val_score(regr, X_train_std, y_train, cv=5, scoring='r2')
regr.fit(X_train_std, y_train)
#The mean score and the standard deviation are hence given by:
print("%0.2f (alpha = 1.0) accuracy with a standard deviation of %0.2f" % (scores_length_no_reg.mean(), scores_length_no_reg.std()))

# Alpha = 0.01
regr_l1_1 = linear_model.PoissonRegressor(alpha=0.01)
scores_length_l1_1_reg = cross_val_score(regr_l1_1, X_train_std, y_train, cv=5, scoring='r2') 
regr_l1_1.fit(X_train_std, y_train)
#The mean score and the standard deviation are hence given by:
print("%0.2f (alpha = 0.01) accuracy with a standard deviation of %0.2f" % (scores_length_l1_1_reg.mean(), scores_length_l1_1_reg.std()))

# Alpha = 30
regr_l1_30 = linear_model.PoissonRegressor(alpha=30)
scores_length_l1_30_reg = cross_val_score(regr_l1_30, X_train_std, y_train, cv=5, scoring='r2') 
regr_l1_30.fit(X_train_std, y_train)
#The mean score and the standard deviation are hence given by:
print("%0.2f (alpha = 30) accuracy with a standard deviation of %0.2f" % (scores_length_l1_30_reg.mean(), scores_length_l1_30_reg.std()))

# Alpha = 0.001
regr_l1_001 = linear_model.PoissonRegressor(alpha=0.001)
scores_length_l1_001_reg = cross_val_score(regr_l1_001, X_train_std, y_train, cv=5, scoring='r2')
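The same alpha sweep can be written more compactly with GridSearchCV (a sketch using the grid values from the runs above):

from sklearn.model_selection import GridSearchCV

search = GridSearchCV(linear_model.PoissonRegressor(),
                      param_grid={"alpha": [0.001, 0.01, 1.0, 30]},
                      cv=5, scoring="r2")
search.fit(X_train_std, y_train)
print(search.best_params_, search.best_score_)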
Example #8
 def _init_model(self):
     return linear_model.PoissonRegressor(alpha=1e-4, warm_start=True)
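With warm_start=True each subsequent fit call restarts the LBFGS solver from the previously learned coefficients instead of from zeros, which saves iterations when the model is refit repeatedly on similar data. A synthetic illustration (the data here is made up):

import numpy as np
from sklearn import linear_model

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 3))
y = rng.poisson(np.exp(X @ np.array([0.2, -0.1, 0.3])))

model = linear_model.PoissonRegressor(alpha=1e-4, warm_start=True)
for _ in range(3):   # e.g. periodic refits as new data arrives
    model.fit(X, y)  # each fit starts from the previous solution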
Example #9
plt.scatter(X, y, color='blue', alpha=0.5, label='ToyData')
#plt.legend()
plt.show()

df = pd.DataFrame([X, y]).T.rename({0: 'x', 1: 'y'}, axis=1)
# df['x_round']=df['x'].apply(lambda x:np.round(x,1))
# for i in range(x_min, x_max):
#     plt.hist( df[df['x_round']==i]['y'])
#     plt.title(np.exp(w[0]+w[1]*i))
#     plt.show()

# Baseline: OLS on log-transformed counts (only valid when every count is positive)
lr = linear_model.LinearRegression()
df['logy'] = np.log(df['y'])
lr.fit(df[['x']], df['logy'])
print([lr.intercept_, lr.coef_[0]])

# Poisson GLM on the raw counts, unpenalized
pr = linear_model.PoissonRegressor(alpha=0, fit_intercept=True, max_iter=300)
pr.fit(df[['x']], df['y'])
print([pr.intercept_, pr.coef_[0]])

plt.scatter(X, y, color='blue', alpha=0.5)
plt.plot(x, lam, color='red', label='True')
plt.plot(x,
         lam + np.sqrt(lam),
         color='red',
         label='True+sd',
         linestyle='dashed')
plt.plot(x,
         lam - np.sqrt(lam),
         color='red',
         label='True-sd',
         linestyle='dashed')
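For reference, this snippet relies on X, y, x, lam (and the commented-out w) defined earlier in its source file. A plausible self-contained reconstruction of that toy-data setup (an assumption, not the original code):

import numpy as np

rng = np.random.default_rng(0)
w = [0.5, 0.4]                      # assumed true intercept and slope
x_min, x_max = 0, 10
X = rng.uniform(x_min, x_max, 500)  # 1-D covariate
y = rng.poisson(np.exp(w[0] + w[1] * X))  # counts; zeros are possible,
                                          # which the log-OLS baseline cannot handle
x = np.linspace(x_min, x_max, 200)  # plotting grid
lam = np.exp(w[0] + w[1] * x)       # true mean curve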