# GeneralizedAdditivRegressor.py (forked from LAL/GAM)
from sklearn.linear_model import RidgeCV
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from hyperopt import tpe, hp, STATUS_OK, space_eval
from hyperopt import fmin as hyperopt_fmin
import sklearn.metrics as met
from sklearn.base import clone


class GeneralizedAdditiveRegressor(object):
    """Fit a Generalized Additive Model with symmetric backfitting (the
    residuals are updated once per batch), so that the order of the features
    does not matter.

    Parameters
    ----------
    smoothers : estimator or list of estimators for the shape functions. Any
        estimator with fit() and predict() implemented can be used. The length
        of the list (the number of estimators) has to be equal to the number
        of features (one shape function per feature). If you want the same
        smoother for all the shape functions, you can pass a single estimator
        (not in a list!) and it will be cloned n_features times before the fit.
    max_iter : number of iterations of the backfitting algorithm (default is 10).
    ridge_alphas : regularization coefficient for the ridge regression run on
        top of the shape functions in the backfitting (in order to rescale them).

    Attributes
    ----------
    smoothers_ : list of the fitted smoothers (the shape functions).
    ridge : ridge regressor used after each batch of the backfitting to
        rescale the shape functions. Its alphas parameter is given by the
        user (ridge_alphas).
    """
def __init__(self, smoothers, max_iter=10, ridge_alphas=10.):
self.smoothers = smoothers
self.max_iter = max_iter
self.ridge_alphas = ridge_alphas
    def fit(self, X, y):
        """Fit the shape function of each feature with the backfitting algorithm.
        Note that the shape functions are centered (not reduced).

        Parameters
        ----------
        X : array-like, shape=(n_samples, n_features)
            The input samples.
        y : array-like, shape=(n_samples,)
            The target values.

        Returns
        -------
        self : object
            The Generalized Additive Model with the fitted shape functions.
        """
        n_samples, n_features = X.shape
        if not isinstance(self.smoothers, list):
            self.smoothers_ = [clone(self.smoothers) for _ in range(n_features)]
        else:
            self.smoothers_ = [clone(self.smoothers[j]) for j in range(n_features)]
        self.ridge = RidgeCV(alphas=[self.ridge_alphas] * len(self.smoothers_),
                             fit_intercept=False)
        self.y_mean_ = np.mean(y)
        self.rmse_ = []  # store the train error over the iterations
        y = y - self.y_mean_  # center the target without mutating the caller's array
        # shape functions of the previous iteration, kept to compute the residuals
        temp = np.zeros(shape=(n_samples, n_features))
        shape_functions = np.zeros(shape=(n_samples, n_features))
        for i in range(self.max_iter):
            for j in range(n_features):
                # select all the columns except the j-th one
                idx = [k for k in range(n_features) if k != j]
                # compute the partial residuals of the previous iteration
                residuals = y.reshape((n_samples, 1)) - temp[:, idx].sum(axis=1, keepdims=True)
                residuals -= residuals.mean()
                # reshape to 1d to avoid a deprecation warning
                self.smoothers_[j].fit(X[:, j:j + 1], residuals.reshape((n_samples,)))
                shape_functions[:, j] = self.smoothers_[j].predict(X[:, j:j + 1])
                shape_functions[:, j] -= shape_functions[:, j].mean()
            # ridge regression on top of the shape functions in order to rescale each of them
            self.ridge.fit(shape_functions, y)
            shape_functions *= self.ridge.coef_
            y_pred = shape_functions.sum(axis=1)
            y_pred -= y_pred.mean()
            self.rmse_.append(np.sqrt(met.mean_squared_error(y, y_pred)))
            temp = shape_functions.copy()
        return self
    def transform(self, X):
        """Transform function: return the prediction of the shape functions
        in each dimension. For pipeline use.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to be transformed.

        Returns
        -------
        shape_functions : np.array of shape (n_samples, n_features)
            Prediction of each shape function for each feature.
        """
        n_samples, n_features = X.shape
        shape_functions = np.empty((n_samples, n_features))
        for j in range(n_features):
            shape_functions[:, j] = self.smoothers_[j].predict(X[:, j:j + 1])
        return shape_functions
    def predict(self, X):
        """Predict regression targets for X.
        The prediction is made with the GAM model: the sum of the shape
        functions, rescaled by the ridge coefficients learned during fit
        (as done at train time), plus the mean of the training targets.
        """
        n_samples, n_features = X.shape
        y = np.ones(n_samples) * self.y_mean_
        for j in range(n_features):
            y += self.ridge.coef_[j] * self.smoothers_[j].predict(X[:, j:j + 1])
        return y
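
# A minimal usage sketch of the regressor above (not part of the original
# module): decision-tree smoothers on a synthetic additive target. The data
# and the choice of DecisionTreeRegressor(max_depth=4) are illustrative
# assumptions, not something the original code prescribes.
def _demo_gam():
    from sklearn.tree import DecisionTreeRegressor
    rng = np.random.RandomState(0)
    X_demo = rng.uniform(-1, 1, size=(200, 2))
    y_demo = np.sin(3 * X_demo[:, 0]) + X_demo[:, 1] ** 2 + 0.1 * rng.randn(200)
    # a single estimator (not in a list) is cloned once per feature by fit()
    gam = GeneralizedAdditiveRegressor(DecisionTreeRegressor(max_depth=4), max_iter=10)
    gam.fit(X_demo, y_demo)
    print('train RMSE per iteration:', gam.rmse_)
    print('first predictions:', gam.predict(X_demo)[:5])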

def obj_function(param):
    """Objective function for hyperopt on a GAM with scikit-learn 0.18
    Gaussian processes as smoothers. Expects X_train, Y_train, X_test and
    Y_test to be available as globals.

    Returns the global MSE on the test set.
    """
    n_features = (len(param) - 2) // 2
    # one dict of kernel parameters per GP, read two by two from param
    shape_parameters = [{'constant_value': param[2 * i],
                         'length_scale': param[2 * i + 1]}
                        for i in range(n_features)]
    alpha_ = param[-2]  # all the GPs share the same nugget (noise variance estimation)
    ridge_alphas = param[-1]
    smoothers = []
    for i in range(n_features):
        constant_value = shape_parameters[i]['constant_value']
        length_scale = shape_parameters[i]['length_scale']
        smoothers.append(GaussianProcessRegressor(kernel=C(constant_value) * RBF(length_scale),
                                                  alpha=alpha_, optimizer=None))
    gam = GeneralizedAdditiveRegressor(smoothers, max_iter=15, ridge_alphas=ridge_alphas)
    gam.fit(X_train, Y_train)
    y_pred = gam.predict(X_test)
    score = met.mean_squared_error(Y_test - Y_test.mean(), y_pred - y_pred.mean())
    return score

def create_space(n_feature):
    """Create a search space for the hyperparameters of a scikit-learn 0.18
    GP kernel. Expects X and X_train to be available as globals. The order
    of the hyperparameters is designed to work with obj_function().

    Returns a search space for hyperopt_fmin().
    """
    n_sample = X_train.shape[0]
    size = [X[:, i].max() - X[:, i].min() for i in range(n_feature)]
    constant_space = [hp.loguniform('constant_value%s' % i, np.log(10) * -2, np.log(10) * 2)
                      for i in range(n_feature)]
    l_scale_space = [hp.uniform('length_scale%s' % i, size[i] / n_sample, size[i])
                     for i in range(n_feature)]
    # interleave the (constant, length_scale) pairs so that obj_function()
    # can read them two by two
    space = []
    for j in range(n_feature):
        space.append(constant_space[j])
        space.append(l_scale_space[j])
    space.append(hp.loguniform('alpha', -11, 2))
    space.append(hp.loguniform('ridge', -2, 2))
    return space
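
# A hedged sketch of how obj_function() and create_space() can be driven with
# hyperopt's TPE search, using the tpe, space_eval and hyperopt_fmin names
# imported at the top of the module. The synthetic data and the train/test
# split below are illustrative assumptions; the two functions above expect
# X, X_train, Y_train, X_test and Y_test to exist as module-level globals.
def _demo_hyperopt_search(n_features=2, max_evals=25):
    global X, X_train, Y_train, X_test, Y_test
    rng = np.random.RandomState(0)
    X = rng.uniform(-1, 1, size=(300, n_features))
    Y = np.sin(3 * X[:, 0]) + X[:, 1] ** 2 + 0.1 * rng.randn(300)
    X_train, X_test = X[:200], X[200:]
    Y_train, Y_test = Y[:200], Y[200:]
    space = create_space(n_features)
    best = hyperopt_fmin(obj_function, space, algo=tpe.suggest, max_evals=max_evals)
    # map the best point found back onto the original search space
    return space_eval(space, best)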