# gaussian.py (forked from karlnapf/kernel_exp_family)
# Gaussian-kernel "lite" kernel exponential family estimators: a closed-form
# fit plus a Bayesian-optimisation-based adaptive variant.

from abc import abstractmethod

import numpy as np

from kernel_exp_family.estimators.estimator_oop import EstimatorBase
from kernel_exp_family.estimators.parameter_search_bo import BayesOptSearch
from kernel_exp_family.kernels.kernels import gaussian_kernel, \
    gaussian_kernel_grad, theano_available
from kernel_exp_family.tools.assertions import assert_array_shape
from kernel_exp_family.tools.log import Log

# the Theano-backed higher-order kernel derivatives are optional
if theano_available:
    from kernel_exp_family.kernels.kernels import gaussian_kernel_hessian_theano, \
        gaussian_kernel_third_order_derivative_tensor_theano

logger = Log.get_logger()

def compute_b(X, Y, K_XY, sigma):
    """
    Computes the linear term b of the objective minimised by fit(),
    accumulated over dimensions; returns a vector with one entry per row of X.
    """
    assert X.shape[1] == Y.shape[1]
    assert K_XY.shape[0] == X.shape[0]
    assert K_XY.shape[1] == Y.shape[0]

    NX = len(X)
    D = X.shape[1]

    b = np.zeros(NX)
    K1 = np.sum(K_XY, 1)
    for l in np.arange(D):
        x_l = X[:, l]
        y_l = Y[:, l]

        s_l = x_l ** 2
        t_l = y_l ** 2

        # Replaces dot product with np.diag via broadcasting
        # See http://mail.scipy.org/pipermail/numpy-discussion/2007-March/026809.html
        D_s_K = s_l[:, np.newaxis] * K_XY
        D_x_K = x_l[:, np.newaxis] * K_XY

        b += 2. / sigma * (K_XY.dot(t_l)
                           + np.sum(D_s_K, 1)
                           - 2 * D_x_K.dot(y_l)) - K1
    return b
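
# Shape contract for compute_b, as a hedged sketch (toy sizes, not part of
# the original module): with X of shape (5, 2), Y of shape (3, 2), and
# K_XY = gaussian_kernel(X, Y, sigma=1.0) of shape (5, 3),
# compute_b(X, Y, K_XY, 1.0) returns a length-5 vector, one entry per row
# of X, accumulated over the D=2 dimensions.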

def compute_C(X, Y, K, sigma):
    """
    Computes the (NX, NX) quadratic term C of the objective minimised by
    fit(), accumulated over dimensions.
    """
    assert X.shape[1] == Y.shape[1]
    assert K.shape[0] == X.shape[0]
    assert K.shape[1] == Y.shape[0]

    D = X.shape[1]
    NX = X.shape[0]

    C = np.zeros((NX, NX))
    for l in np.arange(D):
        x_l = X[:, l]
        y_l = Y[:, l]

        # Replaces dot product with np.diag via broadcasting
        # See http://mail.scipy.org/pipermail/numpy-discussion/2007-March/026809.html
        D_x_KXY = x_l[:, np.newaxis] * K
        KXY_D_y = K * y_l
        KXY_T_D_x = K.T * x_l
        D_y_KXY_T = y_l[:, np.newaxis] * K.T

        C += (D_x_KXY - KXY_D_y).dot(KXY_T_D_x - D_y_KXY_T)
    return C
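
# The per-dimension update above computes, in matrix form,
#   C += (diag(x_l) K - K diag(y_l)) (K^T diag(x_l) - diag(y_l) K^T),
# with the diag() products realised via broadcasting rather than explicit
# diagonal matrices. The result is the (NX, NX) quadratic term of the
# objective that fit() minimises.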

def fit(X, Y, sigma, lmbda, K=None):
    # compute kernel matrix if needed
    if K is None:
        K = gaussian_kernel(X, Y, sigma=sigma)

    b = compute_b(X, Y, K, sigma)
    C = compute_C(X, Y, K, sigma)

    # solve regularised linear system
    a = -sigma / 2. * np.linalg.solve(C + (K + np.eye(len(C))) * lmbda, b)

    return a
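
# Minimal usage sketch for the closed-form solve (hedged; the data and
# hyper-parameters below are illustrative, not from the original module):
#
#   X = np.random.randn(100, 2)
#   alpha = fit(X, X, sigma=1.0, lmbda=0.01)  # shape (100,)
#
# Solving the (N, N) system C + (K + I) * lmbda scales cubically in the
# number of basis points, which is why KernelExpLiteGaussian below
# sub-samples its data to at most N points before fitting.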

def objective(X, Y, sigma, lmbda, alpha, K=None, K_XY=None, b=None, C=None):
    if K_XY is None:
        K_XY = gaussian_kernel(X, Y, sigma=sigma)

    if K is None and lmbda > 0:
        if X is Y:
            K = K_XY
        else:
            K = gaussian_kernel(X, sigma=sigma)

    if b is None:
        b = compute_b(X, Y, K_XY, sigma)

    if C is None:
        C = compute_C(X, Y, K_XY, sigma)

    NX = len(X)
    first = 2. / (NX * sigma) * alpha.dot(b)
    if lmbda > 0:
        second = 2. / (NX * sigma ** 2) * alpha.dot(
            (C + (K + np.eye(len(C))) * lmbda).dot(alpha))
    else:
        second = 2. / (NX * sigma ** 2) * alpha.dot(C.dot(alpha))
    J = first + second
    return J
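
# Hedged example of using objective() for model selection: score a fitted
# alpha on held-out data, which is what the parameter search below relies
# on. X_train and X_val are illustrative names:
#
#   alpha = fit(X_train, X_train, sigma, lmbda)
#   J = objective(X_train, X_val, sigma, lmbda, alpha)
#
# Lower J means a better fit; with lmbda > 0 the regulariser
# (K + I) * lmbda enters the quadratic term exactly as in fit().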

class KernelExpLiteGaussian(EstimatorBase):
    def __init__(self, sigma, lmbda, D, N):
        self.sigma = sigma
        self.lmbda = lmbda
        self.D = D
        self.N = N

        # initial RKHS function is flat
        self.alpha = np.zeros(0)
        self.X = np.zeros((0, D))

    def fit(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        # sub-sample if data is larger than previously set N
        if len(X) > self.N:
            inds = np.random.permutation(len(X))[:self.N]
            self.X = X[inds]
        else:
            self.X = np.copy(X)

        self.alpha = self.fit_wrapper_()

    @abstractmethod
    def fit_wrapper_(self):
        self.K = gaussian_kernel(self.X, sigma=self.sigma)
        return fit(self.X, self.X, self.sigma, self.lmbda, self.K)

    def log_pdf(self, x):
        assert_array_shape(x, ndim=1, dims={0: self.D})

        k = gaussian_kernel(self.X, x.reshape(1, self.D), self.sigma)[:, 0]
        return np.dot(self.alpha, k)

    def grad(self, x):
        assert_array_shape(x, ndim=1, dims={0: self.D})

        k = gaussian_kernel_grad(x, self.X, self.sigma)
        return np.dot(self.alpha, k)

    if theano_available:
        def hessian(self, x):
            """
            Computes the Hessian of the learned log-density function.

            WARNING: This implementation is slow, so don't call repeatedly.
            """
            assert_array_shape(x, ndim=1, dims={0: self.D})

            H = np.zeros((self.D, self.D))
            for i, a in enumerate(self.alpha):
                H += a * gaussian_kernel_hessian_theano(x, self.X[i], self.sigma)
            return H

        def third_order_derivative_tensor(self, x):
            """
            Computes the third order derivative tensor of the learned
            log-density function.

            WARNING: This implementation is slow, so don't call repeatedly.
            """
            assert_array_shape(x, ndim=1, dims={0: self.D})

            G3 = np.zeros((self.D, self.D, self.D))
            for i, a in enumerate(self.alpha):
                G3 += a * gaussian_kernel_third_order_derivative_tensor_theano(x, self.X[i], self.sigma)
            return G3
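
    # Hedged usage note for the optional Theano-backed derivatives (only
    # defined when theano_available is True; est is an assumed fitted
    # instance):
    #
    #   H = est.hessian(np.zeros(est.D))                        # (D, D)
    #   T = est.third_order_derivative_tensor(np.zeros(est.D))  # (D, D, D)
    #
    # Both loop over all stored basis points, so each call costs one Theano
    # evaluation per point on top of the Python loop.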

    def log_pdf_multiple(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        k = gaussian_kernel(self.X, X, self.sigma)
        return np.dot(self.alpha, k)

    def objective(self, X):
        assert_array_shape(X, ndim=2, dims={1: self.D})

        return objective(self.X, X, self.sigma, self.lmbda, self.alpha, self.K)

    def get_parameter_names(self):
        return ['sigma', 'lmbda']
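
# Hedged usage sketch for the estimator class (toy numbers, illustrative
# only):
#
#   est = KernelExpLiteGaussian(sigma=1.0, lmbda=0.01, D=2, N=200)
#   est.fit(np.random.randn(500, 2))  # sub-samples down to N=200 points
#   est.log_pdf(np.zeros(2))          # unnormalised log-density at a point
#   est.grad(np.zeros(2))             # gradient of the log-density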

class KernelExpLiteGaussianAdaptive(KernelExpLiteGaussian):
    def __init__(self, sigma, lmbda, D, N,
                 num_initial_evaluations=3, num_evaluations=3, minimum_size_learning=100,
                 num_initial_evaluations_relearn=1, num_evaluations_relearn=1,
                 param_bounds={'sigma': [-3, 3]}):
        KernelExpLiteGaussian.__init__(self, sigma, lmbda, D, N)

        self.bo = None
        self.param_bounds = param_bounds
        self.num_initial_evaluations = num_initial_evaluations
        self.num_iter = num_evaluations
        self.minimum_size_learning = minimum_size_learning

        self.n_initial_relearn = num_initial_evaluations_relearn
        self.n_iter_relearn = num_evaluations_relearn

        self.learning_parameters = False

    def fit(self, X):
        # avoid infinite recursion from x-validation fit call
        if not self.learning_parameters and len(X) >= self.minimum_size_learning:
            self.learning_parameters = True
            if self.bo is None:
                logger.info("Bayesian optimisation from scratch.")
                self.bo = BayesOptSearch(self, X, self.param_bounds,
                                         num_initial_evaluations=self.num_initial_evaluations)
                best_params = self.bo.optimize(self.num_iter)
            else:
                logger.info("Bayesian optimisation using prior model.")
                self.bo.re_initialise(X, self.n_initial_relearn)
                best_params = self.bo.optimize(self.n_iter_relearn)

            self.set_parameters_from_dict(best_params)
            self.learning_parameters = False
            logger.info("Learnt %s" % str(self.get_parameters()))

        # standard fit call from superclass
        KernelExpLiteGaussian.fit(self, X)
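
if __name__ == '__main__':
    # Hedged end-to-end sketch, assuming the optional Bayesian-optimisation
    # dependencies of BayesOptSearch are installed. With 300 >= 100 samples
    # (minimum_size_learning), the adaptive fit() below triggers the
    # hyper-parameter search before the standard fit.
    np.random.seed(0)
    X = np.random.randn(300, 2)
    est = KernelExpLiteGaussianAdaptive(sigma=1.0, lmbda=0.01, D=2, N=200)
    est.fit(X)
    print(est.log_pdf(np.zeros(2)))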