multi_adaboost_CNN.py (forked from a-taherkhani/AdaBoost_CNN)
__author__ = 'Xin, Aboozar'

import numpy as np
# numpy.core.umath_tests.inner1d was removed from recent NumPy releases;
# np.einsum('ij,ij->i', ...) is used below as the row-wise inner product instead.

# Keras & CNN:
from keras.models import Sequential
from sklearn.preprocessing import OneHotEncoder
class AdaBoostClassifier(object):
    '''
    Multi-class AdaBoost (SAMME / SAMME.R) with a Keras CNN as the base estimator.

    Parameters
    ----------
    base_estimator : object
        The base model from which the boosted ensemble is built.
    n_estimators : integer, optional (default=50)
        The maximum number of estimators at which boosting is terminated.
    learning_rate : float, optional (default=1)
        Shrinks the contribution of each base estimator.
    algorithm : {'SAMME', 'SAMME.R'}, optional (default='SAMME.R')
        SAMME.R uses predicted class probabilities to update the sample
        weights, while SAMME uses the weighted classification error rate.
    random_state : int or None, optional (default=None)
    epochs : int, optional (default=6)
        Number of training epochs for each CNN base estimator.

    Attributes
    ----------
    estimators_ : list of base estimators
    estimator_weights_ : array of floats
        Weight of each base estimator.
    estimator_errors_ : array of floats
        Classification error of each estimator in the boosted ensemble.

    References
    ----------
    1. [multi-class AdaBoost (SAMME)](https://web.stanford.edu/~hastie/Papers/samme.pdf)
    2. [scikit-learn: weight_boosting](https://github.com/scikit-learn/
       scikit-learn/blob/51a765a/sklearn/ensemble/weight_boosting.py#L289)
    '''

    def __init__(self, *args, **kwargs):
        if kwargs and args:
            raise ValueError(
                '''AdaBoostClassifier can only be called with keyword
                arguments for the following keywords: base_estimator,
                n_estimators, learning_rate, algorithm, random_state, epochs''')

        allowed_keys = ['base_estimator', 'n_estimators', 'learning_rate',
                        'algorithm', 'random_state', 'epochs']
        for keyword in kwargs.keys():
            if keyword not in allowed_keys:
                raise ValueError(keyword + ': Wrong keyword used --- check spelling')

        # Defaults
        n_estimators = 50
        learning_rate = 1
        algorithm = 'SAMME.R'
        random_state = None
        epochs = 6  # CNN training epochs per boosting round

        if 'base_estimator' in kwargs:
            base_estimator = kwargs.pop('base_estimator')
        else:
            # Without a base CNN there is nothing to boost.
            raise ValueError('base_estimator can not be None')
        if 'n_estimators' in kwargs: n_estimators = kwargs.pop('n_estimators')
        if 'learning_rate' in kwargs: learning_rate = kwargs.pop('learning_rate')
        if 'algorithm' in kwargs: algorithm = kwargs.pop('algorithm')
        if 'random_state' in kwargs: random_state = kwargs.pop('random_state')
        if 'epochs' in kwargs: epochs = kwargs.pop('epochs')

        self.base_estimator_ = base_estimator
        self.n_estimators_ = n_estimators
        self.learning_rate_ = learning_rate
        self.algorithm_ = algorithm
        self.random_state_ = random_state
        self.estimators_ = list()
        self.estimator_weights_ = np.zeros(self.n_estimators_)
        self.estimator_errors_ = np.ones(self.n_estimators_)
        self.epochs = epochs

    def _samme_proba(self, estimator, n_classes, X):
        """Calculate algorithm 4, step 2, equation c) of Zhu et al [1].

        References
        ----------
        .. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009.
        """
        # Keras predict() returns the softmax class probabilities
        # (predict_proba was removed from recent Keras/TensorFlow releases).
        proba = estimator.predict(X)

        # Displace zero probabilities so the log is defined.
        # Also fix negative elements which may occur with
        # negative sample weights.
        proba[proba < np.finfo(proba.dtype).eps] = np.finfo(proba.dtype).eps
        log_proba = np.log(proba)
        return (n_classes - 1) * (log_proba - (1. / n_classes)
                                  * log_proba.sum(axis=1)[:, np.newaxis])
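
    # For reference, equation c) above computes, for each class k out of K:
    #     h_k(x) = (K - 1) * (log p_k(x) - (1 / K) * sum_j log p_j(x))
    # i.e. the centered log-probabilities that SAMME.R combines additively
    # across the ensemble in predict() and predict_proba().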
    def fit(self, X, y, batch_size):
        self.batch_size = batch_size
        self.n_samples = X.shape[0]
        # The classes are sorted here; boost() must produce predictions in
        # the same class order.
        self.classes_ = np.array(sorted(list(set(y))))
        self.n_classes_ = len(self.classes_)

        sample_weight = np.ones(self.n_samples) / self.n_samples
        for iboost in range(self.n_estimators_):
            sample_weight, estimator_weight, estimator_error = self.boost(X, y, sample_weight)

            # Early termination: boost() returned None on failure.
            if estimator_error is None:
                break

            # Record the error and weight of this round.
            self.estimator_errors_[iboost] = estimator_error
            self.estimator_weights_[iboost] = estimator_weight

            # A perfect fit: stop boosting.
            if estimator_error <= 0:
                break

        return self

def boost(self, X, y, sample_weight):
if self.algorithm_ == 'SAMME':
return self.discrete_boost(X, y, sample_weight)
elif self.algorithm_ == 'SAMME.R':
return self.real_boost(X, y, sample_weight)

    def real_boost(self, X, y, sample_weight):
        # Warm start: the first round copies the base CNN, later rounds copy
        # the previously trained CNN, weights included.
        if len(self.estimators_) == 0:
            estimator = self.deepcopy_CNN(self.base_estimator_)
        else:
            estimator = self.deepcopy_CNN(self.estimators_[-1])

        if self.random_state_:
            # Keras models do not implement sklearn's set_params(random_state=...);
            # seed NumPy instead so the run is reproducible.
            np.random.seed(self.random_state_)

        # One-hot encode the integer labels for the CNN's softmax output.
        # (For scikit-learn >= 1.2 use OneHotEncoder(sparse_output=False).)
        lb = OneHotEncoder(sparse=False)
        y_b = lb.fit_transform(y.reshape(-1, 1))

        estimator.fit(X, y_b, sample_weight=sample_weight,
                      epochs=self.epochs, batch_size=self.batch_size)

        # Keras predict() returns the softmax probabilities; take the argmax
        # to recover class labels.
        y_predict_proba = estimator.predict(X)
        y_pred_l = np.argmax(y_predict_proba, axis=1)
        incorrect = y_pred_l != y

        estimator_error = np.dot(incorrect, sample_weight) / np.sum(sample_weight, axis=0)
        # If worse than random guessing, stop boosting.
        if estimator_error >= 1.0 - 1 / self.n_classes_:
            return None, None, None

        # Replace zero probabilities so the log below is defined.
        y_predict_proba[y_predict_proba < np.finfo(y_predict_proba.dtype).eps] = \
            np.finfo(y_predict_proba.dtype).eps

        # SAMME.R coding: 1 for the true class, -1/(K-1) otherwise.
        y_codes = np.array([-1. / (self.n_classes_ - 1), 1.])
        y_coding = y_codes.take(self.classes_ == y[:, np.newaxis])

        # Sample weight update; np.einsum computes the row-wise inner product
        # (replacing the removed numpy.core.umath_tests.inner1d).
        intermediate_variable = (-1. * self.learning_rate_ *
                                 ((self.n_classes_ - 1) / self.n_classes_) *
                                 np.einsum('ij,ij->i', y_coding, np.log(y_predict_proba)))

        sample_weight *= np.exp(intermediate_variable)
        sample_weight_sum = np.sum(sample_weight, axis=0)
        if sample_weight_sum <= 0:
            return None, None, None

        # Normalize sample weight.
        sample_weight /= sample_weight_sum

        # Append the estimator; SAMME.R uses an estimator weight of 1.
        self.estimators_.append(estimator)
        return sample_weight, 1, estimator_error
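
    # The SAMME.R update used above, for sample i with coding vector y_i
    # (1 for the true class, -1/(K-1) elsewhere) and probabilities p(x_i):
    #     w_i <- w_i * exp(-learning_rate * ((K - 1) / K) * y_i . log p(x_i))
    # A confident correct prediction makes y_i . log p(x_i) large and
    # positive, which shrinks w_i; mistakes inflate it.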

    def deepcopy_CNN(self, base_estimator0):
        # Copy the CNN architecture and weights into a fresh, compiled model.
        # copy.deepcopy does not work reliably on Keras models, so rebuild
        # from the config and transfer the weights instead.
        config = base_estimator0.get_config()
        estimator = Sequential.from_config(config)
        estimator.set_weights(base_estimator0.get_weights())
        estimator.compile(loss='categorical_crossentropy',
                          optimizer='adam',
                          metrics=['accuracy'])
        return estimator
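
    # Note: keras.models.clone_model is a possible alternative for copying
    # the architecture; the weights would still have to be transferred with
    # set_weights(), exactly as done above.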

    def discrete_boost(self, X, y, sample_weight):
        # Warm start, as in real_boost.
        if len(self.estimators_) == 0:
            estimator = self.deepcopy_CNN(self.base_estimator_)
        else:
            estimator = self.deepcopy_CNN(self.estimators_[-1])

        if self.random_state_:
            # Keras models do not implement sklearn's set_params(random_state=...);
            # seed NumPy instead so the run is reproducible.
            np.random.seed(self.random_state_)

        # One-hot encode the integer labels for the CNN's softmax output.
        # (For scikit-learn >= 1.2 use OneHotEncoder(sparse_output=False).)
        lb = OneHotEncoder(sparse=False)
        y_b = lb.fit_transform(y.reshape(-1, 1))

        estimator.fit(X, y_b, sample_weight=sample_weight,
                      epochs=self.epochs, batch_size=self.batch_size)

        # Keras predict() returns softmax probabilities; argmax gives labels.
        y_pred_l = np.argmax(estimator.predict(X), axis=1)
        incorrect = y_pred_l != y

        estimator_error = np.dot(incorrect, sample_weight) / np.sum(sample_weight, axis=0)
        # If worse than random guessing, stop boosting.
        if estimator_error >= 1 - 1 / self.n_classes_:
            return None, None, None

        # SAMME estimator weight, with the multi-class log(K - 1) correction.
        estimator_weight = self.learning_rate_ * (
            np.log((1. - estimator_error) / estimator_error) +
            np.log(self.n_classes_ - 1.))
        if estimator_weight <= 0:
            return None, None, None

        # Update sample weight: up-weight the misclassified samples.
        sample_weight *= np.exp(estimator_weight * incorrect)
        sample_weight_sum = np.sum(sample_weight, axis=0)
        if sample_weight_sum <= 0:
            return None, None, None

        # Normalize sample weight.
        sample_weight /= sample_weight_sum

        # Append the estimator.
        self.estimators_.append(estimator)
        return sample_weight, estimator_weight, estimator_error
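
    # Worked example of the SAMME estimator weight above: with K = 10
    # classes and a weighted error of 0.4,
    #     alpha = log(0.6 / 0.4) + log(10 - 1) ~= 0.405 + 2.197 = 2.602,
    # so the log(K - 1) term keeps weak learners usable even when their
    # error exceeds 1/2, up to the random-guess bound 1 - 1/K.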

    def predict(self, X):
        n_classes = self.n_classes_
        classes = self.classes_[:, np.newaxis]

        if self.algorithm_ == 'SAMME.R':
            # The estimator weights are all 1 for SAMME.R.
            pred = sum(self._samme_proba(estimator, n_classes, X)
                       for estimator in self.estimators_)
        else:  # self.algorithm_ == 'SAMME'
            # The CNN outputs probabilities, so take the argmax before
            # comparing against the class labels.
            pred = sum((estimator.predict(X).argmax(axis=1) == classes).T * w
                       for estimator, w in zip(self.estimators_,
                                               self.estimator_weights_))

        pred /= self.estimator_weights_.sum()
        if n_classes == 2:
            # Binary case: the sign of the weighted margin picks the class.
            pred[:, 0] *= -1
            pred = pred.sum(axis=1)
            return self.classes_.take(pred > 0, axis=0)
        return self.classes_.take(np.argmax(pred, axis=1), axis=0)

    def predict_proba(self, X):
        n_classes = self.n_classes_
        if self.algorithm_ == 'SAMME.R':
            # The estimator weights are all 1 for SAMME.R.
            proba = sum(self._samme_proba(estimator, n_classes, X)
                        for estimator in self.estimators_)
        else:  # self.algorithm_ == 'SAMME'
            # Keras predict() already returns the class probabilities.
            proba = sum(estimator.predict(X) * w
                        for estimator, w in zip(self.estimators_,
                                                self.estimator_weights_))
        proba /= self.estimator_weights_.sum()
        proba = np.exp((1. / (n_classes - 1)) * proba)
        normalizer = proba.sum(axis=1)[:, np.newaxis]
        normalizer[normalizer == 0.0] = 1.0
        proba /= normalizer
        return proba
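
# ---------------------------------------------------------------------------
# Minimal usage sketch (an illustration, not part of the original module).
# The CNN below is a placeholder: any Keras model with a softmax output of
# size n_classes should work. The toy data encode each class as an image
# brightness level so the demo can learn something in a couple of epochs.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from keras.layers import Conv2D, Flatten, Dense

    n_classes = 3
    y = np.random.randint(0, n_classes, size=60)
    X = np.zeros((60, 8, 8, 1), dtype='float32')
    X[np.arange(60), :, :, 0] = y[:, None, None] / float(n_classes)  # brightness-coded classes

    cnn = Sequential([
        Conv2D(4, (3, 3), activation='relu', input_shape=(8, 8, 1)),
        Flatten(),
        Dense(n_classes, activation='softmax'),
    ])
    cnn.compile(loss='categorical_crossentropy', optimizer='adam',
                metrics=['accuracy'])

    clf = AdaBoostClassifier(base_estimator=cnn, n_estimators=3,
                             learning_rate=1, epochs=2)
    clf.fit(X, y, batch_size=16)
    print('train predictions:', clf.predict(X)[:10])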