def adaboost_predict (A, H, I, x, t):
    """Classify the samples in x using the first t rounds of a trained ensemble.

    A -- per-round alpha weights
    H -- per-round stump decision variables (passed to ds.stump_predict)
    I -- per-round chosen feature (column) indices
    x -- sample matrix, one row per sample
    t -- number of boosting rounds to use

    Returns np.sign of the weighted vote, one label per row of x.
    """
    votes = np.zeros(x.shape[0])
    for r in range(t):
        votes += A[r] * ds.stump_predict(x[:, I[r]], H[r])
    return np.sign(votes)
def adaboost_find_t (A, H, I, x, y):
    """Score every ensemble prefix on (x, y) and pick the best round count.

    Accumulates the weighted stump votes one round at a time, recording the
    misclassification rate of each prefix, then returns that error curve and
    the 1-based round count with the lowest error.

    NOTE(review): this name is redefined later in the file by a variant that
    returns the 0-based index instead — the later definition wins at import
    time; confirm which convention callers expect.

    Returns (HE, best_t) where HE[i] is the error of the first i+1 rounds.
    """
    n = x.shape[0]
    running = np.zeros(n)
    HE = []
    for r in range(len(A)):
        running += A[r] * ds.stump_predict(x[:, I[r]], H[r])
        # A prediction is wrong exactly when sign(vote) * label is negative.
        HE.append((np.sign(running) * y < 0).sum() / float(n))
    return HE, np.argmin(HE) + 1
def adaboost_find_t (A, H, I, x, y):
    """Score every ensemble prefix on (x, y) and return the best one's index.

    Same prefix-error computation as the earlier definition of this name
    (which this redefinition shadows), but the second return value here is
    the 0-based index of the minimum, not the 1-based round count.

    NOTE(review): the off-by-one difference against the sibling definition
    looks unintentional — verify which one callers rely on.

    Returns (HE, best_idx) where HE[i] is the error of the first i+1 rounds.
    """
    n = x.shape[0]
    running = np.zeros(n)
    HE = []
    for r in range(len(A)):
        running += A[r] * ds.stump_predict(x[:, I[r]], H[r])
        HE.append((np.sign(running) * y < 0).sum() / float(n))
    # First index of the minimum error (matches min-with-itemgetter semantics).
    best_idx = 0
    for j in range(1, len(HE)):
        if HE[j] < HE[best_idx]:
            best_idx = j
    return HE, best_idx
def adaboost_train (x, y, T):
    """Train an AdaBoost ensemble of single-feature decision stumps.

    x -- sample matrix, shape (n, cf): one row per sample, one column per feature
    y -- labels, shape (n,); treated as +/-1 (sign tests throughout)
    T -- number of boosting rounds

    Returns (A, H, I, TE):
      A  -- per-round alpha weights
      H  -- per-round stump decision variables (from ds.stump_fit)
      I  -- per-round chosen feature indices
      TE -- training error of the ensemble after each round
    """
    cf = x.shape[1]
    n = y.shape[0]
    # Bug fix: was bare `ones(n)` while every other numpy call in this file
    # is namespaced — use np.ones so the name is defined regardless of how
    # numpy was imported.
    weights = np.ones(n) / n
    H = []
    A = []
    I = []
    TE = []
    for t in range(T):
        pplus = sum(weights * (y > 0))
        # Train a stump on every feature and keep the one that works best.
        decision_vars = []
        score = []
        we = []
        for idx in range(cf):
            f = x[:, idx]
            (dv, err) = ds.stump_fit(f, y, weights, pplus)
            we.append(err)
            decision_vars.append(dv)
            # A stump is useful if its weighted error is far from 0.5 in
            # either direction.
            score.append(abs(.5 - err))
        # Bug fix: Python-2-only print statement; this single-expression form
        # is valid in both Python 2 and 3.
        print("Round: %d %s" % (t, datetime.now()))
        # Choose this round's feature and its stump.
        I.append(np.argmax(score))
        H.append(decision_vars[I[t]])
        eps = we[I[t]]
        # Alpha for this round's classifier.
        # NOTE(review): eps == 0 or eps == 1 would divide by zero / log(0)
        # here — presumably ds.stump_fit never returns those extremes; verify.
        A.append(.5 * math.log((1 - eps) / eps))
        # Reweight samples: up-weight mistakes, down-weight correct calls,
        # then renormalize so the weights stay a distribution.
        numerators = weights * np.exp(-A[t] * y * ds.stump_predict(x[:, I[t]], H[t]))
        Z = numerators.sum()
        weights = numerators / Z
        # Track the overall training error of the ensemble so far.
        y_hat = adaboost_predict(A, H, I, x, len(A))
        TE.append((y_hat * y < 0).sum() / float(n))
    return A, H, I, TE