import numpy as np

import decision_stump


def fit(self, X, y, n_estimators):
    '''
    Parameters
    ----------
    X : shape (n_samples, n_features)
        Training data
    y : shape (n_samples,)
        Target values, 1 or -1
    n_estimators : int
        The number of estimators at which boosting is terminated
    '''
    n_samples = X.shape[0]

    self.__alpha = np.zeros(n_estimators)
    self.__estimators = []

    # start from uniform sample weights
    w = np.full(n_samples, 1 / n_samples)
    for i in range(n_estimators):
        # fit a weak learner (decision stump) to the current sample weights
        model = decision_stump.DecisionStump()
        model.fit(X, y, w)
        h = model.predict(X)

        # weighted misclassification rate of this round's stump
        eta = np.sum(w[np.flatnonzero(h != y)]) / np.sum(w)
        # beta = sqrt((1 - eta) / eta); the epsilon guards against division
        # by zero when the stump classifies every sample correctly
        beta = np.sqrt((1 - eta) / (eta + 1e-8))
        # up-weight misclassified samples (y * h = -1), down-weight the rest
        w *= beta ** (-y * h)

        # estimator weight: alpha = log(beta) = 0.5 * log((1 - eta) / eta)
        self.__alpha[i] = np.log(beta)
        self.__estimators.append(model)
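
# A minimal companion predict for the fit above (a sketch, not taken from the
# original source). It assumes only what fit stores: the per-round weights in
# self.__alpha and the fitted stumps in self.__estimators. The ensemble
# prediction is the sign of the alpha-weighted vote of the stumps.
def predict(self, X):
    '''
    Parameters
    ----------
    X : shape (n_samples, n_features)
        Data to predict

    Returns
    -------
    y : shape (n_samples,)
        Predicted labels, 1 or -1
    '''
    scores = sum(alpha * model.predict(X)
                 for alpha, model in zip(self.__alpha, self.__estimators))
    return np.sign(scores)
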
def check_decision_stump():
    np.random.seed(8)
    test_sample = np.random.rand(5, 5)

    try:
        import decision_stump
        # stump with sign s=1, threshold b=0.5, feature index d=3
        test_decision_stump = decision_stump.DecisionStump(1, 0.5, 3)
        predictions = test_decision_stump.predict(test_sample.tolist())
    except Exception:
        # a missing module or a crash during predict scores zero
        return 0

    # half credit (0.5) only if the predictions match the expected labels
    if np.squeeze(predictions).tolist() == [1, 1, 1, -1, -1]:
        score = 0.5
    else:
        score = 0
    return round(score, 1)
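
# The snippets here construct DecisionStump(s, b, d) with a sign s in {1, -1},
# a threshold b, and a feature index d. A common formulation for such a stump
# is h(x) = s if x[d] > b else -s. The sketch below is an assumed illustration
# of that interface, not the graded module itself, so it is not guaranteed to
# reproduce the expected labels in the checker above.
class DecisionStump:
    def __init__(self, s=1, b=0.0, d=0):
        self.s = s  # sign assigned to the side above the threshold
        self.b = b  # decision threshold
        self.d = d  # feature index the stump splits on

    def predict(self, X):
        X = np.asarray(X, dtype=float)
        # +s where feature d exceeds the threshold b, -s elsewhere
        return np.where(X[:, self.d] > self.b, self.s, -self.s)
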
Example #3

import numpy as np
from sklearn.metrics import accuracy_score  # assumed source of accuracy_score

import boosting
import decision_stump

# X_train, X_test, y_train, y_test come from an earlier (not shown) 80/20
# split of a 4-feature dataset, as the assertions below verify
assert len(X_train) == 80
assert len(X_train[0]) == 4
assert len(X_test) == 20
assert len(X_test[0]) == 4
assert len(y_train) == 80
assert len(y_test) == 20

# set classifiers: one stump per combination of sign s, threshold b
# (51 values in [0, 10]), and feature index d, i.e. 2 * 51 * 4 = 408 stumps
h_set = set()
s_set = {1, -1}
b_set = set(np.linspace(0, 10, 51))
d_set = {0, 1, 2, 3}
for s in s_set:
    for b in b_set:
        for d in d_set:
            h_set.add(decision_stump.DecisionStump(s, b, d))

# training
Adas = []
for idx, T in enumerate([10, 20, 30]):
    Adas.append(boosting.AdaBoost(h_set, T=T))
    Adas[idx].train(X_train, y_train)


# testing
Ada_preds = []
Ada_accus = []
for Ada in Adas:
    pred = Ada.predict(X_test)
    Ada_preds.append(pred)
    Ada_accus.append(accuracy_score(pred, y_test))
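
# A hedged sketch of the boosting.AdaBoost interface used above; the real
# module is not shown here. Only the (h_set, T=...) constructor and the
# train/predict names are taken from the snippet; the internals below are
# assumptions. Each round picks the stump in h_set with the lowest weighted
# error, re-weights the samples, and predict returns the sign of the
# alpha-weighted vote.
class AdaBoost:
    def __init__(self, h_set, T):
        self.h_set = list(h_set)  # candidate weak classifiers
        self.T = T                # number of boosting rounds

    def train(self, X, y):
        X, y = np.asarray(X, dtype=float), np.asarray(y)
        w = np.full(len(y), 1 / len(y))  # uniform initial sample weights
        self.alphas, self.stumps = [], []
        for _ in range(self.T):
            # pick the weak classifier with the smallest weighted error
            errs = np.array([np.sum(w[h.predict(X) != y]) for h in self.h_set])
            best = self.h_set[int(np.argmin(errs))]
            eps = float(np.clip(errs.min() / np.sum(w), 1e-8, 1 - 1e-8))
            alpha = 0.5 * np.log((1 - eps) / eps)
            # up-weight the samples this round's stump got wrong
            w *= np.exp(-alpha * y * best.predict(X))
            w /= np.sum(w)
            self.alphas.append(alpha)
            self.stumps.append(best)

    def predict(self, X):
        X = np.asarray(X, dtype=float)
        score = sum(a * h.predict(X) for a, h in zip(self.alphas, self.stumps))
        return np.sign(score)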