def cross():
    """Run 10-fold cross-validation of AdaBoost on the vote dataset.

    Labels are remapped from {0, 1} to {-1.0, +1.0} (the AdaBoost sign
    convention) and the averaged test accuracy / error / AUC over all
    folds is printed at the end.
    """
    train, target = load_vote()
    # Class 0 -> -1.0, anything else -> +1.0.
    target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, target))

    k = 10
    n_samples = len(train)
    totals = {'acc': 0, 'err': 0, 'auc': 0}

    for fold, (start, end) in enumerate(k_fold_cross_validation(n_samples, k), 1):
        print("====================Fold %s============" % fold)
        # Rows outside [start, end) form this fold's training split.
        fold_train = np.vstack(
            (train[range(0, start)], train[range(end, n_samples)]))
        fold_test = train[range(start, end)]
        fold_train_target = np.append(target[range(0, start)],
                                      target[range(end, n_samples)])
        fold_test_target = target[range(start, end)]

        booster = AdaBoost(OptimalWeakLearner())
        acc, err, auc = booster.boost(fold_train, fold_train_target,
                                      fold_test, fold_test_target)

        totals['auc'] += auc
        totals['acc'] += acc
        totals['err'] += err

    print("Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % (
        totals['acc'] / k, totals['err'] / k, totals['auc'] / k))
# ---- Esempio n. 2 (score: 0) -- scraped example separator, not code ----
    def train(self, x):
        """Train one one-vs-rest AdaBoost(SVM) model per class.

        Assumes ``x`` is ordered by label 0, 1, ..., 9 with the same
        number of samples per class (translated from the original
        Japanese docstring).  Trained models are appended to
        ``self.model_list`` in label order.
        """

        # Number of samples per class.
        num = len(x) // self.class_num

        # The original bound an unused `j` via enumerate and carried
        # commented-out dead code; both removed.
        for label in tqdm(range(self.class_num)):

            # Positive examples: the contiguous block for this label.
            label_x = x[label * num:(label + 1) * num]
            # Negative examples: `num` rows drawn from all other classes.
            # NOTE(review): np.random.choice samples WITH replacement by
            # default, so duplicate negatives are possible -- confirm
            # this is intended.
            other_x = np.delete(
                x, [i for i in range(label * num, (label + 1) * num)], axis=0)
            other_x = other_x[np.random.choice(len(x) - num, num), :]
            vs_x = np.concatenate([label_x, other_x])

            # First `num` rows are the positive class (+1), the rest -1.
            vs_y = np.array([1 if i < num else -1 for i in range(num * 2)])

            ImageSize = 28
            binary_SVM = SVM(ImageSize**2)
            adaboost = AdaBoost(binary_SVM, 10)
            adaboost.train(vs_x, vs_y)
            self.model_list.append(adaboost)
    def active_learning(self, train, train_target, test, test_target):
        """Uncertainty-sampling active learning with AdaBoost.

        Starts from 5% of the training data as the "labelled" pool and,
        in 5% increments up to 50%, moves in the remaining samples the
        current ensemble is least confident about (smallest |H(x)|).
        Each round's (accuracy, error, auc) is appended to self.result.
        """
        param = 0.05
        increment = 0.05
        init_size = int(len(train) * param)
        increment_size = int(len(train) * increment)

        # X/Y: currently labelled pool; R/RY: remaining unlabelled pool.
        X = train[:init_size]
        Y = train_target[:init_size]
        R = train[init_size:]
        RY = train_target[init_size:]

        while param < 0.5:
            print "labeled data: %.2f%%" % (100.0 * len(X) / len(train))
            adaboost = AdaBoost(OptimalWeakLearner())
            acc, err, auc = adaboost.boost(X, Y, test, test_target)
            self.result.append((acc, err, auc))
            # |H(x)| is the ensemble margin: small values = least confident.
            H = adaboost.hypothesis(R)
            H_abs = np.abs(H)
            sorted_indices = H_abs.argsort().tolist()
            selected = sorted_indices[:increment_size]
            remained = sorted_indices[increment_size:]

            # Move the most uncertain samples (with their TRUE labels,
            # not the predicted ones) into the labelled pool.
            X = np.vstack((X, R[selected]))
            # Y = np.append(Y, adaboost.sign(H[selected]))
            Y = np.append(Y, RY[selected])
            R = R[remained]
            RY = RY[remained]
            param += increment
    def train(self,
              train,
              train_target,
              test,
              test_target,
              T=100,
              percentage=0.5):
        k, n = self.selected_code.shape

        train = train[:int(len(train) * percentage)]
        train_target = train_target[:int(len(train_target) * percentage)]
        first_time = True
        predictors = None
        for f in range(n):
            print "Run Adaboost on function %f" % f
            codes = self.selected_code[:, f]
            labels = self.convert_to_binary(train_target, codes)
            test_labels = self.convert_to_binary(test_target, codes)
            learner = OptimalWeakLearner()
            if not first_time:
                learner.set_predictors(predictors)
            adaboost = AdaBoost(learner)
            adaboost.boost(train,
                           labels,
                           test,
                           test_labels,
                           T,
                           calculate_auc=False)
            self.functions.append(adaboost)
            if first_time:
                first_time = False
                predictors = learner.get_predictors()
# ---- Esempio n. 5 (score: 0) -- scraped example separator, not code ----
    def train(self, x):
        """
        Train one one-vs-one AdaBoost(SVM) model per class pair.

        Assumes x is ordered 0, 1, ..., 9 with the same number of
        samples per class (translated from the original Japanese
        docstring).
        """

        # Number of samples per class.
        num = len(x) // self.class_num

        for j, combi in enumerate(tqdm(self.combinations)):

            # Concatenate the sample blocks for classes combi[0] and combi[1].
            vs_x = np.concatenate([
                x[num * combi[0]:num * (combi[0] + 1)], x[num * combi[1]:num *
                                                          (combi[1] + 1)]
            ],
                                  axis=0)
            # combi[0] is labelled +1, combi[1] is labelled -1.
            vs_y = np.array([1 if i < num else -1 for i in range(num * 2)])

            # Train (earlier in-place variant kept for reference):
            # self.model_list[j].train(vs_x, vs_y)

            ImageSize = 28
            binary_SVM = SVM(ImageSize**2)
            adaboost = AdaBoost(binary_SVM, 10)
            adaboost.train(vs_x, vs_y)
            self.model_list.append(adaboost)
def cross():
    """10-fold cross-validation of AdaBoost on the (shuffled) crx dataset.

    Prints per-fold headers and the averaged test accuracy / error / AUC.
    """
    train, target = load_crx()
    train, target = shuffle(train, target)
    # Remap {0, 1} labels to the {-1.0, +1.0} AdaBoost convention.
    target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, target))

    k = 10
    train_size = len(train)
    test_index_generator = k_fold_cross_validation(train_size, k)
    fold = 1
    overall_acc = 0
    overall_error = 0
    overall_auc = 0

    for start, end in test_index_generator:
        print "====================Fold %s============" % fold
        # Rows outside [start, end) form this fold's training split.
        k_fold_train = np.vstack((train[range(0, start)], train[range(end, train_size)]))
        test = train[range(start, end)]
        train_target = np.append(target[range(0, start)], target[range(end, train_size)])
        test_target = target[range(start, end)]

        adaboost = AdaBoost(OptimalWeakLearner())
        acc, err, auc = adaboost.boost(k_fold_train, train_target, test, test_target)

        overall_auc += auc
        overall_acc += acc
        overall_error += err
        fold += 1

    print "Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % (
        overall_acc / k, overall_error / k, overall_auc / k)
    def active_learning(self, train, train_target, test, test_target):
        """Active-learning loop: repeatedly boost on the labelled pool and
        pull in the samples with the smallest ensemble margin |H(x)|.

        Grows the labelled pool from 5% to 50% of ``train`` in 5% steps,
        appending (accuracy, error, auc) per round to ``self.result``.
        """
        step = 0.05
        seed_size = int(len(train) * step)
        batch_size = int(len(train) * step)

        # labeled_* : current "labelled" pool; pool_* : remaining samples.
        labeled_x = train[:seed_size]
        labeled_y = train_target[:seed_size]
        pool_x = train[seed_size:]
        pool_y = train_target[seed_size:]

        fraction = step
        while fraction < 0.5:
            print("labeled data: %.2f%%" % (100.0 * len(labeled_x) / len(train)))
            booster = AdaBoost(OptimalWeakLearner())
            acc, err, auc = booster.boost(labeled_x, labeled_y, test, test_target)
            self.result.append((acc, err, auc))

            # Rank the pool by |H(x)| ascending: least confident first.
            order = np.abs(booster.hypothesis(pool_x)).argsort().tolist()
            pick = order[:batch_size]
            keep = order[batch_size:]

            # Promote the most uncertain samples with their true labels.
            labeled_x = np.vstack((labeled_x, pool_x[pick]))
            labeled_y = np.append(labeled_y, pool_y[pick])
            pool_x = pool_x[keep]
            pool_y = pool_y[keep]
            fraction += step
# ---- Esempio n. 8 (score: 0) -- scraped example separator, not code ----
def optimal_weak_learner_on_random_data():
    """Boost on uniformly random subsets of spambase (5%..45% of the
    training rows, in 5% steps) and print per-size (acc, err, auc)."""
    data, target = load_spambase()
    train, test, train_target, test_target = train_test_shuffle_split(
        data, target, len(data) / 10)
    # Remap {0, 1} labels to {-1.0, +1.0}.
    to_sign = lambda v: -1.0 if v == 0 else 1.0
    train_target = np.array(map(to_sign, train_target))
    test_target = np.array(map(to_sign, test_target))

    indices = range(len(train))
    results = []
    fraction = 0.05
    while fraction < 0.5:
        print("Choose %.2f%% of data" % (fraction * 100))
        # Uniform subset without replacement.
        picked = random.sample(indices, int(len(indices) * fraction))

        subset_x = train[picked]
        subset_y = train_target[picked]

        booster = AdaBoost(OptimalWeakLearner())
        acc, err, auc = booster.boost(subset_x, subset_y, test, test_target)
        results.append((acc, err, auc))
        fraction += 0.05

    print(results)
def entire():
    """Train AdaBoost (optimal weak learner) on the full crx dataset with
    a 10% held-out test split, treating every column as discrete."""
    data, target = load_crx()
    holdout = len(data) / 10  # Python 2 integer division: 10% test size.
    train, test, train_target, test_target = train_test_shuffle_split(
        data, target, holdout)
    # {0, 1} labels -> {-1.0, +1.0}.
    remap = lambda v: -1.0 if v == 0 else 1.0
    train_target = np.array(map(remap, train_target))
    test_target = np.array(map(remap, test_target))

    booster = AdaBoost(OptimalWeakLearner())
    booster.boost(train, train_target, test, test_target,
                  discrete_features=range(train.shape[1]))
def random_weak_learner():
    """Baseline run: AdaBoost with a randomly-choosing weak learner on
    spambase for T=200 rounds (compare against the optimal learner)."""
    print '==============Random Weak Learner============'
    train, target = load_spambase()
    # 10% held-out test split (Python 2 integer division).
    train, test, train_target, test_target = train_test_shuffle_split(train, target, len(train) / 10)
    # Remap {0, 1} labels to the {-1.0, +1.0} AdaBoost convention.
    train_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, train_target))
    test_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, test_target))
    adaboost = AdaBoost(RandomChooseLeaner())
    adaboost.boost(train, train_target, test, test_target, T=200)
# ---- Esempio n. 11 (score: 0) -- scraped example separator, not code ----
def random_weak_learner():
    """Boost a weak learner that picks its stump at random
    (RandomChooseLeaner) for 200 rounds on spambase -- a baseline for
    the optimal-weak-learner experiments."""
    print('==============Random Weak Learner============')
    features, labels = load_spambase()
    holdout = len(features) / 10  # Py2 int division: 10% test split.
    train, test, train_target, test_target = train_test_shuffle_split(
        features, labels, holdout)
    # {0, 1} -> {-1.0, +1.0}.
    sign = lambda v: -1.0 if v == 0 else 1.0
    train_target = np.array(map(sign, train_target))
    test_target = np.array(map(sign, test_target))
    AdaBoost(RandomChooseLeaner()).boost(
        train, train_target, test, test_target, T=200)
def entire():
    """Train AdaBoost (optimal weak learner) on the full vote dataset,
    holding out 10% as the test split; all columns treated as discrete."""
    data, target = load_vote()
    train, test, train_target, test_target = train_test_shuffle_split(
        data, target,
        len(data) / 10)
    # Remap {0, 1} labels to the {-1.0, +1.0} AdaBoost convention.
    train_target = np.array(
        map(lambda v: -1.0 if v == 0 else 1.0, train_target))
    test_target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, test_target))

    adaboost = AdaBoost(OptimalWeakLearner())
    adaboost.boost(train,
                   train_target,
                   test,
                   test_target,
                   discrete_features=range(train.shape[1]))
# ---- Esempio n. 13 (score: 0) -- scraped example separator, not code ----
    def train(self, train, train_target, test, test_target, T=100, percentage = 0.5):
        k, n = self.selected_code.shape

        train = train[:int(len(train) * percentage)]
        train_target = train_target[:int(len(train_target) * percentage)]
        first_time = True
        predictors = None
        for f in range(n):
            print "Run Adaboost on function %f" % f
            codes = self.selected_code[:, f]
            labels = self.convert_to_binary(train_target, codes)
            test_labels = self.convert_to_binary(test_target, codes)
            learner = OptimalWeakLearner()
            if not first_time:
                learner.set_predictors(predictors)
            adaboost = AdaBoost(learner)
            adaboost.boost(train, labels, test, test_labels, T, calculate_auc=False)
            self.functions.append(adaboost)
            if first_time:
                first_time = False
                predictors = learner.get_predictors()
def random_c():
    """AdaBoost on uniformly random subsets (5%..45%) of the crx dataset,
    treating the categorical columns as discrete features; prints the
    list of (accuracy, error, auc) tuples per subset size."""
    data, target = load_crx()
    train, test, train_target, test_target = train_test_shuffle_split(
        data, target, len(data) / 10)
    # {0, 1} labels -> {-1.0, +1.0}.
    remap = lambda v: -1.0 if v == 0 else 1.0
    train_target = np.array(map(remap, train_target))
    test_target = np.array(map(remap, test_target))

    row_ids = range(len(train))
    results = []
    fraction = 0.05
    while fraction < 0.5:
        print("Choose %.2f%% of data" % (fraction * 100))
        # Random subset without replacement.
        subset = random.sample(row_ids, int(len(row_ids) * fraction))

        sub_x = train[subset]
        sub_y = train_target[subset]

        booster = AdaBoost(OptimalWeakLearner())
        # Columns 0, 3-6, 8, 9, 11, 12 of crx are categorical.
        acc, err, auc = booster.boost(
            sub_x, sub_y, test, test_target,
            discrete_features=[0, 3, 4, 5, 6, 8, 9, 11, 12])
        results.append((acc, err, auc))
        fraction += 0.05

    print(results)
# ---- Esempio n. 15 (score: 0) -- scraped example separator, not code ----
# Script fragment: uncertainty-sampling active learning with AdaBoost.
# NOTE(review): `train`, `train_target`, `test`, `test_target`,
# `init_size`, `increment_size`, `percentage` and `increment` are
# defined earlier in the (unseen) script -- presumably a 5% seed and
# 5% increments, matching the loop bound below; confirm against caller.
init_dataset = train[:init_size]
init_target = train_target[:init_size]

remain_dataset = train[init_size:]
remain_target = train_target[init_size:]

# Active learning
# X/Y: labelled pool; R/RY: remaining unlabelled pool.
X = init_dataset
Y = init_target
R = remain_dataset
RY = remain_target
result = []
while percentage < 0.5:
    print "labeled data: %.2f%%" % (100.0 * len(X) / len(train))
    adaboost = AdaBoost(OptimalWeakLearner())
    acc, err, auc = adaboost.boost(X, Y, test, test_target)
    result.append((acc, err, auc))
    # |H(x)| is the ensemble margin: small values = least confident.
    H = adaboost.hypothesis(R)
    H_abs = np.abs(H)
    sorted_indices = H_abs.argsort().tolist()
    selected = sorted_indices[:increment_size]
    remained = sorted_indices[increment_size:]

    # Move the most uncertain samples (with their true labels) into
    # the labelled pool.
    X = np.vstack((X, R[selected]))
    # Y = np.append(Y, adaboost.sign(H[selected]))
    Y = np.append(Y, RY[selected])
    R = R[remained]
    RY = RY[remained]
    percentage += increment
# ---- Esempio n. 16 (score: 0) -- scraped example separator, not code ----
    # Fragment of an unseen main(): load digit images, train a
    # multiclass (one-vs-one) SVM classifier and evaluate it.
    # NOTE(review): TrainingSampleNum is defined earlier in the unseen
    # part of this function -- confirm against the full source.
    TestSampleNum = 100  # total number of test samples
    ClassNum = 10  # number of classes (10 digits here)
    # ImageSize = 8  # image size (8x8 variant, disabled)
    ImageSize = 28
    # TrainingDataFile = './Images/TrainingCompressionSamples/{0:1d}-{1:04d}.png'
    # TestDataFile = './Images/TestCompressionSamples/{0:1d}-{1:04d}.png'
    TrainingDataFile = './Images/TrainingSamples/{0:1d}-{1:04d}.png'
    TestDataFile = './Images/TestSamples/{0:1d}-{1:04d}.png'

    train_x, train_t = LoadDataset(TrainingDataFile, TrainingSampleNum,
                                   ClassNum, ImageSize)
    test_x, test_t = LoadDataset(TestDataFile, TestSampleNum, ClassNum,
                                 ImageSize)

    # AdaBoost over a binary SVM base learner (constructed but only the
    # one_vs_one(SVM, ...) variant below is actually used).
    binary_SVM = SVM(ImageSize**2)
    adaboost = AdaBoost(binary_SVM, 5)

    # multi = one_vs_one(binary_SVM, ClassNum, ImageSize**2)
    # multi = one_vs_other(binary_SVM, ClassNum, ImageSize**2)

    multi = one_vs_one(SVM, ClassNum, ImageSize**2)
    # multi = one_vs_other(adaboost, ClassNum, ImageSize**2)

    # Train.
    multi.train(train_x)
    # Predict.
    y = multi.eval(test_x)

    TestResult(y, test_t, ClassNum)
# Script fragment: uncertainty-sampling active learning with AdaBoost.
# NOTE(review): `train`, `train_target`, `test`, `test_target`,
# `init_size`, `increment_size`, `percentage` and `increment` come from
# the unseen part of this script -- verify against the full source.
init_dataset = train[:init_size]
init_target = train_target[:init_size]

remain_dataset = train[init_size:]
remain_target = train_target[init_size:]


# Active learning
# X/Y: labelled pool; R/RY: remaining unlabelled pool.
X = init_dataset
Y = init_target
R = remain_dataset
RY = remain_target
result = []
while percentage < 0.5:
    print "labeled data: %.2f%%" % (100.0 * len(X) / len(train))
    adaboost = AdaBoost(OptimalWeakLearner())
    acc, err, auc = adaboost.boost(X, Y, test, test_target)
    result.append((acc, err, auc))
    # Small |H(x)| = low ensemble confidence; sort ascending.
    H = adaboost.hypothesis(R)
    H_abs = np.abs(H)
    sorted_indices = H_abs.argsort().tolist()
    selected = sorted_indices[:increment_size]
    remained = sorted_indices[increment_size:]

    # Label the most uncertain samples with their true targets.
    X = np.vstack((X, R[selected]))
    # Y = np.append(Y, adaboost.sign(H[selected]))
    Y = np.append(Y, RY[selected])
    R = R[remained]
    RY = RY[remained]
    percentage += increment
# ---- Esempio n. 18 (score: 0) -- scraped example separator, not code ----
        return cum_x[:, x + w,
                     y + h] - cum_x[:, x + w,
                                    y] - cum_x[:, x, y + h] + cum_x[:, x, y]


if __name__ == '__main__':
    # Train an ECOC multiclass classifier over random HAAR features of
    # binarized MNIST images, using AdaBoost(decision stumps) as the
    # per-column binary learner, and report training accuracy.
    print('Load MINST')
    x, y = load_mnist(path='minst')
    # Binarize pixels, then build 2-D cumulative sums (integral images)
    # so rectangle sums can be evaluated in O(1).
    x = x > 0
    cum_x = np.cumsum(np.cumsum(x, axis=1), axis=2)
    print('Traning ECOC')

    n, width, height = x.shape
    # 100 random rectangles -> 200 feature columns (horizontal +
    # vertical HAAR response per rectangle).
    features = np.zeros((n, 200))
    for i in range(100):
        # Random even width in [8, 20]; height chosen so the rectangle
        # area stays roughly in [130, 170] pixels.
        w = (np.random.randint(3, 10) + 1) * 2
        h = np.random.choice([k for k in range(int(130 / w), int(170 / w), 2)],
                             1)[0]
        x_pos = np.random.randint(int(width - w))
        y_pos = np.random.randint(int(height - h))

        haar_h = HAARFeature(x_pos, y_pos, w, h, 'horizontal')
        features[:, i] = haar_h.feature_value(cum_x)
        haar_v = HAARFeature(x_pos, y_pos, w, h, 'vertical')
        features[:, 100 + i] = haar_v.feature_value(cum_x)

    # 50-column ECOC code; each column trains a fresh 200-round AdaBoost.
    ecoc = ECOC(lambda: AdaBoost(200, OptimalDecisionStump), k=50)
    ecoc.fit(features, y)
    pred = ecoc.predict(features)
    print('Training Accuracy', np.equal(pred, y).mean())
def optimal_weak_learner():
    """10-fold cross-validation of AdaBoost (optimal weak learner) on
    spambase, with diagnostic plots for the first fold: per-round
    test/train/round error, per-round AUC, and the final ROC curve.
    Prints averaged accuracy / error / AUC over all folds."""
    print '==============Optimal Weak Learner============'
    train, target = load_spambase()
    train, target = shuffle(train, target)
    # Remap {0, 1} labels to the {-1.0, +1.0} AdaBoost convention.
    target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, target))

    k = 10
    train_size = len(train)
    test_index_generator = k_fold_cross_validation(train_size, k)
    fold = 1
    overall_acc = 0
    overall_error = 0
    overall_auc = 0

    for start, end in test_index_generator:
        print "====================Fold %s============" % fold
        # Rows outside [start, end) form this fold's training split.
        k_fold_train = np.vstack((train[range(0, start)], train[range(end, train_size)]))
        test = train[range(start, end)]
        train_target = np.append(target[range(0, start)], target[range(end, train_size)])
        test_target = target[range(start, end)]

        adaboost = AdaBoost(OptimalWeakLearner())
        # Only record per-round curves on the first fold.
        plot = False
        if fold == 1:
            plot = True
        else:
            plot = False
        acc, err, auc = adaboost.boost(k_fold_train, train_target, test, test_target, plot=plot)

        if plot:
            # Per-round diagnostics recorded by boost(); each array holds
            # (round, value) pairs.
            test_err_points = np.array(adaboost.test_err_array)
            train_err_points = np.array(adaboost.train_err_array)
            auc_points = np.array(adaboost.test_auc_array)
            round_err_points = np.array(adaboost.weighted_err_array)
            plt.xlabel('Round')
            plt.ylabel('Error Rate')
            plt.plot(test_err_points[:, 0], test_err_points[:, 1], c='r', label='Test Error')
            plt.plot(test_err_points[:, 0], train_err_points[:, 1], c='g', label='Train Error')
            plt.plot(test_err_points[:, 0], round_err_points[:, 1], c='b', label='Round Error')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)
            plt.show()

            plt.xlabel('Round')
            plt.ylabel('AUC')
            plt.plot(test_err_points[:, 0], auc_points[:, 1], c='r', label='AUC')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)
            plt.show()

        overall_auc += auc
        overall_acc += acc
        overall_error += err

        if fold == 1:
            # ROC scatter for the first fold's final hypothesis.
            hypo = adaboost.hypothesis(test)
            roc_points = roc(test_target, hypo, 1.0, -1.0)
            plt.xlabel('FPR')
            plt.ylabel('TPR')
            plt.xlim(xmin=0)
            plt.ylim(ymin=0)
            plt.scatter(roc_points[:, 1], roc_points[:, 0])
            plt.show()
        fold += 1

    print "Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % (
        overall_acc / k, overall_error / k, overall_auc / k)
# ---- Esempio n. 20 (score: 0) -- scraped example separator, not code ----
from boosting import AdaBoost, OptimalDecisionStump

if __name__ == '__main__':
    # Fit AdaBoost(decision stumps) on the full spambase data and rank
    # feature importance by the margin-weighted contribution of each
    # stump's split feature.

    k = 10
    T = 300

    print('Original Dataset')
    print('Reading data')
    # Last column of spambase.data is the class label.
    data = np.genfromtxt('../HW1/data/spambase/spambase.data', delimiter=',')
    x = data[:, :-1]
    y = data[:, -1]

    print('Training model')
    # sklearn equivalent kept for reference:
    # bst = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1, splitter='best', criterion='entropy'), n_estimators=T, algorithm='SAMME.R')
    bst = AdaBoost(T, OptimalDecisionStump)
    bst.fit(x, y, test_data=(x, y))
    print('Analyzing')
    # score[f] accumulates y * alpha_t * h_t(x) over stumps splitting
    # on feature f (larger = more influential).
    score = np.zeros(x.shape[1])
    # sklearn equivalent kept for reference:
    # for i in range(T):
    # sub_tree = bst.estimators_[i]
    # score[sub_tree.tree_.feature] += (y * bst.estimator_weights_[i] * sub_tree.predict(x)).sum()
    for i in range(T):
        sub_tree = bst.classifiers[i]
        score[sub_tree.feature] += (y * bst.alpha[i] *
                                    sub_tree.predict(x)).sum()
    # Normalize to fractions, then report the 15 most important features.
    score /= score.sum()
    print('Top 15 features', np.argsort(score)[::-1][:15])
    pred = bst.predict(x)
    print("Accuracy:", np.mean(np.equal(pred, y)))
# ---- Esempio n. 21 (score: 0) -- scraped example separator, not code ----
def optimal_weak_learner():
    """10-fold cross-validation of AdaBoost (optimal weak learner) on
    spambase; for the first fold, plots per-round test/train/round
    error, per-round AUC, and the final ROC curve.  Prints averaged
    accuracy / error / AUC over all folds."""
    print '==============Optimal Weak Learner============'
    train, target = load_spambase()
    train, target = shuffle(train, target)
    # Remap {0, 1} labels to the {-1.0, +1.0} AdaBoost convention.
    target = np.array(map(lambda v: -1.0 if v == 0 else 1.0, target))

    k = 10
    train_size = len(train)
    test_index_generator = k_fold_cross_validation(train_size, k)
    fold = 1
    overall_acc = 0
    overall_error = 0
    overall_auc = 0

    for start, end in test_index_generator:
        print "====================Fold %s============" % fold
        # Rows outside [start, end) form this fold's training split.
        k_fold_train = np.vstack(
            (train[range(0, start)], train[range(end, train_size)]))
        test = train[range(start, end)]
        train_target = np.append(target[range(0, start)],
                                 target[range(end, train_size)])
        test_target = target[range(start, end)]

        adaboost = AdaBoost(OptimalWeakLearner())
        # Only record per-round curves on the first fold.
        plot = False
        if fold == 1:
            plot = True
        else:
            plot = False
        acc, err, auc = adaboost.boost(k_fold_train,
                                       train_target,
                                       test,
                                       test_target,
                                       plot=plot)

        if plot:
            # Per-round diagnostics recorded by boost(); each array holds
            # (round, value) pairs.
            test_err_points = np.array(adaboost.test_err_array)
            train_err_points = np.array(adaboost.train_err_array)
            auc_points = np.array(adaboost.test_auc_array)
            round_err_points = np.array(adaboost.weighted_err_array)
            plt.xlabel('Round')
            plt.ylabel('Error Rate')
            plt.plot(test_err_points[:, 0],
                     test_err_points[:, 1],
                     c='r',
                     label='Test Error')
            plt.plot(test_err_points[:, 0],
                     train_err_points[:, 1],
                     c='g',
                     label='Train Error')
            plt.plot(test_err_points[:, 0],
                     round_err_points[:, 1],
                     c='b',
                     label='Round Error')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                       loc=3,
                       ncol=2,
                       mode="expand",
                       borderaxespad=0.)
            plt.show()

            plt.xlabel('Round')
            plt.ylabel('AUC')
            plt.plot(test_err_points[:, 0],
                     auc_points[:, 1],
                     c='r',
                     label='AUC')
            plt.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                       loc=3,
                       ncol=2,
                       mode="expand",
                       borderaxespad=0.)
            plt.show()

        overall_auc += auc
        overall_acc += acc
        overall_error += err

        if fold == 1:
            # ROC scatter for the first fold's final hypothesis.
            hypo = adaboost.hypothesis(test)
            roc_points = roc(test_target, hypo, 1.0, -1.0)
            plt.xlabel('FPR')
            plt.ylabel('TPR')
            plt.xlim(xmin=0)
            plt.ylim(ymin=0)
            plt.scatter(roc_points[:, 1], roc_points[:, 0])
            plt.show()
        fold += 1

    print "Overall test accuracy: %s, overall test error: %s, overall test auc: %s" % (
        overall_acc / k, overall_error / k, overall_auc / k)